str_cursor.sa


Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
 
---------------------------> Sather 1.1 source file <--------------------------
-- Copyright (C) International Computer Science Institute, 1994.
-- COPYRIGHT NOTICE: This code is provided "AS IS" WITHOUT ANY
-- WARRANTY and is subject to the terms of the SATHER LIBRARY GENERAL
-- PUBLIC LICENSE contained in the file "Doc/License" of the Sather
-- distribution.  The license is also available from ICSI, 1947
-- Center St., Suite 600, Berkeley CA 94704, USA.  
-- Sather 0.2 version by Steve Omohundro
-- Sather 1.0 version written by:David Schultz (dschultz@cs.nmt.edu)
-- Recent revision information at the end of the file
-- (gomes) April 95 incorporated get_string_upto_cut from jefu
-- (gomes) current_loc_string, current_line_str, skipping commented space
-- (gomes) [from dbailey@icsi]  get_block and skip_block
-- (gomes) Reformatted, added: has_error, current_line, error_string
-- (gomes) added get_rest_str, added comments from original version
----------> Please email comments to sather-bugs@icsi.berkeley.edu. <-----

class STR_CURSOR

class STR_CURSOR is -- Class for stepping through strings extracting information. For -- strings, cursors are used to sequentially read out the -- components of a string. They may be used to perform a similar -- function to that played by "scanf" in C without need for -- functions with a variable number of arguments. This approach is -- actually much more general since it allows for testing for the -- presence of an `BOOL', for example, at a given point in the -- string. If we were wrong about the structure of the string, -- then the attribute `error' will be set to a non-zero value. attr error:INT; readonly attr comment_char1,comment_char2: CHAR; readonly attr line_no:INT; readonly attr index:INT; readonly attr buf:STR; readonly attr is_done:BOOL; const Max_Real_Digits:INT := 30; const Max_Int_Digits:INT := 9; const Int_No_Bits:INT := 32; --error codes: const No_Error:INT := 0; const Bad_Digit:INT := 1; const Past_EOBuf:INT := 2; const Past_BOBuf:INT := 3; const Too_Many_Digits:INT := 4; const Cut_Set_Member_Not_Found:INT := 5; const Bad_Prefix:INT := 6; const Bad_Boolean:INT := 7; const Bad_Block: INT := 8; const Bad_Char: INT := 9; create(s:STR):SAME is res ::= new; if (void(s)) then res := void; else res.reassign(s); end; res.comment_char1 := ' '; res.comment_char2 := ' '; return (res); end; set_comment_syntax(char1: CHAR) is comment_char1 := char1; comment_char2 := ' '; end; set_comment_syntax(char1,char2: CHAR) is comment_char1 := char1; comment_char2 := char2; end; reassign(s:STR) is -- Change the string that `self' points to -- Clear any error value if (~void(self)) then buf := s; index := 0; line_no := 1; is_done := false; clear_error; if (void(buf) or (buf.length = 0)) then is_done := true; end; end; end; clear_error is error := No_Error; end; -- Reset the error value has_error: BOOL is -- Returns true if the cursor has encountered an error if error = No_Error then return false else return true end; end; error_string: STR is -- Returns a string version of the current error status case error when No_Error then return("No Cursor Error"); when Bad_Digit then return("Bad Digit"); when Past_EOBuf then return("Attempt to read past end"); when Past_BOBuf then return("Attempt to retract past beginning"); when Too_Many_Digits then return("Too many digits"); when Cut_Set_Member_Not_Found then return("Cut set member not found"); when Bad_Prefix then return("Bad prefix"); when Bad_Block then return("Bad block"); when Bad_Char then return("Bad char"); else return("Unknown Error!") end; end; clear is reassign(#STR); end; -- Reset to an empty string item:CHAR is -- Return the current item or '\0' if done if (is_done) then return ('\0'); else return (buf[index]); end; end; skip_space is -- Advance the scanner over space characters loop while!(~is_done); if (item.is_space) then advance_one_char; elsif skip_if_comment_start then -- do nothing else break!; end; end; end; skip_if_comment_start: BOOL is -- Returns true if a comment was skipped if (comment_char1 = ' ') then return(false); elsif (comment_char2 = ' ') then return(skip_single_char_comment) else return(skip_double_char_comment); end; end; private skip_single_char_comment: BOOL is if (item = comment_char1) then discard ::= get_up_to('\n'); advance_one_char; return(true); else return(false); end; end; private skip_double_char_comment: BOOL is if index > buf.size - 2 then return(false); elsif ((item = comment_char1) and (buf[index+1] = comment_char2)) then discard ::= get_up_to('\n'); advance_one_char; return(true); else return(false); end; end; skip_space:SAME is -- Same as skip_space, but return self skip_space; return self; end; skip_word is -- Skip characters until (not including) a space character is hit. loop until!(is_done or item.is_space); advance_one_char; end; end; skip_word:SAME is skip_word; return(self) end; -- Same as skip_word, but return self skip_thru(c:CHAR) is -- Skip characters until (not including) the character 'c' loop until!(is_done or (item = c)); advance_one_char; end; end; skip_thru(c:CHAR):SAME is -- Same as skip_thru, return self skip_thru(c); return self; end; -- Versions that skip upto a certain element of a string skip_thru(s:STR) is -- Skip characters upto (not including) the word "s" -- Translated from STR::search. if void(s) then return end; loop r:INT:=index.upto!(buf.size-s.size); match::=true; loop if buf.elt!(r)/=s.elt! then match:=false; break! end end; if match then index:=r; return; end; end; is_done:=true; return; end; skip_thru(s:STR):SAME is skip_thru(s); return self; end; skip_over(s:STR) is -- Skip characters upto _and including_ the word "s". skip_thru(s); if (~is_done) then index:=index+s.size; end; end; advance_one_char is -- Advance the cursor one position forward if (is_done) then error := Past_EOBuf; else index := index + 1; if (index >= buf.length) then is_done := true; elsif (item = '\n') then line_no := line_no + 1; end; end; end; advance_one_char:SAME is advance_one_char; return self; end; retract_one_char is if (index <= 0) then error := Past_BOBuf; else index := index - 1; if (item = '\n') then line_no := line_no - 1; end; is_done := false; end; end; retract_one_char:SAME is retract_one_char; return self; end; get_char:CHAR is res ::= item; advance_one_char; return (res); end; get_char(c: CHAR) is if item = c then advance_one_char; else error := Bad_Char; end; end; get_word:STR is res ::= #STR; skip_space; loop until!(is_done or item.is_space); res := res + get_char; end; return (res); end; get_word(max_char_count:INT):STR is --get a word up to max_char_count CHAR's long res ::= #STR; skip_space; loop until!(is_done or (max_char_count <= 0) or item.is_space); res := res + get_char; max_char_count := max_char_count - 1; end; return (res); end; get_up_to(c:CHAR):STR is res ::= #STR; loop until!(is_done or item = c); res := res + get_char; end; return (res); end; skip_block(begin_delim, fin_delim: CHAR) is if (item = begin_delim) then advance_one_char; loop if item=fin_delim then advance_one_char; return; elsif item = begin_delim then skip_block(begin_delim,fin_delim); elsif is_done then error := Bad_Block; return; else advance_one_char; end end end; end; get_block(begin_delim, fin_delim: CHAR): STR is -- Return a block if the current character = begin_delim res ::=#STR; if (item = begin_delim) then res := res + get_char; loop if item=fin_delim then res := res + get_char; return(res); elsif item = begin_delim then res := res + get_block(begin_delim,fin_delim); elsif is_done then error := Bad_Block; return(res); else res := res + get_char; end end; end; return(res); end; int:INT is --to support str_curs.sa's int which accepts any of the 4 formats: --1) decimal, 2) binary, 3) hex, 4) octal. --unlike str_curs.sa, this only sets error code and doesn't raise an --exception. --leading 0's in any format do not contribute towards an overflow error --all formats may use '-' to give the 2's complement of the int. res:INT; neg_sign:BOOL; skip_space; neg_sign := get_opt_sign; if (item = '0') then advance_one_char; case (item) when 'b' then advance_one_char; res := get_unsigned_unprefixed_binary; when 'o' then advance_one_char; res := get_unsigned_unprefixed_octal; when 'x' then advance_one_char; res := get_unsigned_unprefixed_hex; else retract_one_char; res := get_unsigned_int; end; --(case) else res := get_unsigned_int; end; if (neg_sign) then res := -res; end; return (res); end; get_int:INT is --decimal format optionally signed with '+', '-', or '\0' res:INT; neg_sign:BOOL; skip_space; neg_sign := get_opt_sign; res := get_unsigned_int; if (neg_sign) then res := -res; end; return (res); end; get_binary:INT is -- BIN ::= OPT_SIGN '0b' (0 | 1)+ -- OPT_SIGN ::= '+' | '-' | '\0' --and stops at the first non-binary digit found. --leading 0's ignored. -- '-' prefix gives 2's complement of following binary number. res:INT; neg_sign:BOOL; skip_space; neg_sign := get_opt_sign; if (get_char /= '0') then error := Bad_Prefix; return (res); elsif (get_char /= 'b') then error := Bad_Prefix; return (res); end; res := get_unsigned_unprefixed_binary; if (neg_sign) then res := -res; end; return (res); end; get_octal:INT is -- OCT ::= '0o' (0 .. 7)+ stoping at first non-octal digit found. --leading 0's ignored. -- '-' prefix gives 2's complement of following octal number. res:INT; neg_sign:BOOL; skip_space; neg_sign := get_opt_sign; if (get_char /= '0') then error := Bad_Prefix; return (res); elsif (get_char /= 'o') then error := Bad_Prefix; return (res); end; res := get_unsigned_unprefixed_octal; if (neg_sign) then res := -res; end; return (res); end; get_hex:INT is -- HEX ::= OPT_SIGN ('0x' | '0X') HEX_DIGIT+ -- OPT_SIGN ::= '+' | '-' | '\0' -- HEX_DIGIT ::= ('0' .. '9') | ('a' .. 'f') | ('A' .. 'F') -- '-' prefix gives 2's complement of following hex dnumber res:INT; neg_sign:BOOL; skip_space; neg_sign := get_opt_sign; if (get_char /= '0') then error := Bad_Prefix; return (res); elsif ((item /= 'x') and (item /= 'X')) then advance_one_char; error := Bad_Prefix; return (res); else --i.e., was 'x' or 'X' advance_one_char; end; res := get_unsigned_unprefixed_hex; if (neg_sign) then res := -res; end; return (res); end; get_flt:FLT is return (get_fltd.flt); end; get_fltd:FLTD is -- Accepts real numbers of format: -- FLTD ::= SIGNED_INT {'.' {UNSIGNED_INT} {'e' SIGNED_INT} -- SIGNED_INT ::= {'+' | '-' | '\0'} UNSIGNED_INT -- UNSIGNED_INT ::= (0 .. 9)+ -- no spaces allowed between components of FLTD res:FLTD; neg_sign:BOOL; skip_space; neg_sign := get_opt_sign; res := get_unsigned_int_as_fltd; if (item = '.') then --eat the '.' and read in an unsigned int as decimal fraction l->r. advance_one_char; res := res + get_frac; end; if (item = 'e') then --eat the 'e' and get the integer exponent advance_one_char; res := res * get_opt_int.fltd.exp10; end; if (neg_sign) then res := -res; end; return (res); end; get_opt_sign:BOOL is --returns 'true' if '-' is found, else 'false'. --advances index by 1 if '-' or '+' is found, else index not advanced. res:BOOL; if (item = '-') then res := true; advance_one_char; --unary minus: elsif (item = '+') then advance_one_char; end; --unary plus: return (res); end; get_str:STR is --get string up to and including '\n' or end of buf. res ::= #STR; skip_space; loop until!(is_done); res := res + get_char; if (item = '\n') then break!; end; end; return (res); end; get_rest_str: STR is -- Return the portion of the string after the current loc res ::= #FSTR; loop until!(is_done); res := res + get_char; end; return(res.str); end; get_str_cut(cut_set:STR):STR is -- get string up to and including cut_set member found. res ::= #STR; c:CHAR; skip_space; loop until!(is_done); c := get_char; res := res + c; if (cut_set.search(c) /= -1) then return (res); end; end; error := Cut_Set_Member_Not_Found; return (res); end; split(c: CHAR): FLIST{STR} is -- Split the remainder of the string into chunks demarcated -- by the split character "c" res ::= #FLIST{STR}; elt: STR := ""; loop until!(is_done); elt := get_str_upto(c); if (is_done) then if ~(elt="") then -- If there was a terminating non- empty segment res := res.push(elt); end; return(res); else res := res.push(elt); advance_one_char; end; end; return(res); end; get_str_upto(t: CHAR): STR is -- Return the next chunk of string upto the character "t" -- or until the end of the string res ::= #FSTR(""); c: CHAR; loop until!(is_done); c := get_char; if (c = t) then retract_one_char; return(res.str); end; res := res+c; end; return(res.str); end; get_str_upto_cut(cut_set:STR):STR is -- get string up to but NOT including cut_set member found. res ::= #STR; c:CHAR; skip_space; loop until!(is_done); c := get_char; if (cut_set.search(c) /= -1) then retract_one_char; return (res); end; res := res + c; end; error := Cut_Set_Member_Not_Found; return (res); end; private get_unsigned_int:INT is -- Doesn't skip spaces: assumes that either "sign" or "decimal -- point" has just been fetched. res:INT; num_digits:INT; loop while!(~is_done and item.is_digit); if ((res /= 0) and (num_digits > Max_Int_Digits)) then error := Too_Many_Digits; break!; end; if ((res = 0) and (item /= '0')) then --found first non-zero digit: throw leading 0's away. num_digits := 1; end; num_digits := num_digits + 1; res := (res * 10) + get_char.digit_value; end; if (num_digits = 0) then error := Bad_Digit; end; return (res); end; private get_unsigned_unprefixed_binary:INT is res:INT; count:INT; bit:CHAR; loop if ((count >= Int_No_Bits) and (res /= 0)) then error := Too_Many_Digits; break!; end; bit := item; case (bit) when '0' then -- do nothing when '1' then if (res = 1) then count := 0; end; else break!; end; res := res.lshift(1).bor(get_char.digit_value); count := count + 1; until!(is_done); end; if (count = 0) then --never started the number! error := Bad_Digit; end; return (res); end; private get_unsigned_unprefixed_octal:INT is res:INT; count:INT; oct:CHAR; loop if ((count >= Int_No_Bits) and (res /= 0)) then error := Too_Many_Digits; break!; end; oct := item; case (oct) when '0' then --do nothing when '1' then if (res = 0) then count := -2 end; --first non-zero digit found: bit count = 1 = (-2) + 3 when '2', '3' then if (res = 0) then count := -1; end; --first non-zero digit found: bit count = 2 = (-1) + 3 when '4', '5', '6', '7' then if (res = 0) then count := 0; end; else break!; end; res := res.lshift(3).bor(get_char.octal_digit_value); count := count + 3; until!(is_done); end; if (count = 0) then --never started the number! error := Bad_Digit; end; return (res); end; private get_unsigned_unprefixed_hex:INT is res:INT; count:INT; hex:CHAR; if (~item.is_hex_digit) then error := Bad_Digit; return 0; end; loop if ((count >= Int_No_Bits) and (res /= 0)) then error := Too_Many_Digits; break!; end; hex := item; case (hex) when '0' then --do nothing when '1' then if (res = 0) then count := -3 end; --first non-zero digit found when '2', '3' then if (res = 0) then count := -2 end; --first non-zero digit found when '4', '5', '6', '7' then if (res = 0) then count := -1; end; --first non-zero digit found when '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F' then --do nothing else break!; end; res := res.lshift(4).bor(get_char.hex_digit_value); count := count + 4; until!(is_done); end; return (res); end; private get_unsigned_int_as_fltd:FLTD is --doesn't skip spaces: assumes that -- either "sign" or "decimal point" has just been fetched. res:FLTD; num_digits:INT; if (~item.is_digit) then error := Bad_Digit; else loop while!(~is_done and item.is_digit); num_digits := num_digits + 1; if ((res /= 0.0d) and (num_digits > Max_Real_Digits)) then error := Too_Many_Digits; break!; end; if ((res = 0.0d) and (item /= '0')) then --found first non-zero digit: throw leading 0's away. num_digits := 1; end; res := (res * 10.0d) + get_char.digit_value.fltd; end; end; return (res); end; private get_frac:FLTD is --starting at leftmost digit, read in decimal fraction res:FLTD; multiplier:FLTD := 0.1d; loop until!(is_done or ~item.is_digit); res := res + (get_char.digit_value.fltd * multiplier); multiplier := multiplier / 10.0d; end; return (res); end; get_opt_int:INT is --if int is not present then no error. res:INT; if (error /= No_Error) then return 0 end; --entered with error: cannot do anything. Don't reset exisiting -- error. res := get_int; if (error /= No_Error) then --no int present: eliminate error set error := No_Error; res := 0; end; return (res); end; get_bool:BOOL is -- Only accepts "true" and "false" for consistency -- with sather spec res:BOOL; bool ::= ""; skip_space; loop until!(~(item.is_alpha) or (item.is_space) or is_done); bool := bool + get_char; end; if ((item = '_') or (item.is_digit)) then -- Illegal termination of a bool error := Bad_Boolean; return(res); end; case (bool) when "true","t","True","T","TRUE" then res := true; when "false","f","False","F","FALSE" then res := false; else error := Bad_Boolean; end; return (res); end; test_bool: BOOL is -- Return true if the next string could be interpreted as a bool res:BOOL; -- Save state cur_loc ::= index; cur_line ::= line_no; cur_is_done ::= is_done; cur_error ::= error; bool ::= get_bool; if (error = No_Error) then res := true; else res := false; end; -- Rewind cursor and reset all other attributes index := cur_loc; line_no := cur_line; is_done := cur_is_done; error := cur_error; return (res); end; current_line_str: STR is -- A string consisting of the current line beg_ind: INT := index; if (beg_ind >= buf.size) then return("") end; loop until!(beg_ind < 0); if buf[beg_ind] = '\n' then break! else beg_ind := beg_ind - 1; end; end; res ::= ""; ind ::= beg_ind+1; loop until!(ind >= buf.size); if (buf[ind] = '\n') then break! end; res := res + buf[ind]; ind := ind+1; end; return(res); end; current_loc_str(cursor_char: CHAR): STR is -- A string consisting of a blank line except for the cursor_char -- which is at the current location within that line. -- Eg. If the cursor is at the third word of the line "this is a test" -- This function will return the string " ^" -- Useful for printing out error messages res: STR := ""; beg_ind: INT := index; if (beg_ind >= buf.size) then return("") end; loop until!(beg_ind < 0); if buf[beg_ind] = '\n' then break! else beg_ind := beg_ind - 1; end; end; loc ::= index - (beg_ind + 1); loop (loc).times!; res := res + " "; end; res := res+cursor_char; return(res); end; end;