str_cursor.sa
Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
---------------------------> Sather 1.1 source file <--------------------------
-- Copyright (C) International Computer Science Institute, 1994.
-- COPYRIGHT NOTICE: This code is provided "AS IS" WITHOUT ANY
-- WARRANTY and is subject to the terms of the SATHER LIBRARY GENERAL
-- PUBLIC LICENSE contained in the file "Doc/License" of the Sather
-- distribution. The license is also available from ICSI, 1947
-- Center St., Suite 600, Berkeley CA 94704, USA.
-- Sather 0.2 version by Steve Omohundro
-- Sather 1.0 version written by:David Schultz (dschultz@cs.nmt.edu)
-- Recent revision information at the end of the file
-- (gomes) April 95 incorporated get_string_upto_cut from jefu
-- (gomes) current_loc_string, current_line_str, skipping commented space
-- (gomes) [from dbailey@icsi] get_block and skip_block
-- (gomes) Reformatted, added: has_error, current_line, error_string
-- (gomes) added get_rest_str, added comments from original version
----------> Please email comments to sather-bugs@icsi.berkeley.edu. <-----
class STR_CURSOR
class STR_CURSOR is
-- Class for stepping through strings extracting information. For
-- strings, cursors are used to sequentially read out the
-- components of a string. They may be used to perform a similar
-- function to that played by "scanf" in C without need for
-- functions with a variable number of arguments. This approach is
-- actually much more general since it allows for testing for the
-- presence of an `BOOL', for example, at a given point in the
-- string. If we were wrong about the structure of the string,
-- then the attribute `error' will be set to a non-zero value.
attr error:INT;
readonly attr comment_char1,comment_char2: CHAR;
readonly attr line_no:INT;
readonly attr index:INT;
readonly attr buf:STR;
readonly attr is_done:BOOL;
const Max_Real_Digits:INT := 30;
const Max_Int_Digits:INT := 9;
const Int_No_Bits:INT := 32;
--error codes:
const No_Error:INT := 0;
const Bad_Digit:INT := 1;
const Past_EOBuf:INT := 2;
const Past_BOBuf:INT := 3;
const Too_Many_Digits:INT := 4;
const Cut_Set_Member_Not_Found:INT := 5;
const Bad_Prefix:INT := 6;
const Bad_Boolean:INT := 7;
const Bad_Block: INT := 8;
const Bad_Char: INT := 9;
create(s:STR):SAME is
res ::= new;
if (void(s)) then res := void; else res.reassign(s); end;
res.comment_char1 := ' ';
res.comment_char2 := ' ';
return (res);
end;
set_comment_syntax(char1: CHAR) is
comment_char1 := char1;
comment_char2 := ' ';
end;
set_comment_syntax(char1,char2: CHAR) is
comment_char1 := char1;
comment_char2 := char2;
end;
reassign(s:STR) is
-- Change the string that `self' points to
-- Clear any error value
if (~void(self)) then
buf := s;
index := 0;
line_no := 1;
is_done := false;
clear_error;
if (void(buf) or (buf.length = 0)) then is_done := true; end;
end;
end;
clear_error is error := No_Error; end;
-- Reset the error value
has_error: BOOL is
-- Returns true if the cursor has encountered an error
if error = No_Error then return false else return true end;
end;
error_string: STR is
-- Returns a string version of the current error status
case error
when No_Error then return("No Cursor Error");
when Bad_Digit then return("Bad Digit");
when Past_EOBuf then return("Attempt to read past end");
when Past_BOBuf then return("Attempt to retract past beginning");
when Too_Many_Digits then return("Too many digits");
when Cut_Set_Member_Not_Found then return("Cut set member not found");
when Bad_Prefix then return("Bad prefix");
when Bad_Block then return("Bad block");
when Bad_Char then return("Bad char");
else return("Unknown Error!") end;
end;
clear is reassign(#STR); end;
-- Reset to an empty string
item:CHAR is
-- Return the current item or '\0' if done
if (is_done) then return ('\0'); else return (buf[index]); end;
end;
skip_space is
-- Advance the scanner over space characters
loop while!(~is_done);
if (item.is_space) then advance_one_char;
elsif skip_if_comment_start then -- do nothing
else break!; end;
end;
end;
skip_if_comment_start: BOOL is
-- Returns true if a comment was skipped
if (comment_char1 = ' ') then return(false);
elsif (comment_char2 = ' ') then return(skip_single_char_comment)
else return(skip_double_char_comment); end;
end;
private skip_single_char_comment: BOOL is
if (item = comment_char1) then
discard ::= get_up_to('\n');
advance_one_char;
return(true);
else
return(false);
end;
end;
private skip_double_char_comment: BOOL is
if index > buf.size - 2 then return(false);
elsif ((item = comment_char1) and (buf[index+1] = comment_char2)) then
discard ::= get_up_to('\n');
advance_one_char;
return(true);
else return(false); end;
end;
skip_space:SAME is
-- Same as skip_space, but return self
skip_space;
return self;
end;
skip_word is
-- Skip characters until (not including) a space character is hit.
loop until!(is_done or item.is_space); advance_one_char; end;
end;
skip_word:SAME is skip_word; return(self) end;
-- Same as skip_word, but return self
skip_thru(c:CHAR) is
-- Skip characters until (not including) the character 'c'
loop until!(is_done or (item = c)); advance_one_char; end;
end;
skip_thru(c:CHAR):SAME is
-- Same as skip_thru, return self
skip_thru(c);
return self;
end;
-- Versions that skip upto a certain element of a string
skip_thru(s:STR) is
-- Skip characters upto (not including) the word "s"
-- Translated from STR::search.
if void(s) then return end;
loop r:INT:=index.upto!(buf.size-s.size); match::=true;
loop if buf.elt!(r)/=s.elt! then
match:=false; break! end end;
if match then
index:=r;
return;
end;
end;
is_done:=true;
return;
end;
skip_thru(s:STR):SAME is skip_thru(s); return self; end;
skip_over(s:STR) is
-- Skip characters upto _and including_ the word "s".
skip_thru(s);
if (~is_done) then
index:=index+s.size;
end;
end;
advance_one_char is
-- Advance the cursor one position forward
if (is_done) then error := Past_EOBuf;
else
index := index + 1;
if (index >= buf.length) then is_done := true;
elsif (item = '\n') then line_no := line_no + 1; end;
end;
end;
advance_one_char:SAME is advance_one_char; return self; end;
retract_one_char is
if (index <= 0) then error := Past_BOBuf;
else
index := index - 1;
if (item = '\n') then line_no := line_no - 1; end;
is_done := false;
end;
end;
retract_one_char:SAME is retract_one_char; return self; end;
get_char:CHAR is
res ::= item;
advance_one_char;
return (res);
end;
get_char(c: CHAR) is
if item = c then advance_one_char;
else error := Bad_Char; end;
end;
get_word:STR is
res ::= #STR;
skip_space;
loop until!(is_done or item.is_space);
res := res + get_char;
end;
return (res);
end;
get_word(max_char_count:INT):STR is
--get a word up to max_char_count CHAR's long
res ::= #STR;
skip_space;
loop until!(is_done or (max_char_count <= 0) or item.is_space);
res := res + get_char;
max_char_count := max_char_count - 1;
end;
return (res);
end;
get_up_to(c:CHAR):STR is
res ::= #STR;
loop until!(is_done or item = c); res := res + get_char; end;
return (res);
end;
skip_block(begin_delim, fin_delim: CHAR) is
if (item = begin_delim) then
advance_one_char;
loop
if item=fin_delim then advance_one_char; return;
elsif item = begin_delim then skip_block(begin_delim,fin_delim);
elsif is_done then error := Bad_Block; return;
else advance_one_char; end
end
end;
end;
get_block(begin_delim, fin_delim: CHAR): STR is
-- Return a block if the current character = begin_delim
res ::=#STR;
if (item = begin_delim) then
res := res + get_char;
loop
if item=fin_delim then res := res + get_char; return(res);
elsif item = begin_delim then
res := res + get_block(begin_delim,fin_delim);
elsif is_done then error := Bad_Block; return(res);
else res := res + get_char; end
end;
end;
return(res);
end;
int:INT is
--to support str_curs.sa's int which accepts any of the 4 formats:
--1) decimal, 2) binary, 3) hex, 4) octal.
--unlike str_curs.sa, this only sets error code and doesn't raise an
--exception.
--leading 0's in any format do not contribute towards an overflow error
--all formats may use '-' to give the 2's complement of the int.
res:INT;
neg_sign:BOOL;
skip_space;
neg_sign := get_opt_sign;
if (item = '0') then
advance_one_char;
case (item)
when 'b' then
advance_one_char;
res := get_unsigned_unprefixed_binary;
when 'o' then
advance_one_char;
res := get_unsigned_unprefixed_octal;
when 'x' then
advance_one_char;
res := get_unsigned_unprefixed_hex;
else
retract_one_char;
res := get_unsigned_int;
end; --(case)
else
res := get_unsigned_int;
end;
if (neg_sign) then res := -res; end;
return (res);
end;
get_int:INT is
--decimal format optionally signed with '+', '-', or '\0'
res:INT;
neg_sign:BOOL;
skip_space;
neg_sign := get_opt_sign;
res := get_unsigned_int;
if (neg_sign) then res := -res; end;
return (res);
end;
get_binary:INT is
-- BIN ::= OPT_SIGN '0b' (0 | 1)+
-- OPT_SIGN ::= '+' | '-' | '\0'
--and stops at the first non-binary digit found.
--leading 0's ignored.
-- '-' prefix gives 2's complement of following binary number.
res:INT;
neg_sign:BOOL;
skip_space;
neg_sign := get_opt_sign;
if (get_char /= '0') then error := Bad_Prefix; return (res);
elsif (get_char /= 'b') then error := Bad_Prefix; return (res); end;
res := get_unsigned_unprefixed_binary;
if (neg_sign) then res := -res; end;
return (res);
end;
get_octal:INT is
-- OCT ::= '0o' (0 .. 7)+ stoping at first non-octal digit found.
--leading 0's ignored.
-- '-' prefix gives 2's complement of following octal number.
res:INT;
neg_sign:BOOL;
skip_space;
neg_sign := get_opt_sign;
if (get_char /= '0') then
error := Bad_Prefix;
return (res);
elsif (get_char /= 'o') then
error := Bad_Prefix;
return (res);
end;
res := get_unsigned_unprefixed_octal;
if (neg_sign) then res := -res; end;
return (res);
end;
get_hex:INT is
-- HEX ::= OPT_SIGN ('0x' | '0X') HEX_DIGIT+
-- OPT_SIGN ::= '+' | '-' | '\0'
-- HEX_DIGIT ::= ('0' .. '9') | ('a' .. 'f') | ('A' .. 'F')
-- '-' prefix gives 2's complement of following hex dnumber
res:INT;
neg_sign:BOOL;
skip_space;
neg_sign := get_opt_sign;
if (get_char /= '0') then error := Bad_Prefix; return (res);
elsif ((item /= 'x') and (item /= 'X')) then
advance_one_char;
error := Bad_Prefix;
return (res);
else
--i.e., was 'x' or 'X'
advance_one_char;
end;
res := get_unsigned_unprefixed_hex;
if (neg_sign) then res := -res; end;
return (res);
end;
get_flt:FLT is return (get_fltd.flt); end;
get_fltd:FLTD is
-- Accepts real numbers of format:
-- FLTD ::= SIGNED_INT {'.' {UNSIGNED_INT} {'e' SIGNED_INT}
-- SIGNED_INT ::= {'+' | '-' | '\0'} UNSIGNED_INT
-- UNSIGNED_INT ::= (0 .. 9)+
-- no spaces allowed between components of FLTD
res:FLTD;
neg_sign:BOOL;
skip_space;
neg_sign := get_opt_sign;
res := get_unsigned_int_as_fltd;
if (item = '.') then
--eat the '.' and read in an unsigned int as decimal fraction l->r.
advance_one_char;
res := res + get_frac;
end;
if (item = 'e') then
--eat the 'e' and get the integer exponent
advance_one_char;
res := res * get_opt_int.fltd.exp10;
end;
if (neg_sign) then res := -res; end;
return (res);
end;
get_opt_sign:BOOL is
--returns 'true' if '-' is found, else 'false'.
--advances index by 1 if '-' or '+' is found, else index not advanced.
res:BOOL;
if (item = '-') then res := true; advance_one_char;
--unary minus:
elsif (item = '+') then advance_one_char; end;
--unary plus:
return (res);
end;
get_str:STR is
--get string up to and including '\n' or end of buf.
res ::= #STR;
skip_space;
loop until!(is_done);
res := res + get_char;
if (item = '\n') then break!; end;
end;
return (res);
end;
get_rest_str: STR is
-- Return the portion of the string after the current loc
res ::= #FSTR;
loop until!(is_done); res := res + get_char; end;
return(res.str);
end;
get_str_cut(cut_set:STR):STR is
-- get string up to and including cut_set member found.
res ::= #STR;
c:CHAR;
skip_space;
loop until!(is_done);
c := get_char;
res := res + c;
if (cut_set.search(c) /= -1) then return (res); end;
end;
error := Cut_Set_Member_Not_Found;
return (res);
end;
split(c: CHAR): FLIST{STR} is
-- Split the remainder of the string into chunks demarcated
-- by the split character "c"
res ::= #FLIST{STR};
elt: STR := "";
loop until!(is_done);
elt := get_str_upto(c);
if (is_done) then
if ~(elt="") then
-- If there was a terminating non- empty segment
res := res.push(elt);
end;
return(res);
else
res := res.push(elt);
advance_one_char;
end;
end;
return(res);
end;
get_str_upto(t: CHAR): STR is
-- Return the next chunk of string upto the character "t"
-- or until the end of the string
res ::= #FSTR("");
c: CHAR;
loop until!(is_done);
c := get_char;
if (c = t) then
retract_one_char;
return(res.str);
end;
res := res+c;
end;
return(res.str);
end;
get_str_upto_cut(cut_set:STR):STR is
-- get string up to but NOT including cut_set member found.
res ::= #STR;
c:CHAR;
skip_space;
loop until!(is_done);
c := get_char;
if (cut_set.search(c) /= -1) then
retract_one_char;
return (res);
end;
res := res + c;
end;
error := Cut_Set_Member_Not_Found;
return (res);
end;
private get_unsigned_int:INT is
-- Doesn't skip spaces: assumes that either "sign" or "decimal
-- point" has just been fetched.
res:INT;
num_digits:INT;
loop while!(~is_done and item.is_digit);
if ((res /= 0) and (num_digits > Max_Int_Digits)) then
error := Too_Many_Digits;
break!;
end;
if ((res = 0) and (item /= '0')) then
--found first non-zero digit: throw leading 0's away.
num_digits := 1;
end;
num_digits := num_digits + 1;
res := (res * 10) + get_char.digit_value;
end;
if (num_digits = 0) then error := Bad_Digit; end;
return (res);
end;
private get_unsigned_unprefixed_binary:INT is
res:INT;
count:INT;
bit:CHAR;
loop
if ((count >= Int_No_Bits) and (res /= 0)) then
error := Too_Many_Digits;
break!;
end;
bit := item;
case (bit)
when '0' then -- do nothing
when '1' then if (res = 1) then count := 0; end;
else break!; end;
res := res.lshift(1).bor(get_char.digit_value);
count := count + 1;
until!(is_done);
end;
if (count = 0) then
--never started the number!
error := Bad_Digit;
end;
return (res);
end;
private get_unsigned_unprefixed_octal:INT is
res:INT;
count:INT;
oct:CHAR;
loop
if ((count >= Int_No_Bits) and (res /= 0)) then
error := Too_Many_Digits;
break!;
end;
oct := item;
case (oct)
when '0' then --do nothing
when '1' then if (res = 0) then count := -2 end;
--first non-zero digit found: bit count = 1 = (-2) + 3
when '2', '3' then if (res = 0) then count := -1; end;
--first non-zero digit found: bit count = 2 = (-1) + 3
when '4', '5', '6', '7' then if (res = 0) then count := 0; end;
else break!; end;
res := res.lshift(3).bor(get_char.octal_digit_value);
count := count + 3;
until!(is_done);
end;
if (count = 0) then
--never started the number!
error := Bad_Digit;
end;
return (res);
end;
private get_unsigned_unprefixed_hex:INT is
res:INT;
count:INT;
hex:CHAR;
if (~item.is_hex_digit) then error := Bad_Digit; return 0; end;
loop
if ((count >= Int_No_Bits) and (res /= 0)) then
error := Too_Many_Digits;
break!;
end;
hex := item;
case (hex)
when '0' then --do nothing
when '1' then if (res = 0) then count := -3 end;
--first non-zero digit found
when '2', '3' then if (res = 0) then count := -2 end;
--first non-zero digit found
when '4', '5', '6', '7' then
if (res = 0) then count := -1; end;
--first non-zero digit found
when '8', '9',
'a', 'b', 'c', 'd', 'e', 'f',
'A', 'B', 'C', 'D', 'E', 'F'
then --do nothing
else
break!;
end;
res := res.lshift(4).bor(get_char.hex_digit_value);
count := count + 4;
until!(is_done);
end;
return (res);
end;
private get_unsigned_int_as_fltd:FLTD is
--doesn't skip spaces: assumes that
-- either "sign" or "decimal point" has just been fetched.
res:FLTD;
num_digits:INT;
if (~item.is_digit) then error := Bad_Digit;
else
loop while!(~is_done and item.is_digit);
num_digits := num_digits + 1;
if ((res /= 0.0d) and (num_digits > Max_Real_Digits)) then
error := Too_Many_Digits;
break!;
end;
if ((res = 0.0d) and (item /= '0')) then
--found first non-zero digit: throw leading 0's away.
num_digits := 1;
end;
res := (res * 10.0d) + get_char.digit_value.fltd;
end;
end;
return (res);
end;
private get_frac:FLTD is
--starting at leftmost digit, read in decimal fraction
res:FLTD;
multiplier:FLTD := 0.1d;
loop until!(is_done or ~item.is_digit);
res := res + (get_char.digit_value.fltd * multiplier);
multiplier := multiplier / 10.0d;
end;
return (res);
end;
get_opt_int:INT is
--if int is not present then no error.
res:INT;
if (error /= No_Error) then return 0 end;
--entered with error: cannot do anything. Don't reset exisiting
-- error.
res := get_int;
if (error /= No_Error) then
--no int present: eliminate error set
error := No_Error;
res := 0;
end;
return (res);
end;
get_bool:BOOL is
-- Only accepts "true" and "false" for consistency
-- with sather spec
res:BOOL;
bool ::= "";
skip_space;
loop until!(~(item.is_alpha) or (item.is_space) or is_done);
bool := bool + get_char;
end;
if ((item = '_') or (item.is_digit)) then
-- Illegal termination of a bool
error := Bad_Boolean;
return(res);
end;
case (bool)
when "true","t","True","T","TRUE" then res := true;
when "false","f","False","F","FALSE" then res := false;
else error := Bad_Boolean; end;
return (res);
end;
test_bool: BOOL is
-- Return true if the next string could be interpreted as a bool
res:BOOL;
-- Save state
cur_loc ::= index;
cur_line ::= line_no;
cur_is_done ::= is_done;
cur_error ::= error;
bool ::= get_bool;
if (error = No_Error) then res := true;
else res := false; end;
-- Rewind cursor and reset all other attributes
index := cur_loc;
line_no := cur_line;
is_done := cur_is_done;
error := cur_error;
return (res);
end;
current_line_str: STR is
-- A string consisting of the current line
beg_ind: INT := index;
if (beg_ind >= buf.size) then return("") end;
loop until!(beg_ind < 0);
if buf[beg_ind] = '\n' then break!
else beg_ind := beg_ind - 1; end;
end;
res ::= "";
ind ::= beg_ind+1;
loop until!(ind >= buf.size);
if (buf[ind] = '\n') then break! end;
res := res + buf[ind];
ind := ind+1;
end;
return(res);
end;
current_loc_str(cursor_char: CHAR): STR is
-- A string consisting of a blank line except for the cursor_char
-- which is at the current location within that line.
-- Eg. If the cursor is at the third word of the line "this is a test"
-- This function will return the string " ^"
-- Useful for printing out error messages
res: STR := "";
beg_ind: INT := index;
if (beg_ind >= buf.size) then return("") end;
loop until!(beg_ind < 0);
if buf[beg_ind] = '\n' then break!
else beg_ind := beg_ind - 1; end;
end;
loc ::= index - (beg_ind + 1);
loop (loc).times!; res := res + " "; end;
res := res+cursor_char;
return(res);
end;
end;