str_cursor.sa

Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
 
---------------------------> Sather 1.1 source file <--------------------------
-- Copyright (C) International Computer Science Institute, 1994.
-- COPYRIGHT NOTICE: This code is provided "AS IS" WITHOUT ANY
-- WARRANTY and is subject to the terms of the SATHER LIBRARY GENERAL
-- PUBLIC LICENSE contained in the file "Doc/License" of the Sather
-- distribution.  The license is also available from ICSI, 1947
-- Center St., Suite 600, Berkeley CA 94704, USA.  
-- Sather 0.2 version by Steve Omohundro
-- Sather 1.0 version written by:David Schultz (dschultz@cs.nmt.edu)
-- Recent revision information at the end of the file
-- (gomes) April 95 incorporated get_string_upto_cut from jefu
-- (gomes) current_loc_string, current_line_str, skipping commented space
-- (gomes) [from dbailey@icsi]  get_block and skip_block
-- (gomes) Reformatted, added: has_error, current_line, error_string
-- (gomes) added get_rest_str, added comments from original version
----------> Please email comments to sather-bugs@icsi.berkeley.edu. <-----

class STR_CURSOR 
class STR_CURSOR is
   -- Class for stepping through strings extracting information.  For
   -- strings, cursors are used to sequentially read out the
   -- components of a string. They may be used to perform a similar
   -- function to that played by "scanf" in C without need for
   -- functions with a variable number of arguments.  This approach is
   -- actually much more general since it allows for testing for the
   -- presence of an `BOOL', for example, at a given point in the
   -- string.  If we were wrong about the structure of the string,
   -- then the attribute `error' will be set to a non-zero value.

   attr error:INT;
   readonly attr comment_char1,comment_char2: CHAR;
   readonly attr line_no:INT;
   readonly attr index:INT;
   readonly attr buf:STR;
   readonly attr is_done:BOOL;
   
   const Max_Real_Digits:INT := 30;
   const Max_Int_Digits:INT := 9;
   const Int_No_Bits:INT := 32;
   
   --error codes:
   const No_Error:INT := 0;
   const Bad_Digit:INT := 1;
   const Past_EOBuf:INT := 2;
   const Past_BOBuf:INT := 3;
   const Too_Many_Digits:INT := 4;
   const Cut_Set_Member_Not_Found:INT := 5;
   const Bad_Prefix:INT := 6;
   const Bad_Boolean:INT := 7;
   const Bad_Block: INT := 8;
   const Bad_Char: INT := 9;

   create(s:STR):SAME is
      res ::= new;
      if (void(s)) then res := void;  else res.reassign(s); end;
      res.comment_char1 := ' ';
      res.comment_char2 := ' ';
      return (res);
   end;

   set_comment_syntax(char1: CHAR) is
      comment_char1 := char1;
      comment_char2 := ' ';
   end;
   
   set_comment_syntax(char1,char2: CHAR) is
      comment_char1 := char1;
      comment_char2 := char2;
   end;
   
   reassign(s:STR) is
      -- Change the string that `self' points to
      -- Clear any error value
      if (~void(self)) then
         buf := s;
         index := 0;
         line_no := 1;
         is_done := false;
         clear_error;
         if (void(buf) or (buf.length = 0)) then is_done := true;  end;
      end;
   end;

   clear_error is   error := No_Error;  end;
   -- Reset the error value
   
   has_error: BOOL is 
      -- Returns true if the cursor has encountered an error
      if error = No_Error then return false else return true  end;
   end;
   
   error_string: STR is
      -- Returns a string version of the current error status
      case error
      when No_Error then return("No Cursor Error");
      when Bad_Digit then return("Bad Digit");
      when Past_EOBuf then return("Attempt to read past end");
      when Past_BOBuf then return("Attempt to retract past beginning");
      when Too_Many_Digits then return("Too many digits");
      when Cut_Set_Member_Not_Found then return("Cut set member not found");
      when Bad_Prefix then return("Bad prefix");
      when Bad_Block then return("Bad block");
      when Bad_Char then return("Bad char");
      else return("Unknown Error!") end;
   end;
   
   
   clear is  reassign(#STR);  end;
   -- Reset to an empty string
   
   item:CHAR is
      -- Return the current item or '\0' if done
      if (is_done) then	 return ('\0');   else return (buf[index]);    end;
   end;
   
   skip_space is
      -- Advance the scanner over space characters
      loop while!(~is_done); 
	 if (item.is_space) then advance_one_char;
	 elsif skip_if_comment_start then -- do nothing
	 else break!; end;
      end;
   end;

   skip_if_comment_start: BOOL is
      -- Returns true if a comment was skipped
      if (comment_char1 = ' ') then return(false); 
      elsif (comment_char2 = ' ') then return(skip_single_char_comment) 
      else return(skip_double_char_comment);  end;
   end;
   
   private skip_single_char_comment: BOOL is
      if (item = comment_char1) then
	 discard ::= get_up_to('\n');
	 advance_one_char;
	 return(true);
      else
	 return(false);
      end;
   end;
   
   private skip_double_char_comment: BOOL is
      if index > buf.size - 2 then return(false); 
      elsif ((item = comment_char1) and (buf[index+1] = comment_char2)) then
	 discard ::= get_up_to('\n');
	 advance_one_char; 
	 return(true);
      else return(false); end;
   end;
	 
   skip_space:SAME is
      -- Same as skip_space, but return self
      skip_space;
      return self;
   end;
   
   skip_word is
      -- Skip characters until (not including) a space character is hit.
      loop until!(is_done or item.is_space);  advance_one_char;   end;
   end;
   
   skip_word:SAME is skip_word; return(self) end;
   -- Same as skip_word, but return self

   skip_thru(c:CHAR) is
      -- Skip characters until (not including) the character 'c'
      loop until!(is_done or (item = c)); advance_one_char;  end;
   end;

   skip_thru(c:CHAR):SAME is
      -- Same as skip_thru, return self
      skip_thru(c);
      return self;
   end;

-- Versions that skip upto a certain element of a string
   skip_thru(s:STR) is
      -- Skip characters upto (not including) the word "s"
      -- Translated from STR::search.
      if void(s) then return end;
      loop r:INT:=index.upto!(buf.size-s.size); match::=true;
	 loop if buf.elt!(r)/=s.elt! then
	    match:=false; break! end end;
	 if match then
	    index:=r;
	    return;
	 end;
      end;
      is_done:=true;
      return;
   end;

   skip_thru(s:STR):SAME is  skip_thru(s);  return self;  end;

   skip_over(s:STR) is
      -- Skip characters upto _and including_ the word "s".
      skip_thru(s);
      if (~is_done) then
	 index:=index+s.size;
      end;
   end;

   advance_one_char is
      -- Advance the cursor one position forward
      if (is_done) then error := Past_EOBuf;
      else
	 index := index + 1;
	 if (index >= buf.length) then   is_done := true;
	 elsif (item = '\n') then   line_no := line_no + 1;  end;
      end;
   end;

   advance_one_char:SAME is  advance_one_char;  return self;  end;
   
   retract_one_char is
      if (index <= 0) then error := Past_BOBuf;
      else
	 index := index - 1;
	 if (item = '\n') then line_no := line_no - 1; end;
	 is_done := false;
      end;
   end;
   
   retract_one_char:SAME is  retract_one_char;   return self;  end;
   
   get_char:CHAR is
      res ::= item;
      advance_one_char;
      return (res);
   end;
   
   get_char(c: CHAR) is
      if item = c then advance_one_char;
      else  error := Bad_Char;   end;
   end;
   
   get_word:STR is
      res ::= #STR;
      skip_space;
      loop until!(is_done or item.is_space);
	 res := res + get_char;
      end;
      return (res);
   end;
   
   get_word(max_char_count:INT):STR is
      --get a word up to max_char_count CHAR's long
      res ::= #STR;
      skip_space;
      loop until!(is_done or (max_char_count <= 0) or item.is_space);
	 res := res + get_char;
	 max_char_count := max_char_count - 1;
      end;
      return (res);
   end;

   get_up_to(c:CHAR):STR is
      res ::= #STR;
      loop until!(is_done or item = c);  res := res + get_char;  end;
      return (res);
   end;
   
   skip_block(begin_delim, fin_delim: CHAR) is
      if (item = begin_delim) then 
	 advance_one_char;
	 loop
	    if item=fin_delim then advance_one_char;  return;
	    elsif item = begin_delim then skip_block(begin_delim,fin_delim);
	    elsif is_done then error := Bad_Block; return;
	    else advance_one_char;  end
	 end
      end;
   end;
   
   get_block(begin_delim, fin_delim: CHAR): STR is
      -- Return a block if the current character = begin_delim
      res ::=#STR;
      if (item = begin_delim) then 
	 res := res + get_char;
	 loop
	    if item=fin_delim then res := res + get_char;  return(res);
	    elsif item = begin_delim then
	       res := res + get_block(begin_delim,fin_delim);
	    elsif is_done then error := Bad_Block; return(res);
	    else res := res + get_char;  end
	 end;
      end;
      return(res);
   end;
   
   int:INT is
      --to support str_curs.sa's int which accepts any of the 4 formats:
      --1) decimal, 2) binary, 3) hex, 4) octal.
      --unlike str_curs.sa, this only sets error code and doesn't raise an
      --exception.
      
      --leading 0's in any format do not contribute towards an overflow error
      --all formats may use '-' to give the 2's complement of the int.
      
      res:INT;
      neg_sign:BOOL;
      
      skip_space;
      neg_sign := get_opt_sign;
      
      if (item = '0') then
	 advance_one_char;
	 
	 case (item)
	 when 'b' then
		  advance_one_char;
	    res := get_unsigned_unprefixed_binary;
	 when 'o' then
		  advance_one_char;
	    res := get_unsigned_unprefixed_octal;
	 when 'x' then
		  advance_one_char;
	    res := get_unsigned_unprefixed_hex;
	 else
	    retract_one_char;
	    res := get_unsigned_int;
	 end;  --(case)
      else
	 res := get_unsigned_int;
      end;
      
      if (neg_sign) then res := -res;      end;
      return (res);
   end;
   
   get_int:INT is
      --decimal format optionally signed with '+', '-', or '\0'
      res:INT;
      neg_sign:BOOL;
      skip_space;
      neg_sign := get_opt_sign;
      res := get_unsigned_int;
      if (neg_sign) then res := -res;   end;
      return (res);
   end;
   
   get_binary:INT is
      -- BIN ::= OPT_SIGN '0b' (0 | 1)+
      -- OPT_SIGN ::= '+' | '-' | '\0'
      --and stops at the first non-binary digit found.
      --leading 0's ignored.
      -- '-' prefix gives 2's complement of following binary number.
      res:INT;
      neg_sign:BOOL;
      skip_space;
      neg_sign := get_opt_sign;
      if (get_char /= '0') then error := Bad_Prefix; return (res);
      elsif (get_char /= 'b') then error := Bad_Prefix; return (res);  end;

      res := get_unsigned_unprefixed_binary;
      if (neg_sign) then res := -res;   end;
      return (res);
   end;
   
   get_octal:INT is
      -- OCT ::= '0o' (0 .. 7)+  stoping at first non-octal digit found.
      --leading 0's ignored.
      -- '-' prefix gives 2's complement of following octal number.
      res:INT;
      neg_sign:BOOL;
      skip_space;
      neg_sign := get_opt_sign;
      if (get_char /= '0') then
	 error := Bad_Prefix;
	 return (res);
      elsif (get_char /= 'o') then
	 error := Bad_Prefix;
	 return (res);
      end;
      res := get_unsigned_unprefixed_octal;
      if (neg_sign) then res := -res;   end;
      return (res);
   end; 
   
   get_hex:INT is
      --  HEX ::= OPT_SIGN ('0x' | '0X') HEX_DIGIT+
      --  OPT_SIGN ::= '+' | '-' | '\0'
      --  HEX_DIGIT ::= ('0' .. '9') | ('a' .. 'f') | ('A' .. 'F')
      --  '-' prefix gives 2's complement of following hex dnumber
      res:INT;
      neg_sign:BOOL;
      skip_space;
      neg_sign := get_opt_sign;
      if (get_char /= '0') then error := Bad_Prefix; return (res);
      elsif ((item /= 'x') and  (item /= 'X')) then
	 advance_one_char;
	 error := Bad_Prefix;
	 return (res);
      else
	 --i.e., was 'x' or 'X'
	 advance_one_char;
      end;
      res := get_unsigned_unprefixed_hex;
      if (neg_sign) then res := -res;    end;
      return (res);
   end;
   
   get_flt:FLT is   return (get_fltd.flt);  end;
   
   get_fltd:FLTD is
      -- Accepts real numbers of format:
      --  FLTD         ::= SIGNED_INT {'.' {UNSIGNED_INT} {'e' SIGNED_INT}
      --  SIGNED_INT   ::= {'+' | '-' | '\0'} UNSIGNED_INT
      --  UNSIGNED_INT ::= (0 .. 9)+
      -- no spaces allowed between components of FLTD
      res:FLTD;
      neg_sign:BOOL;
      skip_space;
      neg_sign := get_opt_sign;
      res := get_unsigned_int_as_fltd;
      if (item = '.') then
	 --eat the '.' and read in an unsigned int as decimal fraction l->r.
	 advance_one_char;
	 res := res + get_frac;
      end;
      if (item = 'e') then
	 --eat the 'e' and get the integer exponent
	 advance_one_char;
	 res := res * get_opt_int.fltd.exp10;
      end;
      if (neg_sign) then res := -res;  end;
      return (res);
   end;
   
   get_opt_sign:BOOL is
      --returns 'true' if '-' is found,  else 'false'.
      --advances index by 1 if '-' or '+' is found, else index not advanced.
      res:BOOL;
      if (item = '-') then res := true; advance_one_char;
	 --unary minus:
      elsif (item = '+') then advance_one_char; end;
	 --unary plus:
      return (res);
   end;
   
   get_str:STR is
      --get string up to and including '\n' or end of buf.
      res ::= #STR;
      skip_space;
      loop until!(is_done);
	 res := res + get_char;
	 if (item = '\n') then   break!; end;
      end;
      return (res);
   end;
   
   get_rest_str: STR is
      -- Return the portion of the string after the current loc
      res ::= #FSTR;
      loop until!(is_done); res := res + get_char; end;
      return(res.str);
   end;
   
   get_str_cut(cut_set:STR):STR is
      -- get string up to and including cut_set member found.
      res ::= #STR;
      c:CHAR;
      skip_space;
      loop until!(is_done);
	 c := get_char;
	 res := res + c;
	 if (cut_set.search(c) /= -1) then  return (res); end;
      end;
      error := Cut_Set_Member_Not_Found;
      return (res);
   end;
   
   split(c: CHAR): FLIST{STR} is
      -- Split the remainder of the string into chunks demarcated
      -- by the split character "c"
      res ::= #FLIST{STR};
      elt: STR := "";
      loop until!(is_done);
	 elt := get_str_upto(c);
	 if (is_done) then
	    if ~(elt="") then
	       -- If there was a terminating non- empty segment
	       res := res.push(elt);
	    end;
	    return(res);
	 else 
	    res := res.push(elt); 
	    advance_one_char;
	 end;
      end;
      return(res);
   end;
   
   get_str_upto(t: CHAR): STR is
      -- Return the next chunk of string upto the character "t"
      -- or until the end of the string
      res ::= #FSTR("");
      c: CHAR;
      loop until!(is_done);
	 c := get_char;
	 if (c = t) then 
	    retract_one_char;
	    return(res.str);
	 end;
	 res := res+c;
      end;
      return(res.str);
   end;
   
   get_str_upto_cut(cut_set:STR):STR is
      -- get string up to but NOT including cut_set member found.
      res ::= #STR;
      c:CHAR;
      skip_space;
      loop until!(is_done);
	 c := get_char;
	 if (cut_set.search(c) /= -1) then  
	    retract_one_char;
	    return (res); 
	 end;
	 res := res + c;
      end;
      error := Cut_Set_Member_Not_Found;
      return (res);
   end;
   
   private get_unsigned_int:INT is
      -- Doesn't skip spaces: assumes that either "sign" or "decimal
      -- point" has just been fetched.
      res:INT;
      num_digits:INT;
      
      loop while!(~is_done and item.is_digit);  
	 if ((res /= 0) and (num_digits > Max_Int_Digits)) then
	    error := Too_Many_Digits;
	    break!;
	 end;
	 if ((res = 0) and (item /= '0')) then
	    --found first non-zero digit:  throw leading 0's away.
	    num_digits := 1;
	 end;
	 num_digits := num_digits + 1;
	 res := (res * 10) + get_char.digit_value;
      end;

      if (num_digits = 0) then error := Bad_Digit;  end;
      return (res);
   end;
   
   private get_unsigned_unprefixed_binary:INT is
      res:INT;
      count:INT;
      bit:CHAR;
      loop	 
	 if ((count >= Int_No_Bits) and (res /= 0)) then
	    error := Too_Many_Digits;
	    break!;
	 end;
	 bit := item;
	 case (bit)
	 when '0' then -- do nothing
	 when '1' then if (res = 1) then  count := 0; end;
	 else  break!; end;
	 res := res.lshift(1).bor(get_char.digit_value);
	 count := count + 1;
	 until!(is_done);
      end;
      if (count = 0) then 
	 --never started the number!
	 error := Bad_Digit; 
      end;
      return (res);
   end;
   
   private get_unsigned_unprefixed_octal:INT is
      res:INT;
      count:INT;
      oct:CHAR;
      loop
	 if ((count >= Int_No_Bits) and (res /= 0)) then
	    error := Too_Many_Digits;
	    break!;
	 end;
	 oct := item;
	 case (oct)
	 when '0' then  --do nothing
	 when '1' then if (res = 0) then count := -2 end;
	    --first non-zero digit found: bit count = 1 = (-2) + 3
	 when '2', '3' then  if (res = 0) then   count := -1; end;
	    --first non-zero digit found: bit count = 2 = (-1) + 3
	 when '4', '5', '6', '7' then if (res = 0) then count := 0;  end;
	 else   break!; end; 

	 res := res.lshift(3).bor(get_char.octal_digit_value);
	 count := count + 3;
	 until!(is_done);
      end;
      if (count = 0) then
	 --never started the number!
	 error := Bad_Digit;
      end;
      return (res);
   end;
   
   private get_unsigned_unprefixed_hex:INT is
      res:INT;
      count:INT;
      hex:CHAR;
      if (~item.is_hex_digit) then error := Bad_Digit; return 0;  end;
      loop
	 if ((count >= Int_No_Bits) and (res /= 0)) then
	    error := Too_Many_Digits;
	    break!;
	 end;
	 hex := item;
	 case (hex)
	 when '0' then    --do nothing
	 when '1' then if (res = 0) then count := -3 end;
		     --first non-zero digit found
	 when '2', '3' then if (res = 0) then count := -2 end;
		     --first non-zero digit found
	 when '4', '5', '6', '7'  then 
		  if (res = 0) then  count := -1; end;
		     --first non-zero digit found
	 when '8', '9',
	          'a', 'b', 'c', 'd', 'e', 'f',
		  'A', 'B', 'C', 'D', 'E', 'F'
	 then  --do nothing
	 else  
	    break!; 
	 end;
	 res := res.lshift(4).bor(get_char.hex_digit_value);
	 count := count + 4;
	 until!(is_done);
      end;
      return (res);
   end;
   
   private get_unsigned_int_as_fltd:FLTD is
      --doesn't skip spaces:  assumes that 
      --  either "sign" or "decimal point" has just been fetched.
      res:FLTD;
      num_digits:INT;
      if (~item.is_digit) then error := Bad_Digit;
      else
	 loop while!(~is_done and item.is_digit);
	    num_digits := num_digits + 1;
	    if ((res /= 0.0d) and (num_digits > Max_Real_Digits)) then
	       error := Too_Many_Digits;
	       break!;
	    end;
	    if ((res = 0.0d) and (item /= '0')) then
	       --found first non-zero digit:  throw leading 0's away.
	       num_digits := 1;
	    end;
	    res := (res * 10.0d) + get_char.digit_value.fltd;
	 end;
      end;
      return (res);
   end;
   
   private get_frac:FLTD is
      --starting at leftmost digit, read in decimal fraction
      res:FLTD;
      multiplier:FLTD := 0.1d;
      loop until!(is_done or ~item.is_digit);
	 res := res + (get_char.digit_value.fltd * multiplier);
	 multiplier := multiplier / 10.0d;
      end;
      return (res);
   end;
   
   get_opt_int:INT is
      --if int is not present then no error.
      res:INT;
      if (error /= No_Error) then return 0 end;
         --entered with error: cannot do anything.  Don't reset exisiting 
         --  error.
      res := get_int;
      if (error /= No_Error) then
         --no int present: eliminate error set
         error := No_Error;
         res := 0;
      end;
      return (res);
   end;

   get_bool:BOOL is
      -- Only accepts "true" and "false" for consistency 
      -- with sather spec
      res:BOOL;
      bool ::= "";

      skip_space;
      loop until!(~(item.is_alpha) or (item.is_space) or is_done);
	 bool := bool + get_char;
      end;
      if ((item = '_')  or (item.is_digit)) then
	 -- Illegal termination of a bool
	 error := Bad_Boolean;  
	 return(res);
      end;
      case (bool)
      when "true","t","True","T","TRUE" then  res := true;
      when "false","f","False","F","FALSE" then res := false;
      else error := Bad_Boolean;  end;
      return (res);
   end;

   test_bool: BOOL is
      -- Return true if the next string could be interpreted as a bool
      res:BOOL;
      -- Save state
      cur_loc ::= index;
      cur_line ::= line_no;
      cur_is_done ::= is_done;
      cur_error ::= error;
      
      bool ::= get_bool;
      if (error = No_Error) then res := true;
      else res := false; end;
      -- Rewind cursor and reset all other attributes
      index := cur_loc;		
      line_no := cur_line;
      is_done := cur_is_done;
      error := cur_error;
      return (res);
   end;

   current_line_str: STR is
      -- A string consisting of the current line
      beg_ind: INT := index;
      if (beg_ind >= buf.size) then return("") end;
      loop until!(beg_ind < 0);
	 if buf[beg_ind] = '\n' then break! 
	 else beg_ind := beg_ind - 1; end;
      end;
      res ::= "";
      ind ::= beg_ind+1;
      loop until!(ind >= buf.size);
	 if (buf[ind] = '\n') then break! end;
	 res := res + buf[ind];
	 ind := ind+1;
      end;
      return(res);
   end;
   
   current_loc_str(cursor_char: CHAR): STR is
      -- A string consisting of a blank line except for the cursor_char
      -- which is at the current location within that line.
      -- Eg. If the cursor is at the third word of the line "this is a test"
      -- This function will return the string               "        ^"
      -- Useful for printing out error messages
      res: STR := "";
      beg_ind: INT := index;
      if (beg_ind >= buf.size) then return("") end;
      loop until!(beg_ind < 0);
	 if buf[beg_ind] = '\n' then break! 
	 else beg_ind := beg_ind - 1; end;
      end;
      loc ::= index - (beg_ind + 1);
      loop (loc).times!; res := res + " "; end;
      res := res+cursor_char;
      return(res);
   end;
	 
end;