fstr.sa
Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
---------------------------> Sather 1.1 source file <--------------------------
-- Copyright (C) International Computer Science Institute, 1994. COPYRIGHT --
-- NOTICE: This code is provided "AS IS" WITHOUT ANY WARRANTY and is subject --
-- to the terms of the SATHER LIBRARY GENERAL PUBLIC LICENSE contained in --
-- the file "Doc/License" of the Sather distribution. The license is also --
-- available from ICSI, 1947 Center St., Suite 600, Berkeley CA 94704, USA. --
--------> Please email comments to sather-bugs@icsi.berkeley.edu. <----------
-- fstr.sa: Buffers for efficiently constructing strings.
-- Jan6/96 Incorporated Erik's diffs
class FSTR < $IS_LT{FSTR}, $HASH, $STR
class FSTR < $IS_LT{FSTR}, $HASH, $STR is
-- Buffers for efficiently constructing strings by repeated
-- concatenation using amortized doubling.
include COMPARABLE;
include FLIST{CHAR}
asize->asize,loc->loc; -- Needs to be public for some uses.
-- Some useful features are:
-- size:INT The current string size.
-- create(n:INT):SAME A buffer of initial size `n'.
-- copy:SAME A copy of self.
-- aget(ind:INT):CHAR The character `ind'.
-- aset(ind:INT, c:CHAR) Set character `ind' to `c'.
-- is_eq(l:SAME):BOOL True if self equals l as strings.
-- is_empty:BOOL True if self is the empty string.
-- clear Make self represent the empty string.
-- elt!:CHAR The characters in self.
-- elt!(beg:INT):CHAR Characters starting at `beg'.
-- elt!(beg,num:INT):CHAR `num' chars beginning at `beg'.
-- elt!(beg,num,step:INT):CHAR `num' chars beginning at `beg',
-- stepping by `step'.
-- ind!:INT The indices of the buffer.
-- contains(c:CHAR):BOOL True if self contains `c'.
-- to_reverse Reverse the order of the characters.
create:SAME is
-- A new buffer.
return new(16) end;
create(sz:INT):SAME pre sz>=0 is-- INT::is_lt BOOL::not
-- A new buffer of size `sz'.
if sz=0 then return void-- INT::is_eq
else return new(sz)
end;
end;
acopy(s:STR) is
builtin FSTR_ACOPY_STR;
end;
acopy(f:FSTR) pre asize>=f.asize is-- FSTR::asize INT::is_lt FSTR::asize BOOL::not
builtin FSTR_ACOPY_FSTR;
end;
acopyn(s:STR,n:INT) pre n <= s.length is-- INT::is_lt STR::length BOOL::not
builtin FSTR_ACOPYN_STR_INT;
end;
acopyn(s:FSTR,n:INT) pre n <= s.length is-- INT::is_lt FSTR::length BOOL::not
builtin FSTR_ACOPYN_FSTR_INT;
end;
create(s:STR):SAME is
-- added by MBK to make STR::fstr:FSTR faster
if void(s) then
return create;-- FSTR::create
else
l ::= s.length;-- STR::length
r:SAME;
if l=0 then r:=new(1); -- Can't have FSTR with no buffer!-- INT::is_eq
else r:=new(l);
end;
r.loc := l;-- FSTR::loc
r.acopyn(s,l); -- this puppy should be macroized into memcpy-- FSTR::acopyn
return r;
end;
end;
length:INT is
-- The number of characters in self. Another name for `size'.
return size end;-- FSTR::size
push(c:CHAR):SAME is
-- Add a new character to the end of self and return it.
-- If self is void, create a new list. Usage: `l:=l.push(e)'.
-- This routine needs to go fast too, which is the reason behind
-- the "l" temporary. Modified by MBK.
r:SAME;
l:INT;
if void(self) then r:=create; l := 0;-- FSTR::create
elsif loc<asize then r:=self; l := r.loc; -- normal path-- FSTR::loc FSTR::asize FSTR::loc
else
r:=new(2*asize);-- FSTR::asize
l := loc; -- FSTR::loc
r.loc:=l+1;-- FSTR::loc INT::plus
r.acopyn(self,l);-- FSTR::acopyn
SYS::destroy(self); -- The old one should never be used now.-- SYS::destroy
end;
r.loc:=l+1;-- FSTR::loc INT::plus
r[l]:=c;-- FSTR::aset
return r;
end;
str:STR is
-- A string version of self.
return STR::from_fstr(self) end;-- STR::from_fstr
clear is
-- Set self to the empty string. Retain the array.
-- Self may be void.
if ~void(self) then loc:=0 end end;-- BOOL::not FSTR::loc
acopy(beg:INT,src:STR) is
builtin FSTR_ACOPY_INT_STR;
end;
acopy(beg:INT,src:FSTR) is
builtin FSTR_ACOPY_INT_FSTR;
end;
-- The plus routines in FSTR are overloaded individually
-- instead of using a single plus($STR). This is done for
-- speed; when the compiler does specialization on arguments
-- other than self, this can be simplified.
plus(s:STR):SAME
-- Append the string `s' to self and return it. modified by MBK
-- et al to make it go fast. Called by compiler frequently.
-- post result.str = initial(self.str) + s
is
r:SAME;
l ::= s.length;-- STR::length
if void(self) then
r:=create(2*l);-- FSTR::create INT::times
elsif (loc + l < asize) then-- FSTR::loc INT::plus FSTR::asize
r:=self;
else
r :=new(2*(asize+l));-- INT::times FSTR::asize INT::plus
r.loc := loc;-- FSTR::loc FSTR::loc
r.acopy(self);-- FSTR::acopy
SYS::destroy(self); -- The old one should never be used now.-- SYS::destroy
end;
if (l = 0) then return r; end;-- INT::is_eq
r.loc := r.loc + l;-- FSTR::loc FSTR::loc INT::plus
r.acopy(r.loc-l,s); -- This one is MACROIZED to a memcpy.-- FSTR::acopy FSTR::loc INT::minus
-- r::=self; loop r:=r.push(s.elt!) end; return r end;
return r;
end;
plus(s:SAME):SAME
-- Append `s' to self and return it.
--post result.str = initial(self.str) + initial(s.str)
is
r:SAME;
l ::= s.length;-- FSTR::length
if void(self) then
r:=create(2*l);-- FSTR::create INT::times
elsif (loc + l < asize) then-- FSTR::loc INT::plus FSTR::asize
r:=self;
else
r :=new(2*(asize+l));-- INT::times FSTR::asize INT::plus
r.loc := loc;-- FSTR::loc FSTR::loc
r.acopy(self);-- FSTR::acopy
SYS::destroy(self); -- The old one should never be used now.-- SYS::destroy
end;
if (l = 0) then return r; end;-- INT::is_eq
r.loc := r.loc + l;-- FSTR::loc FSTR::loc INT::plus
r.acopy(r.loc-l,s); -- This one is MACROIZED to a memcpy.-- FSTR::acopy FSTR::loc INT::minus
-- r::=self; loop r:=r.push(s.elt!) end; return r end;
return r;
end;
plus(b:BOOL):SAME is
-- Append `b' to self and return it.
if b then return self + "true"
else return self + "false" end end;
plus(c:CHAR):SAME is
-- Append `c' to self and return it.
return push(c) end;-- FSTR::push
plus(i:INT):SAME is
-- Append `i' to self and return it.
return i.str_in(self) end;-- INT::str_in
plus(f:FLT):SAME is
-- Append `f' to self and return it.
return (self + (f.str)) end;
-- OLD, better version, does not work as yet return f.str_in(self) end;
private is_eq_helper(s:SAME,i:INT):BOOL is
builtin FSTR_MEMCMP_FSTR_INT;
end;
private is_eq_helper(s:STR,i:INT):BOOL is
builtin FSTR_MEMCMP_STR_INT;
end;
is_eq(s:SAME):BOOL
-- True if `s' equals self. Either may be void. MBK.
post result = (initial(self).str.is_eq(s.str)) is-- BOOL::is_eq FSTR::str STR::is_eq FSTR::str
s1,s2:INT;
if void(self) then s1 := -1; else s1 := loc; end;-- FSTR::loc
if void(s) then s2 := -1; else s2 := s.loc; end;-- FSTR::loc
-- -1 is an otherwise illegal value.
-- We thus distinguish 'void' from 0 length FSTR.
if s1 /= s2 then return false end; -- INT::is_eq BOOL::not
return is_eq_helper(s,s1); -- MACROized.-- FSTR::is_eq_helper
end;
is_eq(s:STR):BOOL is
-- so you can say `` if FSTR = "blabitty blah blah blah" ''
s1,s2:INT;
if void(self) then s1 := -1; else s1 := loc; end;-- FSTR::loc
if void(s) then s2 := -1; else s2 := s.size; end;-- STR::size
-- -1 is an otherwise illegal value.
-- We thus distinguish 'void' from 0 length FSTR.
if s1 /= s2 then return false end; -- INT::is_eq BOOL::not
return is_eq_helper(s,s1); -- MACROized.-- FSTR::is_eq_helper
end;
-- is_eq(s:SAME):BOOL is
-- -- True if `s' equals self. Either may be void.
-- if s.size/=size then return false end;
-- loop if elt!/=s.elt! then return false end end;
-- return true end;
is_neq(s:SAME):BOOL is
-- True if `s' is not equal to self. Either may be void.
return ~is_eq(s) end;
is_neq(s:STR):BOOL is
-- True if `s' is not equal to self. Either may be void.
return ~is_eq(s) end;
is_lt(s:SAME):BOOL is
-- True if self is lexicographically before `s'.
-- Void is before everything else.
if size=0 then-- FSTR::size INT::is_eq
if s.size/=0 then return true else return false end end;-- FSTR::size INT::is_eq BOOL::not
if s.size=0 then return false end;-- FSTR::size INT::is_eq
loop c::=elt!; sc::=s.elt!;-- FSTR::elt! FSTR::elt!
if c.is_gt(sc) then return false-- CHAR::is_gt
elsif c.is_lt(sc) then return true end end;-- CHAR::is_lt
if size<s.size then return true -- FSTR::size INT::is_lt FSTR::size
else return false end end;
hash:INT is
-- Keep It Simple, Stupid.
if void(self) then return 0 end;
if (length = 0) then return 0 end;-- FSTR::length INT::is_eq
i::= length-1;-- FSTR::length INT::minus
r:INT:=532415.mplus([i].int);-- INT::mplus FSTR::aget CHAR::int
-- 532415 = 'A' * (2^13-1)
i := i-1;-- INT::minus
loop while!(i>=0);-- INT::is_lt BOOL::not
r := (r.mtimes(1664525)).mplus(1013904223).mplus([i].int); -- INT::mtimes INT::mplus INT::mplus FSTR::aget CHAR::int
i := i-1-- INT::minus
end;
return r;
end;
hash0:INT is
-- An inexpensive to compute hash function of self.
-- Gives an INT with rightmost 24 bits. Also gives
-- lousy hash functions.
-- Void gives 0.
r::=0;
loop i::=ind!; r:=r.bxor([i].int.lshift(i.band(15))) end;-- FSTR::ind! INT::bxor FSTR::aget CHAR::int INT::lshift INT::band
return r end;
thumbprint:STR is
-- Compute a representation to use in place of the whole text.
-- The probability that two thumbprints are the same for two
-- different FSTRs should be vanishingly small. This is a
-- little more paranoid than the hash function.
return size.str+':'+hash+':'+hash0;-- FSTR::size INT::str STR::plus FSTR::hash STR::plus FSTR::hash0
end;
append_file(nm:STR):SAME is
-- Open the file named `nm' in the current directory, append
-- its contents to self, close the file, and return the new
-- buffer. Do nothing if the file cannot be opened.
fd::=RUNTIME::rt_file_open(nm);
if fd<0 then return self end;
sz::=RUNTIME::rt_file_size(fd);
if sz=0 then return self end;
r:SAME;
bst:INT;
if void(self) then
r:=new(sz); bst:=0; r.loc:=sz;
elsif sz<=asize-loc then
r:=self; bst:=loc; r.loc:=loc+sz;
else
r:=new((2*asize).max(loc+sz)); bst:=loc;
r.acopy(self); r.loc:=loc+sz; end;
RUNTIME::rt_file_in_fstr(fd,r,0,sz,bst);
RUNTIME::rt_file_close(fd);
return r end;
append_file_range(nm:STR,st,sz:INT):SAME is
-- Open the file named `nm' in the current directory, append
-- at most `sz' characters starting at `st' to self (only as
-- many as are there), close the file, and return the new buffer.
-- Do nothing if the file cannot be opened.
fd::=RUNTIME::rt_file_open(nm);
if fd<0 then return self end;
fsz::=RUNTIME::rt_file_size(fd);
if fsz=0 then return self end;
asz::=(fsz-st).min(sz); -- Actual size of range.
r:SAME;
bst:INT;
if void(self) then
r:=new(asz); bst:=0; r.loc:=asz;
elsif asz<=asize-loc then
r:=self; bst:=loc; r.loc:=loc+asz;
else
r:=new((2*asize).max(loc+asz)); bst:=loc;
r.acopy(self); r.loc:=loc+asz; end;
RUNTIME::rt_file_in_fstr(fd,r,st,asz,bst);
RUNTIME::rt_file_close(fd);
return r end;
is_upper:BOOL is
-- True if each alphabetic character of self is upper case.
-- Self may be void.
loop if elt!.is_upper.not then return false end end;
return true end;
is_lower:BOOL is
-- True if each alphabetic character of self is lower case.
-- Self may be void.
loop if elt!.is_lower.not then return false end end;
return true end;
head(i:INT):SAME
-- The first `i' characters of self.
-- Self may be void if i=0.
pre i.is_bet(0,size) is
if void(self) or i = 0 then return void end;
r::=new(i);
r.loc := i;
r.acopyn(self,i);
return r;
end;
tail(i:INT):SAME
-- The last `i' characters of self.
-- Self may be void if i=0.
pre i.is_bet(0,size) post result.size = i is
if void(self) then return self end;
r::=#SAME(i);
r.loc := i;
r.acopy(0,i,size-i,self);
return r;
end;
substring(beg,num:INT):SAME
-- The substring with `num' charcters whose first character has
-- index `beg'. Self may be void if beg=0 and num=0.
pre num>=0 and beg.is_bet(0,size-num) post result.size = num is-- INT::is_lt BOOL::not INT::is_bet FSTR::size INT::minus FSTR::size INT::is_eq
if void(self) then return void end;
r::=#SAME(num); -- FSTR::create
r.loc:=num; -- FSTR::loc
r.acopy(0,num,beg,self); -- FSTR::acopy
return r;
end;
separate!(s:SAME):FSTR is
-- On the first iteration just outputs `s', on successive
-- iterations it outputs self followed by `s'. Useful for
-- forming lists,
-- Eg: loop #OUT + ", ".fstr.separate!(a.elt!) end;
-- incorporated Erik's fixes
yield s;
loop num ::= size+s.size; f ::= #SAME(num); f.loc := num;
f.acopy(self); f.acopy(size,s);
yield f;
end;
-- old version: loop yield self.copy + s; end;
end;
end; -- class FSTR