hashtab.sa

Generated by gen_html_sa_files from ICSI. Contact gomes@icsi.berkeley.edu for details
 
---------------------------> Sather 1.1 source file <--------------------------
-- $Id: hashtab.sa,v 1.6 1996/06/05 01:03:05 gomes Exp $
-- Author: Holger Klawitter <holger@math.uni-muenster.de>
--
-- hashtab.sa: Support classes for Dynamic Hash Tables.
--   DYNAMIC_BUCKET_TABLE{K,BUCKET}: A Dynamic Hash Table
--   BUCKET{E}: Buckets for storing elements in the table.
--   DATABUCKET{K,E}: Buckets for storing elements with associated data.


class DYNAMIC_BUCKET_TABLE{E,BUCKET} 
class DYNAMIC_BUCKET_TABLE{E,BUCKET} is
   -- Data structure used to implement a hash table
   -- 
   -- Implementation:
   -- Per-Ake Larson; Communications of the ACM Vol.31 (1988) P.446-457
   -- The directory/segment structure is changed against a dymnamically
   -- changing array as attr.
   include COMPARE{E} elt_eq->elt_key_eq,elt_hash->elt_key_hash,
	 elt_nil->elt_key_nil,elt_lt->,is_elt_nil->;
   
   private shared lower_fill_ratio: FLT := 0.800;
   private shared upper_fill_ratio: FLT := 1.000;
   -- Limits splitting or merging buckets.

   private attr store: AREF{BUCKET};
   -- Here is the data being stored.

   private attr doubles: INT;
   -- Number of times the initial table size has been doubled.

   private attr split_pos: INT;
   -- Position of the next bucket to split.

   private attr bound: INT;
   -- Upper bound for split_pos = initial_size * 2.pow(doubles)

   private attr asize: INT;
   -- The actual size. Array access beyond this bound is illegal.

   private attr minsize: INT;
   -- The initial size is also the minimal size. Further shrinking
   -- will not afflict the data storage.

   readonly attr n_inds: INT;
   -- Returns the number of elements in the set.

   private hash(e:E): INT  pre ~void(self) is-- BOOL::not
      -- Returns the bucket to store e. This number will be
      -- generated from the hash-value and be normailzed through
      -- the actual size of the set.
      h,res: INT;

      h := elt_key_hash(e);-- STAT::elt_key_hash
      res := h % bound;-- STAT::bound
      if res >= split_pos then return res end;-- STAT::split_pos BOOL::not
      return h % ( bound.lshift(1) )-- INT::mod STAT::bound INT::lshift
   end;

   set_bucket(i:INT,l:BUCKET) pre 0 <= i and i < asize and ~void(self) is -- INT::is_lt BOOL::not STAT::asize BOOL::not
      store[i]:=l-- STAT::store AREF{1}::aset
   end;

   bucket(i:INT): BUCKET pre 0 <= i and i < asize and ~void(self) is -- INT::is_lt BOOL::not STAT::asize BOOL::not
      return store[i]-- STAT::store AREF{1}::aget
   end;

   create: SAME is
      if void(self) then return create_sized(16) end;-- STAT::create_sized
      return create_sized(minsize.rshift(1))-- STAT::minsize INT::rshift
   end;

   create_sized(initial_size:INT): SAME 
      pre initial_size.is_even and initial_size > 0 is-- INT::is_even INT::is_lt
      res ::= new;
      res.store := #AREF{BUCKET}(initial_size.lshift(2));-- STAT::store AREF{1}::create INT::lshift
      res.bound := initial_size;-- STAT::bound
      res.asize := initial_size.lshift(1);-- STAT::asize INT::lshift
      res.minsize := initial_size.lshift(1);-- STAT::minsize INT::lshift
      return res
   end;

   map_copy: SAME	pre ~void(self) is
      -- Returns a copy of self with all properties set like self.
      res ::= new;
      res.store := store.create(store.asize);
      res.asize := asize;
      res.n_inds := n_inds;
      res.minsize := minsize;
      res.bound := bound;
      res.doubles := doubles;
      res.split_pos := split_pos;
      loop
	 i ::= 0.upto!(asize-1);
	 res.store[i] := store[i].copy_list;
      end;
      return res
   end;

-- The functions changing the size of the bucket table:
-- They are split into two parts.
-- 1.) Splitting the next bucket into two. (update_*)
-- 2.) Resizing the storage area. (shrink/grow)

   private grow pre ~void(self) is -- BOOL::not
      -- Increases the size of the array by one.
      -- The functions changing the size of the bucket table:
      -- They are split into two parts.
      -- 1.) Splitting the next bucket into two. (update_*)
      -- 2.) Resizing the storage area. (shrink/grow)
      if store.asize = asize then-- STAT::store AREF{1}::asize STAT::asize
	 news ::= store.create(asize.lshift(1));-- STAT::store AREF{1}::create STAT::asize INT::lshift
	 loop news.aset!(store.aelt!) end;-- AREF{1}::aset! STAT::store AREF{1}::aelt!
	 store := news-- STAT::store
      end;
      asize := asize + 1;-- STAT::asize STAT::asize INT::plus
   end;

   private shrink  pre ~void(self) is
      -- Decreases the size of the array by one.
      -- Requests to shrink under the initial size will be ignored.
      if asize = minsize then return end;
      if store.asize = asize.lshift(1) then
	 news ::= store.create(asize);
	 loop news.aset!(store.aelt!) end;
	 store := news
      end;
      asize := asize - 1;
   end;

   private update_insert is
      -- Checks the actual fill ratio of the set.
      -- Adds a bucket to the hash table if the ratio is high enough. 
      -- The functions changing the size of the bucket table
      -- are split into two parts.
      -- 1.) Splitting the next bucket into two. (update_*)
      -- 2.) Resizing the storage area. (shrink/grow)
      if n_inds.flt / (bound+split_pos).flt < upper_fill_ratio then-- STAT::n_inds INT::flt FLT::div STAT::bound STAT::split_pos INT::flt STAT::upper_fill_ratio
	 return
      end;

      cur ::= bucket(split_pos);-- STAT::split_pos
      prev ::= cur; prev := void;
      -- This is an ugly hack to make prev to have the same type
      -- like cur.
      loop until!( void(cur) );
	 if elt_key_hash(cur.item) % ( bound.lshift(1) ) = split_pos-- STAT::elt_key_hash DATABUCKET{2}::item INT::mod STAT::bound INT::lshift STAT::split_pos
	 then -- keep in the old bucket
	    prev := cur;
	    cur := cur.next;-- DATABUCKET{2}::next
	 else -- put into new bucket
	    if void(prev) then -- the first one for new bucket
	       set_bucket(split_pos, cur.next);-- STAT::split_pos DATABUCKET{2}::next
	       cur.next( bucket(bound + split_pos) );-- DATABUCKET{2}::next STAT::bound STAT::split_pos
	       set_bucket( bound + split_pos, cur );-- STAT::bound STAT::split_pos
	       cur := bucket( split_pos )-- STAT::split_pos
	    else
	       prev.next(cur.next);-- DATABUCKET{2}::next DATABUCKET{2}::next
	       cur.next( bucket( bound + split_pos ));-- DATABUCKET{2}::next STAT::bound STAT::split_pos
	       set_bucket( bound + split_pos, cur );-- STAT::bound STAT::split_pos
	       cur := prev.next-- DATABUCKET{2}::next
	    end
	 end	
      end;
      grow;-- STAT::grow
      split_pos := split_pos + 1;-- STAT::split_pos STAT::split_pos INT::plus
      if split_pos = bound then-- STAT::split_pos STAT::bound
	 split_pos := 0;-- STAT::split_pos
	 doubles := doubles + 1;-- STAT::doubles STAT::doubles INT::plus
	 bound := bound.lshift(1);-- STAT::bound STAT::bound INT::lshift
      end
   end;

   private update_delete is
      -- Checks the actual fill ratio of the set.
      -- Removes a bucket from the hash table if the ratio is low enough.
      if n_inds.flt / (bound+split_pos).flt > lower_fill_ratio then
	 return
      end;

--      to_merge ::= bucket(split_pos);
--      if void(to_merge) then -- just get the other bucket
--	 set_bucket( split_pos, bucket( bound + split_pos ))
--      else 
--	 to_merge.append( bucket( bound + split_pos ) )
--      end;
--      set_bucket( bound + split_pos, void );
--      shrink;
      split_pos := split_pos - 1;
      if split_pos < 0 then
	 if doubles = 0 then
	    split_pos := 0
	 else
	    doubles := doubles - 1;
	    bound := bound.rshift(1);
	    split_pos := bound - 1;
	 end
      end;
      shrink;
	to_merge ::= bucket(split_pos);
	if void(to_merge) then -- just get the other bucket
	    set_bucket( split_pos, bucket( bound + split_pos ))
	else 
	    to_merge.append( bucket( bound + split_pos ) )
	end;
	set_bucket( bound + split_pos, void );
     end;
   
   dbg: STR
   -- Returns an interal string representation of the hashtable.
   -- For debugging only.
   is
      res:STR;
      res := "split_pos=" + split_pos + ", bound=" + bound +
                ", asize=" + asize + ", size=" + n_inds + ", minsize=" +
                minsize + "\n";
      loop
	 l ::= store.aelt!;
	 bkt: STR := "";
	 itemstr: STR;
	 item ::= l.list!.item;
	 typecase item
	 when $STR then itemstr := item.str
	 else itemstr := SYS::id(item).str; end;
	 loop bkt := bkt + ",".separate!(itemstr) end;
	 res := res + ", ".separate!(0.up!.str+"="+bkt);
      end;
      return res+"\n";
   end; -- dbg

end; -- DYNAMIC_BUCKET_TABLE


class DYNAMIC_DATABUCKET_TABLE{K,E} 
class DYNAMIC_DATABUCKET_TABLE{K,E} is
   -- Core of the hash table implementation, to make it easier to include into
   -- bags,maps and multimaps
   include DYNAMIC_BUCKET_TABLE{K,DATABUCKET{K,E}};   
   
   private data_nil: E is
      e: E;
      typecase e
      when $NIL then 
	 temp::= e.nil; typecase temp when E then return temp end;-- INT::nil
      else return void   end
   end;

   map_aset(k:K,e:E) pre ~void(self) is-- BOOL::not
      -- Over-ride data if 'k' exists.
      -- Otherwise grow the bucket chain associated with hash(k)
      h ::= hash(k);-- STAT::hash
      loop
	 b ::= bucket(h).list!;-- STAT::bucket DATABUCKET{2}::list!
	 if elt_key_eq(b.item,k) then b.data := e; return end-- STAT::elt_key_eq DATABUCKET{2}::item DATABUCKET{2}::data
      end;
      set_bucket( h, #DATABUCKET{K,E}(k,e,bucket(h)) );-- STAT::set_bucket DATABUCKET{2}::create STAT::bucket
      n_inds := n_inds + 1;-- STAT::n_inds STAT::n_inds INT::plus
      update_insert-- STAT::update_insert
   end;

   map_delete(k:K): E  pre ~void(self) is
      -- Removes an element from the set.
      -- Does nothing if there is no such element.
      h ::= hash(k);
      b ::= bucket(h);
      prev ::= b; prev := void; -- NASTY HACK

      loop until!( void(b) or elt_key_eq(b.item,k) );
	 prev := b;
	 b := b.next
      end;
      if void(b) then
	 return data_nil -- or raise error ?
      end;
      res ::= b.data;
      if void(prev) then  set_bucket( h, b.next )
      else prev.next(b.next)   end;
      n_inds := n_inds - 1;
      update_delete;
      return res
   end;

   map_has_ind(k:K): BOOL  pre ~void(self) is
      loop
	 if elt_key_eq(bucket(hash(k)).list!.item,k) then  return true end
      end;
      return false
   end;

   map_aget(k:K): E  pre ~void(self) is-- BOOL::not
      -- Returns the element equal to 'e' from the set.
      -- Returns void or T::nil if there is no such element.
      -- Self may not be void.
      loop
	 b ::= bucket(hash(k)).list!;-- STAT::bucket STAT::hash DATABUCKET{2}::list!
	 if elt_key_eq(b.item,k) then return b.data end-- STAT::elt_key_eq DATABUCKET{2}::item DATABUCKET{2}::data
      end;
      return data_nil-- STAT::data_nil
   end;

   map_key!: K 
      pre ~void(self) 
   is-- BOOL::not
      loop
	 b ::= bucket( 0.upto!(bound+split_pos-1) );-- STAT::bucket INT::upto! STAT::bound STAT::split_pos INT::minus
	 loop yield b.list!.item end-- DATABUCKET{2}::list! DATABUCKET{2}::item
      end
   end;

   map_elt!: E  pre ~void(self) is-- BOOL::not
      loop
	 b ::= bucket( 0.upto!(bound+split_pos-1) );-- STAT::bucket INT::upto! STAT::bound STAT::split_pos INT::minus
	 loop yield b.list!.data end-- DATABUCKET{2}::list! DATABUCKET{2}::data
      end
   end;

   map_pair!: TUP{K,E} pre ~void(self) is
      loop
	 b ::= bucket( 0.upto!(bound+split_pos-1) );
	 loop
	    bk ::= b.list!;
	    yield #TUP{K,E}(bk.item,bk.data)
	 end
      end
   end;

end;


class BUCKET{E} < $NEXT{BUCKET{E}}  
class BUCKET{E} < $NEXT{BUCKET{E}}  is
   -- Mainly this class adds an item to NEXT and gives some handy constructors.
   include NEXT{SAME};

   attr item: E;
   
   create(e:E): SAME is
      -- Create a link containing e.
      res ::= new;
      res.item := e;
      return res
   end;

   create(e:E,n:SAME): SAME is
      -- Create a link containig e and prepending the link n.
      -- n may be void.
      res ::= new;
      res.item := e; res.next := n;
      return res
   end;

   copy_list: SAME is
      -- Returns a copy of self and all following links. The
      -- objects will not be 'copy'ed.
      -- Self may be void.
      if void(self) then return void end;
      return #SAME(item,next.copy_list)
   end;

   list!: SAME is
      -- Yields all subsequent elements in the list.
      b ::= self;
      loop until!( void(b) ); yield b; b := b.next end
   end;

end;


class DATABUCKET{K,E} < $NEXT{DATABUCKET{K,E}} 
class DATABUCKET{K,E} < $NEXT{DATABUCKET{K,E}} is
   -- An addition to BUCKET{K} this class also adds data.
   -- One would like to include BUCKET{K}, but SAME is expanded too
   -- early, wait for Sather 1.1. :-)

   include NEXT{SAME};
   
   attr item: K;
   attr data: E;
   
   create(k:K): SAME is
      res ::= new;
      res.item := k;
      return res
   end;
   
   create(k:K,e:E): SAME is
      res ::= new;
      res.item := k; res.data := e;
      return res
   end;
   
   create(k:K,e:E,n:SAME): SAME is
      res ::= new;
      res.item := k; res.data := e; res.next := n;-- DATABUCKET{2}::item DATABUCKET{2}::data DATABUCKET{2}::next
      return res
   end;

   copy_list: SAME  is
      if void(self) then return void end;
      return #SAME(item,data,next.copy_list)
   end;

   list!: SAME is
      -- Yields all subsequent elements in the list.
      b ::= self;
      loop until!( void(b) ); yield b; b := b.next end-- DATABUCKET{2}::next
   end;

end; -- DATABUCKET{K,E}