(*;;;;;;;;;;;;;;;;;;;;;;;;;;; -*- Mode: SML -*- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;; %% ===================================================================== %% Hash.sml -- A General Hash Function Generator %% Author : John Hamer %% Created On : Fri Feb 18 12:05:51 2000 %% Last Modified By: John Hamer %% Last Modified On: Tue Apr 25 08:40:44 2000 %% PURPOSE %% A general hash function generates an approximately uniform %% random distribution from even highly skewed input. Put another %% way, a difference of a single bit in the input can affect every %% single bit in the output, and each output bit is equally likely. %% This property makes them ideal for all hashing applications. %% %% The code given here is a direct translation of a Pascal program %% written by Robert Uzgalas. %% ===================================================================== *) load "Word"; load "Int"; load "Real"; (* We use the SML Word library extensively, since our code requires at least 31 bits (SML int is 16 bits on my mosml system). *) fun bsl(w, i) = Word.<<(w, Word.fromInt i) fun bsr(w, i) = Word.>>(w, Word.fromInt i) val orb = Word.orb val xorb = Word.xorb val andb = Word.andb (* A (good) pseudo-random number generator. Or, try www.random.org *) val s1 = ref 0w0 (* A 31-bit quantity *) val s2 = ref 0w0 (* A 29-bit quantity *) fun tauset i1 i2 = ( s1 := andb(0wx7fffffff, i1); s2 := andb(0wx1fffffff, i2) ) fun taurand ir = ( (* Return a pseudo-random number between 0 and ir-1 (special case: if ir = 0, we return 29 all bits *) (* 1st RN: 12 bit circular jumps in a 31 bit field *) let val a = bsr(xorb(bsl(!s1, 13), !s1), 19) in s1 := andb(0wx7fffffff, xorb(bsl(!s1, 12), a)) end; (* 2nd RN: 17 bit circular jumps in a 29 bit field *) let val a = bsr(xorb(bsl(!s2, 2), !s2), 12) in s2 := andb(0wx1fffffff, xorb( bsl(!s2, 17), a)) end; (* Combine RNs into one *) let val comb = xorb(!s1, bsl(!s2, 2)) (* align randoms at 29 bits *) in Word.toInt( if ir = 0 then comb else Word.mod(comb, Word.fromInt ir) ) end ) fun permute data = (* Generate a random permutation of the array data. Note that tauset needs to called before this routine is used. The algorithm swaps each element in turn with another randomly chosen from further along the array. How does it work? Starting with an array of three elements, abc, involves two random swaps: index 0 with index 0, 1 or 2, followed by index 1 with index 1 or 2. This code also naively attempts to swap index 2 with itself, with no harmful effect. The first swap gives one of 3 equally likely outcomes after the first swap: abc bac cba. The second swap has an even chance of making no further change, but if it does make a change then acb, bca abd cab are the possible results. Graphically, we have this: abc | abc | | abc | | acb | bac | | bac | | bca | cba | | cba | | cab | | | | | `array after second swap (1/3 x 1/2 chance each) | | | `array after first swap (1/3 chance each) | `starting array Observe that each of the arrangements after the second swap are equally likely (assuming taurand is a good pseudo-random number generator). *) let fun swap x i j = if i = j then () else ( Array.update(data, i, Array.sub(data, j)); Array.update(data, j, x) ) in Array.appi (fn (i,x) => swap x i (i + taurand (Array.length data - i))) (data,0,NONE) end (* buztbl is an array of 256 specially selected random numbers. It has the property that, when viewed as a 256 x Word.wordSize table of bits, each of the Word.wordSize columns has exactly the same number of 0s as 1s. That is, the table has no bias toward 0s or 1s in any of the bit positions. We construct the table in Word.wordSize steps, with each step adding a single bit to each entry. An array of 256 bits, btab256, is filled with equal numbers of 0s and 1s, and then randomly permuted. The bits from this array are then shifted into each corresponding entry in buztbl. buzinit is a word containing an equal(ish) number of 0s and 1s, in a random permutation. This value is used as a starting hash value. (Aside: Robert Uzgalas's nickname is Buz) *) val buztbl = Array.array (256, 0w0) val buzhinit = ref 0w0 fun buzinit (i1, i2) = (* i1, i2 are the seeds for the random number generator *) let fun isOdd( i ) = Word.fromInt (i mod 2) (* Iteration: call f(0), f(1), ..., f(n-1) *) fun for( n, f ) = if n <= 0 then () else (f (n-1); for(n-1, f) ) in tauset i1 i2; let val btab256 = Array.tabulate (256, isOdd) in for( Word.wordSize, fn _ => (permute btab256; Array.modifyi (fn (i, x) => orb( bsl(x,1), Array.sub(btab256, i))) (buztbl,0,NONE) ) ) end; let val btab = Array.tabulate (Word.wordSize, isOdd) in permute btab; buzhinit := Array.foldl (fn (b, x) => andb(0wx7fffffff, orb(bsl(x,1), b))) 0w0 btab end end (* * You can test this module using the following function. * Remember to call buzinit first! *) fun hash str = let fun brotl h = orb( bsl(h,1), bsr(h,Word.wordSize - 1) ) fun f (c,h) = xorb(brotl h, Array.sub(buztbl, ord c)) in List.foldl f (!buzhinit) (String.explode str) end (* * For comparison *) fun simpleHash str = Word.fromInt (List.foldl (fn (c,h)=>h+ord c) 0 (String.explode str)) (* * Hash function recommended by Knuth *) local open Word; fun f (c,h) = xorb(xorb(<<(h,0w5),>>(h,0w27)), Word.fromInt (ord c)); in fun knuthHash name = List.foldr f 0w0 (String.explode name) end;