1 /**
2  * Compiler implementation of the D programming language
3  * http://dlang.org
4  *
5  * Copyright: Copyright (c) 1999-2016 by Digital Mars, All Rights Reserved
6  * Authors:   Walter Bright, http://www.digitalmars.com
7  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:    $(DMDSRC root/_stringtable.d)
9  */
10 
11 module ddmd.root.stringtable;
12 
13 import core.stdc..string;
14 import ddmd.root.rmem;
15 
16 enum POOL_BITS = 12;
17 enum POOL_SIZE = (1U << POOL_BITS);
18 
19 // TODO: Merge with root.String
20 // MurmurHash2 was written by Austin Appleby, and is placed in the public
21 // domain. The author hereby disclaims copyright to this source code.
22 // https://sites.google.com/site/murmurhash/
23 private uint calcHash(const(char)* key, size_t len) pure nothrow @nogc
24 {
25     // 'm' and 'r' are mixing constants generated offline.
26     // They're not really 'magic', they just happen to work well.
27     enum uint m = 0x5bd1e995;
28     enum int r = 24;
29     // Initialize the hash to a 'random' value
30     uint h = cast(uint)len;
31     // Mix 4 bytes at a time into the hash
32     const(ubyte)* data = cast(const(ubyte)*)key;
33     while (len >= 4)
34     {
35         uint k = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0];
36         k *= m;
37         k ^= k >> r;
38         h = (h * m) ^ (k * m);
39         data += 4;
40         len -= 4;
41     }
42     // Handle the last few bytes of the input array
43     switch (len & 3)
44     {
45     case 3:
46         h ^= data[2] << 16;
47         goto case;
48     case 2:
49         h ^= data[1] << 8;
50         goto case;
51     case 1:
52         h ^= data[0];
53         h *= m;
54         goto default;
55     default:
56         break;
57     }
58     // Do a few final mixes of the hash to ensure the last few
59     // bytes are well-incorporated.
60     h ^= h >> 13;
61     h *= m;
62     h ^= h >> 15;
63     return h;
64 }
65 
66 private size_t nextpow2(size_t val) pure nothrow @nogc @safe
67 {
68     size_t res = 1;
69     while (res < val)
70         res <<= 1;
71     return res;
72 }
73 
74 enum loadFactor = 0.8;
75 
76 struct StringEntry
77 {
78     uint hash;
79     uint vptr;
80 }
81 
82 // StringValue is a variable-length structure. It has neither proper c'tors nor a
83 // factory method because the only thing which should be creating these is StringTable.
84 struct StringValue
85 {
86     void* ptrvalue;
87     size_t length;
88 
89 nothrow:
90 pure:
91     extern (C++) char* lstring()
92     {
93         return cast(char*)(&this + 1);
94     }
95 
96     extern (C++) size_t len() const
97     {
98         return length;
99     }
100 
101     extern (C++) const(char)* toDchars() const
102     {
103         return cast(const(char)*)(&this + 1);
104     }
105 }
106 
107 struct StringTable
108 {
109 private:
110     StringEntry* table;
111     size_t tabledim;
112     ubyte** pools;
113     size_t npools;
114     size_t nfill;
115     size_t count;
116 
117 public:
118     extern (C++) void _init(size_t size = 0) nothrow
119     {
120         size = nextpow2(cast(size_t)(size / loadFactor));
121         if (size < 32)
122             size = 32;
123         table = cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof);
124         tabledim = size;
125         pools = null;
126         npools = nfill = 0;
127         count = 0;
128     }
129 
130     extern (C++) void reset(size_t size = 0) nothrow
131     {
132         for (size_t i = 0; i < npools; ++i)
133             mem.xfree(pools[i]);
134         mem.xfree(table);
135         mem.xfree(pools);
136         table = null;
137         pools = null;
138         _init(size);
139     }
140 
141     extern (C++) ~this() nothrow
142     {
143         for (size_t i = 0; i < npools; ++i)
144             mem.xfree(pools[i]);
145         mem.xfree(table);
146         mem.xfree(pools);
147         table = null;
148         pools = null;
149     }
150 
151     extern (C++) StringValue* lookup(const(char)* s, size_t length) nothrow pure
152     {
153         const(hash_t) hash = calcHash(s, length);
154         const(size_t) i = findSlot(hash, s, length);
155         // printf("lookup %.*s %p\n", (int)length, s, table[i].value ?: NULL);
156         return getValue(table[i].vptr);
157     }
158 
159     extern (C++) StringValue* insert(const(char)* s, size_t length, void* ptrvalue) nothrow
160     {
161         const(hash_t) hash = calcHash(s, length);
162         size_t i = findSlot(hash, s, length);
163         if (table[i].vptr)
164             return null; // already in table
165         if (++count > tabledim * loadFactor)
166         {
167             grow();
168             i = findSlot(hash, s, length);
169         }
170         table[i].hash = hash;
171         table[i].vptr = allocValue(s, length, ptrvalue);
172         // printf("insert %.*s %p\n", (int)length, s, table[i].value ?: NULL);
173         return getValue(table[i].vptr);
174     }
175 
176     extern (C++) StringValue* update(const(char)* s, size_t length) nothrow
177     {
178         const(hash_t) hash = calcHash(s, length);
179         size_t i = findSlot(hash, s, length);
180         if (!table[i].vptr)
181         {
182             if (++count > tabledim * loadFactor)
183             {
184                 grow();
185                 i = findSlot(hash, s, length);
186             }
187             table[i].hash = hash;
188             table[i].vptr = allocValue(s, length, null);
189         }
190         // printf("update %.*s %p\n", (int)length, s, table[i].value ?: NULL);
191         return getValue(table[i].vptr);
192     }
193 
194     /********************************
195      * Walk the contents of the string table,
196      * calling fp for each entry.
197      * Params:
198      *      fp = function to call. Returns !=0 to stop
199      * Returns:
200      *      last return value of fp call
201      */
202     extern (C++) int apply(int function(const(StringValue)*) fp)
203     {
204         foreach (const se; table[0 .. tabledim])
205         {
206             if (!se.vptr)
207                 continue;
208             const sv = getValue(se.vptr);
209             int result = (*fp)(sv);
210             if (result)
211                 return result;
212         }
213         return 0;
214     }
215 
216 private:
217 nothrow:
218     uint allocValue(const(char)* s, size_t length, void* ptrvalue)
219     {
220         const(size_t) nbytes = StringValue.sizeof + length + 1;
221         if (!npools || nfill + nbytes > POOL_SIZE)
222         {
223             pools = cast(ubyte**)mem.xrealloc(pools, ++npools * (pools[0]).sizeof);
224             pools[npools - 1] = cast(ubyte*)mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE);
225             nfill = 0;
226         }
227         StringValue* sv = cast(StringValue*)&pools[npools - 1][nfill];
228         sv.ptrvalue = ptrvalue;
229         sv.length = length;
230         .memcpy(sv.lstring(), s, length);
231         sv.lstring()[length] = 0;
232         const(uint) vptr = cast(uint)(npools << POOL_BITS | nfill);
233         nfill += nbytes + (-nbytes & 7); // align to 8 bytes
234         return vptr;
235     }
236 
237     StringValue* getValue(uint vptr) pure
238     {
239         if (!vptr)
240             return null;
241         const(size_t) idx = (vptr >> POOL_BITS) - 1;
242         const(size_t) off = vptr & POOL_SIZE - 1;
243         return cast(StringValue*)&pools[idx][off];
244     }
245 
246     size_t findSlot(hash_t hash, const(char)* s, size_t length) pure
247     {
248         // quadratic probing using triangular numbers
249         // http://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774
250         for (size_t i = hash & (tabledim - 1), j = 1;; ++j)
251         {
252             const(StringValue)* sv;
253             if (!table[i].vptr || table[i].hash == hash && (sv = getValue(table[i].vptr)).length == length && .memcmp(s, sv.toDchars(), length) == 0)
254                 return i;
255             i = (i + j) & (tabledim - 1);
256         }
257     }
258 
259     void grow()
260     {
261         const odim = tabledim;
262         auto otab = table;
263         tabledim *= 2;
264         table = cast(StringEntry*)mem.xcalloc(tabledim, (table[0]).sizeof);
265         foreach (const se; otab[0 .. odim])
266         {
267             if (!se.vptr)
268                 continue;
269             const sv = getValue(se.vptr);
270             table[findSlot(se.hash, sv.toDchars(), sv.length)] = se;
271         }
272         mem.xfree(otab);
273     }
274 }