1 /** 2 * Compiler implementation of the D programming language 3 * http://dlang.org 4 * 5 * Copyright: Copyright (c) 1999-2016 by Digital Mars, All Rights Reserved 6 * Authors: Walter Bright, http://www.digitalmars.com 7 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 8 * Source: $(DMDSRC root/_stringtable.d) 9 */ 10 11 module ddmd.root.stringtable; 12 13 import core.stdc..string; 14 import ddmd.root.rmem; 15 16 enum POOL_BITS = 12; 17 enum POOL_SIZE = (1U << POOL_BITS); 18 19 // TODO: Merge with root.String 20 // MurmurHash2 was written by Austin Appleby, and is placed in the public 21 // domain. The author hereby disclaims copyright to this source code. 22 // https://sites.google.com/site/murmurhash/ 23 private uint calcHash(const(char)* key, size_t len) pure nothrow @nogc 24 { 25 // 'm' and 'r' are mixing constants generated offline. 26 // They're not really 'magic', they just happen to work well. 27 enum uint m = 0x5bd1e995; 28 enum int r = 24; 29 // Initialize the hash to a 'random' value 30 uint h = cast(uint)len; 31 // Mix 4 bytes at a time into the hash 32 const(ubyte)* data = cast(const(ubyte)*)key; 33 while (len >= 4) 34 { 35 uint k = data[3] << 24 | data[2] << 16 | data[1] << 8 | data[0]; 36 k *= m; 37 k ^= k >> r; 38 h = (h * m) ^ (k * m); 39 data += 4; 40 len -= 4; 41 } 42 // Handle the last few bytes of the input array 43 switch (len & 3) 44 { 45 case 3: 46 h ^= data[2] << 16; 47 goto case; 48 case 2: 49 h ^= data[1] << 8; 50 goto case; 51 case 1: 52 h ^= data[0]; 53 h *= m; 54 goto default; 55 default: 56 break; 57 } 58 // Do a few final mixes of the hash to ensure the last few 59 // bytes are well-incorporated. 60 h ^= h >> 13; 61 h *= m; 62 h ^= h >> 15; 63 return h; 64 } 65 66 private size_t nextpow2(size_t val) pure nothrow @nogc @safe 67 { 68 size_t res = 1; 69 while (res < val) 70 res <<= 1; 71 return res; 72 } 73 74 enum loadFactor = 0.8; 75 76 struct StringEntry 77 { 78 uint hash; 79 uint vptr; 80 } 81 82 // StringValue is a variable-length structure. It has neither proper c'tors nor a 83 // factory method because the only thing which should be creating these is StringTable. 84 struct StringValue 85 { 86 void* ptrvalue; 87 size_t length; 88 89 nothrow: 90 pure: 91 extern (C++) char* lstring() 92 { 93 return cast(char*)(&this + 1); 94 } 95 96 extern (C++) size_t len() const 97 { 98 return length; 99 } 100 101 extern (C++) const(char)* toDchars() const 102 { 103 return cast(const(char)*)(&this + 1); 104 } 105 } 106 107 struct StringTable 108 { 109 private: 110 StringEntry* table; 111 size_t tabledim; 112 ubyte** pools; 113 size_t npools; 114 size_t nfill; 115 size_t count; 116 117 public: 118 extern (C++) void _init(size_t size = 0) nothrow 119 { 120 size = nextpow2(cast(size_t)(size / loadFactor)); 121 if (size < 32) 122 size = 32; 123 table = cast(StringEntry*)mem.xcalloc(size, (table[0]).sizeof); 124 tabledim = size; 125 pools = null; 126 npools = nfill = 0; 127 count = 0; 128 } 129 130 extern (C++) void reset(size_t size = 0) nothrow 131 { 132 for (size_t i = 0; i < npools; ++i) 133 mem.xfree(pools[i]); 134 mem.xfree(table); 135 mem.xfree(pools); 136 table = null; 137 pools = null; 138 _init(size); 139 } 140 141 extern (C++) ~this() nothrow 142 { 143 for (size_t i = 0; i < npools; ++i) 144 mem.xfree(pools[i]); 145 mem.xfree(table); 146 mem.xfree(pools); 147 table = null; 148 pools = null; 149 } 150 151 extern (C++) StringValue* lookup(const(char)* s, size_t length) nothrow pure 152 { 153 const(hash_t) hash = calcHash(s, length); 154 const(size_t) i = findSlot(hash, s, length); 155 // printf("lookup %.*s %p\n", (int)length, s, table[i].value ?: NULL); 156 return getValue(table[i].vptr); 157 } 158 159 extern (C++) StringValue* insert(const(char)* s, size_t length, void* ptrvalue) nothrow 160 { 161 const(hash_t) hash = calcHash(s, length); 162 size_t i = findSlot(hash, s, length); 163 if (table[i].vptr) 164 return null; // already in table 165 if (++count > tabledim * loadFactor) 166 { 167 grow(); 168 i = findSlot(hash, s, length); 169 } 170 table[i].hash = hash; 171 table[i].vptr = allocValue(s, length, ptrvalue); 172 // printf("insert %.*s %p\n", (int)length, s, table[i].value ?: NULL); 173 return getValue(table[i].vptr); 174 } 175 176 extern (C++) StringValue* update(const(char)* s, size_t length) nothrow 177 { 178 const(hash_t) hash = calcHash(s, length); 179 size_t i = findSlot(hash, s, length); 180 if (!table[i].vptr) 181 { 182 if (++count > tabledim * loadFactor) 183 { 184 grow(); 185 i = findSlot(hash, s, length); 186 } 187 table[i].hash = hash; 188 table[i].vptr = allocValue(s, length, null); 189 } 190 // printf("update %.*s %p\n", (int)length, s, table[i].value ?: NULL); 191 return getValue(table[i].vptr); 192 } 193 194 /******************************** 195 * Walk the contents of the string table, 196 * calling fp for each entry. 197 * Params: 198 * fp = function to call. Returns !=0 to stop 199 * Returns: 200 * last return value of fp call 201 */ 202 extern (C++) int apply(int function(const(StringValue)*) fp) 203 { 204 foreach (const se; table[0 .. tabledim]) 205 { 206 if (!se.vptr) 207 continue; 208 const sv = getValue(se.vptr); 209 int result = (*fp)(sv); 210 if (result) 211 return result; 212 } 213 return 0; 214 } 215 216 private: 217 nothrow: 218 uint allocValue(const(char)* s, size_t length, void* ptrvalue) 219 { 220 const(size_t) nbytes = StringValue.sizeof + length + 1; 221 if (!npools || nfill + nbytes > POOL_SIZE) 222 { 223 pools = cast(ubyte**)mem.xrealloc(pools, ++npools * (pools[0]).sizeof); 224 pools[npools - 1] = cast(ubyte*)mem.xmalloc(nbytes > POOL_SIZE ? nbytes : POOL_SIZE); 225 nfill = 0; 226 } 227 StringValue* sv = cast(StringValue*)&pools[npools - 1][nfill]; 228 sv.ptrvalue = ptrvalue; 229 sv.length = length; 230 .memcpy(sv.lstring(), s, length); 231 sv.lstring()[length] = 0; 232 const(uint) vptr = cast(uint)(npools << POOL_BITS | nfill); 233 nfill += nbytes + (-nbytes & 7); // align to 8 bytes 234 return vptr; 235 } 236 237 StringValue* getValue(uint vptr) pure 238 { 239 if (!vptr) 240 return null; 241 const(size_t) idx = (vptr >> POOL_BITS) - 1; 242 const(size_t) off = vptr & POOL_SIZE - 1; 243 return cast(StringValue*)&pools[idx][off]; 244 } 245 246 size_t findSlot(hash_t hash, const(char)* s, size_t length) pure 247 { 248 // quadratic probing using triangular numbers 249 // http://stackoverflow.com/questions/2348187/moving-from-linear-probing-to-quadratic-probing-hash-collisons/2349774#2349774 250 for (size_t i = hash & (tabledim - 1), j = 1;; ++j) 251 { 252 const(StringValue)* sv; 253 if (!table[i].vptr || table[i].hash == hash && (sv = getValue(table[i].vptr)).length == length && .memcmp(s, sv.toDchars(), length) == 0) 254 return i; 255 i = (i + j) & (tabledim - 1); 256 } 257 } 258 259 void grow() 260 { 261 const odim = tabledim; 262 auto otab = table; 263 tabledim *= 2; 264 table = cast(StringEntry*)mem.xcalloc(tabledim, (table[0]).sizeof); 265 foreach (const se; otab[0 .. odim]) 266 { 267 if (!se.vptr) 268 continue; 269 const sv = getValue(se.vptr); 270 table[findSlot(se.hash, sv.toDchars(), sv.length)] = se; 271 } 272 mem.xfree(otab); 273 } 274 }