1 /** 2 * Compiler implementation of the D programming language 3 * http://dlang.org 4 * 5 * Copyright: Copyright (c) 1999-2016 by Digital Mars, All Rights Reserved 6 * Authors: Walter Bright, http://www.digitalmars.com 7 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 8 * Source: $(DMDSRC root/_speller.d) 9 */ 10 11 module ddmd.root.speller; 12 13 import core.stdc.limits; 14 import core.stdc.stdlib; 15 import core.stdc..string; 16 17 alias dg_speller_t = void* delegate(const(char)*, ref int); 18 19 __gshared const(char)* idchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; 20 21 /************************************************** 22 * combine a new result from the spell checker to 23 * find the one with the closest symbol with 24 * respect to the cost defined by the search function 25 * Input/Output: 26 * p best found spelling (NULL if none found yet) 27 * cost cost of p (INT_MAX if none found yet) 28 * Input: 29 * np new found spelling (NULL if none found) 30 * ncost cost of np if non-NULL 31 * Returns: 32 * true if the cost is less or equal 0 33 * false otherwise 34 */ 35 bool combineSpellerResult(ref void* p, ref int cost, void* np, int ncost) 36 { 37 if (np && ncost < cost) 38 { 39 p = np; 40 cost = ncost; 41 if (cost <= 0) 42 return true; 43 } 44 return false; 45 } 46 47 void* spellerY(const(char)* seed, size_t seedlen, dg_speller_t dg, const(char)* charset, size_t index, int* cost) 48 { 49 if (!seedlen) 50 return null; 51 assert(seed[seedlen] == 0); 52 char[30] tmp; 53 char* buf; 54 if (seedlen <= tmp.sizeof - 2) 55 buf = tmp.ptr; 56 else 57 { 58 buf = cast(char*)alloca(seedlen + 2); // leave space for extra char 59 if (!buf) 60 return null; // no matches 61 } 62 memcpy(buf, seed, index); 63 *cost = INT_MAX; 64 void* p = null; 65 int ncost; 66 /* Delete at seed[index] */ 67 if (index < seedlen) 68 { 69 memcpy(buf + index, seed + index + 1, seedlen - index); 70 assert(buf[seedlen - 1] == 0); 71 void* np = dg(buf, ncost); 72 if (combineSpellerResult(p, *cost, np, ncost)) 73 return p; 74 } 75 if (charset && *charset) 76 { 77 /* Substitutions */ 78 if (index < seedlen) 79 { 80 memcpy(buf, seed, seedlen + 1); 81 for (const(char)* s = charset; *s; s++) 82 { 83 buf[index] = *s; 84 //printf("sub buf = '%s'\n", buf); 85 void* np = dg(buf, ncost); 86 if (combineSpellerResult(p, *cost, np, ncost)) 87 return p; 88 } 89 assert(buf[seedlen] == 0); 90 } 91 /* Insertions */ 92 memcpy(buf + index + 1, seed + index, seedlen + 1 - index); 93 for (const(char)* s = charset; *s; s++) 94 { 95 buf[index] = *s; 96 //printf("ins buf = '%s'\n", buf); 97 void* np = dg(buf, ncost); 98 if (combineSpellerResult(p, *cost, np, ncost)) 99 return p; 100 } 101 assert(buf[seedlen + 1] == 0); 102 } 103 return p; // return "best" result 104 } 105 106 void* spellerX(const(char)* seed, size_t seedlen, dg_speller_t dg, const(char)* charset, int flag) 107 { 108 if (!seedlen) 109 return null; 110 char[30] tmp; 111 char* buf; 112 if (seedlen <= tmp.sizeof - 2) 113 buf = tmp.ptr; 114 else 115 { 116 buf = cast(char*)alloca(seedlen + 2); // leave space for extra char 117 if (!buf) 118 return null; // no matches 119 } 120 int cost = INT_MAX, ncost; 121 void* p = null, np; 122 /* Deletions */ 123 memcpy(buf, seed + 1, seedlen); 124 for (size_t i = 0; i < seedlen; i++) 125 { 126 //printf("del buf = '%s'\n", buf); 127 if (flag) 128 np = spellerY(buf, seedlen - 1, dg, charset, i, &ncost); 129 else 130 np = dg(buf, ncost); 131 if (combineSpellerResult(p, cost, np, ncost)) 132 return p; 133 buf[i] = seed[i]; 134 } 135 /* Transpositions */ 136 if (!flag) 137 { 138 memcpy(buf, seed, seedlen + 1); 139 for (size_t i = 0; i + 1 < seedlen; i++) 140 { 141 // swap [i] and [i + 1] 142 buf[i] = seed[i + 1]; 143 buf[i + 1] = seed[i]; 144 //printf("tra buf = '%s'\n", buf); 145 if (combineSpellerResult(p, cost, dg(buf, ncost), ncost)) 146 return p; 147 buf[i] = seed[i]; 148 } 149 } 150 if (charset && *charset) 151 { 152 /* Substitutions */ 153 memcpy(buf, seed, seedlen + 1); 154 for (size_t i = 0; i < seedlen; i++) 155 { 156 for (const(char)* s = charset; *s; s++) 157 { 158 buf[i] = *s; 159 //printf("sub buf = '%s'\n", buf); 160 if (flag) 161 np = spellerY(buf, seedlen, dg, charset, i + 1, &ncost); 162 else 163 np = dg(buf, ncost); 164 if (combineSpellerResult(p, cost, np, ncost)) 165 return p; 166 } 167 buf[i] = seed[i]; 168 } 169 /* Insertions */ 170 memcpy(buf + 1, seed, seedlen + 1); 171 for (size_t i = 0; i <= seedlen; i++) // yes, do seedlen+1 iterations 172 { 173 for (const(char)* s = charset; *s; s++) 174 { 175 buf[i] = *s; 176 //printf("ins buf = '%s'\n", buf); 177 if (flag) 178 np = spellerY(buf, seedlen + 1, dg, charset, i + 1, &ncost); 179 else 180 np = dg(buf, ncost); 181 if (combineSpellerResult(p, cost, np, ncost)) 182 return p; 183 } 184 buf[i] = seed[i]; // going past end of seed[] is ok, as we hit the 0 185 } 186 } 187 return p; // return "best" result 188 } 189 190 /************************************************** 191 * Looks for correct spelling. 192 * Currently only looks a 'distance' of one from the seed[]. 193 * This does an exhaustive search, so can potentially be very slow. 194 * Input: 195 * seed wrongly spelled word 196 * dg search delegate 197 * charset character set 198 * Returns: 199 * NULL no correct spellings found 200 * void* value returned by dg() for first possible correct spelling 201 */ 202 void* speller(const(char)* seed, scope dg_speller_t dg, const(char)* charset) 203 { 204 size_t seedlen = strlen(seed); 205 size_t maxdist = seedlen < 4 ? seedlen / 2 : 2; 206 for (int distance = 0; distance < maxdist; distance++) 207 { 208 void* p = spellerX(seed, seedlen, dg, charset, distance); 209 if (p) 210 return p; 211 // if (seedlen > 10) 212 // break; 213 } 214 return null; // didn't find it 215 } 216 217 unittest 218 { 219 static __gshared const(char)*** cases = 220 [ 221 ["hello", "hell", "y"], 222 ["hello", "hel", "y"], 223 ["hello", "ello", "y"], 224 ["hello", "llo", "y"], 225 ["hello", "hellox", "y"], 226 ["hello", "helloxy", "y"], 227 ["hello", "xhello", "y"], 228 ["hello", "xyhello", "y"], 229 ["hello", "ehllo", "y"], 230 ["hello", "helol", "y"], 231 ["hello", "abcd", "n"], 232 ["hello", "helxxlo", "y"], 233 ["hello", "ehlxxlo", "n"], 234 ["hello", "heaao", "y"], 235 ["_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y"], 236 [null, null, null] 237 ]; 238 //printf("unittest_speller()\n"); 239 240 void* dgarg; 241 242 void* speller_test(const(char)* s, ref int cost) 243 { 244 //printf("speller_test(%s, %s)\n", dgarg, s); 245 cost = 0; 246 if (strcmp(cast(char*)dgarg, s) == 0) 247 return dgarg; 248 return null; 249 } 250 251 dgarg = cast(char*)"hell"; 252 const(void)* p = speller(cast(const(char)*)"hello", &speller_test, idchars); 253 assert(p !is null); 254 for (int i = 0; cases[i][0]; i++) 255 { 256 //printf("case [%d]\n", i); 257 dgarg = cast(void*)cases[i][1]; 258 void* p2 = speller(cases[i][0], &speller_test, idchars); 259 if (p2) 260 assert(cases[i][2][0] == 'y'); 261 else 262 assert(cases[i][2][0] == 'n'); 263 } 264 //printf("unittest_speller() success\n"); 265 }