1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (c) 1999-2016 by Digital Mars, All Rights Reserved
6  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(DMDSRC _utf.d)
9  */
10 
11 module ddmd.utf;
12 
13 nothrow pure @nogc:
14 
/// Tells whether `c` lies in the Unicode code space: the code points
/// [0x000000,0x10FFFF] with the UTF-16 surrogate range [0xD800,0xDFFF] removed.
bool utf_isValidDchar(dchar c)
{
    // TODO: Whether non-char code points should be rejected is pending review
    // 0xFFFE and 0xFFFF are valid for internal use, like Phobos std.utf.isValidDChar
    // See also https://issues.dlang.org/show_bug.cgi?id=1357

    // nothing exists above the largest character code point
    const bool withinCodeSpace = c <= 0x10FFFF;
    // surrogate halves are not characters on their own
    const bool isSurrogate = c >= 0xD800 && c <= 0xDFFF;
    return withinCodeSpace && !isSurrogate;
}
30 
/*******************************
 * Tests whether c is a "universal alpha" character.
 * The lookup table is the one from C99 Appendix D; note that its first
 * entry is 0x00AA, so plain ASCII letters are not covered by this function.
 * Params:
 *      c = code point to classify
 * Returns:
 *      true if c falls inside one of the table's inclusive ranges
 */
bool isUniAlpha(dchar c)
{
    // Inclusive [first, last] ranges, sorted ascending and non-overlapping.
    static immutable wchar[2][] ALPHA_TABLE =
    [
        [0x00AA, 0x00AA], [0x00B5, 0x00B5], [0x00B7, 0x00B7], [0x00BA, 0x00BA],
        [0x00C0, 0x00D6], [0x00D8, 0x00F6], [0x00F8, 0x01F5], [0x01FA, 0x0217],
        [0x0250, 0x02A8], [0x02B0, 0x02B8], [0x02BB, 0x02BB], [0x02BD, 0x02C1],
        [0x02D0, 0x02D1], [0x02E0, 0x02E4], [0x037A, 0x037A], [0x0386, 0x0386],
        [0x0388, 0x038A], [0x038C, 0x038C], [0x038E, 0x03A1], [0x03A3, 0x03CE],
        [0x03D0, 0x03D6], [0x03DA, 0x03DA], [0x03DC, 0x03DC], [0x03DE, 0x03DE],
        [0x03E0, 0x03E0], [0x03E2, 0x03F3], [0x0401, 0x040C], [0x040E, 0x044F],
        [0x0451, 0x045C], [0x045E, 0x0481], [0x0490, 0x04C4], [0x04C7, 0x04C8],
        [0x04CB, 0x04CC], [0x04D0, 0x04EB], [0x04EE, 0x04F5], [0x04F8, 0x04F9],
        [0x0531, 0x0556], [0x0559, 0x0559], [0x0561, 0x0587], [0x05B0, 0x05B9],
        [0x05BB, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2], [0x05D0, 0x05EA],
        [0x05F0, 0x05F2], [0x0621, 0x063A], [0x0640, 0x0652], [0x0660, 0x0669],
        [0x0670, 0x06B7], [0x06BA, 0x06BE], [0x06C0, 0x06CE], [0x06D0, 0x06DC],
        [0x06E5, 0x06E8], [0x06EA, 0x06ED], [0x06F0, 0x06F9], [0x0901, 0x0903],
        [0x0905, 0x0939], [0x093D, 0x094D], [0x0950, 0x0952], [0x0958, 0x0963],
        [0x0966, 0x096F], [0x0981, 0x0983], [0x0985, 0x098C], [0x098F, 0x0990],
        [0x0993, 0x09A8], [0x09AA, 0x09B0], [0x09B2, 0x09B2], [0x09B6, 0x09B9],
        [0x09BE, 0x09C4], [0x09C7, 0x09C8], [0x09CB, 0x09CD], [0x09DC, 0x09DD],
        [0x09DF, 0x09E3], [0x09E6, 0x09F1], [0x0A02, 0x0A02], [0x0A05, 0x0A0A],
        [0x0A0F, 0x0A10], [0x0A13, 0x0A28], [0x0A2A, 0x0A30], [0x0A32, 0x0A33],
        [0x0A35, 0x0A36], [0x0A38, 0x0A39], [0x0A3E, 0x0A42], [0x0A47, 0x0A48],
        [0x0A4B, 0x0A4D], [0x0A59, 0x0A5C], [0x0A5E, 0x0A5E], [0x0A66, 0x0A6F],
        [0x0A74, 0x0A74], [0x0A81, 0x0A83], [0x0A85, 0x0A8B], [0x0A8D, 0x0A8D],
        [0x0A8F, 0x0A91], [0x0A93, 0x0AA8], [0x0AAA, 0x0AB0], [0x0AB2, 0x0AB3],
        [0x0AB5, 0x0AB9], [0x0ABD, 0x0AC5], [0x0AC7, 0x0AC9], [0x0ACB, 0x0ACD],
        [0x0AD0, 0x0AD0], [0x0AE0, 0x0AE0], [0x0AE6, 0x0AEF], [0x0B01, 0x0B03],
        [0x0B05, 0x0B0C], [0x0B0F, 0x0B10], [0x0B13, 0x0B28], [0x0B2A, 0x0B30],
        [0x0B32, 0x0B33], [0x0B36, 0x0B39], [0x0B3D, 0x0B43], [0x0B47, 0x0B48],
        [0x0B4B, 0x0B4D], [0x0B5C, 0x0B5D], [0x0B5F, 0x0B61], [0x0B66, 0x0B6F],
        [0x0B82, 0x0B83], [0x0B85, 0x0B8A], [0x0B8E, 0x0B90], [0x0B92, 0x0B95],
        [0x0B99, 0x0B9A], [0x0B9C, 0x0B9C], [0x0B9E, 0x0B9F], [0x0BA3, 0x0BA4],
        [0x0BA8, 0x0BAA], [0x0BAE, 0x0BB5], [0x0BB7, 0x0BB9], [0x0BBE, 0x0BC2],
        [0x0BC6, 0x0BC8], [0x0BCA, 0x0BCD], [0x0BE7, 0x0BEF], [0x0C01, 0x0C03],
        [0x0C05, 0x0C0C], [0x0C0E, 0x0C10], [0x0C12, 0x0C28], [0x0C2A, 0x0C33],
        [0x0C35, 0x0C39], [0x0C3E, 0x0C44], [0x0C46, 0x0C48], [0x0C4A, 0x0C4D],
        [0x0C60, 0x0C61], [0x0C66, 0x0C6F], [0x0C82, 0x0C83], [0x0C85, 0x0C8C],
        [0x0C8E, 0x0C90], [0x0C92, 0x0CA8], [0x0CAA, 0x0CB3], [0x0CB5, 0x0CB9],
        [0x0CBE, 0x0CC4], [0x0CC6, 0x0CC8], [0x0CCA, 0x0CCD], [0x0CDE, 0x0CDE],
        [0x0CE0, 0x0CE1], [0x0CE6, 0x0CEF], [0x0D02, 0x0D03], [0x0D05, 0x0D0C],
        [0x0D0E, 0x0D10], [0x0D12, 0x0D28], [0x0D2A, 0x0D39], [0x0D3E, 0x0D43],
        [0x0D46, 0x0D48], [0x0D4A, 0x0D4D], [0x0D60, 0x0D61], [0x0D66, 0x0D6F],
        [0x0E01, 0x0E3A], [0x0E40, 0x0E5B], [0x0E81, 0x0E82], [0x0E84, 0x0E84],
        [0x0E87, 0x0E88], [0x0E8A, 0x0E8A], [0x0E8D, 0x0E8D], [0x0E94, 0x0E97],
        [0x0E99, 0x0E9F], [0x0EA1, 0x0EA3], [0x0EA5, 0x0EA5], [0x0EA7, 0x0EA7],
        [0x0EAA, 0x0EAB], [0x0EAD, 0x0EAE], [0x0EB0, 0x0EB9], [0x0EBB, 0x0EBD],
        [0x0EC0, 0x0EC4], [0x0EC6, 0x0EC6], [0x0EC8, 0x0ECD], [0x0ED0, 0x0ED9],
        [0x0EDC, 0x0EDD], [0x0F00, 0x0F00], [0x0F18, 0x0F19], [0x0F20, 0x0F33],
        [0x0F35, 0x0F35], [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F3E, 0x0F47],
        [0x0F49, 0x0F69], [0x0F71, 0x0F84], [0x0F86, 0x0F8B], [0x0F90, 0x0F95],
        [0x0F97, 0x0F97], [0x0F99, 0x0FAD], [0x0FB1, 0x0FB7], [0x0FB9, 0x0FB9],
        [0x10A0, 0x10C5], [0x10D0, 0x10F6], [0x1E00, 0x1E9B], [0x1EA0, 0x1EF9],
        [0x1F00, 0x1F15], [0x1F18, 0x1F1D], [0x1F20, 0x1F45], [0x1F48, 0x1F4D],
        [0x1F50, 0x1F57], [0x1F59, 0x1F59], [0x1F5B, 0x1F5B], [0x1F5D, 0x1F5D],
        [0x1F5F, 0x1F7D], [0x1F80, 0x1FB4], [0x1FB6, 0x1FBC], [0x1FBE, 0x1FBE],
        [0x1FC2, 0x1FC4], [0x1FC6, 0x1FCC], [0x1FD0, 0x1FD3], [0x1FD6, 0x1FDB],
        [0x1FE0, 0x1FEC], [0x1FF2, 0x1FF4], [0x1FF6, 0x1FFC], [0x203F, 0x2040],
        [0x207F, 0x207F], [0x2102, 0x2102], [0x2107, 0x2107], [0x210A, 0x2113],
        [0x2115, 0x2115], [0x2118, 0x211D], [0x2124, 0x2124], [0x2126, 0x2126],
        [0x2128, 0x2128], [0x212A, 0x2131], [0x2133, 0x2138], [0x2160, 0x2182],
        [0x3005, 0x3007], [0x3021, 0x3029], [0x3041, 0x3093], [0x309B, 0x309C],
        [0x30A1, 0x30F6], [0x30FB, 0x30FC], [0x3105, 0x312C], [0x4E00, 0x9FA5],
        [0xAC00, 0xD7A3]
    ];

    // Anything outside the overall span of the table cannot match
    if (c < ALPHA_TABLE[0][0] || c > ALPHA_TABLE[$ - 1][1])
        return false;

    // Binary search over the half-open index interval [lo, hi)
    size_t lo = 0;
    size_t hi = ALPHA_TABLE.length;
    while (lo < hi)
    {
        const size_t mid = lo + (hi - lo) / 2;
        if (c < ALPHA_TABLE[mid][0])
            hi = mid;           // c sits below this range
        else if (c > ALPHA_TABLE[mid][1])
            lo = mid + 1;       // c sits above this range
        else
            return true;        // first <= c <= last
    }
    return false;
}
305 
/**
 * Returns the number of UTF-8 code units (1 to 4) needed to encode c.
 * c must not exceed 0x10FFFF; larger values trip assert(false).
 */
int utf_codeLengthChar(dchar c)
{
    if (c > 0x10FFFF)
        assert(false);
    return c <= 0x7F   ? 1
         : c <= 0x7FF  ? 2
         : c <= 0xFFFF ? 3
         : 4;
}
321 
/**
 * Returns the number of UTF-16 code units needed to encode c:
 * 1 for code points up to 0xFFFF, otherwise 2 (a surrogate pair).
 */
int utf_codeLengthWchar(dchar c)
{
    if (c > 0xFFFF)
        return 2;
    return 1;
}
326 
/**
 * Returns the code length of c in code units for the encoding.
 * Params:
 *      sz = size of one code unit of the encoding: 1 = utf8, 2 = utf16, 4 = utf32
 *      c = code point to measure
 * Returns:
 *      number of code units; always 1 for utf32
 */
int utf_codeLength(int sz, dchar c)
{
    switch (sz)
    {
    case 1:
        return utf_codeLengthChar(c);
    case 2:
        return utf_codeLengthWchar(c);
    default:
        // the only other supported encoding is utf32
        assert(sz == 4);
        return 1;
    }
}
340 
/**
 * Encodes c as UTF-8 into the buffer at s.
 * Writes 1 to 4 code units and no terminator; s must be non-null and the
 * caller must provide enough room (see utf_codeLengthChar).
 * c must satisfy utf_isValidDchar.
 */
void utf_encodeChar(char* s, dchar c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));

    // 0xxxxxxx
    if (c <= 0x7F)
    {
        s[0] = cast(char)c;
        return;
    }
    // 110xxxxx 10xxxxxx
    if (c <= 0x07FF)
    {
        s[0] = cast(char)(0xC0 | (c >> 6));
        s[1] = cast(char)(0x80 | (c & 0x3F));
        return;
    }
    // 1110xxxx 10xxxxxx 10xxxxxx
    if (c <= 0xFFFF)
    {
        s[0] = cast(char)(0xE0 | (c >> 12));
        s[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
        s[2] = cast(char)(0x80 | (c & 0x3F));
        return;
    }
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if (c <= 0x10FFFF)
    {
        s[0] = cast(char)(0xF0 | (c >> 18));
        s[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
        s[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
        s[3] = cast(char)(0x80 | (c & 0x3F));
        return;
    }
    assert(0);
}
370 
/**
 * Encodes c as UTF-16 into the buffer at s.
 * Writes one code unit for c <= 0xFFFF, otherwise a high/low surrogate pair.
 * s must be non-null and c must satisfy utf_isValidDchar.
 */
void utf_encodeWchar(wchar* s, dchar c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));

    if (c > 0xFFFF)
    {
        // Supplementary plane: split the 20-bit offset into two 10-bit halves
        const uint offset = c - 0x010000;
        s[0] = cast(wchar)(((offset >> 10) & 0x03FF) + 0xD800);
        s[1] = cast(wchar)((offset & 0x03FF) + 0xDC00);
        return;
    }
    s[0] = cast(wchar)c;
}
385 
/**
 * Encodes c into the buffer at s using the encoding selected by sz.
 * Params:
 *      sz = size of one code unit: 1 = utf8, 2 = utf16, 4 = utf32
 *      s = destination buffer, reinterpreted as char*, wchar* or dchar*
 *      c = code point to encode
 */
void utf_encode(int sz, void* s, dchar c)
{
    switch (sz)
    {
    case 1:
        utf_encodeChar(cast(char*)s, c);
        break;
    case 2:
        utf_encodeWchar(cast(wchar*)s, c);
        break;
    default:
        // utf32: the code point is stored as-is
        assert(sz == 4);
        *(cast(dchar*)s) = c;
        break;
    }
}
398 
/********************************************
 * Decode a UTF-8 sequence as a single UTF-32 code point.
 *
 * On failure ridx has been advanced by exactly one code unit and rresult
 * holds the first code unit of the offending sequence.
 * Params:
 *      s = UTF-8 sequence
 *      len = number of code units in s[]
 *      ridx = starting index in s[], updated to reflect number of code units decoded
 *      rresult = set to character decoded
 * Returns:
 *      null on success, otherwise error message string
 */
immutable(char*) utf_decodeChar(const(char)* s, size_t len, ref size_t ridx, out dchar rresult)
{
    // UTF-8 decoding errors
    static immutable char* UTF8_DECODE_OK = null; // no error
    static immutable char* UTF8_DECODE_OUTSIDE_CODE_SPACE = "Outside Unicode code space";
    static immutable char* UTF8_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-8 sequence";
    static immutable char* UTF8_DECODE_OVERLONG = "Overlong UTF-8 sequence";
    static immutable char* UTF8_DECODE_INVALID_TRAILER = "Invalid trailing code unit";
    static immutable char* UTF8_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";

    /* Sequence length indexed by the first code unit:
     *      0xxxxxxx                                                -> 1
     *      10xxxxxx (a trailing unit, not a valid start)           -> 0xFF
     *      110xxxxx 10xxxxxx                                       -> 2
     *      1110xxxx 10xxxxxx 10xxxxxx                              -> 3
     *      11110xxx 10xxxxxx 10xxxxxx 10xxxxxx                     -> 4
     *      111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx            -> 5
     *      1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx   -> 6
     *      1111111x                                                -> 0xFF
     * Only lengths 1 through 4 are accepted below.
     */
    static immutable uint[] UTF8_STRIDE =
    [
        // 0x00 .. 0x7F
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        // 0x80 .. 0xBF
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        // 0xC0 .. 0xDF
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        // 0xE0 .. 0xEF
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        // 0xF0 .. 0xFF
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0xFF, 0xFF
    ];

    assert(s !is null);
    // Step past the first code unit immediately, so every error return
    // leaves ridx one unit beyond where decoding started
    const size_t start = ridx++;
    assert(start < len);
    const char lead = s[start];
    // Pre-stage results for ASCII and error cases
    rresult = lead;

    // Get expected sequence length
    const size_t seqLen = UTF8_STRIDE[lead];
    if (seqLen == 1)
        return UTF8_DECODE_OK; // ASCII
    if (seqLen < 2 || seqLen > 4)
        return UTF8_DECODE_OUTSIDE_CODE_SPACE; // 5-/6-byte form, or a 0xFF table entry
    if (len < start + seqLen) // source too short
        return UTF8_DECODE_TRUNCATED_SEQUENCE;

    /* The following combinations are overlong, and illegal:
     *      1100000x (10xxxxxx)
     *      11100000 100xxxxx (10xxxxxx)
     *      11110000 1000xxxx (10xxxxxx 10xxxxxx)
     * (the 5- and 6-byte overlong forms were already rejected above)
     */
    const char second = s[start + 1];
    const bool overlong2 = (lead & 0xFE) == 0xC0;
    const bool overlong3 = lead == 0xE0 && (second & 0xE0) == 0x80;
    const bool overlong4 = lead == 0xF0 && (second & 0xF0) == 0x80;
    if (overlong2 || overlong3 || overlong4)
        return UTF8_DECODE_OVERLONG;

    // Pick off 7 - seqLen low bits from first code unit
    dchar c = lead & ((1 << (7 - seqLen)) - 1);

    // Fold in six payload bits from each trailing code unit
    const size_t end = start + seqLen;
    foreach (k; start + 1 .. end)
    {
        const char trail = s[k];
        if ((trail & 0xC0) != 0x80) // trailing bytes are 10xxxxxx
            return UTF8_DECODE_INVALID_TRAILER;
        c = (c << 6) | (trail & 0x3F);
    }
    if (!utf_isValidDchar(c))
        return UTF8_DECODE_INVALID_CODE_POINT;

    ridx = end;
    rresult = c;
    return UTF8_DECODE_OK;
}
740 
/********************************************
 * Decode a UTF-16 sequence as a single UTF-32 code point.
 *
 * On failure ridx has been advanced by exactly one code unit and rresult
 * holds the first code unit of the offending sequence.
 * Params:
 *      s = UTF-16 sequence
 *      len = number of code units in s[]
 *      ridx = starting index in s[], updated to reflect number of code units decoded
 *      rresult = set to character decoded
 * Returns:
 *      null on success, otherwise error message string
 */
immutable(char*) utf_decodeWchar(const(wchar)* s, size_t len, ref size_t ridx, out dchar rresult)
{
    // UTF-16 decoding errors
    static immutable char* UTF16_DECODE_OK = null; // no error
    static immutable char* UTF16_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-16 sequence";
    static immutable char* UTF16_DECODE_INVALID_SURROGATE = "Invalid low surrogate";
    static immutable char* UTF16_DECODE_UNPAIRED_SURROGATE = "Unpaired surrogate";
    static immutable char* UTF16_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";

    assert(s !is null);
    // Step past the first code unit immediately, so every error return
    // leaves ridx one unit beyond where decoding started
    size_t i = ridx++;
    assert(i < len);
    // Pre-stage results for ASCII and error cases
    dchar u = rresult = s[i];
    if (u < 0x80) // ASCII
        return UTF16_DECODE_OK;
    if (0xD800 <= u && u <= 0xDBFF) // high surrogate: must be followed by a low surrogate
    {
        if (len <= i + 1)
            return UTF16_DECODE_TRUNCATED_SEQUENCE;
        wchar u2 = s[i + 1];
        // The second unit must lie in [0xDC00,0xDFFF]. The upper bound has to
        // be tested on u2: u is already known to be <= 0xDBFF, so testing u
        // would let units in 0xE000..0xFFFF pass as low surrogates.
        if (u2 < 0xDC00 || 0xDFFF < u2)
            return UTF16_DECODE_INVALID_SURROGATE;
        // 0xD7C0 == 0xD800 - (0x10000 >> 10): removes the surrogate bias and
        // adds the 0x10000 supplementary-plane offset in one step
        u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
        ++ridx; // the low surrogate has been consumed as well
    }
    else if (0xDC00 <= u && u <= 0xDFFF) // low surrogate without a preceding high one
        return UTF16_DECODE_UNPAIRED_SURROGATE;
    if (!utf_isValidDchar(u))
        return UTF16_DECODE_INVALID_CODE_POINT;
    rresult = u;
    return UTF16_DECODE_OK;
}