/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (c) 1999-2016 by Digital Mars, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(DMDSRC _utf.d)
 */

module ddmd.utf;

nothrow pure @nogc:

/**
 * Tests whether c lies in the Unicode code space.
 *
 * The Unicode code space is the range of code points [0x000000,0x10FFFF]
 * except the UTF-16 surrogate pairs in the range [0xD800,0xDFFF].
 *
 * Params:
 *      c = code point to test
 * Returns:
 *      true if c is at most 0x10FFFF and is not a surrogate
 */
bool utf_isValidDchar(dchar c)
{
    // TODO: Whether non-char code points should be rejected is pending review
    // largest character code point
    if (c > 0x10FFFF)
        return false;
    // 0xFFFE and 0xFFFF are valid for internal use, like Phobos std.utf.isValidDChar
    // See also https://issues.dlang.org/show_bug.cgi?id=1357
    // surrogate pairs
    if (0xD800 <= c && c <= 0xDFFF)
        return false;
    return true;
}

/*******************************
 * Tests whether c is a unicode alpha, using the table from C99 Appendix D.
 *
 * Params:
 *      c = code point to test
 * Returns:
 *      true if c falls inside one of the inclusive ranges of the table
 */
bool isUniAlpha(dchar c)
{
    // Inclusive [first, last] ranges, sorted ascending and non-overlapping
    // (the binary search below depends on that ordering).
    static immutable wchar[2][] ALPHA_TABLE =
    [
        [0x00AA, 0x00AA], [0x00B5, 0x00B5], [0x00B7, 0x00B7], [0x00BA, 0x00BA],
        [0x00C0, 0x00D6], [0x00D8, 0x00F6], [0x00F8, 0x01F5], [0x01FA, 0x0217],
        [0x0250, 0x02A8], [0x02B0, 0x02B8], [0x02BB, 0x02BB], [0x02BD, 0x02C1],
        [0x02D0, 0x02D1], [0x02E0, 0x02E4], [0x037A, 0x037A], [0x0386, 0x0386],
        [0x0388, 0x038A], [0x038C, 0x038C], [0x038E, 0x03A1], [0x03A3, 0x03CE],
        [0x03D0, 0x03D6], [0x03DA, 0x03DA], [0x03DC, 0x03DC], [0x03DE, 0x03DE],
        [0x03E0, 0x03E0], [0x03E2, 0x03F3], [0x0401, 0x040C], [0x040E, 0x044F],
        [0x0451, 0x045C], [0x045E, 0x0481], [0x0490, 0x04C4], [0x04C7, 0x04C8],
        [0x04CB, 0x04CC], [0x04D0, 0x04EB], [0x04EE, 0x04F5], [0x04F8, 0x04F9],
        [0x0531, 0x0556], [0x0559, 0x0559], [0x0561, 0x0587], [0x05B0, 0x05B9],
        [0x05BB, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2], [0x05D0, 0x05EA],
        [0x05F0, 0x05F2], [0x0621, 0x063A], [0x0640, 0x0652], [0x0660, 0x0669],
        [0x0670, 0x06B7], [0x06BA, 0x06BE], [0x06C0, 0x06CE], [0x06D0, 0x06DC],
        [0x06E5, 0x06E8], [0x06EA, 0x06ED], [0x06F0, 0x06F9], [0x0901, 0x0903],
        [0x0905, 0x0939], [0x093D, 0x094D], [0x0950, 0x0952], [0x0958, 0x0963],
        [0x0966, 0x096F], [0x0981, 0x0983], [0x0985, 0x098C], [0x098F, 0x0990],
        [0x0993, 0x09A8], [0x09AA, 0x09B0], [0x09B2, 0x09B2], [0x09B6, 0x09B9],
        [0x09BE, 0x09C4], [0x09C7, 0x09C8], [0x09CB, 0x09CD], [0x09DC, 0x09DD],
        [0x09DF, 0x09E3], [0x09E6, 0x09F1], [0x0A02, 0x0A02], [0x0A05, 0x0A0A],
        [0x0A0F, 0x0A10], [0x0A13, 0x0A28], [0x0A2A, 0x0A30], [0x0A32, 0x0A33],
        [0x0A35, 0x0A36], [0x0A38, 0x0A39], [0x0A3E, 0x0A42], [0x0A47, 0x0A48],
        [0x0A4B, 0x0A4D], [0x0A59, 0x0A5C], [0x0A5E, 0x0A5E], [0x0A66, 0x0A6F],
        [0x0A74, 0x0A74], [0x0A81, 0x0A83], [0x0A85, 0x0A8B], [0x0A8D, 0x0A8D],
        [0x0A8F, 0x0A91], [0x0A93, 0x0AA8], [0x0AAA, 0x0AB0], [0x0AB2, 0x0AB3],
        [0x0AB5, 0x0AB9], [0x0ABD, 0x0AC5], [0x0AC7, 0x0AC9], [0x0ACB, 0x0ACD],
        [0x0AD0, 0x0AD0], [0x0AE0, 0x0AE0], [0x0AE6, 0x0AEF], [0x0B01, 0x0B03],
        [0x0B05, 0x0B0C], [0x0B0F, 0x0B10], [0x0B13, 0x0B28], [0x0B2A, 0x0B30],
        [0x0B32, 0x0B33], [0x0B36, 0x0B39], [0x0B3D, 0x0B43], [0x0B47, 0x0B48],
        [0x0B4B, 0x0B4D], [0x0B5C, 0x0B5D], [0x0B5F, 0x0B61], [0x0B66, 0x0B6F],
        [0x0B82, 0x0B83], [0x0B85, 0x0B8A], [0x0B8E, 0x0B90], [0x0B92, 0x0B95],
        [0x0B99, 0x0B9A], [0x0B9C, 0x0B9C], [0x0B9E, 0x0B9F], [0x0BA3, 0x0BA4],
        [0x0BA8, 0x0BAA], [0x0BAE, 0x0BB5], [0x0BB7, 0x0BB9], [0x0BBE, 0x0BC2],
        [0x0BC6, 0x0BC8], [0x0BCA, 0x0BCD], [0x0BE7, 0x0BEF], [0x0C01, 0x0C03],
        [0x0C05, 0x0C0C], [0x0C0E, 0x0C10], [0x0C12, 0x0C28], [0x0C2A, 0x0C33],
        [0x0C35, 0x0C39], [0x0C3E, 0x0C44], [0x0C46, 0x0C48], [0x0C4A, 0x0C4D],
        [0x0C60, 0x0C61], [0x0C66, 0x0C6F], [0x0C82, 0x0C83], [0x0C85, 0x0C8C],
        [0x0C8E, 0x0C90], [0x0C92, 0x0CA8], [0x0CAA, 0x0CB3], [0x0CB5, 0x0CB9],
        [0x0CBE, 0x0CC4], [0x0CC6, 0x0CC8], [0x0CCA, 0x0CCD], [0x0CDE, 0x0CDE],
        [0x0CE0, 0x0CE1], [0x0CE6, 0x0CEF], [0x0D02, 0x0D03], [0x0D05, 0x0D0C],
        [0x0D0E, 0x0D10], [0x0D12, 0x0D28], [0x0D2A, 0x0D39], [0x0D3E, 0x0D43],
        [0x0D46, 0x0D48], [0x0D4A, 0x0D4D], [0x0D60, 0x0D61], [0x0D66, 0x0D6F],
        [0x0E01, 0x0E3A], [0x0E40, 0x0E5B], [0x0E81, 0x0E82], [0x0E84, 0x0E84],
        [0x0E87, 0x0E88], [0x0E8A, 0x0E8A], [0x0E8D, 0x0E8D], [0x0E94, 0x0E97],
        [0x0E99, 0x0E9F], [0x0EA1, 0x0EA3], [0x0EA5, 0x0EA5], [0x0EA7, 0x0EA7],
        [0x0EAA, 0x0EAB], [0x0EAD, 0x0EAE], [0x0EB0, 0x0EB9], [0x0EBB, 0x0EBD],
        [0x0EC0, 0x0EC4], [0x0EC6, 0x0EC6], [0x0EC8, 0x0ECD], [0x0ED0, 0x0ED9],
        [0x0EDC, 0x0EDD], [0x0F00, 0x0F00], [0x0F18, 0x0F19], [0x0F20, 0x0F33],
        [0x0F35, 0x0F35], [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F3E, 0x0F47],
        [0x0F49, 0x0F69], [0x0F71, 0x0F84], [0x0F86, 0x0F8B], [0x0F90, 0x0F95],
        [0x0F97, 0x0F97], [0x0F99, 0x0FAD], [0x0FB1, 0x0FB7], [0x0FB9, 0x0FB9],
        [0x10A0, 0x10C5], [0x10D0, 0x10F6], [0x1E00, 0x1E9B], [0x1EA0, 0x1EF9],
        [0x1F00, 0x1F15], [0x1F18, 0x1F1D], [0x1F20, 0x1F45], [0x1F48, 0x1F4D],
        [0x1F50, 0x1F57], [0x1F59, 0x1F59], [0x1F5B, 0x1F5B], [0x1F5D, 0x1F5D],
        [0x1F5F, 0x1F7D], [0x1F80, 0x1FB4], [0x1FB6, 0x1FBC], [0x1FBE, 0x1FBE],
        [0x1FC2, 0x1FC4], [0x1FC6, 0x1FCC], [0x1FD0, 0x1FD3], [0x1FD6, 0x1FDB],
        [0x1FE0, 0x1FEC], [0x1FF2, 0x1FF4], [0x1FF6, 0x1FFC], [0x203F, 0x2040],
        [0x207F, 0x207F], [0x2102, 0x2102], [0x2107, 0x2107], [0x210A, 0x2113],
        [0x2115, 0x2115], [0x2118, 0x211D], [0x2124, 0x2124], [0x2126, 0x2126],
        [0x2128, 0x2128], [0x212A, 0x2131], [0x2133, 0x2138], [0x2160, 0x2182],
        [0x3005, 0x3007], [0x3021, 0x3029], [0x3041, 0x3093], [0x309B, 0x309C],
        [0x30A1, 0x30F6], [0x30FB, 0x30FC], [0x3105, 0x312C], [0x4E00, 0x9FA5],
        [0xAC00, 0xD7A3]
    ];

    size_t high = ALPHA_TABLE.length - 1;
    // Shortcut search if c is out of range: starting with low > high skips the
    // loop. This also guarantees c >= ALPHA_TABLE[0][0] inside the loop, so
    // `high = mid - 1` can never be reached with mid == 0 (no unsigned wrap).
    size_t low = (c < ALPHA_TABLE[0][0] || ALPHA_TABLE[high][1] < c) ? high + 1 : 0;
    // Binary search
    while (low <= high)
    {
        size_t mid = (low + high) >> 1;
        if (c < ALPHA_TABLE[mid][0])
            high = mid - 1;
        else if (ALPHA_TABLE[mid][1] < c)
            low = mid + 1;
        else
        {
            assert(ALPHA_TABLE[mid][0] <= c && c <= ALPHA_TABLE[mid][1]);
            return true;
        }
    }
    return false;
}

/**
 * Returns the code length of c in UTF-8 code units.
 *
 * Params:
 *      c = code point, must not exceed 0x10FFFF
 * Returns:
 *      1 to 4; halts via assert(false) if c is larger than 0x10FFFF
 */
int utf_codeLengthChar(dchar c)
{
    if (c <= 0x7F)
        return 1;
    if (c <= 0x7FF)
        return 2;
    if (c <= 0xFFFF)
        return 3;
    if (c <= 0x10FFFF)
        return 4;
    assert(false);
}

/**
 * Returns the code length of c in UTF-16 code units.
 *
 * Params:
 *      c = code point
 * Returns:
 *      1 if c fits in a single code unit (c <= 0xFFFF), otherwise 2
 */
int utf_codeLengthWchar(dchar c)
{
    return c <= 0xFFFF ? 1 : 2;
}

/**
 * Returns the code length of c in code units for the encoding.
 * sz is the encoding: 1 = utf8, 2 = utf16, 4 = utf32.
 *
 * Params:
 *      sz = size in bytes of one code unit of the target encoding (1, 2 or 4)
 *      c = code point
 * Returns:
 *      number of code units needed to encode c
 */
int utf_codeLength(int sz, dchar c)
{
    if (sz == 1)
        return utf_codeLengthChar(c);
    if (sz == 2)
        return utf_codeLengthWchar(c);
    assert(sz == 4);
    return 1;
}

/**
 * Encodes c as UTF-8 into s.
 *
 * No terminator is written. Between 1 and 4 code units are stored, the same
 * count utf_codeLengthChar(c) reports, so s must have room for that many.
 *
 * Params:
 *      s = destination buffer, must not be null
 *      c = code point to encode, must satisfy utf_isValidDchar
 */
void utf_encodeChar(char* s, dchar c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));
    if (c <= 0x7F)
    {
        s[0] = cast(char)c;
    }
    else if (c <= 0x07FF)
    {
        s[0] = cast(char)(0xC0 | (c >> 6));
        s[1] = cast(char)(0x80 | (c & 0x3F));
    }
    else if (c <= 0xFFFF)
    {
        s[0] = cast(char)(0xE0 | (c >> 12));
        s[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
        s[2] = cast(char)(0x80 | (c & 0x3F));
    }
    else if (c <= 0x10FFFF)
    {
        s[0] = cast(char)(0xF0 | (c >> 18));
        s[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
        s[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
        s[3] = cast(char)(0x80 | (c & 0x3F));
    }
    else
        assert(0);
}

/**
 * Encodes c as UTF-16 into s.
 *
 * No terminator is written. One code unit is stored for c <= 0xFFFF,
 * otherwise a surrogate pair (two code units).
 *
 * Params:
 *      s = destination buffer, must not be null
 *      c = code point to encode, must satisfy utf_isValidDchar
 */
void utf_encodeWchar(wchar* s, dchar c)
{
    assert(s !is null);
    assert(utf_isValidDchar(c));
    if (c <= 0xFFFF)
    {
        s[0] = cast(wchar)c;
    }
    else
    {
        // high surrogate carries the upper 10 bits of (c - 0x10000),
        // low surrogate the lower 10 bits
        s[0] = cast(wchar)((((c - 0x010000) >> 10) & 0x03FF) + 0xD800);
        s[1] = cast(wchar)(((c - 0x010000) & 0x03FF) + 0xDC00);
    }
}

/**
 * Encodes c into s using the encoding selected by sz.
 *
 * Params:
 *      sz = size in bytes of one code unit: 1 = utf8, 2 = utf16, 4 = utf32
 *      s = destination buffer, reinterpreted as char*, wchar* or dchar*
 *      c = code point to encode
 */
void utf_encode(int sz, void* s, dchar c)
{
    if (sz == 1)
        utf_encodeChar(cast(char*)s, c);
    else if (sz == 2)
        utf_encodeWchar(cast(wchar*)s, c);
    else
    {
        assert(sz == 4);
        *(cast(dchar*)s) = c;
    }
}

/********************************************
 * Decode a UTF-8 sequence as a single UTF-32 code point.
 *
 * On error, ridx has been advanced by exactly one code unit and rresult holds
 * the first code unit of the offending sequence.
 *
 * Params:
 *      s = UTF-8 sequence
 *      len = number of code units in s[]
 *      ridx = starting index in s[], updated to reflect number of code units decoded
 *      rresult = set to character decoded
 * Returns:
 *      null on success, otherwise error message string
 */
immutable(char*) utf_decodeChar(const(char)* s, size_t len, ref size_t ridx, out dchar rresult)
{
    // UTF-8 decoding errors
    static immutable char* UTF8_DECODE_OK = null; // no error
    static immutable char* UTF8_DECODE_OUTSIDE_CODE_SPACE = "Outside Unicode code space";
    static immutable char* UTF8_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-8 sequence";
    static immutable char* UTF8_DECODE_OVERLONG = "Overlong UTF-8 sequence";
    static immutable char* UTF8_DECODE_INVALID_TRAILER = "Invalid trailing code unit";
    static immutable char* UTF8_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";

    /* The following encodings are valid, except for the 5 and 6 byte
     * combinations:
     *      0xxxxxxx
     *      110xxxxx 10xxxxxx
     *      1110xxxx 10xxxxxx 10xxxxxx
     *      11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     *      111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     *      1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
     *
     * The table maps a leading code unit to the length of its sequence;
     * 0xFF marks code units that cannot start a sequence.
     */
    static immutable uint[] UTF8_STRIDE =
    [
        // 0x00 .. 0x7F: ASCII
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        // 0x80 .. 0xBF: trailing code units
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
        // 0xC0 .. 0xDF: 2-byte leaders
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        // 0xE0 .. 0xEF: 3-byte leaders
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        // 0xF0 .. 0xFF: 4-, 5-, 6-byte leaders, then 0xFE and 0xFF
        4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0xFF, 0xFF
    ];

    assert(s !is null);
    size_t i = ridx++;
    assert(i < len);
    char u = s[i];
    // Pre-stage results for ASCII and error cases
    rresult = u;
    // Get expected sequence length
    size_t n = UTF8_STRIDE[u];
    switch (n)
    {
    case 1:
        // ASCII
        return UTF8_DECODE_OK;
    case 2:
    case 3:
    case 4:
        // multi-byte UTF-8
        break;
    default:
        // 5- or 6-byte sequence, or a code unit that cannot lead a sequence
        return UTF8_DECODE_OUTSIDE_CODE_SPACE;
    }
    if (len < i + n) // source too short
        return UTF8_DECODE_TRUNCATED_SEQUENCE;
    // Pick off 7 - n low bits from first code unit
    dchar c = u & ((1 << (7 - n)) - 1);
    /* The following combinations are overlong, and illegal:
     *      1100000x (10xxxxxx)
     *      11100000 100xxxxx (10xxxxxx)
     *      11110000 1000xxxx (10xxxxxx 10xxxxxx)
     * (5- and 6-byte leaders never get here, they were rejected above.)
     */
    char u2 = s[++i];
    // overlong combination
    if ((u & 0xFE) == 0xC0 || (u == 0xE0 && (u2 & 0xE0) == 0x80) || (u == 0xF0 && (u2 & 0xF0) == 0x80))
        return UTF8_DECODE_OVERLONG;
    // Decode remaining bits; n becomes the index one past the last code unit
    for (n += i - 1; i != n; ++i)
    {
        u = s[i];
        if ((u & 0xC0) != 0x80) // trailing bytes are 10xxxxxx
            return UTF8_DECODE_INVALID_TRAILER;
        c = (c << 6) | (u & 0x3F);
    }
    if (!utf_isValidDchar(c))
        return UTF8_DECODE_INVALID_CODE_POINT;
    ridx = i;
    rresult = c;
    return UTF8_DECODE_OK;
}

/********************************************
 * Decode a UTF-16 sequence as a single UTF-32 code point.
 *
 * On error, ridx has been advanced by exactly one code unit and rresult holds
 * the first code unit of the offending sequence.
 *
 * Params:
 *      s = UTF-16 sequence
 *      len = number of code units in s[]
 *      ridx = starting index in s[], updated to reflect number of code units decoded
 *      rresult = set to character decoded
 * Returns:
 *      null on success, otherwise error message string
 */
immutable(char*) utf_decodeWchar(const(wchar)* s, size_t len, ref size_t ridx, out dchar rresult)
{
    // UTF-16 decoding errors
    static immutable char* UTF16_DECODE_OK = null; // no error
    static immutable char* UTF16_DECODE_TRUNCATED_SEQUENCE = "Truncated UTF-16 sequence";
    static immutable char* UTF16_DECODE_INVALID_SURROGATE = "Invalid low surrogate";
    static immutable char* UTF16_DECODE_UNPAIRED_SURROGATE = "Unpaired surrogate";
    static immutable char* UTF16_DECODE_INVALID_CODE_POINT = "Invalid code point decoded";

    assert(s !is null);
    size_t i = ridx++;
    assert(i < len);
    // Pre-stage results for ASCII and error cases
    dchar u = rresult = s[i];
    if (u < 0x80) // ASCII
        return UTF16_DECODE_OK;
    if (0xD800 <= u && u <= 0xDBFF) // Surrogate pair
    {
        if (len <= i + 1)
            return UTF16_DECODE_TRUNCATED_SEQUENCE;
        wchar u2 = s[i + 1];
        // The second code unit must be a low surrogate, 0xDC00 .. 0xDFFF.
        // Both bounds are checked on u2 (u is already known to be a high
        // surrogate, so comparing it against 0xDFFF would never fail).
        if (u2 < 0xDC00 || 0xDFFF < u2)
            return UTF16_DECODE_INVALID_SURROGATE;
        // 0xD7C0 == 0xD800 - (0x10000 >> 10): folds the 0x10000 offset in
        u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
        ++ridx;
    }
    else if (0xDC00 <= u && u <= 0xDFFF)
        return UTF16_DECODE_UNPAIRED_SURROGATE;
    if (!utf_isValidDchar(u))
        return UTF16_DECODE_INVALID_CODE_POINT;
    rresult = u;
    return UTF16_DECODE_OK;
}