]> arthur.barton.de Git - netatalk.git/commitdiff
composition of surrogate pair
authorHAT <hat@fa2.so-net.ne.jp>
Wed, 22 Dec 2010 11:20:36 +0000 (20:20 +0900)
committerHAT <hat@fa2.so-net.ne.jp>
Wed, 22 Dec 2010 11:20:36 +0000 (20:20 +0900)
contrib/misc/make-precompose.h.pl [changed mode: 0644->0755]
libatalk/unicode/precompose.h
libatalk/unicode/util_unistr.c

old mode 100644 (file)
new mode 100755 (executable)
index 9e85815..558a537
 # table for binary search --------------------------------------------------
 
 open(UNICODEDATA, "<$ARGV[0]");
-open(PRECOMPOSETEMP, ">precompose.TEMP");
-open( DECOMPOSETEMP, ">decompose.TEMP");
+
+open(PRECOMPOSE_TEMP, ">precompose.TEMP");
+open( DECOMPOSE_TEMP, ">decompose.TEMP");
+
+open(PRECOMPOSE_SP_TEMP, ">precompose_sp.TEMP");
+open( DECOMPOSE_SP_TEMP, ">decompose_sp.TEMP");
 
 while (<UNICODEDATA>){
     chop;
@@ -41,11 +45,6 @@ while (<UNICODEDATA>){
        $leftbracket  = "  { ";
        $rightbracket =" },     ";
 
-       if (hex($code0) > 0xFFFF) {           # DELETE THIS LINE  IF INTERNAL CODE IS UCS4
-           $leftbracket  = "\/\*{ ";         # DELETE THIS LINE  IF INTERNAL CODE IS UCS4
-           $rightbracket =" },\*\/   ";      # DELETE THIS LINE  IF INTERNAL CODE IS UCS4
-       }                                     # DELETE THIS LINE  IF INTERNAL CODE IS UCS4
-       
        # AFP 3.x Spec
        if ( ((0x2000  <= hex($code0)) && (hex($code0) <=  0x2FFF))
          || ((0xFE30  <= hex($code0)) && (hex($code0) <=  0xFE4F))
@@ -54,9 +53,28 @@ while (<UNICODEDATA>){
            $rightbracket =" },\*\/   ";
        }
        
-       printf(PRECOMPOSETEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1);
+       if (hex($code0) > 0xFFFF) {                            # DELETE THIS LINE  IF INTERNAL CODE IS UCS4
+           
+           $code0_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($code0) >> 10);
+           $code0_sp_lo = 0xDC00 + (hex($code0) & 0x3FF);
+
+            $base_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($base) >> 10);
+            $base_sp_lo = 0xDC00 + (hex($base) & 0x3FF);
 
-       printf( DECOMPOSETEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1);
+            $comb_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($comb) >> 10);
+            $comb_sp_lo = 0xDC00 + (hex($comb) & 0x3FF);
+
+           printf(PRECOMPOSE_SP_TEMP "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s\/\* %s \*\/\n",
+                  $leftbracket, $code0_sp_hi ,$code0_sp_lo, $base_sp_hi, $base_sp_lo, $comb_sp_hi, $comb_sp_lo, $rightbracket, $Name1);
+           printf(DECOMPOSE_SP_TEMP "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s\/\* %s \*\/\n",
+                   $leftbracket, $code0_sp_hi ,$code0_sp_lo, $base_sp_hi, $base_sp_lo, $comb_sp_hi, $comb_sp_lo, $rightbracket, $Name1);
+
+           $leftbracket  = "\/\*{ ";                          # DELETE THIS LINE  IF INTERNAL CODE IS UCS4
+           $rightbracket =" },\*\/   ";                       # DELETE THIS LINE  IF INTERNAL CODE IS UCS4
+       }                                                      # DELETE THIS LINE  IF INTERNAL CODE IS UCS4
+       
+       printf(PRECOMPOSE_TEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1);
+       printf( DECOMPOSE_TEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1);
        
     }
 }
@@ -66,6 +84,9 @@ while (<UNICODEDATA>){
 system("sort -k 3 precompose.TEMP \> precompose.SORT");
 system("sort -k 2  decompose.TEMP \>  decompose.SORT");
 
+system("sort -k 3 precompose_sp.TEMP \> precompose_sp.SORT");
+system("sort -k 2  decompose_sp.TEMP \>  decompose_sp.SORT");
+
 # print  -------------------------------------------------------------------
 
 printf ("\/\* This file is generated by contrib/misc/make-precompose.h.pl %s \*\/\n", $ARGV[0]);
@@ -97,6 +118,30 @@ system("cat decompose.SORT");
 print ("\}\;\n");
 print ("\n");
 
+
+
+print ("static const struct \{\n");   # open the C declaration of the surrogate-pair precomposition table
+print ("  unsigned int replacement\;\n");
+print ("  unsigned int base\;\n");
+print ("  unsigned int comb\;\n");
+print ("\} precompositions_sp\[\] \= \{\n");
+
+system("cat precompose_sp.SORT");   # table rows: precompose_sp.TEMP after "sort -k 3" (ordered from the base column onward)
+
+print ("\}\;\n");
+print ("\n");
+
+print ("static const struct \{\n");   # open the C declaration of the surrogate-pair decomposition table
+print ("  unsigned int replacement\;\n");
+print ("  unsigned int base\;\n");
+print ("  unsigned int comb\;\n");
+print ("\} decompositions_sp\[\] \= \{\n");
+
+system("cat decompose_sp.SORT");   # table rows: decompose_sp.TEMP after "sort -k 2" (ordered from the replacement column onward)
+
+print ("\}\;\n");
+print ("\n");
+
 print ("\/\* EOF \*\/\n");
 
 # EOF
index 9eba06920d19e7b651f9e794c19a77049152b70e..2d3a505c22f95eb06d80b726ee297f6b754fccd3 100644 (file)
@@ -1009,6 +1009,9 @@ static const struct {
   { 0x000030FE, 0x000030FD, 0x00003099 },     /* KATAKANA VOICED ITERATION MARK */
   { 0x0000FB2C, 0x0000FB49, 0x000005C1 },     /* HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT */
   { 0x0000FB2D, 0x0000FB49, 0x000005C2 },     /* HEBREW LETTER SHIN WITH DAGESH AND SIN DOT */
+/*{ 0x0001109A, 0x00011099, 0x000110BA },*/   /* KAITHI LETTER DDDHA */
+/*{ 0x0001109C, 0x0001109B, 0x000110BA },*/   /* KAITHI LETTER RHA */
+/*{ 0x000110AB, 0x000110A5, 0x000110BA },*/   /* KAITHI LETTER VA */
 /*{ 0x0001D15E, 0x0001D157, 0x0001D165 },*/   /* MUSICAL SYMBOL HALF NOTE */
 /*{ 0x0001D15F, 0x0001D158, 0x0001D165 },*/   /* MUSICAL SYMBOL QUARTER NOTE */
 /*{ 0x0001D160, 0x0001D15F, 0x0001D16E },*/   /* MUSICAL SYMBOL EIGHTH NOTE */
@@ -2029,6 +2032,9 @@ static const struct {
   { 0x0000FB4C, 0x000005D1, 0x000005BF },     /* HEBREW LETTER BET WITH RAFE */
   { 0x0000FB4D, 0x000005DB, 0x000005BF },     /* HEBREW LETTER KAF WITH RAFE */
   { 0x0000FB4E, 0x000005E4, 0x000005BF },     /* HEBREW LETTER PE WITH RAFE */
+/*{ 0x0001109A, 0x00011099, 0x000110BA },*/   /* KAITHI LETTER DDDHA */
+/*{ 0x0001109C, 0x0001109B, 0x000110BA },*/   /* KAITHI LETTER RHA */
+/*{ 0x000110AB, 0x000110A5, 0x000110BA },*/   /* KAITHI LETTER VA */
 /*{ 0x0001D15E, 0x0001D157, 0x0001D165 },*/   /* MUSICAL SYMBOL HALF NOTE */
 /*{ 0x0001D15F, 0x0001D158, 0x0001D165 },*/   /* MUSICAL SYMBOL QUARTER NOTE */
 /*{ 0x0001D160, 0x0001D15F, 0x0001D16E },*/   /* MUSICAL SYMBOL EIGHTH NOTE */
@@ -2044,4 +2050,50 @@ static const struct {
 /*{ 0x0001D1C0, 0x0001D1BC, 0x0001D16F },*/   /* MUSICAL SYMBOL FUSA BLACK */
 };
 
+static const struct {
+  unsigned int replacement;   /* composed character, packed as (high surrogate << 16) | low surrogate */
+  unsigned int base;          /* base character, same packing */
+  unsigned int comb;          /* combining character, same packing */
+} precompositions_sp[] = {   /* rows are ordered by (base, comb), the key do_precomposition_sp() binary-searches on */
+  { 0xD804DC9A, 0xD804DC99, 0xD804DCBA },     /* KAITHI LETTER DDDHA */
+  { 0xD804DC9C, 0xD804DC9B, 0xD804DCBA },     /* KAITHI LETTER RHA */
+  { 0xD804DCAB, 0xD804DCA5, 0xD804DCBA },     /* KAITHI LETTER VA */
+  { 0xD834DD5E, 0xD834DD57, 0xD834DD65 },     /* MUSICAL SYMBOL HALF NOTE */
+  { 0xD834DD5F, 0xD834DD58, 0xD834DD65 },     /* MUSICAL SYMBOL QUARTER NOTE */
+  { 0xD834DD60, 0xD834DD5F, 0xD834DD6E },     /* MUSICAL SYMBOL EIGHTH NOTE */
+  { 0xD834DD61, 0xD834DD5F, 0xD834DD6F },     /* MUSICAL SYMBOL SIXTEENTH NOTE */
+  { 0xD834DD62, 0xD834DD5F, 0xD834DD70 },     /* MUSICAL SYMBOL THIRTY-SECOND NOTE */
+  { 0xD834DD63, 0xD834DD5F, 0xD834DD71 },     /* MUSICAL SYMBOL SIXTY-FOURTH NOTE */
+  { 0xD834DD64, 0xD834DD5F, 0xD834DD72 },     /* MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE */
+  { 0xD834DDBB, 0xD834DDB9, 0xD834DD65 },     /* MUSICAL SYMBOL MINIMA */
+  { 0xD834DDBC, 0xD834DDBA, 0xD834DD65 },     /* MUSICAL SYMBOL MINIMA BLACK */
+  { 0xD834DDBD, 0xD834DDBB, 0xD834DD6E },     /* MUSICAL SYMBOL SEMIMINIMA WHITE */
+  { 0xD834DDBF, 0xD834DDBB, 0xD834DD6F },     /* MUSICAL SYMBOL FUSA WHITE */
+  { 0xD834DDBE, 0xD834DDBC, 0xD834DD6E },     /* MUSICAL SYMBOL SEMIMINIMA BLACK */
+  { 0xD834DDC0, 0xD834DDBC, 0xD834DD6F },     /* MUSICAL SYMBOL FUSA BLACK */
+};
+
+static const struct {
+  unsigned int replacement;   /* composed character, packed as (high surrogate << 16) | low surrogate */
+  unsigned int base;          /* base character it decomposes to, same packing */
+  unsigned int comb;          /* combining character it decomposes to, same packing */
+} decompositions_sp[] = {   /* rows are ordered by replacement, the key do_decomposition_sp() binary-searches on */
+  { 0xD804DC9A, 0xD804DC99, 0xD804DCBA },     /* KAITHI LETTER DDDHA */
+  { 0xD804DC9C, 0xD804DC9B, 0xD804DCBA },     /* KAITHI LETTER RHA */
+  { 0xD804DCAB, 0xD804DCA5, 0xD804DCBA },     /* KAITHI LETTER VA */
+  { 0xD834DD5E, 0xD834DD57, 0xD834DD65 },     /* MUSICAL SYMBOL HALF NOTE */
+  { 0xD834DD5F, 0xD834DD58, 0xD834DD65 },     /* MUSICAL SYMBOL QUARTER NOTE */
+  { 0xD834DD60, 0xD834DD5F, 0xD834DD6E },     /* MUSICAL SYMBOL EIGHTH NOTE */
+  { 0xD834DD61, 0xD834DD5F, 0xD834DD6F },     /* MUSICAL SYMBOL SIXTEENTH NOTE */
+  { 0xD834DD62, 0xD834DD5F, 0xD834DD70 },     /* MUSICAL SYMBOL THIRTY-SECOND NOTE */
+  { 0xD834DD63, 0xD834DD5F, 0xD834DD71 },     /* MUSICAL SYMBOL SIXTY-FOURTH NOTE */
+  { 0xD834DD64, 0xD834DD5F, 0xD834DD72 },     /* MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE */
+  { 0xD834DDBB, 0xD834DDB9, 0xD834DD65 },     /* MUSICAL SYMBOL MINIMA */
+  { 0xD834DDBC, 0xD834DDBA, 0xD834DD65 },     /* MUSICAL SYMBOL MINIMA BLACK */
+  { 0xD834DDBD, 0xD834DDBB, 0xD834DD6E },     /* MUSICAL SYMBOL SEMIMINIMA WHITE */
+  { 0xD834DDBE, 0xD834DDBC, 0xD834DD6E },     /* MUSICAL SYMBOL SEMIMINIMA BLACK */
+  { 0xD834DDBF, 0xD834DDBB, 0xD834DD6F },     /* MUSICAL SYMBOL FUSA WHITE */
+  { 0xD834DDC0, 0xD834DDBC, 0xD834DD6F },     /* MUSICAL SYMBOL FUSA BLACK */
+};
+
 /* EOF */
index d08f86262e659c2ca643b596c2d672fd04f02de7..93b4a287e44a45697aa2c2f39d77a93c85429097 100644 (file)
 #define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT)   /* 11172 */
 
 #define MAXCOMBLEN 3
+#define MAXCOMBSPLEN 2
+#define COMBBUFLEN 4     /* max(MAXCOMBLEN, MAXCOMBSPLEN*2) */
 
+/*******************************************************************
+ Convert a wide character to upper/lower case.
+********************************************************************/
 ucs2_t toupper_w(ucs2_t val)
 {
        if ( val >= 0x0040 && val <= 0x007F)
@@ -333,7 +338,10 @@ ucs2_t *strcat_w(ucs2_t *dest, const ucs2_t *src)
 }
 
 
-/* ------------------------ */
+/*******************************************************************
+binary search for pre|decomposition
+********************************************************************/
+
 static ucs2_t do_precomposition(unsigned int base, unsigned int comb) 
 {
        int min = 0;
@@ -357,6 +365,30 @@ static ucs2_t do_precomposition(unsigned int base, unsigned int comb)
        return 0;
 }
 
+/* Look up (base_sp, comb_sp) in precompositions_sp[]; each value is a surrogate pair packed as (hi << 16) | lo. Returns the packed replacement, or 0 if there is no entry. */
+static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp) 
+{
+       int min = 0;
+       int max = sizeof(precompositions_sp) / sizeof(precompositions_sp[0]) - 1;
+       int mid;
+       u_int64_t sought = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that;
+
+       /* binary search on the 64-bit key (base << 32) | comb; relies on the table being sorted ascending by that key */
+       while (max >= min) {
+               mid = (min + max) / 2;
+               that = ((u_int64_t)precompositions_sp[mid].base << 32) | ((u_int64_t)precompositions_sp[mid].comb);
+               if (that < sought) {
+                       min = mid + 1;
+               } else if (that > sought) {
+                       max = mid - 1;
+               } else {
+                       return precompositions_sp[mid].replacement;
+               }
+       }
+       /* no match: 0 is the "not found" value callers test for */
+       return 0;
+}
+
 /* -------------------------- */
 static u_int32_t do_decomposition(ucs2_t base) 
 {
@@ -383,36 +415,70 @@ static u_int32_t do_decomposition(ucs2_t base)
        return 0;
 }
 
-/* we can't use static, this stuff needs to be reentrant */
-/* static char comp[MAXPATHLEN +1]; */
+/* Look up a packed surrogate pair ((hi << 16) | lo) in decompositions_sp[]. Returns ((u_int64_t)base << 32) | comb for a match, or 0 if there is no entry. */
+static u_int64_t do_decomposition_sp(unsigned int base) 
+{
+       int min = 0;
+       int max = sizeof(decompositions_sp) / sizeof(decompositions_sp[0]) - 1;
+       int mid;
+       u_int32_t sought = base;
+       u_int32_t that;
+       u_int64_t result;
+
+       /* binary search on .replacement; relies on the table being sorted ascending by that field */
+       while (max >= min) {
+               mid = (min + max) / 2;
+               that = decompositions_sp[mid].replacement;
+               if (that < sought) {
+                       min = mid + 1;
+               } else if (that > sought) {
+                       max = mid - 1;
+               } else {
+                       result = ((u_int64_t)decompositions_sp[mid].base << 32) | ((u_int64_t)decompositions_sp[mid].comb);
+                       return result;
+               }
+       }
+       /* no match: 0 is the "no decomposition" value the caller tests for */
+       return 0;
+}
+
+/*******************************************************************
+pre|decomposition
+
+   We can't use a static buffer here; this code needs to be reentrant:
+   static char comp[MAXPATHLEN +1];
+
+   The ranges U+2000-U+2FFF, U+FE30-U+FE4F and U+2F800-U+2FA1F in
+   decompositions[] are excluded from decomposition per the AFP 3.x spec.
+
+   Singleton decomposition and Canonical Ordering are deliberately not
+   implemented, because they cause round-trip problems
+   such as the "Dancing Icon".
+********************************************************************/
 
 size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
 {
        size_t i;
        ucs2_t base, comb;
+       u_int32_t base_sp, comb_sp;
        ucs2_t *in, *out;
        ucs2_t hangul_lindex, hangul_vindex;
        ucs2_t result;
+       u_int32_t result_sp;
        size_t o_len = *outlen;
-
+       
        if (!inplen || (inplen & 1) || inplen > o_len)
                return (size_t)-1;
        
-       /*   Actually,                                                 */
-       /*   Decomposition and Canonical Ordering are necessary here.  */
-       /*                                                             */
-       /*         Ex. in = CanonicalOrdering(decompose_w(name))       */
-       /*                                                             */
-       /*   A new mapping table is needed for CanonicalOrdering.      */
-       
        i = 0;
        in  = name;
        out = comp;
-
+       
        base = *in;
        while (*outlen > 2) {
                i += 2;
                in++;
+
                if (i == inplen) {
                        *out = base;
                        out++;
@@ -420,9 +486,10 @@ size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
                        *outlen -= 2;
                        return o_len - *outlen;
                }
+
                comb = *in;
                result = 0;
-               
+
                /* Non-Combination Character */
                if (comb < 0x300) ;
                
@@ -445,8 +512,42 @@ size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
                        }
                }
                
-               /* Combining Sequence */
-               else if ((result = do_precomposition(base, comb))) {
+               /* Binary Search for Surrogate Pair */
+               else if ((0xD800 <= base) && (base < 0xDC00)) {
+                       if ((0xDC00 <= comb) && (comb < 0xE000) && (i + 4 <= inplen)) {
+                               base_sp = ((u_int32_t)base << 16) | (u_int32_t)comb;
+                               do {
+                                       comb_sp = ((u_int32_t)in[1] << 16) | (u_int32_t)in[2];
+                                       if (result_sp = do_precomposition_sp(base_sp, comb_sp)) {
+                                               base_sp = result_sp;
+                                               i += 4;
+                                               in +=2;
+                                       }
+                               } while ((i + 4 <= inplen) && result_sp) ;
+
+                               *out = base_sp >> 16;
+                               out++;
+                               *outlen -= 2;
+
+                               if (*outlen <= 2) {
+                                       errno = E2BIG;
+                                       return (size_t)-1;
+                               }
+
+                               *out = base_sp & 0xFFFF;
+                               out++;
+                               *outlen -= 2;
+
+                               i += 2;
+                               in++;
+                               base = *in;
+
+                               result = 1;
+                       }
+               }
+
+               /* Binary Search for BMP */
+               else if (result = do_precomposition(base, comb)) {
                        base = result;
                }
                
@@ -457,25 +558,22 @@ size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
                        base = comb;
                }
        }
-       
+
        errno = E2BIG;
        return (size_t)-1;
 }
 
 /* --------------- */
-
-/* Singleton Decomposition is unsupported.               */
-/* A new mapping table is needed for implementation.     */
-
 size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
 {
        size_t i;
        size_t comblen;
-       ucs2_t base;
-       ucs2_t comb[MAXCOMBLEN];
+       ucs2_t base, comb[COMBBUFLEN];
+       u_int32_t base_sp;
        ucs2_t hangul_sindex, tjamo;
        ucs2_t *in, *out;
        unsigned int result;
+       u_int64_t result_sp;
        size_t o_len = *outlen;
 
        if (!inplen || (inplen & 1))
@@ -495,31 +593,57 @@ size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
                else if ((HANGUL_SBASE <= base) && (base < HANGUL_SBASE + HANGUL_SCOUNT)) {
                        hangul_sindex = base - HANGUL_SBASE;
                        base = HANGUL_LBASE + hangul_sindex / HANGUL_NCOUNT;
-                       comb[MAXCOMBLEN-2] = HANGUL_VBASE + (hangul_sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
+                       comb[COMBBUFLEN-2] = HANGUL_VBASE + (hangul_sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT;
                        
                        /* <L,V> */
                        if ((tjamo = HANGUL_TBASE + hangul_sindex % HANGUL_TCOUNT) == HANGUL_TBASE) {
-                               comb[MAXCOMBLEN-1] = comb[MAXCOMBLEN-2];
+                               comb[COMBBUFLEN-1] = comb[COMBBUFLEN-2];
                                comblen = 1;
                        }
                        
                        /* <L,V,T> */
                        else {
-                               comb[MAXCOMBLEN-1] = tjamo;
+                               comb[COMBBUFLEN-1] = tjamo;
                                comblen = 2;
                        }
                }
                
-               /* Combining Sequence */
-               /* exclude U2000-U2FFF and UFE30-UFE4F ranges in decompositions[]     */
-               /* from decomposition according to AFP 3.1 spec    */
+               /* Binary Search for Surrogate Pair */
+               else if ((0xD800 <= base) && (base < 0xDC00)) {
+                       if (i + 2 < inplen) {
+                               base_sp =  ((u_int32_t)base << 16) | (u_int32_t)in[1];
+                               do {
+                                       if ( !(result_sp = do_decomposition_sp(base_sp))) break;
+                                       comblen += 2;
+                                       base_sp = result_sp >> 32;
+                                       comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF;  /* hi */
+                                       comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF;        /* lo */
+                               } while (comblen < (MAXCOMBSPLEN<<1));
+
+                               if (*outlen < (comblen + 1) << 1) {
+                                       errno = E2BIG;
+                                       return (size_t)-1;
+                               }
+
+                               *out = base_sp >> 16;   /* hi */
+                               out++;
+                               *outlen -= 2;
+                               
+                               base = base_sp & 0xFFFF; /* lo */
+                               
+                               i += 2;
+                               in++;
+                       }
+               }
+                       
+               /* Binary Search for BMP */
                else {
                        do {
-                               if ((comblen >= MAXCOMBLEN) || !(result = do_decomposition(base))) break;
+                               if ( !(result = do_decomposition(base))) break;
                                comblen++;
                                base = result  >> 16;
-                               comb[MAXCOMBLEN-comblen] = result & 0xffff;
-                       } while (0x007f < base) ;
+                               comb[COMBBUFLEN-comblen] = result & 0xFFFF;
+                       } while ((0x007f < base) && (comblen < MAXCOMBLEN));
                }
                
                if (*outlen < (comblen + 1) << 1) {
@@ -532,7 +656,7 @@ size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
                *outlen -= 2;
                
                while ( comblen > 0 ) {
-                       *out = comb[MAXCOMBLEN-comblen];
+                       *out = comb[COMBBUFLEN-comblen];
                        out++;
                        *outlen -= 2;
                        comblen--;
@@ -541,13 +665,15 @@ size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen)
                i += 2;
                in++;
        }
-
-       /* Is Canonical Ordering necessary here? */
        
        *out = 0;
        return o_len-*outlen;
 }
 
+/*******************************************************************
+length of UTF-8 character and string
+********************************************************************/
+
 size_t utf8_charlen ( char* utf8 )
 {
        unsigned char *p;