From 6815f8ba1a1a7f46fc3855d4f942f2bd28f25539 Mon Sep 17 00:00:00 2001 From: HAT Date: Sat, 25 Dec 2010 17:32:58 +0900 Subject: [PATCH] move macros from util_unistr.c to precompose.h --- contrib/misc/make-precompose.h.pl | 195 +++++++++++++++++++++++------- libatalk/unicode/precompose.h | 35 ++++-- libatalk/unicode/util_unistr.c | 63 ++++------ 3 files changed, 207 insertions(+), 86 deletions(-) diff --git a/contrib/misc/make-precompose.h.pl b/contrib/misc/make-precompose.h.pl index 558a537b..1700a7c8 100755 --- a/contrib/misc/make-precompose.h.pl +++ b/contrib/misc/make-precompose.h.pl @@ -1,6 +1,19 @@ #!/usr/bin/perl - +# # usage: make-precompose.h.pl UnicodeData.txt > precompose.h +# +# (c) 2008-2010 by HAT +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# # See # http://www.unicode.org/Public/UNIDATA/UCD.html @@ -9,17 +22,14 @@ # http://www.unicode.org/Public/UNIDATA/UnicodeData.txt -# table for binary search -------------------------------------------------- +# temp files for binary search (compose.TEMP, compose_sp.TEMP) ------------- open(UNICODEDATA, "<$ARGV[0]"); -open(PRECOMPOSE_TEMP, ">precompose.TEMP"); -open( DECOMPOSE_TEMP, ">decompose.TEMP"); - -open(PRECOMPOSE_SP_TEMP, ">precompose_sp.TEMP"); -open( DECOMPOSE_SP_TEMP, ">decompose_sp.TEMP"); +open(COMPOSE_TEMP, ">compose.TEMP"); +open(COMPOSE_SP_TEMP, ">compose_sp.TEMP"); -while (){ +while () { chop; ( $code0, @@ -37,65 +47,168 @@ while (){ $Simple_Uppercase_Mapping12, $Simple_Lowercase_Mapping13, $Simple_Titlecase_Mapping14 - ) = split(/\;/); + ) = split(/\;/); if (($Decomposition_Mapping5 ne "") && ($Decomposition_Mapping5 !~ /\ 0xFFFF) { # DELETE THIS LINE IF INTERNAL CODE IS UCS4 - + + if (hex($code0) > 0xFFFF) { + $code0_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($code0) >> 10); $code0_sp_lo = 0xDC00 + (hex($code0) & 0x3FF); - $base_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($base) >> 10); - $base_sp_lo = 0xDC00 + (hex($base) & 0x3FF); + $base_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($base) >> 10); + $base_sp_lo = 0xDC00 + (hex($base) & 0x3FF); - $comb_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($comb) >> 10); - $comb_sp_lo = 0xDC00 + (hex($comb) & 0x3FF); + $comb_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($comb) >> 10); + $comb_sp_lo = 0xDC00 + (hex($comb) & 0x3FF); - printf(PRECOMPOSE_SP_TEMP "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s\/\* %s \*\/\n", + printf(COMPOSE_SP_TEMP "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s\/\* %s \*\/\n", $leftbracket, $code0_sp_hi ,$code0_sp_lo, $base_sp_hi, $base_sp_lo, $comb_sp_hi, $comb_sp_lo, $rightbracket, $Name1); - printf(DECOMPOSE_SP_TEMP "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s\/\* %s \*\/\n", - $leftbracket, $code0_sp_hi ,$code0_sp_lo, $base_sp_hi, $base_sp_lo, $comb_sp_hi, $comb_sp_lo, $rightbracket, $Name1); - - $leftbracket = "\/\*{ "; # DELETE THIS LINE IF INTERNAL CODE IS UCS4 - $rightbracket =" },\*\/ "; # DELETE THIS LINE IF INTERNAL CODE IS UCS4 - } # DELETE THIS LINE IF INTERNAL CODE IS UCS4 - - printf(PRECOMPOSE_TEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1); - printf( DECOMPOSE_TEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1); - + + $leftbracket = "\/\*{ "; + $rightbracket =" },\*\/ "; + } + + printf(COMPOSE_TEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1); + } } +close(UNICODEDATA); + +close(COMPOSE_TEMP); +close(COMPOSE_SP_TEMP); + +# macros for BMP (PRECOMP_COUNT, DECOMP_COUNT, MAXCOMBLEN) ---------------- + +open(COMPOSE_TEMP, ") { + if (m/^\/\*/) { + next; + } + $comp_table[$comp_count][0] = substr($_, 4, 10); + $comp_table[$comp_count][1] = substr($_, 16, 10); + $comp_count++; +} + +$maxcomblen = 2; # Hangul's maxcomblen is already 2. That is, VT. + +for ($i = 0 ; $i < $comp_count ; $i++) { + $base = $comp_table[$i][1]; + $comblen = 1; + $j = 0; + while ($j < $comp_count) { + if ($base ne $comp_table[$j][0]) { + $j++; + next; + } else { + $comblen++; + $base = $comp_table[$j][1]; + $j = 0; + } + } + $maxcomblen = ($maxcomblen > $comblen) ? $maxcomblen : $comblen; +} + +close(COMPOSE_TEMP); + +# macros for SP (PRECOMP_SP_COUNT,DECOMP_SP_COUNT, MAXCOMBSPLEN) ----------- + +open(COMPOSE_SP_TEMP, ") { + if (m/^\/\*/) { + next; + } + $comp_sp_table[$comp_sp_count][0] = substr($_, 4, 10); + $comp_sp_table[$comp_sp_count][1] = substr($_, 16, 10); + $comp_sp_count++; +} + +$maxcombsplen = 2; # one char have 2 codepoints, like a D8xx DCxx. + +for ($i = 0 ; $i < $comp_sp_count ; $i++) { + $base_sp = $comp_sp_table[$i][1]; + $comblen = 2; + $j = 0; + while ($j < $comp_sp_count) { + if ($base_sp ne $comp_sp_table[$j][0]) { + $j++; + next; + } else { + $comblen += 2; + $base_sp = $comp_sp_table[$j][1]; + $j = 0; + } + } + $maxcombsplen = ($maxcombsplen > $comblen) ? $maxcombsplen : $comblen; +} + +close(COMPOSE_SP_TEMP); + +# macro for buffer length (COMBBUFLEN) ------------------------------------- + +$combbuflen = ($maxcomblen > $maxcombsplen) ? $maxcomblen : $maxcombsplen; + # sort --------------------------------------------------------------------- -system("sort -k 3 precompose.TEMP \> precompose.SORT"); -system("sort -k 2 decompose.TEMP \> decompose.SORT"); +system("sort -k 3 compose.TEMP \> precompose.SORT"); +system("sort -k 2 compose.TEMP \> decompose.SORT"); -system("sort -k 3 precompose_sp.TEMP \> precompose_sp.SORT"); -system("sort -k 2 decompose_sp.TEMP \> decompose_sp.SORT"); +system("sort -k 3 compose_sp.TEMP \> precompose_sp.SORT"); +system("sort -k 2 compose_sp.TEMP \> decompose_sp.SORT"); # print ------------------------------------------------------------------- -printf ("\/\* This file is generated by contrib/misc/make-precompose.h.pl %s \*\/\n", $ARGV[0]); print ("\/\* DO NOT EDIT BY HAND\!\!\! \*\/\n"); +print ("\/\* This file is generated by \*\/\n"); +printf ("\/\* contrib/misc/make-precompose.h.pl %s \*\/\n", $ARGV[0]); print ("\n"); printf ("\/\* %s is got from \*\/\n", $ARGV[0]); print ("\/\* http\:\/\/www.unicode.org\/Public\/UNIDATA\/UnicodeData.txt \*\/\n"); print ("\n"); +print ("\#define HANGUL_SBASE 0xAC00\n"); +print ("\#define HANGUL_LBASE 0x1100\n"); +print ("\#define HANGUL_VBASE 0x1161\n"); +print ("\#define HANGUL_TBASE 0x11A7\n"); +print ("\#define HANGUL_LCOUNT 19\n"); +print ("\#define HANGUL_VCOUNT 21\n"); +print ("\#define HANGUL_TCOUNT 28\n"); +print ("\#define HANGUL_NCOUNT 588 \/\* (HANGUL_VCOUNT \* HANGUL_TCOUNT) \*\/\n"); +print ("\#define HANGUL_SCOUNT 11172 \/\* (HANGUL_LCOUNT \* HANGUL_NCOUNT) \*\/\n"); +print ("\n"); + +printf ("\#define PRECOMP_COUNT %d\n", $comp_count); +printf ("\#define DECOMP_COUNT %d\n", $comp_count); +printf ("\#define MAXCOMBLEN %d\n", $maxcomblen); +print ("\n"); +printf ("\#define PRECOMP_SP_COUNT %d\n", $comp_sp_count); +printf ("\#define DECOMP_SP_COUNT %d\n", $comp_sp_count); +printf ("\#define MAXCOMBSPLEN %d\n", $maxcombsplen); +print ("\n"); +printf ("\#define COMBBUFLEN %d \/\* max\(MAXCOMBLEN\,MAXCOMBSPLEN\) \*\/\n", $combbuflen); +print ("\n"); + print ("static const struct \{\n"); print (" unsigned int replacement\;\n"); print (" unsigned int base\;\n"); @@ -121,9 +234,9 @@ print ("\n"); print ("static const struct \{\n"); -print (" unsigned int replacement\;\n"); -print (" unsigned int base\;\n"); -print (" unsigned int comb\;\n"); +print (" unsigned int replacement_sp\;\n"); +print (" unsigned int base_sp\;\n"); +print (" unsigned int comb_sp\;\n"); print ("\} precompositions_sp\[\] \= \{\n"); system("cat precompose_sp.SORT"); @@ -132,9 +245,9 @@ print ("\}\;\n"); print ("\n"); print ("static const struct \{\n"); -print (" unsigned int replacement\;\n"); -print (" unsigned int base\;\n"); -print (" unsigned int comb\;\n"); +print (" unsigned int replacement_sp\;\n"); +print (" unsigned int base_sp\;\n"); +print (" unsigned int comb_sp\;\n"); print ("\} decompositions_sp\[\] \= \{\n"); system("cat decompose_sp.SORT"); diff --git a/libatalk/unicode/precompose.h b/libatalk/unicode/precompose.h index 2d3a505c..da959fa0 100644 --- a/libatalk/unicode/precompose.h +++ b/libatalk/unicode/precompose.h @@ -1,9 +1,30 @@ -/* This file is generated by contrib/misc/make-precompose.h.pl UnicodeData.txt */ /* DO NOT EDIT BY HAND!!! */ +/* This file is generated by */ +/* contrib/misc/make-precompose.h.pl UnicodeData.txt */ /* UnicodeData.txt is got from */ /* http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */ +#define HANGUL_SBASE 0xAC00 +#define HANGUL_LBASE 0x1100 +#define HANGUL_VBASE 0x1161 +#define HANGUL_TBASE 0x11A7 +#define HANGUL_LCOUNT 19 +#define HANGUL_VCOUNT 21 +#define HANGUL_TCOUNT 28 +#define HANGUL_NCOUNT 588 /* (HANGUL_VCOUNT * HANGUL_TCOUNT) */ +#define HANGUL_SCOUNT 11172 /* (HANGUL_LCOUNT * HANGUL_NCOUNT) */ + +#define PRECOMP_COUNT 955 +#define DECOMP_COUNT 955 +#define MAXCOMBLEN 3 + +#define PRECOMP_SP_COUNT 16 +#define DECOMP_SP_COUNT 16 +#define MAXCOMBSPLEN 4 + +#define COMBBUFLEN 4 /* max(MAXCOMBLEN,MAXCOMBSPLEN) */ + static const struct { unsigned int replacement; unsigned int base; @@ -2051,9 +2072,9 @@ static const struct { }; static const struct { - unsigned int replacement; - unsigned int base; - unsigned int comb; + unsigned int replacement_sp; + unsigned int base_sp; + unsigned int comb_sp; } precompositions_sp[] = { { 0xD804DC9A, 0xD804DC99, 0xD804DCBA }, /* KAITHI LETTER DDDHA */ { 0xD804DC9C, 0xD804DC9B, 0xD804DCBA }, /* KAITHI LETTER RHA */ @@ -2074,9 +2095,9 @@ static const struct { }; static const struct { - unsigned int replacement; - unsigned int base; - unsigned int comb; + unsigned int replacement_sp; + unsigned int base_sp; + unsigned int comb_sp; } decompositions_sp[] = { { 0xD804DC9A, 0xD804DC99, 0xD804DCBA }, /* KAITHI LETTER DDDHA */ { 0xD804DC9C, 0xD804DC9B, 0xD804DCBA }, /* KAITHI LETTER RHA */ diff --git a/libatalk/unicode/util_unistr.c b/libatalk/unicode/util_unistr.c index 93b4a287..21dd156a 100644 --- a/libatalk/unicode/util_unistr.c +++ b/libatalk/unicode/util_unistr.c @@ -17,20 +17,6 @@ #include "precompose.h" #include "byteorder.h" -#define HANGUL_SBASE 0xAC00 -#define HANGUL_LBASE 0x1100 -#define HANGUL_VBASE 0x1161 -#define HANGUL_TBASE 0x11A7 -#define HANGUL_LCOUNT 19 -#define HANGUL_VCOUNT 21 -#define HANGUL_TCOUNT 28 -#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) /* 588 */ -#define HANGUL_SCOUNT (HANGUL_LCOUNT * HANGUL_NCOUNT) /* 11172 */ - -#define MAXCOMBLEN 3 -#define MAXCOMBSPLEN 2 -#define COMBBUFLEN 4 /* max(MAXCOMBLEN, MAXCOMBSPLEN*2) */ - /******************************************************************* Convert a wide character to upper/lower case. ********************************************************************/ @@ -345,7 +331,7 @@ binary search for pre|decomposition static ucs2_t do_precomposition(unsigned int base, unsigned int comb) { int min = 0; - int max = sizeof(precompositions) / sizeof(precompositions[0]) - 1; + int max = PRECOMP_COUNT - 1; int mid; u_int32_t sought = (base << 16) | comb, that; @@ -369,20 +355,20 @@ static ucs2_t do_precomposition(unsigned int base, unsigned int comb) static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp) { int min = 0; - int max = sizeof(precompositions_sp) / sizeof(precompositions_sp[0]) - 1; + int max = PRECOMP_SP_COUNT - 1; int mid; - u_int64_t sought = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that; + u_int64_t sought_sp = ((u_int64_t)base_sp << 32) | (u_int64_t)comb_sp, that_sp; /* binary search */ while (max >= min) { mid = (min + max) / 2; - that = ((u_int64_t)precompositions_sp[mid].base << 32) | ((u_int64_t)precompositions_sp[mid].comb); - if (that < sought) { + that_sp = ((u_int64_t)precompositions_sp[mid].base_sp << 32) | ((u_int64_t)precompositions_sp[mid].comb_sp); + if (that_sp < sought_sp) { min = mid + 1; - } else if (that > sought) { + } else if (that_sp > sought_sp) { max = mid - 1; } else { - return precompositions_sp[mid].replacement; + return precompositions_sp[mid].replacement_sp; } } /* no match */ @@ -393,7 +379,7 @@ static u_int32_t do_precomposition_sp(unsigned int base_sp, unsigned int comb_sp static u_int32_t do_decomposition(ucs2_t base) { int min = 0; - int max = sizeof(decompositions) / sizeof(decompositions[0]) - 1; + int max = DECOMP_COUNT - 1; int mid; u_int32_t sought = base; u_int32_t result, that; @@ -416,26 +402,26 @@ static u_int32_t do_decomposition(ucs2_t base) } /* -------------------------- */ -static u_int64_t do_decomposition_sp(unsigned int base) +static u_int64_t do_decomposition_sp(unsigned int base_sp) { int min = 0; - int max = sizeof(decompositions_sp) / sizeof(decompositions_sp[0]) - 1; + int max = DECOMP_SP_COUNT - 1; int mid; - u_int32_t sought = base; - u_int32_t that; - u_int64_t result; + u_int32_t sought_sp = base_sp; + u_int32_t that_sp; + u_int64_t result_sp; /* binary search */ while (max >= min) { mid = (min + max) / 2; - that = decompositions_sp[mid].replacement; - if (that < sought) { + that_sp = decompositions_sp[mid].replacement_sp; + if (that_sp < sought_sp) { min = mid + 1; - } else if (that > sought) { + } else if (that_sp > sought_sp) { max = mid - 1; } else { - result = ((u_int64_t)decompositions_sp[mid].base << 32) | ((u_int64_t)decompositions_sp[mid].comb); - return result; + result_sp = ((u_int64_t)decompositions_sp[mid].base_sp << 32) | ((u_int64_t)decompositions_sp[mid].comb_sp); + return result_sp; } } /* no match */ @@ -448,12 +434,13 @@ pre|decomposition we can't use static, this stuff needs to be reentrant static char comp[MAXPATHLEN +1]; - exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges - in decompositions[] from decomposition according to AFP 3.x spec - - We don't implement Singleton and Canonical Ordering + We don't implement Singleton and Canonical Ordering. + We ignore CompositionExclusions.txt. because they cause the problem of the roundtrip - such as Dancing Icon + such as Dancing Icon. + + exclude U2000-U2FFF, UFE30-UFE4F and U2F800-U2FA1F ranges + in precompose.h from composition according to AFP 3.x spec ********************************************************************/ size_t precompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) @@ -618,7 +605,7 @@ size_t decompose_w (ucs2_t *name, size_t inplen, ucs2_t *comp, size_t *outlen) base_sp = result_sp >> 32; comb[COMBBUFLEN-comblen] = (result_sp >> 16) & 0xFFFF; /* hi */ comb[COMBBUFLEN-comblen+1] = result_sp & 0xFFFF; /* lo */ - } while (comblen < (MAXCOMBSPLEN<<1)); + } while (comblen < MAXCOMBSPLEN); if (*outlen < (comblen + 1) << 1) { errno = E2BIG; -- 2.39.2