3 # usage: make-precompose.h.pl UnicodeData.txt > precompose.h
6 # http://www.unicode.org/Public/UNIDATA/UCD.html
7 # http://www.unicode.org/reports/tr15/
8 # http://www.unicode.org/Public/*/ucd/UnicodeData*.txt
9 # http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
12 # table for binary search --------------------------------------------------
# Open the input file named by the first command-line argument for reading,
# and four scratch files in the current directory for writing: one
# precompose/decompose pair for the plain tables and one pair for the "_sp"
# tables.
# NOTE(review): none of these open() calls checks its return value, so a
# missing input file or an unwritable directory is not reported at this point.
# NOTE(review): the input uses two-argument open with an interpolated name;
# the three-argument form would keep special characters in the name from
# being read as part of the open mode.
14 open(UNICODEDATA, "<$ARGV[0]");
16 open(PRECOMPOSE_TEMP, ">precompose.TEMP");
17 open( DECOMPOSE_TEMP, ">decompose.TEMP");
19 open(PRECOMPOSE_SP_TEMP, ">precompose_sp.TEMP");
20 open( DECOMPOSE_SP_TEMP, ">decompose_sp.TEMP");
# Main loop: read the input one line at a time and, for every record that has
# a two-part decomposition mapping, write one table row to the scratch files.
# NOTE(review): the statement that fills the field variables, the default
# value of $leftbracket and the closing braces of this loop are not visible
# in this view.
22 while (<UNICODEDATA>){
# Part of the per-record field list. The numeric suffix on each name
# presumably is the field's index within the record -- TODO confirm.
28 $Canonical_Combining_Class3,
30 $Decomposition_Mapping5,
37 $Simple_Uppercase_Mapping12,
38 $Simple_Lowercase_Mapping13,
39 $Simple_Titlecase_Mapping14
# Keep only records whose decomposition mapping is non-empty, contains no '<'
# and contains a space (i.e. consists of more than one token).
# NOTE(review): '<' presumably marks a tagged (compatibility) mapping --
# confirm against the Unicode Character Database documentation.
42 if (($Decomposition_Mapping5 ne "") && ($Decomposition_Mapping5 !~ /\</) && ($Decomposition_Mapping5 =~ / /)) {
# The first two space-separated tokens become $base and $comb; any further
# tokens are discarded by the list assignment.
43 ($base, $comb) = split(/ /,$Decomposition_Mapping5);
# Default text printed after the three values of a row.
46 $rightbracket =" }, ";
# Code points in 0x2000-0x2FFF, 0xFE30-0xFE4F and 0x2F800-0x2FA1F get
# brackets that wrap the row in /* ... */, so the row still appears in the
# generated output but as a C comment.
49 if ( ((0x2000 <= hex($code0)) && (hex($code0) <= 0x2FFF))
50 || ((0xFE30 <= hex($code0)) && (hex($code0) <= 0xFE4F))
51 || ((0x2F800 <= hex($code0)) && (hex($code0) <= 0x2FA1F))) {
52 $leftbracket = "\/\*{ ";
53 $rightbracket =" },\*\/ ";
# Code points above 0xFFFF: split each value into a high half
# (0xD800 - 0x40 + (value >> 10)) and a low half (0xDC00 + low 10 bits),
# which is the UTF-16 surrogate-pair arithmetic.
# NOTE(review): the same formula is applied to $base and $comb without
# checking that they are above 0xFFFF -- confirm this is intended.
56 if (hex($code0) > 0xFFFF) { # DELETE THIS LINE IF INTERNAL CODE IS UCS4
58 $code0_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($code0) >> 10);
59 $code0_sp_lo = 0xDC00 + (hex($code0) & 0x3FF);
61 $base_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($base) >> 10);
62 $base_sp_lo = 0xDC00 + (hex($base) & 0x3FF);
64 $comb_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($comb) >> 10);
65 $comb_sp_lo = 0xDC00 + (hex($comb) & 0x3FF);
# Write the identical row to both "_sp" scratch files. Each value is printed
# as its two halves concatenated into one 8-digit hex literal, followed by
# the character name in a C comment.
67 printf(PRECOMPOSE_SP_TEMP "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s\/\* %s \*\/\n",
68 $leftbracket, $code0_sp_hi ,$code0_sp_lo, $base_sp_hi, $base_sp_lo, $comb_sp_hi, $comb_sp_lo, $rightbracket, $Name1);
69 printf(DECOMPOSE_SP_TEMP "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s\/\* %s \*\/\n",
70 $leftbracket, $code0_sp_hi ,$code0_sp_lo, $base_sp_hi, $base_sp_lo, $comb_sp_hi, $comb_sp_lo, $rightbracket, $Name1);
# After the "_sp" rows are written, switch to the commenting-out brackets so
# that the row written to the plain tables below is emitted as a C comment.
72 $leftbracket = "\/\*{ "; # DELETE THIS LINE IF INTERNAL CODE IS UCS4
73 $rightbracket =" },\*\/ "; # DELETE THIS LINE IF INTERNAL CODE IS UCS4
74 } # DELETE THIS LINE IF INTERNAL CODE IS UCS4
# Write the identical row (code point, base, combining value as 8-digit hex,
# then the character name in a C comment) to both plain scratch files.
76 printf(PRECOMPOSE_TEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1);
77 printf( DECOMPOSE_TEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1);
82 # sort ---------------------------------------------------------------------
# Sort each scratch file with the external sort(1) command, writing the
# result to a matching *.SORT file. The precompose files are keyed from
# whitespace-separated field 3 onward, the decompose files from field 2.
# NOTE(review): which column that is depends on the default $leftbracket,
# which is not visible here; presumably field 3 is the base value and field 2
# the replacement value -- TODO confirm.
# NOTE(review): the exit status of system() is ignored, and sort(1) ordering
# depends on the locale in effect.
# NOTE(review): the scratch file handles must be closed or flushed before
# this point for the sort to see all rows; no close() is visible in this
# view -- confirm.
84 system("sort -k 3 precompose.TEMP \> precompose.SORT");
85 system("sort -k 2 decompose.TEMP \> decompose.SORT");
87 system("sort -k 3 precompose_sp.TEMP \> precompose_sp.SORT");
88 system("sort -k 2 decompose_sp.TEMP \> decompose_sp.SORT");
90 # print -------------------------------------------------------------------
# Emit the banner of the generated file on standard output: four C comments
# naming this generator, warning against manual edits, and recording the
# input file name (first command-line argument) and its source URL.
# The backslash escapes inside these double-quoted strings only protect
# punctuation; the emitted text is plain "/* ... */".
92 printf ("\/\* This file is generated by contrib/misc/make-precompose.h.pl %s \*\/\n", $ARGV[0]);
93 print ("\/\* DO NOT EDIT BY HAND\!\!\! \*\/\n");
95 printf ("\/\* %s is got from \*\/\n", $ARGV[0]);
96 print ("\/\* http\:\/\/www.unicode.org\/Public\/UNIDATA\/UnicodeData.txt \*\/\n");
# Emit the opening of the C array "precompositions": a static const array of
# an anonymous struct with three unsigned int members (replacement, base,
# comb), followed by the rows from precompose.SORT.
# The text that closes the array is not visible in this view.
99 print ("static const struct \{\n");
100 print (" unsigned int replacement\;\n");
101 print (" unsigned int base\;\n");
102 print (" unsigned int comb\;\n");
103 print ("\} precompositions\[\] \= \{\n");
# The rows are copied to standard output by an external cat process.
# NOTE(review): correct ordering relative to the print output above relies on
# Perl flushing its own output before running the child -- confirm on the
# target platform. The exit status of system() is ignored.
105 system("cat precompose.SORT");
# Emit the opening of the C array "decompositions": a static const array of
# an anonymous struct with three unsigned int members (replacement, base,
# comb), followed by the rows from decompose.SORT.
# The text that closes the array is not visible in this view.
110 print ("static const struct \{\n");
111 print (" unsigned int replacement\;\n");
112 print (" unsigned int base\;\n");
113 print (" unsigned int comb\;\n");
114 print ("\} decompositions\[\] \= \{\n");
# The rows are copied to standard output by an external cat process; its exit
# status is ignored.
116 system("cat decompose.SORT");
# Emit the opening of the C array "precompositions_sp": same three-member
# struct layout as the plain tables, followed by the rows from
# precompose_sp.SORT, in which each value is written as two 16-bit halves
# concatenated into one hex literal.
# The text that closes the array is not visible in this view.
123 print ("static const struct \{\n");
124 print (" unsigned int replacement\;\n");
125 print (" unsigned int base\;\n");
126 print (" unsigned int comb\;\n");
127 print ("\} precompositions_sp\[\] \= \{\n");
# The rows are copied to standard output by an external cat process; its exit
# status is ignored.
129 system("cat precompose_sp.SORT");
# Emit the opening of the C array "decompositions_sp": same three-member
# struct layout as the plain tables, followed by the rows from
# decompose_sp.SORT.
# The text that closes the array is not visible in this view.
134 print ("static const struct \{\n");
135 print (" unsigned int replacement\;\n");
136 print (" unsigned int base\;\n");
137 print (" unsigned int comb\;\n");
138 print ("\} decompositions_sp\[\] \= \{\n");
# The rows are copied to standard output by an external cat process; its exit
# status is ignored.
140 system("cat decompose_sp.SORT");
# Final marker comment of the generated file.
145 print ("\/\* EOF \*\/\n");