3 # usage: make-casetable.pl <infile> <outfile1> <outfile2>
4 # make-casetable.pl UnicodeData.txt utf16_casetable.h utf16_case.c
6 # (c) 2011 by HAT <hat@fa2.so-net.ne.jp>
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
20 # http://www.unicode.org/reports/tr44/
21 # http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
23 # One block has 64 chars.
27 # block 1 = U+0000 - U+003F
28 # block 2 = U+0040 - U+007F
30 # block 1024 = U+FFC0 - U+FFFF
35 # block 1025 = U+010000 - U+01003F
36 # block 1026 = U+010040 - U+01007F
38 # block 17408 = U+10FFC0 - U+10FFFF
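# The dummy blocks exist only so that edge detection can look one block
# before the first real block and one block after the last.
# If a block includes any upper/lower case chars, block_enable[] is set to 1.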
# Package globals that receive individual fields of one UnicodeData.txt
# record.  The numeric suffix of each name appears to be the field's
# position within the record (see UAX #44) -- confirm against the split
# in the reading loop.
our (
    $General_Category2,
    $Canonical_Combining_Class3,
    $Decomposition_Mapping5,
    $Unicode_1_Name10,
    $Simple_Uppercase_Mapping12,
    $Simple_Lowercase_Mapping13,
    $Simple_Titlecase_Mapping14,
);
# Open the two generated files: $ARGV[1] is the C header that receives the
# tables, $ARGV[2] is the C source that receives the lookup functions.
# The bareword handles CHEADER and CSOURCE are kept because make_array
# writes to them by name.
# Three-argument open stops a file name that begins with '>', '<' or '|'
# from being read as part of the mode, and a failed open now aborts the
# run instead of letting every later printf go to an unopened handle.
open(CHEADER, '>', $ARGV[1])
    or die "$0: cannot open '$ARGV[1]' for writing: $!\n";
open(CSOURCE, '>', $ARGV[2])
    or die "$0: cannot open '$ARGV[2]' for writing: $!\n";

# Both generated files start with the same "do not edit" banner, which
# records the command line that produced them.
my $banner = join('',
    "/*\n",
    "DO NOT EDIT BY HAND!!!\n",
    "\n",
    "This file is generated by\n",
    " contrib/shell_utils/make-casetable.pl $ARGV[0] $ARGV[1] $ARGV[2]\n",
    "\n",
    "$ARGV[0] is got from\n",
    "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n",
    "*/\n",
    "\n",
);

# The text is passed as a "%s" argument so that a '%' in a file name is
# never interpreted as a format directive.
printf(CHEADER "%s", $banner);
printf(CSOURCE "%s", $banner);

# The C source additionally pulls in the generated header by the name
# given on the command line.
printf(CSOURCE "%s", join('',
    "#include <stdint.h>\n",
    "#include <atalk/unicode.h>\n",
    "#include \"$ARGV[1]\"\n",
    "\n",
));

# Emit the tables and lookup functions for each conversion direction.
&make_array("upper");
&make_array("lower");

printf(CHEADER "%s", "/* EOF */\n");
printf(CSOURCE "%s", "/* EOF */\n");
122 ###########################################################################
125 # init table -----------------------------------------------------
127 for ($char = 0 ; $char <= 0xFFFF ; $char++) {
128 $table[$char][0] = $char; # mapped char
129 $table[$char][1] = $char; # orig char
130 $table[$char][2] = ""; # char name
133 for ($char = 0x10000 ; $char <= 0x10FFFF ; $char++) {
134 $sp = ((0xD800 - (0x10000 >> 10) + ($char >> 10)) << 16)
135 + (0xDC00 + ($char & 0x3FF));
136 $table_sp[$char][0] = $sp; # mapped surrogate pair
137 $table_sp[$char][1] = $sp; # orig surrogate pair
138 $table_sp[$char][2] = $char; # mapped char
139 $table_sp[$char][3] = $char; # orig char
140 $table_sp[$char][4] = ""; # char name
143 for ($block = 0 ; $block <= 1025 ; $block++) {
144 $block_enable[$block] = 0;
147 $block_enable[1] = 1; # ASCII block is forcibly included
148 $block_enable[2] = 1; # in the array for Speed-Up.
150 for ($block = 1024 ; $block <= 17409 ; $block++) {
151 $block_enable_sp[$block] = 0;
154 # write data to table --------------------------------------------
# Open the Unicode Character Database file named by the first argument.
# Three-argument open always treats $ARGV[0] as a plain file name, and a
# failed open aborts here: otherwise the read loop would see no records
# and the script would silently generate tables with no case mappings.
open(UNICODEDATA, '<', $ARGV[0])
    or die "$0: cannot open '$ARGV[0]' for reading: $!\n";
158 while (<UNICODEDATA>) {
164 $Canonical_Combining_Class3,
166 $Decomposition_Mapping5,
173 $Simple_Uppercase_Mapping12,
174 $Simple_Lowercase_Mapping13,
175 $Simple_Titlecase_Mapping14
178 if ($_[0] eq "upper") {
179 $Mapping = $Simple_Uppercase_Mapping12;
180 } elsif ($_[0] eq "lower") {
181 $Mapping = $Simple_Lowercase_Mapping13;
186 next if ($Mapping eq "");
188 $hex_code0 = hex($code0);
189 $hex_Mapping = hex($Mapping);
191 if ($hex_code0 <= 0xFFFF) {
192 $table[$hex_code0][0] = $hex_Mapping;
193 #table[$hex_code0][1] already set
194 $table[$hex_code0][2] = $Name1;
195 $block_enable[($hex_code0 / 64) +1] = 1;
197 $sp = ((0xD800 - (0x10000 >> 10) + ($hex_Mapping >> 10)) << 16)
198 + (0xDC00 + ($hex_Mapping & 0x3FF));
199 $table_sp[$hex_code0][0] = $sp;
200 #table_sp[$hex_code0][1] already set
201 $table_sp[$hex_code0][2] = $hex_Mapping;
202 #table_sp[$hex_code0][3] already set
203 $table_sp[$hex_code0][4] = $Name1;
204 $block_enable_sp[($hex_code0 / 64) +1] = 1;
210 # array for BMP --------------------------------------------------
212 printf(CSOURCE "\/*******************************************************************\n");
213 printf(CSOURCE " Convert a wide character to %s case.\n", $_[0]);
214 printf(CSOURCE "*******************************************************************\/\n");
215 printf(CSOURCE "ucs2\_t to%s\_w\(ucs2\_t val\)\n", $_[0]);
216 printf(CSOURCE "{\n");
220 for ($block = 1 ; $block <= 1024 ; $block++) {
222 # rising edge detection
223 if ($block_enable[$block - 1] == 0 && $block_enable[$block] == 1) {
224 $block_start = $block;
227 # falling edge detection
228 if ($block_enable[$block] == 1 && $block_enable[$block + 1] == 0) {
231 $char_start = ($block_start -1)* 64;
232 $char_end = ($block_end * 64) -1;
234 printf(CHEADER "static const uint16\_t %s\_table\_%d\[%d\] \= \{\n",
235 $_[0], $table_no, $char_end - $char_start +1);
237 for ($char = $char_start ; $char <= $char_end ; $char++) {
238 printf(CHEADER " 0x%04X, /*U\+%04X*/ /*%s*/\n",
244 printf(CHEADER "\}\;\n");
245 printf(CHEADER "\n");
247 if ($char_start == 0x0000) {
248 printf(CSOURCE " if \( val \<\= 0x%04X)\n",
250 printf(CSOURCE " return %s\_table\_%d\[val]\;\n",
253 printf(CSOURCE " if \( val \>\= 0x%04X \&\& val \<\= 0x%04X)\n",
254 $char_start, $char_end);
255 printf(CSOURCE " return %s\_table\_%d\[val-0x%04X\]\;\n",
256 $_[0], $table_no, $char_start);
258 printf(CSOURCE "\n");
264 printf(CSOURCE "\treturn \(val\)\;\n");
265 printf(CSOURCE "\}\n");
266 printf(CSOURCE "\n");
268 # array for Surrogate Pair ---------------------------------------
270 printf(CSOURCE "\/*******************************************************************\n");
271 printf(CSOURCE " Convert a surrogate pair to %s case.\n", $_[0]);
272 printf(CSOURCE "*******************************************************************\/\n");
273 printf(CSOURCE "uint32\_t to%s\_sp\(uint32\_t val\)\n", $_[0]);
274 printf(CSOURCE "{\n");
278 for ($block = 1025 ; $block <= 17408 ; $block++) {
280 # rising edge detection
281 if ((($block_enable_sp[$block - 1] == 0) || ((($block - 1) & 0xF) == 0))
282 && ($block_enable_sp[$block] == 1)) {
283 $block_start = $block;
# falling edge detection
# A run of enabled blocks ends at the last block of a 16-block group
# (16 blocks * 64 chars = the 1024 code points that share one high
# surrogate) or when the following block is not enabled.  The generated
# lookup indexes a table by "val - first pair", which is only contiguous
# while the high surrogate stays the same.
# "==" binds tighter than "&" in Perl, so the mask must be parenthesized;
# without the parentheses the test was "($block - 1) & 1".
if (($block_enable_sp[$block] == 1) &&
    (((($block - 1) & 0xF) == 0xF) || ($block_enable_sp[$block + 1] == 0))) {
291 $char_start = ($block_start -1)* 64;
292 $char_end = ($block_end * 64) -1;
294 printf(CHEADER "static const uint32\_t %s\_table\_sp\_%d\[%d\] \= \{\n",
295 $_[0], $table_no, $char_end - $char_start +1);
297 for ($char = $char_start ; $char <= $char_end ; $char++) {
298 printf(CHEADER " 0x%08X, /*0x%08X*/ /*U\+%06X*/ /*U\+%06X*/ /*%s*/\n",
306 printf(CHEADER "\}\;\n");
307 printf(CHEADER "\n");
309 printf(CSOURCE " if \( val \>\= 0x%08X \&\& val \<\= 0x%08X)\n",
310 $table_sp[$char_start][1], $table_sp[$char_end][1]);
311 printf(CSOURCE " return %s\_table\_sp\_%d\[val-0x%08X\]\;\n",
312 $_[0], $table_no, $table_sp[$char_start][1]);
313 printf(CSOURCE "\n");
319 printf(CSOURCE "\treturn \(val\)\;\n");
320 printf(CSOURCE "\}\n");
321 printf(CSOURCE "\n");