3 # usage: make-precompose.h.pl UnicodeData.txt > precompose.h
6 # http://www.unicode.org/Public/UNIDATA/UCD.html
7 # http://www.unicode.org/reports/tr15/
8 # http://www.unicode.org/Public/*/ucd/UnicodeData*.txt
9 # http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
12 # table for binary search --------------------------------------------------
# Open the UnicodeData input named on the command line and the two scratch
# files that collect the unsorted table rows.
# Each open is checked: on failure the script stops with the OS error on
# STDERR instead of continuing with an unopened handle and emitting empty
# tables on standard output.
14 open(UNICODEDATA, "<$ARGV[0]") or die "cannot open $ARGV[0]: $!\n";
15 open(PRECOMPOSETEMP, ">precompose.TEMP") or die "cannot create precompose.TEMP: $!\n";
16 open( DECOMPOSETEMP, ">decompose.TEMP") or die "cannot create decompose.TEMP: $!\n";
# Main loop: read the input one line at a time and append one row per
# qualifying entry to each of the two scratch files.
18 while (<UNICODEDATA>){
# The names below are part of a list of per-line field variables; the
# statement that fills them is not fully visible in this excerpt.
# NOTE(review): the numeric suffixes (3, 5, 12, ...) presumably are the field
# positions within a UnicodeData record -- confirm against the full script.
24 $Canonical_Combining_Class3,
26 $Decomposition_Mapping5,
33 $Simple_Uppercase_Mapping12,
34 $Simple_Lowercase_Mapping13,
35 $Simple_Titlecase_Mapping14
# Only process entries whose decomposition mapping is non-empty, contains no
# '<' character, and contains at least one space (more than one value).
# NOTE(review): the '<' test presumably skips tagged (compatibility)
# mappings -- confirm against the UnicodeData format description.
38 if (($Decomposition_Mapping5 ne "") && ($Decomposition_Mapping5 !~ /\</) && ($Decomposition_Mapping5 =~ / /)) {
# Keep the first two space-separated values; the list assignment drops any
# further values.
39 ($base, $comb) = split(/ /,$Decomposition_Mapping5);
# Row terminator for a normal entry. The matching default for $leftbracket is
# not visible in this excerpt.
42 $rightbracket =" }, ";
# Codes above 0xFFFF are still written, but wrapped in /* ... */ so the row is
# commented out in the generated C table.
44 if (hex($code0) > 0xFFFF) { # DELETE THIS LINE IF INTERNAL CODE IS UCS4
45 $leftbracket = "\/\*{ "; # DELETE THIS LINE IF INTERNAL CODE IS UCS4
46 $rightbracket =" },\*\/ "; # DELETE THIS LINE IF INTERNAL CODE IS UCS4
47 } # DELETE THIS LINE IF INTERNAL CODE IS UCS4
# Append one precomposition row: code, base and comb as zero-padded 8-digit
# hex values, followed by the character name inside a C comment.
49 printf(PRECOMPOSETEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1);
# For the decomposition table, rows whose code lies in 0x2000-0x2FFF,
# 0xFE30-0xFE4F or 0x2F800-0x2FA1F get the commented-out brackets.
52 if ( ((0x2000 <= hex($code0)) && (hex($code0) <= 0x2FFF))
53 || ((0xFE30 <= hex($code0)) && (hex($code0) <= 0xFE4F))
54 || ((0x2F800 <= hex($code0)) && (hex($code0) <= 0x2FA1F))) {
55 $leftbracket = "\/\*{ ";
56 $rightbracket =" },\*\/ ";
# Append one decomposition row in the same layout as the precomposition row.
59 printf( DECOMPOSETEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1);
64 # sort ---------------------------------------------------------------------
# Sort both scratch files with the external sort(1) so the generated tables
# can be binary-searched. "-k N" makes the sort key start at the N-th
# blank-separated field of each row.
# NOTE(review): with the row layout "<bracket> code, base, comb ..." key 3
# starts at the base column and key 2 at the code column, assuming the default
# left bracket is a single blank-delimited token -- confirm.
# The exit status is checked so a failed sort stops the script instead of
# letting it emit a header with empty or stale table contents.
66 system("sort -k 3 precompose.TEMP \> precompose.SORT") == 0 or die "sort of precompose.TEMP failed (status $?)\n";
67 system("sort -k 2 decompose.TEMP \> decompose.SORT") == 0 or die "sort of decompose.TEMP failed (status $?)\n";
69 # print -------------------------------------------------------------------
# Banner of the generated header, written to standard output: names this
# generator script and the input file given on the command line, warns
# against manual edits, and records where the input data comes from.
71 printf ("\/\* This file is generated from contrib/misc/make-precompose.h.pl %s \*\/\n", $ARGV[0]);
72 print ("\/\* DO NOT EDIT BY HAND\!\!\! \*\/\n");
74 printf ("\/\* %s is got from \*\/\n", $ARGV[0]);
75 print ("\/\* http\:\/\/www.unicode.org\/Public\/UNIDATA\/UnicodeData.txt \*\/\n");
# Emit the opening of the C array "precompositions": an anonymous struct with
# three unsigned int members (replacement, base, comb).
78 print ("static const struct \{\n");
79 print (" unsigned int replacement\;\n");
80 print (" unsigned int base\;\n");
81 print (" unsigned int comb\;\n");
82 print ("\} precompositions\[\] \= \{\n");
# Copy the sorted rows to standard output through an external cat.
# NOTE(review): cat writes directly to the inherited stdout, so correct
# ordering relies on Perl flushing pending print output before system() --
# confirm on the target platform.
84 system("cat precompose.SORT");
# Emit the opening of the C array "decompositions", using the same
# three-member struct layout (replacement, base, comb).
89 print ("static const struct \{\n");
90 print (" unsigned int replacement\;\n");
91 print (" unsigned int base\;\n");
92 print (" unsigned int comb\;\n");
93 print ("\} decompositions\[\] \= \{\n");
# Copy the sorted rows to standard output through an external cat.
# NOTE(review): as with any system() call mixed with print, ordering relies on
# Perl flushing pending output before the child runs -- confirm.
95 system("cat decompose.SORT");
# End-of-file marker comment of the generated header.
100 print ("\/\* EOF \*\/\n");