]> arthur.barton.de Git - netatalk.git/blob - contrib/misc/make-precompose.h.pl
composition of surrogate pair
[netatalk.git] / contrib / misc / make-precompose.h.pl
1 #!/usr/bin/perl
2
3 # usage: make-precompose.h.pl UnicodeData.txt > precompose.h
4
5 # See
6 # http://www.unicode.org/Public/UNIDATA/UCD.html
7 # http://www.unicode.org/reports/tr15/
8 # http://www.unicode.org/Public/*/ucd/UnicodeData*.txt
9 # http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
10
11
12 # table for binary search --------------------------------------------------
13
# Strictures are lexically scoped: they apply from here to the end of the file.
use strict;
use warnings;

# Scan UnicodeData.txt (path given as the first command-line argument) and
# write one C initializer row for every character whose decomposition
# mapping is non-empty, carries no "<tag>", and contains a space (that is,
# a mapping made of a base and a combining code point).
#
# Rows are written to four scratch files in the current directory:
#   precompose.TEMP,    decompose.TEMP     - rows as 32-bit code points
#   precompose_sp.TEMP, decompose_sp.TEMP  - rows as UTF-16 surrogate pairs,
#                                            only for code points > 0xFFFF
# The precompose and decompose file of each kind receive identical rows.

# Return the UTF-16 (high, low) surrogate halves computed from a code point.
# Only meaningful for code points above 0xFFFF.
sub utf16_surrogate_pair {
    my ($code_point) = @_;
    my $high = 0xD800 - (0x10000 >> 10) + ($code_point >> 10);
    my $low  = 0xDC00 + ($code_point & 0x3FF);
    return ($high, $low);
}

# Open $path with $mode and return the handle; die with the OS error text
# instead of silently continuing with an unusable handle.
sub open_or_die {
    my ($mode, $path) = @_;
    open(my $handle, $mode, $path)
        or die "$0: cannot open '$path': $!\n";
    return $handle;
}

@ARGV >= 1
    or die "usage: $0 UnicodeData.txt > precompose.h\n";

my $unicodedata = open_or_die('<', $ARGV[0]);

my @temp_files = map { [ $_, open_or_die('>', $_) ] } (
    'precompose.TEMP',
    'decompose.TEMP',
    'precompose_sp.TEMP',
    'decompose_sp.TEMP',
);
my ($precompose_temp, $decompose_temp, $precompose_sp_temp, $decompose_sp_temp)
    = map { $_->[1] } @temp_files;

# Row delimiters: a live row, and a row wrapped in a C comment.
my @live_brackets      = ('  { ', ' },     ');
my @commented_brackets = ('/*{ ', ' },*/   ');

while (my $line = <$unicodedata>) {
    chomp $line;

    # Fields are ';'-separated; only the code point (0), the character
    # name (1) and the decomposition mapping (5) are needed.
    my ($code0, $name, $decomposition) = (split /;/, $line)[0, 1, 5];

    next unless defined $decomposition && $decomposition ne '';
    next if     $decomposition =~ /</;      # tagged mapping
    next unless $decomposition =~ / /;      # single code point mapping

    my $code = hex $code0;
    my ($base, $comb) = map { hex } (split / /, $decomposition)[0, 1];

    my ($leftbracket, $rightbracket) = @live_brackets;

    # AFP 3.x Spec: rows for these ranges are emitted commented out.
    if (   (0x2000  <= $code && $code <= 0x2FFF)
        || (0xFE30  <= $code && $code <= 0xFE4F)
        || (0x2F800 <= $code && $code <= 0x2FA1F)) {
        ($leftbracket, $rightbracket) = @commented_brackets;
    }

    if ($code > 0xFFFF) {                                      # DELETE THIS LINE  IF INTERNAL CODE IS UCS4

        # Each value is printed as high surrogate followed by low surrogate.
        my $sp_row = sprintf("%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s/* %s */\n",
                             $leftbracket,
                             utf16_surrogate_pair($code),
                             utf16_surrogate_pair($base),
                             utf16_surrogate_pair($comb),
                             $rightbracket, $name);
        print {$precompose_sp_temp} $sp_row;
        print {$decompose_sp_temp}  $sp_row;

        # The code point does not fit in 16 bits, so its row in the
        # non-surrogate tables is kept only as a comment.
        ($leftbracket, $rightbracket) = @commented_brackets;   # DELETE THIS LINE  IF INTERNAL CODE IS UCS4
    }                                                          # DELETE THIS LINE  IF INTERNAL CODE IS UCS4

    my $row = sprintf("%s0x%08X, 0x%08X, 0x%08X%s/* %s */\n",
                      $leftbracket, $code, $base, $comb, $rightbracket, $name);
    print {$precompose_temp} $row;
    print {$decompose_temp}  $row;
}

close($unicodedata)
    or die "$0: error reading '$ARGV[0]': $!\n";

# Close explicitly so that buffered write errors are reported here, before
# anything else reads the scratch files.
for my $temp_file (@temp_files) {
    my ($path, $handle) = @{$temp_file};
    close($handle)
        or die "$0: error writing '$path': $!\n";
}
81
82 # sort ---------------------------------------------------------------------
83
# Sort each scratch file into its .SORT counterpart with sort(1).
# The key starts at whitespace-separated field 3 for the precompose files
# and at field 2 for the decompose files, and runs to the end of the line.
{
    # Byte-wise collation, so the fixed-width hexadecimal keys sort the same
    # way whatever locale the caller happens to run under.
    local $ENV{LC_ALL} = 'C';

    my @sort_jobs = (
        [ 3, 'precompose.TEMP',    'precompose.SORT'    ],
        [ 2, 'decompose.TEMP',     'decompose.SORT'     ],
        [ 3, 'precompose_sp.TEMP', 'precompose_sp.SORT' ],
        [ 2, 'decompose_sp.TEMP',  'decompose_sp.SORT'  ],
    );

    for my $job (@sort_jobs) {
        my ($key_field, $input, $output) = @{$job};

        # List form runs sort directly (no shell); -o names the output file.
        system('sort', '-k', $key_field, '-o', $output, $input) == 0
            or die "$0: sorting '$input' into '$output' failed (status $?)\n";
    }
}
89
90 # print  -------------------------------------------------------------------
91
# Write the generated C header to standard output: a banner naming the
# input file, then four static tables whose rows are copied from the sorted
# scratch files in the current directory.

# Print one table: the struct declaration named $table_name, every line of
# $rows_file unchanged, and the closing brace. Dies if $rows_file cannot be
# read, rather than emitting an empty table.
sub emit_table {
    my ($table_name, $rows_file) = @_;

    print("static const struct {\n");
    print("  unsigned int replacement;\n");
    print("  unsigned int base;\n");
    print("  unsigned int comb;\n");
    print('} ', $table_name, "[] = {\n");

    open(my $rows, '<', $rows_file)
        or die "$0: cannot open '$rows_file': $!\n";
    while (my $row = <$rows>) {
        print($row);
    }
    close($rows)
        or die "$0: error reading '$rows_file': $!\n";

    print("};\n");
    print("\n");
    return;
}

printf("/* This file is generated by contrib/misc/make-precompose.h.pl %s */\n", $ARGV[0]);
print("/* DO NOT EDIT BY HAND!!!                                           */\n");
print("\n");
printf("/* %s is got from                                      */\n", $ARGV[0]);
print("/* http://www.unicode.org/Public/UNIDATA/UnicodeData.txt            */\n");
print("\n");

emit_table('precompositions',    'precompose.SORT');
emit_table('decompositions',     'decompose.SORT');
emit_table('precompositions_sp', 'precompose_sp.SORT');
emit_table('decompositions_sp',  'decompose_sp.SORT');

print("/* EOF */\n");
146
147 # EOF