]> arthur.barton.de Git - netatalk.git/blob - contrib/misc/make-precompose.h.pl
77f789a6171b4df16cc3992b645214c8c35e1601
[netatalk.git] / contrib / misc / make-precompose.h.pl
1 #!/usr/bin/perl
2
3 # usage: make-precompose.h.pl UnicodeData.txt > precompose.h
4
5 # See
6 # http://www.unicode.org/Public/UNIDATA/UCD.html
7 # http://www.unicode.org/reports/tr15/
8 # http://www.unicode.org/Public/*/ucd/UnicodeData*.txt
9 # http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
10
11
12 # table for binary search --------------------------------------------------
13
use strict;
use warnings;

# Read UnicodeData.txt (first command-line argument) and write one C
# initializer row per canonical two-element decomposition to two
# temporary files, precompose.TEMP and decompose.TEMP.  Both files are
# unsorted at this point; a later step sorts them.
#
# Each row has the form
#   { 0xCODE, 0xBASE, 0xCOMB },     /* CHARACTER NAME */
# and a row that must not be compiled in is wrapped in /* ... */ instead.

my $unicode_data = $ARGV[0];
defined $unicode_data
    or die "usage: $0 UnicodeData.txt > precompose.h\n";

# Three-argument open with explicit checks: a missing or unreadable input
# must abort the run instead of silently producing empty tables.
open(my $unicode_fh, '<', $unicode_data)
    or die "cannot open $unicode_data for reading: $!\n";
open(my $precompose_fh, '>', 'precompose.TEMP')
    or die "cannot open precompose.TEMP for writing: $!\n";
open(my $decompose_fh, '>', 'decompose.TEMP')
    or die "cannot open decompose.TEMP for writing: $!\n";

# Set this to 0 IF INTERNAL CODE IS UCS4: code points above 0xFFFF are then
# no longer commented out of the generated tables.
my $internal_code_is_ucs2 = 1;

my $row_format   = "%s0x%08X, 0x%08X, 0x%08X%s/* %s */\n";
my @live_row     = ("  { ", " },     ");      # opening / closing text of a compiled row
my @disabled_row = ("/*{ ", " },*/   ");      # same row, commented out in the C source

while (my $line = <$unicode_fh>) {
    chomp $line;

    # Only the code point (field 0), the name (field 1) and the
    # decomposition mapping (field 5) are needed.
    my ($code_hex, $name, $mapping) = (split /;/, $line)[0, 1, 5];
    next unless defined $mapping;

    # Keep canonical mappings only (compatibility mappings carry a <tag>),
    # and only those that contain a space, i.e. more than one code point.
    next if $mapping eq '' || $mapping =~ /</ || $mapping !~ / /;

    my ($base_hex, $comb_hex) = split / /, $mapping;
    my ($code, $base, $comb)  = map { hex } $code_hex, $base_hex, $comb_hex;

    my $precompose_disabled = $internal_code_is_ucs2 && $code > 0xFFFF;

    # The decompose table starts from the same decision and additionally
    # drops the ranges marked "AFP 3.x Spec" in the original script.
    my $decompose_disabled = $precompose_disabled
        || (0x2000  <= $code && $code <= 0x2FFF)
        || (0xFE30  <= $code && $code <= 0xFE4F)
        || (0x2F800 <= $code && $code <= 0x2FA1F);

    my ($pre_open, $pre_close) = $precompose_disabled ? @disabled_row : @live_row;
    my ($dec_open, $dec_close) = $decompose_disabled  ? @disabled_row : @live_row;

    printf {$precompose_fh} $row_format, $pre_open, $code, $base, $comb, $pre_close, $name;
    printf {$decompose_fh}  $row_format, $dec_open, $code, $base, $comb, $dec_close, $name;
}

close($unicode_fh);

# Buffered write errors (e.g. disk full) only surface at close time, and the
# temp files must be complete on disk before they are sorted.
close($precompose_fh) or die "error writing precompose.TEMP: $!\n";
close($decompose_fh)  or die "error writing decompose.TEMP: $!\n";
63
64 # sort ---------------------------------------------------------------------
65
# Sort the temporary tables with the external sort(1) utility:
#   precompose.TEMP on whitespace-separated field 3 onward (base, comb, ...)
#   decompose.TEMP  on whitespace-separated field 2 onward (code point, ...)
{
    # Force byte-wise collation.  The keys are fixed-width uppercase hex, so
    # byte order equals numeric order; the caller's locale must not be able
    # to change the order of the generated tables.
    local $ENV{LC_ALL} = 'C';

    # List-form system() runs sort directly (no shell); -o names the output
    # file, replacing the shell redirection.  A failed sort aborts the run
    # rather than letting an empty or partial table reach the header.
    system('sort', '-k', '3', '-o', 'precompose.SORT', 'precompose.TEMP') == 0
        or die "sorting precompose.TEMP failed (wait status $?)\n";
    system('sort', '-k', '2', '-o', 'decompose.SORT', 'decompose.TEMP') == 0
        or die "sorting decompose.TEMP failed (wait status $?)\n";
}
68
69 # print  -------------------------------------------------------------------
70
# Emit the generated C header on STDOUT: a banner naming the input file,
# then the precompositions[] and decompositions[] arrays whose rows are
# copied verbatim from precompose.SORT and decompose.SORT.

# The table rows are written by child `cat` processes sharing our STDOUT.
# Autoflush guarantees that everything printed here reaches the output
# before each child runs, instead of relying on an implicit flush at fork.
$| = 1;

my $source_name = $ARGV[0];

printf "/* This file is generated from contrib/misc/make-precompose.h.pl %s */\n", $source_name;
print  "/* DO NOT EDIT BY HAND!!!                                           */\n";
print  "\n";
printf "/* %s is got from                                      */\n", $source_name;
print  "/* http://www.unicode.org/Public/UNIDATA/UnicodeData.txt            */\n";
print  "\n";

# Both arrays share the same anonymous struct layout; only the array name
# and the file supplying the rows differ.
for my $table ( [ 'precompositions', 'precompose.SORT' ],
                [ 'decompositions',  'decompose.SORT'  ] ) {
    my ($array_name, $sorted_file) = @{$table};

    print  "static const struct {\n";
    print  "  unsigned int replacement;\n";
    print  "  unsigned int base;\n";
    print  "  unsigned int comb;\n";
    printf "} %s[] = {\n", $array_name;

    # Abort if the rows cannot be copied: a header with a silently empty
    # table is worse than no header at all.
    system('cat', $sorted_file) == 0
        or die "copying $sorted_file to the output failed (wait status $?)\n";

    print  "};\n";
    print  "\n";
}

print "/* EOF */\n";
101
102 # EOF