]> arthur.barton.de Git - netatalk.git/commitdiff
Add make-casetable.pl and make-precompose.h.pl to distribution, Bug #3404322
author Frank Lahm <franklahm@googlemail.com>
Tue, 6 Sep 2011 10:59:49 +0000 (12:59 +0200)
committer Frank Lahm <franklahm@googlemail.com>
Tue, 6 Sep 2011 10:59:49 +0000 (12:59 +0200)
contrib/misc/make-casetable.pl [deleted file]
contrib/misc/make-precompose.h.pl [deleted file]
contrib/shell_utils/Makefile.am
contrib/shell_utils/make-casetable.pl [new file with mode: 0755]
contrib/shell_utils/make-precompose.h.pl [new file with mode: 0755]

diff --git a/contrib/misc/make-casetable.pl b/contrib/misc/make-casetable.pl
deleted file mode 100755 (executable)
index 9af15a9..0000000
+++ /dev/null
@@ -1,324 +0,0 @@
-#!/usr/bin/perl
-#
-# usage: make-casetable.pl <infile> <outfile1> <outfile2>
-#        make-casetable.pl UnicodeData.txt utf16_casetable.h utf16_case.c
-#
-# (c) 2011 by HAT <hat@fa2.so-net.ne.jp>
-#
-#  This program is free software; you can redistribute it and/or modify
-#  it under the terms of the GNU General Public License as published by
-#  the Free Software Foundation; either version 2 of the License, or
-#  (at your option) any later version.
-#
-#  This program is distributed in the hope that it will be useful,
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#  GNU General Public License for more details.
-#
-
-# See
-# http://www.unicode.org/reports/tr44/
-# http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
-
-# One block has 64 chars.
-#
-# BMP
-# block    0 = dummy
-# block    1 = U+0000 - U+003F
-# block    2 = U+0040 - U+007F
-# .....
-# block 1024 = U+FFC0 - U+FFFF
-# block 1025 = dummy
-#
-# Surrogate Pair
-# block  1024 = dummy
-# block  1025 = U+010000 - U+01003F
-# block  1026 = U+010040 - U+01007F
-# .....
-# block 17408 = U+10FFC0 - U+10FFFF
-# block 17409 = dummy
-#
-# Dummy block is for edge detection.
-# If block include upper/lower chars, block_enable[]=1.
-
-use strict;
-use warnings;
-
-our $code0;
-our $Name1;
-our $General_Category2;
-our $Canonical_Combining_Class3;
-our $Bidi_Class4;
-our $Decomposition_Mapping5;
-our $Numeric_Value6;
-our $Numeric_Value7;
-our $Numeric_Value8;
-our $Bidi_Mirrored9;
-our $Unicode_1_Name10;
-our $ISO_Comment11;
-our $Simple_Uppercase_Mapping12;
-our $Simple_Lowercase_Mapping13;
-our $Simple_Titlecase_Mapping14;
-
-our $hex_code0;
-our $Mapping;
-our $hex_Mapping;
-
-our $char;
-our $sp;
-our $block;
-
-our @table;
-our @table_sp;
-
-our @block_enable;
-our @block_enable_sp;
-
-our $table_no;
-our $block_start;
-our $block_end;
-our $char_start;
-our $char_end;
-
-open(CHEADER, ">$ARGV[1]");
-open(CSOURCE, ">$ARGV[2]");
-
-printf (CHEADER "\/\*\n");
-printf (CHEADER "DO NOT EDIT BY HAND\!\!\!\n");
-printf (CHEADER "\n");
-printf (CHEADER "This file is generated by\n");
-printf (CHEADER " contrib/misc/make-casetable.pl %s %s %s\n", $ARGV[0], $ARGV[1], $ARGV[2]);
-printf (CHEADER "\n");
-printf (CHEADER "%s is got from\n", $ARGV[0]);
-printf (CHEADER "http\:\/\/www.unicode.org\/Public\/UNIDATA\/UnicodeData.txt\n");
-printf (CHEADER "\*\/\n");
-printf (CHEADER "\n");
-
-printf (CSOURCE "\/\*\n");
-printf (CSOURCE "DO NOT EDIT BY HAND\!\!\!\n");
-printf (CSOURCE "\n");
-printf (CSOURCE "This file is generated by\n");
-printf (CSOURCE " contrib/misc/make-casetable.pl %s %s %s\n", $ARGV[0], $ARGV[1], $ARGV[2]);
-printf (CSOURCE "\n");
-printf (CSOURCE "%s is got from\n", $ARGV[0]);
-printf (CSOURCE "http\:\/\/www.unicode.org\/Public\/UNIDATA\/UnicodeData.txt\n");
-printf (CSOURCE "\*\/\n");
-printf (CSOURCE "\n");
-printf (CSOURCE "\#include \<stdint.h\>\n");
-printf (CSOURCE "\#include \<atalk\/unicode.h\>\n");
-printf (CSOURCE "\#include \"%s\"\n", $ARGV[1]);
-printf (CSOURCE "\n");
-
-&make_array("upper");
-&make_array("lower");
-
-printf (CHEADER "\/\* EOF \*\/\n");
-printf (CSOURCE "\/\* EOF \*\/\n");
-
-close(CHEADER);
-close(CSOURCE);
-
-
-###########################################################################
-sub make_array{
-
-    # init table -----------------------------------------------------
-
-    for ($char = 0 ; $char <= 0xFFFF ; $char++) {
-        $table[$char][0] = $char;       # mapped char
-        $table[$char][1] = $char;       # orig char
-        $table[$char][2] = "";          # char name
-    }
-
-    for ($char = 0x10000 ; $char <= 0x10FFFF ; $char++) {
-        $sp = ((0xD800 - (0x10000 >> 10) + ($char >> 10)) << 16)
-            + (0xDC00 + ($char & 0x3FF));
-        $table_sp[$char][0] = $sp;      # mapped surrogate pair
-        $table_sp[$char][1] = $sp;      # orig surrogate pair
-        $table_sp[$char][2] = $char;    # mapped char
-        $table_sp[$char][3] = $char;    # orig char
-        $table_sp[$char][4] = "";       # char name
-    }
-
-    for ($block = 0 ; $block <= 1025 ; $block++) {
-        $block_enable[$block] = 0;
-    }
-
-    $block_enable[1] = 1;           # ASCII block is forcibly included
-    $block_enable[2] = 1;           # in the array for Speed-Up.
-
-    for ($block = 1024 ; $block <= 17409 ; $block++) {
-        $block_enable_sp[$block] = 0;
-    }
-
-    # write data to table --------------------------------------------
-
-    open(UNICODEDATA, "<$ARGV[0]");
-
-    while (<UNICODEDATA>) {
-        chop;
-        (
-            $code0,
-            $Name1,
-            $General_Category2,
-            $Canonical_Combining_Class3,
-            $Bidi_Class4,
-            $Decomposition_Mapping5,
-            $Numeric_Value6,
-            $Numeric_Value7,
-            $Numeric_Value8,
-            $Bidi_Mirrored9,
-            $Unicode_1_Name10,
-            $ISO_Comment11,
-            $Simple_Uppercase_Mapping12,
-            $Simple_Lowercase_Mapping13,
-            $Simple_Titlecase_Mapping14
-        ) = split(/\;/);
-
-        if ($_[0] eq "upper") {
-            $Mapping = $Simple_Uppercase_Mapping12;
-        } elsif ($_[0] eq "lower") {
-            $Mapping = $Simple_Lowercase_Mapping13;
-        } else {
-            exit(1);
-        }
-
-        next if ($Mapping eq "");
-
-        $hex_code0 = hex($code0);
-        $hex_Mapping = hex($Mapping);
-
-        if ($hex_code0 <= 0xFFFF) {
-            $table[$hex_code0][0] = $hex_Mapping;
-            #table[$hex_code0][1]   already set
-            $table[$hex_code0][2] = $Name1;
-            $block_enable[($hex_code0 / 64) +1] = 1;
-        } else {
-            $sp = ((0xD800 - (0x10000 >> 10) + ($hex_Mapping >> 10)) << 16)
-                + (0xDC00 + ($hex_Mapping & 0x3FF));
-            $table_sp[$hex_code0][0] = $sp;
-            #table_sp[$hex_code0][1]   already set
-            $table_sp[$hex_code0][2] = $hex_Mapping;
-            #table_sp[$hex_code0][3]   already set
-            $table_sp[$hex_code0][4] = $Name1;
-            $block_enable_sp[($hex_code0 / 64) +1] = 1;
-        }
-    }
-
-    close(UNICODEDATA);
-
-    # array for BMP --------------------------------------------------
-
-    printf(CSOURCE "\/*******************************************************************\n");
-    printf(CSOURCE " Convert a wide character to %s case.\n", $_[0]);
-    printf(CSOURCE "*******************************************************************\/\n");
-    printf(CSOURCE "ucs2\_t to%s\_w\(ucs2\_t val\)\n", $_[0]);
-    printf(CSOURCE "{\n");
-
-    $table_no = 1;
-
-    for ($block = 1 ; $block <= 1024 ; $block++) {
-
-        # rising edge detection
-        if ($block_enable[$block - 1] == 0 && $block_enable[$block] == 1) {
-            $block_start = $block;
-        }
-
-        # falling edge detection
-        if ($block_enable[$block] == 1 && $block_enable[$block + 1] == 0) {
-            $block_end = $block;
-
-            $char_start = ($block_start -1)* 64;
-            $char_end = ($block_end * 64) -1;
-
-            printf(CHEADER "static const uint16\_t %s\_table\_%d\[%d\] \= \{\n",
-                   $_[0], $table_no, $char_end - $char_start +1);
-
-            for ($char = $char_start ; $char <= $char_end ; $char++) {
-                printf(CHEADER "  0x%04X, /*U\+%04X*/ /*%s*/\n",
-                       $table[$char][0],
-                       $table[$char][1],
-                       $table[$char][2]
-                   );
-            }
-            printf(CHEADER "\}\;\n");
-            printf(CHEADER "\n");
-
-            if ($char_start == 0x0000) {
-                printf(CSOURCE "    if \( val \<\= 0x%04X)\n",
-                       $char_end);
-                printf(CSOURCE "        return %s\_table\_%d\[val]\;\n",
-                       $_[0], $table_no);
-            } else {
-                printf(CSOURCE "    if \( val \>\= 0x%04X \&\& val \<\= 0x%04X)\n",
-                       $char_start, $char_end);
-                printf(CSOURCE "        return %s\_table\_%d\[val-0x%04X\]\;\n",
-                       $_[0], $table_no, $char_start);
-            }
-            printf(CSOURCE "\n");
-
-            $table_no++;
-        }
-    }
-
-    printf(CSOURCE "\treturn \(val\)\;\n");
-    printf(CSOURCE "\}\n");
-    printf(CSOURCE "\n");
-
-    # array for Surrogate Pair ---------------------------------------
-
-    printf(CSOURCE "\/*******************************************************************\n");
-    printf(CSOURCE " Convert a surrogate pair to %s case.\n", $_[0]);
-    printf(CSOURCE "*******************************************************************\/\n");
-    printf(CSOURCE "uint32\_t to%s\_sp\(uint32\_t val\)\n", $_[0]);
-    printf(CSOURCE "{\n");
-
-    $table_no = 1;
-
-    for ($block = 1025 ; $block <= 17408 ; $block++) {
-
-        # rising edge detection
-        if ((($block_enable_sp[$block - 1] == 0) || ((($block - 1) & 0xF) == 0))
-                && ($block_enable_sp[$block] == 1)) {
-            $block_start = $block;
-        }
-
-        # falling edge detection
-        if (($block_enable_sp[$block] == 1) &&
-                ((($block - 1) & 0xF == 0xF) || ($block_enable_sp[$block + 1] == 0))) {
-            $block_end = $block;
-
-            $char_start = ($block_start -1)* 64;
-            $char_end = ($block_end * 64) -1;
-
-            printf(CHEADER "static const uint32\_t %s\_table\_sp\_%d\[%d\] \= \{\n",
-                   $_[0], $table_no, $char_end - $char_start +1);
-
-            for ($char = $char_start ; $char <= $char_end ; $char++) {
-                printf(CHEADER "  0x%08X, /*0x%08X*/ /*U\+%06X*/ /*U\+%06X*/ /*%s*/\n",
-                       $table_sp[$char][0],
-                       $table_sp[$char][1],
-                       $table_sp[$char][2],
-                       $table_sp[$char][3],
-                       $table_sp[$char][4]
-                   );
-            }
-            printf(CHEADER "\}\;\n");
-            printf(CHEADER "\n");
-
-            printf(CSOURCE "    if \( val \>\= 0x%08X \&\& val \<\= 0x%08X)\n",
-                   $table_sp[$char_start][1], $table_sp[$char_end][1]);
-            printf(CSOURCE "        return %s\_table\_sp\_%d\[val-0x%08X\]\;\n",
-                   $_[0], $table_no, $table_sp[$char_start][1]);
-            printf(CSOURCE "\n");
-
-            $table_no++;
-        }
-    }
-
-    printf(CSOURCE "\treturn \(val\)\;\n");
-    printf(CSOURCE "\}\n");
-    printf(CSOURCE "\n");
-}
-
-# EOF
diff --git a/contrib/misc/make-precompose.h.pl b/contrib/misc/make-precompose.h.pl
deleted file mode 100755 (executable)
index 11cb8d9..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-#!/usr/bin/perl
-#
-# usage: make-precompose.h.pl UnicodeData.txt > precompose.h
-#
-# (c) 2008-2011 by HAT <hat@fa2.so-net.ne.jp>
-#
-#  This program is free software; you can redistribute it and/or modify
-#  it under the terms of the GNU General Public License as published by
-#  the Free Software Foundation; either version 2 of the License, or
-#  (at your option) any later version.
-#
-#  This program is distributed in the hope that it will be useful,
-#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#  GNU General Public License for more details.
-# 
-
-# See
-# http://www.unicode.org/Public/UNIDATA/UCD.html
-# http://www.unicode.org/reports/tr15/
-# http://www.unicode.org/Public/*/ucd/UnicodeData*.txt
-# http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
-
-
-# temp files for binary search (compose.TEMP, compose_sp.TEMP) -------------
-
-open(UNICODEDATA, "<$ARGV[0]");
-
-open(COMPOSE_TEMP, ">compose.TEMP");
-open(COMPOSE_SP_TEMP, ">compose_sp.TEMP");
-
-while (<UNICODEDATA>) {
-    chop;
-    (
-     $code0,
-     $Name1,
-     $General_Category2,
-     $Canonical_Combining_Class3,
-     $Bidi_Class4,
-     $Decomposition_Mapping5,
-     $Numeric_Value6,
-     $Numeric_Value7,
-     $Numeric_Value8,
-     $Bidi_Mirrored9,
-     $Unicode_1_Name10,
-     $ISO_Comment11,
-     $Simple_Uppercase_Mapping12,
-     $Simple_Lowercase_Mapping13,
-     $Simple_Titlecase_Mapping14
-    ) = split(/\;/);
-
-    if (($Decomposition_Mapping5 ne "") && ($Decomposition_Mapping5 !~ /\</) && ($Decomposition_Mapping5 =~ / /)) {
-       ($base, $comb) = split(/ /,$Decomposition_Mapping5);
-
-       $leftbracket  = "  { ";
-       $rightbracket =" },     ";
-
-       # AFP 3.x Spec
-       if ( ((0x2000  <= hex($code0)) && (hex($code0) <=  0x2FFF))
-            || ((0xFE30  <= hex($code0)) && (hex($code0) <=  0xFE4F))
-            || ((0x2F800 <= hex($code0)) && (hex($code0) <= 0x2FA1F))) {
-           $leftbracket  = "\/\*{ ";
-           $rightbracket =" },\*\/   ";
-       }
-
-       if (hex($code0) > 0xFFFF) {
-
-           $code0_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($code0) >> 10);
-           $code0_sp_lo = 0xDC00 + (hex($code0) & 0x3FF);
-
-           $base_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($base) >> 10);
-           $base_sp_lo = 0xDC00 + (hex($base) & 0x3FF);
-
-           $comb_sp_hi = 0xD800 - (0x10000 >> 10) + (hex($comb) >> 10);
-           $comb_sp_lo = 0xDC00 + (hex($comb) & 0x3FF);
-
-           printf(COMPOSE_SP_TEMP "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s\/\* %s \*\/\n",
-                  $leftbracket, $code0_sp_hi ,$code0_sp_lo, $base_sp_hi, $base_sp_lo, $comb_sp_hi, $comb_sp_lo, $rightbracket, $Name1);
-
-           $leftbracket  = "\/\*{ ";
-           $rightbracket =" },\*\/   ";
-       }
-
-       printf(COMPOSE_TEMP "%s0x%08X, 0x%08X, 0x%08X%s\/\* %s \*\/\n", $leftbracket, hex($code0), hex($base), hex($comb), $rightbracket, $Name1);
-
-    }
-}
-
-close(UNICODEDATA);
-
-close(COMPOSE_TEMP);
-close(COMPOSE_SP_TEMP);
-
-# macros for BMP (PRECOMP_COUNT, DECOMP_COUNT, MAXCOMBLEN) ----------------
-
-open(COMPOSE_TEMP, "<compose.TEMP");
-
-@comp_table = ();
-$comp_count = 0;
-
-while (<COMPOSE_TEMP>) {
-    if (m/^\/\*/) {
-       next;
-    }
-    $comp_table[$comp_count][0] = substr($_, 4, 10);
-    $comp_table[$comp_count][1] = substr($_, 16, 10);
-    $comp_count++;
-}
-
-$maxcomblen = 2;      # Hangul's maxcomblen is already 2. That is, VT.
-
-for ($i = 0 ; $i < $comp_count ; $i++) {
-    $base = $comp_table[$i][1];
-    $comblen = 1;
-    $j = 0;
-    while ($j < $comp_count) {
-       if ($base ne $comp_table[$j][0]) {
-           $j++;
-           next;
-       } else {
-           $comblen++;
-           $base =  $comp_table[$j][1];
-           $j = 0;
-       }
-    }
-    $maxcomblen = ($maxcomblen > $comblen) ? $maxcomblen : $comblen;
-}
-
-close(COMPOSE_TEMP);
-
-# macros for SP (PRECOMP_SP_COUNT,DECOMP_SP_COUNT, MAXCOMBSPLEN) -----------
-
-open(COMPOSE_SP_TEMP, "<compose_sp.TEMP");
-
-@comp_sp_table = ();
-$comp_sp_count = 0;
-
-while (<COMPOSE_SP_TEMP>) {
-    if (m/^\/\*/) {
-       next;
-    }
-    $comp_sp_table[$comp_sp_count][0] = substr($_, 4, 10);
-    $comp_sp_table[$comp_sp_count][1] = substr($_, 16, 10);
-    $comp_sp_count++;
-}
-
-$maxcombsplen = 2;     # one char have 2 codepoints, like a D8xx DCxx.
-
-for ($i = 0 ; $i < $comp_sp_count ; $i++) {
-    $base_sp = $comp_sp_table[$i][1];
-    $comblen = 2;
-    $j = 0;
-    while ($j < $comp_sp_count) {
-       if ($base_sp ne $comp_sp_table[$j][0]) {
-           $j++;
-           next;
-       } else {
-           $comblen += 2;
-           $base_sp =  $comp_sp_table[$j][1];
-           $j = 0;
-       }
-    }
-    $maxcombsplen = ($maxcombsplen > $comblen) ? $maxcombsplen : $comblen;
-}
-
-close(COMPOSE_SP_TEMP);
-
-# macro for buffer length (COMBBUFLEN) -------------------------------------
-
-$combbuflen = ($maxcomblen > $maxcombsplen) ? $maxcomblen : $maxcombsplen;
-
-# sort ---------------------------------------------------------------------
-
-system("sort -k 3 compose.TEMP \> precompose.SORT");
-system("sort -k 2 compose.TEMP \>  decompose.SORT");
-
-system("sort -k 3 compose_sp.TEMP \> precompose_sp.SORT");
-system("sort -k 2 compose_sp.TEMP \>  decompose_sp.SORT");
-
-# print  -------------------------------------------------------------------
-
-print ("\/\* DO NOT EDIT BY HAND\!\!\!                                           \*\/\n");
-print ("\/\* This file is generated by                                        \*\/\n");
-printf ("\/\*              contrib/misc/make-precompose.h.pl %s   \*\/\n", $ARGV[0]);
-print ("\n");
-printf ("\/\* %s is got from                                      \*\/\n", $ARGV[0]);
-print ("\/\* http\:\/\/www.unicode.org\/Public\/UNIDATA\/UnicodeData.txt            \*\/\n");
-print ("\n");
-
-print ("\#define SBASE 0xAC00\n");
-print ("\#define LBASE 0x1100\n");
-print ("\#define VBASE 0x1161\n");
-print ("\#define TBASE 0x11A7\n");
-print ("\#define LCOUNT 19\n");
-print ("\#define VCOUNT 21\n");
-print ("\#define TCOUNT 28\n");
-print ("\#define NCOUNT 588     \/\* (VCOUNT \* TCOUNT) \*\/\n");
-print ("\#define SCOUNT 11172   \/\* (LCOUNT \* NCOUNT) \*\/\n");
-print ("\n");
-
-printf ("\#define PRECOMP_COUNT %d\n", $comp_count);
-printf ("\#define DECOMP_COUNT %d\n", $comp_count);
-printf ("\#define MAXCOMBLEN %d\n", $maxcomblen);
-print ("\n");
-printf ("\#define PRECOMP_SP_COUNT %d\n", $comp_sp_count);
-printf ("\#define DECOMP_SP_COUNT %d\n", $comp_sp_count);
-printf ("\#define MAXCOMBSPLEN %d\n", $maxcombsplen);
-print ("\n");
-printf ("\#define COMBBUFLEN %d  \/\* max\(MAXCOMBLEN\,MAXCOMBSPLEN\) \*\/\n", $combbuflen);
-print ("\n");
-
-print ("static const struct \{\n");
-print ("  unsigned int replacement\;\n");
-print ("  unsigned int base\;\n");
-print ("  unsigned int comb\;\n");
-print ("\} precompositions\[\] \= \{\n");
-
-system("cat precompose.SORT");
-
-print ("\}\;\n");
-print ("\n");
-
-print ("static const struct \{\n");
-print ("  unsigned int replacement\;\n");
-print ("  unsigned int base\;\n");
-print ("  unsigned int comb\;\n");
-print ("\} decompositions\[\] \= \{\n");
-
-system("cat decompose.SORT");
-
-print ("\}\;\n");
-print ("\n");
-
-
-
-print ("static const struct \{\n");
-print ("  unsigned int replacement_sp\;\n");
-print ("  unsigned int base_sp\;\n");
-print ("  unsigned int comb_sp\;\n");
-print ("\} precompositions_sp\[\] \= \{\n");
-
-system("cat precompose_sp.SORT");
-
-print ("\}\;\n");
-print ("\n");
-
-print ("static const struct \{\n");
-print ("  unsigned int replacement_sp\;\n");
-print ("  unsigned int base_sp\;\n");
-print ("  unsigned int comb_sp\;\n");
-print ("\} decompositions_sp\[\] \= \{\n");
-
-system("cat decompose_sp.SORT");
-
-print ("\}\;\n");
-print ("\n");
-
-print ("\/\* EOF \*\/\n");
-
-# EOF
diff --git a/contrib/shell_utils/Makefile.am b/contrib/shell_utils/Makefile.am
index 6ae8b2771c00e971a37b0d010e1deccc16ecf3b4..5e4aaa91abe17fedd8f559c67f94567a379e51de 100644 (file)
@@ -20,4 +20,4 @@ CLEANFILES = $(GENERATED_FILES)
 
 bin_SCRIPTS = $(PERLSCRIPTS) $(GENERATED_FILES)
 
-EXTRA_DIST = $(TEMPLATE_FILES)
+EXTRA_DIST = $(TEMPLATE_FILES) make-casetable.pl make-precompose.h.pl
diff --git a/contrib/shell_utils/make-casetable.pl b/contrib/shell_utils/make-casetable.pl
new file mode 100755 (executable)
index 0000000..9af15a9
--- /dev/null
@@ -0,0 +1,324 @@
+#!/usr/bin/perl
+#
+# usage: make-casetable.pl <infile> <outfile1> <outfile2>
+#        make-casetable.pl UnicodeData.txt utf16_casetable.h utf16_case.c
+#
+# (c) 2011 by HAT <hat@fa2.so-net.ne.jp>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+
+# See
+# http://www.unicode.org/reports/tr44/
+# http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+
+# One block has 64 chars.
+#
+# BMP
+# block    0 = dummy
+# block    1 = U+0000 - U+003F
+# block    2 = U+0040 - U+007F
+# .....
+# block 1024 = U+FFC0 - U+FFFF
+# block 1025 = dummy
+#
+# Surrogate Pair
+# block  1024 = dummy
+# block  1025 = U+010000 - U+01003F
+# block  1026 = U+010040 - U+01007F
+# .....
+# block 17408 = U+10FFC0 - U+10FFFF
+# block 17409 = dummy
+#
+# The dummy blocks exist only for edge detection.
+# If a block includes upper/lower chars, block_enable[] is set to 1 for it.
+
+use strict;
+use warnings;
+
+our $code0;
+our $Name1;
+our $General_Category2;
+our $Canonical_Combining_Class3;
+our $Bidi_Class4;
+our $Decomposition_Mapping5;
+our $Numeric_Value6;
+our $Numeric_Value7;
+our $Numeric_Value8;
+our $Bidi_Mirrored9;
+our $Unicode_1_Name10;
+our $ISO_Comment11;
+our $Simple_Uppercase_Mapping12;
+our $Simple_Lowercase_Mapping13;
+our $Simple_Titlecase_Mapping14;
+
+our $hex_code0;
+our $Mapping;
+our $hex_Mapping;
+
+our $char;
+our $sp;
+our $block;
+
+our @table;
+our @table_sp;
+
+our @block_enable;
+our @block_enable_sp;
+
+our $table_no;
+our $block_start;
+our $block_end;
+our $char_start;
+our $char_end;
+
+@ARGV >= 3 or die "usage: $0 <infile> <outfile1> <outfile2>\n";   # all three file names are required
+open(CHEADER, ">", $ARGV[1]) or die "cannot open $ARGV[1] for writing: $!\n";   # <outfile1>: table header
+open(CSOURCE, ">", $ARGV[2]) or die "cannot open $ARGV[2] for writing: $!\n";   # <outfile2>: C source
+printf (CHEADER "\/\*\n");                  # banner comment of the generated header
+printf (CHEADER "DO NOT EDIT BY HAND\!\!\!\n");
+printf (CHEADER "\n");
+printf (CHEADER "This file is generated by\n");
+printf (CHEADER " contrib/shell_utils/make-casetable.pl %s %s %s\n", $ARGV[0], $ARGV[1], $ARGV[2]);
+printf (CHEADER "\n");
+printf (CHEADER "%s is got from\n", $ARGV[0]);
+printf (CHEADER "http\:\/\/www.unicode.org\/Public\/UNIDATA\/UnicodeData.txt\n");
+printf (CHEADER "\*\/\n");
+printf (CHEADER "\n");
+
+printf (CSOURCE "\/\*\n");                  # same banner for the C source, then its includes
+printf (CSOURCE "DO NOT EDIT BY HAND\!\!\!\n");
+printf (CSOURCE "\n");
+printf (CSOURCE "This file is generated by\n");
+printf (CSOURCE " contrib/shell_utils/make-casetable.pl %s %s %s\n", $ARGV[0], $ARGV[1], $ARGV[2]);
+printf (CSOURCE "\n");
+printf (CSOURCE "%s is got from\n", $ARGV[0]);
+printf (CSOURCE "http\:\/\/www.unicode.org\/Public\/UNIDATA\/UnicodeData.txt\n");
+printf (CSOURCE "\*\/\n");
+printf (CSOURCE "\n");
+printf (CSOURCE "\#include \<stdint.h\>\n");
+printf (CSOURCE "\#include \<atalk\/unicode.h\>\n");
+printf (CSOURCE "\#include \"%s\"\n", $ARGV[1]);   # the C source includes the generated header by the name given
+printf (CSOURCE "\n");
+
+make_array("upper");    # writes the "upper" tables to CHEADER and toupper_w()/toupper_sp() to CSOURCE
+make_array("lower");    # writes the "lower" tables to CHEADER and tolower_w()/tolower_sp() to CSOURCE
+
+printf (CHEADER "\/\* EOF \*\/\n");
+printf (CSOURCE "\/\* EOF \*\/\n");
+
+close(CHEADER) or die "error closing $ARGV[1]: $!\n";   # buffered write errors surface at close
+close(CSOURCE) or die "error closing $ARGV[2]: $!\n";
+
+
+###########################################################################
+sub make_array{    # make_array(KIND): write the KIND ("upper" or "lower") case tables and lookup functions
+    # KIND picks UnicodeData.txt field 12 ("upper") or 13 ("lower"); any other value hits exit(1) while reading.
+    # init table -----------------------------------------------------
+    # Tables are printed to CHEADER, the to<KIND>_w()/to<KIND>_sp() functions to CSOURCE.
+    for ($char = 0 ; $char <= 0xFFFF ; $char++) {
+        $table[$char][0] = $char;       # mapped char
+        $table[$char][1] = $char;       # orig char
+        $table[$char][2] = "";          # char name
+    }
+
+    for ($char = 0x10000 ; $char <= 0x10FFFF ; $char++) {
+        $sp = ((0xD800 - (0x10000 >> 10) + ($char >> 10)) << 16)
+            + (0xDC00 + ($char & 0x3FF));
+        $table_sp[$char][0] = $sp;      # mapped surrogate pair
+        $table_sp[$char][1] = $sp;      # orig surrogate pair
+        $table_sp[$char][2] = $char;    # mapped char
+        $table_sp[$char][3] = $char;    # orig char
+        $table_sp[$char][4] = "";       # char name
+    }
+
+    for ($block = 0 ; $block <= 1025 ; $block++) {
+        $block_enable[$block] = 0;
+    }
+
+    $block_enable[1] = 1;           # ASCII block is forcibly included
+    $block_enable[2] = 1;           # in the array for Speed-Up.
+
+    for ($block = 1024 ; $block <= 17409 ; $block++) {
+        $block_enable_sp[$block] = 0;
+    }
+
+    # write data to table --------------------------------------------
+
+    open(UNICODEDATA, "<", $ARGV[0]) or die "cannot open $ARGV[0]: $!\n";
+
+    while (<UNICODEDATA>) {
+        chop;
+        (
+            $code0,
+            $Name1,
+            $General_Category2,
+            $Canonical_Combining_Class3,
+            $Bidi_Class4,
+            $Decomposition_Mapping5,
+            $Numeric_Value6,
+            $Numeric_Value7,
+            $Numeric_Value8,
+            $Bidi_Mirrored9,
+            $Unicode_1_Name10,
+            $ISO_Comment11,
+            $Simple_Uppercase_Mapping12,
+            $Simple_Lowercase_Mapping13,
+            $Simple_Titlecase_Mapping14
+        ) = split(/\;/);
+
+        if ($_[0] eq "upper") {
+            $Mapping = $Simple_Uppercase_Mapping12;
+        } elsif ($_[0] eq "lower") {
+            $Mapping = $Simple_Lowercase_Mapping13;
+        } else {
+            exit(1);
+        }
+
+        next if ($Mapping eq "");       # no mapping of this kind: keep the identity entry
+
+        $hex_code0 = hex($code0);
+        $hex_Mapping = hex($Mapping);
+
+        if ($hex_code0 <= 0xFFFF) {
+            $table[$hex_code0][0] = $hex_Mapping;
+            #table[$hex_code0][1]   already set
+            $table[$hex_code0][2] = $Name1;
+            $block_enable[($hex_code0 / 64) +1] = 1;       # fractional subscript is truncated by Perl
+        } else {
+            $sp = ((0xD800 - (0x10000 >> 10) + ($hex_Mapping >> 10)) << 16)
+                + (0xDC00 + ($hex_Mapping & 0x3FF));
+            $table_sp[$hex_code0][0] = $sp;
+            #table_sp[$hex_code0][1]   already set
+            $table_sp[$hex_code0][2] = $hex_Mapping;
+            #table_sp[$hex_code0][3]   already set
+            $table_sp[$hex_code0][4] = $Name1;
+            $block_enable_sp[($hex_code0 / 64) +1] = 1;    # fractional subscript is truncated by Perl
+        }
+    }
+
+    close(UNICODEDATA);
+
+    # array for BMP --------------------------------------------------
+
+    printf(CSOURCE "\/*******************************************************************\n");
+    printf(CSOURCE " Convert a wide character to %s case.\n", $_[0]);
+    printf(CSOURCE "*******************************************************************\/\n");
+    printf(CSOURCE "ucs2\_t to%s\_w\(ucs2\_t val\)\n", $_[0]);
+    printf(CSOURCE "{\n");
+
+    $table_no = 1;
+
+    for ($block = 1 ; $block <= 1024 ; $block++) {
+
+        # rising edge detection
+        if ($block_enable[$block - 1] == 0 && $block_enable[$block] == 1) {
+            $block_start = $block;
+        }
+
+        # falling edge detection
+        if ($block_enable[$block] == 1 && $block_enable[$block + 1] == 0) {
+            $block_end = $block;
+
+            $char_start = ($block_start -1)* 64;
+            $char_end = ($block_end * 64) -1;
+
+            printf(CHEADER "static const uint16\_t %s\_table\_%d\[%d\] \= \{\n",
+                   $_[0], $table_no, $char_end - $char_start +1);
+
+            for ($char = $char_start ; $char <= $char_end ; $char++) {
+                printf(CHEADER "  0x%04X, /*U\+%04X*/ /*%s*/\n",
+                       $table[$char][0],
+                       $table[$char][1],
+                       $table[$char][2]
+                   );
+            }
+            printf(CHEADER "\}\;\n");
+            printf(CHEADER "\n");
+
+            if ($char_start == 0x0000) {
+                printf(CSOURCE "    if \( val \<\= 0x%04X)\n",
+                       $char_end);
+                printf(CSOURCE "        return %s\_table\_%d\[val]\;\n",
+                       $_[0], $table_no);
+            } else {
+                printf(CSOURCE "    if \( val \>\= 0x%04X \&\& val \<\= 0x%04X)\n",
+                       $char_start, $char_end);
+                printf(CSOURCE "        return %s\_table\_%d\[val-0x%04X\]\;\n",
+                       $_[0], $table_no, $char_start);
+            }
+            printf(CSOURCE "\n");
+
+            $table_no++;
+        }
+    }
+
+    printf(CSOURCE "\treturn \(val\)\;\n");
+    printf(CSOURCE "\}\n");
+    printf(CSOURCE "\n");
+
+    # array for Surrogate Pair ---------------------------------------
+
+    printf(CSOURCE "\/*******************************************************************\n");
+    printf(CSOURCE " Convert a surrogate pair to %s case.\n", $_[0]);
+    printf(CSOURCE "*******************************************************************\/\n");
+    printf(CSOURCE "uint32\_t to%s\_sp\(uint32\_t val\)\n", $_[0]);
+    printf(CSOURCE "{\n");
+
+    $table_no = 1;
+
+    for ($block = 1025 ; $block <= 17408 ; $block++) {
+        # 16 blocks = 1024 chars share one high surrogate; "val - first pair" is a valid index only inside such a group.
+        # rising edge detection (also forced at the first block of each 16-block group)
+        if ((($block_enable_sp[$block - 1] == 0) || ((($block - 1) & 0xF) == 0))
+                && ($block_enable_sp[$block] == 1)) {
+            $block_start = $block;
+        }
+
+        # falling edge detection (also forced at the last block of each 16-block group)
+        if (($block_enable_sp[$block] == 1) &&
+                (((($block - 1) & 0xF) == 0xF) || ($block_enable_sp[$block + 1] == 0))) {   # "==" binds tighter than "&": parenthesize the mask
+            $block_end = $block;
+
+            $char_start = ($block_start -1)* 64;
+            $char_end = ($block_end * 64) -1;
+
+            printf(CHEADER "static const uint32\_t %s\_table\_sp\_%d\[%d\] \= \{\n",
+                   $_[0], $table_no, $char_end - $char_start +1);
+
+            for ($char = $char_start ; $char <= $char_end ; $char++) {
+                printf(CHEADER "  0x%08X, /*0x%08X*/ /*U\+%06X*/ /*U\+%06X*/ /*%s*/\n",
+                       $table_sp[$char][0],
+                       $table_sp[$char][1],
+                       $table_sp[$char][2],
+                       $table_sp[$char][3],
+                       $table_sp[$char][4]
+                   );
+            }
+            printf(CHEADER "\}\;\n");
+            printf(CHEADER "\n");
+
+            printf(CSOURCE "    if \( val \>\= 0x%08X \&\& val \<\= 0x%08X)\n",
+                   $table_sp[$char_start][1], $table_sp[$char_end][1]);
+            printf(CSOURCE "        return %s\_table\_sp\_%d\[val-0x%08X\]\;\n",
+                   $_[0], $table_no, $table_sp[$char_start][1]);
+            printf(CSOURCE "\n");
+
+            $table_no++;
+        }
+    }
+
+    printf(CSOURCE "\treturn \(val\)\;\n");
+    printf(CSOURCE "\}\n");
+    printf(CSOURCE "\n");
+}
+
+# EOF
diff --git a/contrib/shell_utils/make-precompose.h.pl b/contrib/shell_utils/make-precompose.h.pl
new file mode 100755 (executable)
index 0000000..11cb8d9
--- /dev/null
@@ -0,0 +1,260 @@
+#!/usr/bin/perl
+#
+# usage: make-precompose.h.pl UnicodeData.txt > precompose.h
+#
+# (c) 2008-2011 by HAT <hat@fa2.so-net.ne.jp>
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+# 
+
+# See
+# http://www.unicode.org/Public/UNIDATA/UCD.html
+# http://www.unicode.org/reports/tr15/
+# http://www.unicode.org/Public/*/ucd/UnicodeData*.txt
+# http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+
+
+# temp files for binary search (compose.TEMP, compose_sp.TEMP) -------------
+#
+# Read UnicodeData.txt (first command line argument) and write one table
+# row for every decomposition mapping that consists of two code points:
+#
+#   compose.TEMP     replacement, base, comb as 32-bit code points
+#   compose_sp.TEMP  the same triple for code points above U+FFFF, each
+#                    value written as its UTF-16 surrogate pair (high word
+#                    followed by low word)
+#
+# A row starts with "  { " when it is active and with "/*{ " when it is
+# emitted as a C comment only.  Both prefixes are 4 characters wide and the
+# later passes rely on that fixed layout.
+
+die "usage: make-precompose.h.pl UnicodeData.txt > precompose.h\n"
+    unless @ARGV;
+
+open(my $unicodedata_fh, '<', $ARGV[0])
+    or die "make-precompose.h.pl: cannot read $ARGV[0]: $!\n";
+
+open(my $compose_fh, '>', 'compose.TEMP')
+    or die "make-precompose.h.pl: cannot create compose.TEMP: $!\n";
+open(my $compose_sp_fh, '>', 'compose_sp.TEMP')
+    or die "make-precompose.h.pl: cannot create compose_sp.TEMP: $!\n";
+
+# (left bracket, right bracket) of an active row and of a commented-out row.
+my @active_row    = ("  { ", " },     ");
+my @commented_row = ("/*{ ", " },*/   ");
+
+# Split a code point into the (high, low) UTF-16 surrogate words.
+my $surrogates_of = sub {
+    my ($cp) = @_;
+    return (0xD800 - (0x10000 >> 10) + ($cp >> 10), 0xDC00 + ($cp & 0x3FF));
+};
+
+while (my $line = <$unicodedata_fh>) {
+    chomp $line;
+
+    # Fields 0, 1 and 5: code point, name, decomposition mapping.
+    my ($code0, $Name1, $Decomposition_Mapping5)
+        = (split(/;/, $line))[0, 1, 5];
+
+    # Keep only mappings that are non-empty, carry no "<tag>" and consist
+    # of more than one code point.
+    next if !defined($Decomposition_Mapping5)
+        || $Decomposition_Mapping5 eq ""
+        || $Decomposition_Mapping5 =~ /</
+        || $Decomposition_Mapping5 !~ / /;
+
+    my ($base, $comb) = split(/ /, $Decomposition_Mapping5);
+    my $code = hex($code0);
+
+    my ($leftbracket, $rightbracket) = @active_row;
+
+    # AFP 3.x Spec: these ranges are written as comments only.
+    if (   (0x2000  <= $code && $code <= 0x2FFF)
+        || (0xFE30  <= $code && $code <= 0xFE4F)
+        || (0x2F800 <= $code && $code <= 0x2FA1F)) {
+        ($leftbracket, $rightbracket) = @commented_row;
+    }
+
+    if ($code > 0xFFFF) {
+        printf {$compose_sp_fh}
+            "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s/* %s */\n",
+            $leftbracket,
+            $surrogates_of->($code),
+            $surrogates_of->(hex($base)),
+            $surrogates_of->(hex($comb)),
+            $rightbracket, $Name1;
+
+        # The row went to the surrogate pair table; in the 32-bit table it
+        # is kept as a comment only.
+        ($leftbracket, $rightbracket) = @commented_row;
+    }
+
+    printf {$compose_fh} "%s0x%08X, 0x%08X, 0x%08X%s/* %s */\n",
+        $leftbracket, $code, hex($base), hex($comb), $rightbracket, $Name1;
+}
+
+close($unicodedata_fh);
+
+# Buffered write errors only surface at close, so check them.
+close($compose_fh)
+    or die "make-precompose.h.pl: cannot write compose.TEMP: $!\n";
+close($compose_sp_fh)
+    or die "make-precompose.h.pl: cannot write compose_sp.TEMP: $!\n";
+
+# macros for BMP (PRECOMP_COUNT, DECOMP_COUNT, MAXCOMBLEN) ----------------
+#
+# Re-read compose.TEMP, skipping the rows that were written as C comments.
+# Sets $comp_count to the number of active rows and $maxcomblen to the
+# longest chain found by repeatedly replacing a base with its own base.
+
+open(COMPOSE_TEMP, '<', 'compose.TEMP')
+    or die "make-precompose.h.pl: cannot read compose.TEMP: $!\n";
+
+@comp_table = ();
+$comp_count = 0;
+
+# replacement => base; the first row wins, like a front-to-back scan would.
+my %bmp_base_of = ();
+
+while (my $row = <COMPOSE_TEMP>) {
+    next if $row =~ m{^/\*};
+
+    # Fixed layout: "  { " + 10 chars replacement + ", " + 10 chars base.
+    my $replacement = substr($row, 4, 10);
+    my $row_base    = substr($row, 16, 10);
+
+    $comp_table[$comp_count][0] = $replacement;
+    $comp_table[$comp_count][1] = $row_base;
+    $bmp_base_of{$replacement} = $row_base
+        unless exists $bmp_base_of{$replacement};
+    $comp_count++;
+}
+
+close(COMPOSE_TEMP);
+
+$maxcomblen = 2;      # Hangul's maxcomblen is already 2. That is, VT.
+
+for my $entry (@comp_table) {
+    my $chain_base = $entry->[1];
+    my $chain_len  = 1;
+
+    # Follow the chain while the current base is itself a replacement.
+    while (exists $bmp_base_of{$chain_base}) {
+        $chain_len++;
+        $chain_base = $bmp_base_of{$chain_base};
+    }
+
+    $maxcomblen = $chain_len if $chain_len > $maxcomblen;
+}
+
+# macros for SP (PRECOMP_SP_COUNT,DECOMP_SP_COUNT, MAXCOMBSPLEN) -----------
+#
+# Re-read compose_sp.TEMP, skipping the rows that were written as C
+# comments.  Sets $comp_sp_count to the number of active rows and
+# $maxcombsplen to the longest chain found by repeatedly replacing a base
+# with its own base, counted in steps of 2.
+
+open(COMPOSE_SP_TEMP, '<', 'compose_sp.TEMP')
+    or die "make-precompose.h.pl: cannot read compose_sp.TEMP: $!\n";
+
+@comp_sp_table = ();
+$comp_sp_count = 0;
+
+# replacement => base; the first row wins, like a front-to-back scan would.
+my %sp_base_of = ();
+
+while (my $row = <COMPOSE_SP_TEMP>) {
+    next if $row =~ m{^/\*};
+
+    # Fixed layout: "  { " + 10 chars replacement + ", " + 10 chars base.
+    my $replacement = substr($row, 4, 10);
+    my $row_base    = substr($row, 16, 10);
+
+    $comp_sp_table[$comp_sp_count][0] = $replacement;
+    $comp_sp_table[$comp_sp_count][1] = $row_base;
+    $sp_base_of{$replacement} = $row_base
+        unless exists $sp_base_of{$replacement};
+    $comp_sp_count++;
+}
+
+close(COMPOSE_SP_TEMP);
+
+$maxcombsplen = 2;     # one char has 2 code units, like D8xx DCxx.
+
+for my $entry (@comp_sp_table) {
+    my $chain_base = $entry->[1];
+    my $chain_len  = 2;
+
+    # Follow the chain while the current base is itself a replacement.
+    while (exists $sp_base_of{$chain_base}) {
+        $chain_len += 2;
+        $chain_base = $sp_base_of{$chain_base};
+    }
+
+    $maxcombsplen = $chain_len if $chain_len > $maxcombsplen;
+}
+
+# macro for buffer length (COMBBUFLEN) -------------------------------------
+
+$combbuflen = ($maxcomblen > $maxcombsplen) ? $maxcomblen : $maxcombsplen;
+
+# sort ---------------------------------------------------------------------
+#
+# precompose*.SORT is ordered from the base column on (sort key 3),
+# decompose*.SORT from the replacement column on (sort key 2).
+
+{
+    # Force byte order so the result does not depend on the caller's
+    # locale; the fixed-width upper-case hex values then sort numerically.
+    local $ENV{LC_ALL} = 'C';
+
+    my @sort_jobs = (
+        [3, 'compose.TEMP',    'precompose.SORT'],
+        [2, 'compose.TEMP',    'decompose.SORT'],
+        [3, 'compose_sp.TEMP', 'precompose_sp.SORT'],
+        [2, 'compose_sp.TEMP', 'decompose_sp.SORT'],
+    );
+
+    for my $job (@sort_jobs) {
+        my ($key, $infile, $outfile) = @$job;
+
+        # The shell is needed for the output redirection.
+        system("sort -k $key $infile > $outfile") == 0
+            or die "make-precompose.h.pl: sorting $infile into $outfile failed\n";
+    }
+}
+
+# print  -------------------------------------------------------------------
+#
+# Write the generated C header to standard output: banner, Hangul constants,
+# table size macros and the four sorted tables.
+
+# Print one "static const struct { ... } NAME[] = { rows };" definition.
+#   $member_suffix  appended to the member names replacement/base/comb
+#   $array_name     name of the C array
+#   $rows_file      file whose contents are copied in as the table rows
+sub print_table {
+    my ($member_suffix, $array_name, $rows_file) = @_;
+
+    print  "static const struct {\n";
+    printf "  unsigned int replacement%s;\n", $member_suffix;
+    printf "  unsigned int base%s;\n",        $member_suffix;
+    printf "  unsigned int comb%s;\n",        $member_suffix;
+    printf "} %s[] = {\n",                    $array_name;
+
+    # Copy the rows through Perl's own STDOUT so the output order does not
+    # depend on an external cat(1), and a missing file is a hard error.
+    open(my $rows_fh, '<', $rows_file)
+        or die "make-precompose.h.pl: cannot read $rows_file: $!\n";
+    while (my $row = <$rows_fh>) {
+        print $row;
+    }
+    close($rows_fh);
+
+    print "};\n";
+    print "\n";
+    return;
+}
+
+print  "/* DO NOT EDIT BY HAND!!!                                           */\n";
+print  "/* This file is generated by                                        */\n";
+printf "/*       contrib/shell_utils/make-precompose.h.pl %s   */\n", $ARGV[0];
+print  "\n";
+printf "/* %s is got from                                      */\n", $ARGV[0];
+print  "/* http://www.unicode.org/Public/UNIDATA/UnicodeData.txt            */\n";
+print  "\n";
+
+# Hangul constants; the heredoc includes the trailing blank line.
+print <<'END_HANGUL';
+#define SBASE 0xAC00
+#define LBASE 0x1100
+#define VBASE 0x1161
+#define TBASE 0x11A7
+#define LCOUNT 19
+#define VCOUNT 21
+#define TCOUNT 28
+#define NCOUNT 588     /* (VCOUNT * TCOUNT) */
+#define SCOUNT 11172   /* (LCOUNT * NCOUNT) */
+
+END_HANGUL
+
+printf "#define PRECOMP_COUNT %d\n", $comp_count;
+printf "#define DECOMP_COUNT %d\n",  $comp_count;
+printf "#define MAXCOMBLEN %d\n",    $maxcomblen;
+print  "\n";
+printf "#define PRECOMP_SP_COUNT %d\n", $comp_sp_count;
+printf "#define DECOMP_SP_COUNT %d\n",  $comp_sp_count;
+printf "#define MAXCOMBSPLEN %d\n",     $maxcombsplen;
+print  "\n";
+printf "#define COMBBUFLEN %d  /* max(MAXCOMBLEN,MAXCOMBSPLEN) */\n", $combbuflen;
+print  "\n";
+
+print_table("",    "precompositions",    "precompose.SORT");
+print_table("",    "decompositions",     "decompose.SORT");
+print_table("_sp", "precompositions_sp", "precompose_sp.SORT");
+print_table("_sp", "decompositions_sp",  "decompose_sp.SORT");
+
+print "/* EOF */\n";
+
+# EOF