diff options
author | behdad <behdad> | 2001-12-14 14:25:17 +0000 |
---|---|---|
committer | behdad <behdad> | 2001-12-14 14:25:17 +0000 |
commit | 43e2ad63488f60fd5c9c8527806b539ee8215be7 (patch) | |
tree | 288c36781bb6de856cdbed4fc63c86650281a41f /unidata | |
parent | 29313c8bbdd528f11b8cd8f9155802b1ccecb745 (diff) |
Major update to include all promised changes.
Diffstat (limited to 'unidata')
-rwxr-xr-x | unidata/CreateGetType.pl | 307 |
1 files changed, 0 insertions, 307 deletions
#!/usr/bin/perl

# FriBidi - Library of BiDi algorithm
# Copyright (C) 1999,2000 Dov Grobgeld, and
# Copyright (C) 2001 Roozbeh Pournader
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library, in a file named COPYING.LIB; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330,
# Boston, MA 02111-1307, USA
#
# For licensing issues, contact <dov@imagic.weizmann.ac.il> and
# <fwpg@sharif.edu>.

######################################################################
# This is a Perl program for automatically building the function
# fribidi_get_type() which returns the Bidi type of a unicode
# character. To build this function the script parses the
# UnicodeData.txt and BidiMirroring.txt files.
#
# The latest versions of these files are always available at:
# http://www.unicode.org/Public/UNIDATA/
#
# Usage: run in the directory holding both data files; the generated
# C source is written to STDOUT.
######################################################################

use strict;
use warnings;

my $unicode_data_file      = "UnicodeData.txt";
my $unicode_mirroring_file = "BidiMirroring.txt";
my $unicode_version        = "3.1";

# Filled in from the first line of BidiMirroring.txt; stays "unknown"
# when that header line is missing so the generated banner is still valid.
my $mirroring_version = "unknown";

# Run-length encoded bidi classes: each element is [first_code, type, count].
my @bidi_entities;

# Mirror pairs as hex strings: each element is [code, mirrored_code].
my @mirrors;

# UnicodeData.txt class names that differ from the FriBidi short names.
my %type_translate = (L => 'LTR', R => 'RTL', B => 'BS', S => 'SS');

# Only the short name (element 0) is used by the generator; the second
# element is a human readable description.
my %type_names = ("0x10000090" => ["CTL", "Control units"],
                  "0x01000000" => ["LTR", "Strong left to right"],
                  "0x02000000" => ["RTL", "Strong right to left"],
                  "0x03000000" => ["EN",  "European digit"],
                  "0x04000000" => ["ES",  "European number separator"],
                  "0x05000000" => ["ET",  "European number terminator"],
                  "0x06000000" => ["AN",  "Arabic digit"],
                  "0x07000000" => ["CS",  "Common Separator"],
                  "0x08000000" => ["BS",  "Block separator"],
                  "0x09000000" => ["SS",  "Segment separator"],
                  "0x0A000000" => ["WS",  "Whitespace"],
                  "0x0B000000" => ["AL",  "Arabic right to left letter"],
                  "0x0C000000" => ["NSM", "Non-spacing mark"],
                  "0x0D000000" => ["BN",  "Boundary Neutral"],
                  "0x0E000000" => ["PDF", "Pop directional formatting"],
                  "0x0F000000" => ["EO",  "Embedding or override"],
                  "0x80000009" => ["ON",  "Other Neutral"],
                  "0x10000091" => ["LRE", "Left to right embedding"],
                  "0x10000092" => ["RLE", "Right to left embedding"],
                  "0x10000093" => ["LRO", "Left to right override"],
                  "0x10000094" => ["RLO", "Right to left override"],
                 );

# Entry point: read both data files and print the generated C source.
# Dies when either input file cannot be opened.
sub main {
    # Three-argument open with lexical handles: the file name can never be
    # interpreted as an open mode, and the special DATA handle is left alone.
    open(my $data_fh, '<', $unicode_data_file)
        or die "Failed opening $unicode_data_file: $!\n";

    open(my $mirr_fh, '<', $unicode_mirroring_file)
        or die "Failed opening $unicode_mirroring_file: $!\n";

    # The first line of BidiMirroring.txt looks like
    # "# BidiMirroring-3.1.0.txt" and carries the data version.
    my $header = <$mirr_fh>;
    if (defined $header && $header =~ /^# BidiMirroring-(.*)\.txt/) {
        $mirroring_version = $1;
    }

    parse_unicode_data_for_bidi_entries($data_fh);
    #print_bidi_entities();
    parse_for_mirror_chars($mirr_fh);
    #print_mirrored_chars();

    close($data_fh);
    close($mirr_fh);

    create_c_file();
    return;
}

# Read UnicodeData.txt from $fh and fill @bidi_entities with runs of
# consecutive code points that share the same bidi class.
sub parse_unicode_data_for_bidi_entries {
    my ($fh) = @_;

    seek($fh, 0, 0);
    my ($prev_type, $prev_num) = ('', 0);
    while (my $line = <$fh>) {
        # Field 0 is the code point (hex), field 4 the bidi class.
        my ($code, $type) = (split(/;/, $line))[0, 4];

        # Skip blank or malformed lines instead of recording bogus entries.
        next unless defined $code && defined $type;
        next unless $code =~ /^[0-9A-Fa-f]+$/ && length $type;

        $type = $type_translate{$type} if exists $type_translate{$type};
        my $num = hex($code);

        if ($prev_type eq $type && $num == $prev_num + 1) {
            # Same class as the directly preceding code point: extend the run.
            $bidi_entities[-1][2]++;
        } else {
            push(@bidi_entities, [$num, $type, 1]);
        }
        $prev_num  = $num;
        $prev_type = $type;
    }
    return;
}

# Read BidiMirroring.txt from $fh and fill @mirrors with
# [code, mirrored_code] pairs (both kept as hex strings).
sub parse_for_mirror_chars {
    my ($fh) = @_;

    # Rewind: the header line has already been consumed by main().
    seek($fh, 0, 0);
    while (my $line = <$fh>) {
        # Require at least one hex digit on each side so that a stray
        # "; " line can never produce an empty "{0x, 0x}" table entry.
        if ($line =~ /^([0-9A-F]+); ([0-9A-F]+)/) {
            push(@mirrors, [$1, $2]);
        }
    }
    return;
}

# Debugging aid: dump the mirror table to STDERR.
sub print_mirrored_chars {
    print STDERR "Mirrors:\n";
    for my $m (@mirrors) {
        my ($num, $mirror_num) = @$m;
        print STDERR "$num <=> $mirror_num\n";
    }
    return;
}

# Debugging aid: dump the run-length encoded bidi classes to STDOUT.
sub print_bidi_entities {
    for my $e (@bidi_entities) {
        print sprintf("%04x", $e->[0]), " $e->[1] $e->[2]\n";
    }
    return;
}

# Convert one [start, type, length] run into (first, last, type).
sub split_entity {
    my $e = shift;
    my ($start, $type, $len) = @$e;

    my ($first, $last) = ($start, $start + $len - 1);

    return ($first, $last, $type);
}

# Build the C array for one 256-character block.
#
#   $block  - block number (code point >> 8)
#   $name   - C identifier of the array
#   $ranges - array ref of [first, last, type] runs, sorted by code point
#             and already clipped to this block
#
# Returns ($title, $body): the array declaration line and the
# initialiser list including the closing "};".
sub create_block {
    my ($block, $name, $ranges) = @_;
    my $title = "FriBidiPropCharType ${name}\[256\] = {\n";
    my $result = '';

    my $ind = 0;
    for my $i ($block * 256 .. $block * 256 + 255) {
        my $found = 0;

        if ($i % 16 == 0) {
            $result .= "  ";
        }
        # Advance through the sorted runs until one covers (or lies past) $i.
        while ($ind < @$ranges) {
            if ($i > $ranges->[$ind]->[1]) {
                $ind++;
            } elsif ($i < $ranges->[$ind]->[0]) {
                last;
            } else {
                $found = 1;
                $result .= sprintf("%-3s,", $ranges->[$ind]->[2]);
                last;
            }
        }
        if (!$found) {
            # Unassigned code point: default class based on Table 3-7
            # from UTR #9.
            if (0x0590 <= $i && $i <= 0x05FF || 0xFB1D <= $i && $i <= 0xFB4F) {
                $result .= sprintf("%-3s,", "RTL");
            } elsif (0x0600 <= $i && $i <= 0x07BF ||
                     0xFB50 <= $i && $i <= 0xFDFF ||
                     0xFE70 <= $i && $i <= 0xFEFF) {
                $result .= sprintf("%-3s,", "AL");
            } else {
                $result .= sprintf("%-3s,", "LTR");
            }
        }
        if ($i % 16 == 15) {
            $result .= "\n";
        }
    }
    $result .= "};\n\n";
    return ($title, $result);
}

# Assemble the complete generated C source from @bidi_entities and
# @mirrors and print it to STDOUT.
sub create_c_file {
    my $num_used_blocks = 0;

    my $c_file = <<"END_HEADER";
/*========================================================================
 * This file was automatically created from $unicode_data_file, version $unicode_version,
 * and $unicode_mirroring_file, version $mirroring_version, by the perl script CreateGetType.pl.
 *----------------------------------------------------------------------*/
/* *INDENT-OFF* */

#include "fribidi.h"

END_HEADER

    # Sorted so that the generated file is identical from run to run
    # (hash iteration order is not stable).
    my @short_names = map { $type_names{$_}->[0] } sort keys %type_names;

    for my $short (@short_names) {
        $c_file .= "#define $short FRIBIDI_PROP_TYPE_$short\n";
    }
    $c_file .= "\n\n";

    my $i = 0;
    my $block_array = "FriBidiPropCharType *FriBidiPropertyBlocks[] = {\n";

    # Maps the text of an already emitted block body to its C name, so that
    # identical blocks are emitted once and shared.
    my %seen_blocks;
    for my $block (0 .. 0x10FF) {
        my $block_first = $block * 256;
        my $block_last  = $block_first + 255;
        my @block_ranges;

        # Collect the runs overlapping this block, clipped to its bounds.
        # $i only advances once a run ends inside the current block, so a
        # run spanning several blocks is revisited for each of them.
        while ($i < @bidi_entities) {
            my ($first, $last, $type) = split_entity($bidi_entities[$i]);

            last if $first > $block_last;

            if ($last >= $block_first) {
                push @block_ranges, [ $first > $block_first ? $first : $block_first,
                                      $last  < $block_last  ? $last  : $block_last,
                                      $type ];
            }
            if ($last < ($block + 1) * 256) {
                $i++;
            } else {
                last;
            }
        }

        $block_array .= "  ";
        if (@block_ranges == 0) {
            # NOTE: assumes 0100..01FF is all LTR and is emitted under its
            # own name, so it can stand in for every unassigned block.
            $block_array .= "FriBidiPropertyBlock0100";
        } else {
            my $name = sprintf("FriBidiPropertyBlock%02lX00", $block);
            my ($title, $block_ctx) = create_block($block, $name, \@block_ranges);

            if (exists $seen_blocks{$block_ctx}) {
                # Identical content already emitted: reuse that array.
                $block_array .= $seen_blocks{$block_ctx};
            } else {
                $num_used_blocks++;
                $block_array .= $name;
                $c_file .= $title . $block_ctx;
                $seen_blocks{$block_ctx} = $name;
            }
        }
        $block_array .= sprintf(", /* %02lX00..%02lXFF */\n", $block, $block);
    }
    $c_file .= "/* $num_used_blocks blocks defined */\n\n";

    $c_file .= $block_array
        . "};\n\n";

    for my $short (@short_names) {
        $c_file .= "#undef $short\n";
    }

    $c_file .= "\n\n";

    ######################################################################
    # Mirrored characters.
    ######################################################################

    $c_file .= <<'END_MIRROR_HEADER';
/*======================================================================
// Mirrored characters include all the characters in the Unicode list
// that have been declared as being mirrored and that have a mirrored
// equivalent.
//
// There are lots of characters that are designed as being mirrored
// but do not have any mirrored glyph, e.g. the sign for there exist.
// Are these used in Arabic? That is are all the mathematical signs
// that are assigned to be mirrorable actually mirrored in Arabic?
// If that is the case, we'll change the below code to include also
// characters that mirror to themself. It will then be the responsibility
// of the display engine to actually mirror these.
//----------------------------------------------------------------------*/

struct {
  FriBidiChar ch, mirrored_ch;
} FriBidiMirroredChars[] = {
END_MIRROR_HEADER

    for my $idx (0 .. $#mirrors) {
        my ($ch, $mirrored_ch) = @{ $mirrors[$idx] };
        $c_file .= "  {0x$ch, 0x$mirrored_ch}";
        $c_file .= "," unless $idx == $#mirrors;
        $c_file .= "\n";
    }
    $c_file .= "};\n\n";
    $c_file .= "gint nFriBidiMirroredChars = "
        . scalar(@mirrors) . ";\n";

    $c_file .= "\n\n/* *INDENT-ON* */\n";

    print $c_file;
    return;
}

# Run only when executed as a script; when the file is loaded with
# require/do (e.g. by a test) the helper subs are defined without touching
# the data files.
main() unless caller;

1;