summary refs log tree commit diff
path: root/unidata
diff options
context:
space:
mode:
author behdad <behdad> 2001-12-14 14:25:17 +0000
committer behdad <behdad> 2001-12-14 14:25:17 +0000
commit 43e2ad63488f60fd5c9c8527806b539ee8215be7 (patch)
tree 288c36781bb6de856cdbed4fc63c86650281a41f /unidata
parent 29313c8bbdd528f11b8cd8f9155802b1ccecb745 (diff)
Major update to include all promised changes.
Diffstat (limited to 'unidata')
-rwxr-xr-x unidata/CreateGetType.pl 307
1 files changed, 0 insertions, 307 deletions
diff --git a/unidata/CreateGetType.pl b/unidata/CreateGetType.pl
deleted file mode 100755
index f884d83..0000000
--- a/unidata/CreateGetType.pl
+++ /dev/null
@@ -1,307 +0,0 @@
-#!/usr/bin/perl -w
-
-# FriBidi - Library of BiDi algorithm
-# Copyright (C) 1999,2000 Dov Grobgeld, and
-# Copyright (C) 2001 Roozbeh Pournader
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with this library, in a file named COPYING.LIB; if not, write to the
-# Free Software Foundation, Inc., 59 Temple Place, Suite 330,
-# Boston, MA 02111-1307, USA
-#
-# For licensing issues, contact <dov@imagic.weizmann.ac.il> and
-# <fwpg@sharif.edu>.
-
-######################################################################
-# This is a Perl program for automatically building the function
-# fribidi_get_type() which returns the Bidi type of a unicode
-# character. To build this function the script parses the
-# UnicodeData.txt and BidiMirroring.txt files.
-#
-# The latest versions of these files are always available at:
-# http://www.unicode.org/Public/UNIDATA/
-######################################################################
-
-use strict;
-
-# Input files (opened relative to the current directory) and the Unicode
-# version string written into the header of the generated C file.
-my $unicode_data_file = "UnicodeData.txt";
-my $unicode_mirroring_file = "BidiMirroring.txt";
-my $unicode_version = "3.1";
-
-# Version of the mirroring file, taken from its first line below.
-my $mirroring_version;
-
-# Runs of consecutive code points sharing one bidi type, stored as
-# [first code point, type, run length].
-my @bidi_entities;
-
-# [character, mirrored character] pairs, kept as hex strings.
-my @mirrors;
-
-# Bidi categories of UnicodeData.txt that get a different name in the
-# generated tables; every other category is used as read.
-my %type_translate = (L=>'LTR',R=>'RTL','B'=>'BS','S'=>'SS');
-
-# Property value => [short name, description].  Within this script only the
-# short names are read (for the #define/#undef lines of the C file).
-my %type_names = ("0x10000090" => ["CTL", "Control units"],
-                  "0x01000000" => ["LTR", "Strong left to right"],
-                  "0x02000000" => ["RTL", "Strong right to left"],
-                  "0x03000000" => ["EN", "European digit"],
-                  "0x04000000" => ["ES", "European number separator"],
-                  "0x05000000" => ["ET", "European number terminator"],
-                  "0x06000000" => ["AN", "Arabic digit"],
-                  "0x07000000" => ["CS", "Common Separator"],
-                  "0x08000000" => ["BS", "Block separator"],
-                  "0x09000000" => ["SS", "Segment separator"],
-                  "0x0A000000" => ["WS", "Whitespace"],
-                  "0x0B000000" => ["AL", "Arabic right to left letter"],
-                  "0x0C000000" => ["NSM", "Non-spacing mark"],
-                  "0x0D000000" => ["BN", "Boundary Neutral"],
-                  "0x0E000000" => ["PDF", "Pop directional formatting"],
-                  "0x0F000000" => ["EO", "Embedding or override"],
-                  "0x80000009" => ["ON", "Other Neutral"],
-                  "0x10000091" => ["LRE", "Left-to-right embedding"],
-                  "0x10000092" => ["RLE", "Right-to-left embedding"],
-                  "0x10000093" => ["LRO", "Left-to-right override"],
-                  "0x10000094" => ["RLO", "Right-to-left override"],
-                 );
-
-# The handle names DATA and MIRR are used by the parsing subs below.
-# Three-argument open keeps the file name from being read as a mode, and
-# $! tells the user why the open failed.
-open(DATA, '<', $unicode_data_file)
-    or die "Failed opening $unicode_data_file: $!\n";
-
-open(MIRR, '<', $unicode_mirroring_file)
-    or die "Failed opening $unicode_mirroring_file: $!\n";
-
-# The first line of the mirroring file is expected to look like
-# "# BidiMirroring-<version>.txt".  Fall back to a placeholder so that an
-# empty file or an unexpected header never leaves the version undefined.
-my $first_line = <MIRR>;
-if (defined $first_line && $first_line =~ /^# BidiMirroring-(.*)\.txt/) {
-    $mirroring_version = $1;
-} else {
-    $mirroring_version = "unknown";
-}
-
-parse_unicode_data_for_bidi_entries();
-#print_bidi_entities();
-parse_for_mirror_chars();
-#print_mirrored_chars();
-create_c_file();
-
-# Read UnicodeData.txt through the DATA handle and fill @bidi_entities with
-# runs of consecutive code points that share a bidi type.  Each entry is
-# [first code point, type, run length].  Categories listed in
-# %type_translate are renamed; all others are kept as read.
-# Takes no arguments; the return value is not meaningful.
-sub parse_unicode_data_for_bidi_entries {
-    seek(DATA,0,0);
-    my ($prev_type, $prev_num, $prev_is_first) = ('', 0, 0);
-    while(<DATA>) {
-        # Field 0 is the code point, field 1 the name, field 4 the bidi type.
-        my ($num, $name, $type) = (split(/;/))[0,1,4];
-
-        # Skip blank or truncated lines instead of warning on undef fields.
-        next unless defined $type && $num =~ /^[0-9A-Fa-f]+$/;
-
-        if (exists $type_translate{$type}) {
-            $type = $type_translate{$type};
-        }
-        $num = hex($num);
-
-        if ($prev_is_first && @bidi_entities && $name =~ /, Last>$/
-            && $prev_type eq $type && $num > $prev_num) {
-            # A "<..., First>" line followed by its "<..., Last>" line
-            # stands for the whole range in between: stretch the run that
-            # currently ends at the First code point up to the Last one.
-            $bidi_entities[-1][2] += $num - $prev_num;
-        } elsif (@bidi_entities && $prev_type eq $type && $num == $prev_num+1) {
-            $bidi_entities[-1][2]++;
-        } else {
-            push(@bidi_entities, [$num, $type, 1]);
-        }
-        $prev_is_first = ($name =~ /, First>$/) ? 1 : 0;
-        $prev_num = $num;
-        $prev_type = $type;
-    }
-}
-
-# Read BidiMirroring.txt through the MIRR handle and append one
-# [character, mirrored character] pair of hex strings to @mirrors for every
-# line that starts with "HEX; HEX".  All other lines are ignored.
-# Takes no arguments; the return value is not meaningful.
-sub parse_for_mirror_chars {
-    seek(MIRR,0,0);
-    while(<MIRR>) {
-        # Require at least one hex digit on each side: an empty field would
-        # otherwise be written to the C file as a bare "0x".
-        if (/^([0-9A-F]+); ([0-9A-F]+)/) {
-            push(@mirrors, [$1, $2]);
-        }
-    }
-}
-
-# Debugging aid: write every pair stored in @mirrors to STDERR, one
-# "char <=> mirrored char" line per pair, below a "Mirrors:" heading.
-sub print_mirrored_chars {
-    print STDERR "Mirrors:\n";
-    foreach my $pair (@mirrors) {
-        print STDERR "$pair->[0] <=> $pair->[1]\n";
-    }
-}
-
-# Debugging aid: write every run stored in @bidi_entities to standard
-# output as "<start in 4-digit hex> <type> <length>".
-sub print_bidi_entities {
-    foreach my $entity (@bidi_entities) {
-        my ($start, $type, $len) = @{$entity};
-        print sprintf("%04x", $start) . " $type $len\n";
-    }
-}
-
-# Turn a run [start, type, length] (passed as an array reference) into the
-# list (first code point, last code point, type).
-sub split_entity {
-    my ($entity) = @_;
-    my ($start, $type, $len) = @{$entity};
-
-    return ($start, $start + $len - 1, $type);
-}
-
-# Build the C source of one 256-entry property table.
-#   $block  - block number; the table covers $block*256 .. $block*256+255
-#   $name   - C identifier used for the array
-#   $ranges - reference to a list of [first, last, type] triples; they are
-#             scanned front to back only, so they are assumed to be sorted
-#             by code point.  The list is not modified.
-# Returns ($title, $body): the array declaration line and the initializer
-# rows (16 entries per row) followed by the closing "};".
-sub create_block {
-    my ($block,$name, $ranges) = @_;
-    my($title) = <<__;
-FriBidiPropCharType ${name}\[256\] = {
-__
-    my $result = '';
-
-    my $ind = 0;
-    for my $i ($block*256 .. $block*256 + 255) {
-        my $type;
-
-        if ($i % 16 == 0) {
-            $result .= " ";
-        }
-        # Advance to the first range not entirely below $i; it supplies
-        # the type when it contains $i.
-        while ($ind < @$ranges) {
-            my ($first, $last, $range_type) = @{$ranges->[$ind]};
-            if ($i > $last) {
-                $ind++;
-            } elsif ($i < $first) {
-                last;
-            } else {
-                $type = $range_type;
-                last;
-            }
-        }
-        if (!defined $type) {
-            # Code point not covered by any range: use a default
-            # based on Table 3-7 from UTR #9
-
-            if ((0x0590 <= $i && $i <= 0x05FF) || (0xFB1D <= $i && $i <= 0xFB4F)) {
-                $type = "RTL";
-            } elsif ((0x0600 <= $i && $i <= 0x07BF) ||
-                     (0xFB50 <= $i && $i <= 0xFDFF) ||
-                     (0xFE70 <= $i && $i <= 0xFEFF)) {
-                $type = "AL";
-            } else {
-                $type = "LTR";
-            }
-        }
-        $result .= sprintf("%-3s,", $type);
-        if ($i % 16 == 15) {
-            $result .= "\n";
-        }
-    }
-    $result .= "};\n\n";
-    return($title, $result);
-}
-
-
-# Assemble the generated C source and print it on standard output.
-# The output consists of a header comment, one 256-entry property table for
-# every distinct block content (identical tables are emitted once and
-# shared), the FriBidiPropertyBlocks index for blocks 0x00..0x10FF, and the
-# FriBidiMirroredChars table built from @mirrors.
-# Takes no arguments; reads @bidi_entities, @mirrors and %type_names.
-sub create_c_file {
-    my $num_used_blocks = 0;
-
-    # The version is only set when the first line of the mirroring file was
-    # recognised; never interpolate undef into the header.
-    my $mirroring_label = defined $mirroring_version ? $mirroring_version : "unknown";
-
-    my $c_file =<<__;
-/*========================================================================
- * This file was automatically created from $unicode_data_file, version $unicode_version,
- * and $unicode_mirroring_file, version $mirroring_label, by the perl script CreateGetType.pl.
- *----------------------------------------------------------------------*/
-/* *INDENT-OFF* */
-
-#include "fribidi.h"
-
-__
-
-    # Short aliases used inside the tables.  The keys are sorted so that the
-    # generated file is identical from run to run (hash order is not).
-    my @short_names = map { $type_names{$_}->[0] } sort keys %type_names;
-    for my $short (@short_names) {
-        $c_file .= "#define $short FRIBIDI_PROP_TYPE_$short\n";
-    }
-    $c_file .= "\n\n";
-
-    # Index into @bidi_entities; it only moves forward across blocks.
-    my $i = 0;
-    my $block_array = <<__;
-FriBidiPropCharType *FriBidiPropertyBlocks[] = {
-__
-
-    # Table body => name of the first block that produced it.
-    my %seen_blocks;
-    for my $block (0..0x10FF) {
-        my $block_first = $block * 256;
-        my $block_last = $block_first + 255;
-        my @block_ranges;
-
-        # Collect the parts of all runs that fall inside this block,
-        # clipped to the block boundaries.
-        while ($i < @bidi_entities) {
-            my ($first,$last,$type) = split_entity ($bidi_entities[$i]);
-
-            if ($first > $block_last) {
-                last;
-            }
-            if ($last >= $block_first) {
-                push @block_ranges, [ $first > $block_first ? $first : $block_first,
-                                      $last < $block_last ? $last : $block_last,
-                                      $type ];
-            }
-            if ($last <= $block_last) {
-                $i++;
-            } else {
-                # The run continues into the next block; revisit it there.
-                last;
-            }
-        }
-
-
-        $block_array .= " ";
-        if (@block_ranges == 0) {
-            # assuming 0100..01FF will be all LTR
-            $block_array .= "FriBidiPropertyBlock0100";
-        } else {
-            my $name = sprintf ("FriBidiPropertyBlock%02lX00", $block);
-            my($title, $block_ctx) = create_block ($block, $name, \@block_ranges);
-
-            # Tables with identical contents are emitted only once; later
-            # blocks reuse the name of the first one.  The table body itself
-            # serves as the hash key.
-            if (exists $seen_blocks{$block_ctx}) {
-                $name = $seen_blocks{$block_ctx};
-            } else {
-                $num_used_blocks++;
-                $c_file .= $title . $block_ctx;
-                $seen_blocks{$block_ctx} = $name;
-            }
-            $block_array .= "$name";
-        }
-        $block_array .= sprintf (", /* %02lX00..%02lXFF */\n", $block, $block);
-    }
-    $c_file .= "/* $num_used_blocks blocks defined */\n\n";
-
-    $c_file .= $block_array
-        . "};\n\n";
-
-    for my $short (@short_names) {
-        $c_file .= "#undef $short\n";
-    }
-
-    $c_file .= "\n\n";
-
-    ######################################################################
-    # Mirrored characters.
-    ######################################################################
-
-    $c_file .= <<__;
-/*======================================================================
-// Mirrored characters include all the characters in the Unicode list
-// that have been declared as being mirrored and that have a mirrored
-// equivalent.
-//
-// There are lots of characters that are designed as being mirrored
-// but do not have any mirrored glyph, e.g. the sign for there exist.
-// Are these used in Arabic? That is are all the mathematical signs
-// that are assigned to be mirrorable actually mirrored in Arabic?
-// If that is the case, we'll change the below code to include also
-// characters that mirror to themself. It will then be the responsibility
-// of the display engine to actually mirror these.
-//----------------------------------------------------------------------*/
-
-struct {
- FriBidiChar ch, mirrored_ch;
-} FriBidiMirroredChars[] = {
-__
-
-    # One initializer per pair, comma-separated, no comma after the last.
-    $c_file .= join(",\n", map { " {0x$_->[0], 0x$_->[1]}" } @mirrors);
-    $c_file .= "\n" if @mirrors;
-    $c_file .= "};\n\n";
-    $c_file .= "gint nFriBidiMirroredChars = "
-        . scalar(@mirrors) . ";\n";
-
-    $c_file .="\n\n/* *INDENT-ON* */\n";
-
-    print $c_file;
-}