summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbehdad <behdad>2001-02-19 17:22:11 +0000
committerbehdad <behdad>2001-02-19 17:22:11 +0000
commitb20eaeeafbd5f8595ef4ac02cd7311c60cd45a26 (patch)
treec4036306e9af93dd6dc80fe161a5e3541a095d40
parent81388a9ef058e2acd63b9ff51296cef35d2aba75 (diff)
Committed all the changes made by the FarsiWeb project.
-rw-r--r--AUTHORS8
-rw-r--r--ChangeLog36
-rw-r--r--Makefile.am2
-rw-r--r--Makefile.in6
-rw-r--r--README34
-rw-r--r--THANKS8
-rw-r--r--TODO26
-rwxr-xr-xconformance/dotest5
-rwxr-xr-xconformance/findbad5
-rwxr-xr-xconformance/findbads11
-rw-r--r--conformance/reference/bidi.h71
-rw-r--r--conformance/reference/bidiref.cpp1557
-rwxr-xr-xconformance/test40
-rw-r--r--conformance/test_gen.c72
-rw-r--r--conformance/test_gen_rand.c32
-rw-r--r--fribidi.c1262
-rw-r--r--fribidi.h2
-rw-r--r--fribidi_get_type.c21
-rw-r--r--fribidi_main.c149
-rw-r--r--fribidi_tables.i116
-rw-r--r--fribidi_types.h23
-rwxr-xr-xmkinstalldirs2
-rw-r--r--test_fribidi.c9
-rw-r--r--unidata/BidiMirroring.txt238
-rwxr-xr-xunidata/CreateGetType.pl (renamed from CreateGetType.pl)82
-rw-r--r--unidata/PropList.txt3592
26 files changed, 6860 insertions, 549 deletions
diff --git a/AUTHORS b/AUTHORS
index 78e5711..5dc6bf2 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,7 +1,7 @@
-Dov Grobgeld <dov@imagic.weizmann.ac.il>:
+Dov Grobgeld <dov@imagic.weizmann.ac.il>
* Initial author and current maintainer.
-Owen Tayler <otaylor@redhat.com>:
+Owen Tayler <otaylor@redhat.com>
* Contributed very important memory leak and speed patches.
Raphael Finkel <raphael@cs.uky.edu>
@@ -19,3 +19,7 @@ Omer Zak <omerz@actcom.co.il>
Behdad Esfahbod <behdad@bamdad.org>
* Added explicit bidi support.
+ * Fixed many bugs in Arabic bidi types.
+
+Roozbeh Pournader <roozbeh@sharif.edu>
+ * Changed the Perl script used for automatic character data extraction.
diff --git a/ChangeLog b/ChangeLog
index 7e635be..3c64d11 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,17 +1,43 @@
-2000-01-13 Behdad Esfahbod <behdad@bamdad.org>
+2001-02-19 Behdad Esfahbod <behdad@bamdad.org>
+
+ * fribidi.c: Added support for all explicit marks (embeddings and
+ overrides), it means rules X1 .. X10 of specification.
+
+ * fribidi.c: Added support for L1 rule of specification, it's
+ resetting the char type of some characters, just before reordering.
* fribidi.c: Added 'AL' to types that change the paragraph embedding
level, it previously only checked for 'R'.
- * fribidi.c: Added some more debug information.
+ * fribidi.c: Define symbol NO_STDIO changed to DEBUG, to turn off
+ all debug codes compile time, also a function fribidi_set_debug,
+ turns on and off the debug output generations in run time.
+
+ * fribidi.c: Added some more debug information; all debug output has
+ been revised and now goes to stderr.
+
+ * fribidi.c: Fixed a bug in rule W5, that is when some european
+ terminators are between some arabic letters and some european
+ numbers.
- * fribidi_main.c: Added support for inputting explicit marks.
+ * fribidi_main.c: Added support for inputting explicit marks, in
+ CapRTL charset mode.
* fribidi_main.c: Added command line option for debug mode.
* fribidi_main.c: Fixed a small bug that would annoy when the size
of original string may be different to the derived one.
+ * fribidi_main.c: Fixed a small bug in padding when the size of the
+ out string was more than the page width.
+
+ * fribidi_types.h: Added some types needed for debug info.
+
+2001-02-19 Roozbeh Pournader <roozbeh@sharif.edu>
+
+ * CreateGetType.pl: changed to use BidiMirroring.txt instead
+ of UnicodeData.txt.
+
2000-11-12 Dov Grobgeld <dov@imagic.weizmann.ac.il>
* fribidi.c: [OZ] Applied Owen Taylor's patch from 2000-07-19 to
@@ -43,13 +69,13 @@
USE_SIMPLE_MALLOC that may be used to effect what code is being
generated.
- * fribidi_char_sets.h, fribidi_char_sets.c : Added Omer Zaks more
+ * fribidi_char_sets.h, fribidi_char_sets.c: Added Omer Zaks more
general ut8 to unicode and unicode to ut8 functions.
* fribidi.c: [OZ] Replaced int to gint to better support embedded
libraries.
- * Applied lots of code contributed by Omer Zak <omerz@actcom.co.il> ,
+ * Applied lots of code contributed by Omer Zak <omerz@actcom.co.il>:
detailed above.
diff --git a/Makefile.am b/Makefile.am
index c22a3bf..e829fa4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -22,7 +22,7 @@ libfribidiinc_HEADERS = \
fribidi_types.h
EXTRA_DIST = \
- CreateGetType.pl \
+ unidata/CreateGetType.pl \
tests/test-hebrew \
tests/test-capital-rtl \
run.tests \
diff --git a/Makefile.in b/Makefile.in
index f294984..bff0b52 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -89,7 +89,7 @@ libfribidiincdir = $(includedir)/fribidi
libfribidiinc_HEADERS = fribidi.h fribidi_char_sets.h fribidi_types.h
-EXTRA_DIST = CreateGetType.pl tests/test-hebrew tests/test-capital-rtl run.tests test.reference ANNOUNCE acinclude.m4
+EXTRA_DIST = unidata/CreateGetType.pl tests/test-hebrew tests/test-capital-rtl run.tests test.reference ANNOUNCE acinclude.m4
test_fribidi_SOURCES = test_fribidi.c
@@ -143,7 +143,7 @@ HEADERS = $(libfribidiinc_HEADERS)
DIST_COMMON = README ./stamp-h.in AUTHORS COPYING ChangeLog INSTALL \
Makefile.am Makefile.in NEWS THANKS TODO acconfig.h acinclude.m4 \
aclocal.m4 config.guess config.h.in config.sub configure configure.in \
-fribidi-config.in install-sh ltconfig ltmain.sh missing mkinstalldirs
+fribidi-config.in install-sh ltconfig ltmain.sh mkinstalldirs
DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
@@ -414,7 +414,7 @@ distdir: $(DISTFILES)
-rm -rf $(distdir)
mkdir $(distdir)
-chmod 777 $(distdir)
- $(mkinstalldirs) $(distdir)/tests
+ $(mkinstalldirs) $(distdir)/tests $(distdir)/unidata
@for file in $(DISTFILES); do \
d=$(srcdir); \
if test -d $$d/$$file; then \
diff --git a/README b/README
index b20a381..e015315 100644
--- a/README
+++ b/README
@@ -3,22 +3,22 @@ This is is FriBidi, a Free Implementation of the Unicode BiDi algorithm.
Background
==========
One of the missing links stopping the penetration of free software in
-Israel is the lack of support for Hebrew. In order to have proper
-Hebrew support, the BiDi algorithm must be implemented. It is my hope
-that this library will stimulate more Hebrew free software.
-
-Of course the BiDi algorithm is not limited to Hebrew, so I expect
-that our Arab neighbors will also find this software useful.
-
+the Middle East is the lack of support for Hebrew and Arabic. In order to have
+proper Arabic and Hebrew support, the BiDi algorithm must be implemented. It
+is our hope that this library will stimulate more free software in Middle
+Eastern countries.
Audience
========
-It is my hope that this library will stimulate the implementation of
+It is our hope that this library will stimulate the implementation of
Hebrew and Arabic in lots of free software. Here is a small list of
projects that would benifit from the use of the FriBidi library, but
-of course there are many more: Wine, Mozilla, Gtk, Gnome, Qt, KDE,
-AbiWord, lynx, StarOffice.
+of course there are many more: Wine, Mozilla, Gnome, Qt, KDE, AbiWord,
+lynx, OpenOffice.
+
+It may be interesting for you to know Fribidi is already being used in
+projects like GTK+ and Xterm.
Downloading
===========
@@ -33,18 +33,20 @@ See INSTALL for a description of how to build and install this library.
Implementation
==============
The library implements most of the algorithm as described in the
-"Unicode Bidirectional Algorithm, Working Draft Unicode Technical Report
-#9, http://www.unicode.org/unicode/reports/tr9/". The major feature
-that is currently missing in fribidi is the support for explicit overrides.
+"Unicode Standard Annex #9, The Bidirectional Algorithm,
+http://www.unicode.org/unicode/reports/tr9/". There are still some bugs in
+resolving Arabic weak types, but Fribidi is being tested against the Unicode
+reference code and, we believe, is one of the most conforming
+implementations of the algorithm.
-In the API I was was inspired by the document "Bi-Di languages support
+In the API we were inspired by the document "Bi-Di languages support
- BiDi API propasal", http://www.langbox.com/AraMosaic/mozilla/BiDi_API.html,
by Franck Portaneri <franck@langbox.com> which he wrote as a proposal
for adding BiDi support to Mozilla.
Internally the library uses Unicode entirely. The character property
function was automatically created from the Unicode property list
-document PropList.txt available from the Unicode ftp site. This
+document PropList.txt available from the Unicode FTP site. This
means that every Unicode character should be treated in strict
accordance with the Unicode specification. The same is true for the
mirroring of characters, which also works for all the characters
@@ -132,7 +134,7 @@ Bugs and comments
Report FriBidi bugs at:
- http://sourceforge.net/bugs/?group_id=2722
+ http://fribidi.sourceforge.net/bugs.php
And send your comments to:
diff --git a/THANKS b/THANKS
index da3f3a5..2d86b47 100644
--- a/THANKS
+++ b/THANKS
@@ -1,5 +1,5 @@
-To Franck Portaneri for the Mozilla BiDi languges support document
+We would like to thank:
-To Mark Davis at Unicode for writing the Unicode Technical report #9.
-
-To the linux-il list for valuable feedback.
+* Franck Portaneri for the Mozilla BiDi languages support document;
+* Mark Davis at Unicode for writing the Unicode Technical report #9;
+* The linux-il list for valuable feedback.
diff --git a/TODO b/TODO
index b4b7b5a..9331b46 100644
--- a/TODO
+++ b/TODO
@@ -1,10 +1,16 @@
+BiDi algorithm issues:
+
+* Handle the L3 rule of the BiDi algorithm
+
General issues:
* Clean up code
+* Add some more tests
+
* Better documentation
-* Write some utility programs.
+* Write some utility programs
* Write notes and a reference implementation for the following interactions
with fribidi:
@@ -33,12 +39,12 @@ General issues:
adding a single character.
* Add handling of precomposed and presentation unicode chars when converting
- to 8 bit encodings (breaking into several chars, or better match)
-
-* Persian 8bit encoding ISIRI-3342 has the notion of strong typed chars like
- spaces etc (all the range 0x00-0x3f,0x7f is strong typed left-to-right,
- and duplicated in 0x80-0xbf,0xff as a strong typed right-to-left version.
- several chars in the range 0xa0-0xbf have unicode counterparts; but
- others (and all in the range 0x80-0x9f,0xff) have not; so how should that
- be handled when converting to unicode to preserve at best the direction
- information?
+ to 8-bit encodings (breaking into several chars, or better match)
+
+* Persian 8-bit encoding ISIRI-3342 has the notion of strong typed chars like
+ spaces etc (all the characters in the range 0x00-0x7f are strongly typed
+ left-to-right, and sometimes have duplicates in 0x80-0xff as a strongly
+ typed right-to-left version. several chars in the range 0xa0-0xbf have
+ exact unicode counterparts; but others (and all in the range 0x80-0x9f,
+ and 0xff) have not; so how should that be handled when converting to
+ unicode to preserve at best the direction information?
diff --git a/conformance/dotest b/conformance/dotest
new file mode 100755
index 0000000..8599244
--- /dev/null
+++ b/conformance/dotest
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Generate one random test case into test.in and run ./test on it.
+# $1 goes to the generator; $2 and $3 are left unquoted so absent ones vanish.
+./test_gen_rand $1 > test.in
+./test -clean $2 $3 < test.in
diff --git a/conformance/findbad b/conformance/findbad
new file mode 100755
index 0000000..fda5af0
--- /dev/null
+++ b/conformance/findbad
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Find a single nonconforming test: repeat ./dotest until it prints something.
+
+while ! test "`./dotest $1`"; do printf .; done
+./test -clean < test.in
diff --git a/conformance/findbads b/conformance/findbads
new file mode 100755
index 0000000..5cfe8a4
--- /dev/null
+++ b/conformance/findbads
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Generate and test single tests, and archive the nonconforming ones.
+# Archives are numbered; $RANDOM and "echo -e" are not available in plain sh.
+n=0
+while true; do
+  ./findbad "$@"
+  printf 'bad test found!!!\a\n'
+  mkdir -p badtests
+  while test -e "badtests/$n.in"; do n=$((n + 1)); done  # never overwrite
+  cp test.in "badtests/$n.in"
+done
diff --git a/conformance/reference/bidi.h b/conformance/reference/bidi.h
new file mode 100644
index 0000000..2a22884
--- /dev/null
+++ b/conformance/reference/bidi.h
@@ -0,0 +1,71 @@
+// File: Bidi.h
+//
+// For use with Bidi Reference Implementation
+// For rights, disclaimers and description see associated BIDI.CPP file
+//
+// Copyright (C) 1999, ASMUS, Inc. All Rights Reserved
+
+#if WINDOWS_UI
+ // disable some warnings generated by windows.h
+ #pragma warning (disable : 4514 4201)
+#endif
+
+#if JAVA_INTERFACE
+ #include "biditest.h" // not part of the reference implementation
+#else
+ #if WINDOWS_UI > 1
+ #include "main.h" // private header, includes windows.h
+ #include "window.h" // center
+ #if DEBUG_ENABLED
+ #define DEBUGGING 1 // conditionally enable debug support
+ #endif // for private build
+ #elif WINDOWS_UI
+ #define ASSERT(x) // suppress ASSERTs for standalone (expands to nothing)
+ #include "windows.h" // standard include file for windows
+ #include "windowsx.h" // extended include file for windows
+ #else
+ // commandline version: printf based ASSERT that only reports the failed
+ // expression on stdout; it does not abort, so execution continues
+ #include <stdio.h>
+ #define ASSERT(x) if (!(x)) fprintf(stdout, "assert failed: %s\n", #x); else ;
+ #endif
+#endif
+
+// provides constants for UI elements declared in BIDI.RC
+// (only defined here when IDC_INPUT has not already been defined)
+#ifndef IDC_INPUT
+#define IDC_INPUT 2189
+#define IDC_LEVEL 2190
+#define IDC_EX_LEVEL 2190 // NOTE(review): same value as IDC_LEVEL - confirm against BIDI.RC
+#define IDC_DISPL 2191
+#define IDC_TYPES 2192
+#define IDC_N_TYPES 2193
+#define IDC_IM_LEVEL 2194
+#define IDC_W_TYPES 2195
+#define IDC_RLE 2196
+#define IDC_LRE 2197
+#define IDC_RLO 2198
+#define IDC_LRO 2199
+#define IDC_MIRROR 2200
+#define IDC_RLM 2201
+#define IDC_NBSP 2202
+#define IDC_X_TYPES 2202 // NOTE(review): same value as IDC_NBSP and 2203 is unused - confirm against BIDI.RC
+#define IDC_LRM 2204
+#define IDC_CLEAN 2205
+#define IDC_LS 2206
+
+#define IDC_LEGEND 2216
+#define IDC_BASELEVEL 2217
+#endif
+
+// duplicate some windows.h defines for commandline
+#ifndef TEXT
+#define TCHAR char
+#define LPTSTR char *
+#endif
+
+#if WINDOWS_UI
+BOOL CALLBACK BidiDlgProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam);
+#endif
+
+#ifdef _MAIN_H_
+BOOL CALLBACK BidiDlgProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam);
+#endif
diff --git a/conformance/reference/bidiref.cpp b/conformance/reference/bidiref.cpp
new file mode 100644
index 0000000..834a785
--- /dev/null
+++ b/conformance/reference/bidiref.cpp
@@ -0,0 +1,1557 @@
+#define DEBUGGING
+// Bidi.cpp - version 24
+
+// Reference implementation for Unicode Bidirectional Algorithm
+
+// Bidi include file
+#include "bidi.h"
+
+
+/*------------------------------------------------------------------------
+ File: Bidi.Cpp
+
+ Description
+ -----------
+
+ Sample Implementation of the Unicode Bidirectional Algorithm as it
+ was revised by Revision 5 of the Unicode Technical Report # 9
+ (1999-8-17)
+
+ This implementation is organized into several passes, each implemen-
+ ting one or more of the rules of the Unicode Bidi Algorithm. The
+ resolution of Weak Types and of Neutrals each use a state table
+ approach.
+
+ Both a printf based interface and a Windows DlgProc are provided for
+ interactive testing.
+
+ The file biditest.cpp contains hooks to link to a stress harness
+ comparing this implementation to a Java based implementation. This
+ harness was used to verify that the two implementations produce
+ identical results.
+
+ Build Notes
+ -----------
+
+ To compile the sample implementation please set the #define
+ directives above so the correct headers get included. Not all the
+ files are needed for all purposes; some, like biditest.cpp are
+ needed solely to recreate the environment used for verification
+ so that any bug reports can be verified quickly.
+
+ The Win32 version is provided as a dialog procedure. To create
+ a full executable using VC++:
+ - add the file to the project for a suitable Win32 program,
+ e.g. the GENERIC sample program from the SDK
+ - add the instructions in bidi.rc to the resource file
+ - add a command to launch the bidi dialog from your program
+
+ This code uses an extension to C++ that gives variables declared in
+ a for() statement function the same scope as the for() statement.
+ If your compiler does not support this extension, you may need to
+ move the declaration, e.g. int ich = 0; in front of the for statement.
+
+ Implementation Note
+ -------------------
+
+ NOTE: The Unicode Bidirectional Algorithm removes all explicit
+ formatting codes in rule X9, but states that this can be
+ simulated by conformant implementations. This implementation
+ attempts to demonstrate such a simulation
+
+ To demonstrate this, the current implementation does the
+ following:
+
+ in resolveExplicit()
+ - change LRE, LRO, RLE, RLO, PDF to BN
+ - assign nested levels to BN
+
+ in resolveWeak and resolveNeutrals
+ - assign L and R to BN's where they exist in place of
+ sor and eor by changing the last BN in front of a
+ level change to a strong type
+ - skip over BN's for the purpose of determining actions
+ - include BN in the count of deferred runs
+ which will resolve some of them to EN, AN and N
+
+ in resolveWhiteSpace
+ - set the level of any surviving BN to the base level,
+ or the level of the preceding character
+ - include LRE, LRO, RLE, RLO, PDF and BN in the count of
+ whitespace to be reset
+
+ This will result in the same order for non-BN characters as
+ if the BN characters had been removed.
+
+ The clean() function can be used to remove boundary marks for
+ verification purposes.
+
+ Notation
+ --------
+ Pointer variables generally start with the letter p
+ Counter variables generally start with the letter c
+ Index variables generally start with the letter i
+ Boolean variables generally start with the letter f
+
+ The enumerated bidirectional types have the same name as in the
+ description for the Unicode Bidirectional Algorithm
+
+ Update History:
+ --------------
+ - clean version for publication
+
+ - Last Revised 11-4-99
+
+ Disclaimer and legal rights
+ ---------------------------
+
+ This file contains bugs. All representations to the contrary are
+ void.
+
+ Source code in this file and the accompanying headers and included files
+ may be distributed free of charge by anyone, as long as full credit is
+ given and any and all liabilities are assumed by the recipient.
+
+ Written by: Asmus Freytag
+ Command line interface by: Rick McGowan
+ Verification and Test Harness: Doug Felt
+
+ Copyright (C) 1999, ASMUS, Inc. All Rights Reserved
+------------------------------------------------------------------------*/
+
+
+// === HELPER FUNCTIONS AND DECLARATIONS =================================
+
+#define odd(x) ((x) & 1) // nonzero when the lowest bit of x is set
+
+/*------------------------------------------------------------------------
+ Bidirectional Character Types
+
+ as defined by the Unicode Bidirectional Algorithm Table 3-7.
+
+ Note:
+
+ The list of bidirectional character types here is not grouped the
+ same way as the table 3-7, since the numeric values for the types
+ are chosen to keep the state and action tables compact.
+------------------------------------------------------------------------*/
+enum // bidi character types; ON..ET (values 0-9) match the column order of the weak-type tables
+{
+ // input types
+ // ON MUST be zero, code relies on ON = N = 0
+ ON = 0, // Other Neutral
+ L, // Left Letter
+ R, // Right Letter
+ AN, // Arabic Number
+ EN, // European Number
+ AL, // Arabic Letter (Right-to-left)
+ NSM, // Non-spacing Mark
+ CS, // Common Separator
+ ES, // European Separator
+ ET, // European Terminator (post/prefix e.g. $ and %)
+
+ // resolved types
+ BN, // Boundary neutral (type of RLE etc after explicit levels)
+
+ // input types,
+ S, // Segment Separator (TAB) // used only in L1
+ WS, // White space // used only in L1
+ B, // Paragraph Separator (aka PS)
+
+ // types for explicit controls
+ RLO, // these are used only in X1-X9
+ RLE,
+ LRO,
+ LRE,
+ PDF,
+
+ // resolved types, also resolved directions
+ N = ON, // alias, where ON, WS and S are treated the same
+};
+
+/*----------------------------------------------------------------------
+ The following array maps character codes to types for the purpose of
+ this sample implementation. The legend string gives a human readable
+ explanation of the pseudo alphabet.
+
+ For simplicity, characters entered by buttons are given a 1:1 mapping
+ between their type and pseudo character value. Pseudo characters that
+ can be typed from the keyboard are explained in the legend string.
+
+ Use the Unicode Character Database for the real values in real use.
+ ---------------------------------------------------------------------*/
+
+const int LRM = 4; // pseudo character code; TypesFromChar[4] is L
+const int RLM = 5; // pseudo character code; TypesFromChar[5] is R
+
+const int LS = 0x13; // pseudo character code; TypesFromChar[0x13] is WS
+
+int TypesFromChar[] = // bidi type of each pseudo character, 128 entries indexed by character code 0x00-0x7f
+{
+//0 1 2 3 4 5 6 7 8 9 a b c d e f
+ ON, ON, ON, ON, L, R, ON, ON, ON, ON, ON, ON, ON, B, RLO,RLE, /*00-0f*/
+ LRO,LRE,PDF,WS, ON, ON, ON, ON, ON, ON, ON, ON, ON, ON, ON, ON, /*10-1f*/
+
+ WS, ON, ON, ON, ET, ON, ON, ON, ON, ON, ON, ET, CS, ON, ES, ES, /*20-2f*/
+ EN, EN, EN, EN, EN, EN, AN, AN, AN, AN, CS, ON, ON, ON, ON, ON, /*30-3f*/
+ R, AL, AL, AL, AL, AL, AL, R, R, R, R, R, R, R, R, R, /*40-4f*/
+ R, R, R, R, R, R, R, R, R, R, R, ON, B, ON, ON, ON, /*50-5f*/
+ NSM, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, /*60-6f*/
+ L, L, L, L, L, L, L, L, L, L, L, ON, S, ON, WS, ON, /*70-7f*/
+};
+
+// WS, LS and S are not explicitly needed except for L1. Therefore this
+// Table conflates ON, S, WS, and LS to N, all others unchanged.
+// Entries are listed in the order of the bidi type enum, so a type value
+// indexes its conflated replacement.
+int NTypes[] = {
+ N, // ON,
+ L, // L,
+ R, // R,
+ AN, // AN,
+ EN, // EN,
+ AL, // AL
+ NSM, // NSM
+ CS, // CS
+ ES, // ES
+ ET, // ET
+ BN, // BN
+ N, // S
+ N, // WS
+ B, // B
+ RLO, // RLO
+ RLE, // RLE
+ LRO, // LRO
+ LRE, // LRE
+ PDF, // PDF
+ ON, // LS - NOTE(review): the type enum has no LS member (PDF is its last value); this 20th entry looks unreachable - confirm
+};
+
+// Returns the bidi class of pseudo character ch, looked up in TypesFromChar
+// and then mapped through NTypes (which conflates ON, S and WS to N).
+// Characters outside 0x00-0x7f have no table entry and are reported as ON
+// rather than indexing out of bounds; ASSERT alone only prints (or is a
+// no-op) and does not stop execution.
+int ClassFromChN(TCHAR ch)
+{
+    // TypesFromChar has 128 entries, so 0x7f itself is a valid index
+    ASSERT(ch <= 0x7f && ch >= 0);
+    if (ch < 0 || ch > 0x7f)
+        return ON;
+    return NTypes[TypesFromChar[ch]];
+}
+
+// Returns the bidi class of pseudo character ch exactly as listed in
+// TypesFromChar, i.e. with WS, S and B kept distinct.
+// Characters outside 0x00-0x7f have no table entry and are reported as ON
+// rather than indexing out of bounds; ASSERT alone only prints (or is a
+// no-op) and does not stop execution.
+int ClassFromChWS(TCHAR ch)
+{
+    // TypesFromChar has 128 entries, so 0x7f itself is a valid index
+    ASSERT(ch <= 0x7f && ch >= 0);
+    if (ch < 0 || ch > 0x7f)
+        return ON;
+    return TypesFromChar[ch];
+}
+
+// === DISPLAY SUPPORT =================================================
+
+ enum // Display character codes: ASCII characters used as entries of CharFromTypes
+ {
+ RIGHT = '<', // rtl arrow
+ LEFT = '>', // ltr arrow
+ PUSH = '+', // down arrow
+ POP = '-', // up arrow
+ LSEP = '=', // double dagger
+ NEUTRAL = ' ', // rtl/ltr double headed arrow
+ ALPHA = 'a',
+ };
+
+// display support: one printable character per bidi type, with entries
+// listed in the order of the bidi type enum (see the label on each line)
+TCHAR CharFromTypes[] =
+{
+ NEUTRAL, // ON,
+ LEFT, // L,
+ RIGHT, // R,
+ '9', // AN,
+ '1', // EN,
+ ALPHA, // AL
+ '@', // NSM
+ '.', // CS
+ ',', // ES
+ '$', // ET
+ ':', // BN
+ 'X', // S
+ '_', // WS
+ 'B', // B
+ PUSH, // RLO
+ PUSH, // RLE
+ PUSH, // LRO
+ PUSH, // LRE
+ POP, // PDF
+ LSEP, // LS
+
+};
+
+
+// This works only for testing: the table has 25 entries (levels 0-24),
+// a full implementation would need 61 levels....
+TCHAR CharFromLevel[] =
+{
+ '0', '1', '2', '3', '4',
+ '5', '6', '7', '8', '9',
+ 'A', 'B', 'C', 'D', 'E',
+ 'F', 'X', 'Y', 'Z', 'o',
+ 'o', 'o', 'o', 'o', 'o' // overhang levels
+};
+
+// === HELPER FUNCTIONS ================================================
+
+// Reverse the first cch characters of psz in place.
+void reverse(LPTSTR psz, int cch)
+{
+    int iLeft = 0;
+    int iRight = cch - 1;
+
+    // swap the outermost pair, then walk both ends toward the middle
+    while (iLeft < iRight)
+    {
+        TCHAR chSwap = psz[iRight];
+        psz[iRight] = psz[iLeft];
+        psz[iLeft] = chSwap;
+        iLeft++;
+        iRight--;
+    }
+}
+
+// Overwrite the cval entries of pval that immediately precede index iStart
+// (iStart itself is not touched) with the value nval.
+void SetDeferredRun(int *pval, int cval, int iStart, int nval)
+{
+    int iFirst = iStart - cval;     // first index belonging to the run
+    int idx;
+
+    for (idx = iFirst; idx < iStart; idx++)
+    {
+        pval[idx] = nval;
+    }
+}
+
+// === ASSIGNING BIDI CLASSES ============================================
+
+/*------------------------------------------------------------------------
+ Function: classify
+
+ Determines the character classes for all following
+ passes of the algorithm
+
+ Input: Text string
+ Character count
+ Whether to report types as WS, ON, S or as N (false)
+
+ Output: Array of directional classes
+------------------------------------------------------------------------*/
+// Fill pcls[0..cch) with the class of each character of pszText.
+// fWS selects ClassFromChWS (WS, ON, S kept apart); otherwise ClassFromChN
+// is used. Returns the number of characters classified.
+int classify(const LPTSTR pszText, int * pcls, int cch, bool fWS = false)
+{
+    // pick the lookup once instead of duplicating the loop per mode
+    int (*pfnClass)(TCHAR) = fWS ? ClassFromChWS : ClassFromChN;
+
+    int ich = 0;
+    while (ich < cch)
+    {
+        pcls[ich] = pfnClass(pszText[ich]);
+        ich++;
+    }
+    return ich;
+}
+
+// === THE PARAGRAPH LEVEL ===============================================
+
+/*------------------------------------------------------------------------
+ Function: resolveParagraphs
+
+ Resolves the input strings into blocks over which the algorithm
+ is then applied.
+
+ Implements Rule P1 of the Unicode Bidi Algorithm
+
+ Input: Text string
+ Character count
+
+ Output: revised character count
+
+ Note: This is a very simplistic function. In effect it restricts
+ the action of the algorithm to the first paragraph in the input
+ where a paragraph ends at the end of the first block separator
+ or at the end of the input text.
+
+------------------------------------------------------------------------*/
+
+int resolveParagraphs(int * types, int cch)
+{
+    int ich = 0;
+    while (ich < cch)
+    {
+        if (types[ich] == B)
+        {
+            // stop after first B, make it a BN for use in the next steps
+            types[ich] = BN;
+            return ich + 1;
+        }
+        ich++;
+    }
+    // no B found: the paragraph is the whole input
+    return ich;
+}
+
+/*------------------------------------------------------------------------
+ Function: baseLevel
+
+ Determines the base level
+
+ Implements rule P2 of the Unicode Bidi Algorithm.
+
+ Input: Array of directional classes
+ Character count
+
+ Note: Ignores explicit embeddings
+------------------------------------------------------------------------*/
+// Returns 0 if the first strong class in pcls[0..cch) is L, 1 if it is
+// R or AL, and 0 when no strong class is present.
+int baseLevel(const int * pcls, int cch)
+{
+    int ich = 0;
+    while (ich < cch)
+    {
+        int cls = pcls[ich++];
+
+        // strong left
+        if (cls == L)
+            return 0;
+
+        // strong right
+        if (cls == R || cls == AL)
+            return 1;
+    }
+    return 0;
+}
+
+//====== RESOLVE EXPLICIT ================================================
+
+// Smallest even value strictly greater than i.
+int GreaterEven(int i)
+{
+    int step = odd(i) ? 1 : 2;
+    return i + step;
+}
+
+// Smallest odd value strictly greater than i.
+int GreaterOdd(int i)
+{
+    int step = odd(i) ? 2 : 1;
+    return i + step;
+}
+
+// Direction implied by an embedding level: R for odd levels, L for even.
+int EmbeddingDirection(int level)
+{
+    if (odd(level))
+        return R;
+    return L;
+}
+
+
+/*------------------------------------------------------------------------
+ Function: resolveExplicit
+
+ Recursively resolves explicit embedding levels and overrides.
+ Implements rules X1-X9, of the Unicode Bidirectional Algorithm.
+
+ Input: Base embedding level and direction
+ Character count
+
+ Output: Array of embedding levels
+
+ In/Out: Array of direction classes
+
+
+ Note: The function uses two simple counters to keep track of
+ matching explicit codes and PDF. Use the default argument for
+ the outermost call. The nesting counter counts the recursion
+ depth and not the embedding level.
+------------------------------------------------------------------------*/
+#ifdef DEBUGGING
+const int MAX_LEVEL = 15; // leave at the smaller level so we can test hitting
+ // the limit more easily in interactive mode
+#else
+const int MAX_LEVEL = 61; // the real value
+#endif
+
+int resolveExplicit(int level, int dir, int * pcls, int * plevel, int cch,
+ int nNest = 0)
+{
+ // always called with a valid nesting level
+ // nesting levels are != embedding levels
+ int nLastValid = nNest; // nesting depth on entry; a PDF seen at this depth ends this call
+
+ // check input values
+ ASSERT(nNest >= 0 && level >= 0 && level <= MAX_LEVEL);
+
+ // process the text
+ int ich; for (ich = 0; ich < cch; ich++)
+ {
+ int cls = pcls[ich];
+ switch (cls)
+ {
+ case LRO:
+ case LRE:
+ nNest++;
+ if (GreaterEven(level) <= MAX_LEVEL)
+ {
+ plevel[ich] = GreaterEven(level);
+ pcls[ich] = BN;
+ ich += resolveExplicit(plevel[ich], (cls == LRE ? N : L), // embedding passes N (no override), override passes L
+ &pcls[ich+1], &plevel[ich+1],
+ cch - (ich+1), nNest); // return value = characters handled by the nested call
+ nNest--;
+ continue;
+ }
+ cls = pcls[ich] = BN; // level limit exceeded: code becomes BN, level stays unchanged
+ break;
+
+ case RLO:
+ case RLE:
+ nNest++;
+ if (GreaterOdd(level) <= MAX_LEVEL)
+ {
+ plevel[ich] = GreaterOdd(level);
+ pcls[ich] = BN;
+ ich += resolveExplicit(plevel[ich], (cls == RLE ? N : R), // embedding passes N (no override), override passes R
+ &pcls[ich+1], &plevel[ich+1],
+ cch - (ich+1), nNest); // return value = characters handled by the nested call
+ nNest--;
+ continue;
+ }
+ cls = pcls[ich] = BN; // level limit exceeded: code becomes BN, level stays unchanged
+ break;
+
+ case PDF:
+ cls = pcls[ich] = BN;
+ if (nNest)
+ {
+ if (nLastValid < nNest)
+ {
+ nNest--; // PDF closes a code that was counted but not given a level
+ }
+ else
+ {
+ cch = ich; // break the loop, but complete body
+ }
+ }
+ }
+
+ // Apply the override
+ if (dir != N)
+ {
+ cls = dir;
+ }
+ plevel[ich] = level;
+ if (pcls[ich] != BN) // BN entries keep their class even under an override
+ pcls[ich] = cls;
+ }
+
+ return ich; // number of characters processed by this call
+}
+
+// === RESOLVE WEAK TYPES ================================================
+
+enum // possible states; the values are listed in the row order of stateWeak and actionWeak
+{
+ xa, // arabic letter
+ xr, // right letter
+ xl, // left letter
+
+ ao, // arabic lett. foll by ON
+ ro, // right lett. foll by ON
+ lo, // left lett. foll by ON
+
+ rt, // ET following R
+ lt, // ET following L
+
+ cn, // EN, AN following AL
+ ra, // arabic number foll R
+ re, // european number foll R
+ la, // arabic number foll L
+ le, // european number foll L
+
+ ac, // CS following cn
+ rc, // CS following ra
+ rs, // CS,ES following re
+ lc, // CS following la
+ ls, // CS,ES following le
+
+ ret, // ET following re
+ let, // ET following le
+} ;
+
+int stateWeak[][10] = // next state: one row per current state, one column per input type N..ET
+{
+ // N, L, R AN, EN, AL,NSM, CS, ES, ET,
+/*xa*/ ao, xl, xr, cn, cn, xa, xa, ao, ao, ao, /* arabic letter */
+/*xr*/ ro, xl, xr, ra, re, xa, xr, ro, ro, rt, /* right letter */
+/*xl*/ lo, xl, xr, la, le, xa, xl, lo, lo, lt, /* left letter */
+
+/*ao*/ ao, xl, xr, cn, cn, xa, ao, ao, ao, ao, /* arabic lett. foll by ON*/
+/*ro*/ ro, xl, xr, ra, re, xa, ro, ro, ro, rt, /* right lett. foll by ON */
+/*lo*/ lo, xl, xr, la, le, xa, lo, lo, lo, lt, /* left lett. foll by ON */
+
+/*rt*/ ro, xl, xr, ra, re, xa, rt, ro, ro, rt, /* ET following R */
+/*lt*/ lo, xl, xr, la, le, xa, lt, lo, lo, lt, /* ET following L */
+
+/*cn*/ ao, xl, xr, cn, cn, xa, cn, ac, ao, ao, /* EN, AN following AL */
+/*ra*/ ro, xl, xr, ra, re, xa, ra, rc, ro, rt, /* arabic number foll R */
+/*re*/ ro, xl, xr, ra, re, xa, re, rs, rs,ret, /* european number foll R */
+/*la*/ lo, xl, xr, la, le, xa, la, lc, lo, lt, /* arabic number foll L */
+/*le*/ lo, xl, xr, la, le, xa, le, ls, ls,let, /* european number foll L */
+
+/*ac*/ ao, xl, xr, cn, cn, xa, ao, ao, ao, ao, /* CS following cn */
+/*rc*/ ro, xl, xr, ra, re, xa, ro, ro, ro, rt, /* CS following ra */
+/*rs*/ ro, xl, xr, ra, re, xa, ro, ro, ro, rt, /* CS,ES following re */
+/*lc*/ lo, xl, xr, la, le, xa, lo, lo, lo, lt, /* CS following la */
+/*ls*/ lo, xl, xr, la, le, xa, lo, lo, lo, lt, /* CS,ES following le */
+
+/*ret*/ ro, xl, xr, ra, re, xa,ret, ro, ro,ret, /* ET following re */
+/*let*/ lo, xl, xr, la, le, xa,let, lo, lo,let, /* ET following le */
+
+
+};
+
+enum // possible actions
+{
+ // primitives
+ IX = 0x100, // increment (a flag bit above the two 4-bit type fields)
+ XX = 0xF, // no-op (value of a 4-bit field that requests no change)
+
+ // actions: (type for the deferred run << 4) + type for the current
+ // character, plus IX when the run is to be extended; the two fields
+ // are read back by GetDeferredType() and GetResolvedType()
+ xxx = (XX << 4) + XX, // no-op
+ xIx = IX + xxx, // increment run
+ xxN = (XX << 4) + ON, // set current to N
+ xxE = (XX << 4) + EN, // set current to EN
+ xxA = (XX << 4) + AN, // set current to AN
+ xxR = (XX << 4) + R, // set current to R
+ xxL = (XX << 4) + L, // set current to L
+ Nxx = (ON << 4) + 0xF, // set run to neutral
+ Axx = (AN << 4) + 0xF, // set run to AN
+ ExE = (EN << 4) + EN, // set run to EN, set current to EN
+ NIx = (ON << 4) + 0xF + IX, // set run to N, increment
+ NxN = (ON << 4) + ON, // set run to N, set current to N
+ NxR = (ON << 4) + R, // set run to N, set current to R
+ NxE = (ON << 4) + EN, // set run to N, set current to EN
+
+ AxA = (AN << 4) + AN, // set run to AN, set current to AN
+ NxL = (ON << 4) + L, // set run to N, set current to L
+ LxL = (L << 4) + L, // set run to L, set current to L
+} ;
+
+
+int actionWeak[][10] = // action code: one row per current state, one column per input type N..ET
+{
+ // N, L, R AN, EN, AL, NSM, CS, ES, ET,
+/*xa*/ xxx, xxx, xxx, xxx, xxA, xxR, xxR, xxN, xxN, xxN, /* arabic letter */
+/*xr*/ xxx, xxx, xxx, xxx, xxE, xxR, xxR, xxN, xxN, xIx, /* right letter */
+/*xl*/ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xIx, /* left letter */
+
+/*ao*/ xxx, xxx, xxx, xxx, xxA, xxR, xxN, xxN, xxN, xxN, /* arabic lett. foll by ON */
+/*ro*/ xxx, xxx, xxx, xxx, xxE, xxR, xxN, xxN, xxN, xIx, /* right lett. foll by ON */
+/*lo*/ xxx, xxx, xxx, xxx, xxL, xxR, xxN, xxN, xxN, xIx, /* left lett. foll by ON */
+
+/*rt*/ Nxx, Nxx, Nxx, Nxx, ExE, NxR, xIx, NxN, NxN, xIx, /* ET following R */
+/*lt*/ Nxx, Nxx, Nxx, Nxx, LxL, NxR, xIx, NxN, NxN, xIx, /* ET following L */
+
+/*cn*/ xxx, xxx, xxx, xxx, xxA, xxR, xxA, xIx, xxN, xxN, /* EN, AN following AL */
+/*ra*/ xxx, xxx, xxx, xxx, xxE, xxR, xxA, xIx, xxN, xIx, /* arabic number foll R */
+/*re*/ xxx, xxx, xxx, xxx, xxE, xxR, xxE, xIx, xIx, xxE, /* european number foll R */
+/*la*/ xxx, xxx, xxx, xxx, xxL, xxR, xxA, xIx, xxN, xIx, /* arabic number foll L */
+/*le*/ xxx, xxx, xxx, xxx, xxL, xxR, xxL, xIx, xIx, xxL, /* european number foll L */
+
+/*ac*/ Nxx, Nxx, Nxx, Axx, AxA, NxR, NxN, NxN, NxN, NxN, /* CS following cn */
+/*rc*/ Nxx, Nxx, Nxx, Axx, NxE, NxR, NxN, NxN, NxN, NIx, /* CS following ra */
+/*rs*/ Nxx, Nxx, Nxx, Nxx, ExE, NxR, NxN, NxN, NxN, NIx, /* CS,ES following re */
+/*lc*/ Nxx, Nxx, Nxx, Axx, NxL, NxR, NxN, NxN, NxN, NIx, /* CS following la */
+/*ls*/ Nxx, Nxx, Nxx, Nxx, LxL, NxR, NxN, NxN, NxN, NIx, /* CS,ES following le */
+
+/*ret*/xxx, xxx, xxx, xxx, xxE, xxR, xxE, xxN, xxN, xxE, /* ET following re */
+/*let*/xxx, xxx, xxx, xxx, xxL, xxR, xxL, xxN, xxN, xxL, /* ET following le */
+};
+
+// Extract the class to give to the deferred run: bits 4-7 of the action code.
+int GetDeferredType(int action)
+{
+    const int clsDeferred = (action >> 4) & 0x0F;
+    return clsDeferred;
+}
+
+// Extract the class to give to the current character: bits 0-3 of the
+// action code.
+int GetResolvedType(int action)
+{
+    const int clsResolved = action & 0x0F;
+    return clsResolved;
+}
+
+/* Note on action table:
+
+ States can be of two kinds:
+ - Immediate Resolution State, where each input token
+ is resolved as soon as it is seen. These states have
+ only single action codes (xxN) or the no-op (xxx)
+ for static input tokens.
+ - Deferred Resolution State, where input tokens
+ either extend the run (xIx) or resolve its Type (e.g. Nxx).
+
+ Input classes are of three kinds
+ - Static Input Token, where the class of the token remains
+ unchanged on output (AN, L, N, R)
+ - Replaced Input Token, where the class of the token is
+ always replaced on output (AL, BN, NSM, CS, ES, ET)
+ - Conditional Input Token, where the class of the token is
+ changed on output in some, but not all, cases (EN)
+
+ Where tokens are subject to change, a double action
+ (e.g. NxA, or NxN) is _required_ after deferred states,
+ resolving both the deferred state and changing the current token.
+
+ These properties of the table are verified by assertions below.
+ This code is needed only during debugging and maintenance
+*/
+#ifdef DEBUGGING
+
+// True for the states in which a run of characters is waiting to be
+// resolved: the two "ET following ..." states (rt, lt) and the five
+// "CS/ES following a number" states (ac, rc, rs, lc, ls).
+bool IsDeferredState(int state)
+{
+    return state == rt || state == lt
+        || state == ac || state == rc || state == rs
+        || state == lc || state == ls;
+}
+
+// True for the input classes whose value may be rewritten by the weak pass.
+// EN is included because it is only sometimes replaced.
+bool IsModifiedClass(int cls)
+{
+    return cls == AL || cls == NSM || cls == ES
+        || cls == CS || cls == ET || cls == EN;
+}
+
+// Inclusive bounds of the rows (states) that VerifyTables walks.
+const int state_first = xa;
+const int state_last = let;
+
+// Inclusive bounds of the columns (input classes) that VerifyTables walks.
+const int cls_first = N;
+const int cls_last = ET;
+
+
+// Verify these properties of the tables
+// Debug-only consistency check of stateWeak against actionWeak.
+// Every (state, class) cell is checked with ASSERT; the function itself
+// always returns 1, because `done` is never changed after initialisation.
+int VerifyTables()
+{
+ int done = 1;
+
+ int cls; for (cls = cls_first; cls <= cls_last; cls++)
+ {
+ int state; for (state = state_first; state <= state_last; state++)
+ {
+ int action= actionWeak[state][cls];
+ int nextstate = stateWeak[state][cls];
+
+ // the IX bit means the current character joins a deferred run
+ if (IX & action)
+ {
+ // make sure when we defer we get to a
+ // deferred state
+ ASSERT(IsDeferredState(nextstate));
+
+ // Make sure permanent classes are not deferred
+ ASSERT(IsModifiedClass(cls));
+ }
+ else
+ {
+ // make sure we are not deferring without
+ // incrementing a run
+ ASSERT(!IsDeferredState(nextstate));
+
+ // make sure modified classes are modified
+ if (IsModifiedClass(cls))
+ {
+ ASSERT(GetResolvedType(action) != XX);
+ }
+ else
+ {
+ ASSERT(GetResolvedType(action) == XX);
+ }
+ }
+
+ // if we are deferring, make sure things are resolved
+ if (IsDeferredState(state))
+ {
+ // Deferred states must increment or have deferred type
+ ASSERT(action == xIx || GetDeferredType(action) != XX);
+ }
+ else
+ {
+ ASSERT(GetDeferredType(action) == XX);
+ }
+ }
+ };
+ return done;
+}
+#endif
+
+/*------------------------------------------------------------------------
+ Function: resolveWeak
+
+ Resolves the directionality of numeric and other weak character types
+
+ Implements rules W1-W7 of the Unicode Bidirectional Algorithm.
+
+ Input: Array of embedding levels
+ Character count
+
+ In/Out: Array of directional classes
+
+ Note: On input only these directional classes are expected
+ AL, HL, R, L, ON, BN, NSM, AN, EN, ES, ET, CS,
+------------------------------------------------------------------------*/
+// Single left-to-right pass that drives the stateWeak/actionWeak machine.
+//   baselevel : paragraph level; selects the start state and the level
+//               assumed before the first character
+//   pcls      : directional classes, rewritten in place
+//   plevel    : embedding levels; entries belonging to BN characters are
+//               overwritten here, other entries are only read
+//   cch       : number of characters
+// SetDeferredRun and EmbeddingDirection are defined elsewhere in this file.
+void resolveWeak(int baselevel, int *pcls, int *plevel, int cch)
+{
+ // xr / xl are the "right letter" / "left letter" rows of the tables
+ int state = odd(baselevel) ? xr : xl;
+ int cls;
+
+ // level currently in effect; it is only updated inside the BN branch
+ // below, i.e. when a BN sits directly in front of a level change
+ int level = baselevel;
+
+ // length of the deferred run that ends just before the current character
+ int cchRun = 0;
+
+ int ich; for (ich = 0; ich < cch; ich++)
+ {
+#ifdef DEBUGGING
+ if (pcls[ich] > BN) {
+ fprintf(stdout, "error: pcls[%d] > BN (%d)\n", ich, pcls[ich]);
+ }
+#endif
+
+ // ignore boundary neutrals
+ if (pcls[ich] == BN)
+ {
+ // must flatten levels unless at a level change;
+ plevel[ich] = level;
+
+ // lookahead for level changes
+ if (ich + 1 == cch && level != baselevel)
+ {
+ // have to fixup last BN before end of the loop, since
+ // its fix-upped value will be needed below the assert
+ pcls[ich] = EmbeddingDirection(level);
+ }
+ else if (ich + 1 < cch && level != plevel[ich+1] && pcls[ich+1] != BN)
+ {
+ // fixup LAST BN in front / after a level run to make
+ // it act like the SOR/EOR in rule X10
+ // the BN takes the higher of the two adjoining levels
+ int newlevel = plevel[ich+1];
+ if (level > newlevel) {
+ newlevel = level;
+ }
+ plevel[ich] = newlevel;
+
+ // must match assigned level
+ pcls[ich] = EmbeddingDirection(newlevel);
+ level = plevel[ich+1];
+ }
+ else
+ {
+ // don't interrupt runs
+ // (the BN is counted as part of an already open run, then skipped)
+ if (cchRun)
+ {
+ cchRun++;
+ }
+ continue;
+ }
+ }
+
+ ASSERT(pcls[ich] <= BN);
+ cls = pcls[ich];
+
+ int action = actionWeak[state][cls];
+
+ // resolve the directionality for deferred runs
+ int clsRun = GetDeferredType(action);
+ if (clsRun != XX)
+ {
+ SetDeferredRun(pcls, cchRun, ich, clsRun);
+ cchRun = 0;
+ }
+
+ // resolve the directionality class at the current location
+ int clsNew = GetResolvedType(action);
+ if (clsNew != XX)
+ pcls[ich] = clsNew;
+
+ // increment a deferred run
+ if (IX & action)
+ cchRun++;
+
+ // the transition uses the ORIGINAL class (cls), not the rewritten one
+ state = stateWeak[state][cls];
+ }
+
+ // resolve any deferred runs
+ // use the direction of the current level to emulate PDF
+ cls = EmbeddingDirection(level);
+
+ // resolve the directionality for deferred runs
+ // (ich == cch here, so the run closed is the one ending at end of text)
+ int clsRun = GetDeferredType(actionWeak[state][cls]);
+ if (clsRun != XX)
+ SetDeferredRun(pcls, cchRun, ich, clsRun);
+}
+
+// === RESOLVE NEUTRAL TYPES =============================================
+
+// action values
+// Action codes stored in actionNeutrals. Layout as read by
+// GetDeferredNeutrals / GetResolvedNeutrals and by resolveNeutrals:
+//   bits 0-3 : class to assign to the current character
+//   bits 4-7 : class to assign to the deferred run of neutrals; the value
+//              (En >> 4) is a marker meaning "use the embedding direction"
+//   bit  8   : In, count the current character into the deferred run
+enum
+{
+ // action to resolve previous input
+ nL = L, // resolve EN to L
+ En = 3 << 4, // resolve neutrals run to embedding level direction
+ Rn = R << 4, // resolve neutrals run to strong right
+ Ln = L << 4, // resolved neutrals run to strong left
+ In = (1<<8), // increment count of deferred neutrals
+ // NOTE(review): (1<<4) equals Ln only if L == 1, which the column order
+ // "N, L, R, AN, EN" of the tables suggests — confirm against the class enum
+ LnL = (1<<4)+L, // set run and EN to L
+};
+
+// Return the class to give to the deferred run of neutrals (bits 4-7 of
+// the action). The marker value En stands for "direction of the embedding
+// level" and is translated through EmbeddingDirection(level).
+int GetDeferredNeutrals(int action, int level)
+{
+    const int clsRun = (action >> 4) & 0xF;
+
+    return (clsRun == (En >> 4)) ? EmbeddingDirection(level) : clsRun;
+}
+
+// Return the class to give to the current character: bits 0-3 of the
+// action code, where 0 means "leave the character unchanged".
+//
+// The increment flag (In) lives in bit 8 and is therefore already removed
+// by the mask, so no separate test for it is needed. The former
+// comparison of the masked value against In could never be true.
+int GetResolvedNeutrals(int action)
+{
+    return action & 0xF;
+}
+
+// state values
+// States of the neutral-resolution machine. The enumerators take the
+// implicit values 0..5 and are used as row indices into actionNeutrals
+// and stateNeutrals, so their order must match the rows of those tables.
+enum
+{
+ // new temporary class
+ r, // R and characters resolved to R
+ l, // L and characters resolved to L
+ rn, // N preceded by right
+ ln, // N preceded by left
+ a, // AN preceded by left (the abbrev 'la' is used up above)
+ na, // N preceded by a
+} ;
+
+
+/*------------------------------------------------------------------------
+ Notes:
+
+ By rule W7, whenever a EN is 'dominated' by an L (including start of
+ run with embedding direction = L) it is resolved to, and further treated
+ as L.
+
+ This leads to the need for 'a' and 'na' states.
+------------------------------------------------------------------------*/
+
+// actionNeutrals[state][cls]: action code applied by resolveNeutrals when a
+// character of class cls is seen in the given state. 0 means no action.
+int actionNeutrals[][5] =
+{
+// N, L, R, AN, EN, = cls
+ // state =
+ In, 0, 0, 0, 0, // r right
+ In, 0, 0, 0, L, // l left
+
+ In, En, Rn, Rn, Rn, // rn N preceded by right
+ In, Ln, En, En, LnL,// ln N preceded by left
+
+ In, 0, 0, 0, L, // a AN preceded by left
+ In, En, Rn, Rn, En, // na N preceded by a
+} ;
+
+// stateNeutrals[state][cls]: next state of the neutral-resolution machine
+// after a character of class cls has been seen in the given state.
+int stateNeutrals[][5] =
+{
+// N, L, R, AN, EN = cls
+ // state =
+ rn, l, r, r, r, // r right
+ ln, l, r, a, l, // l left
+
+ rn, l, r, r, r, // rn N preceded by right
+ ln, l, r, a, l, // ln N preceded by left
+
+ na, l, r, a, l, // a AN preceded by left
+ na, l, r, a, l, // na N preceded by a
+} ;
+
+/*------------------------------------------------------------------------
+ Function: resolveNeutrals
+
+ Resolves the directionality of neutral character types.
+
+ Implements rules W7, N1 and N2 of the Unicode Bidi Algorithm.
+
+ Input: Array of embedding levels
+ Character count
+ Baselevel
+
+ In/Out: Array of directional classes
+
+ Note: On input only these directional classes are expected
+ R, L, N, AN, EN and BN
+
+ W8 resolves a number of ENs to L
+------------------------------------------------------------------------*/
+// Single left-to-right pass that drives the stateNeutrals/actionNeutrals
+// machine.
+//   baselevel : paragraph level; selects the start state and the level
+//               assumed before the first character
+//   pcls      : directional classes, rewritten in place
+//   plevel    : embedding levels, read only
+//   cch       : number of characters
+// SetDeferredRun and EmbeddingDirection are defined elsewhere in this file.
+void resolveNeutrals(int baselevel, int *pcls, const int *plevel, int cch)
+{
+ // the state at the start of text depends on the base level
+ int state = odd(baselevel) ? r : l;
+ int cls;
+
+ // length of the run of neutrals that ends just before the current character
+ int cchRun = 0;
+ // level of the previous non-BN character (updated at the END of each
+ // iteration, so it lags the current character by one)
+ int level = baselevel;
+
+ int ich; for (ich = 0; ich < cch; ich++)
+ {
+ // ignore boundary neutrals
+ if (pcls[ich] == BN)
+ {
+ // include in the count for a deferred run
+ if (cchRun)
+ cchRun++;
+
+ // skip any further processing
+ continue;
+ }
+
+ ASSERT(pcls[ich] < 5); // "Only N, L, R, AN, EN are allowed"
+ cls = pcls[ich];
+
+ int action = actionNeutrals[state][cls];
+
+ // resolve the directionality for deferred runs
+ // (a result equal to N means the action carries no run resolution)
+ int clsRun = GetDeferredNeutrals(action, level);
+ if (clsRun != N)
+ {
+ SetDeferredRun(pcls, cchRun, ich, clsRun);
+ cchRun = 0;
+ }
+
+ // resolve the directionality class at the current location
+ int clsNew = GetResolvedNeutrals(action);
+ if (clsNew != N)
+ pcls[ich] = clsNew;
+
+ if (In & action)
+ cchRun++;
+
+ // the transition uses the ORIGINAL class (cls), not the rewritten one
+ state = stateNeutrals[state][cls];
+ level = plevel[ich];
+ }
+
+ // resolve any deferred runs
+ cls = EmbeddingDirection(level); // eor has type of current level
+
+ // resolve the directionality for deferred runs
+ // (ich == cch here, so the run closed is the one ending at end of text)
+ int clsRun = GetDeferredNeutrals(actionNeutrals[state][cls], level);
+ if (clsRun != N)
+ SetDeferredRun(pcls, cchRun, ich, clsRun);
+}
+
+// === RESOLVE IMPLICIT ==================================================
+
+/*------------------------------------------------------------------------
+ Function: resolveImplicit
+
+ Recursively resolves implicit embedding levels.
+ Implements rules I1 and I2 of the Unicode Bidirectional Algorithm.
+
+ Input: Array of direction classes
+ Character count
+ Base level
+
+ In/Out: Array of embedding levels
+
+ Note: levels may exceed 15 on output.
+ Accepted subset of direction classes
+ R, L, AN, EN
+------------------------------------------------------------------------*/
+// Amount added to a character's embedding level by resolveImplicit.
+// Row: 0 when the current level is even, 1 when it is odd.
+// Column: class value minus 1 (resolveImplicit indexes with pcls[ich] - 1).
+int addLevel[][4] =
+{
+ // L, R, AN, EN = cls
+ // level =
+/* even */ 0, 1, 2, 2, // EVEN
+/* odd */ 1, 0, 1, 1, // ODD
+
+};
+
+// Raise each character's embedding level by the amount addLevel gives for
+// its class and for the parity of its current level. pcls is read only;
+// plevel is updated in place.
+void resolveImplicit(const int * pcls, int * plevel, int cch)
+{
+    int ich = 0;
+
+    while (ich < cch)
+    {
+        // BN is left untouched here: some BN were turned into strong types
+        // in resolveWeak, and undoing that needs the original types, which
+        // are only available again in resolveWhitespace
+        if (pcls[ich] != BN)
+        {
+            ASSERT(pcls[ich] > 0); // "No Neutrals allowed to survive here."
+            ASSERT(pcls[ich] < 5); // "Out of range."
+            plevel[ich] += addLevel[odd(plevel[ich])][pcls[ich] - 1];
+        }
+        ich++;
+    }
+}
+
+// === REORDER ===========================================================
+/*------------------------------------------------------------------------
+ Function: resolveLines
+
+ Breaks a paragraph into lines
+
+ Input: Character count
+ In/Out: Array of characters
+ Array of line break flags
+
+ Returns the count of characters on the first line
+
+ Note: This function only breaks lines at hard line breaks. Other
+ line breaks can be passed in. If pbrk[n] is true, then a break
+ occurs after the character in pszInput[n]. Breaks before the first
+ character are not allowed.
+------------------------------------------------------------------------*/
+// Return the length of the first line of pszInput: the count of characters
+// up to and including the first LS character or the first position whose
+// pbrk flag is set, or cch when there is no such position. pbrk may be
+// null, in which case only LS characters break the line.
+int resolveLines(TCHAR * pszInput, bool * pbrk, int cch)
+{
+    int cchLine = 0;
+
+    while (cchLine < cch)
+    {
+        const bool fBreakAfter =
+            pszInput[cchLine] == LS || (pbrk && pbrk[cchLine]);
+
+        // the breaking character itself still belongs to this line
+        cchLine++;
+        if (fBreakAfter)
+            break;
+    }
+
+    return cchLine;
+}
+
+/*------------------------------------------------------------------------
+ Function: resolveWhitespace
+
+ Resolves levels for WS and S
+ Implements rule L1 of the Unicode bidi Algorithm.
+
+ Input: Base embedding level
+ Character count
+ Array of direction classes (for one line of text)
+
+ In/Out: Array of embedding levels (for one line of text)
+
+ Note: this should be applied a line at a time. The default driver
+ code supplied in this file assumes a single line of text; for
+ a real implementation, cch and the initial pointer values
+ would have to be adjusted.
+------------------------------------------------------------------------*/
+// Reset the levels of separators and of the whitespace in front of them
+// (and in front of the end of the text) to the base level.
+//   baselevel : level assigned to S, B and the trailing runs
+//   pcls      : directional classes of the line, read only
+//   plevel    : embedding levels of the line, updated in place
+//   cch       : number of characters in the line
+// SetDeferredRun is defined elsewhere in this file.
+void resolveWhitespace(int baselevel, const int *pcls, int *plevel,
+ int cch)
+{
+ // length of the run of whitespace / formatting characters seen so far
+ int clevel = 0;
+ // level of the previous character, after any change made here
+ int oldlevel = baselevel;
+
+ int ich; for (ich = 0; ich < cch; ich++)
+ {
+ switch(pcls[ich])
+ {
+ default:
+ clevel = 0; // any other character breaks the run
+ break;
+ case WS:
+ clevel++;
+ break;
+
+ // formatting codes take the level of the preceding character and
+ // are counted into the run
+ case RLE:
+ case LRE:
+ case LRO:
+ case RLO:
+ case PDF:
+ case BN:
+ plevel[ich] = oldlevel;
+ clevel++;
+ break;
+
+ case S:
+ case B:
+ // reset levels for WS before eot
+ SetDeferredRun(plevel, clevel, ich, baselevel);
+ clevel = 0;
+ plevel[ich] = baselevel;
+ break;
+ }
+ oldlevel = plevel[ich];
+ }
+ // reset level before eot
+ SetDeferredRun(plevel, clevel, ich, baselevel);
+}
+
+
+/*------------------------------------------------------------------------
+ Functions: reorder/reorderLevel
+
+ Recursively reorders the display string
+ "From the highest level down, reverse all characters at that level and
+ higher, down to the lowest odd level"
+
+ Implements rule L2 of the Unicode bidi Algorithm.
+
+ Input: Array of embedding levels
+ Character count
+ Flag enabling reversal (set to false by initial caller)
+
+ In/Out: Text to reorder
+
+ Note: levels may exceed 15 resp. 61 on input.
+
+ Rule L3 - reorder combining marks is not implemented here
+ Rule L4 - glyph mirroring is implemented as a display option below
+
+ Note: this should be applied a line at a time
+-------------------------------------------------------------------------*/
+// Reorder the leading stretch of pszText whose levels are all >= level.
+//   level    : level being processed
+//   pszText  : text, reordered in place
+//   plevel   : embedding levels, parallel to pszText, read only
+//   cch      : number of characters available
+//   fReverse : true once an odd level has been met on the way down
+// Returns the number of characters consumed, i.e. the index of the first
+// character whose level is below `level`, or cch.
+// `reverse` is defined elsewhere in this file.
+int reorderLevel(int level, LPTSTR pszText, const int * plevel, int cch,
+ bool fReverse = false)
+{
+ // true as soon as first odd level encountered
+ fReverse = fReverse || odd(level);
+
+ int ich; for (ich = 0; ich < cch; ich++)
+ {
+ if (plevel[ich] < level)
+ {
+ break;
+ }
+ else if (plevel[ich] > level)
+ {
+ // handle the nested stretch one level up; the "- 1" compensates
+ // for the ich++ of this loop
+ ich += reorderLevel(level + 1, pszText + ich, plevel + ich,
+ cch - ich, fReverse) - 1;
+ }
+ }
+ if (fReverse)
+ {
+ reverse(pszText, ich);
+ }
+ return ich;
+}
+
+// Reorder a whole line by calling reorderLevel repeatedly from the base
+// level until all cch characters are consumed. Returns the number of
+// characters processed.
+// NOTE(review): reorderLevel returns 0 when plevel[ich] < baselevel, which
+// would make this loop spin forever; this presumably cannot happen because
+// resolved levels are never below the base level — confirm against callers.
+int reorder(int baselevel, LPTSTR pszText, const int * plevel, int cch)
+{
+ int ich = 0;
+
+ while (ich < cch)
+ {
+ ich += reorderLevel(baselevel, pszText + ich, plevel + ich,
+ cch - ich);
+ }
+ return ich;
+}
+
+// === DISPLAY OPTIONS ================================================
+/*-----------------------------------------------------------------------
+ Function: mirror
+
+ Crudely implements rule L4 of the Unicode Bidirectional Algorithm
+ Demonstrate mirrored brackets, braces and parens
+
+
+ Input: Array of levels
+ Count of characters
+
+ In/Out: Array of characters (should be array of glyph ids)
+
+ Note;
+ A full implementation would need to substitute mirrored glyphs even
+ for characters that are not paired (e.g. integral sign).
+-----------------------------------------------------------------------*/
+// Swap each bracket, brace and parenthesis with its counterpart, for every
+// character whose embedding level is odd. pszInput is modified in place;
+// plevel is read only.
+void mirror(LPTSTR pszInput, const int * plevel, int cch)
+{
+    int ich;
+
+    for (ich = 0; ich < cch; ich++)
+    {
+        // characters at even levels are never mirrored
+        if (odd(plevel[ich]))
+        {
+            switch (pszInput[ich])
+            {
+            case '[': pszInput[ich] = ']'; break;
+            case ']': pszInput[ich] = '['; break;
+            case '{': pszInput[ich] = '}'; break;
+            case '}': pszInput[ich] = '{'; break;
+            case ')': pszInput[ich] = '('; break;
+            case '(': pszInput[ich] = ')'; break;
+            default:  break; // no mirrored form known here
+            }
+        }
+    }
+}
+
+/*-----------------------------------------------------------------------
+ Function: clean
+
+ remove formatting codes
+
+ In/Out: Array of characters
+ Count of characters
+
+ Note;
+
+ This function can be used to remove formatting codes so the
+ ordering of the string can be compared to implementations that
+ remove formatting codes. This implementation is limited to the
+ pseudo alphabet used for the demo version.
+
+-----------------------------------------------------------------------*/
+// Remove, in place, every character whose value compares below 0x20 and
+// terminate the shortened string with 0. Returns the new length.
+// The terminator is written at index <new length>, so the buffer must hold
+// at least cch + 1 characters when nothing is removed.
+int clean(LPTSTR pszInput, int cch)
+{
+    int cchKept = 0;    // next write position == characters kept so far
+    int ich;
+
+    for (ich = 0; ich < cch; ich++)
+    {
+        if (!(pszInput[ich] < 0x20))
+        {
+            pszInput[cchKept] = pszInput[ich];
+            cchKept++;
+        }
+    }
+    pszInput[cchKept] = 0;
+
+    return cchKept;
+}
+
+/*------------------------------------------------------------------------
+ Function: BidiLines
+
+ Implements the Line-by-Line phases of the Unicode Bidi Algorithm
+
+ Input: Count of characters
+ flag whether to mirror
+
+ Inp/Out: Input text
+ Array of character directions
+ Array of levels
+
+------------------------------------------------------------------------*/
+// Run the per-line phases over a paragraph, one line at a time: find the
+// end of the line, resolve whitespace levels, optionally mirror, reorder.
+//   baselevel  : paragraph level
+//   pszLine    : paragraph text, mirrored and reordered in place
+//   pclsLine   : directional classes, parallel to the text
+//   plevelLine : embedding levels, parallel to the text, updated in place
+//   cchPara    : number of characters in the paragraph
+//   fMirror    : non-zero to apply mirror() to each line
+//   pbrk       : optional per-character "break after" flags, may be null
+void BidiLines(int baselevel, TCHAR * pszLine, int * pclsLine,
+ int * plevelLine, int cchPara, int fMirror, bool * pbrk = 0)
+{
+ int cchLine = 0;
+
+ // the body runs at least once, even when cchPara is 0
+ do
+ {
+ // break lines at LS
+ cchLine = resolveLines(pszLine, pbrk, cchPara);
+
+ // resolve whitespace
+ resolveWhitespace(baselevel, pclsLine, plevelLine, cchLine);
+
+
+ if (fMirror)
+ {
+ mirror(pszLine, plevelLine, cchLine);
+ }
+
+ // reorder each line in place
+ reorder(baselevel, pszLine, plevelLine, cchLine);
+
+ // advance all parallel arrays past the line just handled
+ pszLine += cchLine;
+ plevelLine += cchLine;
+ // a null pbrk must stay null
+ pbrk += pbrk ? cchLine : 0;
+ pclsLine += cchLine;
+ cchPara -= cchLine;
+
+ } while (cchPara);
+}
+
+// ===== FUNCTIONS FOR COMMAND LINE VERSION ==============================
+
+#include <stdlib.h>
+#include <string.h>
+
+// An alternate CharFromTypes array may be needed to use the command
+// line version,
+
+#define MAX_CCH 65520
+// Write to f one display character per input character, chosen by the
+// directional class that ClassFromChWS reports for it.
+//   f        : output stream
+//   pszInput : text to describe
+//   cch      : number of characters; at most MAX_CCH are shown
+void ShowInputTypes(FILE* f, LPTSTR pszInput, int cch)
+{
+    TCHAR pszTypes[MAX_CCH+1];
+
+    // never write past the fixed-size scratch buffer
+    if (cch > MAX_CCH)
+        cch = MAX_CCH;
+
+    int ich; for (ich = 0; ich < cch; ich++)
+    {
+        pszTypes[ich] = CharFromTypes[ClassFromChWS(pszInput[ich])];
+    }
+    pszTypes[ich] = 0;
+
+    // print the buffer as data: used as the format string itself, any '%'
+    // in it would be interpreted as a conversion specification
+    fprintf(f, "%s", pszTypes);
+}
+
+// Write to f one display character per entry of types, looked up in
+// CharFromTypes.
+//   f     : output stream
+//   types : directional classes to show
+//   cch   : number of entries; at most MAX_CCH are shown
+void ShowTypes(FILE* f, int * types, int cch)
+{
+    TCHAR pszTypes[MAX_CCH+1];
+
+    // never write past the fixed-size scratch buffer
+    if (cch > MAX_CCH)
+        cch = MAX_CCH;
+
+    int ich; for (ich = 0; ich < cch; ich++)
+    {
+        pszTypes[ich] = CharFromTypes[types[ich]];
+    }
+    pszTypes[ich] = 0;
+
+    // print the buffer as data: used as the format string itself, any '%'
+    // in it would be interpreted as a conversion specification
+    fprintf(f, "%s", pszTypes);
+}
+
+// Write to f one display character per embedding level, looked up in
+// CharFromLevel. Compiled to a no-op unless DEBUGGING is defined.
+//   f      : output stream
+//   levels : embedding levels to show
+//   cch    : number of entries; at most MAX_CCH are shown
+void ShowLevels(FILE* f, int * levels, int cch)
+{
+
+ #ifdef DEBUGGING
+    // do nothing if levels are not limited to 15 as for debugging
+    // since we can't show 0-61 in a single character
+
+    TCHAR pszLevel[MAX_CCH+1];
+
+    // never write past the fixed-size scratch buffer
+    if (cch > MAX_CCH)
+        cch = MAX_CCH;
+
+    int ich; for (ich = 0; ich < cch; ich++)
+    {
+        pszLevel[ich] = CharFromLevel[levels[ich]];
+    }
+    pszLevel[ich] = 0;
+
+    // print the buffer as data: used as the format string itself, any '%'
+    // in it would be interpreted as a conversion specification
+    fprintf(f, "%s", pszLevel);
+
+ #else
+    // squelch compiler warnings
+    f; levels; cch;
+ #endif
+}
+
+// Print the command line help to standard output; s is the program name
+// shown on the first line.
+void usage(char *s)
+{
+    static const char * const rgszHelp[] =
+    {
+        "\t-verbose = verbose debugging output.\n",
+        "\t-nomirror = refrain from glyph mirroring.\n",
+        "\t-clean = clean up the result.\n",
+        "\tOptions affect all subsequent arguments.\n",
+        "\tAll other arguments are interpreted as strings to process.\n",
+    };
+    const int cLines = (int)(sizeof(rgszHelp) / sizeof(rgszHelp[0]));
+    int iLine;
+
+    printf("Usage: %s [-verbose] [-nomirror] [-clean] strings...\n", s);
+    for (iLine = 0; iLine < cLines; iLine++)
+    {
+        printf("%s", rgszHelp[iLine]);
+    }
+}
+
+// Command line driver: reads lines from standard input, runs every phase
+// of the algorithm on each line and prints the reordered line to standard
+// output. With -verbose the intermediate types and levels go to stderr.
+// Options: -verbose, -nomirror, -clean, -help. Any other argument is
+// ignored.
+int main(int argc, char** argv)
+{
+    int doMirror = 1;
+    int doClean = 0;
+    int beVerbose = 0;
+
+    FILE* f = stderr;
+
+    int i; for (i = 1; i < argc; ++i)
+    {
+        if (strcmp(argv[i], "-verbose") == 0)
+        {
+            beVerbose = 1;
+            continue;
+        }
+        else if (strcmp(argv[i], "-nomirror") == 0)
+        {
+            doMirror = 0;
+            continue;
+        }
+        else if (strcmp(argv[i], "-clean") == 0)
+        {
+            doClean = 1;
+            continue;
+        }
+        else if (strcmp(argv[i], "-help") == 0)
+        {
+            usage(argv[0]);
+            exit(0);
+        }
+    }
+    TCHAR pszInput[MAX_CCH+1];
+    while (fgets(pszInput, MAX_CCH, stdin))
+    {
+        pszInput[MAX_CCH] = 0;
+        int cch = strlen(pszInput);
+        // strip the trailing newline; cch is 0 when the line starts with a
+        // NUL byte, and pszInput[-1] must not be read in that case
+        if (cch > 0 && pszInput[cch-1] == '\n')
+            cch--;
+        pszInput[cch] = 0;
+
+        int types[MAX_CCH];
+        int levels[MAX_CCH];
+
+        // assign directional types
+        classify(pszInput, types, cch);
+
+        if (beVerbose)
+        {
+            fprintf(f, "Input Types: ");
+            ShowInputTypes(f, pszInput, cch); fprintf(f, "\n");
+        }
+
+        // limit text to first block
+        cch = resolveParagraphs(types, cch);
+
+        // set base level and compute character types
+        int baselevel = baseLevel(types, cch);
+        if (beVerbose)
+        {
+            fprintf(f, "Base Level : %d\n", baselevel);
+        }
+
+        // resolve explicit
+        resolveExplicit(baselevel, N, types, levels, cch);
+
+        if (beVerbose)
+        {
+            fprintf(f, "Levels (A) : ");
+            ShowLevels(f, levels, cch); fprintf(f, "\n");
+        }
+
+        // resolve weak
+        resolveWeak(baselevel, types, levels, cch);
+
+        if (beVerbose)
+        {
+            fprintf(f, "Types (A) : ");
+            ShowTypes(f, types, cch); fprintf(f, "\n");
+        }
+
+        // resolve neutrals
+        resolveNeutrals(baselevel,types, levels, cch);
+
+        if (beVerbose)
+        {
+            fprintf(f, "Types (B) : ");
+            ShowTypes(f, types, cch); fprintf(f, "\n");
+        }
+
+        // resolveImplicit
+        resolveImplicit(types, levels, cch);
+
+        if (beVerbose)
+        {
+            fprintf(f, "Levels (B) : ");
+            ShowLevels(f, levels, cch); fprintf(f, "\n");
+        }
+
+        // assign directional types again, but for WS, S this time
+        classify(pszInput, types, cch, true);
+
+        BidiLines(baselevel, pszInput, types, levels, cch, doMirror);
+
+        if (doClean)
+        {
+            cch = clean(pszInput, cch);
+        }
+
+        printf("%s\n", pszInput);
+    }
+
+    return 0;
+}
+
+//[EOF]
diff --git a/conformance/test b/conformance/test
new file mode 100755
index 0000000..55e3dc6
--- /dev/null
+++ b/conformance/test
@@ -0,0 +1,40 @@
+#!/bin/sh
+# Test ../fribidi with reference/bidiref, using stdin and diff the outputs
+
+convertsub () {
+ refline="`echo \"$refline\" | sed s/$1/$2/g`"
+}
+
+while [ -n "$1" ]; do
+ if test "$1" = "-debug"; then
+ p1=$p1" -debug"
+ p2=$p2" -verbose"
+ elif test "$1" = "-clean"; then
+ p1=$p1" -clean"
+ p2=$p2" -clean"
+ fi
+ shift
+done
+
+echo -n > test.out
+echo -n > test.ref
+
+while read line; do
+ echo "$line" | ../fribidi $p1 -nopad >> test.out
+# echo "$line" | reference/bidiref $p2 >> test.ref
+ refline="$line"
+ convertsub "_R" "`echo -e \"\\16\"`"
+ convertsub "_r" "`echo -e \"\\17\"`"
+ convertsub "_L" "`echo -e \"\\20\"`"
+ convertsub "_l" "`echo -e \"\\21\"`"
+ convertsub "_o" "`echo -e \"\\22\"`"
+ refline=`echo "$refline" | reference/bidiref $p2`
+ convertsub "`echo -e \"\\16\"`" "_R"
+ convertsub "`echo -e \"\\17\"`" "_r"
+ convertsub "`echo -e \"\\20\"`" "_L"
+ convertsub "`echo -e \"\\21\"`" "_l"
+ convertsub "`echo -e \"\\22\"`" "_o"
+ echo "$refline" >> test.ref
+done
+
+diff test.out test.ref
diff --git a/conformance/test_gen.c b/conformance/test_gen.c
new file mode 100644
index 0000000..3f3f397
--- /dev/null
+++ b/conformance/test_gen.c
@@ -0,0 +1,72 @@
+/* Generate fully random tests. */
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+int lev = 0, len = 0, minlen;
+
+/* Print a run of 0 to 4 random letters, all taken from 'a'..'z' or all
+   taken from 'A'..'Z', and add the run length to the global len. */
+void letter () {
+
+ /* one coin flip picks the case for the whole run */
+ char rl = (rand() % 2 ? 'a' : 'A');
+ int i, j = rand() % 5;
+
+ len += j;
+
+ for (i = 0; i < j; i++)
+ putchar(rl + rand() % 26);
+
+}
+
+/* Print either a single bracket, brace or parenthesis (6 chances in 16)
+   or a run of 0 to 4 random digits taken from '0'..'5'.
+   Note: len is increased by the drawn run length j in both cases, even
+   when a single bracket character is what actually gets printed. */
+void number () {
+
+ int i, j = rand() % 5;
+
+ len += j;
+
+ switch (rand() % 16) {
+ case 10: putchar('['); break;
+ case 11: putchar(']'); break;
+ case 12: putchar('{'); break;
+ case 13: putchar('}'); break;
+ case 14: putchar('('); break;
+ case 15: putchar(')'); break;
+
+ default:
+ /* NOTE(review): only the digits 0-5 are produced — presumably on
+ purpose for the test alphabet; confirm */
+ for (i = 0; i < j; i++)
+ putchar('0' + rand() % 6);
+ break;
+ }
+
+}
+
+/* Print a random test fragment. Loops, picking one of 12 equally likely
+   actions per round, until the default action is picked while the text
+   generated so far is longer than minlen.
+   Cases 0-3 wrap a recursively generated fragment between an explicit
+   escape (_R, _r, _L, _l) and the closing escape _o.
+   Note: lev is never changed in this file, so "len << lev" is just len. */
+void gen () {
+
+ int f = 1;
+
+ while (f)
+ switch (rand() % 12) {
+ case 0: len += 2; printf("_R"); gen(); printf("_o"); break;
+ case 1: len += 2; printf("_r"); gen(); printf("_o"); break;
+ case 2: len += 2; printf("_L"); gen(); printf("_o"); break;
+ case 3: len += 2; printf("_l"); gen(); printf("_o"); break;
+
+ case 4: len++; letter(); letter(); printf(" "); break;
+ case 5: number(); break;
+ case 6: len++; printf(" "); break;
+
+ /* the only way out of the loop */
+ default: letter(); if (len << lev > minlen) f = 0; break;
+ }
+
+}
+
+/* Print one random test line to standard output.
+   argv[1], when present, is the minimum length the line must exceed
+   before generation may stop; the default is 1. */
+int main (int argc, char **argv) {
+
+    /* argv[argc] is always NULL, so this test is safe without arguments */
+    if (argv[1])
+        minlen = atoi(argv[1]);
+    else
+        minlen = 1;
+    srand((int)(time)(NULL));
+    gen();
+    printf("\n");
+
+    /* report success explicitly instead of falling off the end of main */
+    return 0;
+}
diff --git a/conformance/test_gen_rand.c b/conformance/test_gen_rand.c
new file mode 100644
index 0000000..d3f3e19
--- /dev/null
+++ b/conformance/test_gen_rand.c
@@ -0,0 +1,32 @@
+/* Generate random test data for explicit bidi tests. */
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+/* Print one line of len random characters drawn from 0x0E..0x7F, leaving
+   out the characters listed in the switch below.
+   argv[1], when present, is the number of characters; the default is 10. */
+int main (int argc, char **argv) {
+
+    int i, len;
+    char c;
+
+    /* argv[argc] is always NULL, so this test is safe without arguments */
+    if (argv[1])
+        len = atoi(argv[1]);
+    else
+        len = 10;
+    srand((int)(time)(NULL));
+
+    for (i = 0; i < len; i++) {
+        c = rand() % (0x80 - 0x0E) + 0x0E;
+        switch (c) {
+        case 0x60: // rule p3 is not implemented in fribidi yet
+        case 0x3C: case 0x3E: // not mirrored in bidiref implementation
+        case 0x5F: // used as a escape char
+        case 0x13: // bidiref doesn't work proper with this
+        case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: case 0x19:
+        case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F:
+        case 0x00: i--; break; // rejected: draw again for this position
+        default: putchar(c);
+        }
+    }
+
+    printf("\n");
+
+    /* report success explicitly instead of falling off the end of main */
+    return 0;
+}
diff --git a/fribidi.c b/fribidi.c
index e283c40..e02e305 100644
--- a/fribidi.c
+++ b/fribidi.c
@@ -1,5 +1,6 @@
/* FriBidi - Library of BiDi algorithm
* Copyright (C) 1999 Dov Grobgeld
+ * Copyright (C) 2001 Behdad Esfahbod
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -8,7 +9,7 @@
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
@@ -16,18 +17,27 @@
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
+#define DEBUG
+
#include <glib.h>
#include "fribidi.h"
-#ifndef NO_STDIO
+#ifdef DEBUG
#include <stdio.h>
#endif
-#ifndef NO_STDIO
+#ifdef DEBUG
#define DBG(s) if (fribidi_debug) { fprintf(stderr, s); }
#else
#define DBG(s)
#endif
+#ifdef DEBUG
+// for easier test with the reference code only.
+#define MAX_LEVEL 15
+#else
+// default value.
+#define MAX_LEVEL 61
+#endif
/*======================================================================
// Typedef for the run-length list.
//----------------------------------------------------------------------*/
@@ -47,12 +57,22 @@ typedef struct {
FriBidiChar value;
} key_value_t;
-/* Global variables */
-gboolean fribidi_debug = FALSE;
+typedef struct {
+ FriBidiCharType override; /* only L, R and N are valid */
+ gint level;
+} level_info;
+
+#ifdef DEBUG
+static gboolean fribidi_debug = FALSE;
+#endif
-void fribidi_set_debug(gboolean debug)
+int fribidi_set_debug(gboolean debug)
{
- fribidi_debug = debug;
+#ifdef DEBUG
+ return fribidi_debug = debug;
+#else
+ return 0;
+#endif
}
static gint bidi_string_strlen(FriBidiChar *str)
@@ -147,6 +167,7 @@ static TypeLink *run_length_encode_types(gint *char_type, gint type_len)
/* Add the starting link */
list = new_type_link();
list->type = FRIBIDI_TYPE_SOT;
+ list->level = -1;
list->len = 0;
list->pos = 0;
last = list;
@@ -158,29 +179,30 @@ static TypeLink *run_length_encode_types(gint *char_type, gint type_len)
for (i=0; i<=type_len; i++)
{
if (i==type_len || char_type[i] != type)
- {
- if (pos>=0)
- {
- link = new_type_link();
- link->type = type;
- link->pos = pos;
- link->len = len;
- last->next = link;
- link->prev = last;
- last = last->next;
- }
- if (i==type_len)
- break;
- len = 0;
- pos = i;
- }
- type =char_type[i];
+ {
+ if (pos>=0)
+ {
+ link = new_type_link();
+ link->type = type;
+ link->pos = pos;
+ link->len = len;
+ last->next = link;
+ link->prev = last;
+ last = last->next;
+ }
+ if (i==type_len)
+ break;
+ len = 0;
+ pos = i;
+ }
+ type = char_type[i];
len++;
}
/* Add the ending link */
link = new_type_link();
link->type = FRIBIDI_TYPE_EOT;
+ link->level = -1;
link->len = 0;
link->pos = type_len;
last->next = link;
@@ -189,6 +211,153 @@ static TypeLink *run_length_encode_types(gint *char_type, gint type_len)
return list;
}
+/* explicits_list is a list like type_rl_list, that holds the explicit
+ codes that are removed from rl_list, to reinsert them later by calling
+ the override_list.
+*/
+/* Create an empty list made of a SOT sentinel linked to an EOT sentinel,
+   both with level -1, length 0 and position 0.
+     start : receives the SOT sentinel
+     end   : receives the EOT sentinel
+   Returns the SOT sentinel (the same pointer as *start). The function is
+   declared to return a pointer, so it must not end without a return
+   statement. */
+static void *init_list(TypeLink **start, TypeLink **end)
+{
+  TypeLink *list = NULL;
+  TypeLink *link;
+
+  /* Add the starting link */
+  list = new_type_link();
+  list->type = FRIBIDI_TYPE_SOT;
+  list->level = -1;
+  list->len = 0;
+  list->pos = 0;
+
+  /* Add the ending link */
+  link = new_type_link();
+  link->type = FRIBIDI_TYPE_EOT;
+  link->level = -1;
+  link->len = 0;
+  link->pos = 0;
+  list->next = link;
+  link->prev = list;
+
+  *start = list;
+  *end = link;
+
+  return list;
+}
+
+/* move an element before another element in a list, the list must have a
+ previous element, used to update explicits_list.
+ assuming that p have both prev and next or none of them.
+*/
+/* Unlink p from the list it is in (if it has a prev link) and insert it
+   immediately before `list`. `list` must have a previous element, because
+   list->prev is dereferenced without a check. */
+void move_element_before (TypeLink *p, TypeLink *list)
+{
+ /* detach p; relies on p having both prev and next, or neither */
+ if (p->prev) {
+ p->prev->next = p->next;
+ p->next->prev = p->prev;
+ }
+ /* splice p in between list->prev and list */
+ p->prev = list->prev;
+ list->prev->next = p;
+ p->next = list;
+ list->prev = p;
+}
+
+/* override the rl_list base with the elements in over list, used to reinsert
+ the removed explicit codes from explicits_list into the type_rl_list and
+ also to reset the embedding level of some chars in L1.
+ i assume that the pos of the first element in base list is not more than
+ the pos of the first element of the over list, and the pos of the last
+ element of the base list is not less than the pos of the last element of
+ the over list, they are always satisfied for the two purposes already said.
+*/
+/* Splice the elements of `over` into `base`, replacing whatever part of
+   base they cover by position. Elements of `over` with zero length, or
+   whose pos lies before the last pos handled, are freed; all others
+   become part of base. Elements of base that end up fully covered are
+   freed. */
+void override_list (TypeLink *base, TypeLink *over)
+{
+ TypeLink *p = base, *q, *r, *s, *t;
+ gint pos = 0, pos2;
+ /* NOTE(review): base is a by-value parameter, so this assignment is not
+ visible to the caller; a null base simply makes the call a no-op */
+ if (!base) {
+ base = over;
+ return;
+ }
+ if (!over)
+ return;
+ for (q = over; q; /* move forward q on list at the end of the loop. */) {
+ if (!q->len || q->pos < pos) {
+ t = q;
+ q = q->next;
+ free_type_link(t);
+ continue;
+ }
+ pos = q->pos;
+ while (p->next && p->next->pos <= pos)
+ p = p->next;
+ /* now p is the element that q must be inserted 'in'. */
+ pos2 = pos + q->len;
+ r = p;
+ while (r->next && r->next->pos < pos2)
+ r = r->next;
+ /* now r is the last element that q affects. */
+ if (p == r) {
+ /* split p into at most 3 interval, and insert q in the place of
+ the second interval, set r to be the third part. */
+ /* third part needed? */
+ if (p->next && p->next->pos == pos2) {
+ if (r->next)
+ r = r->next;
+ else {
+ r = new_type_link();
+ *r = *p;
+ r->len = 0;
+ r->pos = pos2;
+ }
+ } else {
+ /* r becomes a copy of p that keeps only the part after q */
+ r = new_type_link();
+ *r = *p;
+ if (r->next)
+ r->next->prev = r;
+ r->len -= pos2 - r->pos;
+ r->pos = pos2;
+ }
+ /* first part needed? */
+ if (p->pos == pos) {
+ if (p->prev) {
+ t = p;
+ p = p->prev;
+ free_type_link(t);
+ } else
+ p->len = 0;
+ } else {
+ p->len = pos - p->pos;
+ }
+ } else {
+ /* cut the end of p. */
+ p->len = pos - p->pos;
+ /* if all of p is cut, remove it. */
+ /* NOTE(review): the emptied element is stepped over but not freed
+ here; it is freed by the loop below only if it lies between the
+ new p and r — confirm */
+ if (!p->len && p->prev)
+ p = p->prev;
+ /* remove the elements between p and r. */
+ if (p != r)
+ for (s = p->next; s != r;) {
+ t = s;
+ s = s->next;
+ free_type_link(t);
+ }
+ /* cut the begining of r. */
+ r->pos = pos2;
+ if (r->next)
+ r->len = r->next->pos - pos2;
+ /* if all of r is cut, remove it. */
+ if (!r->len && r->next) {
+ t = r;
+ r = r->next;
+ free_type_link(t);
+ }
+ }
+ /* before updating the next and prev links to point to the inserted q,
+ we must remember the next element of q in the over list.
+ */
+ t = q;
+ q = q->next;
+ p->next = t;
+ t->prev = p;
+ t->next = r;
+ r->prev = t;
+ }
+}
+
/* Some convenience macros */
#define RL_TYPE(list) (list)->type
#define RL_LEN(list) (list)->len
@@ -199,18 +368,20 @@ static void compact_list(TypeLink *list)
{
while(list)
{
- if (list->prev
- && RL_TYPE(list->prev) == RL_TYPE(list))
- {
+ if (list->prev &&
+ RL_TYPE(list->prev) == RL_TYPE(list) &&
+ RL_LEVEL(list->prev) == RL_LEVEL(list) &&
+ RL_POS(list->prev) + RL_LEN(list->prev) == RL_POS(list))
+ {
TypeLink *next = list->next;
- list->prev->next = list->next;
- list->next->prev = list->prev;
- RL_LEN(list->prev) = RL_LEN(list->prev) + RL_LEN(list);
+ list->prev->next = list->next;
+ list->next->prev = list->prev;
+ RL_LEN(list->prev) = RL_LEN(list->prev) + RL_LEN(list);
free_type_link(list);
- list = next;
+ list = next;
}
else
- list = list->next;
+ list = list->next;
}
}
@@ -218,109 +389,201 @@ static void compact_list(TypeLink *list)
/* Rules for overriding current type */
#define TYPE_RULE1(old_this, \
- new_this) \
+ new_this) \
if (this_type == TYPE_ ## old_this) \
RL_TYPE(pp) = FRIBIDI_TYPE_ ## new_this; \
/* Rules for current and previous type */
#define TYPE_RULE2(old_prev, old_this, \
- new_prev, new_this) \
+ new_prev, new_this) \
if ( prev_type == FRIBIDI_TYPE_ ## old_prev \
- && this_type == FRIBIDI_TYPE_ ## old_this) \
+ && this_type == FRIBIDI_TYPE_ ## old_this) \
{ \
- RL_TYPE(pp->prev) = FRIBIDI_TYPE_ ## new_prev; \
- RL_TYPE(pp) = FRIBIDI_TYPE_ ## new_this; \
+ RL_TYPE(ppprev) = FRIBIDI_TYPE_ ## new_prev; \
+ RL_TYPE(pp) = FRIBIDI_TYPE_ ## new_this; \
continue; \
}
/* A full rule that assigns all three types */
#define TYPE_RULE(old_prev, old_this, old_next, \
- new_prev, new_this, new_next) \
+ new_prev, new_this, new_next) \
if ( prev_type == FRIBIDI_TYPE_ ## old_prev \
- && this_type == FRIBIDI_TYPE_ ## old_this \
- && next_type == FRIBIDI_TYPE_ ## old_next) \
+ && this_type == FRIBIDI_TYPE_ ## old_this \
+ && next_type == FRIBIDI_TYPE_ ## old_next) \
{ \
- RL_TYPE(pp->prev) = FRIBIDI_TYPE_ ## new_prev; \
- RL_TYPE(pp) = FRIBIDI_TYPE_ ## new_this; \
- RL_TYPE(pp->next) = FRIBIDI_TYPE_ ## new_next; \
+ RL_TYPE(ppprev) = FRIBIDI_TYPE_ ## new_prev; \
+ RL_TYPE(pp) = FRIBIDI_TYPE_ ## new_this; \
+ RL_TYPE(ppnext) = FRIBIDI_TYPE_ ## new_next; \
continue; \
}
/* For optimization the following macro only assigns the center type */
#define TYPE_RULE_C(old_prev, old_this, old_next, \
- new_this) \
+ new_this) \
if ( prev_type == FRIBIDI_TYPE_ ## old_prev \
- && this_type == FRIBIDI_TYPE_ ## old_this \
- && next_type == FRIBIDI_TYPE_ ## old_next) \
+ && this_type == FRIBIDI_TYPE_ ## old_this \
+ && next_type == FRIBIDI_TYPE_ ## old_next) \
{ \
- RL_TYPE(pp) = FRIBIDI_TYPE_ ## new_this; \
+ RL_TYPE(pp) = FRIBIDI_TYPE_ ## new_this; \
continue; \
}
+/*=======================================================
+// define macros for push and pop the status in the stack
+//-------------------------------------------------------*/
+
+/* There are some subtle points in pushing onto and popping from the status stack:
+ 1. when the new embedding level is not valid (more than MAX_LEVEL=61),
+ you must reject the code and not push onto the stack; but when you see a
+ PDF, you must find the matching code, and only if it was pushed on the
+ stack, pop it. That is, you must pop if and only if you have pushed the
+ matching code. The over_pushed var counts the codes rejected so far.
+ 2. there is a more confusing point too: when the embedding level is exactly
+ MAX_LEVEL-1=60, a LRO or LRE must be rejected because the new level would
+ be MAX_LEVEL+1=62, which is invalid, but a RLO or RLE must be accepted
+ because the new level is MAX_LEVEL=61, which is valid. So the rejected
+ codes may not be contiguous in the logical order; in fact there are at
+ most two contiguous intervals of codes, with a RLO or RLE between them.
+ To support this case, the first_interval var counts the number of rejected
+ codes in the first interval; when it is 0, there is only one interval
+ so far. */
+
+/* a. If this new level would be valid, then this embedding code is valid.
+ Remember (push) the current embedding level and override status.
+ Reset current level to this new level, and reset the override status to
+ new_override.
+ b. If the new level would not be valid, then this code is invalid. Don't
+ change the current level or override status.
+*/
+#define PUSH_STATUS(new_override) \
+ { \
+ if (new_level <= MAX_LEVEL) \
+ { \
+ if (level == MAX_LEVEL - 1) \
+ first_interval = over_pushed; \
+ status_stack->level = level; \
+ status_stack->override = override; \
+ status_stack++; \
+ stack_size++; \
+ level = new_level; \
+ override = new_override; \
+ } else \
+ over_pushed++; \
+ }
+
+/* If there was a valid matching code, restore (pop) the last remembered
+ (pushed) embedding level and directional override.
+*/
+#define POP_STATUS \
+ { \
+ if (over_pushed + stack_size) \
+ { \
+ if (over_pushed > first_interval) \
+ over_pushed--; \
+ else \
+ { \
+ if (over_pushed == first_interval) \
+ first_interval = 0; \
+ status_stack--; \
+ stack_size--; \
+ level = status_stack->level; \
+ override = status_stack->override; \
+ } \
+ } \
+ }
+
+/*==========================================================================
+// There was no support for sor and eor in the absence of Explicit Embedding
+// Levels, so define macros, to support them, with as less change as needed.
+//--------------------------------------------------------------------------*/
+
+/* Return the direction of the level number, ie. even is FRIBIDI_TYPE_L and
+ odd is FRIBIDI_TYPE_R. The argument is parenthesized so that an expression
+ argument (e.g. one built with | or ?:) is evaluated before the parity test.
+*/
+#define LEVEL_DIR(lev) \
+ ((lev) & 1 ? FRIBIDI_TYPE_R : FRIBIDI_TYPE_L)
+
+/* Return the type of previous char or the sor, if already at the start of
+ a run level.
+*/
+#define PREV_TYPE_OR_SOR \
+ (RL_LEVEL(ppprev)==RL_LEVEL(pp) ? RL_TYPE(ppprev) : LEVEL_DIR( \
+ (RL_LEVEL(ppprev)>RL_LEVEL(pp) ? RL_LEVEL(ppprev) : RL_LEVEL(pp)) \
+ ))
+
+/* Return the type of next char or the eor, if already at the end of
+ a run level.
+*/
+#define NEXT_TYPE_OR_EOR \
+ (!ppnext ? LEVEL_DIR(RL_LEVEL(pp)) : \
+ (RL_LEVEL(ppnext)==RL_LEVEL(pp) ? RL_TYPE(ppnext) : LEVEL_DIR( \
+ (RL_LEVEL(ppnext)>RL_LEVEL(pp) ? RL_LEVEL(ppnext) : RL_LEVEL(pp)) \
+ )))
+
+#ifdef DEBUG
/*======================================================================
// For debugging, define some macros for printing the types and the
// levels.
//----------------------------------------------------------------------*/
-#ifndef NO_STDIO
+
+/* Printable character for each level; the debug printers in this file index
+ it with level + 2, so that -2 and -1 map to the first two entries.
+ NOTE(review): the table has 24 entries, so a level above 21 would read
+ past its end; confirm the maximum level used in debug builds. */
+static char char_from_level[] = {
+ 'e', /* -2, internal error, this level shouldn't be viewed. */
+ '_', /* -1, indicating start of string and end of string. */
+ /* 0-9,A-F are the only valid levels in debug mode and before resolving
+ implicits. After that the levels X, Y, Z may appear too. */
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+ 'A', 'B', 'C', 'D', 'E', 'F',
+ 'X', 'Y', 'Z', /* must only appear after resolving implicits. */
+ 'o', 'o', 'o' /* overflows, these levels and higher levels show a bug! */
+};
+
static void print_types_re(TypeLink *pp)
{
+ fprintf(stderr, "Run types : ");
while(pp)
{
- printf("%d:%c(%d)[%d] ", RL_POS(pp), RL_TYPE(pp), RL_LEN(pp), RL_LEVEL(pp));
+ fprintf(stderr, "%d:%d(%d)[%c] ",
+ RL_POS(pp),
+ RL_TYPE(pp),
+ RL_LEN(pp),
+ char_from_level[RL_LEVEL(pp) + 2]);
pp = pp->next;
}
- printf("\n");
+ fprintf(stderr, "\n");
}
static void print_resolved_levels(TypeLink *pp)
{
+ fprintf(stderr, "Res. levels: ");
while(pp)
{
gint i;
for (i=0; i<RL_LEN(pp); i++)
- printf("%d", RL_LEVEL(pp));
+ fprintf(stderr, "%c", char_from_level[RL_LEVEL(pp) + 2]);
pp = pp->next;
}
- printf("\n");
+ fprintf(stderr, "\n");
}
static void print_resolved_types(TypeLink *pp)
{
- while(pp)
- {
- gint i;
- for (i=0; i<RL_LEN(pp); i++)
- {
- gchar ch;
- FriBidiCharType type = RL_TYPE(pp);
-
- /* Convert the type to something readable */
- if (type == FRIBIDI_TYPE_R)
- ch = 'R';
- else if (type == FRIBIDI_TYPE_L)
- ch = 'L';
- else if (type == FRIBIDI_TYPE_E)
- ch = 'E';
- else if (type == FRIBIDI_TYPE_EN)
- ch = 'n';
- else if (type == FRIBIDI_TYPE_N)
- ch = 'N';
- else
- ch = '?';
-
- printf("%c", ch);
- }
- pp = pp->next;
- }
- printf("\n");
+ fprintf(stderr, "Res. types : ");
+ while(pp) {
+ gint i;
+ for (i=0; i<RL_LEN(pp); i++)
+ fprintf(stderr, "%c", char_from_type[pp->type]);
+ pp = pp->next;
+ }
+ fprintf(stderr, "\n");
}
static void print_bidi_string(FriBidiChar *str)
{
gint i;
+ fprintf(stderr, "Org. types : ");
for (i=0; i<bidi_string_strlen(str); i++)
- printf("%c", str[i]);
- printf("\n");
+ fprintf(stderr, "%c", char_from_type[fribidi_get_type(str[i])]);
+ fprintf(stderr, "\n");
}
#endif
@@ -331,51 +594,52 @@ static void print_bidi_string(FriBidiChar *str)
//----------------------------------------------------------------------*/
static TypeLink *
search_rl_for_strong(TypeLink *pos,
- gint dir)
+ gint dir)
{
TypeLink *pp = pos;
if (dir == -1)
{
for (pp = pos; pp; pp=pp->prev)
- {
- FriBidiCharType char_type = RL_TYPE(pp);
- if (char_type == FRIBIDI_TYPE_R || char_type == FRIBIDI_TYPE_L)
- return pp;
- }
+ {
+ FriBidiCharType char_type = RL_TYPE(pp);
+ if (char_type == FRIBIDI_TYPE_R || char_type == FRIBIDI_TYPE_L ||
+ char_type == FRIBIDI_TYPE_RLO || char_type == FRIBIDI_TYPE_LRO)
+ return pp;
+ }
}
else
{
for (pp = pos; pp; pp=pp->next)
- {
- FriBidiCharType char_type = RL_TYPE(pp);
- if (char_type == FRIBIDI_TYPE_R || char_type == FRIBIDI_TYPE_L)
- return pp;
- }
+ {
+ FriBidiCharType char_type = RL_TYPE(pp);
+ if (char_type == FRIBIDI_TYPE_R || char_type == FRIBIDI_TYPE_L ||
+ char_type == FRIBIDI_TYPE_RLO || char_type == FRIBIDI_TYPE_LRO)
+ return pp;
+ }
}
return NULL;
}
/*======================================================================
// This function should follow the Unicode specification closely!
-//
-// It is still lacking the support for <RLO> and <LRO>.
//----------------------------------------------------------------------*/
static void
fribidi_analyse_string(/* input */
- FriBidiChar *str,
- gint len,
- FriBidiCharType *pbase_dir,
- /* output */
- TypeLink **ptype_rl_list,
- gint *pmax_level)
+ FriBidiChar *str,
+ gint len,
+ FriBidiCharType *pbase_dir,
+ /* output */
+ TypeLink **ptype_rl_list,
+ gint *pmax_level)
{
gint base_level, base_dir;
gint max_level;
gint i;
gint *char_type;
gint prev_last_strong, last_strong;
- TypeLink *type_rl_list, *pp;
+ TypeLink *type_rl_list, *explicits_list, *explicits_list_end,
+ *pp, *ppprev, *ppnext;
/* Determinate character types */
char_type = g_new(gint, len);
@@ -385,21 +649,16 @@ fribidi_analyse_string(/* input */
/* Run length encode the character types */
type_rl_list = run_length_encode_types(char_type, len);
g_free(char_type);
+ init_list(&explicits_list, &explicits_list_end);
/* Find the base level */
if (*pbase_dir == FRIBIDI_TYPE_L)
- {
- base_dir = FRIBIDI_TYPE_L;
base_level = 0;
- }
else if (*pbase_dir == FRIBIDI_TYPE_R)
- {
- base_dir = FRIBIDI_TYPE_R;
base_level = 1;
- }
- /* Search for first strong character and use its direction as base
- direciton */
+ /* P2. Search for first strong character and use its direction as base
+ direction */
else
{
base_level = 0; /* Default */
@@ -407,7 +666,7 @@ fribidi_analyse_string(/* input */
for (pp = type_rl_list; pp; pp = pp->next)
{
if (RL_TYPE(pp) == FRIBIDI_TYPE_R ||
- RL_TYPE(pp) == FRIBIDI_TYPE_AL) /* had been dropped */
+ RL_TYPE(pp) == FRIBIDI_TYPE_AL)
{
base_level = 1;
base_dir = FRIBIDI_TYPE_R;
@@ -425,241 +684,443 @@ fribidi_analyse_string(/* input */
* that was passed on input.
*/
if (base_dir == FRIBIDI_TYPE_N)
- {
- if (*pbase_dir == FRIBIDI_TYPE_WR)
- {
- base_dir = FRIBIDI_TYPE_RTL;
- base_level = 1;
- }
- else if (*pbase_dir == FRIBIDI_TYPE_WL)
- {
- base_dir = FRIBIDI_TYPE_LTR;
- base_level = 0;
- }
- }
+ if (*pbase_dir == FRIBIDI_TYPE_WR)
+ base_level = 1;
+ else
+ base_level = 0;
}
-
- /* 1. Explicit Levels and Directions. TBD! */
- compact_list(type_rl_list);
-
- /* 2. Explicit Overrides. TBD! */
- compact_list(type_rl_list);
-
- /* 3. Terminating Embeddings and overrides. TBD! */
+ base_dir = LEVEL_DIR(base_level);
+
+ /* Explicit Levels and Directions */
+ DBG("Explicit Levels and Directions.\n");
+ {
+ /* X1. Begin by setting the current embedding level to the paragraph
+ embedding level. Set the directional override status to neutral.
+ Process each character iteratively, applying rules X2 through X9.
+ Only embedding levels from 0 to 61 are valid in this phase.
+ */
+ gint level = base_level;
+ gint override = FRIBIDI_TYPE_N;
+ gint new_level;
+ /* stack */
+ gint stack_size = 0;
+ gint over_pushed = 0;
+ gint first_interval = 0;
+ level_info *status_stack = g_new(level_info, MAX_LEVEL + 2);
+ TypeLink *dummy = new_type_link();
+
+ gint i;
+
+ for (pp = type_rl_list->next; pp->next; pp = pp->next)
+ {
+ gint this_type = RL_TYPE(pp);
+ /* 1. Explicit Embeddings */
+ /* X2. With each RLE, compute the least greater odd embedding level. */
+ if (this_type == FRIBIDI_TYPE_RLE)
+ for (i = 0; i < RL_LEN(pp); i++)
+ {
+ new_level = (level + 1) | 1;
+ PUSH_STATUS(FRIBIDI_TYPE_N);
+ }
+ /* X3. With each LRE, compute the least greater even embedding level. */
+ else if (this_type == FRIBIDI_TYPE_LRE)
+ for (i = 0; i < RL_LEN(pp); i++)
+ {
+ new_level = (level + 2) & ~1;
+ PUSH_STATUS(FRIBIDI_TYPE_N);
+ }
+ /* 2. Explicit Overrides */
+ /* X4. With each RLO, compute the least greater odd embedding level. */
+ if (this_type == FRIBIDI_TYPE_RLO)
+ for (i = 0; i < RL_LEN(pp); i++)
+ {
+ new_level = (level + 1) | 1;
+ PUSH_STATUS(FRIBIDI_TYPE_R);
+ }
+ /* X5. With each LRO, compute the least greater even embedding level. */
+ else if (this_type == FRIBIDI_TYPE_LRO)
+ for (i = 0; i < RL_LEN(pp); i++)
+ {
+ new_level = (level + 2) & ~1;
+ PUSH_STATUS(FRIBIDI_TYPE_L);
+ }
+
+ /* X6. For all types besides RLE, LRE, RLO, LRO, and PDF:
+ a. Set the level of the current character to the current
+ embedding level.
+ b. Whenever the directional override status is not neutral,
+ reset the current character type to the directional override
+ status.
+ */
+ else if (this_type != FRIBIDI_TYPE_PDF)
+ {
+ RL_LEVEL(pp) = level;
+ if (override != FRIBIDI_TYPE_N &&
+ RL_TYPE(pp) != FRIBIDI_TYPE_BN)
+ RL_TYPE(pp) = override;
+ }
+ /* 3. Terminating Embeddings and overrides */
+ /* X7. With each PDF, determine the matching embedding or
+ override code.
+ */
+ else /* now: this_type == FRIBIDI_TYPE_PDF */
+ for (i = 0; i < RL_LEN(pp); i++)
+ POP_STATUS;
+ /* X8. All explicit directional embeddings and overrides are
+ completely terminated at the end of each paragraph. Paragraph
+ separators are not included in the embedding.
+ */
+ /*
+ This function is running on a single paragraph, so we can do
+ X8 after all the input is processed.
+ */
+ /* X9. Remove all RLE, LRE, RLO, LRO, PDF, and BN codes.
+ */
+ if (this_type == FRIBIDI_TYPE_RLE ||
+ this_type == FRIBIDI_TYPE_LRE ||
+ this_type == FRIBIDI_TYPE_RLO ||
+ this_type == FRIBIDI_TYPE_LRO ||
+ this_type == FRIBIDI_TYPE_PDF ||
+ this_type == FRIBIDI_TYPE_BN)
+ {
+ /* Remove element and add it to explicits_list */
+ dummy->next = pp->next;
+ pp->level = -2;
+ move_element_before(pp, explicits_list_end);
+ pp = dummy;
+ }
+ }
+
+ /* Implementing X8. It has no effect on a single paragraph! */
+ level = base_level;
+ override = FRIBIDI_TYPE_N;
+ status_stack -= stack_size;
+ stack_size = 0;
+
+ free_type_link(dummy);
+ g_free(status_stack);
+ }
+ /* X10. The remaining rules are applied to each run of characters at the
+ same level. For each run, determine the start-of-level-run (sor) and
+ end-of-level-run (eor) type, either L or R. This depends on the
+ higher of the two levels on either side of the boundary (at the start
+ or end of the paragraph, the level of the 'other' run is the base
+ embedding level). If the higher level is odd, the type is R, otherwise
+ it is L.
+ */
+ /* Resolving Implicit Levels can be done out of X10 loop, so only change
+ of Resolving Weak Types and Resolving Neutral Types is needed.
+ */
+
compact_list(type_rl_list);
- if (fribidi_debug)
- {
- print_bidi_string(str);
- print_resolved_levels(type_rl_list);
- print_resolved_types(type_rl_list);
- }
-
+#ifdef DEBUG
+ if (fribidi_debug) {
+ print_bidi_string(str);
+ print_resolved_levels(type_rl_list);
+ print_resolved_types(type_rl_list);
+ }
+#endif
+
/* 4. Resolving weak types */
DBG("Resolving weak types.\n");
- last_strong = base_dir;
- for (pp = type_rl_list->next; pp->next; pp = pp->next)
- {
- gint prev_type = RL_TYPE(pp->prev);
+ {
+ gint last_strong = base_dir;
+ gint prev_type_new;
+ gint prev_type_old = base_dir;
+
+ for (ppnext= (pp=(ppprev=type_rl_list)->next)->next;ppnext;
+ ppprev=pp, pp=ppnext, ppnext=ppnext->next) {
+ /* When we are at the start of a level run, prev_type is sor. */
+ gint prev_type;
gint this_type = RL_TYPE(pp);
- gint next_type = RL_TYPE(pp->next);
-
- /* Remember the last strong character */
- if (prev_type == FRIBIDI_TYPE_AL
- || prev_type == FRIBIDI_TYPE_R
- || prev_type == FRIBIDI_TYPE_L)
- last_strong = prev_type;
+ /* When we are at the end of a level run, next_type is eor. */
+ gint next_type = NEXT_TYPE_OR_EOR;
- /* W1. NSM */
+ prev_type_new = RL_TYPE(ppprev);
+ RL_TYPE(ppprev) = prev_type_old;
+ prev_type_old = this_type;
+ prev_type = PREV_TYPE_OR_SOR;
+
+ /* Remember the last strong character
+ It's very important to do it here, not at the end of the loop
+ because the types may change, it affects rule 3.
+ */
+ if (RL_LEVEL(ppprev) != RL_LEVEL(pp))
+ last_strong = prev_type/*sor*/;
+ else
+ if (prev_type == FRIBIDI_TYPE_AL ||
+ prev_type == FRIBIDI_TYPE_R ||
+ prev_type == FRIBIDI_TYPE_L)
+ last_strong = prev_type;
+
+ /* W1. NSM
+ Examine each non-spacing mark (NSM) in the level run, and change
+ the type of the NSM to the type of the previous character. If the
+ NSM is at the start of the level run, it will get the type of sor.
+ */
if (this_type == FRIBIDI_TYPE_NSM)
- {
- if (prev_type == FRIBIDI_TYPE_SOT)
- RL_TYPE(pp) = FRIBIDI_TYPE_N; /* Will be resolved to base dir */
- else
- RL_TYPE(pp) = prev_type;
- }
+ RL_TYPE(pp) = prev_type;
- /* W2: European numbers */
- if (this_type == FRIBIDI_TYPE_EN /* another typo */
- && last_strong == FRIBIDI_TYPE_AL)
- RL_TYPE(pp) = FRIBIDI_TYPE_AN;
-
- /* W3: Change ALs to R
- We have to do this for prev character as we would otherwise
- interfer with the next last_strong which is FRIBIDI_TYPE_AL.
+ /* W2: European numbers.
*/
- if (prev_type == FRIBIDI_TYPE_AL)
- RL_TYPE(pp->prev) = FRIBIDI_TYPE_R;
+ if (this_type == FRIBIDI_TYPE_EN && last_strong == FRIBIDI_TYPE_AL)
+ RL_TYPE(pp) = FRIBIDI_TYPE_AN;
+
+ /* W3: Change ALs to R.
+ */
+ if (this_type == FRIBIDI_TYPE_AL)
+ RL_TYPE(pp) = FRIBIDI_TYPE_R;
+
+ RL_TYPE(ppprev) = prev_type_new;
+
+ }
+
+ for (ppnext= (pp=(ppprev=type_rl_list)->next)->next;ppnext;
+ ppprev=pp, pp=ppnext, ppnext=ppnext->next) {
+ /* When we are at the start of a level run, prev_type is sor. */
+ gint prev_type = PREV_TYPE_OR_SOR;
+ gint this_type = RL_TYPE(pp);
+ /* When we are at the end of a level run, next_type is eor. */
+ gint next_type = NEXT_TYPE_OR_EOR;
/* W4. A single european separator changes to a european number.
- A single common separator between two numbers of the same type
- changes to that type.
- */
+ A single common separator between two numbers of the same type
+ changes to that type.
+ */
if (RL_LEN(pp) == 1)
- {
- TYPE_RULE_C(EN,ES,EN, EN);
- TYPE_RULE_C(EN,CS,EN, EN);
- TYPE_RULE_C(AN,CS,AN, AN);
- }
+ {
+ TYPE_RULE_C(EN,ES,EN, EN);
+ TYPE_RULE_C(EN,CS,EN, EN);
+ TYPE_RULE_C(AN,CS,AN, AN);
+ }
+ }
+ for (ppnext= (pp=(ppprev=type_rl_list)->next)->next;ppnext;
+ ppprev=pp, pp=ppnext, ppnext=ppnext->next) {
+ /* When we are at the start of a level run, prev_type is sor. */
+ gint prev_type = PREV_TYPE_OR_SOR;
+ gint this_type = RL_TYPE(pp);
+ /* When we are at the end of a level run, next_type is eor. */
+ gint next_type = NEXT_TYPE_OR_EOR;
+
+ /* Remember the last strong character
+ It's very important to do it here, not at the end of the loop
+ because the types may change, it affects rule 3.
+ */
+ if (RL_LEVEL(ppprev) != RL_LEVEL(pp))
+ last_strong = prev_type/*sor*/;
+ else
+ if (prev_type == FRIBIDI_TYPE_AL ||
+ prev_type == FRIBIDI_TYPE_R ||
+ prev_type == FRIBIDI_TYPE_L)
+ last_strong = prev_type;
/* W5. A sequence of European terminators adjacent to European
- numbers changes to All European numbers.
- */
- if (this_type == FRIBIDI_TYPE_ET)
- {
- if (next_type == FRIBIDI_TYPE_EN
- || prev_type == FRIBIDI_TYPE_EN) {
- RL_TYPE(pp) = FRIBIDI_TYPE_EN;
- }
- }
+ numbers changes to All European numbers.
+ */
+ if (this_type == FRIBIDI_TYPE_ET &&
+ (prev_type == FRIBIDI_TYPE_EN || next_type == FRIBIDI_TYPE_EN))
+ RL_TYPE(pp) = FRIBIDI_TYPE_EN;
- /* This type may have been overriden */
+ /* This type may have been overridden. */
this_type = RL_TYPE(pp);
- /* W6. Otherwise change separators and terminators to other neutral */
- if (this_type == FRIBIDI_TYPE_ET
- || this_type == FRIBIDI_TYPE_CS
- || this_type == FRIBIDI_TYPE_ES)
- RL_TYPE(pp) = FRIBIDI_TYPE_ON;
-
- /* W7. Change european numbers to L. */
- if (prev_type == FRIBIDI_TYPE_EN
- && last_strong == FRIBIDI_TYPE_L)
- RL_TYPE(pp->prev) = FRIBIDI_TYPE_L;
+ /* W6. Otherwise change separators and terminators to other neutral.
+ */
+ if (this_type == FRIBIDI_TYPE_ET ||
+ this_type == FRIBIDI_TYPE_CS ||
+ this_type == FRIBIDI_TYPE_ES)
+ RL_TYPE(pp) = FRIBIDI_TYPE_ON;
+
+ /* W7. Change european numbers to L.
+ */
+ if (this_type == FRIBIDI_TYPE_EN && last_strong == FRIBIDI_TYPE_L)
+ RL_TYPE(pp) = FRIBIDI_TYPE_L;
}
- /* Handle the two rules that effect pp->prev for the last element */
- if (RL_TYPE (pp->prev) == FRIBIDI_TYPE_AL) /* W3 */
- RL_TYPE(pp->prev) = FRIBIDI_TYPE_R;
- if (RL_TYPE (pp->prev) == FRIBIDI_TYPE_EN /* W7 */
- && last_strong == FRIBIDI_TYPE_L)
- RL_TYPE(pp->prev) = FRIBIDI_TYPE_L;
+ }
compact_list(type_rl_list);
-
- if (fribidi_debug)
- {
- print_bidi_string(str);
- print_resolved_levels(type_rl_list);
- print_resolved_types(type_rl_list);
- }
+#ifdef DEBUG
+ if (fribidi_debug) {
+ print_types_re(type_rl_list);
+ print_resolved_levels(type_rl_list);
+ print_resolved_types(type_rl_list);
+ }
+#endif
+
/* 5. Resolving Neutral Types */
DBG("Resolving neutral types.\n");
-
+ {
+ TypeLink *ppprev, *ppnext/* prev and next non neutral */;
+ gint prev_type, next_type;
+
/* We can now collapse all separators and other neutral types to
- plain neutrals */
+ plain neutrals. */
for (pp = type_rl_list->next; pp->next; pp = pp->next)
{
gint this_type = RL_TYPE(pp);
- if ( this_type == FRIBIDI_TYPE_WS
- || this_type == FRIBIDI_TYPE_ON
- || this_type == FRIBIDI_TYPE_ES
- || this_type == FRIBIDI_TYPE_ET
- || this_type == FRIBIDI_TYPE_CS
- || this_type == FRIBIDI_TYPE_BN)
- RL_TYPE(pp) = FRIBIDI_TYPE_N;
+ if (this_type == FRIBIDI_TYPE_WS ||
+ this_type == FRIBIDI_TYPE_ON ||
+ this_type == FRIBIDI_TYPE_ES ||
+ this_type == FRIBIDI_TYPE_ET ||
+ this_type == FRIBIDI_TYPE_BN ||
+ this_type == FRIBIDI_TYPE_BS ||
+ this_type == FRIBIDI_TYPE_SS ||
+ this_type == FRIBIDI_TYPE_CS)
+ RL_TYPE(pp) = FRIBIDI_TYPE_N;
}
compact_list(type_rl_list);
- for (pp = type_rl_list->next; pp->next; pp = pp->next)
+ /* N1. and N2.
+ For each neutral, resolve it.
+ */
+ for (ppnext=pp=(ppprev=type_rl_list)->next; pp->next; pp=pp->next)
{
- gint prev_type = RL_TYPE(pp->prev);
gint this_type = RL_TYPE(pp);
- gint next_type = RL_TYPE(pp->next);
-
- if (this_type == FRIBIDI_TYPE_N) /* optimization! */
- {
- /* "European and arabic numbers are treated
- as though they were R" */
-
- if (prev_type == FRIBIDI_TYPE_EN || prev_type == FRIBIDI_TYPE_AN)
- prev_type = FRIBIDI_TYPE_R;
-
- if (next_type == FRIBIDI_TYPE_EN || next_type == FRIBIDI_TYPE_AN)
- next_type = FRIBIDI_TYPE_R;
+ /* "European and arabic numbers are treated as though they were R" */
+ if (this_type == FRIBIDI_TYPE_EN || this_type == FRIBIDI_TYPE_AN)
+ this_type = FRIBIDI_TYPE_R;
+
+ /* Find prev_type from ppprev. */
+ prev_type = PREV_TYPE_OR_SOR;
+ /* "European and arabic numbers are treated as though they were R" */
+ if (prev_type == FRIBIDI_TYPE_EN || prev_type == FRIBIDI_TYPE_AN)
+ prev_type = FRIBIDI_TYPE_R;
+
+ /* Update ppnext if needed. */
+ if (RL_LEVEL(pp) == RL_LEVEL(ppnext)) {
+ /* Find next non-neutral. */
+ for (ppnext = pp->next;
+ RL_TYPE(ppnext) == FRIBIDI_TYPE_N &&
+ RL_LEVEL(ppnext) == RL_LEVEL(ppnext->prev); ppnext = ppnext->next)
+ /* Nothing! */;
+ next_type = NEXT_TYPE_OR_EOR;
+
+ /* "European and arabic numbers are treated as though they were R" */
+ if (next_type == FRIBIDI_TYPE_EN || next_type == FRIBIDI_TYPE_AN)
+ next_type = FRIBIDI_TYPE_R;
+ }
- /* N1. */
- TYPE_RULE_C(R,N,R, R);
- TYPE_RULE_C(L,N,L, L);
+ if (this_type == FRIBIDI_TYPE_N)
+ RL_TYPE(pp) = (prev_type == next_type) ?
+ /* N1. */ prev_type :
+ /* N2. */ /*FRIBIDI_TYPE_E*/LEVEL_DIR(RL_LEVEL(pp));
- /* N2. Any remaining neutrals takes the embedding direction */
- if (RL_TYPE(pp) == FRIBIDI_TYPE_N)
- RL_TYPE(pp) = FRIBIDI_TYPE_E;
- }
+ /* Update ppprev if needed. */
+ if (this_type != FRIBIDI_TYPE_N ||
+ RL_LEVEL(pp) != RL_LEVEL(pp->next))
+ ppprev = pp;
}
-
+ }
+
compact_list(type_rl_list);
-#ifndef NO_STDIO
- if (fribidi_debug)
- print_types_re(type_rl_list);
- if (fribidi_debug)
- {
- print_bidi_string(str);
- print_resolved_levels(type_rl_list);
- print_resolved_types(type_rl_list);
- }
+#ifdef DEBUG
+ if (fribidi_debug) {
+ print_types_re(type_rl_list);
+ print_resolved_levels(type_rl_list);
+ print_resolved_types(type_rl_list);
+ }
#endif
-
- /* 6. Resolving Implicit levels */
+
+ /* 6. Resolving implicit levels */
DBG("Resolving implicit levels.\n");
{
- gint level = base_level;
max_level = base_level;
for (pp = type_rl_list->next; pp->next; pp = pp->next)
{
- gint this_type = RL_TYPE(pp);
-
- /* This code should be expanded to handle explicit directions! */
-
- /* Even */
- if (level % 2 == 0)
- {
- if (this_type == FRIBIDI_TYPE_R)
- RL_LEVEL(pp) = level + 1;
- else if (this_type == FRIBIDI_TYPE_AN)
- RL_LEVEL(pp) = level + 2;
- else if (RL_TYPE(pp->prev) != FRIBIDI_TYPE_L && this_type == FRIBIDI_TYPE_EN)
- RL_LEVEL(pp) = level + 2;
- else
- RL_LEVEL(pp) = level;
- }
- /* Odd */
- else
- {
- if ( this_type == FRIBIDI_TYPE_L
- || this_type == FRIBIDI_TYPE_AN
- || this_type == FRIBIDI_TYPE_EN)
- RL_LEVEL(pp) = level+1;
- else
- RL_LEVEL(pp) = level;
- }
-
- if (RL_LEVEL(pp) > max_level)
- max_level = RL_LEVEL(pp);
+ gint this_type = RL_TYPE(pp);
+ gint level = RL_LEVEL(pp);
+
+ /* This code should be expanded to handle explicit directions! */
+
+ /* I1. Even */
+ if (level % 2 == 0)
+ {
+ if (this_type == FRIBIDI_TYPE_R)
+ RL_LEVEL(pp)++;
+ else if (this_type == FRIBIDI_TYPE_AN ||
+ this_type == FRIBIDI_TYPE_EN)
+ RL_LEVEL(pp) += 2;
+ }
+ /* I2. Odd */
+ else
+ if (this_type == FRIBIDI_TYPE_L ||
+ this_type == FRIBIDI_TYPE_AN ||
+ this_type == FRIBIDI_TYPE_EN)
+ RL_LEVEL(pp)++;
+
+ if (RL_LEVEL(pp) > max_level)
+ max_level = RL_LEVEL(pp);
}
}
compact_list(type_rl_list);
+
+#ifdef DEBUG
+ if (fribidi_debug) {
+ print_bidi_string(str);
+ print_resolved_levels(type_rl_list);
+ print_resolved_types(type_rl_list);
+ }
+#endif
-#ifndef NO_STDIO
- if (fribidi_debug)
- {
- print_bidi_string(str);
- print_resolved_levels(type_rl_list);
- print_resolved_types(type_rl_list);
- }
+/* Reinsert the explicit codes & bn's that were already removed, from the
+ explicits_list to type_rl_list. */
+ DBG("Reinserting explicit codes.\n");
+ {
+ TypeLink *p;
+
+ override_list(type_rl_list, explicits_list);
+ p = type_rl_list->next;
+ if (p->level < 0)
+ p->level = base_level;
+ for (; p->next; p = p->next)
+ if (p->level < 0)
+ p->level = p->prev->level;
+ }
+
+#ifdef DEBUG
+ if (fribidi_debug) {
+ print_types_re(type_rl_list);
+ print_resolved_levels(type_rl_list);
+ print_resolved_types(type_rl_list);
+ }
#endif
-
+
*ptype_rl_list = type_rl_list;
*pmax_level = max_level;
*pbase_dir = base_dir;
}
/*======================================================================
+// Frees up the rl_list, must be called after each call to
+ fribidi_analyse_string(), after the list is not needed anymore.
+//----------------------------------------------------------------------*/
+void free_rl_list(TypeLink *type_rl_list) {
+
+ TypeLink *p, *pp;
+ /* Releases every node of the list headed by type_rl_list.
+ The guard must test the argument: the local 'pp' has no value yet. */
+ if (!type_rl_list)
+ return;
+#ifdef USE_SIMPLE_MALLOC
+ /* Save the successor before freeing the node, then step to the saved
+ pointer, so that freed memory is never read. */
+ for (pp = type_rl_list; pp; pp = p) {
+ p = pp->next;
+ g_free(pp);
+ };
+#else
+ /* Walk to the last node (starting at the head, so a one-node list is
+ handled too), then push the whole chain onto free_type_links. */
+ for (pp = type_rl_list; pp->next; pp=pp->next)
+ /*Nothing*/;
+ pp->next = free_type_links;
+ free_type_links = type_rl_list;
+#endif
+
+}
+/*======================================================================
// Here starts the exposed front end functions.
//----------------------------------------------------------------------*/
@@ -668,15 +1129,15 @@ fribidi_analyse_string(/* input */
// does reordering and fills in the output strings.
//----------------------------------------------------------------------*/
void fribidi_log2vis(/* input */
- FriBidiChar *str,
- gint len,
- FriBidiCharType *pbase_dir,
- /* output */
- FriBidiChar *visual_str,
- guint16 *position_L_to_V_list,
- guint16 *position_V_to_L_list,
- guint8 *embedding_level_list
- )
+ FriBidiChar *str,
+ gint len,
+ FriBidiCharType *pbase_dir,
+ /* output */
+ FriBidiChar *visual_str,
+ guint16 *position_L_to_V_list,
+ guint16 *position_V_to_L_list,
+ guint8 *embedding_level_list
+ )
{
TypeLink *type_rl_list, *pp = NULL;
gint max_level;
@@ -695,113 +1156,153 @@ void fribidi_log2vis(/* input */
if (len > FRIBIDI_MAX_STRING_LENGTH)
{
-#ifndef NO_STDIO
- fprintf(stderr, "Fribidi can't handle strings > 65000 chars!\n");
+#ifdef DEBUG
+ fprintf(stderr, "Fribidi is set to not handle strings > %d chars!\n",
+ FRIBIDI_MAX_STRING_LENGTH);
#endif
return;
}
fribidi_analyse_string(str, len, pbase_dir,
- /* output */
- &type_rl_list,
- &max_level);
+ /* output */
+ &type_rl_list,
+ &max_level);
/* 7. Reordering resolved levels */
- DBG("Reordering.\n");
+ DBG("Reordering resolved levels.\n");
{
gint level_idx;
- gint i;
+ gint i, j, k, state, pos;
+ TypeLink *p, *q, *list, *list_end;
+
+ /* L1. Reset the embedding levels of some chars.
+ */
+ init_list(&list, &list_end);
+ q = list_end;
+ state = 1;
+ pos = len - 1;
+ for (j=len-1; j>=0; j--) {
+ k = fribidi_get_type(str[j]);
+ if (!state && (k == FRIBIDI_TYPE_BS || k == FRIBIDI_TYPE_SS)) {
+ state = 1;
+ pos = j;
+ } else
+ if (state && (!j || !(
+ k == FRIBIDI_TYPE_WS ||
+ k == FRIBIDI_TYPE_BS ||
+ k == FRIBIDI_TYPE_SS ||
+ k == FRIBIDI_TYPE_BN ||
+ k == FRIBIDI_TYPE_LRO ||
+ k == FRIBIDI_TYPE_LRE ||
+ k == FRIBIDI_TYPE_RLO ||
+ k == FRIBIDI_TYPE_RLE ||
+ k == FRIBIDI_TYPE_PDF))) {
+ /* if state is on at the very first of string, do this too. */
+ if (!j)
+ j--;
+ state = 0;
+ p = new_type_link();
+ p->prev = p->next = 0;
+ p->pos = j+1;
+ p->len = pos - j;
+ p->type = *pbase_dir;
+ p->level = p->type == FRIBIDI_TYPE_R ? 1 : 0;
+ move_element_before(p, q);
+ q = p;
+ }
+ }
+ override_list(type_rl_list, list);
+
+#ifdef DEBUG
+ if (fribidi_debug) {
+ DBG("Reset the embedding levels.\n");
+ print_types_re(type_rl_list);
+ print_resolved_levels(type_rl_list);
+ print_resolved_types(type_rl_list);
+ }
+#endif
+
+ /* TBD: L3 */
/* Set up the ordering array to sorted order and copy the logical
string to the visual */
if (position_L_to_V_list)
for (i=0; i<len+1; i++)
- position_L_to_V_list[i]=i;
+ position_L_to_V_list[i]=i;
if (visual_str)
for (i=0; i<len+1; i++)
- visual_str[i] = str[i];
+ visual_str[i] = str[i];
/* Assign the embedding level array */
if (embedding_level_list)
for (pp = type_rl_list->next; pp->next; pp = pp->next)
- {
- gint i;
- gint pos = RL_POS(pp);
- gint len = RL_LEN(pp);
- gint level = RL_LEVEL(pp);
- for (i=0; i<len; i++)
- embedding_level_list[pos + i] = level;
+ {
+ gint i;
+ gint pos = RL_POS(pp);
+ gint len = RL_LEN(pp);
+ gint level = RL_LEVEL(pp);
+ for (i=0; i<len; i++)
+ embedding_level_list[pos + i] = level;
}
-
+
/* Reorder both the outstring and the order array*/
if (visual_str || position_V_to_L_list)
{
- if (visual_str)
- /* Mirror all characters that are in odd levels and have mirrors */
- for (pp = type_rl_list->next; pp->next; pp = pp->next)
- {
- if (RL_LEVEL(pp) % 2 == 1)
- {
- gint i;
- for (i=RL_POS(pp); i<RL_POS(pp)+RL_LEN(pp); i++)
- {
- FriBidiChar mirrored_ch;
- if (fribidi_get_mirror_char(visual_str[i], &mirrored_ch))
- visual_str[i] = mirrored_ch;
- }
- }
- }
-
- /* Reorder */
- for (level_idx = max_level; level_idx>0; level_idx--)
- {
- for (pp = type_rl_list->next; pp->next; pp = pp->next)
- {
- if (RL_LEVEL(pp) >= level_idx)
- {
- /* Find all stretches that are >= level_idx */
- gint len = RL_LEN(pp);
- gint pos = RL_POS(pp);
+ if (visual_str)
+ /* L4. Mirror all characters that are in odd levels and have mirrors.
+ */
+ for (pp = type_rl_list->next; pp->next; pp = pp->next)
+ {
+ if (RL_LEVEL(pp) & 1)
+ {
+ gint i;
+ for (i=RL_POS(pp); i<RL_POS(pp)+RL_LEN(pp); i++)
+ {
+ FriBidiChar mirrored_ch;
+ if (fribidi_get_mirror_char(visual_str[i], &mirrored_ch))
+ visual_str[i] = mirrored_ch;
+ }
+ }
+ }
+
+ /* L2. Reorder.
+ */
+ for (level_idx = max_level; level_idx>0; level_idx--)
+ {
+ for (pp = type_rl_list->next; pp->next; pp = pp->next)
+ {
+ if (RL_LEVEL(pp) >= level_idx)
+ {
+ /* Find all stretches that are >= level_idx */
+ gint len = RL_LEN(pp);
+ gint pos = RL_POS(pp);
TypeLink *pp1 = pp->next;
- while(pp1->next && RL_LEVEL(pp1) >= level_idx)
- {
- len+= RL_LEN(pp1);
- pp1 = pp1->next;
- }
-
- pp = pp1->prev;
- if (visual_str)
- bidi_string_reverse(visual_str+pos, len);
- if (position_V_to_L_list)
- int16_array_reverse(position_V_to_L_list+pos, len);
-
- }
- }
- }
+ while(pp1->next && RL_LEVEL(pp1) >= level_idx)
+ {
+ len+= RL_LEN(pp1);
+ pp1 = pp1->next;
+ }
+
+ pp = pp1->prev;
+ if (visual_str)
+ bidi_string_reverse(visual_str+pos, len);
+ if (position_V_to_L_list)
+ int16_array_reverse(position_V_to_L_list+pos, len);
+
+ }
+ }
+ }
}
/* Convert the l2v list to v2l */
if (position_V_to_L_list && position_L_to_V_list)
for (i=0; i<len; i++)
- position_V_to_L_list[position_L_to_V_list[i]] = i;
+ position_V_to_L_list[position_L_to_V_list[i]] = i;
}
- /* Free up the rl_list */
-
- /* At this point, pp points to the last link or (rarely) might be NULL
- */
- if (!pp)
- for (pp = type_rl_list->next; pp->next; pp = pp->next)
- /* Nothing */;
-
- pp->next = free_type_links;
- free_type_links = type_rl_list;
-
- /* Free up V_to_L if we allocated it */
- if (private_V_to_L)
- g_free(position_V_to_L_list);
+ free_rl_list(type_rl_list);
}
@@ -811,12 +1312,12 @@ void fribidi_log2vis(/* input */
//----------------------------------------------------------------------*/
void fribidi_log2vis_get_embedding_levels(
/* input */
- FriBidiChar *str,
- gint len,
- FriBidiCharType *pbase_dir,
- /* output */
- guint8 *embedding_level_list
- )
+ FriBidiChar *str,
+ gint len,
+ FriBidiCharType *pbase_dir,
+ /* output */
+ guint8 *embedding_level_list
+ )
{
TypeLink *type_rl_list, *pp;
gint max_level;
@@ -825,9 +1326,9 @@ void fribidi_log2vis_get_embedding_levels(
return;
fribidi_analyse_string(str, len, pbase_dir,
- /* output */
- &type_rl_list,
- &max_level);
+ /* output */
+ &type_rl_list,
+ &max_level);
for (pp = type_rl_list->next; pp->next; pp = pp->next)
{
@@ -836,11 +1337,8 @@ void fribidi_log2vis_get_embedding_levels(
gint len = RL_LEN(pp);
gint level = RL_LEVEL(pp);
for (i=0; i<len; i++)
- embedding_level_list[pos + i] = level;
+ embedding_level_list[pos + i] = level;
}
- /* Free up the rl_list */
- pp->next = free_type_links;
- free_type_links = type_rl_list;
+ free_rl_list(type_rl_list);
}
-
diff --git a/fribidi.h b/fribidi.h
index 91f9f70..5f5e984 100644
--- a/fribidi.h
+++ b/fribidi.h
@@ -29,7 +29,7 @@ fribidi_get_mirror_char(/* Input */
FriBidiChar ch,
/* Output */
FriBidiChar *mirrored_ch);
-void
+int
fribidi_set_debug(gboolean debug);
void fribidi_log2vis(/* input */
diff --git a/fribidi_get_type.c b/fribidi_get_type.c
index edac1ef..d00353f 100644
--- a/fribidi_get_type.c
+++ b/fribidi_get_type.c
@@ -26,11 +26,24 @@
//----------------------------------------------------------------------*/
FriBidiCharType fribidi_get_type(FriBidiChar uch)
{
- guchar *block = FriBidiPropertyBlocks[uch / 256];
+ int i = uch % 256, j = uch / 256; /* j: index of the 256-entry block, i: offset inside that block */
+ guchar *block = FriBidiPropertyBlocks[j]; /* NOTE(review): j is not range-checked; FriBidiPropertyBlocks is declared with 256 entries, so this presumably relies on uch <= 0xFFFF -- confirm */
if (block)
- return block[uch % 256];
- else
- return 0;
+ return block[i];
+ else { /* no table exists for this block: fall back on hard-coded ranges */
+ switch (j) {
+ case 0x05: if (i >= 0x90) return FRIBIDI_TYPE_R; else break; /* U+0590..U+05FF */
+
+ case 0xFB: if (i >= 0x50) return FRIBIDI_TYPE_AL; else /* U+FB50..U+FBFF */
+ if (i >= 0x1D) return FRIBIDI_TYPE_R; else break; /* U+FB1D..U+FB4F */
+ case 0x06:
+ case 0xFC:
+ case 0xFD: return FRIBIDI_TYPE_AL; /* the whole of blocks 0x06, 0xFC and 0xFD */
+ case 0x07: if (i <= 0xBF) return FRIBIDI_TYPE_AL; else break; /* U+0700..U+07BF */
+ case 0xFE: if (i >= 0x70) return FRIBIDI_TYPE_AL; else break; /* U+FE70..U+FEFF */
+ }
+ return FRIBIDI_TYPE_L; /* anything not matched above defaults to L */
+ }
}
gboolean
diff --git a/fribidi_main.c b/fribidi_main.c
index f269df5..bf611bf 100644
--- a/fribidi_main.c
+++ b/fribidi_main.c
@@ -1,5 +1,6 @@
/* FriBidi - Library of BiDi algorithm
* Copyright (C) 1999 Dov Grobgeld
+ * Copyright (C) 2001 Behdad Esfahbod
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -28,6 +29,7 @@
#include <stdarg.h>
#include <glib.h>
#include "fribidi.h"
+#include "fribidi_tables.i"
#define CASE(s) if(!strcmp(S_, s))
@@ -40,10 +42,6 @@ void die(gchar *fmt, ...)
exit(-1);
}
-#define UNI_ALEF 0x05D0
-#define UNI_TAV 0x05EA
-
-/* defining explicit bidi marks */
#define UNI_LRM 0x200E
#define UNI_RLM 0x200F
#define UNI_LRE 0x202a
@@ -52,6 +50,62 @@ void die(gchar *fmt, ...)
#define UNI_LRO 0x202d
#define UNI_RLO 0x202e
+#define WS FRIBIDI_TYPE_WS
+#define BS FRIBIDI_TYPE_BS
+#define EO FRIBIDI_TYPE_EO
+#define CTL FRIBIDI_TYPE_CTL
+#define LRE FRIBIDI_TYPE_LRE
+#define RLE FRIBIDI_TYPE_RLE
+#define ES FRIBIDI_TYPE_ES
+#define LRO FRIBIDI_TYPE_LRO
+#define RLO FRIBIDI_TYPE_RLO
+#define AL FRIBIDI_TYPE_AL
+#define SS FRIBIDI_TYPE_SS
+#define ET FRIBIDI_TYPE_ET
+#define NSM FRIBIDI_TYPE_NSM
+#define LTR FRIBIDI_TYPE_LTR
+#define ON FRIBIDI_TYPE_ON
+#define AN FRIBIDI_TYPE_AN
+#define BN FRIBIDI_TYPE_BN
+#define RTL FRIBIDI_TYPE_RTL
+#define CS FRIBIDI_TYPE_CS
+#define PDF FRIBIDI_TYPE_PDF
+#define EN FRIBIDI_TYPE_EN
+
+guchar FriBidiPropertyBlockSmallCaps[] = { /* bidi types for code points 0x00-0x7f; charset_to_unicode() copies this over FriBidiPropertyBlocks[0] when char_set == 0 */
+//0 1 2 3 4 5 6 7 8 9 a b c d e f
+ ON ,ON ,ON ,ON ,LTR,RTL,ON ,ON ,ON ,ON ,ON ,ON ,ON ,BS ,RLO,RLE,/*00-0f*/
+ LRO,LRE,PDF,WS ,ON ,ON ,ON ,ON ,ON ,ON ,ON ,ON ,ON ,ON ,ON ,ON ,/*10-1f*/
+ WS ,ON ,ON ,ON ,ET ,ON ,ON ,ON ,ON ,ON ,ON ,ET ,CS ,ON ,ES ,ES ,/*20-2f*/
+ EN ,EN ,EN ,EN ,EN ,EN ,AN ,AN ,AN ,AN ,CS ,ON ,ON ,ON ,ON ,ON ,/*30-3f*/
+ RTL,AL ,AL ,AL ,AL ,AL ,AL ,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,/*40-4f*/
+ RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,RTL,ON ,BS ,ON ,ON ,ON ,/*50-5f*/
+ NSM,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,/*60-6f*/
+ LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,LTR,ON ,SS ,ON ,WS ,ON ,/*70-7f*/
+};
+
+#undef WS
+#undef BS
+#undef EO
+#undef CTL
+#undef LRE
+#undef RLE
+#undef ES
+#undef LRO
+#undef RLO
+#undef AL
+#undef SS
+#undef ET
+#undef NSM
+#undef LTR
+#undef ON
+#undef AN
+#undef BN
+#undef RTL
+#undef CS
+#undef PDF
+#undef EN
+
void charset_to_unicode(gint char_set,
guchar *s,
/* output */
@@ -64,7 +118,9 @@ void charset_to_unicode(gint char_set,
if (char_set == 0)
{
- /* Convert A-Z into hebrew characters */
+ for (i=0; i< (sizeof(FriBidiPropertyBlockSmallCaps) /
+ sizeof(FriBidiPropertyBlockSmallCaps[0])); i++)
+ FriBidiPropertyBlocks[0][i] = FriBidiPropertyBlockSmallCaps[i];
j = 0;
for (i=0; i<len+1; i++)
{
@@ -76,13 +132,14 @@ void charset_to_unicode(gint char_set,
* _< RLM
* _l LRE
* _r RLE
- * _p PDF
+ * _o PDF
* _L LRO
* _R RLO
* __ underscore itself
*
*/
+
if (ch == '_') {
(*length)--;
switch (ch = s[++i]) {
@@ -90,15 +147,13 @@ void charset_to_unicode(gint char_set,
case '<': us[j++] = UNI_RLM; break;
case 'l': us[j++] = UNI_LRE; break;
case 'r': us[j++] = UNI_RLE; break;
- case 'p': us[j++] = UNI_PDF; break;
+ case 'o': us[j++] = UNI_PDF; break;
case 'L': us[j++] = UNI_LRO; break;
case 'R': us[j++] = UNI_RLO; break;
case '_': us[j++] = '_'; break;
- default: i--; break;
+ default: us[j++] = '_'; i--; break;
}
- } else if (ch >= 'A' && ch <= 'Z')
- us[j++] = ch - 'A' + UNI_ALEF;
- else
+ } else
us[j++] = s[i];
}
}
@@ -120,6 +175,8 @@ void charset_to_unicode(gint char_set,
die("Sorry! Not implemented!\n");
}
+gboolean do_clean = FALSE;
+
void unicode_to_charset(gint char_set,
FriBidiChar *us,
int length,
@@ -128,12 +185,10 @@ void unicode_to_charset(gint char_set,
{
if (char_set == 0)
{
- /* Convert hebrew characters into A-Z */
int i, j = 0;
for (i=0, j=0; i<length; i++)
{
FriBidiChar ch = us[i];
- /*{-BE start*/
switch (ch)
{
case UNI_LRM: s[j++] = '_'; s[j++] = '>'; break;
@@ -145,13 +200,16 @@ void unicode_to_charset(gint char_set,
case UNI_RLO: s[j++] = '_'; s[j++] = 'R'; break;
case '_' : s[j++] = '_'; s[j++] = '_'; break;
default:
- if (ch >= UNI_ALEF && ch <= UNI_TAV)
- s[j++] = ch-UNI_ALEF+'A';
- else if (ch < 256)
- s[j++] = ch;
- else
+ if (ch < 256) {
+ if (ch >= 32 || !do_clean)
+ s[j++] = ch;
+ } else
s[j++] = '¿';
+ if (do_clean)
+ j += 2;
}
+ if (do_clean)
+ j -= 2;
}
s[j] = 0;
}
@@ -189,7 +247,7 @@ int main(int argc, char *argv[])
gchar *S_ = argv[argp++];
CASE("-help")
{
- printf("fribidi - A command line interface to the fribidi library\n"
+ printf("fribidi - A command line interface to the fribidi library.\n"
"\n"
"Syntax:\n"
" fribidi {options} [filename]\n"
@@ -202,7 +260,8 @@ int main(int argc, char *argv[])
" -bol bol Start lines with the string given by bol.\n"
" -rtl Force base direction to RTL.\n"
" -ltr Force base direction to LTR.\n"
- " -debug Print debug information.\n"
+ " -debug Output debug info about the progress of the algorithm.\n"
+ " -clean Remove explicit format codes in output. (CapRTL mode only)\n"
" -charset cs Specify charset. Default is CapRTL. Available options are:\n"
" * 8859-8 (Hebrew)\n"
" * 8859-6 (Arabic)\n"
@@ -222,7 +281,15 @@ int main(int argc, char *argv[])
CASE("-rtl") { input_base_direction = FRIBIDI_TYPE_R; continue; }
CASE("-ltr") { input_base_direction = FRIBIDI_TYPE_L; continue; }
CASE("-fill") { do_fill = TRUE; continue; }
- CASE("-debug") { fribidi_set_debug(1); continue; }
+ CASE("-debug")
+ {
+ if (fribidi_set_debug(TRUE))
+ continue;
+ else {
+ die("Fribidi must be compiled with DEBUG option to enable debugging.\n");
+ }
+ }
+ CASE("-clean") { do_clean = TRUE; continue; }
CASE("-charset")
{
gchar *S_ = argv[argp++];
@@ -237,9 +304,6 @@ int main(int argc, char *argv[])
}
continue;
}
-
-
-
die("Unknown option %s!\n", S_);
}
@@ -256,21 +320,23 @@ int main(int argc, char *argv[])
/* Read and process input one line at a time */
{
- guchar S_[2048];
+ guchar S_[65520];
- while(fgets(S_, sizeof(S_), IN))
+ while(fgets(S_, sizeof(S_) - 1, IN))
{
- int len = strlen(S_);
- FriBidiChar us[2048], out_us[2048];
- guchar outstring[2048];
+ int len;
+ FriBidiChar us[65520], out_us[65520];
+ guchar outstring[65520];
FriBidiCharType base;
- int i;
-
+ int i, j, k;
+
+ S_[sizeof(S_) - 1] = 0;
+ len = strlen(S_);
/* chop */
- if (S_[len-1] == '\n')
- S_[len-1] = '\0';
-
- len--;
+ if (S_[len-1] == '\n') {
+ len--;
+ S_[len] = '\0';
+ }
charset_to_unicode(char_set, S_, us, &len);
@@ -292,11 +358,14 @@ int main(int argc, char *argv[])
if (bol_text)
printf("%s", bol_text);
- if (base == FRIBIDI_TYPE_R && do_pad)
- for (i=0; i<text_width-strlen(outstring); i++) /* debugged ;) */
- printf(" ");
-
- printf("%s", outstring);
+ if (base == FRIBIDI_TYPE_R && do_pad && *outstring) {
+ j = strlen(outstring);
+ k = (j-1) % text_width + 1;
+ for (i=(j-1)/text_width-1; i>=0; i--)
+ printf("%.*s", text_width, outstring + (i * text_width + k));
+ printf("%*.*s", text_width, k, outstring);
+ } else
+ printf("%s", outstring);
if (eol_text)
printf("%s", eol_text);
diff --git a/fribidi_tables.i b/fribidi_tables.i
index 24d4d10..238caf5 100644
--- a/fribidi_tables.i
+++ b/fribidi_tables.i
@@ -1,31 +1,31 @@
/*======================================================================
-// This file was automatically created from PropList-3.0.1.txt
+// This file was automatically created from PropList.txt
// by the perl script CreateGetType.pl.
//----------------------------------------------------------------------*/
#include "fribidi.h"
-#define LTR FRIBIDI_TYPE_LTR
-#define RTL FRIBIDI_TYPE_RTL
-#define EN FRIBIDI_TYPE_EN
-#define ES FRIBIDI_TYPE_ES
-#define ET FRIBIDI_TYPE_ET
-#define AN FRIBIDI_TYPE_AN
-#define CS FRIBIDI_TYPE_CS
+#define WS FRIBIDI_TYPE_WS
#define BS FRIBIDI_TYPE_BS
-#define SS FRIBIDI_TYPE_SS
+#define EO FRIBIDI_TYPE_EO
#define CTL FRIBIDI_TYPE_CTL
#define LRE FRIBIDI_TYPE_LRE
#define RLE FRIBIDI_TYPE_RLE
+#define ES FRIBIDI_TYPE_ES
#define LRO FRIBIDI_TYPE_LRO
#define RLO FRIBIDI_TYPE_RLO
-#define WS FRIBIDI_TYPE_WS
-#define ON FRIBIDI_TYPE_ON
#define AL FRIBIDI_TYPE_AL
+#define SS FRIBIDI_TYPE_SS
+#define ET FRIBIDI_TYPE_ET
#define NSM FRIBIDI_TYPE_NSM
+#define LTR FRIBIDI_TYPE_LTR
+#define ON FRIBIDI_TYPE_ON
+#define AN FRIBIDI_TYPE_AN
#define BN FRIBIDI_TYPE_BN
+#define RTL FRIBIDI_TYPE_RTL
+#define CS FRIBIDI_TYPE_CS
#define PDF FRIBIDI_TYPE_PDF
-#define EO FRIBIDI_TYPE_EO
+#define EN FRIBIDI_TYPE_EN
guchar FriBidiPropertyBlock0000[256] = {
@@ -916,27 +916,27 @@ guchar *FriBidiPropertyBlocks[256] = {
FriBidiPropertyBlockff00,
};
-#undef LTR
-#undef RTL
-#undef EN
-#undef ES
-#undef ET
-#undef AN
-#undef CS
+#undef WS
#undef BS
-#undef SS
+#undef EO
#undef CTL
#undef LRE
#undef RLE
+#undef ES
#undef LRO
#undef RLO
-#undef WS
-#undef ON
#undef AL
+#undef SS
+#undef ET
#undef NSM
+#undef LTR
+#undef ON
+#undef AN
#undef BN
+#undef RTL
+#undef CS
#undef PDF
-#undef EO
+#undef EN
/*======================================================================
@@ -948,7 +948,7 @@ guchar *FriBidiPropertyBlocks[256] = {
// but do not have any mirrored glyph, e.g. the sign for there exist.
// Are these used in Arabic? That is are all the mathematical signs
// that are assigned to be mirrorable actually mirrored in Arabic?
-// If that is the case, I'll change the below code to include also
+// If that is the case, we'll change the below code to include also
// characters that mirror to themself. It will then be the responsibility
// of the display engine to actually mirror these.
//----------------------------------------------------------------------*/
@@ -974,6 +974,19 @@ struct {
{0x207E, 0x207D},
{0x208D, 0x208E},
{0x208E, 0x208D},
+ {0x2208, 0x220B},
+ {0x2209, 0x220C},
+ {0x220A, 0x220D},
+ {0x220B, 0x2208},
+ {0x220C, 0x2209},
+ {0x220D, 0x220A},
+ {0x223C, 0x223D},
+ {0x223D, 0x223C},
+ {0x2243, 0x22CD},
+ {0x2252, 0x2253},
+ {0x2253, 0x2252},
+ {0x2254, 0x2255},
+ {0x2255, 0x2254},
{0x2264, 0x2265},
{0x2265, 0x2264},
{0x2266, 0x2267},
@@ -990,20 +1003,73 @@ struct {
{0x2273, 0x2272},
{0x2274, 0x2275},
{0x2275, 0x2274},
+ {0x2276, 0x2277},
+ {0x2277, 0x2276},
+ {0x2278, 0x2279},
+ {0x2279, 0x2278},
+ {0x227A, 0x227B},
+ {0x227B, 0x227A},
+ {0x227C, 0x227D},
+ {0x227D, 0x227C},
+ {0x227E, 0x227F},
+ {0x227F, 0x227E},
+ {0x2280, 0x2281},
+ {0x2281, 0x2280},
+ {0x2282, 0x2283},
+ {0x2283, 0x2282},
+ {0x2284, 0x2285},
+ {0x2285, 0x2284},
+ {0x2286, 0x2287},
+ {0x2287, 0x2286},
+ {0x2288, 0x2289},
+ {0x2289, 0x2288},
+ {0x228A, 0x228B},
+ {0x228B, 0x228A},
+ {0x228F, 0x2290},
+ {0x2290, 0x228F},
+ {0x2291, 0x2292},
+ {0x2292, 0x2291},
{0x22A2, 0x22A3},
{0x22A3, 0x22A2},
+ {0x22B0, 0x22B1},
+ {0x22B1, 0x22B0},
+ {0x22B2, 0x22B3},
+ {0x22B3, 0x22B2},
+ {0x22B4, 0x22B5},
+ {0x22B5, 0x22B4},
+ {0x22B6, 0x22B7},
+ {0x22B7, 0x22B6},
{0x22C9, 0x22CA},
{0x22CA, 0x22C9},
{0x22CB, 0x22CC},
{0x22CC, 0x22CB},
+ {0x22CD, 0x2243},
+ {0x22D0, 0x22D1},
+ {0x22D1, 0x22D0},
{0x22D6, 0x22D7},
{0x22D7, 0x22D6},
{0x22D8, 0x22D9},
{0x22D9, 0x22D8},
+ {0x22DA, 0x22DB},
+ {0x22DB, 0x22DA},
{0x22DC, 0x22DD},
{0x22DD, 0x22DC},
+ {0x22DE, 0x22DF},
+ {0x22DF, 0x22DE},
+ {0x22E0, 0x22E1},
+ {0x22E1, 0x22E0},
+ {0x22E2, 0x22E3},
+ {0x22E3, 0x22E2},
+ {0x22E4, 0x22E5},
+ {0x22E5, 0x22E4},
{0x22E6, 0x22E7},
{0x22E7, 0x22E6},
+ {0x22E8, 0x22E9},
+ {0x22E9, 0x22E8},
+ {0x22EA, 0x22EB},
+ {0x22EB, 0x22EA},
+ {0x22EC, 0x22ED},
+ {0x22ED, 0x22EC},
{0x22F0, 0x22F1},
{0x22F1, 0x22F0},
{0x2308, 0x2309},
@@ -1032,4 +1098,4 @@ struct {
{0x301B, 0x301A}
};
-gint nFriBidiMirroredChars = 74;
+gint nFriBidiMirroredChars = 140;
diff --git a/fribidi_types.h b/fribidi_types.h
index dfec677..9878e92 100644
--- a/fribidi_types.h
+++ b/fribidi_types.h
@@ -1,5 +1,6 @@
/* FriBidi - Library of BiDi algorithm
* Copyright (C) 1999 Dov Grobgeld
+ * Copyright (C) 2001 Behdad Esfahbod
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -49,16 +50,24 @@ typedef enum
/* The following are only used internally */
FRIBIDI_TYPE_L = FRIBIDI_TYPE_LTR,
FRIBIDI_TYPE_R = FRIBIDI_TYPE_RTL,
- FRIBIDI_TYPE_BN = FRIBIDI_TYPE_ON + 2,
- FRIBIDI_TYPE_CM ,
- FRIBIDI_TYPE_SOT ,
- FRIBIDI_TYPE_EOT ,
- FRIBIDI_TYPE_N ,
- FRIBIDI_TYPE_E ,
+ FRIBIDI_TYPE_BN = FRIBIDI_TYPE_ON + 2,
+ FRIBIDI_TYPE_CM ,
+ FRIBIDI_TYPE_SOT ,
+ FRIBIDI_TYPE_EOT ,
+ FRIBIDI_TYPE_N ,
+ FRIBIDI_TYPE_E ,
FRIBIDI_TYPE_CTL , /* Control units */
- FRIBIDI_TYPE_EO , /* Control units */
+ FRIBIDI_TYPE_EO , /* Control units */
} FriBidiCharType;
+#ifdef DEBUG /* debug-only table of one printable character per type value; presumably indexed by FriBidiCharType -- confirm against the debug print helpers */
+char char_from_type[] = { /* NOTE(review): external definition inside a header; every file including it with DEBUG defined emits its own copy -- confirm this links cleanly */
+ 'L', 'R', 'l', 'r', '1', 'w', 'w', '9', 'w', 'P', 'S', '_', 'A',
+ '.', '+', '+', '+', '+', '-', 'n', '?',
+ 'b', '?', '>', '<', 'n', 'e', '?', '?'
+};
+#endif
+
/* The following type is used by fribidi_utils */
typedef struct {
int length;
diff --git a/mkinstalldirs b/mkinstalldirs
index 7d0c31d..bfb3799 100755
--- a/mkinstalldirs
+++ b/mkinstalldirs
@@ -4,7 +4,7 @@
# Created: 1993-05-16
# Public domain
-# $Id: mkinstalldirs,v 1.1 2000-12-23 20:54:16 dov-g Exp $
+# $Id: mkinstalldirs,v 1.2 2001-02-19 17:22:11 behdad Exp $
errstatus=0
diff --git a/test_fribidi.c b/test_fribidi.c
index 7fc6e0f..570df24 100644
--- a/test_fribidi.c
+++ b/test_fribidi.c
@@ -68,7 +68,14 @@ int main(int argc, char *argv[])
CASE("-outputonly") { do_output_only++; pad_width = 80; continue; };
CASE("-test_vtol") { do_test_vtol++; continue; };
CASE("-print_embedding") { do_print_embedding++; continue; };
- CASE("-debug") { fribidi_set_debug(TRUE); continue; };
+ CASE("-debug") {
+ if (fribidi_set_debug(TRUE))
+ continue;
+ else {
+ fprintf(stderr, "Fribidi must be compiled with DEBUG option to enable debugging.\n");
+ exit(-1);
+ }
+ }
CASE("-order") { do_use_order++; continue; };
CASE("-capital_rtl") { do_cap_as_rtl++; continue; };
CASE("-nopad") { do_no_pad++; continue; };
diff --git a/unidata/BidiMirroring.txt b/unidata/BidiMirroring.txt
new file mode 100644
index 0000000..8ac5be9
--- /dev/null
+++ b/unidata/BidiMirroring.txt
@@ -0,0 +1,238 @@
+# BidiMirroring-1.txt
+#
+# This file is an informative supplement to the UnicodeData file. It
+# lists characters that have the mirrored property
+# where there is another Unicode character that typically has a glyph
+# that is the mirror image of the original character's glyph.
+# The repertoire covered by the file is Unicode 3.0.1.
+#
+# The file contains a list of lines with mappings from one code point
+# to another one for character-based mirroring.
+# Note that for "real" mirroring, a rendering engine needs to select
+# appropriate alternative glyphs, and that many Unicode characters do not
+# have a mirror-image Unicode character.
+#
+# Each mapping line contains two fields, separated by a semicolon (';').
+# Each of the two fields contains a code point represented as a
+# variable-length hexadecimal value with 4 to 6 digits.
+# A comment indicates where the characters are "BEST FIT" mirroring.
+#
+# Code points with the "mirrored" property but no appropriate mirrors are
+# listed as comments at the end of the file.
+#
+# For information on bidi mirroring, see UAX #9: Bidirectional Algorithm,
+# at http://www.unicode.org/unicode/reports/tr9/
+#
+# Please address any comments to <errata@unicode.org>.
+# Note that this is an archival address: messages will be checked,
+# but do not expect an immediate response.
+#
+# This file was originally created by Markus Scherer
+#
+# ############################################################
+
+0028; 0029 # LEFT PARENTHESIS
+0029; 0028 # RIGHT PARENTHESIS
+003C; 003E # LESS-THAN SIGN
+003E; 003C # GREATER-THAN SIGN
+005B; 005D # LEFT SQUARE BRACKET
+005D; 005B # RIGHT SQUARE BRACKET
+007B; 007D # LEFT CURLY BRACKET
+007D; 007B # RIGHT CURLY BRACKET
+00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+2045; 2046 # LEFT SQUARE BRACKET WITH QUILL
+2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL
+207D; 207E # SUPERSCRIPT LEFT PARENTHESIS
+207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS
+208D; 208E # SUBSCRIPT LEFT PARENTHESIS
+208E; 208D # SUBSCRIPT RIGHT PARENTHESIS
+2208; 220B # ELEMENT OF
+2209; 220C # NOT AN ELEMENT OF
+220A; 220D # SMALL ELEMENT OF
+220B; 2208 # CONTAINS AS MEMBER
+220C; 2209 # DOES NOT CONTAIN AS MEMBER
+220D; 220A # SMALL CONTAINS AS MEMBER
+223C; 223D # TILDE OPERATOR
+223D; 223C # REVERSED TILDE
+2243; 22CD # ASYMPTOTICALLY EQUAL TO
+2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF
+2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO
+2254; 2255 # COLON EQUALS
+2255; 2254 # EQUALS COLON
+2264; 2265 # LESS-THAN OR EQUAL TO
+2265; 2264 # GREATER-THAN OR EQUAL TO
+2266; 2267 # LESS-THAN OVER EQUAL TO
+2267; 2266 # GREATER-THAN OVER EQUAL TO
+2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO
+2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO
+226A; 226B # MUCH LESS-THAN
+226B; 226A # MUCH GREATER-THAN
+226E; 226F # [BEST FIT] NOT LESS-THAN
+226F; 226E # [BEST FIT] NOT GREATER-THAN
+2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO
+2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO
+2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO
+2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO
+2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO
+2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO
+2276; 2277 # LESS-THAN OR GREATER-THAN
+2277; 2276 # GREATER-THAN OR LESS-THAN
+2278; 2279 # NEITHER LESS-THAN NOR GREATER-THAN
+2279; 2278 # NEITHER GREATER-THAN NOR LESS-THAN
+227A; 227B # PRECEDES
+227B; 227A # SUCCEEDS
+227C; 227D # PRECEDES OR EQUAL TO
+227D; 227C # SUCCEEDS OR EQUAL TO
+227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO
+227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO
+2280; 2281 # [BEST FIT] DOES NOT PRECEDE
+2281; 2280 # [BEST FIT] DOES NOT SUCCEED
+2282; 2283 # SUBSET OF
+2283; 2282 # SUPERSET OF
+2284; 2285 # [BEST FIT] NOT A SUBSET OF
+2285; 2284 # [BEST FIT] NOT A SUPERSET OF
+2286; 2287 # SUBSET OF OR EQUAL TO
+2287; 2286 # SUPERSET OF OR EQUAL TO
+2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO
+2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO
+228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO
+228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO
+228F; 2290 # SQUARE IMAGE OF
+2290; 228F # SQUARE ORIGINAL OF
+2291; 2292 # SQUARE IMAGE OF OR EQUAL TO
+2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO
+22A2; 22A3 # RIGHT TACK
+22A3; 22A2 # LEFT TACK
+22B0; 22B1 # PRECEDES UNDER RELATION
+22B1; 22B0 # SUCCEEDS UNDER RELATION
+22B2; 22B3 # NORMAL SUBGROUP OF
+22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP
+22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO
+22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO
+22B6; 22B7 # ORIGINAL OF
+22B7; 22B6 # IMAGE OF
+22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT
+22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT
+22CB; 22CC # LEFT SEMIDIRECT PRODUCT
+22CC; 22CB # RIGHT SEMIDIRECT PRODUCT
+22CD; 2243 # REVERSED TILDE EQUALS
+22D0; 22D1 # DOUBLE SUBSET
+22D1; 22D0 # DOUBLE SUPERSET
+22D6; 22D7 # LESS-THAN WITH DOT
+22D7; 22D6 # GREATER-THAN WITH DOT
+22D8; 22D9 # VERY MUCH LESS-THAN
+22D9; 22D8 # VERY MUCH GREATER-THAN
+22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN
+22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN
+22DC; 22DD # EQUAL TO OR LESS-THAN
+22DD; 22DC # EQUAL TO OR GREATER-THAN
+22DE; 22DF # EQUAL TO OR PRECEDES
+22DF; 22DE # EQUAL TO OR SUCCEEDS
+22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL
+22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL
+22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO
+22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO
+22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO
+22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO
+22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO
+22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO
+22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO
+22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO
+22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF
+22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP
+22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO
+22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL
+22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS
+22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS
+2308; 2309 # LEFT CEILING
+2309; 2308 # RIGHT CEILING
+230A; 230B # LEFT FLOOR
+230B; 230A # RIGHT FLOOR
+2329; 232A # LEFT-POINTING ANGLE BRACKET
+232A; 2329 # RIGHT-POINTING ANGLE BRACKET
+3008; 3009 # LEFT ANGLE BRACKET
+3009; 3008 # RIGHT ANGLE BRACKET
+300A; 300B # LEFT DOUBLE ANGLE BRACKET
+300B; 300A # RIGHT DOUBLE ANGLE BRACKET
+300C; 300D # [BEST FIT] LEFT CORNER BRACKET
+300D; 300C # [BEST FIT] RIGHT CORNER BRACKET
+300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET
+300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET
+3010; 3011 # LEFT BLACK LENTICULAR BRACKET
+3011; 3010 # RIGHT BLACK LENTICULAR BRACKET
+3014; 3015 # [BEST FIT] LEFT TORTOISE SHELL BRACKET
+3015; 3014 # [BEST FIT] RIGHT TORTOISE SHELL BRACKET
+3016; 3017 # LEFT WHITE LENTICULAR BRACKET
+3017; 3016 # RIGHT WHITE LENTICULAR BRACKET
+3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET
+3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET
+301A; 301B # LEFT WHITE SQUARE BRACKET
+301B; 301A # RIGHT WHITE SQUARE BRACKET
+
+# The following characters have no appropriate mirroring character
+
+# 2201; COMPLEMENT
+# 2202; PARTIAL DIFFERENTIAL
+# 2203; THERE EXISTS
+# 2204; THERE DOES NOT EXIST
+# 2211; N-ARY SUMMATION
+# 2215; DIVISION SLASH
+# 2216; SET MINUS
+# 221A; SQUARE ROOT
+# 221B; CUBE ROOT
+# 221C; FOURTH ROOT
+# 221D; PROPORTIONAL TO
+# 221F; RIGHT ANGLE
+# 2220; ANGLE
+# 2221; MEASURED ANGLE
+# 2222; SPHERICAL ANGLE
+# 2224; DOES NOT DIVIDE
+# 2226; NOT PARALLEL TO
+# 222B; INTEGRAL
+# 222C; DOUBLE INTEGRAL
+# 222D; TRIPLE INTEGRAL
+# 222E; CONTOUR INTEGRAL
+# 222F; SURFACE INTEGRAL
+# 2230; VOLUME INTEGRAL
+# 2231; CLOCKWISE INTEGRAL
+# 2232; CLOCKWISE CONTOUR INTEGRAL
+# 2233; ANTICLOCKWISE CONTOUR INTEGRAL
+# 2239; EXCESS
+# 223B; HOMOTHETIC
+# 223E; INVERTED LAZY S
+# 223F; SINE WAVE
+# 2240; WREATH PRODUCT
+# 2241; NOT TILDE
+# 2242; MINUS TILDE
+# 2244; NOT ASYMPTOTICALLY EQUAL TO
+# 2245; APPROXIMATELY EQUAL TO
+# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO
+# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO
+# 2248; ALMOST EQUAL TO
+# 2249; NOT ALMOST EQUAL TO
+# 224A; ALMOST EQUAL OR EQUAL TO
+# 224B; TRIPLE TILDE
+# 224C; ALL EQUAL TO
+# 225F; QUESTIONED EQUAL TO
+# 2260; NOT EQUAL TO
+# 2262; NOT IDENTICAL TO
+# 228C; MULTISET
+# 2298; CIRCLED DIVISION SLASH
+# 22A6; ASSERTION
+# 22A7; MODELS
+# 22A8; TRUE
+# 22A9; FORCES
+# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE
+# 22AB; DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+# 22AC; DOES NOT PROVE
+# 22AD; NOT TRUE
+# 22AE; DOES NOT FORCE
+# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE
+# 22B8; MULTIMAP
+# 22BE; RIGHT ANGLE WITH ARC
+# 22BF; RIGHT TRIANGLE
+# 2320; TOP HALF INTEGRAL
+# 2321; BOTTOM HALF INTEGRAL
diff --git a/CreateGetType.pl b/unidata/CreateGetType.pl
index bb2337a..dd40c34 100755
--- a/CreateGetType.pl
+++ b/unidata/CreateGetType.pl
@@ -2,6 +2,7 @@
# FriBidi - Library of BiDi algorithm
# Copyright (C) 1999 Dov Grobgeld
+# Copyright (C) 2001 Roozbeh Pournader
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
@@ -23,14 +24,19 @@
# This is a Perl program for automatically building the cfunction
# fribidi_get_type() which returns the Bidi type of a unicode
# character. To build this function the script parses the
-# PropList-*.txt and the UnicodeData-*.txt files.
+# PropList.txt and BidiMirroring.txt files.
+#
+# The latest versions of these files are always available at:
+# http://www.unicode.org/Public/UNIDATA/
######################################################################
use strict;
-my $unicode_version = "3.0.1";
-my $unicode_data_file = "UnicodeData-$unicode_version.txt";
-my $unicode_proplist_file = "PropList-$unicode_version.txt";
+#my $unicode_data_file = "UnicodeData.txt";
+my $unicode_proplist_file = "PropList.txt";
+my $unicode_mirroring_file = "BidiMirroring.txt";
+my $proplist_version;
+my $mirroring_version;
my @bidi_entities;
my @mirrors;
@@ -50,8 +56,8 @@ my %type_names = ("0x10000090" => ["CTL", "Control units"],
"0x0B000000" => ["AL", "Arabic right to left letter"],
"0x0C000000" => ["NSM", "Non-spacing mark"],
"0x0D000000" => ["BN", "Boundary Neutral"],
- "0x0E000000" => ["PDF", "Pop directional formatting"],
- "0x0F000000" => ["EO", "Embedding or override"],
+ "0x0E000000" => ["PDF", "Pop directional formatting"],
+ "0x0F000000" => ["EO", "Embedding or override"],
"0x80000009" => ["ON", "Other Neutral"],
"0x10000091" => ["LRE", "RLE"],
"0x10000092" => ["RLE", "RLE"],
@@ -62,8 +68,21 @@ my %type_names = ("0x10000090" => ["CTL", "Control units"],
open(PROP, $unicode_proplist_file)
or die "Failed opening $unicode_proplist_file!\n";
-open(DATA, $unicode_data_file)
- or die "Failed opening $unicode_data_file!\n";
+#open(DATA, $unicode_data_file)
+# or die "Failed opening $unicode_data_file!\n";
+
+open(MIRR, $unicode_mirroring_file)
+ or die "Failed opening $unicode_mirroring_file!\n";
+
+$_ = <PROP>;
+if (/^Property dump: UnicodeData-(.*).txt/) {
+ $proplist_version = $1;
+}
+
+$_ = <MIRR>;
+if (/^# BidiMirroring-(.*).txt/) {
+ $mirroring_version = $1;
+}
#parse_unicode_data_for_bidi_entries();
parse_prop_for_bidi_entities();
@@ -153,42 +172,11 @@ sub find_bidi_controls {
}
sub parse_for_mirror_chars {
- my @mirrored_chars;
- my %mirror_name_to_num;
- seek(DATA,0,0);
- while(<DATA>) {
- my @props = split(";");
- next unless $props[9] eq "Y";
-
- # Just save them for now
- push(@mirrored_chars, [@props]);
- $mirror_name_to_num{$props[1]} = $props[0];
- }
-
- # Now find mirrored characters for a small subset of all those
- # characters defined as mirrorable.
- foreach my $m (@mirrored_chars) {
- my (@props) = @$m;
- my $num = $props[0];
- my $name = $props[1];
- my ($mirror_name, $mirror_num);
- my %opposite = ("UP RIGHT"=>"DOWN RIGHT",
- "DOWN RIGHT"=>"UP RIGHT",
- "LEFT"=>"RIGHT",
- "RIGHT"=>"LEFT",
- "LESS-THAN"=>"GREATER-THAN",
- "GREATER-THAN"=>"LESS-THAN");
- my $pattern = join("|", keys %opposite);
- ($mirror_name = $name)
- =~ s/\b($pattern)\b/$opposite{$&}/e;
- if ($mirror_name_to_num{$mirror_name}) {
- $mirror_num = $mirror_name_to_num{$mirror_name};
- } else {
- $mirror_num = $num;
+ seek(MIRR,0,0);
+ while(<MIRR>) {
+ if (/^([0-9A-F]*); ([0-9A-F]*)/) {
+ push(@mirrors, [$1, $2]);
}
-
- # Should I include characters that map to themself???
- push(@mirrors, [$num, $mirror_num]) if $num ne $mirror_num;
}
}
@@ -260,9 +248,9 @@ sub create_c_file {
my $num_used_blocks = 0;
my $c_file =<<__;
-/*======================================================================
-// This file was automatically created from $unicode_proplist_file
-// by the perl script CreateGetType.pl.
+/*========================================================================
+// This file was automatically created from $unicode_proplist_file, version $proplist_version,
+// and $unicode_mirroring_file, version $mirroring_version, by the perl script CreateGetType.pl.
//----------------------------------------------------------------------*/
#include "fribidi.h"
@@ -361,7 +349,7 @@ __
// but do not have any mirrored glyph, e.g. the sign for there exist.
// Are these used in Arabic? That is are all the mathematical signs
// that are assigned to be mirrorable actually mirrored in Arabic?
-// If that is the case, I'll change the below code to include also
+// If that is the case, we'll change the below code to include also
// characters that mirror to themself. It will then be the responsibility
// of the display engine to actually mirror these.
//----------------------------------------------------------------------*/
diff --git a/unidata/PropList.txt b/unidata/PropList.txt
new file mode 100644
index 0000000..e47f127
--- /dev/null
+++ b/unidata/PropList.txt
@@ -0,0 +1,3592 @@
+Property dump: UnicodeData-3.0.1.txt
+
+*******************************************
+
+Property dump for: 0x10000001 (Zero-width)
+
+070F
+180B..180E (4 chars)
+200B..200F (5 chars)
+202A..202E (5 chars)
+206A..206F (6 chars)
+FEFF
+FFF9..FFFB (3 chars)
+
+*******************************************
+
+Property dump for: 0x10000002 (Space)
+
+0020
+00A0
+1680
+2000..200B (12 chars)
+202F
+3000
+
+*******************************************
+
+Property dump for: 0x10000004 (White space)
+
+0009..000D (5 chars)
+0020
+0085
+00A0
+1680
+2000..200B (12 chars)
+2028..2029 (2 chars)
+202F
+3000
+
+*******************************************
+
+Property dump for: 0x00400000 (Non-break)
+
+00A0
+0F0C
+2007
+2011
+202F
+FEFF
+
+*******************************************
+
+Property dump for: 0x10000008 (ISO Control)
+
+0000..001F (32 chars)
+007F..009F (33 chars)
+
+*******************************************
+
+Property dump for: 0x10000090 (Bidi Control)
+
+200E..200F (2 chars)
+202A..202E (5 chars)
+
+*******************************************
+
+Property dump for: 0x100000A0 (Join Control)
+
+200C..200D (2 chars)
+
+*******************************************
+
+Property dump for: 0x100000C0 (Format Control)
+
+070F
+180B..180E (4 chars)
+206A..206F (6 chars)
+FFF9..FFFB (3 chars)
+
+*******************************************
+
+Property dump for: 0x20000400 (Dash)
+
+002D
+00AD
+058A
+1806
+2010..2015 (6 chars)
+207B
+208B
+2212
+301C
+3030
+FE31..FE32 (2 chars)
+FE58
+FE63
+FF0D
+
+*******************************************
+
+Property dump for: 0x20000800 (Hyphen)
+
+002D
+00AD
+058A
+1806
+2010..2011 (2 chars)
+30FB
+FE63
+FF0D
+FF65
+
+*******************************************
+
+Property dump for: 0x20001000 (Punctuation)
+
+0021..0023 (3 chars)
+0025..002A (6 chars)
+002C..002F (4 chars)
+003A..003B (2 chars)
+003F..0040 (2 chars)
+005B..005D (3 chars)
+005F
+007B
+007D
+00A1
+00AB
+00AD
+00B7
+00BB
+00BF
+037E
+0387
+055A..055F (6 chars)
+0589..058A (2 chars)
+05BE
+05C0
+05C3
+05F3..05F4 (2 chars)
+060C
+061B
+061F
+066A..066D (4 chars)
+06D4
+0700..070D (14 chars)
+0964..0965 (2 chars)
+0970
+0DF4
+0E4F
+0E5A..0E5B (2 chars)
+0F04..0F12 (15 chars)
+0F3A..0F3D (4 chars)
+0F85
+104A..104F (6 chars)
+10FB
+1361..1368 (8 chars)
+166D..166E (2 chars)
+169B..169C (2 chars)
+16EB..16ED (3 chars)
+17D4..17DA (7 chars)
+17DC
+1800..180A (11 chars)
+2010..2027 (24 chars)
+2030..2043 (20 chars)
+2045..2046 (2 chars)
+2048..204D (6 chars)
+207D..207E (2 chars)
+208D..208E (2 chars)
+2329..232A (2 chars)
+3001..3003 (3 chars)
+3008..3011 (10 chars)
+3014..301F (12 chars)
+3030
+30FB
+FD3E..FD3F (2 chars)
+FE30..FE44 (21 chars)
+FE49..FE52 (10 chars)
+FE54..FE61 (14 chars)
+FE63
+FE68
+FE6A..FE6B (2 chars)
+FF01..FF03 (3 chars)
+FF05..FF0A (6 chars)
+FF0C..FF0F (4 chars)
+FF1A..FF1B (2 chars)
+FF1F..FF20 (2 chars)
+FF3B..FF3D (3 chars)
+FF3F
+FF5B
+FF5D
+FF61..FF65 (5 chars)
+
+*******************************************
+
+Property dump for: 0x80000003 (Line Separator)
+
+2028
+
+*******************************************
+
+Property dump for: 0x80000004 (Paragraph Separator)
+
+2029
+
+*******************************************
+
+Property dump for: 0x20002000 (Quotation Mark)
+
+0022
+0027
+00AB
+00BB
+2018..201F (8 chars)
+2039..203A (2 chars)
+300C..300F (4 chars)
+301D..301F (3 chars)
+FE41..FE44 (4 chars)
+FF02
+FF07
+FF62..FF63 (2 chars)
+
+*******************************************
+
+Property dump for: 0x20004000 (Terminal Punctuation)
+
+0021
+002C
+002E
+003A..003B (2 chars)
+003F
+037E
+0387
+0589
+060C
+061B
+061F
+06D4
+0700..070A (11 chars)
+070C
+0964..0965 (2 chars)
+0E5A..0E5B (2 chars)
+104A..104B (2 chars)
+1361..1368 (8 chars)
+166D..166E (2 chars)
+16EB..16ED (3 chars)
+17D4..17D6 (3 chars)
+17DA
+1802..1805 (4 chars)
+1808..1809 (2 chars)
+203C..203D (2 chars)
+2048..2049 (2 chars)
+3001..3002 (2 chars)
+FE50..FE52 (3 chars)
+FE54..FE57 (4 chars)
+FF01
+FF0C
+FF0E
+FF1A..FF1B (2 chars)
+FF1F
+FF61
+FF64
+
+*******************************************
+
+Property dump for: 0x20008000 (Currency Symbol)
+
+0024
+00A2..00A5 (4 chars)
+09F2..09F3 (2 chars)
+0E3F
+17DB
+20A0..20AF (16 chars)
+FE69
+FF04
+FFE0..FFE1 (2 chars)
+FFE5..FFE6 (2 chars)
+
+*******************************************
+
+Property dump for: 0x20080000 (Math)
+
+0028..002B (4 chars)
+002D
+002F
+003C..003E (3 chars)
+005B..005E (4 chars)
+007B..007E (4 chars)
+00AC
+00B1
+00D7
+00F7
+2016
+2032..2034 (3 chars)
+2044
+207A..207E (5 chars)
+208A..208E (5 chars)
+20D0..20DC (13 chars)
+20E1
+2190..2194 (5 chars)
+219A..219B (2 chars)
+21A0
+21A3
+21A6
+21AE
+21CE..21CF (2 chars)
+21D2
+21D4
+2200..22F1 (242 chars)
+2308..230B (4 chars)
+2320..2321 (2 chars)
+2329..232A (2 chars)
+25B7
+25C1
+266F
+300A..300B (2 chars)
+301A..301B (2 chars)
+FB29
+FE35..FE38 (4 chars)
+FE59..FE5C (4 chars)
+FE61..FE66 (6 chars)
+FE68
+FF08..FF0B (4 chars)
+FF0D
+FF0F
+FF1C..FF1E (3 chars)
+FF3B..FF3E (4 chars)
+FF5B..FF5E (4 chars)
+FFE2
+FFE9..FFEC (4 chars)
+
+*******************************************
+
+Property dump for: 0x20000100 (Paired Punctuation)
+
+0028..0029 (2 chars)
+005B
+005D
+007B
+007D
+00AB
+00BB
+0F3A..0F3D (4 chars)
+169B..169C (2 chars)
+2018..201F (8 chars)
+2039..203A (2 chars)
+2045..2046 (2 chars)
+207D..207E (2 chars)
+208D..208E (2 chars)
+2329..232A (2 chars)
+3008..3011 (10 chars)
+3014..301B (8 chars)
+301D..301F (3 chars)
+FD3E..FD3F (2 chars)
+FE35..FE44 (16 chars)
+FE59..FE5E (6 chars)
+FF08..FF09 (2 chars)
+FF3B
+FF3D
+FF5B
+FF5D
+FF62..FF63 (2 chars)
+
+*******************************************
+
+Property dump for: 0x20000300 (Left of Pair)
+
+0028
+005B
+007B
+00AB
+0F3A
+0F3C
+169B
+2018
+201A..201C (3 chars)
+201E..201F (2 chars)
+2039
+2045
+207D
+208D
+2329
+3008
+300A
+300C
+300E
+3010
+3014
+3016
+3018
+301A
+301D
+FD3E
+FE35
+FE37
+FE39
+FE3B
+FE3D
+FE3F
+FE41
+FE43
+FE59
+FE5B
+FE5D
+FF08
+FF3B
+FF5B
+FF62
+
+*******************************************
+
+Property dump for: 0x20000004 (Combining)
+
+0300..034E (79 chars)
+0360..0362 (3 chars)
+0483..0486 (4 chars)
+0488..0489 (2 chars)
+0591..05A1 (17 chars)
+05A3..05B9 (23 chars)
+05BB..05BD (3 chars)
+05BF
+05C1..05C2 (2 chars)
+05C4
+064B..0655 (11 chars)
+0670
+06D6..06E4 (15 chars)
+06E7..06E8 (2 chars)
+06EA..06ED (4 chars)
+0711
+0730..074A (27 chars)
+07A6..07B0 (11 chars)
+0901..0903 (3 chars)
+093C
+093E..094D (16 chars)
+0951..0954 (4 chars)
+0962..0963 (2 chars)
+0981..0983 (3 chars)
+09BC
+09BE..09C4 (7 chars)
+09C7..09C8 (2 chars)
+09CB..09CD (3 chars)
+09D7
+09E2..09E3 (2 chars)
+0A02
+0A3C
+0A3E..0A42 (5 chars)
+0A47..0A48 (2 chars)
+0A4B..0A4D (3 chars)
+0A70..0A71 (2 chars)
+0A81..0A83 (3 chars)
+0ABC
+0ABE..0AC5 (8 chars)
+0AC7..0AC9 (3 chars)
+0ACB..0ACD (3 chars)
+0B01..0B03 (3 chars)
+0B3C
+0B3E..0B43 (6 chars)
+0B47..0B48 (2 chars)
+0B4B..0B4D (3 chars)
+0B56..0B57 (2 chars)
+0B82..0B83 (2 chars)
+0BBE..0BC2 (5 chars)
+0BC6..0BC8 (3 chars)
+0BCA..0BCD (4 chars)
+0BD7
+0C01..0C03 (3 chars)
+0C3E..0C44 (7 chars)
+0C46..0C48 (3 chars)
+0C4A..0C4D (4 chars)
+0C55..0C56 (2 chars)
+0C82..0C83 (2 chars)
+0CBE..0CC4 (7 chars)
+0CC6..0CC8 (3 chars)
+0CCA..0CCD (4 chars)
+0CD5..0CD6 (2 chars)
+0D02..0D03 (2 chars)
+0D3E..0D43 (6 chars)
+0D46..0D48 (3 chars)
+0D4A..0D4D (4 chars)
+0D57
+0D82..0D83 (2 chars)
+0DCA
+0DCF..0DD4 (6 chars)
+0DD6
+0DD8..0DDF (8 chars)
+0DF2..0DF3 (2 chars)
+0E31
+0E34..0E3A (7 chars)
+0E47..0E4E (8 chars)
+0EB1
+0EB4..0EB9 (6 chars)
+0EBB..0EBC (2 chars)
+0EC8..0ECD (6 chars)
+0F18..0F19 (2 chars)
+0F35
+0F37
+0F39
+0F3E..0F3F (2 chars)
+0F71..0F84 (20 chars)
+0F86..0F87 (2 chars)
+0F90..0F97 (8 chars)
+0F99..0FBC (36 chars)
+0FC6
+102C..1032 (7 chars)
+1036..1039 (4 chars)
+1056..1059 (4 chars)
+17B4..17D3 (32 chars)
+18A9
+20D0..20E3 (20 chars)
+302A..302F (6 chars)
+3099..309A (2 chars)
+FB1E
+FE20..FE23 (4 chars)
+
+*******************************************
+
+Property dump for: 0x20040000 (Non-spacing)
+
+0300..034E (79 chars)
+0360..0362 (3 chars)
+0483..0486 (4 chars)
+0488..0489 (2 chars)
+0591..05A1 (17 chars)
+05A3..05B9 (23 chars)
+05BB..05BD (3 chars)
+05BF
+05C1..05C2 (2 chars)
+05C4
+064B..0655 (11 chars)
+0670
+06D6..06E4 (15 chars)
+06E7..06E8 (2 chars)
+06EA..06ED (4 chars)
+0711
+0730..074A (27 chars)
+07A6..07B0 (11 chars)
+0901..0902 (2 chars)
+093C
+0941..0948 (8 chars)
+094D
+0951..0954 (4 chars)
+0962..0963 (2 chars)
+0981
+09BC
+09C1..09C4 (4 chars)
+09CD
+09E2..09E3 (2 chars)
+0A02
+0A3C
+0A41..0A42 (2 chars)
+0A47..0A48 (2 chars)
+0A4B..0A4D (3 chars)
+0A70..0A71 (2 chars)
+0A81..0A82 (2 chars)
+0ABC
+0AC1..0AC5 (5 chars)
+0AC7..0AC8 (2 chars)
+0ACD
+0B01
+0B3C
+0B3F
+0B41..0B43 (3 chars)
+0B4D
+0B56
+0B82
+0BC0
+0BCD
+0C3E..0C40 (3 chars)
+0C46..0C48 (3 chars)
+0C4A..0C4D (4 chars)
+0C55..0C56 (2 chars)
+0CBF
+0CC6
+0CCC..0CCD (2 chars)
+0D41..0D43 (3 chars)
+0D4D
+0DCA
+0DD2..0DD4 (3 chars)
+0DD6
+0E31
+0E34..0E3A (7 chars)
+0E47..0E4E (8 chars)
+0EB1
+0EB4..0EB9 (6 chars)
+0EBB..0EBC (2 chars)
+0EC8..0ECD (6 chars)
+0F18..0F19 (2 chars)
+0F35
+0F37
+0F39
+0F71..0F7E (14 chars)
+0F80..0F84 (5 chars)
+0F86..0F87 (2 chars)
+0F90..0F97 (8 chars)
+0F99..0FBC (36 chars)
+0FC6
+102D..1030 (4 chars)
+1032
+1036..1037 (2 chars)
+1039
+1058..1059 (2 chars)
+17B7..17BD (7 chars)
+17C6
+17C9..17D3 (11 chars)
+18A9
+20D0..20E3 (20 chars)
+302A..302F (6 chars)
+3099..309A (2 chars)
+FB1E
+FE20..FE23 (4 chars)
+
+*******************************************
+
+Property dump for: 0x20000008 (Composite)
+
+00C0..00C5 (6 chars)
+00C7..00CF (9 chars)
+00D1..00D6 (6 chars)
+00D9..00DD (5 chars)
+00E0..00E5 (6 chars)
+00E7..00EF (9 chars)
+00F1..00F6 (6 chars)
+00F9..00FD (5 chars)
+00FF..0130 (50 chars)
+0132..0137 (6 chars)
+0139..0149 (17 chars)
+014C..0151 (6 chars)
+0154..017E (43 chars)
+01A0..01A1 (2 chars)
+01AF..01B0 (2 chars)
+01C4..01DC (25 chars)
+01DE..01E3 (6 chars)
+01E6..01F5 (16 chars)
+01F8..0209 (18 chars)
+020C..021B (16 chars)
+021E..021F (2 chars)
+0226..0233 (14 chars)
+0344
+0385..0386 (2 chars)
+0388..038A (3 chars)
+038C
+038E..0390 (3 chars)
+03AA..03B0 (7 chars)
+03CA..03CE (5 chars)
+03D3..03D5 (3 chars)
+0400..0401 (2 chars)
+0403
+0407
+040C..040E (3 chars)
+0419
+0439
+0450..0451 (2 chars)
+0453
+0457
+045C..045E (3 chars)
+0476..0477 (2 chars)
+04BE..04BF (2 chars)
+04C1..04C2 (2 chars)
+04D0..04D3 (4 chars)
+04D6..04D7 (2 chars)
+04DA..04E7 (14 chars)
+04EA..04F5 (12 chars)
+04F8..04F9 (2 chars)
+0587
+0622..0626 (5 chars)
+06C0
+06C2
+06D6..06D7 (2 chars)
+0929
+0931
+0934
+0950
+0958..095F (8 chars)
+09CB..09CC (2 chars)
+09DC..09DD (2 chars)
+09DF
+0A33
+0A36
+0A59..0A5B (3 chars)
+0A5E
+0A74
+0AD0
+0B01
+0B48
+0B4B..0B4C (2 chars)
+0B5C..0B5D (2 chars)
+0B94
+0BCA..0BCC (3 chars)
+0C48
+0CC0
+0CC7..0CC8 (2 chars)
+0CCA..0CCB (2 chars)
+0D4A..0D4C (3 chars)
+0DDA
+0DDC..0DDE (3 chars)
+0E33
+0EB3
+0EDC..0EDD (2 chars)
+0F00
+0F43
+0F4D
+0F52
+0F57
+0F5C
+0F69
+0F73
+0F75..0F79 (5 chars)
+0F81
+0F93
+0F9D
+0FA2
+0FA7
+0FAC
+0FB9
+1026
+1E00..1E9B (156 chars)
+1EA0..1EF9 (90 chars)
+1F00..1F15 (22 chars)
+1F18..1F1D (6 chars)
+1F20..1F45 (38 chars)
+1F48..1F4D (6 chars)
+1F50..1F57 (8 chars)
+1F59
+1F5B
+1F5D
+1F5F..1F7D (31 chars)
+1F80..1FB4 (53 chars)
+1FB6..1FBC (7 chars)
+1FC1..1FC4 (4 chars)
+1FC6..1FD3 (14 chars)
+1FD6..1FDB (6 chars)
+1FDD..1FEE (18 chars)
+1FF2..1FF4 (3 chars)
+1FF6..1FFC (7 chars)
+203C
+2048..2049 (2 chars)
+2161..2163 (3 chars)
+2165..2168 (4 chars)
+216A..216B (2 chars)
+2171..2173 (3 chars)
+2175..2178 (4 chars)
+217A..217B (2 chars)
+2204
+2209
+220C
+2226
+222C..222D (2 chars)
+222F..2230 (2 chars)
+2247
+2249
+2260
+2262
+226D..2271 (5 chars)
+2274..2275 (2 chars)
+2278..2279 (2 chars)
+2280..2281 (2 chars)
+2284..2285 (2 chars)
+2288..2289 (2 chars)
+22AC..22AF (4 chars)
+22E0..22E3 (4 chars)
+22EA..22ED (4 chars)
+2460..24B5 (86 chars)
+24EA
+304C
+304E
+3050
+3052
+3054
+3056
+3058
+305A
+305C
+305E
+3060
+3062
+3065
+3067
+3069
+3070..3071 (2 chars)
+3073..3074 (2 chars)
+3076..3077 (2 chars)
+3079..307A (2 chars)
+307C..307D (2 chars)
+3094
+309E
+30AC
+30AE
+30B0
+30B2
+30B4
+30B6
+30B8
+30BA
+30BC
+30BE
+30C0
+30C2
+30C5
+30C7
+30C9
+30D0..30D1 (2 chars)
+30D3..30D4 (2 chars)
+30D6..30D7 (2 chars)
+30D9..30DA (2 chars)
+30DC..30DD (2 chars)
+30F4
+30FE
+3200..321C (29 chars)
+3220..3243 (36 chars)
+32C0..32CB (12 chars)
+3300..3376 (119 chars)
+337B..33DD (99 chars)
+33E0..33FE (31 chars)
+AC00..D7A3 (11172 chars)
+FB00..FB06 (7 chars)
+FB13..FB17 (5 chars)
+FB1D
+FB1F
+FB2A..FB36 (13 chars)
+FB38..FB3C (5 chars)
+FB3E
+FB40..FB41 (2 chars)
+FB43..FB44 (2 chars)
+FB46..FB4F (10 chars)
+FBEA..FBFB (18 chars)
+FC00..FD3D (318 chars)
+FD50..FD8F (64 chars)
+FD92..FDC7 (54 chars)
+FDF0..FDFB (12 chars)
+FE70..FE72 (3 chars)
+FE74
+FE76..FE7F (10 chars)
+
+*******************************************
+
+Property dump for: 0x20000080 (Hex Digit)
+
+0030..0039 (10 chars)
+0041..0046 (6 chars)
+0061..0066 (6 chars)
+FF10..FF19 (10 chars)
+FF21..FF26 (6 chars)
+FF41..FF46 (6 chars)
+
+*******************************************
+
+Property dump for: 0x20000030 (Decimal Digit)
+
+0030..0039 (10 chars)
+0660..0669 (10 chars)
+06F0..06F9 (10 chars)
+0966..096F (10 chars)
+09E6..09EF (10 chars)
+0A66..0A6F (10 chars)
+0AE6..0AEF (10 chars)
+0B66..0B6F (10 chars)
+0BE7..0BEF (9 chars)
+0C66..0C6F (10 chars)
+0CE6..0CEF (10 chars)
+0D66..0D6F (10 chars)
+0E50..0E59 (10 chars)
+0ED0..0ED9 (10 chars)
+0F20..0F29 (10 chars)
+1040..1049 (10 chars)
+1369..1371 (9 chars)
+17E0..17E9 (10 chars)
+1810..1819 (10 chars)
+FF10..FF19 (10 chars)
+
+*******************************************
+
+Property dump for: 0x20000010 (Numeric)
+
+0030..0039 (10 chars)
+00B2..00B3 (2 chars)
+00B9
+00BC..00BE (3 chars)
+0660..0669 (10 chars)
+06F0..06F9 (10 chars)
+0966..096F (10 chars)
+09E6..09EF (10 chars)
+09F4..09F9 (6 chars)
+0A66..0A6F (10 chars)
+0AE6..0AEF (10 chars)
+0B66..0B6F (10 chars)
+0BE7..0BF2 (12 chars)
+0C66..0C6F (10 chars)
+0CE6..0CEF (10 chars)
+0D66..0D6F (10 chars)
+0E50..0E59 (10 chars)
+0ED0..0ED9 (10 chars)
+0F20..0F33 (20 chars)
+1040..1049 (10 chars)
+1369..137C (20 chars)
+16EE..16F0 (3 chars)
+17E0..17E9 (10 chars)
+1810..1819 (10 chars)
+2070
+2074..2079 (6 chars)
+2080..2089 (10 chars)
+2153..2183 (49 chars)
+2460..249B (60 chars)
+24EA
+2776..2793 (30 chars)
+3007
+3021..3029 (9 chars)
+3038..303A (3 chars)
+3192..3195 (4 chars)
+3220..3229 (10 chars)
+3280..3289 (10 chars)
+FF10..FF19 (10 chars)
+
+*******************************************
+
+Property dump for: 0x20000001 (Alphabetic)
+
+0041..005A (26 chars)
+0061..007A (26 chars)
+00AA
+00B5
+00BA
+00C0..00D6 (23 chars)
+00D8..00F6 (31 chars)
+00F8..021F (296 chars)
+0222..0233 (18 chars)
+0250..02AD (94 chars)
+02B0..02B8 (9 chars)
+02BB..02C1 (7 chars)
+02E0..02E4 (5 chars)
+02EE
+0345
+037A
+0386
+0388..038A (3 chars)
+038C
+038E..03A1 (20 chars)
+03A3..03CE (44 chars)
+03D0..03D7 (8 chars)
+03DA..03F3 (26 chars)
+0400..0481 (130 chars)
+048C..04C4 (57 chars)
+04C7..04C8 (2 chars)
+04CB..04CC (2 chars)
+04D0..04F5 (38 chars)
+04F8..04F9 (2 chars)
+0531..0556 (38 chars)
+0559
+0561..0587 (39 chars)
+05D0..05EA (27 chars)
+05F0..05F2 (3 chars)
+0621..063A (26 chars)
+0641..0655 (21 chars)
+0670..06D3 (100 chars)
+06D5..06DC (8 chars)
+06E1..06E8 (8 chars)
+06ED
+06FA..06FC (3 chars)
+0710..072C (29 chars)
+0730..073F (16 chars)
+0780..07B0 (49 chars)
+0901..0903 (3 chars)
+0905..0939 (53 chars)
+093D..094C (16 chars)
+0950
+0958..0963 (12 chars)
+0981..0983 (3 chars)
+0985..098C (8 chars)
+098F..0990 (2 chars)
+0993..09A8 (22 chars)
+09AA..09B0 (7 chars)
+09B2
+09B6..09B9 (4 chars)
+09BE..09C4 (7 chars)
+09C7..09C8 (2 chars)
+09CB..09CC (2 chars)
+09D7
+09DC..09DD (2 chars)
+09DF..09E3 (5 chars)
+09F0..09F1 (2 chars)
+0A02
+0A05..0A0A (6 chars)
+0A0F..0A10 (2 chars)
+0A13..0A28 (22 chars)
+0A2A..0A30 (7 chars)
+0A32..0A33 (2 chars)
+0A35..0A36 (2 chars)
+0A38..0A39 (2 chars)
+0A3E..0A42 (5 chars)
+0A47..0A48 (2 chars)
+0A4B..0A4C (2 chars)
+0A59..0A5C (4 chars)
+0A5E
+0A70..0A74 (5 chars)
+0A81..0A83 (3 chars)
+0A85..0A8B (7 chars)
+0A8D
+0A8F..0A91 (3 chars)
+0A93..0AA8 (22 chars)
+0AAA..0AB0 (7 chars)
+0AB2..0AB3 (2 chars)
+0AB5..0AB9 (5 chars)
+0ABD..0AC5 (9 chars)
+0AC7..0AC9 (3 chars)
+0ACB..0ACC (2 chars)
+0AD0
+0AE0
+0B01..0B03 (3 chars)
+0B05..0B0C (8 chars)
+0B0F..0B10 (2 chars)
+0B13..0B28 (22 chars)
+0B2A..0B30 (7 chars)
+0B32..0B33 (2 chars)
+0B36..0B39 (4 chars)
+0B3D..0B43 (7 chars)
+0B47..0B48 (2 chars)
+0B4B..0B4C (2 chars)
+0B56..0B57 (2 chars)
+0B5C..0B5D (2 chars)
+0B5F..0B61 (3 chars)
+0B82..0B83 (2 chars)
+0B85..0B8A (6 chars)
+0B8E..0B90 (3 chars)
+0B92..0B95 (4 chars)
+0B99..0B9A (2 chars)
+0B9C
+0B9E..0B9F (2 chars)
+0BA3..0BA4 (2 chars)
+0BA8..0BAA (3 chars)
+0BAE..0BB5 (8 chars)
+0BB7..0BB9 (3 chars)
+0BBE..0BC2 (5 chars)
+0BC6..0BC8 (3 chars)
+0BCA..0BCC (3 chars)
+0BD7
+0C01..0C03 (3 chars)
+0C05..0C0C (8 chars)
+0C0E..0C10 (3 chars)
+0C12..0C28 (23 chars)
+0C2A..0C33 (10 chars)
+0C35..0C39 (5 chars)
+0C3E..0C44 (7 chars)
+0C46..0C48 (3 chars)
+0C4A..0C4C (3 chars)
+0C55..0C56 (2 chars)
+0C60..0C61 (2 chars)
+0C82..0C83 (2 chars)
+0C85..0C8C (8 chars)
+0C8E..0C90 (3 chars)
+0C92..0CA8 (23 chars)
+0CAA..0CB3 (10 chars)
+0CB5..0CB9 (5 chars)
+0CBE..0CC4 (7 chars)
+0CC6..0CC8 (3 chars)
+0CCA..0CCC (3 chars)
+0CD5..0CD6 (2 chars)
+0CDE
+0CE0..0CE1 (2 chars)
+0D02..0D03 (2 chars)
+0D05..0D0C (8 chars)
+0D0E..0D10 (3 chars)
+0D12..0D28 (23 chars)
+0D2A..0D39 (16 chars)
+0D3E..0D43 (6 chars)
+0D46..0D48 (3 chars)
+0D4A..0D4C (3 chars)
+0D57
+0D60..0D61 (2 chars)
+0D82..0D83 (2 chars)
+0D85..0D96 (18 chars)
+0D9A..0DB1 (24 chars)
+0DB3..0DBB (9 chars)
+0DBD
+0DC0..0DC6 (7 chars)
+0DCF..0DD4 (6 chars)
+0DD6
+0DD8..0DDF (8 chars)
+0DF2..0DF3 (2 chars)
+0E01..0E3A (58 chars)
+0E40..0E45 (6 chars)
+0E4D
+0E81..0E82 (2 chars)
+0E84
+0E87..0E88 (2 chars)
+0E8A
+0E8D
+0E94..0E97 (4 chars)
+0E99..0E9F (7 chars)
+0EA1..0EA3 (3 chars)
+0EA5
+0EA7
+0EAA..0EAB (2 chars)
+0EAD..0EB9 (13 chars)
+0EBB..0EBD (3 chars)
+0EC0..0EC4 (5 chars)
+0ECD
+0EDC..0EDD (2 chars)
+0F00
+0F40..0F47 (8 chars)
+0F49..0F6A (34 chars)
+0F71..0F81 (17 chars)
+0F88..0F8B (4 chars)
+0F90..0F97 (8 chars)
+0F99..0FBC (36 chars)
+1000..1021 (34 chars)
+1023..1027 (5 chars)
+1029..102A (2 chars)
+102C..1032 (7 chars)
+1036
+1038
+1050..1059 (10 chars)
+10A0..10C5 (38 chars)
+10D0..10F6 (39 chars)
+1100..1159 (90 chars)
+115F..11A2 (68 chars)
+11A8..11F9 (82 chars)
+1200..1206 (7 chars)
+1208..1246 (63 chars)
+1248
+124A..124D (4 chars)
+1250..1256 (7 chars)
+1258
+125A..125D (4 chars)
+1260..1286 (39 chars)
+1288
+128A..128D (4 chars)
+1290..12AE (31 chars)
+12B0
+12B2..12B5 (4 chars)
+12B8..12BE (7 chars)
+12C0
+12C2..12C5 (4 chars)
+12C8..12CE (7 chars)
+12D0..12D6 (7 chars)
+12D8..12EE (23 chars)
+12F0..130E (31 chars)
+1310
+1312..1315 (4 chars)
+1318..131E (7 chars)
+1320..1346 (39 chars)
+1348..135A (19 chars)
+13A0..13F4 (85 chars)
+1401..166C (620 chars)
+166F..1676 (8 chars)
+1681..169A (26 chars)
+16A0..16EA (75 chars)
+1780..17C8 (73 chars)
+1820..1842 (35 chars)
+1844..1877 (52 chars)
+1880..18A9 (42 chars)
+1E00..1E9B (156 chars)
+1EA0..1EF9 (90 chars)
+1F00..1F15 (22 chars)
+1F18..1F1D (6 chars)
+1F20..1F45 (38 chars)
+1F48..1F4D (6 chars)
+1F50..1F57 (8 chars)
+1F59
+1F5B
+1F5D
+1F5F..1F7D (31 chars)
+1F80..1FB4 (53 chars)
+1FB6..1FBC (7 chars)
+1FBE
+1FC2..1FC4 (3 chars)
+1FC6..1FCC (7 chars)
+1FD0..1FD3 (4 chars)
+1FD6..1FDB (6 chars)
+1FE0..1FEC (13 chars)
+1FF2..1FF4 (3 chars)
+1FF6..1FFC (7 chars)
+207F
+2102
+2107
+210A..2113 (10 chars)
+2115
+2119..211D (5 chars)
+2124
+2126
+2128
+212A..212D (4 chars)
+212F..2131 (3 chars)
+2133..2139 (7 chars)
+2160..2183 (36 chars)
+3041..3094 (84 chars)
+30A1..30FA (90 chars)
+3105..312C (40 chars)
+3131..318E (94 chars)
+31A0..31B7 (24 chars)
+A000..A48C (1165 chars)
+AC00..D7A3 (11172 chars)
+FB00..FB06 (7 chars)
+FB13..FB17 (5 chars)
+FB1D
+FB1F..FB28 (10 chars)
+FB2A..FB36 (13 chars)
+FB38..FB3C (5 chars)
+FB3E
+FB40..FB41 (2 chars)
+FB43..FB44 (2 chars)
+FB46..FBB1 (108 chars)
+FBD3..FD3D (363 chars)
+FD50..FD8F (64 chars)
+FD92..FDC7 (54 chars)
+FDF0..FDFB (12 chars)
+FE70..FE72 (3 chars)
+FE74
+FE76..FEFC (135 chars)
+FF21..FF3A (26 chars)
+FF41..FF5A (26 chars)
+FF66..FF6F (10 chars)
+FF71..FF9D (45 chars)
+FFA0..FFBE (31 chars)
+FFC2..FFC7 (6 chars)
+FFCA..FFCF (6 chars)
+FFD2..FFD7 (6 chars)
+FFDA..FFDC (3 chars)
+
+*******************************************
+
+Property dump for: 0x20000002 (Ideographic)
+
+3006..3007 (2 chars)
+3021..3029 (9 chars)
+3038..303A (3 chars)
+3400..4DB5 (6582 chars)
+4E00..9FA5 (20902 chars)
+F900..FA2D (302 chars)
+
+*******************************************
+
+Property dump for: 0x20010000 (Diacritic)
+
+005E
+0060
+00A8
+00AF
+00B4
+00B7..00B8 (2 chars)
+02B0..02EE (63 chars)
+0300..034E (79 chars)
+0360..0362 (3 chars)
+0374..0375 (2 chars)
+037A
+0384..0385 (2 chars)
+0483..0486 (4 chars)
+0559
+0591..05A1 (17 chars)
+05A3..05B9 (23 chars)
+05BB..05BD (3 chars)
+05BF
+05C1..05C2 (2 chars)
+05C4
+06DF..06E0 (2 chars)
+06E5..06E6 (2 chars)
+06EA..06EC (3 chars)
+0740..074A (11 chars)
+093C
+094D
+0951..0954 (4 chars)
+09BC
+09CD
+0A3C
+0A4D
+0ABC
+0ACD
+0B3C
+0B4D
+0BCD
+0C4D
+0CCD
+0D4D
+0DCA
+0E47..0E4C (6 chars)
+0E4E
+0EC8..0ECC (5 chars)
+0F18..0F19 (2 chars)
+0F35
+0F37
+0F39
+0F3E..0F3F (2 chars)
+0F82..0F84 (3 chars)
+0F86..0F87 (2 chars)
+0FC6
+1037
+1039
+17C9..17D3 (11 chars)
+1FBD
+1FBF..1FC1 (3 chars)
+1FCD..1FCF (3 chars)
+1FDD..1FDF (3 chars)
+1FED..1FEF (3 chars)
+1FFD..1FFE (2 chars)
+302A..302F (6 chars)
+3099..309C (4 chars)
+30FC
+FB1E
+FE20..FE23 (4 chars)
+FF3E
+FF40
+FF70
+FF9E..FF9F (2 chars)
+FFE3
+
+*******************************************
+
+Property dump for: 0x20000040 (Extender)
+
+00B7
+02D0..02D1 (2 chars)
+0640
+0E46
+0EC6
+1843
+3005
+3031..3035 (5 chars)
+309D..309E (2 chars)
+30FC..30FE (3 chars)
+FF70
+
+*******************************************
+
+Property dump for: 0x20020000 (Identifier Part)
+
+0030..0039 (10 chars)
+0041..005A (26 chars)
+005F
+0061..007A (26 chars)
+00AA
+00B5
+00BA
+00C0..00D6 (23 chars)
+00D8..00F6 (31 chars)
+00F8..021F (296 chars)
+0222..0233 (18 chars)
+0250..02AD (94 chars)
+02B0..02B8 (9 chars)
+02BB..02C1 (7 chars)
+02D0..02D1 (2 chars)
+02E0..02E4 (5 chars)
+02EE
+0300..034E (79 chars)
+0360..0362 (3 chars)
+037A
+0386
+0388..038A (3 chars)
+038C
+038E..03A1 (20 chars)
+03A3..03CE (44 chars)
+03D0..03D7 (8 chars)
+03DA..03F3 (26 chars)
+0400..0481 (130 chars)
+0483..0486 (4 chars)
+048C..04C4 (57 chars)
+04C7..04C8 (2 chars)
+04CB..04CC (2 chars)
+04D0..04F5 (38 chars)
+04F8..04F9 (2 chars)
+0531..0556 (38 chars)
+0559
+0561..0587 (39 chars)
+0591..05A1 (17 chars)
+05A3..05B9 (23 chars)
+05BB..05BD (3 chars)
+05BF
+05C1..05C2 (2 chars)
+05C4
+05D0..05EA (27 chars)
+05F0..05F2 (3 chars)
+0621..063A (26 chars)
+0640..0655 (22 chars)
+0660..0669 (10 chars)
+0670..06D3 (100 chars)
+06D5..06DC (8 chars)
+06DF..06E8 (10 chars)
+06EA..06ED (4 chars)
+06F0..06FC (13 chars)
+0710..072C (29 chars)
+0730..074A (27 chars)
+0780..07B0 (49 chars)
+0901..0903 (3 chars)
+0905..0939 (53 chars)
+093C..094D (18 chars)
+0950..0954 (5 chars)
+0958..0963 (12 chars)
+0966..096F (10 chars)
+0981..0983 (3 chars)
+0985..098C (8 chars)
+098F..0990 (2 chars)
+0993..09A8 (22 chars)
+09AA..09B0 (7 chars)
+09B2
+09B6..09B9 (4 chars)
+09BC
+09BE..09C4 (7 chars)
+09C7..09C8 (2 chars)
+09CB..09CD (3 chars)
+09D7
+09DC..09DD (2 chars)
+09DF..09E3 (5 chars)
+09E6..09F1 (12 chars)
+0A02
+0A05..0A0A (6 chars)
+0A0F..0A10 (2 chars)
+0A13..0A28 (22 chars)
+0A2A..0A30 (7 chars)
+0A32..0A33 (2 chars)
+0A35..0A36 (2 chars)
+0A38..0A39 (2 chars)
+0A3C
+0A3E..0A42 (5 chars)
+0A47..0A48 (2 chars)
+0A4B..0A4D (3 chars)
+0A59..0A5C (4 chars)
+0A5E
+0A66..0A74 (15 chars)
+0A81..0A83 (3 chars)
+0A85..0A8B (7 chars)
+0A8D
+0A8F..0A91 (3 chars)
+0A93..0AA8 (22 chars)
+0AAA..0AB0 (7 chars)
+0AB2..0AB3 (2 chars)
+0AB5..0AB9 (5 chars)
+0ABC..0AC5 (10 chars)
+0AC7..0AC9 (3 chars)
+0ACB..0ACD (3 chars)
+0AD0
+0AE0
+0AE6..0AEF (10 chars)
+0B01..0B03 (3 chars)
+0B05..0B0C (8 chars)
+0B0F..0B10 (2 chars)
+0B13..0B28 (22 chars)
+0B2A..0B30 (7 chars)
+0B32..0B33 (2 chars)
+0B36..0B39 (4 chars)
+0B3C..0B43 (8 chars)
+0B47..0B48 (2 chars)
+0B4B..0B4D (3 chars)
+0B56..0B57 (2 chars)
+0B5C..0B5D (2 chars)
+0B5F..0B61 (3 chars)
+0B66..0B6F (10 chars)
+0B82..0B83 (2 chars)
+0B85..0B8A (6 chars)
+0B8E..0B90 (3 chars)
+0B92..0B95 (4 chars)
+0B99..0B9A (2 chars)
+0B9C
+0B9E..0B9F (2 chars)
+0BA3..0BA4 (2 chars)
+0BA8..0BAA (3 chars)
+0BAE..0BB5 (8 chars)
+0BB7..0BB9 (3 chars)
+0BBE..0BC2 (5 chars)
+0BC6..0BC8 (3 chars)
+0BCA..0BCD (4 chars)
+0BD7
+0BE7..0BEF (9 chars)
+0C01..0C03 (3 chars)
+0C05..0C0C (8 chars)
+0C0E..0C10 (3 chars)
+0C12..0C28 (23 chars)
+0C2A..0C33 (10 chars)
+0C35..0C39 (5 chars)
+0C3E..0C44 (7 chars)
+0C46..0C48 (3 chars)
+0C4A..0C4D (4 chars)
+0C55..0C56 (2 chars)
+0C60..0C61 (2 chars)
+0C66..0C6F (10 chars)
+0C82..0C83 (2 chars)
+0C85..0C8C (8 chars)
+0C8E..0C90 (3 chars)
+0C92..0CA8 (23 chars)
+0CAA..0CB3 (10 chars)
+0CB5..0CB9 (5 chars)
+0CBE..0CC4 (7 chars)
+0CC6..0CC8 (3 chars)
+0CCA..0CCD (4 chars)
+0CD5..0CD6 (2 chars)
+0CDE
+0CE0..0CE1 (2 chars)
+0CE6..0CEF (10 chars)
+0D02..0D03 (2 chars)
+0D05..0D0C (8 chars)
+0D0E..0D10 (3 chars)
+0D12..0D28 (23 chars)
+0D2A..0D39 (16 chars)
+0D3E..0D43 (6 chars)
+0D46..0D48 (3 chars)
+0D4A..0D4D (4 chars)
+0D57
+0D60..0D61 (2 chars)
+0D66..0D6F (10 chars)
+0D82..0D83 (2 chars)
+0D85..0D96 (18 chars)
+0D9A..0DB1 (24 chars)
+0DB3..0DBB (9 chars)
+0DBD
+0DC0..0DC6 (7 chars)
+0DCA
+0DCF..0DD4 (6 chars)
+0DD6
+0DD8..0DDF (8 chars)
+0DF2..0DF3 (2 chars)
+0E01..0E3A (58 chars)
+0E40..0E4E (15 chars)
+0E50..0E59 (10 chars)
+0E81..0E82 (2 chars)
+0E84
+0E87..0E88 (2 chars)
+0E8A
+0E8D
+0E94..0E97 (4 chars)
+0E99..0E9F (7 chars)
+0EA1..0EA3 (3 chars)
+0EA5
+0EA7
+0EAA..0EAB (2 chars)
+0EAD..0EB9 (13 chars)
+0EBB..0EBD (3 chars)
+0EC0..0EC4 (5 chars)
+0EC6
+0EC8..0ECD (6 chars)
+0ED0..0ED9 (10 chars)
+0EDC..0EDD (2 chars)
+0F00
+0F18..0F19 (2 chars)
+0F20..0F29 (10 chars)
+0F35
+0F37
+0F39
+0F3E..0F47 (10 chars)
+0F49..0F6A (34 chars)
+0F71..0F84 (20 chars)
+0F86..0F8B (6 chars)
+0F90..0F97 (8 chars)
+0F99..0FBC (36 chars)
+0FC6
+1000..1021 (34 chars)
+1023..1027 (5 chars)
+1029..102A (2 chars)
+102C..1032 (7 chars)
+1036..1039 (4 chars)
+1040..1049 (10 chars)
+1050..1059 (10 chars)
+10A0..10C5 (38 chars)
+10D0..10F6 (39 chars)
+1100..1159 (90 chars)
+115F..11A2 (68 chars)
+11A8..11F9 (82 chars)
+1200..1206 (7 chars)
+1208..1246 (63 chars)
+1248
+124A..124D (4 chars)
+1250..1256 (7 chars)
+1258
+125A..125D (4 chars)
+1260..1286 (39 chars)
+1288
+128A..128D (4 chars)
+1290..12AE (31 chars)
+12B0
+12B2..12B5 (4 chars)
+12B8..12BE (7 chars)
+12C0
+12C2..12C5 (4 chars)
+12C8..12CE (7 chars)
+12D0..12D6 (7 chars)
+12D8..12EE (23 chars)
+12F0..130E (31 chars)
+1310
+1312..1315 (4 chars)
+1318..131E (7 chars)
+1320..1346 (39 chars)
+1348..135A (19 chars)
+1369..1371 (9 chars)
+13A0..13F4 (85 chars)
+1401..166C (620 chars)
+166F..1676 (8 chars)
+1681..169A (26 chars)
+16A0..16EA (75 chars)
+1780..17D3 (84 chars)
+17E0..17E9 (10 chars)
+1810..1819 (10 chars)
+1820..1877 (88 chars)
+1880..18A9 (42 chars)
+1E00..1E9B (156 chars)
+1EA0..1EF9 (90 chars)
+1F00..1F15 (22 chars)
+1F18..1F1D (6 chars)
+1F20..1F45 (38 chars)
+1F48..1F4D (6 chars)
+1F50..1F57 (8 chars)
+1F59
+1F5B
+1F5D
+1F5F..1F7D (31 chars)
+1F80..1FB4 (53 chars)
+1FB6..1FBC (7 chars)
+1FBE
+1FC2..1FC4 (3 chars)
+1FC6..1FCC (7 chars)
+1FD0..1FD3 (4 chars)
+1FD6..1FDB (6 chars)
+1FE0..1FEC (13 chars)
+1FF2..1FF4 (3 chars)
+1FF6..1FFC (7 chars)
+203F..2040 (2 chars)
+207F
+20D0..20DC (13 chars)
+20E1
+2102
+2107
+210A..2113 (10 chars)
+2115
+2119..211D (5 chars)
+2124
+2126
+2128
+212A..212D (4 chars)
+212F..2131 (3 chars)
+2133..2139 (7 chars)
+2160..2183 (36 chars)
+3005..3007 (3 chars)
+3021..302F (15 chars)
+3031..3035 (5 chars)
+3038..303A (3 chars)
+3041..3094 (84 chars)
+3099..309A (2 chars)
+309D..309E (2 chars)
+30A1..30FE (94 chars)
+3105..312C (40 chars)
+3131..318E (94 chars)
+31A0..31B7 (24 chars)
+3400..4DB5 (6582 chars)
+4E00..9FA5 (20902 chars)
+A000..A48C (1165 chars)
+AC00..D7A3 (11172 chars)
+F900..FA2D (302 chars)
+FB00..FB06 (7 chars)
+FB13..FB17 (5 chars)
+FB1D..FB28 (12 chars)
+FB2A..FB36 (13 chars)
+FB38..FB3C (5 chars)
+FB3E
+FB40..FB41 (2 chars)
+FB43..FB44 (2 chars)
+FB46..FBB1 (108 chars)
+FBD3..FD3D (363 chars)
+FD50..FD8F (64 chars)
+FD92..FDC7 (54 chars)
+FDF0..FDFB (12 chars)
+FE20..FE23 (4 chars)
+FE33..FE34 (2 chars)
+FE4D..FE4F (3 chars)
+FE70..FE72 (3 chars)
+FE74
+FE76..FEFC (135 chars)
+FF10..FF19 (10 chars)
+FF21..FF3A (26 chars)
+FF3F
+FF41..FF5A (26 chars)
+FF65..FFBE (90 chars)
+FFC2..FFC7 (6 chars)
+FFCA..FFCF (6 chars)
+FFD2..FFD7 (6 chars)
+FFDA..FFDC (3 chars)
+
+*******************************************
+
+Property dump for: 0x10000080 (Ignorable Control)
+
+0001..0008 (8 chars)
+000E..001B (14 chars)
+007F..0084 (6 chars)
+0086..009F (26 chars)
+070F
+180B..180E (4 chars)
+200C..200F (4 chars)
+202A..202E (5 chars)
+206A..206F (6 chars)
+FEFF
+FFF9..FFFB (3 chars)
+
+*******************************************
+
+Property dump for: 0x01000000 (Bidi: Left-to-Right)
+
+0041..005A (26 chars)
+0061..007A (26 chars)
+00AA
+00B5
+00BA
+00C0..00D6 (23 chars)
+00D8..00F6 (31 chars)
+00F8..021F (296 chars)
+0222..0233 (18 chars)
+0250..02AD (94 chars)
+02B0..02B8 (9 chars)
+02BB..02C1 (7 chars)
+02D0..02D1 (2 chars)
+02E0..02E4 (5 chars)
+02EE
+037A
+0386
+0388..038A (3 chars)
+038C
+038E..03A1 (20 chars)
+03A3..03CE (44 chars)
+03D0..03D7 (8 chars)
+03DA..03F3 (26 chars)
+0400..0482 (131 chars)
+048C..04C4 (57 chars)
+04C7..04C8 (2 chars)
+04CB..04CC (2 chars)
+04D0..04F5 (38 chars)
+04F8..04F9 (2 chars)
+0531..0556 (38 chars)
+0559..055F (7 chars)
+0561..0587 (39 chars)
+0589
+0903
+0905..0939 (53 chars)
+093D..0940 (4 chars)
+0949..094C (4 chars)
+0950
+0958..0961 (10 chars)
+0964..0970 (13 chars)
+0982..0983 (2 chars)
+0985..098C (8 chars)
+098F..0990 (2 chars)
+0993..09A8 (22 chars)
+09AA..09B0 (7 chars)
+09B2
+09B6..09B9 (4 chars)
+09BE..09C0 (3 chars)
+09C7..09C8 (2 chars)
+09CB..09CC (2 chars)
+09D7
+09DC..09DD (2 chars)
+09DF..09E1 (3 chars)
+09E6..09F1 (12 chars)
+09F4..09FA (7 chars)
+0A05..0A0A (6 chars)
+0A0F..0A10 (2 chars)
+0A13..0A28 (22 chars)
+0A2A..0A30 (7 chars)
+0A32..0A33 (2 chars)
+0A35..0A36 (2 chars)
+0A38..0A39 (2 chars)
+0A3E..0A40 (3 chars)
+0A59..0A5C (4 chars)
+0A5E
+0A66..0A6F (10 chars)
+0A72..0A74 (3 chars)
+0A83
+0A85..0A8B (7 chars)
+0A8D
+0A8F..0A91 (3 chars)
+0A93..0AA8 (22 chars)
+0AAA..0AB0 (7 chars)
+0AB2..0AB3 (2 chars)
+0AB5..0AB9 (5 chars)
+0ABD..0AC0 (4 chars)
+0AC9
+0ACB..0ACC (2 chars)
+0AD0
+0AE0
+0AE6..0AEF (10 chars)
+0B02..0B03 (2 chars)
+0B05..0B0C (8 chars)
+0B0F..0B10 (2 chars)
+0B13..0B28 (22 chars)
+0B2A..0B30 (7 chars)
+0B32..0B33 (2 chars)
+0B36..0B39 (4 chars)
+0B3D..0B3E (2 chars)
+0B40
+0B47..0B48 (2 chars)
+0B4B..0B4C (2 chars)
+0B57
+0B5C..0B5D (2 chars)
+0B5F..0B61 (3 chars)
+0B66..0B70 (11 chars)
+0B83
+0B85..0B8A (6 chars)
+0B8E..0B90 (3 chars)
+0B92..0B95 (4 chars)
+0B99..0B9A (2 chars)
+0B9C
+0B9E..0B9F (2 chars)
+0BA3..0BA4 (2 chars)
+0BA8..0BAA (3 chars)
+0BAE..0BB5 (8 chars)
+0BB7..0BB9 (3 chars)
+0BBE..0BBF (2 chars)
+0BC1..0BC2 (2 chars)
+0BC6..0BC8 (3 chars)
+0BCA..0BCC (3 chars)
+0BD7
+0BE7..0BF2 (12 chars)
+0C01..0C03 (3 chars)
+0C05..0C0C (8 chars)
+0C0E..0C10 (3 chars)
+0C12..0C28 (23 chars)
+0C2A..0C33 (10 chars)
+0C35..0C39 (5 chars)
+0C41..0C44 (4 chars)
+0C60..0C61 (2 chars)
+0C66..0C6F (10 chars)
+0C82..0C83 (2 chars)
+0C85..0C8C (8 chars)
+0C8E..0C90 (3 chars)
+0C92..0CA8 (23 chars)
+0CAA..0CB3 (10 chars)
+0CB5..0CB9 (5 chars)
+0CBE
+0CC0..0CC4 (5 chars)
+0CC7..0CC8 (2 chars)
+0CCA..0CCB (2 chars)
+0CD5..0CD6 (2 chars)
+0CDE
+0CE0..0CE1 (2 chars)
+0CE6..0CEF (10 chars)
+0D02..0D03 (2 chars)
+0D05..0D0C (8 chars)
+0D0E..0D10 (3 chars)
+0D12..0D28 (23 chars)
+0D2A..0D39 (16 chars)
+0D3E..0D40 (3 chars)
+0D46..0D48 (3 chars)
+0D4A..0D4C (3 chars)
+0D57
+0D60..0D61 (2 chars)
+0D66..0D6F (10 chars)
+0D82..0D83 (2 chars)
+0D85..0D96 (18 chars)
+0D9A..0DB1 (24 chars)
+0DB3..0DBB (9 chars)
+0DBD
+0DC0..0DC6 (7 chars)
+0DCF..0DD1 (3 chars)
+0DD8..0DDF (8 chars)
+0DF2..0DF4 (3 chars)
+0E01..0E30 (48 chars)
+0E32..0E33 (2 chars)
+0E40..0E46 (7 chars)
+0E4F..0E5B (13 chars)
+0E81..0E82 (2 chars)
+0E84
+0E87..0E88 (2 chars)
+0E8A
+0E8D
+0E94..0E97 (4 chars)
+0E99..0E9F (7 chars)
+0EA1..0EA3 (3 chars)
+0EA5
+0EA7
+0EAA..0EAB (2 chars)
+0EAD..0EB0 (4 chars)
+0EB2..0EB3 (2 chars)
+0EBD
+0EC0..0EC4 (5 chars)
+0EC6
+0ED0..0ED9 (10 chars)
+0EDC..0EDD (2 chars)
+0F00..0F17 (24 chars)
+0F1A..0F34 (27 chars)
+0F36
+0F38
+0F3E..0F47 (10 chars)
+0F49..0F6A (34 chars)
+0F7F
+0F85
+0F88..0F8B (4 chars)
+0FBE..0FC5 (8 chars)
+0FC7..0FCC (6 chars)
+0FCF
+1000..1021 (34 chars)
+1023..1027 (5 chars)
+1029..102A (2 chars)
+102C
+1031
+1038
+1040..1057 (24 chars)
+10A0..10C5 (38 chars)
+10D0..10F6 (39 chars)
+10FB
+1100..1159 (90 chars)
+115F..11A2 (68 chars)
+11A8..11F9 (82 chars)
+1200..1206 (7 chars)
+1208..1246 (63 chars)
+1248
+124A..124D (4 chars)
+1250..1256 (7 chars)
+1258
+125A..125D (4 chars)
+1260..1286 (39 chars)
+1288
+128A..128D (4 chars)
+1290..12AE (31 chars)
+12B0
+12B2..12B5 (4 chars)
+12B8..12BE (7 chars)
+12C0
+12C2..12C5 (4 chars)
+12C8..12CE (7 chars)
+12D0..12D6 (7 chars)
+12D8..12EE (23 chars)
+12F0..130E (31 chars)
+1310
+1312..1315 (4 chars)
+1318..131E (7 chars)
+1320..1346 (39 chars)
+1348..135A (19 chars)
+1361..137C (28 chars)
+13A0..13F4 (85 chars)
+1401..1676 (630 chars)
+1681..169A (26 chars)
+16A0..16F0 (81 chars)
+1780..17B6 (55 chars)
+17BE..17C5 (8 chars)
+17C7..17C8 (2 chars)
+17D4..17DA (7 chars)
+17DC
+17E0..17E9 (10 chars)
+1810..1819 (10 chars)
+1820..1877 (88 chars)
+1880..18A8 (41 chars)
+1E00..1E9B (156 chars)
+1EA0..1EF9 (90 chars)
+1F00..1F15 (22 chars)
+1F18..1F1D (6 chars)
+1F20..1F45 (38 chars)
+1F48..1F4D (6 chars)
+1F50..1F57 (8 chars)
+1F59
+1F5B
+1F5D
+1F5F..1F7D (31 chars)
+1F80..1FB4 (53 chars)
+1FB6..1FBC (7 chars)
+1FBE
+1FC2..1FC4 (3 chars)
+1FC6..1FCC (7 chars)
+1FD0..1FD3 (4 chars)
+1FD6..1FDB (6 chars)
+1FE0..1FEC (13 chars)
+1FF2..1FF4 (3 chars)
+1FF6..1FFC (7 chars)
+200E
+207F
+2102
+2107
+210A..2113 (10 chars)
+2115
+2119..211D (5 chars)
+2124
+2126
+2128
+212A..212D (4 chars)
+212F..2131 (3 chars)
+2133..2139 (7 chars)
+2160..2183 (36 chars)
+2336..237A (69 chars)
+2395
+249C..24E9 (78 chars)
+3005..3007 (3 chars)
+3021..3029 (9 chars)
+3031..3035 (5 chars)
+3038..303A (3 chars)
+3041..3094 (84 chars)
+309D..309E (2 chars)
+30A1..30FA (90 chars)
+30FC..30FE (3 chars)
+3105..312C (40 chars)
+3131..318E (94 chars)
+3190..31B7 (40 chars)
+3200..321C (29 chars)
+3220..3243 (36 chars)
+3260..327B (28 chars)
+327F..32B0 (50 chars)
+32C0..32CB (12 chars)
+32D0..32FE (47 chars)
+3300..3376 (119 chars)
+337B..33DD (99 chars)
+33E0..33FE (31 chars)
+3400..4DB5 (6582 chars)
+4E00..9FA5 (20902 chars)
+A000..A48C (1165 chars)
+AC00..D7A3 (11172 chars)
+D800..FA2D (8750 chars)
+FB00..FB06 (7 chars)
+FB13..FB17 (5 chars)
+FF21..FF3A (26 chars)
+FF41..FF5A (26 chars)
+FF66..FFBE (89 chars)
+FFC2..FFC7 (6 chars)
+FFCA..FFCF (6 chars)
+FFD2..FFD7 (6 chars)
+FFDA..FFDC (3 chars)
+F0000..FFFFD (65534 chars)
+100000..10FFFD (65534 chars)
+
+*******************************************
+
+Property dump for: 0x02000000 (Bidi: (Hebrew) Right-to-Left)
+
+05BE
+05C0
+05C3
+05D0..05EA (27 chars)
+05F0..05F4 (5 chars)
+200F
+FB1D
+FB1F..FB28 (10 chars)
+FB2A..FB36 (13 chars)
+FB38..FB3C (5 chars)
+FB3E
+FB40..FB41 (2 chars)
+FB43..FB44 (2 chars)
+FB46..FB4F (10 chars)
+
+*******************************************
+
+Property dump for: 0x0B000000 (Bidi: Arabic Right-to-Left)
+
+061B
+061F
+0621..063A (26 chars)
+0640..064A (11 chars)
+066D
+0671..06D5 (101 chars)
+06E5..06E6 (2 chars)
+06FA..06FE (5 chars)
+0700..070D (14 chars)
+0710
+0712..072C (27 chars)
+0780..07A5 (38 chars)
+FB50..FBB1 (98 chars)
+FBD3..FD3D (363 chars)
+FD50..FD8F (64 chars)
+FD92..FDC7 (54 chars)
+FDF0..FDFB (12 chars)
+FE70..FE72 (3 chars)
+FE74
+FE76..FEFC (135 chars)
+
+*******************************************
+
+Property dump for: 0x03000000 (Bidi: European Digit)
+
+0030..0039 (10 chars)
+00B2..00B3 (2 chars)
+00B9
+06F0..06F9 (10 chars)
+2070
+2074..2079 (6 chars)
+2080..2089 (10 chars)
+2460..249B (60 chars)
+24EA
+FF10..FF19 (10 chars)
+
+*******************************************
+
+Property dump for: 0x04000000 (Bidi: Eur Num Separator)
+
+002F
+FF0F
+
+*******************************************
+
+Property dump for: 0x05000000 (Bidi: Eur Num Terminator)
+
+0023..0025 (3 chars)
+002B
+002D
+00A2..00A5 (4 chars)
+00B0..00B1 (2 chars)
+066A
+09F2..09F3 (2 chars)
+0E3F
+17DB
+2030..2034 (5 chars)
+207A..207B (2 chars)
+208A..208B (2 chars)
+20A0..20AF (16 chars)
+212E
+2212..2213 (2 chars)
+FB29
+FE5F
+FE62..FE63 (2 chars)
+FE69..FE6A (2 chars)
+FF03..FF05 (3 chars)
+FF0B
+FF0D
+FFE0..FFE1 (2 chars)
+FFE5..FFE6 (2 chars)
+
+*******************************************
+
+Property dump for: 0x06000000 (Bidi: Arabic Digit)
+
+0660..0669 (10 chars)
+066B..066C (2 chars)
+
+*******************************************
+
+Property dump for: 0x07000000 (Bidi: Common Separator)
+
+002C
+002E
+003A
+00A0
+060C
+FE50
+FE52
+FE55
+FF0C
+FF0E
+FF1A
+
+*******************************************
+
+Property dump for: 0x08000000 (Bidi: Block Separator)
+
+000A
+000D
+001C..001E (3 chars)
+0085
+2029
+
+*******************************************
+
+Property dump for: 0x09000000 (Bidi: Segment Separator)
+
+0009
+000B
+001F
+
+*******************************************
+
+Property dump for: 0x0A000000 (Bidi: Whitespace)
+
+000C
+0020
+1680
+2000..200A (11 chars)
+2028
+202F
+3000
+
+*******************************************
+
+Property dump for: 0x0C000000 (Bidi: Non-spacing Mark)
+
+0300..034E (79 chars)
+0360..0362 (3 chars)
+0483..0486 (4 chars)
+0488..0489 (2 chars)
+0591..05A1 (17 chars)
+05A3..05B9 (23 chars)
+05BB..05BD (3 chars)
+05BF
+05C1..05C2 (2 chars)
+05C4
+064B..0655 (11 chars)
+0670
+06D6..06E4 (15 chars)
+06E7..06E8 (2 chars)
+06EA..06ED (4 chars)
+0711
+0730..074A (27 chars)
+07A6..07B0 (11 chars)
+0901..0902 (2 chars)
+093C
+0941..0948 (8 chars)
+094D
+0951..0954 (4 chars)
+0962..0963 (2 chars)
+0981
+09BC
+09C1..09C4 (4 chars)
+09CD
+09E2..09E3 (2 chars)
+0A02
+0A3C
+0A41..0A42 (2 chars)
+0A47..0A48 (2 chars)
+0A4B..0A4D (3 chars)
+0A70..0A71 (2 chars)
+0A81..0A82 (2 chars)
+0ABC
+0AC1..0AC5 (5 chars)
+0AC7..0AC8 (2 chars)
+0ACD
+0B01
+0B3C
+0B3F
+0B41..0B43 (3 chars)
+0B4D
+0B56
+0B82
+0BC0
+0BCD
+0C3E..0C40 (3 chars)
+0C46..0C48 (3 chars)
+0C4A..0C4D (4 chars)
+0C55..0C56 (2 chars)
+0CBF
+0CC6
+0CCC..0CCD (2 chars)
+0D41..0D43 (3 chars)
+0D4D
+0DCA
+0DD2..0DD4 (3 chars)
+0DD6
+0E31
+0E34..0E3A (7 chars)
+0E47..0E4E (8 chars)
+0EB1
+0EB4..0EB9 (6 chars)
+0EBB..0EBC (2 chars)
+0EC8..0ECD (6 chars)
+0F18..0F19 (2 chars)
+0F35
+0F37
+0F39
+0F71..0F7E (14 chars)
+0F80..0F84 (5 chars)
+0F86..0F87 (2 chars)
+0F90..0F97 (8 chars)
+0F99..0FBC (36 chars)
+0FC6
+102D..1030 (4 chars)
+1032
+1036..1037 (2 chars)
+1039
+1058..1059 (2 chars)
+17B7..17BD (7 chars)
+17C6
+17C9..17D3 (11 chars)
+18A9
+20D0..20E3 (20 chars)
+302A..302F (6 chars)
+3099..309A (2 chars)
+FB1E
+FE20..FE23 (4 chars)
+
+*******************************************
+
+Property dump for: 0x0D000000 (Bidi: Boundary Neutral)
+
+0000..0008 (9 chars)
+000E..001B (14 chars)
+007F..0084 (6 chars)
+0086..009F (26 chars)
+070F
+180B..180E (4 chars)
+200B..200D (3 chars)
+206A..206F (6 chars)
+FEFF
+FFF9..FFFB (3 chars)
+
+*******************************************
+
+Property dump for: 0x0E000000 (Bidi: PDF)
+
+202C
+
+*******************************************
+
+Property dump for: 0x0F000000 (Bidi: Embedding or Override)
+
+202A..202B (2 chars)
+202D..202E (2 chars)
+
+*******************************************
+
+Property dump for: 0x80000009 (Bidi: Other Neutral)
+
+0021..0022 (2 chars)
+0026..002A (5 chars)
+003B..0040 (6 chars)
+005B..0060 (6 chars)
+007B..007E (4 chars)
+00A1
+00A6..00A9 (4 chars)
+00AB..00AF (5 chars)
+00B4
+00B6..00B8 (3 chars)
+00BB..00BF (5 chars)
+00D7
+00F7
+02B9..02BA (2 chars)
+02C2..02CF (14 chars)
+02D2..02DF (14 chars)
+02E5..02ED (9 chars)
+0374..0375 (2 chars)
+037E
+0384..0385 (2 chars)
+0387
+058A
+06E9
+0F3A..0F3D (4 chars)
+169B..169C (2 chars)
+1800..180A (11 chars)
+1FBD
+1FBF..1FC1 (3 chars)
+1FCD..1FCF (3 chars)
+1FDD..1FDF (3 chars)
+1FED..1FEF (3 chars)
+1FFD..1FFE (2 chars)
+2010..2027 (24 chars)
+2035..2046 (18 chars)
+2048..204D (6 chars)
+207C..207E (3 chars)
+208C..208E (3 chars)
+2100..2101 (2 chars)
+2103..2106 (4 chars)
+2108..2109 (2 chars)
+2114
+2116..2118 (3 chars)
+211E..2123 (6 chars)
+2125
+2127
+2129
+2132
+213A
+2153..215F (13 chars)
+2190..21F3 (100 chars)
+2200..2211 (18 chars)
+2214..22F1 (222 chars)
+2300..2335 (54 chars)
+237B
+237D..2394 (24 chars)
+2396..239A (5 chars)
+2400..2426 (39 chars)
+2440..244A (11 chars)
+2500..2595 (150 chars)
+25A0..25F7 (88 chars)
+2600..2613 (20 chars)
+2619..2671 (89 chars)
+2701..2704 (4 chars)
+2706..2709 (4 chars)
+270C..2727 (28 chars)
+2729..274B (35 chars)
+274D
+274F..2752 (4 chars)
+2756
+2758..275E (7 chars)
+2761..2767 (7 chars)
+2776..2794 (31 chars)
+2798..27AF (24 chars)
+27B1..27BE (14 chars)
+2800..28FF (256 chars)
+2E80..2E99 (26 chars)
+2E9B..2EF3 (89 chars)
+2F00..2FD5 (214 chars)
+2FF0..2FFB (12 chars)
+3001..3004 (4 chars)
+3008..3020 (25 chars)
+3030
+3036..3037 (2 chars)
+303E..303F (2 chars)
+309B..309C (2 chars)
+30FB
+A490..A4A1 (18 chars)
+A4A4..A4B3 (16 chars)
+A4B5..A4C0 (12 chars)
+A4C2..A4C4 (3 chars)
+A4C6
+FD3E..FD3F (2 chars)
+FE30..FE44 (21 chars)
+FE49..FE4F (7 chars)
+FE51
+FE54
+FE56..FE5E (9 chars)
+FE60..FE61 (2 chars)
+FE64..FE66 (3 chars)
+FE68
+FE6B
+FF01..FF02 (2 chars)
+FF06..FF0A (5 chars)
+FF1B..FF20 (6 chars)
+FF3B..FF40 (6 chars)
+FF5B..FF5E (4 chars)
+FF61..FF65 (5 chars)
+FFE2..FFE4 (3 chars)
+FFE8..FFEE (7 chars)
+FFFC..FFFD (2 chars)
+
+*******************************************
+
+Property dump for: 0x80000005 (Private Use)
+
+E000..F8FF (6400 chars)
+F0000..FFFFD (65534 chars)
+100000..10FFFD (65534 chars)
+
+*******************************************
+
+Property dump for: 0x80000000 (Uppercase)
+
+0041..005A (26 chars)
+00C0..00D6 (23 chars)
+00D8..00DE (7 chars)
+0100
+0102
+0104
+0106
+0108
+010A
+010C
+010E
+0110
+0112
+0114
+0116
+0118
+011A
+011C
+011E
+0120
+0122
+0124
+0126
+0128
+012A
+012C
+012E
+0130
+0132
+0134
+0136
+0139
+013B
+013D
+013F
+0141
+0143
+0145
+0147
+014A
+014C
+014E
+0150
+0152
+0154
+0156
+0158
+015A
+015C
+015E
+0160
+0162
+0164
+0166
+0168
+016A
+016C
+016E
+0170
+0172
+0174
+0176
+0178..0179 (2 chars)
+017B
+017D
+0181..0182 (2 chars)
+0184
+0186..0187 (2 chars)
+0189..018B (3 chars)
+018E..0191 (4 chars)
+0193..0194 (2 chars)
+0196..0198 (3 chars)
+019C..019D (2 chars)
+019F..01A0 (2 chars)
+01A2
+01A4
+01A6..01A7 (2 chars)
+01A9
+01AC
+01AE..01AF (2 chars)
+01B1..01B3 (3 chars)
+01B5
+01B7..01B8 (2 chars)
+01BC
+01C4
+01C7
+01CA
+01CD
+01CF
+01D1
+01D3
+01D5
+01D7
+01D9
+01DB
+01DE
+01E0
+01E2
+01E4
+01E6
+01E8
+01EA
+01EC
+01EE
+01F1
+01F4
+01F6..01F8 (3 chars)
+01FA
+01FC
+01FE
+0200
+0202
+0204
+0206
+0208
+020A
+020C
+020E
+0210
+0212
+0214
+0216
+0218
+021A
+021C
+021E
+0222
+0224
+0226
+0228
+022A
+022C
+022E
+0230
+0232
+0386
+0388..038A (3 chars)
+038C
+038E..038F (2 chars)
+0391..03A1 (17 chars)
+03A3..03AB (9 chars)
+03D2..03D4 (3 chars)
+03DA
+03DC
+03DE
+03E0
+03E2
+03E4
+03E6
+03E8
+03EA
+03EC
+03EE
+0400..042F (48 chars)
+0460
+0462
+0464
+0466
+0468
+046A
+046C
+046E
+0470
+0472
+0474
+0476
+0478
+047A
+047C
+047E
+0480
+048C
+048E
+0490
+0492
+0494
+0496
+0498
+049A
+049C
+049E
+04A0
+04A2
+04A4
+04A6
+04A8
+04AA
+04AC
+04AE
+04B0
+04B2
+04B4
+04B6
+04B8
+04BA
+04BC
+04BE
+04C0..04C1 (2 chars)
+04C3
+04C7
+04CB
+04D0
+04D2
+04D4
+04D6
+04D8
+04DA
+04DC
+04DE
+04E0
+04E2
+04E4
+04E6
+04E8
+04EA
+04EC
+04EE
+04F0
+04F2
+04F4
+04F8
+0531..0556 (38 chars)
+10A0..10C5 (38 chars)
+1E00
+1E02
+1E04
+1E06
+1E08
+1E0A
+1E0C
+1E0E
+1E10
+1E12
+1E14
+1E16
+1E18
+1E1A
+1E1C
+1E1E
+1E20
+1E22
+1E24
+1E26
+1E28
+1E2A
+1E2C
+1E2E
+1E30
+1E32
+1E34
+1E36
+1E38
+1E3A
+1E3C
+1E3E
+1E40
+1E42
+1E44
+1E46
+1E48
+1E4A
+1E4C
+1E4E
+1E50
+1E52
+1E54
+1E56
+1E58
+1E5A
+1E5C
+1E5E
+1E60
+1E62
+1E64
+1E66
+1E68
+1E6A
+1E6C
+1E6E
+1E70
+1E72
+1E74
+1E76
+1E78
+1E7A
+1E7C
+1E7E
+1E80
+1E82
+1E84
+1E86
+1E88
+1E8A
+1E8C
+1E8E
+1E90
+1E92
+1E94
+1EA0
+1EA2
+1EA4
+1EA6
+1EA8
+1EAA
+1EAC
+1EAE
+1EB0
+1EB2
+1EB4
+1EB6
+1EB8
+1EBA
+1EBC
+1EBE
+1EC0
+1EC2
+1EC4
+1EC6
+1EC8
+1ECA
+1ECC
+1ECE
+1ED0
+1ED2
+1ED4
+1ED6
+1ED8
+1EDA
+1EDC
+1EDE
+1EE0
+1EE2
+1EE4
+1EE6
+1EE8
+1EEA
+1EEC
+1EEE
+1EF0
+1EF2
+1EF4
+1EF6
+1EF8
+1F08..1F0F (8 chars)
+1F18..1F1D (6 chars)
+1F28..1F2F (8 chars)
+1F38..1F3F (8 chars)
+1F48..1F4D (6 chars)
+1F59
+1F5B
+1F5D
+1F5F
+1F68..1F6F (8 chars)
+1FB8..1FBB (4 chars)
+1FC8..1FCB (4 chars)
+1FD8..1FDB (4 chars)
+1FE8..1FEC (5 chars)
+1FF8..1FFB (4 chars)
+2102
+2107
+210B..210D (3 chars)
+2110..2112 (3 chars)
+2115
+2119..211D (5 chars)
+2124
+2126
+2128
+212A..212D (4 chars)
+2130..2131 (2 chars)
+2133
+2160..216F (16 chars)
+24B6..24CF (26 chars)
+FF21..FF3A (26 chars)
+
+*******************************************
+
+Property dump for: 0x80000001 (Lowercase)
+
+0061..007A (26 chars)
+00AA
+00B5
+00BA
+00DF..00F6 (24 chars)
+00F8..00FF (8 chars)
+0101
+0103
+0105
+0107
+0109
+010B
+010D
+010F
+0111
+0113
+0115
+0117
+0119
+011B
+011D
+011F
+0121
+0123
+0125
+0127
+0129
+012B
+012D
+012F
+0131
+0133
+0135
+0137..0138 (2 chars)
+013A
+013C
+013E
+0140
+0142
+0144
+0146
+0148..0149 (2 chars)
+014B
+014D
+014F
+0151
+0153
+0155
+0157
+0159
+015B
+015D
+015F
+0161
+0163
+0165
+0167
+0169
+016B
+016D
+016F
+0171
+0173
+0175
+0177
+017A
+017C
+017E..0180 (3 chars)
+0183
+0185
+0188
+018C..018D (2 chars)
+0192
+0195
+0199..019B (3 chars)
+019E
+01A1
+01A3
+01A5
+01A8
+01AA..01AB (2 chars)
+01AD
+01B0
+01B4
+01B6
+01B9..01BA (2 chars)
+01BD..01BF (3 chars)
+01C6
+01C9
+01CC
+01CE
+01D0
+01D2
+01D4
+01D6
+01D8
+01DA
+01DC..01DD (2 chars)
+01DF
+01E1
+01E3
+01E5
+01E7
+01E9
+01EB
+01ED
+01EF..01F0 (2 chars)
+01F3
+01F5
+01F9
+01FB
+01FD
+01FF
+0201
+0203
+0205
+0207
+0209
+020B
+020D
+020F
+0211
+0213
+0215
+0217
+0219
+021B
+021D
+021F
+0223
+0225
+0227
+0229
+022B
+022D
+022F
+0231
+0233
+0250..02AD (94 chars)
+02B0..02B8 (9 chars)
+02C0..02C1 (2 chars)
+02E0..02E4 (5 chars)
+0390
+03AC..03CE (35 chars)
+03D0..03D1 (2 chars)
+03D5..03D7 (3 chars)
+03DB
+03DD
+03DF
+03E1
+03E3
+03E5
+03E7
+03E9
+03EB
+03ED
+03EF..03F3 (5 chars)
+0430..045F (48 chars)
+0461
+0463
+0465
+0467
+0469
+046B
+046D
+046F
+0471
+0473
+0475
+0477
+0479
+047B
+047D
+047F
+0481
+048D
+048F
+0491
+0493
+0495
+0497
+0499
+049B
+049D
+049F
+04A1
+04A3
+04A5
+04A7
+04A9
+04AB
+04AD
+04AF
+04B1
+04B3
+04B5
+04B7
+04B9
+04BB
+04BD
+04BF
+04C2
+04C4
+04C8
+04CC
+04D1
+04D3
+04D5
+04D7
+04D9
+04DB
+04DD
+04DF
+04E1
+04E3
+04E5
+04E7
+04E9
+04EB
+04ED
+04EF
+04F1
+04F3
+04F5
+04F9
+0561..0587 (39 chars)
+1E01
+1E03
+1E05
+1E07
+1E09
+1E0B
+1E0D
+1E0F
+1E11
+1E13
+1E15
+1E17
+1E19
+1E1B
+1E1D
+1E1F
+1E21
+1E23
+1E25
+1E27
+1E29
+1E2B
+1E2D
+1E2F
+1E31
+1E33
+1E35
+1E37
+1E39
+1E3B
+1E3D
+1E3F
+1E41
+1E43
+1E45
+1E47
+1E49
+1E4B
+1E4D
+1E4F
+1E51
+1E53
+1E55
+1E57
+1E59
+1E5B
+1E5D
+1E5F
+1E61
+1E63
+1E65
+1E67
+1E69
+1E6B
+1E6D
+1E6F
+1E71
+1E73
+1E75
+1E77
+1E79
+1E7B
+1E7D
+1E7F
+1E81
+1E83
+1E85
+1E87
+1E89
+1E8B
+1E8D
+1E8F
+1E91
+1E93
+1E95..1E9B (7 chars)
+1EA1
+1EA3
+1EA5
+1EA7
+1EA9
+1EAB
+1EAD
+1EAF
+1EB1
+1EB3
+1EB5
+1EB7
+1EB9
+1EBB
+1EBD
+1EBF
+1EC1
+1EC3
+1EC5
+1EC7
+1EC9
+1ECB
+1ECD
+1ECF
+1ED1
+1ED3
+1ED5
+1ED7
+1ED9
+1EDB
+1EDD
+1EDF
+1EE1
+1EE3
+1EE5
+1EE7
+1EE9
+1EEB
+1EED
+1EEF
+1EF1
+1EF3
+1EF5
+1EF7
+1EF9
+1F00..1F07 (8 chars)
+1F10..1F15 (6 chars)
+1F20..1F27 (8 chars)
+1F30..1F37 (8 chars)
+1F40..1F45 (6 chars)
+1F50..1F57 (8 chars)
+1F60..1F67 (8 chars)
+1F70..1F7D (14 chars)
+1F80..1F87 (8 chars)
+1F90..1F97 (8 chars)
+1FA0..1FA7 (8 chars)
+1FB0..1FB4 (5 chars)
+1FB6..1FB7 (2 chars)
+1FBE
+1FC2..1FC4 (3 chars)
+1FC6..1FC7 (2 chars)
+1FD0..1FD3 (4 chars)
+1FD6..1FD7 (2 chars)
+1FE0..1FE7 (8 chars)
+1FF2..1FF4 (3 chars)
+1FF6..1FF7 (2 chars)
+207F
+210A
+210E..210F (2 chars)
+2113
+212F
+2134
+2139
+2170..217F (16 chars)
+24D0..24E9 (26 chars)
+FB00..FB06 (7 chars)
+FB13..FB17 (5 chars)
+FF41..FF5A (26 chars)
+
+*******************************************
+
+Property dump for: 0x80000002 (Titlecase)
+
+01C5
+01C8
+01CB
+01F2
+1F88..1F8F (8 chars)
+1F98..1F9F (8 chars)
+1FA8..1FAF (8 chars)
+1FBC
+1FCC
+1FFC
+
+*******************************************
+
+Property dump for: 0x80000006 (Low Surrogate)
+
+DC00..DFFF (1024 chars)
+
+*******************************************
+
+Property dump for: 0x80000007 (High Surrogate)
+
+D800..DB7F (896 chars)
+
+*******************************************
+
+Property dump for: 0x80000008 (Private Use High Surrogate)
+
+DB80..DBFF (128 chars)
+
+*******************************************
+
+Property dump for: 0x8000000A (Not a Character)
+
+FFFE..FFFF (2 chars)
+1FFFE..1FFFF (2 chars)
+2FFFE..2FFFF (2 chars)
+3FFFE..3FFFF (2 chars)
+4FFFE..4FFFF (2 chars)
+5FFFE..5FFFF (2 chars)
+6FFFE..6FFFF (2 chars)
+7FFFE..7FFFF (2 chars)
+8FFFE..8FFFF (2 chars)
+9FFFE..9FFFF (2 chars)
+AFFFE..AFFFF (2 chars)
+BFFFE..BFFFF (2 chars)
+CFFFE..CFFFF (2 chars)
+DFFFE..DFFFF (2 chars)
+EFFFE..EFFFF (2 chars)
+FFFFE..FFFFF (2 chars)
+10FFFE..10FFFF (2 chars)
+
+*******************************************
+
+Property dump for: 0x00000000 (Unassigned Code Value)
+
+0220..0221 (2 chars)
+0234..024F (28 chars)
+02AE..02AF (2 chars)
+02EF..02FF (17 chars)
+034F..035F (17 chars)
+0363..0373 (17 chars)
+0376..0379 (4 chars)
+037B..037D (3 chars)
+037F..0383 (5 chars)
+038B
+038D
+03A2
+03CF
+03D8..03D9 (2 chars)
+03F4..03FF (12 chars)
+0487
+048A..048B (2 chars)
+04C5..04C6 (2 chars)
+04C9..04CA (2 chars)
+04CD..04CF (3 chars)
+04F6..04F7 (2 chars)
+04FA..0530 (55 chars)
+0557..0558 (2 chars)
+0560
+0588
+058B..0590 (6 chars)
+05A2
+05BA
+05C5..05CF (11 chars)
+05EB..05EF (5 chars)
+05F5..060B (23 chars)
+060D..061A (14 chars)
+061C..061E (3 chars)
+0620
+063B..063F (5 chars)
+0656..065F (10 chars)
+066E..066F (2 chars)
+06EE..06EF (2 chars)
+06FF
+070E
+072D..072F (3 chars)
+074B..077F (53 chars)
+07B1..0900 (336 chars)
+0904
+093A..093B (2 chars)
+094E..094F (2 chars)
+0955..0957 (3 chars)
+0971..0980 (16 chars)
+0984
+098D..098E (2 chars)
+0991..0992 (2 chars)
+09A9
+09B1
+09B3..09B5 (3 chars)
+09BA..09BB (2 chars)
+09BD
+09C5..09C6 (2 chars)
+09C9..09CA (2 chars)
+09CE..09D6 (9 chars)
+09D8..09DB (4 chars)
+09DE
+09E4..09E5 (2 chars)
+09FB..0A01 (7 chars)
+0A03..0A04 (2 chars)
+0A0B..0A0E (4 chars)
+0A11..0A12 (2 chars)
+0A29
+0A31
+0A34
+0A37
+0A3A..0A3B (2 chars)
+0A3D
+0A43..0A46 (4 chars)
+0A49..0A4A (2 chars)
+0A4E..0A58 (11 chars)
+0A5D
+0A5F..0A65 (7 chars)
+0A75..0A80 (12 chars)
+0A84
+0A8C
+0A8E
+0A92
+0AA9
+0AB1
+0AB4
+0ABA..0ABB (2 chars)
+0AC6
+0ACA
+0ACE..0ACF (2 chars)
+0AD1..0ADF (15 chars)
+0AE1..0AE5 (5 chars)
+0AF0..0B00 (17 chars)
+0B04
+0B0D..0B0E (2 chars)
+0B11..0B12 (2 chars)
+0B29
+0B31
+0B34..0B35 (2 chars)
+0B3A..0B3B (2 chars)
+0B44..0B46 (3 chars)
+0B49..0B4A (2 chars)
+0B4E..0B55 (8 chars)
+0B58..0B5B (4 chars)
+0B5E
+0B62..0B65 (4 chars)
+0B71..0B81 (17 chars)
+0B84
+0B8B..0B8D (3 chars)
+0B91
+0B96..0B98 (3 chars)
+0B9B
+0B9D
+0BA0..0BA2 (3 chars)
+0BA5..0BA7 (3 chars)
+0BAB..0BAD (3 chars)
+0BB6
+0BBA..0BBD (4 chars)
+0BC3..0BC5 (3 chars)
+0BC9
+0BCE..0BD6 (9 chars)
+0BD8..0BE6 (15 chars)
+0BF3..0C00 (14 chars)
+0C04
+0C0D
+0C11
+0C29
+0C34
+0C3A..0C3D (4 chars)
+0C45
+0C49
+0C4E..0C54 (7 chars)
+0C57..0C5F (9 chars)
+0C62..0C65 (4 chars)
+0C70..0C81 (18 chars)
+0C84
+0C8D
+0C91
+0CA9
+0CB4
+0CBA..0CBD (4 chars)
+0CC5
+0CC9
+0CCE..0CD4 (7 chars)
+0CD7..0CDD (7 chars)
+0CDF
+0CE2..0CE5 (4 chars)
+0CF0..0D01 (18 chars)
+0D04
+0D0D
+0D11
+0D29
+0D3A..0D3D (4 chars)
+0D44..0D45 (2 chars)
+0D49
+0D4E..0D56 (9 chars)
+0D58..0D5F (8 chars)
+0D62..0D65 (4 chars)
+0D70..0D81 (18 chars)
+0D84
+0D97..0D99 (3 chars)
+0DB2
+0DBC
+0DBE..0DBF (2 chars)
+0DC7..0DC9 (3 chars)
+0DCB..0DCE (4 chars)
+0DD5
+0DD7
+0DE0..0DF1 (18 chars)
+0DF5..0E00 (12 chars)
+0E3B..0E3E (4 chars)
+0E5C..0E80 (37 chars)
+0E83
+0E85..0E86 (2 chars)
+0E89
+0E8B..0E8C (2 chars)
+0E8E..0E93 (6 chars)
+0E98
+0EA0
+0EA4
+0EA6
+0EA8..0EA9 (2 chars)
+0EAC
+0EBA
+0EBE..0EBF (2 chars)
+0EC5
+0EC7
+0ECE..0ECF (2 chars)
+0EDA..0EDB (2 chars)
+0EDE..0EFF (34 chars)
+0F48
+0F6B..0F70 (6 chars)
+0F8C..0F8F (4 chars)
+0F98
+0FBD
+0FCD..0FCE (2 chars)
+0FD0..0FFF (48 chars)
+1022
+1028
+102B
+1033..1035 (3 chars)
+103A..103F (6 chars)
+105A..109F (70 chars)
+10C6..10CF (10 chars)
+10F7..10FA (4 chars)
+10FC..10FF (4 chars)
+115A..115E (5 chars)
+11A3..11A7 (5 chars)
+11FA..11FF (6 chars)
+1207
+1247
+1249
+124E..124F (2 chars)
+1257
+1259
+125E..125F (2 chars)
+1287
+1289
+128E..128F (2 chars)
+12AF
+12B1
+12B6..12B7 (2 chars)
+12BF
+12C1
+12C6..12C7 (2 chars)
+12CF
+12D7
+12EF
+130F
+1311
+1316..1317 (2 chars)
+131F
+1347
+135B..1360 (6 chars)
+137D..139F (35 chars)
+13F5..1400 (12 chars)
+1677..167F (9 chars)
+169D..169F (3 chars)
+16F1..177F (143 chars)
+17DD..17DF (3 chars)
+17EA..17FF (22 chars)
+180F
+181A..181F (6 chars)
+1878..187F (8 chars)
+18AA..1DFF (1366 chars)
+1E9C..1E9F (4 chars)
+1EFA..1EFF (6 chars)
+1F16..1F17 (2 chars)
+1F1E..1F1F (2 chars)
+1F46..1F47 (2 chars)
+1F4E..1F4F (2 chars)
+1F58
+1F5A
+1F5C
+1F5E
+1F7E..1F7F (2 chars)
+1FB5
+1FC5
+1FD4..1FD5 (2 chars)
+1FDC
+1FF0..1FF1 (2 chars)
+1FF5
+1FFF
+2047
+204E..2069 (28 chars)
+2071..2073 (3 chars)
+208F..209F (17 chars)
+20B0..20CF (32 chars)
+20E4..20FF (28 chars)
+213B..2152 (24 chars)
+2184..218F (12 chars)
+21F4..21FF (12 chars)
+22F2..22FF (14 chars)
+237C
+239B..23FF (101 chars)
+2427..243F (25 chars)
+244B..245F (21 chars)
+24EB..24FF (21 chars)
+2596..259F (10 chars)
+25F8..25FF (8 chars)
+2614..2618 (5 chars)
+2672..2700 (143 chars)
+2705
+270A..270B (2 chars)
+2728
+274C
+274E
+2753..2755 (3 chars)
+2757
+275F..2760 (2 chars)
+2768..2775 (14 chars)
+2795..2797 (3 chars)
+27B0
+27BF..27FF (65 chars)
+2900..2E7F (1408 chars)
+2E9A
+2EF4..2EFF (12 chars)
+2FD6..2FEF (26 chars)
+2FFC..2FFF (4 chars)
+303B..303D (3 chars)
+3040
+3095..3098 (4 chars)
+309F..30A0 (2 chars)
+30FF..3104 (6 chars)
+312D..3130 (4 chars)
+318F
+31B8..31FF (72 chars)
+321D..321F (3 chars)
+3244..325F (28 chars)
+327C..327E (3 chars)
+32B1..32BF (15 chars)
+32CC..32CF (4 chars)
+32FF
+3377..337A (4 chars)
+33DE..33DF (2 chars)
+33FF
+4DB6..4DFF (74 chars)
+9FA6..9FFF (90 chars)
+A48D..A48F (3 chars)
+A4A2..A4A3 (2 chars)
+A4B4
+A4C1
+A4C5
+A4C7..ABFF (1849 chars)
+D7A4..D7FF (92 chars)
+FA2E..FAFF (210 chars)
+FB07..FB12 (12 chars)
+FB18..FB1C (5 chars)
+FB37
+FB3D
+FB3F
+FB42
+FB45
+FBB2..FBD2 (33 chars)
+FD40..FD4F (16 chars)
+FD90..FD91 (2 chars)
+FDC8..FDEF (40 chars)
+FDFC..FE1F (36 chars)
+FE24..FE2F (12 chars)
+FE45..FE48 (4 chars)
+FE53
+FE67
+FE6C..FE6F (4 chars)
+FE73
+FE75
+FEFD..FEFE (2 chars)
+FF00
+FF5F..FF60 (2 chars)
+FFBF..FFC1 (3 chars)
+FFC8..FFC9 (2 chars)
+FFD0..FFD1 (2 chars)
+FFD8..FFD9 (2 chars)
+FFDD..FFDF (3 chars)
+FFE7
+FFEF..FFF8 (10 chars)
+10000..1FFFD (65534 chars)
+20000..2FFFD (65534 chars)
+30000..3FFFD (65534 chars)
+40000..4FFFD (65534 chars)
+50000..5FFFD (65534 chars)
+60000..6FFFD (65534 chars)
+70000..7FFFD (65534 chars)
+80000..8FFFD (65534 chars)
+90000..9FFFD (65534 chars)
+A0000..AFFFD (65534 chars)
+B0000..BFFFD (65534 chars)
+C0000..CFFFD (65534 chars)
+D0000..DFFFD (65534 chars)
+E0000..EFFFD (65534 chars)
+