summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@nokia.com>2010-06-14 14:14:59 +0200
committerLars Knoll <lars.knoll@nokia.com>2010-06-14 14:22:31 +0200
commitcce760d41f115fecd5b9b6b20b62883b10a9c204 (patch)
treef7fc041c8a1c8c73744060fc845a3c6af76a8793
parent85ad0ddd092522b4cff251f324128662f100991f (diff)
Fixes for thai linebreaking
* Load libthai.so.0 since libthai.so is not there on all systems * Remove dependency on codecs. Unicode->TIS620 is so simple we can simply hardcode it in harbuzz-thai.c * Speed up detection of word boundaries * Falback when libthai is not found is now to not break instead of breaking after every character (in line with recommendations from unicode.org linebreaking algorithm) Reviewed-by: Simon Hausmann
-rw-r--r--contrib/harfbuzz-unicode.c19
-rw-r--r--src/harfbuzz-external.h6
-rw-r--r--src/harfbuzz-shaper.cpp13
-rw-r--r--src/harfbuzz-thai.c64
-rw-r--r--tests/linebreaking/harfbuzz-qt.cpp25
5 files changed, 52 insertions, 75 deletions
diff --git a/contrib/harfbuzz-unicode.c b/contrib/harfbuzz-unicode.c
index 51dd4ea..049e0ca 100644
--- a/contrib/harfbuzz-unicode.c
+++ b/contrib/harfbuzz-unicode.c
@@ -262,24 +262,7 @@ HB_GetMirroredChar(HB_UChar16 ch) {
}
void *
-HB_Library_Resolve(const char *library, const char *symbol) {
+HB_Library_Resolve(const char *library, int version, const char *symbol) {
abort();
return NULL;
}
-
-void *
-HB_TextCodecForMib(int mib) {
- abort();
- return NULL;
-}
-
-char *
-HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength) {
- abort();
- return NULL;
-}
-
-void
-HB_TextCodec_FreeResult(char *v) {
- abort();
-}
diff --git a/src/harfbuzz-external.h b/src/harfbuzz-external.h
index 760749b..7644f0d 100644
--- a/src/harfbuzz-external.h
+++ b/src/harfbuzz-external.h
@@ -146,11 +146,7 @@ HB_CharCategory HB_GetUnicodeCharCategory(HB_UChar32 ch);
int HB_GetUnicodeCharCombiningClass(HB_UChar32 ch);
HB_UChar16 HB_GetMirroredChar(HB_UChar16 ch);
-void *HB_Library_Resolve(const char *library, const char *symbol);
-
-void *HB_TextCodecForMib(int mib);
-char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength);
-void HB_TextCodec_FreeResult(char *);
+void *HB_Library_Resolve(const char *library, int version, const char *symbol);
HB_END_HEADER
diff --git a/src/harfbuzz-shaper.cpp b/src/harfbuzz-shaper.cpp
index 4bc53c8..ce4d4ac 100644
--- a/src/harfbuzz-shaper.cpp
+++ b/src/harfbuzz-shaper.cpp
@@ -183,18 +183,15 @@ static void calcLineBreaks(const HB_UChar16 *uc, hb_uint32 len, HB_CharAttribute
if (ncls >= HB_LineBreak_CR)
goto next;
- // two complex chars (thai or lao), thai_attributes might override, but here we do a best guess
- if (cls == HB_LineBreak_SA && ncls == HB_LineBreak_SA) {
- lineBreakType = HB_Break;
- goto next;
- }
-
{
int tcls = ncls;
+ // for south east asian chars that require a complex (dictionary analysis), the unicode
+ // standard recommends to treat them as AL. thai_attributes and other attribute methods that
+ // do dictionary analysis can override
if (tcls >= HB_LineBreak_SA)
- tcls = HB_LineBreak_ID;
+ tcls = HB_LineBreak_AL;
if (cls >= HB_LineBreak_SA)
- cls = HB_LineBreak_ID;
+ cls = HB_LineBreak_AL;
int brk = breakTable[cls][tcls];
switch (brk) {
diff --git a/src/harfbuzz-thai.c b/src/harfbuzz-thai.c
index 1d1aa2f..fc2bdbf 100644
--- a/src/harfbuzz-thai.c
+++ b/src/harfbuzz-thai.c
@@ -27,57 +27,79 @@
#include "harfbuzz-external.h"
#include <assert.h>
+#include <stdio.h>
+
+typedef int (*th_brk_def)(const char*, int[], int);
+static th_brk_def th_brk = 0;
+static int libthai_resolved = 0;
+
+static void resolve_libthai()
+{
+ if (!th_brk)
+ th_brk = (th_brk_def)HB_Library_Resolve("thai", 0, "th_brk");
+ libthai_resolved = 1;
+}
+
+static void to_tis620(const HB_UChar16 *string, hb_uint32 len, const char *cstr)
+{
+ hb_uint32 i;
+ unsigned char *result = (unsigned char *)cstr;
+
+ for (i = 0; i < len; ++i) {
+ if (string[i] <= 0xa0)
+ result[i] = (unsigned char)string[i];
+ if (string[i] >= 0xe01 && string[i] <= 0xe5b)
+ result[i] = (unsigned char)(string[i] - 0xe00 + 0xa0);
+ else
+ result[i] = '?';
+ }
+}
static void thaiWordBreaks(const HB_UChar16 *string, hb_uint32 len, HB_CharAttributes *attributes)
{
- typedef int (*th_brk_def)(const char*, int[], int);
- static void *thaiCodec = 0;
- static th_brk_def th_brk = 0;
- char *cstr = 0;
+ char s[128];
+ char *cstr = s;
int brp[128];
int *break_positions = brp;
hb_uint32 numbreaks;
hb_uint32 i;
- if (!thaiCodec)
- thaiCodec = HB_TextCodecForMib(2259);
-
- /* load libthai dynamically */
- if (!th_brk && thaiCodec) {
- th_brk = (th_brk_def)HB_Library_Resolve("thai", "th_brk");
- if (!th_brk)
- thaiCodec = 0;
- }
+ if (!libthai_resolved)
+ resolve_libthai();
if (!th_brk)
return;
- cstr = HB_TextCodec_ConvertFromUnicode(thaiCodec, string, len, 0);
- if (!cstr)
- return;
+ if (len > 128)
+ cstr = (char *)malloc(len*sizeof(char));
+
+ to_tis620(string, len, cstr);
- break_positions = brp;
numbreaks = th_brk(cstr, break_positions, 128);
if (numbreaks > 128) {
break_positions = (int *)malloc(numbreaks * sizeof(int));
numbreaks = th_brk(cstr, break_positions, numbreaks);
}
- for (i = 0; i < len; ++i)
+ for (i = 0; i < len; ++i) {
attributes[i].lineBreakType = HB_NoBreak;
+ attributes[i].wordBoundary = FALSE;
+ }
for (i = 0; i < numbreaks; ++i) {
- if (break_positions[i] > 0)
+ if (break_positions[i] > 0) {
attributes[break_positions[i]-1].lineBreakType = HB_Break;
+ attributes[i].wordBoundary = TRUE;
+ }
}
if (break_positions != brp)
free(break_positions);
- HB_TextCodec_FreeResult(cstr);
+ if (len > 128)
+ free(cstr);
}
-
void HB_ThaiAttributes(HB_Script script, const HB_UChar16 *text, hb_uint32 from, hb_uint32 len, HB_CharAttributes *attributes)
{
assert(script == HB_Script_Thai);
diff --git a/tests/linebreaking/harfbuzz-qt.cpp b/tests/linebreaking/harfbuzz-qt.cpp
index ea03052..f0048b7 100644
--- a/tests/linebreaking/harfbuzz-qt.cpp
+++ b/tests/linebreaking/harfbuzz-qt.cpp
@@ -79,30 +79,9 @@ void HB_GetGraphemeAndLineBreakClass(HB_UChar32 ch, HB_GraphemeClass *grapheme,
*lineBreak = (HB_LineBreakClass) prop->line_break_class;
}
-void *HB_Library_Resolve(const char *library, const char *symbol)
+void *HB_Library_Resolve(const char *library, int version, const char *symbol)
{
- return QLibrary::resolve(library, symbol);
-}
-
-void *HB_TextCodecForMib(int mib)
-{
- return QTextCodec::codecForMib(mib);
-}
-
-char *HB_TextCodec_ConvertFromUnicode(void *codec, const HB_UChar16 *unicode, hb_uint32 length, hb_uint32 *outputLength)
-{
- QByteArray data = reinterpret_cast<QTextCodec *>(codec)->fromUnicode((const QChar *)unicode, length);
- // ### suboptimal
- char *output = (char *)malloc(data.length() + 1);
- memcpy(output, data.constData(), data.length() + 1);
- if (outputLength)
- *outputLength = data.length();
- return output;
-}
-
-void HB_TextCodec_FreeResult(char *string)
-{
- free(string);
+ return QLibrary::resolve(library, version, symbol);
}
}