summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Keith Packard <keithp@keithp.com> 2004-12-29 09:15:17 +0000
committer Keith Packard <keithp@keithp.com> 2004-12-29 09:15:17 +0000
commit 192296d852011f4a2abb6e9fd1ee741fa7f81673 (patch)
tree 0a6924e8b0b4a950d5eb78bbfc541be3367ad9ae /src
parent 5cf8c5364f1b7a676f52b480fa55c571cadc6fda (diff)
Adopt some RedHat suggestions for standard font configuration.
Add new helper program 'fc-case' to construct case folding tables from the standard Unicode CaseFolding.txt file. Re-implement case-insensitive functions with Unicode-aware versions (including full case folding mappings).
Diffstat (limited to 'src')
-rw-r--r-- src/fcint.h 34
-rw-r--r-- src/fclist.c 16
-rw-r--r-- src/fcstr.c 263
3 files changed, 240 insertions, 73 deletions
diff --git a/src/fcint.h b/src/fcint.h
index 106a38b3..4dca3c58 100644
--- a/src/fcint.h
+++ b/src/fcint.h
@@ -231,6 +231,37 @@ typedef struct _FcGlyphName {
FcChar8 name[1]; /* name extends beyond struct */
} FcGlyphName;
+/*
+ * To perform case-insensitive string comparisons, a table
+ * is used which holds three different kinds of folding data.
+ *
+ * The first is a range of upper case values mapping to a range
+ * of their lower case equivalents. Within each range, the offset
+ * between upper and lower case is constant.
+ *
+ * The second is a range of upper case values which are interleaved
+ * with their lower case equivalents.
+ *
+ * The third is a set of raw unicode values mapping to a list
+ * of unicode values for comparison purposes. This allows conversion
+ * of ß to "ss" so that SS, ss and ß all match. A separate array
+ * holds the list of unicode values for each entry.
+ *
+ * These are packed into a single table. Using a binary search,
+ * the appropriate entry can be located.
+ */
+
+#define FC_CASE_FOLD_RANGE 0
+#define FC_CASE_FOLD_EVEN_ODD 1
+#define FC_CASE_FOLD_FULL 2
+
+typedef struct _FcCaseFold { /* one entry of the packed case folding table, located by binary search on 'upper' */
+ FcChar32 upper; /* first upper case code point covered by this entry */
+ FcChar16 method : 2; /* FC_CASE_FOLD_RANGE, FC_CASE_FOLD_EVEN_ODD or FC_CASE_FOLD_FULL */
+ FcChar16 count : 14; /* RANGE/EVEN_ODD: number of code points covered starting at 'upper'; FULL: byte length of the folded text */
+ short offset; /* lower - upper for RANGE, table id for FULL */
+} FcCaseFold;
+
#define FC_MAX_FILE_LEN 4096
/*
@@ -746,4 +777,7 @@ FcStrUsesHome (const FcChar8 *s);
FcChar8 *
FcStrLastSlash (const FcChar8 *path);
+FcChar32
+FcStrHashIgnoreCase (const FcChar8 *s);
+
#endif /* _FC_INT_H_ */
diff --git a/src/fclist.c b/src/fclist.c
index 6730f203..aaa90617 100644
--- a/src/fclist.c
+++ b/src/fclist.c
@@ -220,20 +220,6 @@ FcListPatternMatchAny (const FcPattern *p,
}
static FcChar32
-FcListStringHash (const FcChar8 *s)
-{
- FcChar32 h = 0;
- FcChar8 c;
-
- while ((c = *s++))
- {
- c = FcToLower (c);
- h = ((h << 3) ^ (h >> 3)) ^ c;
- }
- return h;
-}
-
-static FcChar32
FcListMatrixHash (const FcMatrix *m)
{
int xx = (int) (m->xx * 100),
@@ -255,7 +241,7 @@ FcListValueHash (FcValue v)
case FcTypeDouble:
return (FcChar32) (int) v.u.d;
case FcTypeString:
- return FcListStringHash (v.u.s);
+ return FcStrHashIgnoreCase (v.u.s);
case FcTypeBool:
return (FcChar32) v.u.b;
case FcTypeMatrix:
diff --git a/src/fcstr.c b/src/fcstr.c
index 06315e6f..61f1897d 100644
--- a/src/fcstr.c
+++ b/src/fcstr.c
@@ -63,16 +63,147 @@ FcStrFree (FcChar8 *s)
free (s);
}
+
+#include "../fc-case/fccase.h"
+
+#define FcCaseFoldUpperCount(cf) \
+ ((cf)->method == FC_CASE_FOLD_FULL ? 1 : (cf)->count) /* code points covered by an entry: a FULL entry maps exactly one, otherwise 'count' is the range length */
+
+#define FC_STR_CANON_BUF_LEN 1024 /* NOTE(review): not referenced in the visible part of this patch -- confirm where it is used */
+
+typedef struct _FcCaseWalker { /* cursor that hands out the case-folded bytes of a string one at a time */
+ const FcChar8 *read; /* next pending byte of a folded replacement, or 0 when nothing is buffered */
+ const FcChar8 *src; /* next unread byte of the source string */
+ int len; /* set from strlen() of the source and decremented as 'src' advances */
+ FcChar8 utf8[FC_MAX_CASE_FOLD_CHARS + 1]; /* NUL-terminated folded bytes for the most recent multi-byte character */
+} FcCaseWalker;
+
+/*
+ * Point walker 'w' at the start of the NUL-terminated string 'src',
+ * with no folded bytes pending.
+ */
+static void
+FcStrCaseWalkerInit (const FcChar8 *src, FcCaseWalker *w)
+{
+    w->src = src;
+    w->read = 0;
+    /* strlen() takes a plain char pointer and returns size_t; cast both ways explicitly */
+    w->len = (int) strlen ((const char *) src);
+}
+
+/*
+ * Slow path of the walker for a byte that starts a multi-byte UTF-8
+ * sequence.
+ *
+ * The caller has already consumed the lead byte 'r': w->src - 1 points
+ * at it and w->len has already been decremented.  The character is
+ * decoded and looked up in fcCaseFold[].  When a folding applies, the
+ * folded UTF-8 bytes are stored in w->utf8, the remaining source bytes
+ * of the character are skipped, and the first folded byte is returned;
+ * the rest are handed out later through w->read.  Otherwise 'r' is
+ * returned unchanged and the following source bytes are left in place.
+ */
+static FcChar8
+FcStrCaseWalkerLong (FcCaseWalker *w, FcChar8 r)
+{
+    FcChar32 ucs4;
+    int slen;
+    int min, max;
+
+    /*
+     * Decoding starts at the lead byte, one byte before w->src, so one
+     * more byte is available than w->len records.  Passing w->len alone
+     * made a multi-byte character at the very end of the string fail to
+     * decode, so it was never folded.
+     */
+    slen = FcUtf8ToUcs4 (w->src - 1, &ucs4, w->len + 1);
+    if (slen <= 0)
+        return r;
+    if (ucs4 < FC_MIN_FOLD_CHAR || FC_MAX_FOLD_CHAR < ucs4)
+        return r;
+
+    /*
+     * Binary search restricted to valid indices.  Starting with
+     * max == FC_NUM_CASE_FOLD allowed 'mid' to index one entry past
+     * the end of the table.
+     */
+    min = 0;
+    max = FC_NUM_CASE_FOLD - 1;
+    while (min <= max)
+    {
+        int mid = (min + max) >> 1;
+        const FcCaseFold *fold = &fcCaseFold[mid];
+        FcChar32 low = fold->upper;
+        FcChar32 high = low + FcCaseFoldUpperCount (fold);
+        int dlen;
+
+        if (high <= ucs4)
+        {
+            min = mid + 1;
+            continue;
+        }
+        if (ucs4 < low)
+        {
+            max = mid - 1;
+            continue;
+        }
+
+        /* low <= ucs4 < high: this entry covers the character */
+        switch (fold->method) {
+        case FC_CASE_FOLD_EVEN_ODD:
+            /* upper and lower case alternate; only code points with
+             * the same parity as 'upper' are upper case */
+            if ((ucs4 & 1) != (fold->upper & 1))
+                return r;
+            /* fall through ... */
+        default:
+            dlen = FcUcs4ToUtf8 (ucs4 + fold->offset, w->utf8);
+            break;
+        case FC_CASE_FOLD_FULL:
+            dlen = fold->count;
+            memcpy (w->utf8, fcCaseFoldChars + fold->offset, dlen);
+            break;
+        }
+
+        /* consume rest of src utf-8 bytes */
+        w->src += slen - 1;
+        w->len -= slen - 1;
+
+        /* read from temp buffer */
+        w->utf8[dlen] = '\0';
+        w->read = w->utf8;
+        return *w->read++;
+    }
+    return r;
+}
+
+/*
+ * Return the next case-folded byte of the walked string, or 0 once
+ * the terminating NUL has been read.
+ */
+static FcChar8
+FcStrCaseWalkerNext (FcCaseWalker *w)
+{
+    FcChar8 ch;
+
+    /* hand out bytes left over from an earlier multi-byte folding first */
+    if (w->read != 0)
+    {
+        ch = *w->read;
+        w->read++;
+        if (ch != 0)
+            return ch;
+        w->read = 0;
+    }
+
+    ch = *w->src;
+    w->src++;
+    w->len--;
+
+    /* lead byte of a multi-byte sequence: take the table-driven path */
+    if ((ch & 0xc0) == 0xc0)
+        return FcStrCaseWalkerLong (w, ch);
+
+    /* plain ASCII folding */
+    return ('A' <= ch && ch <= 'Z') ? (FcChar8) (ch - 'A' + 'a') : ch;
+}
+
+/*
+ * Same as FcStrCaseWalkerNext, except that ' ' bytes read from the
+ * source string are skipped.  Bytes pending in the fold buffer are
+ * returned as they are.
+ */
+static FcChar8
+FcStrCaseWalkerNextIgnoreBlanks (FcCaseWalker *w)
+{
+    FcChar8 ch;
+
+    /* hand out bytes left over from an earlier multi-byte folding first */
+    if (w->read != 0)
+    {
+        ch = *w->read;
+        w->read++;
+        if (ch != 0)
+            return ch;
+        w->read = 0;
+    }
+
+    /* take source bytes until one is not a blank */
+    for (;;)
+    {
+        ch = *w->src;
+        w->src++;
+        w->len--;
+        if (ch != ' ')
+            break;
+    }
+
+    /* lead byte of a multi-byte sequence: take the table-driven path */
+    if ((ch & 0xc0) == 0xc0)
+        return FcStrCaseWalkerLong (w, ch);
+
+    /* plain ASCII folding */
+    return ('A' <= ch && ch <= 'Z') ? (FcChar8) (ch - 'A' + 'a') : ch;
+}
+
int
FcStrCmpIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
{
- FcChar8 c1, c2;
+ FcCaseWalker w1, w2;
+ FcChar8 c1, c2;
+
+ if (s1 == s2) return 0;
+
+ FcStrCaseWalkerInit (s1, &w1);
+ FcStrCaseWalkerInit (s2, &w2);
for (;;)
{
- c1 = *s1++;
- c2 = *s2++;
- if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
+ c1 = FcStrCaseWalkerNext (&w1);
+ c2 = FcStrCaseWalkerNext (&w2);
+ if (!c1 || (c1 != c2))
break;
}
return (int) c1 - (int) c2;
@@ -81,17 +212,19 @@ FcStrCmpIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
int
FcStrCmpIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2)
{
- FcChar8 c1, c2;
+ FcCaseWalker w1, w2;
+ FcChar8 c1, c2;
+
+ if (s1 == s2) return 0;
+
+ FcStrCaseWalkerInit (s1, &w1);
+ FcStrCaseWalkerInit (s2, &w2);
for (;;)
{
- do
- c1 = *s1++;
- while (c1 == ' ');
- do
- c2 = *s2++;
- while (c2 == ' ');
- if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
+ c1 = FcStrCaseWalkerNextIgnoreBlanks (&w1);
+ c2 = FcStrCaseWalkerNextIgnoreBlanks (&w2);
+ if (!c1 || (c1 != c2))
break;
}
return (int) c1 - (int) c2;
@@ -115,23 +248,40 @@ FcStrCmp (const FcChar8 *s1, const FcChar8 *s2)
}
/*
+ * Return a hash value for a string
+ */
+
+FcChar32
+FcStrHashIgnoreCase (const FcChar8 *s)
+{
+    FcCaseWalker walker;
+    FcChar32 hash = 0;
+    FcChar8 ch;
+
+    /* mix every case-folded byte of 's' into the hash, stopping at NUL */
+    FcStrCaseWalkerInit (s, &walker);
+    for (ch = FcStrCaseWalkerNext (&walker);
+         ch != 0;
+         ch = FcStrCaseWalkerNext (&walker))
+    {
+        hash = ((hash << 3) ^ (hash >> 3)) ^ ch;
+    }
+    return hash;
+}
+
+/*
* Is the head of s1 equal to s2?
*/
static FcBool
FcStrIsAtIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2)
{
- FcChar8 c1, c2;
+ FcCaseWalker w1, w2;
+ FcChar8 c1, c2;
+
+ FcStrCaseWalkerInit (s1, &w1);
+ FcStrCaseWalkerInit (s2, &w2);
for (;;)
{
- do
- c1 = *s1++;
- while (c1 == ' ');
- do
- c2 = *s2++;
- while (c2 == ' ');
- if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
+ c1 = FcStrCaseWalkerNextIgnoreBlanks (&w1);
+ c2 = FcStrCaseWalkerNextIgnoreBlanks (&w2);
+ if (!c1 || (c1 != c2))
break;
}
return c1 == c2 || !c2;
@@ -160,13 +310,17 @@ FcStrContainsIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2)
static FcBool
FcStrIsAtIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
{
- FcChar8 c1, c2;
+ FcCaseWalker w1, w2;
+ FcChar8 c1, c2;
+
+ FcStrCaseWalkerInit (s1, &w1);
+ FcStrCaseWalkerInit (s2, &w2);
for (;;)
{
- c1 = *s1++;
- c2 = *s2++;
- if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
+ c1 = FcStrCaseWalkerNext (&w1);
+ c2 = FcStrCaseWalkerNext (&w2);
+ if (!c1 || (c1 != c2))
break;
}
return c1 == c2 || !c2;
@@ -191,52 +345,45 @@ FcStrContainsIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
const FcChar8 *
FcStrStrIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
{
- FcChar8 c1, c2;
- const FcChar8 * p = s1;
- const FcChar8 * b = s2;
+ FcCaseWalker w1, w2; /* case-folding readers: w1 over s1 (text searched), w2 over s2 (text sought) */
+ FcChar8 c1, c2;
+ const FcChar8 *cur; /* w1's source position before the candidate byte was read; returned on a match */
if (!s1 || !s2)
return 0;
if (s1 == s2)
return s1;
-
-again:
- c2 = *s2++;
- c2 = FcToLower (c2);
-
- if (!c2)
- return 0;
-
- for (;;)
+
+ FcStrCaseWalkerInit (s1, &w1);
+ FcStrCaseWalkerInit (s2, &w2);
+
+ c2 = FcStrCaseWalkerNext (&w2); /* first folded byte of s2; w2 now sits just past it */
+
+ for (;;)
{
- p = s1;
- c1 = *s1++;
- if (!c1 || (c1 = FcToLower (c1)) == c2)
+ cur = w1.src; /* NOTE(review): while folded bytes are pending in w1.read, w1.src is already past the character that produced them */
+ c1 = FcStrCaseWalkerNext (&w1);
+ if (!c1) /* end of s1 reached without a match */
break;
- }
+ if (c1 == c2) /* first byte matches: compare the rest on copies so w1 and w2 keep their positions */
+ {
+ FcCaseWalker w1t = w1;
+ FcCaseWalker w2t = w2;
+ FcChar8 c1t, c2t;
- if (c1 != c2)
- return 0;
+ for (;;)
+ {
+ c1t = FcStrCaseWalkerNext (&w1t);
+ c2t = FcStrCaseWalkerNext (&w2t);
- for (;;)
- {
- c1 = *s1;
- c2 = *s2;
- if (c1 && c2 && (c1 = FcToLower (c1)) != (c2 = FcToLower (c2)))
- {
- s1 = p + 1;
- s2 = b;
- goto again;
+ if (!c2t) /* all of s2 consumed: match found */
+ return cur;
+ if (c2t != c1t) /* mismatch (including s1 ending first): try the next position in s1 */
+ break;
+ }
}
- if (!c2)
- return p;
- if (!c1)
- return 0;
- ++ s1;
- ++ s2;
}
-
return 0;
}