Adopt some RedHat suggestions for standard font configuration.

Add new helper program 'fc-case' to construct case folding tables from standard Unicode CaseFolding.txt file Re-implement case insensitive functions with Unicode aware versions (including full case folding mappings)
author: Keith Packard <keithp@keithp.com> 2004-12-29 09:15:17 +0000
committer: Keith Packard <keithp@keithp.com> 2004-12-29 09:15:17 +0000
commit: 192296d852011f4a2abb6e9fd1ee741fa7f81673 (patch)
tree: 0a6924e8b0b4a950d5eb78bbfc541be3367ad9ae /src
parent: 5cf8c5364f1b7a676f52b480fa55c571cadc6fda (diff)
3 files changed, 240 insertions, 73 deletions
diff --git a/src/fcint.h b/src/fcint.h
index 106a38b3..4dca3c58 100644
--- a/src/fcint.h
+++ b/src/fcint.h
@@ -231,6 +231,37 @@ typedef struct _FcGlyphName {
     FcChar8	name[1];	/* name extends beyond struct */
 } FcGlyphName;
 
+/*
+ * To perform case-insensitive string comparisons, a table
+ * is used which holds three different kinds of folding data.
+ * 
+ * The first is a range of upper case values mapping to a range
+ * of their lower case equivalents.  Within each range, the offset
+ * between upper and lower case is constant.
+ *
+ * The second is a range of upper case values which are interleaved
+ * with their lower case equivalents.
+ * 
+ * The third is a set of raw unicode values mapping to a list
+ * of unicode values for comparison purposes.  This allows conversion
+ * of ß to "ss" so that SS, ss and ß all match.  A separate array
+ * holds the list of unicode values for each entry.
+ *
+ * These are packed into a single table.  Using a binary search,
+ * the appropriate entry can be located.
+ */
+
+#define FC_CASE_FOLD_RANGE	    0
+#define FC_CASE_FOLD_EVEN_ODD	    1
+#define FC_CASE_FOLD_FULL	    2
+
+typedef struct _FcCaseFold {
+    FcChar32	upper;
+    FcChar16	method : 2;
+    FcChar16	count : 14;
+    short    	offset;	    /* lower - upper for RANGE, table id for FULL */
+} FcCaseFold;
+
 #define FC_MAX_FILE_LEN	    4096
 
 /*
@@ -746,4 +777,7 @@ FcStrUsesHome (const FcChar8 *s);
 FcChar8 *
 FcStrLastSlash (const FcChar8  *path);
 
+FcChar32
+FcStrHashIgnoreCase (const FcChar8 *s);
+
 #endif /* _FC_INT_H_ */
diff --git a/src/fclist.c b/src/fclist.c
index 6730f203..aaa90617 100644
--- a/src/fclist.c
+++ b/src/fclist.c
@@ -220,20 +220,6 @@ FcListPatternMatchAny (const FcPattern *p,
 }
 
 static FcChar32
-FcListStringHash (const FcChar8	*s)
-{
-    FcChar32	h = 0;
-    FcChar8	c;
-
-    while ((c = *s++))
-    {
-	c = FcToLower (c);
-	h = ((h << 3) ^ (h >> 3)) ^ c;
-    }
-    return h;
-}
-
-static FcChar32
 FcListMatrixHash (const FcMatrix *m)
 {
     int	    xx = (int) (m->xx * 100), 
@@ -255,7 +241,7 @@ FcListValueHash (FcValue    v)
     case FcTypeDouble:
 	return (FcChar32) (int) v.u.d;
     case FcTypeString:
-	return FcListStringHash (v.u.s);
+	return FcStrHashIgnoreCase (v.u.s);
     case FcTypeBool:
 	return (FcChar32) v.u.b;
     case FcTypeMatrix:
diff --git a/src/fcstr.c b/src/fcstr.c
index 06315e6f..61f1897d 100644
--- a/src/fcstr.c
+++ b/src/fcstr.c
@@ -63,16 +63,147 @@ FcStrFree (FcChar8 *s)
     free (s);
 }
 
+
+#include "../fc-case/fccase.h"
+
+#define FcCaseFoldUpperCount(cf) \
+    ((cf)->method == FC_CASE_FOLD_FULL ? 1 : (cf)->count)
+
+#define FC_STR_CANON_BUF_LEN	1024
+
+typedef struct _FcCaseWalker {
+    const FcChar8   *read;
+    const FcChar8   *src;
+    int		    len;
+    FcChar8	    utf8[FC_MAX_CASE_FOLD_CHARS + 1];
+} FcCaseWalker;
+
+static void
+FcStrCaseWalkerInit (const FcChar8 *src, FcCaseWalker *w)
+{
+    w->src = src;
+    w->read = 0;
+    w->len = strlen (src);
+}
+
+static FcChar8
+FcStrCaseWalkerLong (FcCaseWalker *w, FcChar8 r)
+{
+    FcChar32	ucs4;
+    int		slen;
+
+    slen = FcUtf8ToUcs4 (w->src - 1, &ucs4, w->len);
+    if (slen <= 0)
+	return r;
+    if (FC_MIN_FOLD_CHAR <= ucs4 && ucs4 <= FC_MAX_FOLD_CHAR)
+    {
+	int min = 0;
+	int max = FC_NUM_CASE_FOLD;
+
+	while (min <= max)
+	{
+	    int		mid = (min + max) >> 1;
+	    FcChar32    low = fcCaseFold[mid].upper;
+	    FcChar32    high = low + FcCaseFoldUpperCount (&fcCaseFold[mid]);
+	    
+	    if (high <= ucs4)
+		min = mid + 1;
+	    else if (ucs4 < low)
+		max = mid - 1;
+	    else
+	    {
+		const FcCaseFold    *fold = &fcCaseFold[mid];
+		int		    dlen;
+		
+		switch (fold->method) {
+		case  FC_CASE_FOLD_EVEN_ODD:
+		    if ((ucs4 & 1) != (fold->upper & 1))
+			return r;
+		    /* fall through ... */
+		default:
+		    dlen = FcUcs4ToUtf8 (ucs4 + fold->offset, w->utf8);
+		    break;
+		case FC_CASE_FOLD_FULL:
+		    dlen = fold->count;
+		    memcpy (w->utf8, fcCaseFoldChars + fold->offset, dlen);
+		    break;
+		}
+		
+		/* consume rest of src utf-8 bytes */
+		w->src += slen - 1;
+		w->len -= slen - 1;
+		
+		/* read from temp buffer */
+		w->utf8[dlen] = '\0';
+		w->read = w->utf8;
+		return *w->read++;
+	    }
+	}
+    }
+    return r;
+}
+
+static FcChar8
+FcStrCaseWalkerNext (FcCaseWalker *w)
+{
+    FcChar8	r;
+
+    if (w->read)
+    {
+	if ((r = *w->read++))
+	    return r;
+	w->read = 0;
+    }
+    r = *w->src++;
+    --w->len;
+    
+    if ((r & 0xc0) == 0xc0)
+	return FcStrCaseWalkerLong (w, r);
+    if ('A' <= r && r <= 'Z')
+        r = r - 'A' + 'a';
+    return r;
+}
+
+static FcChar8
+FcStrCaseWalkerNextIgnoreBlanks (FcCaseWalker *w)
+{
+    FcChar8	r;
+
+    if (w->read)
+    {
+	if ((r = *w->read++))
+	    return r;
+	w->read = 0;
+    }
+    do
+    {
+	r = *w->src++;
+	--w->len;
+    } while (r == ' ');
+    
+    if ((r & 0xc0) == 0xc0)
+	return FcStrCaseWalkerLong (w, r);
+    if ('A' <= r && r <= 'Z')
+        r = r - 'A' + 'a';
+    return r;
+}
+
 int
 FcStrCmpIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
 {
-    FcChar8 c1, c2;
+    FcCaseWalker    w1, w2;
+    FcChar8	    c1, c2;
+
+    if (s1 == s2) return 0;
+    
+    FcStrCaseWalkerInit (s1, &w1);
+    FcStrCaseWalkerInit (s2, &w2);
     
     for (;;) 
     {
-	c1 = *s1++;
-	c2 = *s2++;
-	if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
+	c1 = FcStrCaseWalkerNext (&w1);
+	c2 = FcStrCaseWalkerNext (&w2);
+	if (!c1 || (c1 != c2))
 	    break;
     }
     return (int) c1 - (int) c2;
@@ -81,17 +212,19 @@ FcStrCmpIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
 int
 FcStrCmpIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2)
 {
-    FcChar8 c1, c2;
+    FcCaseWalker    w1, w2;
+    FcChar8	    c1, c2;
+
+    if (s1 == s2) return 0;
+    
+    FcStrCaseWalkerInit (s1, &w1);
+    FcStrCaseWalkerInit (s2, &w2);
     
     for (;;) 
     {
-	do
-	    c1 = *s1++;
-	while (c1 == ' ');
-	do
-	    c2 = *s2++;
-	while (c2 == ' ');
-	if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
+	c1 = FcStrCaseWalkerNextIgnoreBlanks (&w1);
+	c2 = FcStrCaseWalkerNextIgnoreBlanks (&w2);
+	if (!c1 || (c1 != c2))
 	    break;
     }
     return (int) c1 - (int) c2;
@@ -115,23 +248,40 @@ FcStrCmp (const FcChar8 *s1, const FcChar8 *s2)
 }
 
 /*
+ * Return a hash value for a string
+ */
+
+FcChar32
+FcStrHashIgnoreCase (const FcChar8 *s)
+{
+    FcChar32	    h = 0;
+    FcCaseWalker    w;
+    FcChar8	    c;
+
+    FcStrCaseWalkerInit (s, &w);
+    while ((c = FcStrCaseWalkerNext (&w)))
+	h = ((h << 3) ^ (h >> 3)) ^ c;
+    return h;
+}
+
+/*
  * Is the head of s1 equal to s2?
  */
 
 static FcBool
 FcStrIsAtIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2)
 {
-    FcChar8 c1, c2;
+    FcCaseWalker    w1, w2;
+    FcChar8	    c1, c2;
+
+    FcStrCaseWalkerInit (s1, &w1);
+    FcStrCaseWalkerInit (s2, &w2);
     
     for (;;) 
     {
-	do
-	    c1 = *s1++;
-	while (c1 == ' ');
-	do
-	    c2 = *s2++;
-	while (c2 == ' ');
-	if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
+	c1 = FcStrCaseWalkerNextIgnoreBlanks (&w1);
+	c2 = FcStrCaseWalkerNextIgnoreBlanks (&w2);
+	if (!c1 || (c1 != c2))
 	    break;
     }
     return c1 == c2 || !c2;
@@ -160,13 +310,17 @@ FcStrContainsIgnoreBlanksAndCase (const FcChar8 *s1, const FcChar8 *s2)
 static FcBool
 FcStrIsAtIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
 {
-    FcChar8 c1, c2;
+    FcCaseWalker    w1, w2;
+    FcChar8	    c1, c2;
+
+    FcStrCaseWalkerInit (s1, &w1);
+    FcStrCaseWalkerInit (s2, &w2);
     
     for (;;) 
     {
-	c1 = *s1++;
-	c2 = *s2++;
-	if (!c1 || (c1 != c2 && (c1 = FcToLower(c1)) != (c2 = FcToLower(c2))))
+	c1 = FcStrCaseWalkerNext (&w1);
+	c2 = FcStrCaseWalkerNext (&w2);
+	if (!c1 || (c1 != c2))
 	    break;
     }
     return c1 == c2 || !c2;
@@ -191,52 +345,45 @@ FcStrContainsIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
 const FcChar8 *
 FcStrStrIgnoreCase (const FcChar8 *s1, const FcChar8 *s2)
 {
-    FcChar8 c1, c2;
-    const FcChar8 * p = s1;
-    const FcChar8 * b = s2;
+    FcCaseWalker    w1, w2;
+    FcChar8	    c1, c2;
+    const FcChar8   *cur;
 
     if (!s1 || !s2)
 	return 0;
 
     if (s1 == s2)
 	return s1;
-
-again:
-    c2 = *s2++;
-    c2 = FcToLower (c2);
-
-    if (!c2)
-	return 0;
-
-    for (;;) 
+    
+    FcStrCaseWalkerInit (s1, &w1);
+    FcStrCaseWalkerInit (s2, &w2);
+    
+    c2 = FcStrCaseWalkerNext (&w2);
+    
+    for (;;)
     {
-	p = s1;
-	c1 = *s1++;
-	if (!c1 || (c1 = FcToLower (c1)) == c2)
+	cur = w1.src;
+	c1 = FcStrCaseWalkerNext (&w1);
+	if (!c1)
 	    break;
-    }
+	if (c1 == c2)
+	{
+	    FcCaseWalker    w1t = w1;
+	    FcCaseWalker    w2t = w2;
+	    FcChar8	    c1t, c2t;
 
-    if (c1 != c2)
-	return 0;
+	    for (;;)
+	    {
+		c1t = FcStrCaseWalkerNext (&w1t);
+		c2t = FcStrCaseWalkerNext (&w2t);
 
-    for (;;)
-    {
-	c1 = *s1;
-	c2 = *s2;
-	if (c1 && c2 && (c1 = FcToLower (c1)) != (c2 = FcToLower (c2)))
-	{
-	    s1 = p + 1;
-	    s2 = b;
-	    goto again;
+		if (!c2t)
+		    return cur;
+		if (c2t != c1t)
+		    break;
+	    }
 	}
-	if (!c2)
-	    return p;
-	if (!c1)
-	    return 0;
-	++ s1;
-	++ s2;
     }
-
     return 0;
 }
author	Keith Packard <keithp@keithp.com>	2004-12-29 09:15:17 +0000
committer	Keith Packard <keithp@keithp.com>	2004-12-29 09:15:17 +0000
commit	192296d852011f4a2abb6e9fd1ee741fa7f81673 (patch)
tree	0a6924e8b0b4a950d5eb78bbfc541be3367ad9ae /src
parent	5cf8c5364f1b7a676f52b480fa55c571cadc6fda (diff)