diff options
| | | |
|---|---|---|
| author | Gary Wong <gtw@gnu.org> | 2009-09-02 21:12:23 -0600 |
| committer | Gary Wong <gtw@gnu.org> | 2009-09-02 21:12:23 -0600 |
| commit | 5508fa4a9733b8f6e382b3b98d8de0481ebceaef (patch) | |
| tree | e1ad5bfc46ec993031651a8df18b984804053ed7 | |
| parent | 472c13261930f1a1803abd2aa24a137a66fc0a42 (diff) | |
Clean up UTF-8 processing.
| | | |
|---|---|---|
| -rw-r--r-- | ChangeLog | 13 |
| -rw-r--r-- | configure.ac | 7 |
| -rw-r--r-- | decorate-render.c | 94 |
| -rw-r--r-- | frame.c | 6 |
| -rw-r--r-- | gwm.c | 20 |
| -rw-r--r-- | gwm.h | 2 |
| -rw-r--r-- | managed.c | 6 |
| -rw-r--r-- | utf8.c | 266 |
| -rw-r--r-- | utf8.h | 16 |
| -rw-r--r-- | window-table.c | 21 |
10 files changed, 355 insertions, 96 deletions
@@ -1,3 +1,16 @@ +2009-09-02 Gary Wong <gtw@gnu.org> + + * utf8.c (utf8_illegal, utf8_length, utf8_next) + (utf8_dup_valid, utf8_dup_valid_len, dup_valid_common): New functions. + (to_utf8) [DEBUG]: Add sanity checks. + * decorate-render.c (render_text): Use utf8_length() and utf8_next(). + (text_width): Use utf8_next(). + * managed.c (managed_property_change): Use utf8_dup_valid_len(). + + * frame.c (recalc_size): Avoid needless C99isms. + * gwm.c (place_window): Likewise. + * window-table.c (table_hash): Likewise. + 2009-09-01 Gary Wong <gtw@gnu.org> * actions.c (action_map_raise, action_window_list_menu) diff --git a/configure.ac b/configure.ac index b37d62d..535aac9 100644 --- a/configure.ac +++ b/configure.ac @@ -43,6 +43,10 @@ AC_DEFUN([GWM_OPTIONAL_PACKAGE],[ ]) AC_DEFINE(_GNU_SOURCE, 1, [Enable GNU extensions on systems that have them.]) +AH_BOTTOM([/* If we haven't been told that we're debugging, assume we aren't. */ +#ifndef DEBUG +#define NDEBUG 1 +#endif]) # Checks for programs: AC_PROG_CC @@ -77,6 +81,9 @@ AC_SEARCH_LIBS(iconv, iconv) # Checks for header files: AC_CHECK_HEADERS(iconv.h mcheck.h poll.h) +# Checks for types: +AC_TYPE_UINT64_T + # Checks for functions: AC_CHECK_FUNCS(iconv mtrace ppoll) diff --git a/decorate-render.c b/decorate-render.c index a04a3fc..01e535c 100644 --- a/decorate-render.c +++ b/decorate-render.c @@ -39,6 +39,7 @@ #include "button.h" #include "decorate-render.h" #include "frame.h" +#include "utf8.h" enum style_id { STYLE_TITLE, STYLE_MENU, NUM_STYLES @@ -319,47 +320,7 @@ static xcb_void_cookie_t render_text( xcb_drawable_t drawable, int screen, const unsigned char *p; uint32_t *buf; - if( !text || !*text ) { - xcb_void_cookie_t r = { 0 }; - - return r; - } - - len = 0; - p = (const unsigned char *) text; - for(;;) - if( !p[ 0 ] ) - /* End of string. */ - break; - else if( !( p[ 0 ] & 0x80 ) ) { - /* Legal single byte character. 
*/ - len++; - p++; - } else if( *p >= 0xC2 && *p <= 0xDF && - p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF ) { - /* Legal two byte character. */ - len++; - p += 2; - } else if( *p >= 0xE0 && *p <= 0xEF && - p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && - p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && - ( *p > 0xE0 || p[ 1 ] > 0x80 ) ) { - /* Legal three byte character. */ - len++; - p += 3; - } else if( *p >= 0xF0 && *p <= 0xF4 && - p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && - p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && - p[ 3 ] >= 0x80 && p[ 3 ] <= 0xBF && - ( *p > 0xF0 || p[ 1 ] > 0x80 ) ) { - /* Legal four byte character. */ - len++; - p += 4; - } else - /* Illegal character: ignore this byte and continue. */ - p++; - - if( !len ) { + if( !text || !( len = utf8_length( (const unsigned char *) text ) ) ) { xcb_void_cookie_t r = { 0 }; return r; @@ -367,42 +328,9 @@ static xcb_void_cookie_t render_text( xcb_drawable_t drawable, int screen, buf = alloca( len * sizeof *buf ); - for( p = (const unsigned char *) text, i = 0; i < len; i++ ) { - retry: - assert( *p ); - - if( !( p[ 0 ] & 0x80 ) ) - /* One byte character. */ - buf[ i ] = *p++; - else if( *p >= 0xC2 && *p <= 0xDF && - p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF ) { - /* Two byte character. */ - buf[ i ] = ( ( p[ 0 ] & 0x1F ) << 6 ) | ( p[ 1 ] & 0x3F ); - p += 2; - } else if( *p >= 0xE0 && *p <= 0xEF && - p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && - p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && - ( *p > 0xE0 || p[ 1 ] > 0x80 ) ) { - /* Three byte character. */ - buf[ i ] = ( ( p[ 0 ] & 0x0F ) << 12 ) | - ( ( p[ 1 ] & 0x3F ) << 6 ) | ( p[ 2 ] & 0x3F ); - p += 3; - } else if( *p >= 0xF0 && *p <= 0xF4 && - p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && - p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && - p[ 3 ] >= 0x80 && p[ 3 ] <= 0xBF && - ( *p > 0xF0 || p[ 1 ] > 0x80 ) ) { - /* Four byte character. 
*/ - buf[ i ] = ( ( p[ 0 ] & 0x07 ) << 18 ) | - ( ( p[ 1 ] & 0x3F ) << 12 ) | ( ( p[ 2 ] & 0x3F ) << 6 ) | - ( p[ 3 ] & 0x3F ); - p += 4; - } else { - p++; - goto retry; - } - } - + for( i = 0, p = (const unsigned char *) text; i < len; i++ ) + buf[ i ] = utf8_next( &p ); + src = pictures[ screen ].pic; dest = xcb_generate_id( c ); @@ -571,16 +499,16 @@ static xcb_void_cookie_t render_text( xcb_drawable_t drawable, int screen, static int text_width( enum style_id style, const char *text ) { - const char *p; - int width; - - for( width = 0, p = text; *p; p++ ) { + const unsigned char *p = (const unsigned char *) text; + int width = 0; + + while( *p ) { int dx, dy; - query_metrics( style, *p, &dx, &dy ); + query_metrics( style, utf8_next( &p ), &dx, &dy ); width += dx; } - + return width; } @@ -665,8 +665,12 @@ static void recalc_size( struct gwm_window *window, int x, int y, new_height != window->u.frame.height ) { int disp_width, disp_height; int new_fb_width, new_fb_height; - uint32_t values[ 4 ] = { new_l, new_t, new_width, new_height }; + uint32_t values[ 4 ]; + values[ 0 ] = new_l; + values[ 1 ] = new_t; + values[ 2 ] = new_width; + values[ 3 ] = new_height; xcb_configure_window( c, window->w, XCB_CONFIG_WINDOW_X | XCB_CONFIG_WINDOW_Y | XCB_CONFIG_WINDOW_WIDTH | XCB_CONFIG_WINDOW_HEIGHT, values ); @@ -34,6 +34,9 @@ #endif #include <signal.h> #include <stdarg.h> +#if HAVE_STDINT_H +#include <stdint.h> +#endif #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -978,8 +981,7 @@ extern void install_window_colormap( int screen, struct gwm_window *window, screens[ screen ]->default_colormap, t ); } -/* Return TRUE iff an event is the only button pressed (and so would - initiate a passive grab). */ +/* Return TRUE iff an event is the only button pressed. 
*/ extern CONST int initial_press( xcb_button_press_event_t *ev ) { return !( ev->state & 0x1F00 ); @@ -1017,21 +1019,29 @@ static void place_window( struct gwm_window *window, int *x, int *y, int sx = screens[ window->screen ]->width_in_pixels - width; int sy = screens[ window->screen ]->height_in_pixels - height; - unsigned long long hash; - +#if defined( UINT64_MAX ) || defined( uint64_t ) + uint64_t hash; +#else + unsigned long hash; +#endif + if( sx <= 0 ) *x = 0; else { hash = window->w * 0x9B4A36D1; +#if defined( UINT64_MAX ) || defined( uint64_t ) || ULONG_MAX > 0xFFFFFFFFUL hash ^= hash >> 32; +#endif *x = hash % sx; } if( sy <= 0 ) *y = 0; else { - hash = window->w * 0x9B4A36D1; + hash = window->w * 0xA6E34925; +#if defined( UINT64_MAX ) || defined( uint64_t ) || ULONG_MAX > 0xFFFFFFFFUL hash ^= hash >> 32; +#endif *y = hash % sy; } } @@ -236,7 +236,7 @@ struct gwm_window { /* from WM_HINTS: */ int hints; /* see HINT_* above */ /* from WM_NAME and _NET_WM_NAME: */ - char *name; /* must be free()d */ + char *name; /* legal UTF-8; must be free()d */ int net_wm_name; /* from WM_NORMAL_HINTS: */ int min_width, min_height, max_width, max_height, width_inc, @@ -337,10 +337,8 @@ extern void managed_property_change( struct gwm_window *window, int prop, free( window->u.managed.name ); if( p->value_len && p->format == 8 ) { - window->u.managed.name = xmalloc( p->value_len + 1 ); - memcpy( window->u.managed.name, xcb_get_property_value( p ), - p->value_len ); - window->u.managed.name[ p->value_len ] = 0; + window->u.managed.name = (char *) utf8_dup_valid_len( + xcb_get_property_value( p ), p->value_len ); window->u.managed.net_wm_name = 1; if( window->u.managed.state == STATE_NORMAL && @@ -23,6 +23,7 @@ #include <config.h> +#include <assert.h> #if HAVE_ICONV_H #include <iconv.h> #endif @@ -81,6 +82,8 @@ extern char *to_utf8( enum gwm_encoding encoding, const char *in, *outp++ = 0; + assert( !utf8_illegal( (unsigned char *) out ) ); + return xrealloc( out, outp - out 
); } } @@ -95,9 +98,272 @@ extern char *to_utf8( enum gwm_encoding encoding, const char *in, *outp++ = 0; + assert( !utf8_illegal( (unsigned char *) out ) ); + return xrealloc( out, outp - out ); } +extern PURE unsigned char *utf8_illegal( const unsigned char *str ) { + + for(;;) + if( !*str ) + /* End of string. Everything was legal. */ + return NULL; + else if( !( str[ 0 ] & 0x80 ) ) + /* Legal single byte character. */ + str++; + else if( ( str[ 0 ] >= 0x80 && str[ 0 ] <= 0xC1 ) || + ( str[ 0 ] > 0xF4 ) ) + /* Illegal continuation byte, long representation of single + byte character, or overly long sequence. */ + return (unsigned char *) str; + else if( str[ 0 ] >= 0xC2 && str[ 0 ] <= 0xDF ) { + /* Two byte sequence... */ + if( str[ 1 ] < 0x80 || str[ 1 ] > 0xBF ) + /* ...where byte 2 is illegal. */ + return (unsigned char *) str + 1; + else + /* ...which is fully legal. */ + str += 2; + } else if( str[ 0 ] >= 0xE0 && str[ 0 ] <= 0xEF ) { + /* Three byte sequence... */ + if( str[ 1 ] < 0x80 || str[ 1 ] > 0xBF || + ( str[ 0 ] == 0xE0 && str[ 1 ] < 0xA0 ) || + ( str[ 0 ] == 0xED && str[ 1 ] > 0x9F ) ) + /* ...where byte 2 is illegal. */ + return (unsigned char *) str + 1; + else if( str[ 2 ] < 0x80 || str[ 2 ] > 0xBF ) + /* ...where byte 3 is illegal. */ + return (unsigned char *) str + 2; + else + /* ...which is fully legal. */ + str += 3; + } else { + assert( str[ 0 ] >= 0xF0 && str[ 0 ] <= 0xF4 ); + /* Four byte sequence... */ + if( str[ 1 ] < 0x80 || str[ 1 ] > 0xBF || + ( str[ 0 ] == 0xF0 && str[ 1 ] < 0x90 ) || + ( str[ 0 ] == 0xF4 && str[ 1 ] > 0x8F ) ) + /* ...where byte 2 is illegal. */ + return (unsigned char *) str + 1; + else if( str[ 2 ] < 0x80 || str[ 2 ] > 0xBF ) + /* ...where byte 3 is illegal. */ + return (unsigned char *) str + 2; + else if( str[ 3 ] < 0x80 || str[ 3 ] > 0xBF ) + /* ...where byte 4 is illegal. */ + return (unsigned char *) str + 3; + else + /* ...which is fully legal. 
*/ + str += 4; + } +} + +extern PURE int utf8_length( const unsigned char *str ) { + + int len; + + assert( !utf8_illegal( str ) ); + + for( len = 0; *str; len++ ) + if( *str < 0x80 ) + str++; + else if( *str < 0xE0 ) + str += 2; + else if( *str < 0xF0 ) + str += 3; + else + str += 4; + + return len; +} + +static MALLOC unsigned char *dup_valid_common( const unsigned char *str, + int len ) { + + const unsigned char *p; + unsigned char *out, *outp; + + outp = out = xmalloc( len + 1 ); + p = str; + for(;;) + if( outp == out + len ) { + /* End of string. */ + *outp = 0; + assert( !utf8_illegal( out ) ); + return out; + } else if( !( p[ 0 ] & 0x80 ) ) + /* Legal single byte character. */ + *outp++ = *p++; + else if( *p >= 0xC2 && *p <= 0xDF && + p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF ) { + /* Legal two byte character. */ + *outp++ = *p++; + *outp++ = *p++; + } else if( *p >= 0xE0 && *p <= 0xEF && + p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && + p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && + ( *p > 0xE0 || p[ 1 ] > 0x9F ) && + ( *p != 0xED || p[ 1 ] < 0xA0 ) ) { + /* Legal three byte character. */ + *outp++ = *p++; + *outp++ = *p++; + *outp++ = *p++; + } else if( *p >= 0xF0 && *p <= 0xF4 && + p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && + p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && + p[ 3 ] >= 0x80 && p[ 3 ] <= 0xBF && + ( *p > 0xF0 || p[ 1 ] > 0x8F ) && + ( *p != 0xF4 || p[ 1 ] < 0x90 ) ) { + /* Legal four byte character. */ + *outp++ = *p++; + *outp++ = *p++; + *outp++ = *p++; + *outp++ = *p++; + } else + /* Illegal character: ignore this byte and continue. */ + p++; +} + +extern MALLOC unsigned char *utf8_dup_valid_len( const unsigned char *str, + int num_bytes ) { + + int len; + const unsigned char *p; + + len = 0; + p = str; + for(;;) + if( !num_bytes ) + /* End of string. */ + break; + else if( !( p[ 0 ] & 0x80 ) ) { + /* Legal single byte character. 
*/ + len++; + p++; + num_bytes--; + } else if( num_bytes >= 2 && + *p >= 0xC2 && *p <= 0xDF && + p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF ) { + /* Legal two byte character. */ + len += 2; + p += 2; + num_bytes -= 2; + } else if( num_bytes >= 3 && + *p >= 0xE0 && *p <= 0xEF && + p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && + p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && + ( *p > 0xE0 || p[ 1 ] > 0x9F ) && + ( *p != 0xED || p[ 1 ] < 0xA0 ) ) { + /* Legal three byte character. */ + len += 3; + p += 3; + num_bytes -= 3; + } else if( num_bytes >= 4 && + *p >= 0xF0 && *p <= 0xF4 && + p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && + p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && + p[ 3 ] >= 0x80 && p[ 3 ] <= 0xBF && + ( *p > 0xF0 || p[ 1 ] > 0x8F ) && + ( *p != 0xF4 || p[ 1 ] < 0x90 ) ) { + /* Legal four byte character. */ + len += 4; + p += 4; + num_bytes -= 4; + } else { + /* Illegal character: ignore this byte and continue. */ + p++; + num_bytes--; + } + + return dup_valid_common( str, len ); +} + +extern MALLOC unsigned char *utf8_dup_valid( const unsigned char *str ) { + + int len; + const unsigned char *p; + + len = 0; + p = str; + for(;;) + if( !p[ 0 ] ) + /* End of string. */ + break; + else if( !( p[ 0 ] & 0x80 ) ) { + /* Legal single byte character. */ + len++; + p++; + } else if( *p >= 0xC2 && *p <= 0xDF && + p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF ) { + /* Legal two byte character. */ + len += 2; + p += 2; + } else if( *p >= 0xE0 && *p <= 0xEF && + p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && + p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && + ( *p > 0xE0 || p[ 1 ] > 0x9F ) && + ( *p != 0xED || p[ 1 ] < 0xA0 ) ) { + /* Legal three byte character. */ + len += 3; + p += 3; + } else if( *p >= 0xF0 && *p <= 0xF4 && + p[ 1 ] >= 0x80 && p[ 1 ] <= 0xBF && + p[ 2 ] >= 0x80 && p[ 2 ] <= 0xBF && + p[ 3 ] >= 0x80 && p[ 3 ] <= 0xBF && + ( *p > 0xF0 || p[ 1 ] > 0x8F ) && + ( *p != 0xF4 || p[ 1 ] < 0x90 ) ) { + /* Legal four byte character. */ + len += 4; + p += 4; + } else + /* Illegal character: ignore this byte and continue. 
*/ + p++; + + return dup_valid_common( str, len ); +} + +extern uint32_t utf8_next( const unsigned char **p ) { + + uint32_t n; + const unsigned char *c = *p; + + assert( c[ 0 ] < 0x80 || + ( c[ 0 ] >= 0xC2 && c[ 0 ] <= 0xDF && + c[ 1 ] >= 0x80 && c[ 1 ] <= 0xBF ) || + ( c[ 0 ] >= 0xE0 && c[ 0 ] <= 0xEF && + c[ 1 ] >= 0x80 && c[ 1 ] <= 0xBF && + c[ 2 ] >= 0x80 && c[ 2 ] <= 0xBF && + ( c[ 0 ] > 0xE0 || c[ 1 ] > 0x9F ) && + ( c[ 0 ] != 0xED || c[ 1 ] < 0xA0 ) ) || + ( c[ 0 ] >= 0xF0 && c[ 0 ] <= 0xF4 && + c[ 1 ] >= 0x80 && c[ 1 ] <= 0xBF && + c[ 2 ] >= 0x80 && c[ 2 ] <= 0xBF && + c[ 3 ] >= 0x80 && c[ 3 ] <= 0xBF && + ( c[ 0 ] > 0xF0 || c[ 1 ] > 0x8F ) && + ( c[ 0 ] != 0xF4 || c[ 1 ] < 0x90 ) ) ); + + if( !*c ) + return 0; + + if( c[ 0 ] < 0x80 ) { + n = c[ 0 ]; + ( *p )++; + } else if( c[ 0 ] < 0xE0 ) { + n = ( ( c[ 0 ] & 0x1F ) << 6 ) | ( c[ 1 ] & 0x3F ); + *p += 2; + } else if( c[ 0 ] < 0xF0 ) { + n = ( ( c[ 0 ] & 0x0F ) << 12 ) | ( ( c[ 1 ] & 0x3F ) << 6 ) | + ( c[ 2 ] & 0x3F ); + *p += 3; + } else { + n = ( ( c[ 0 ] & 0x07 ) << 18 ) | ( ( c[ 1 ] & 0x3F ) << 12 ) | + ( ( c[ 2 ] & 0x3F ) << 6 ) | ( c[ 3 ] & 0x3F ); + *p += 4; + } + + return n; +} + extern void cleanup_utf8( void ) { #if HAVE_ICONV @@ -8,6 +8,22 @@ enum gwm_encoding { extern char *to_utf8( enum gwm_encoding encoding, const char *in, size_t len ); +/* Return a pointer to the first illegal byte in STR, or NULL if the + entire string is legal. */ +extern PURE unsigned char *utf8_illegal( const unsigned char *str ); + +/* Return the number of characters in STR. */ +extern PURE int utf8_length( const unsigned char *str ); + +/* Duplicate a UTF-8 string. The output is allocated with malloc() and + is guaranteed to be a legal UTF-8 sequence, even if the input was not. */ +extern MALLOC unsigned char *utf8_dup_valid_len( const unsigned char *str, + int num_bytes ); +extern MALLOC unsigned char *utf8_dup_valid( const unsigned char *str ); + +/* Return the character pointed to by P (which is advanced). 
*/ +extern uint32_t utf8_next( const unsigned char **p ); + #if DEBUG extern void cleanup_utf8( void ); #endif diff --git a/window-table.c b/window-table.c index 6970171..36dfc1a 100644 --- a/window-table.c +++ b/window-table.c @@ -24,6 +24,10 @@ #include <config.h> #include <assert.h> +#include <limits.h> +#if HAVE_STDINT_H +#include <stdint.h> +#endif #include <stdlib.h> #include <xcb/xcb.h> @@ -60,7 +64,11 @@ static CONST int table_hash( struct window_table *table, int half, xcb_window_t key ) { unsigned long h0, h1; - unsigned long long l; +#if defined( UINT64_MAX ) || defined( uint64_t ) + uint64_t l; +#else + unsigned long l; +#endif if( half ) { h0 = table->hash ^ 0x79D9C6D4UL; @@ -70,11 +78,20 @@ static CONST int table_hash( struct window_table *table, int half, h1 = table->hash ^ 0x52C694B6UL; } +#if defined( UINT64_MAX ) || defined( uint64_t ) || ULONG_MAX > 0xFFFFFFFFUL l = key * h0; l ^= l >> 32; l *= h1; l ^= l >> 29; - +#else + l = key * h0; + l ^= l >> 16; + l *= h1; + l ^= l >> 13; + l *= h0; + l ^= l >> 15; +#endif + return l & ( table->n - 1 ); } |