#ifndef UTF8_H
#define UTF8_H

/* Declarations for the UTF-8 helpers: conversion from other encodings,
   validation, character counting, duplication and decoding. */

#include <stddef.h> /* size_t */
#include <stdint.h> /* uint32_t */

/* PURE and MALLOC are annotation macros normally supplied by the code
   that includes this header (presumably compiler function attributes --
   confirm against their definition).  Fall back to empty expansions so
   that this header also compiles on its own; when the macros are already
   defined, these guards do nothing. */
#ifndef PURE
#define PURE
#endif

#ifndef MALLOC
#define MALLOC
#endif

/* Source encodings accepted by to_utf8(). */
enum gwm_encoding {
    ENCODING_COMPOUND, /* X Consortium Compound Text (ISO 2022-like) */
    ENCODING_LATIN_1   /* ISO 8859-1 */
};

/* NOTE(review): presumably converts the LEN bytes at IN from ENCODING to
   UTF-8.  Ownership of the returned buffer and the failure behaviour are
   not visible from this header -- confirm against the implementation. */
extern char *to_utf8( enum gwm_encoding encoding, const char *in,
                      size_t len );

/* Return a pointer to the first illegal byte in STR, or NULL if the
   entire string is legal. */
extern PURE unsigned char *utf8_illegal( const unsigned char *str );

/* Return the number of characters in STR. */
extern PURE int utf8_length( const unsigned char *str );

/* Duplicate a UTF-8 string.  The output is allocated with malloc() and
   is guaranteed to be a legal UTF-8 sequence, even if the input was not.
   NOTE(review): NUM_BYTES presumably limits how much of STR is read by
   the _len variant -- confirm against the implementation. */
extern MALLOC unsigned char *utf8_dup_valid_len( const unsigned char *str,
                                                 int num_bytes );
extern MALLOC unsigned char *utf8_dup_valid( const unsigned char *str );

/* Return the character pointed to by P (which is advanced). */
extern uint32_t utf8_next( const unsigned char **p );

#if DEBUG
/* Declared only in DEBUG builds.  NOTE(review): presumably releases
   state held by the UTF-8 code at shutdown -- confirm against the
   implementation. */
extern void cleanup_utf8( void );
#endif

#endif