summaryrefslogtreecommitdiff
path: root/utf8.h
blob: 3bc778571980feb13c604b6da83fe400e7fdd3af (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#ifndef UTF8_H
#define UTF8_H

#include <stddef.h> /* size_t */
#include <stdint.h> /* uint32_t */

/* Text encodings that can be named in the ENCODING argument of
   to_utf8(). */
enum gwm_encoding {
    /* X Consortium Compound Text (ISO 2022-like) */
    ENCODING_COMPOUND = 0,
    /* ISO 8859-1 */
    ENCODING_LATIN_1 = 1
};

/* Convert text to UTF-8 (going by the name; the definition is not in
   this header).  ENCODING presumably identifies the encoding of the
   input IN, and LEN presumably gives the size of IN in bytes -- confirm
   against the definition.  NOTE(review): this header does not say how
   the returned buffer is allocated or who must release it. */
extern char *to_utf8( enum gwm_encoding encoding, const char *in, size_t len );

/* Return a pointer to the first illegal byte in STR, or NULL if the
   entire string is legal.  No length is passed, so STR is presumably
   NUL-terminated (confirm against the definition).  Note that the
   result is not const-qualified even though STR is.  PURE is a project
   macro defined outside this header (presumably a compiler "pure"
   function attribute). */
extern PURE unsigned char *utf8_illegal( const unsigned char *str );

/* Return the number of characters in STR, as an int.  NOTE(review):
   presumably this counts encoded characters rather than bytes, and
   expects STR to be NUL-terminated since no length is passed -- confirm
   against the definition. */
extern PURE int utf8_length( const unsigned char *str );

/* Duplicate a UTF-8 string.  The output is allocated with malloc() and
   is guaranteed to be a legal UTF-8 sequence, even if the input was not.
   Since the result comes from malloc(), the caller is presumably
   expected to release it with free().

   utf8_dup_valid_len() additionally takes NUM_BYTES, presumably the
   number of bytes of STR to consider; utf8_dup_valid() takes no length
   and so presumably reads STR up to its terminating NUL (confirm both
   against the definitions).  MALLOC is a project macro defined outside
   this header. */
extern MALLOC unsigned char *utf8_dup_valid_len( const unsigned char *str,
						 int num_bytes );
extern MALLOC unsigned char *utf8_dup_valid( const unsigned char *str );

/* Return the character pointed to by P (which is advanced).  P is
   passed by address: the character is read starting at *P, and it is
   *P that is updated for the caller (presumably to point just past the
   bytes that encoded the returned character -- confirm against the
   definition). */
extern uint32_t utf8_next( const unsigned char **p );

/* Declared only when DEBUG is non-zero.  NOTE(review): presumably
   releases resources held by the UTF-8 code so that debug builds can
   check for leaks at exit -- confirm against the definition. */
#if DEBUG
extern void cleanup_utf8( void );
#endif

#endif