diff options
-rw-r--r-- | src/hb-buffer.cc | 10 | ||||
-rw-r--r-- | src/hb-utf-private.hh | 307 |
2 files changed, 166 insertions, 151 deletions
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index 76bb10c0..d9205522 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -1298,6 +1298,8 @@ hb_buffer_add_utf (hb_buffer_t *buffer, unsigned int item_offset, int item_length) { + typedef hb_utf_t<T> utf_t; + assert (buffer->content_type == HB_BUFFER_CONTENT_TYPE_UNICODE || (!buffer->len && buffer->content_type == HB_BUFFER_CONTENT_TYPE_INVALID)); @@ -1305,7 +1307,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer, return; if (text_length == -1) - text_length = hb_utf_strlen (text); + text_length = utf_t::strlen (text); if (item_length == -1) item_length = text_length - item_offset; @@ -1328,7 +1330,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer, while (start < prev && buffer->context_len[0] < buffer->CONTEXT_LENGTH) { hb_codepoint_t u; - prev = hb_utf_prev (prev, start, &u); + prev = utf_t::prev (prev, start, &u); buffer->context[0][buffer->context_len[0]++] = u; } } @@ -1339,7 +1341,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer, { hb_codepoint_t u; const T *old_next = next; - next = hb_utf_next (next, end, &u); + next = utf_t::next (next, end, &u); buffer->add (u, old_next - (const T *) text); } @@ -1349,7 +1351,7 @@ hb_buffer_add_utf (hb_buffer_t *buffer, while (next < end && buffer->context_len[1] < buffer->CONTEXT_LENGTH) { hb_codepoint_t u; - next = hb_utf_next (next, end, &u); + next = utf_t::next (next, end, &u); buffer->context[1][buffer->context_len[1]++] = u; } diff --git a/src/hb-utf-private.hh b/src/hb-utf-private.hh index 398f73c6..cbacd67c 100644 --- a/src/hb-utf-private.hh +++ b/src/hb-utf-private.hh @@ -29,202 +29,215 @@ #include "hb-private.hh" +template <typename T, bool validate=true> struct hb_utf_t; + /* UTF-8 */ -static inline const uint8_t * -hb_utf_next (const uint8_t *text, - const uint8_t *end, - hb_codepoint_t *unicode) +template <> +struct hb_utf_t<uint8_t, true> { - /* Written to only accept well-formed sequences. - * Based on ideas from ICU's U8_NEXT. - * Generates a -1 for each ill-formed byte. */ + static inline const uint8_t * + next (const uint8_t *text, + const uint8_t *end, + hb_codepoint_t *unicode) + { + /* Written to only accept well-formed sequences. + * Based on ideas from ICU's U8_NEXT. + * Generates a -1 for each ill-formed byte. */ - hb_codepoint_t c = *text++; + hb_codepoint_t c = *text++; - if (c > 0x7Fu) - { - if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */ + if (c > 0x7Fu) { - unsigned int t1; - if (likely (text < end && - (t1 = text[0] - 0x80u) <= 0x3Fu)) + if (hb_in_range (c, 0xC2u, 0xDFu)) /* Two-byte */ { - c = ((c&0x1Fu)<<6) | t1; - text++; + unsigned int t1; + if (likely (text < end && + (t1 = text[0] - 0x80u) <= 0x3Fu)) + { + c = ((c&0x1Fu)<<6) | t1; + text++; + } + else + goto error; } - else - goto error; - } - else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */ - { - unsigned int t1, t2; - if (likely (1 < end - text && - (t1 = text[0] - 0x80u) <= 0x3Fu && - (t2 = text[1] - 0x80u) <= 0x3Fu)) + else if (hb_in_range (c, 0xE0u, 0xEFu)) /* Three-byte */ { - c = ((c&0xFu)<<12) | (t1<<6) | t2; - if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu))) + unsigned int t1, t2; + if (likely (1 < end - text && + (t1 = text[0] - 0x80u) <= 0x3Fu && + (t2 = text[1] - 0x80u) <= 0x3Fu)) + { + c = ((c&0xFu)<<12) | (t1<<6) | t2; + if (unlikely (c < 0x0800u || hb_in_range (c, 0xD800u, 0xDFFFu))) + goto error; + text += 2; + } + else goto error; - text += 2; } - else - goto error; - } - else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */ - { - unsigned int t1, t2, t3; - if (likely (2 < end - text && - (t1 = text[0] - 0x80u) <= 0x3Fu && - (t2 = text[1] - 0x80u) <= 0x3Fu && - (t3 = text[2] - 0x80u) <= 0x3Fu)) + else if (hb_in_range (c, 0xF0u, 0xF4u)) /* Four-byte */ { - c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3; - if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu))) + unsigned int t1, t2, t3; + if (likely (2 < end - text && + (t1 = text[0] - 0x80u) <= 0x3Fu && + (t2 = text[1] - 0x80u) <= 0x3Fu && + (t3 = text[2] - 0x80u) <= 0x3Fu)) + { + c = ((c&0x7u)<<18) | (t1<<12) | (t2<<6) | t3; + if (unlikely (!hb_in_range (c, 0x10000u, 0x10FFFFu))) + goto error; + text += 3; + } + else goto error; - text += 3; } else goto error; } - else - goto error; - } - - *unicode = c; - return text; - -error: - *unicode = -1; - return text; -} -static inline const uint8_t * -hb_utf_prev (const uint8_t *text, - const uint8_t *start, - hb_codepoint_t *unicode) -{ - const uint8_t *end = text--; - while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) - text--; + *unicode = c; + return text; - if (likely (hb_utf_next (text, end, unicode) == end)) + error: + *unicode = -1; return text; + } - *unicode = -1; - return end - 1; -} + static inline const uint8_t * + prev (const uint8_t *text, + const uint8_t *start, + hb_codepoint_t *unicode) + { + const uint8_t *end = text--; + while (start < text && (*text & 0xc0) == 0x80 && end - text < 4) + text--; + if (likely (next (text, end, unicode) == end)) + return text; -static inline unsigned int -hb_utf_strlen (const uint8_t *text) -{ - return strlen ((const char *) text); -} + *unicode = -1; + return end - 1; + } + + static inline unsigned int + strlen (const uint8_t *text) + { + return ::strlen ((const char *) text); + } +}; /* UTF-16 */ -static inline const uint16_t * -hb_utf_next (const uint16_t *text, - const uint16_t *end, - hb_codepoint_t *unicode) +template <> +struct hb_utf_t<uint16_t, true> { - hb_codepoint_t c = *text++; - - if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) + static inline const uint16_t * + next (const uint16_t *text, + const uint16_t *end, + hb_codepoint_t *unicode) { - *unicode = c; + hb_codepoint_t c = *text++; + + if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) + { + *unicode = c; + return text; + } + + if (likely (hb_in_range (c, 0xD800u, 0xDBFFu))) + { + /* High-surrogate in c */ + hb_codepoint_t l; + if (text < end && ((l = *text), likely (hb_in_range (l, 0xDC00u, 0xDFFFu)))) + { + /* Low-surrogate in l */ + *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u); + text++; + return text; + } + } + + /* Lonely / out-of-order surrogate. */ + *unicode = -1; return text; } - if (likely (hb_in_range (c, 0xD800u, 0xDBFFu))) + static inline const uint16_t * + prev (const uint16_t *text, + const uint16_t *start, + hb_codepoint_t *unicode) { - /* High-surrogate in c */ - hb_codepoint_t l; - if (text < end && ((l = *text), likely (hb_in_range (l, 0xDC00u, 0xDFFFu)))) + const uint16_t *end = text--; + hb_codepoint_t c = *text; + + if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) { - /* Low-surrogate in l */ - *unicode = (c << 10) + l - ((0xD800u << 10) - 0x10000u + 0xDC00u); - text++; - return text; + *unicode = c; + return text; } - } - /* Lonely / out-of-order surrogate. */ - *unicode = -1; - return text; -} + if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu))) + text--; -static inline const uint16_t * -hb_utf_prev (const uint16_t *text, - const uint16_t *start, - hb_codepoint_t *unicode) -{ - const uint16_t *end = text--; - hb_codepoint_t c = *text; + if (likely (next (text, end, unicode) == end)) + return text; - if (likely (!hb_in_range (c, 0xD800u, 0xDFFFu))) - { - *unicode = c; - return text; + *unicode = -1; + return end - 1; } - if (likely (start < text && hb_in_range (c, 0xDC00u, 0xDFFFu))) - text--; - if (likely (hb_utf_next (text, end, unicode) == end)) - return text; + static inline unsigned int + strlen (const uint16_t *text) + { + unsigned int l = 0; + while (*text++) l++; + return l; + } +}; - *unicode = -1; - return end - 1; -} +/* UTF-32 */ -static inline unsigned int -hb_utf_strlen (const uint16_t *text) +template <bool validate> +struct hb_utf_t<uint32_t, validate> { - unsigned int l = 0; - while (*text++) l++; - return l; -} - + static inline const uint32_t * + next (const uint32_t *text, + const uint32_t *end HB_UNUSED, + hb_codepoint_t *unicode) + { + hb_codepoint_t c = *text++; + if (validate && unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu))) + goto error; + *unicode = c; + return text; -/* UTF-32 */ + error: + *unicode = -1; + return text; + } -static inline const uint32_t * -hb_utf_next (const uint32_t *text, - const uint32_t *end HB_UNUSED, - hb_codepoint_t *unicode) -{ - hb_codepoint_t c = *text++; - if (unlikely (c > 0x10FFFFu || hb_in_range (c, 0xD800u, 0xDFFFu))) - goto error; - *unicode = c; - return text; - -error: - *unicode = -1; - return text; -} - -static inline const uint32_t * -hb_utf_prev (const uint32_t *text, - const uint32_t *start HB_UNUSED, - hb_codepoint_t *unicode) -{ - hb_utf_next (text - 1, text, unicode); - return text - 1; -} + static inline const uint32_t * + prev (const uint32_t *text, + const uint32_t *start HB_UNUSED, + hb_codepoint_t *unicode) + { + next (text - 1, text, unicode); + return text - 1; + } -static inline unsigned int -hb_utf_strlen (const uint32_t *text) -{ - unsigned int l = 0; - while (*text++) l++; - return l; -} + static inline unsigned int + strlen (const uint32_t *text) + { + unsigned int l = 0; + while (*text++) l++; + return l; + } +}; #endif /* HB_UTF_PRIVATE_HH */ |