diff options
author | Ray Strode <halfline@gmail.com> | 2023-12-11 03:14:31 +0000 |
---|---|---|
committer | Ray Strode <halfline@gmail.com> | 2023-12-11 03:14:31 +0000 |
commit | 3cff63128c7d04e259f7f62790fc1f42dc130322 (patch) | |
tree | a7afa375b65c96072523c92ddbcde98a1643c41b | |
parent | 73093a8411832159d380b889974016a0dfad04df (diff) | |
parent | 27fd8d115c3f9fea55940018e3cff8939ab5d22a (diff) |
Merge branch 'resilientunicode' into 'main'
ply-terminal-emulator: Handle incomplete Unicode characters with more resiliency
See merge request plymouth/plymouth!261
-rw-r--r-- | src/libply-splash-core/ply-keyboard.c | 33 | ||||
-rw-r--r-- | src/libply-splash-core/ply-rich-text.h | 2 | ||||
-rw-r--r-- | src/libply-splash-core/ply-terminal-emulator.c | 237 | ||||
-rw-r--r-- | src/libply/ply-buffer.c | 25 | ||||
-rw-r--r-- | src/libply/ply-buffer.h | 11 | ||||
-rw-r--r-- | src/libply/ply-utils.c | 119 | ||||
-rw-r--r-- | src/libply/ply-utils.h | 22 | ||||
-rw-r--r-- | src/main.c | 19 |
8 files changed, 329 insertions, 139 deletions
diff --git a/src/libply-splash-core/ply-keyboard.c b/src/libply-splash-core/ply-keyboard.c index 128d3ced..ab375f37 100644 --- a/src/libply-splash-core/ply-keyboard.c +++ b/src/libply-splash-core/ply-keyboard.c @@ -153,26 +153,15 @@ ply_keyboard_new_for_renderer (ply_renderer_t *renderer) static void process_backspace (ply_keyboard_t *keyboard) { - size_t bytes_to_remove; - ssize_t previous_character_size; - const char *bytes; + char *bytes; size_t size; + size_t capacity; ply_list_node_t *node; - bytes = ply_buffer_get_bytes (keyboard->line_buffer); - size = ply_buffer_get_size (keyboard->line_buffer); - - bytes_to_remove = MIN (size, PLY_UTF8_CHARACTER_SIZE_MAX); - while ((previous_character_size = ply_utf8_character_get_size (bytes + size - bytes_to_remove, bytes_to_remove)) < (ssize_t) bytes_to_remove) { - if (previous_character_size > 0) - bytes_to_remove -= previous_character_size; - else - bytes_to_remove--; + ply_buffer_borrow_bytes (keyboard->line_buffer, &bytes, &size, &capacity) { + ply_utf8_string_remove_last_character (&bytes, &size); } - if (bytes_to_remove <= size) - ply_buffer_remove_bytes_at_end (keyboard->line_buffer, bytes_to_remove); - for (node = ply_list_get_first_node (keyboard->backspace_handler_list); node; node = ply_list_get_next_node (keyboard->backspace_handler_list, node)) { ply_keyboard_closure_t *closure = ply_list_node_get_data (node); @@ -277,6 +266,7 @@ on_key_event (ply_keyboard_t *keyboard, i = 0; while (i < size) { + ply_utf8_character_byte_type_t character_byte_type; ssize_t character_size; char *keyboard_input; size_t bytes_left = size - i; @@ -318,18 +308,23 @@ on_key_event (ply_keyboard_t *keyboard, continue; } - character_size = (ssize_t) ply_utf8_character_get_size (bytes + i, bytes_left); + character_byte_type = ply_utf8_character_get_byte_type (bytes[i]); - if (character_size < 0) + if (PLY_UTF8_CHARACTER_BYTE_TYPE_IS_NOT_LEADING (character_byte_type)) break; /* If we're at a NUL character walk through it */ - if (character_size == 0) { + if (character_byte_type == PLY_UTF8_CHARACTER_BYTE_TYPE_END_OF_STRING) { i++; continue; } + character_size = ply_utf8_character_get_size_from_byte_type (character_byte_type); + + if (character_size > bytes_left) + break; + keyboard_input = strndup (bytes + i, character_size); process_keyboard_input (keyboard, keyboard_input, character_size); @@ -665,4 +660,4 @@ ply_keyboard_get_capslock_state (ply_keyboard_t *keyboard) } return NULL; -}
\ No newline at end of file +} diff --git a/src/libply-splash-core/ply-rich-text.h b/src/libply-splash-core/ply-rich-text.h index bce267cf..e0ab9eaf 100644 --- a/src/libply-splash-core/ply-rich-text.h +++ b/src/libply-splash-core/ply-rich-text.h @@ -23,8 +23,6 @@ #include "ply-terminal.h" #include <stddef.h> -#define PLY_UTF8_CHARACTER_MAX_SIZE 5 - typedef struct _ply_rich_text_t ply_rich_text_t; typedef struct diff --git a/src/libply-splash-core/ply-terminal-emulator.c b/src/libply-splash-core/ply-terminal-emulator.c index bc31d2d4..95058315 100644 --- a/src/libply-splash-core/ply-terminal-emulator.c +++ b/src/libply-splash-core/ply-terminal-emulator.c @@ -27,7 +27,7 @@ #include <stdio.h> -#define PLY_TERMINAL_SPACES_PER_TAB 8 +#define PLY_TERMINAL_EMULATOR_SPACES_PER_TAB 8 /* Characters between 64 to 157 end the escape sequence strings (in testing) * for i in $(seq 1 255) @@ -47,10 +47,16 @@ typedef enum { - PLY_TERMINAL_EMULATOR_PARSE_STATE_UNESCAPED, - PLY_TERMINAL_EMULATOR_PARSE_STATE_ESCAPED, - PLY_TERMINAL_EMULATOR_PARSE_STATE_CONTROL_SEQUENCE_PARAMETER -} ply_terminal_emulator_parse_state_t; + PLY_TERMINAL_EMULATOR_TERMINAL_STATE_UNESCAPED, + PLY_TERMINAL_EMULATOR_TERMINAL_STATE_ESCAPED, + PLY_TERMINAL_EMULATOR_TERMINAL_STATE_CONTROL_SEQUENCE_PARAMETER +} ply_terminal_emulator_terminal_state_t; + +typedef enum +{ + PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_SINGLE_BYTE, + PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_MULTI_BYTE, +} ply_terminal_emulator_utf8_character_parse_state_t; typedef enum { @@ -88,26 +94,31 @@ typedef struct struct _ply_terminal_emulator { - ply_terminal_emulator_parse_state_t state; + ply_terminal_emulator_terminal_state_t state; + + size_t number_of_rows; + size_t number_of_columns; - size_t number_of_rows; - size_t number_of_columns; + size_t line_count; + ply_array_t *lines; - size_t line_count; - ply_array_t *lines; + ply_trigger_t *output_trigger; - ply_trigger_t *output_trigger; + ssize_t cursor_row_offset; /* Relative to the bottom-most allocated line */ + size_t cursor_column; + ply_terminal_emulator_break_string_action_t break_action; - ssize_t cursor_row_offset; /* Relative to the bottom-most allocated line */ - size_t cursor_column; - ply_terminal_emulator_break_string_action_t break_action; + uint32_t last_parameter_was_integer : 1; + uint32_t pending_parameter_value; + ply_terminal_emulator_command_t *staged_command; + ply_list_t *pending_commands; - uint32_t last_parameter_was_integer : 1; - ply_terminal_emulator_command_t *staged_command; - ply_list_t *pending_commands; + ply_terminal_emulator_utf8_character_parse_state_t pending_character_state; + ply_buffer_t *pending_character; + int pending_character_size; - ply_rich_text_t *current_line; - ply_rich_text_character_style_t current_style; + ply_rich_text_t *current_line; + ply_rich_text_character_style_t current_style; }; typedef ply_terminal_emulator_break_string_t (*ply_terminal_emulator_dispatch_handler_t)(); @@ -139,6 +150,10 @@ ply_terminal_emulator_new (size_t number_of_rows, terminal_emulator->number_of_columns = number_of_columns; terminal_emulator->lines = ply_array_new (PLY_ARRAY_ELEMENT_TYPE_POINTER); + terminal_emulator->pending_character = ply_buffer_new (); + terminal_emulator->pending_character_state = PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_SINGLE_BYTE; + terminal_emulator->pending_character_size = 0; + span.offset = 0; span.range = terminal_emulator->number_of_columns; @@ -150,7 +165,10 @@ ply_terminal_emulator_new (size_t number_of_rows, terminal_emulator->cursor_row_offset = 0; - terminal_emulator->state = PLY_TERMINAL_EMULATOR_PARSE_STATE_UNESCAPED; + terminal_emulator->state = PLY_TERMINAL_EMULATOR_TERMINAL_STATE_UNESCAPED; + + terminal_emulator->last_parameter_was_integer = false; + terminal_emulator->pending_parameter_value = 0; terminal_emulator->break_action = PLY_TERMINAL_EMULATOR_BREAK_STRING_ACTION_PRESERVE_CURSOR_COLUMN; terminal_emulator->output_trigger = ply_trigger_new (NULL); @@ -907,9 +925,9 @@ on_escape_character_tab (ply_terminal_emulator_t *terminal_emulator, terminal_emulator->break_action = PLY_TERMINAL_EMULATOR_BREAK_STRING_ACTION_PRESERVE_CURSOR_COLUMN; if (terminal_emulator->cursor_column <= 0) { - pad_character_count = PLY_TERMINAL_SPACES_PER_TAB; + pad_character_count = PLY_TERMINAL_EMULATOR_SPACES_PER_TAB; } else { - pad_character_count = PLY_TERMINAL_SPACES_PER_TAB - (terminal_emulator->cursor_column % PLY_TERMINAL_SPACES_PER_TAB); + pad_character_count = PLY_TERMINAL_EMULATOR_SPACES_PER_TAB - (terminal_emulator->cursor_column % PLY_TERMINAL_EMULATOR_SPACES_PER_TAB); } ply_rich_text_get_mutable_span (terminal_emulator->current_line, &span); @@ -1052,6 +1070,40 @@ ply_terminal_emulator_get_line_count (ply_terminal_emulator_t *terminal_emulator return terminal_emulator->line_count; } +static ply_terminal_emulator_break_string_t +ply_terminal_emulator_flush_pending_character_to_line (ply_terminal_emulator_t *terminal_emulator) +{ + ply_terminal_emulator_break_string_t break_string = PLY_TERMINAL_EMULATOR_BREAK_STRING_NONE; + ply_rich_text_span_t span; + const char *character_bytes; + size_t character_size; + size_t maximum_characters; + + character_bytes = ply_buffer_get_bytes (terminal_emulator->pending_character); + character_size = ply_buffer_get_size (terminal_emulator->pending_character); + + ply_rich_text_set_character (terminal_emulator->current_line, + terminal_emulator->current_style, + terminal_emulator->cursor_column, + character_bytes, + character_size); + ply_buffer_clear (terminal_emulator->pending_character); + + terminal_emulator->cursor_column++; + + ply_rich_text_get_mutable_span (terminal_emulator->current_line, &span); + + maximum_characters = span.offset + span.range; + + if (terminal_emulator->cursor_column >= maximum_characters) { + terminal_emulator->cursor_row_offset++; + terminal_emulator->break_action = PLY_TERMINAL_EMULATOR_BREAK_STRING_ACTION_RESET_CURSOR_COLUMN; + break_string = PLY_TERMINAL_EMULATOR_BREAK_STRING; + } + + return break_string; +} + void ply_terminal_emulator_parse_substring (ply_terminal_emulator_t *terminal_emulator, ply_rich_text_t *terminal_emulator_line, @@ -1060,17 +1112,13 @@ ply_terminal_emulator_parse_substring (ply_terminal_emulator_t *terminal_emulato const char **unparsed_input, size_t *number_of_unparsed_bytes) { - char character_string[PLY_UTF8_CHARACTER_MAX_SIZE]; size_t input_length = number_of_bytes_to_parse; size_t new_length; size_t i = 0; ply_terminal_emulator_break_string_t break_string = PLY_TERMINAL_EMULATOR_BREAK_STRING_NONE; - int parameter_value; ply_terminal_emulator_command_t *command; ply_rich_text_span_t span; size_t maximum_characters; - - int character_length; ply_list_node_t *node; terminal_emulator->current_line = terminal_emulator_line; @@ -1094,89 +1142,127 @@ ply_terminal_emulator_parse_substring (ply_terminal_emulator_t *terminal_emulato fill_offsets_with_padding (terminal_emulator, new_length, terminal_emulator->cursor_column); while (i < input_length) { - if (break_string == PLY_TERMINAL_EMULATOR_BREAK_STRING && terminal_emulator->state == PLY_TERMINAL_EMULATOR_PARSE_STATE_UNESCAPED) { + ply_utf8_character_byte_type_t character_byte_type; + + if (break_string == PLY_TERMINAL_EMULATOR_BREAK_STRING && terminal_emulator->state == PLY_TERMINAL_EMULATOR_TERMINAL_STATE_UNESCAPED) { break_string = PLY_TERMINAL_EMULATOR_BREAK_STRING_NONE; break; } - parameter_value = 0; - terminal_emulator->break_action = PLY_TERMINAL_EMULATOR_BREAK_STRING_ACTION_PRESERVE_CURSOR_COLUMN; - /* Non-ASCII Unicode characters have no impact on escape code handling */ - character_length = ply_utf8_character_get_size (&input[i], 4); + character_byte_type = ply_utf8_character_get_byte_type (input[i]); + + if (character_byte_type != PLY_UTF8_CHARACTER_BYTE_TYPE_CONTINUATION) + ply_buffer_clear (terminal_emulator->pending_character); + + /* If the previous byte was also a UTF-8 leading byte, handle it as an invalid character */ + if (terminal_emulator->pending_character_state == PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_MULTI_BYTE && + character_byte_type != PLY_UTF8_CHARACTER_BYTE_TYPE_CONTINUATION && + terminal_emulator->state == PLY_TERMINAL_EMULATOR_TERMINAL_STATE_UNESCAPED) { + ply_buffer_append_bytes (terminal_emulator->pending_character, "?", 1); + break_string = ply_terminal_emulator_flush_pending_character_to_line (terminal_emulator); + } + + if (PLY_UTF8_CHARACTER_BYTE_TYPE_IS_MULTI_BYTE (character_byte_type)) { + /* Multi-byte Unicode characters */ + terminal_emulator->pending_character_state = PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_MULTI_BYTE; + terminal_emulator->pending_character_size = ply_utf8_character_get_size_from_byte_type (character_byte_type); + + ply_buffer_append_bytes (terminal_emulator->pending_character, &input[i], 1); - /* skip, if the character_length is -2, it's a auxiliary unicode byte */ - if (character_length < 0) { i++; continue; - } else if (character_length > 1) { - /* Last element is a nullchar */ - character_string[character_length] = '\0'; + } else if (character_byte_type == PLY_UTF8_CHARACTER_BYTE_TYPE_1_BYTE) { + /* Ascii characters could potentially be used in escape sequences */ + terminal_emulator->pending_character_state = PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_SINGLE_BYTE; + terminal_emulator->pending_character_size = ply_utf8_character_get_size_from_byte_type (character_byte_type); + } else if (character_byte_type == PLY_UTF8_CHARACTER_BYTE_TYPE_END_OF_STRING) { + i++; + continue; + } else if (character_byte_type == PLY_UTF8_CHARACTER_BYTE_TYPE_INVALID) { + i++; + continue; + } else if (character_byte_type == PLY_UTF8_CHARACTER_BYTE_TYPE_CONTINUATION) { + if (terminal_emulator->pending_character_state == PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_MULTI_BYTE) { + /* Handle the auxiliary unicode byte if handling a multi-byte character */ + if (terminal_emulator->pending_character_state == PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_MULTI_BYTE) + ply_buffer_append_bytes (terminal_emulator->pending_character, &input[i], 1); - for (int j = 0; j < character_length; j++) { - character_string[j] = input[i]; + i++; + + /* The multi-byte character is not finished yet, continue the loop */ + if (ply_buffer_get_size (terminal_emulator->pending_character) < terminal_emulator->pending_character_size) + continue; + } else { + /* If this is an auxiliary Unicode byte when not handling a multi-byte character, replace it with a placeholder */ + terminal_emulator->pending_character_size = 1; + ply_buffer_clear (terminal_emulator->pending_character); + ply_buffer_append_bytes (terminal_emulator->pending_character, "?", 1); + break_string = ply_terminal_emulator_flush_pending_character_to_line (terminal_emulator); i++; - if (i >= maximum_characters) - break; + continue; } - ply_rich_text_set_character (terminal_emulator->current_line, terminal_emulator->current_style, terminal_emulator->cursor_column, character_string, character_length); - terminal_emulator->cursor_column++; + } + + /* If the current character is a multi-byte character, and all the bytes are received */ + if (terminal_emulator->pending_character_state == PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_MULTI_BYTE) { + /* Drop and skip the multi-byte character if is still escaped */ + if (terminal_emulator->state != PLY_TERMINAL_EMULATOR_TERMINAL_STATE_UNESCAPED) { + ply_buffer_clear (terminal_emulator->pending_character); + continue; + } + + terminal_emulator->pending_character_state = PLY_TERMINAL_EMULATOR_UTF8_CHARACTER_PARSE_STATE_SINGLE_BYTE; + break_string = ply_terminal_emulator_flush_pending_character_to_line (terminal_emulator); continue; } switch (terminal_emulator->state) { - case PLY_TERMINAL_EMULATOR_PARSE_STATE_UNESCAPED: + case PLY_TERMINAL_EMULATOR_TERMINAL_STATE_UNESCAPED: if (input[i] == '\e') { terminal_emulator->staged_command = ply_terminal_emulator_command_new (); - terminal_emulator->state = PLY_TERMINAL_EMULATOR_PARSE_STATE_ESCAPED; + terminal_emulator->state = PLY_TERMINAL_EMULATOR_TERMINAL_STATE_ESCAPED; } else if (iscntrl (input[i]) && input[i] != '\e') { terminal_emulator->staged_command = ply_terminal_emulator_command_new (); terminal_emulator->staged_command->code = input[i]; terminal_emulator->staged_command->type = PLY_TERMINAL_EMULATOR_COMMAND_TYPE_CONTROL_CHARACTER; ply_list_append_data (terminal_emulator->pending_commands, terminal_emulator->staged_command); } else { - character_string[0] = input[i]; - character_string[1] = '\0'; - ply_rich_text_set_character (terminal_emulator->current_line, terminal_emulator->current_style, terminal_emulator->cursor_column, character_string, 1); - terminal_emulator->cursor_column++; - - if (terminal_emulator->cursor_column >= maximum_characters) { - terminal_emulator->cursor_row_offset++; - terminal_emulator->break_action = PLY_TERMINAL_EMULATOR_BREAK_STRING_ACTION_RESET_CURSOR_COLUMN; - break_string = PLY_TERMINAL_EMULATOR_BREAK_STRING; - } + ply_buffer_append_bytes (terminal_emulator->pending_character, &input[i], 1); + break_string = ply_terminal_emulator_flush_pending_character_to_line (terminal_emulator); } break; - case PLY_TERMINAL_EMULATOR_PARSE_STATE_ESCAPED: + case PLY_TERMINAL_EMULATOR_TERMINAL_STATE_ESCAPED: if (input[i] == '[') { + terminal_emulator->pending_parameter_value = 0; terminal_emulator->staged_command->parameters = ply_array_new (PLY_ARRAY_ELEMENT_TYPE_UINT32); terminal_emulator->staged_command->type = PLY_TERMINAL_EMULATOR_COMMAND_TYPE_CONTROL_SEQUENCE; terminal_emulator->staged_command->parameters_valid = true; terminal_emulator->last_parameter_was_integer = false; - terminal_emulator->state = PLY_TERMINAL_EMULATOR_PARSE_STATE_CONTROL_SEQUENCE_PARAMETER; + terminal_emulator->state = PLY_TERMINAL_EMULATOR_TERMINAL_STATE_CONTROL_SEQUENCE_PARAMETER; } else { terminal_emulator->staged_command->code = input[i]; terminal_emulator->staged_command->type = PLY_TERMINAL_EMULATOR_COMMAND_TYPE_ESCAPE; ply_list_append_data (terminal_emulator->pending_commands, terminal_emulator->staged_command); - terminal_emulator->state = PLY_TERMINAL_EMULATOR_PARSE_STATE_UNESCAPED; + terminal_emulator->state = PLY_TERMINAL_EMULATOR_TERMINAL_STATE_UNESCAPED; } break; - case PLY_TERMINAL_EMULATOR_PARSE_STATE_CONTROL_SEQUENCE_PARAMETER: + case PLY_TERMINAL_EMULATOR_TERMINAL_STATE_CONTROL_SEQUENCE_PARAMETER: /* Characters that end the control sequence, and define the command */ if ((unsigned char) input[i] >= PLY_TERMINAL_ESCAPE_CODE_COMMAND_MINIMUM && (unsigned char) input[i] <= PLY_TERMINAL_ESCAPE_CODE_COMMAND_MAXIMUM) { - terminal_emulator->state = PLY_TERMINAL_EMULATOR_PARSE_STATE_UNESCAPED; + terminal_emulator->state = PLY_TERMINAL_EMULATOR_TERMINAL_STATE_UNESCAPED; terminal_emulator->staged_command->code = input[i]; - if (terminal_emulator->last_parameter_was_integer == false) - ply_array_add_uint32_element (terminal_emulator->staged_command->parameters, parameter_value); + ply_array_add_uint32_element (terminal_emulator->staged_command->parameters, terminal_emulator->pending_parameter_value); + terminal_emulator->pending_parameter_value = 0; ply_list_append_data (terminal_emulator->pending_commands, terminal_emulator->staged_command); break; @@ -1187,28 +1273,22 @@ ply_terminal_emulator_parse_substring (ply_terminal_emulator_t *terminal_emulato ply_list_append_data (terminal_emulator->pending_commands, nested_command); } else if (input[i] == ';' || (isdigit (input[i]))) { if (isdigit (input[i])) { - /* If the previous character was an integer, and this one is an integer, it is probably the next digit*/ - if (terminal_emulator->last_parameter_was_integer == true) { - parameter_value = -1; - } else { - parameter_value = atoi (&input[i]); - } + /* If the previous character was an integer, and this one is an integer, it is probably the next digit */ + terminal_emulator->pending_parameter_value = terminal_emulator->pending_parameter_value * 10; + terminal_emulator->pending_parameter_value += input[i] - '0'; terminal_emulator->last_parameter_was_integer = true; } else if (input[i] == ';') { - /* Skip, and do not add the default value of 0 if the last character encountered was a valid parameter - * Double ;;'s imply a 0 - */ - if (terminal_emulator->last_parameter_was_integer == true) - parameter_value = -1; + /* Double ;;'s imply a 0 */ + if (terminal_emulator->last_parameter_was_integer == false) { + ply_array_add_uint32_element (terminal_emulator->staged_command->parameters, 0); + } else { + ply_array_add_uint32_element (terminal_emulator->staged_command->parameters, terminal_emulator->pending_parameter_value); + } + terminal_emulator->pending_parameter_value = 0; terminal_emulator->last_parameter_was_integer = false; } - - /* Skip parameter if less than 0 */ - if (parameter_value >= 0) - ply_array_add_uint32_element (terminal_emulator->staged_command->parameters, parameter_value); - break; } else { /* invalid characters in the middle of the escape sequence invalidate it */ @@ -1217,7 +1297,7 @@ ply_terminal_emulator_parse_substring (ply_terminal_emulator_t *terminal_emulato break; } - if (terminal_emulator->state == PLY_TERMINAL_EMULATOR_PARSE_STATE_UNESCAPED) { + if (terminal_emulator->state == PLY_TERMINAL_EMULATOR_TERMINAL_STATE_UNESCAPED) { ply_list_foreach (terminal_emulator->pending_commands, node) { ply_terminal_emulator_break_string_t break_string_value = PLY_TERMINAL_EMULATOR_BREAK_STRING_NONE; @@ -1273,7 +1353,6 @@ ply_terminal_emulator_parse_lines (ply_terminal_emulator_t *terminal_emulator, unparsed_text = text; unparsed_text_length = size; while (unparsed_text_length > 0) { - assert (terminal_emulator->line_count != 0); first_row = 0; diff --git a/src/libply/ply-buffer.c b/src/libply/ply-buffer.c index 034a7b1d..0fa68d8a 100644 --- a/src/libply/ply-buffer.c +++ b/src/libply/ply-buffer.c @@ -224,6 +224,20 @@ ply_buffer_append_from_fd (ply_buffer_t *buffer, ply_buffer_append_bytes (buffer, bytes, bytes_read); } +void +ply_buffer_set_bytes (ply_buffer_t *buffer, + void *bytes, + size_t number_of_bytes, + size_t capacity) +{ + if (buffer->data != bytes) + free (buffer->data); + + buffer->data = bytes; + buffer->size = number_of_bytes; + buffer->capacity = capacity; +} + const char * ply_buffer_get_bytes (ply_buffer_t *buffer) { @@ -247,6 +261,12 @@ ply_buffer_steal_bytes (ply_buffer_t *buffer) } size_t +ply_buffer_get_capacity (ply_buffer_t *buffer) +{ + return buffer->capacity; +} + +size_t ply_buffer_get_size (ply_buffer_t *buffer) { return buffer->size; @@ -255,6 +275,9 @@ ply_buffer_get_size (ply_buffer_t *buffer) void ply_buffer_clear (ply_buffer_t *buffer) { - memset (buffer->data, '\0', buffer->capacity); + if (buffer->size == 0) + return; + + memset (buffer->data, '\0', buffer->size); buffer->size = 0; } diff --git a/src/libply/ply-buffer.h b/src/libply/ply-buffer.h index e464007e..fc7f4a89 100644 --- a/src/libply/ply-buffer.h +++ b/src/libply/ply-buffer.h @@ -44,12 +44,23 @@ __attribute__((__format__ (__printf__, 2, 3))) void ply_buffer_append_with_non_literal_format_string (ply_buffer_t *buffer, const char *format, ...); +void ply_buffer_set_bytes (ply_buffer_t *buffer, + void *bytes, + size_t number_of_bytes, + size_t capacity); void ply_buffer_remove_bytes (ply_buffer_t *buffer, size_t number_of_bytes); void ply_buffer_remove_bytes_at_end (ply_buffer_t *buffer, size_t number_of_bytes); const char *ply_buffer_get_bytes (ply_buffer_t *buffer); +size_t ply_buffer_get_capacity (ply_buffer_t *buffer); char *ply_buffer_steal_bytes (ply_buffer_t *buffer); +#define ply_buffer_borrow_bytes(buffer, bytes, size, capacity) \ + for (bool _ran = false; *bytes = (char *) ply_buffer_get_bytes (buffer), \ + *size = ply_buffer_get_size (buffer), \ + *capacity = ply_buffer_get_capacity (buffer), \ + !_ran; \ + ply_buffer_set_bytes (buffer, *bytes, *size, *capacity), _ran = true) size_t ply_buffer_get_size (ply_buffer_t *buffer); void ply_buffer_clear (ply_buffer_t *buffer); #endif diff --git a/src/libply/ply-utils.c b/src/libply/ply-utils.c index c5b0847e..95b505b1 100644 --- a/src/libply/ply-utils.c +++ b/src/libply/ply-utils.c @@ -742,21 +742,93 @@ ply_detach_daemon (ply_daemon_handle_t *handle, * 11100000-11101111 E0-EF Start of 3-byte sequence * 11110000-11110100 F0-F4 Start of 4-byte sequence */ -int -ply_utf8_character_get_size (const char *string, - size_t n) +ply_utf8_character_byte_type_t +ply_utf8_character_get_byte_type (const char byte) +{ + ply_utf8_character_byte_type_t byte_type; + + if (byte == '\0') + byte_type = PLY_UTF8_CHARACTER_BYTE_TYPE_END_OF_STRING; + else if ((byte & 0x80) == 0x00) + byte_type = PLY_UTF8_CHARACTER_BYTE_TYPE_1_BYTE; + else if ((byte & 0xE0) == 0xC0) + byte_type = PLY_UTF8_CHARACTER_BYTE_TYPE_2_BYTES; + else if ((byte & 0xF0) == 0xE0) + byte_type = PLY_UTF8_CHARACTER_BYTE_TYPE_3_BYTES; + else if ((byte & 0xF8) == 0xF0) + byte_type = PLY_UTF8_CHARACTER_BYTE_TYPE_4_BYTES; + else + byte_type = PLY_UTF8_CHARACTER_BYTE_TYPE_CONTINUATION; + + return byte_type; +} + +ssize_t +ply_utf8_character_get_size_from_byte_type (ply_utf8_character_byte_type_t byte_type) +{ + size_t size; + + switch (byte_type) { + case PLY_UTF8_CHARACTER_BYTE_TYPE_1_BYTE: + size = 1; + break; + case PLY_UTF8_CHARACTER_BYTE_TYPE_2_BYTES: + size = 2; + break; + case PLY_UTF8_CHARACTER_BYTE_TYPE_3_BYTES: + size = 3; + break; + case PLY_UTF8_CHARACTER_BYTE_TYPE_4_BYTES: + size = 4; + break; + case PLY_UTF8_CHARACTER_BYTE_TYPE_CONTINUATION: + case PLY_UTF8_CHARACTER_BYTE_TYPE_INVALID: + case PLY_UTF8_CHARACTER_BYTE_TYPE_END_OF_STRING: + size = 0; + break; + } + return size; +} + +ssize_t +ply_utf8_character_get_size (const char *bytes) +{ + ply_utf8_character_byte_type_t byte_type; + ssize_t size; + + byte_type = ply_utf8_character_get_byte_type (bytes[0]); + size = ply_utf8_character_get_size_from_byte_type (byte_type); + + return size; +} + +void +ply_utf8_string_remove_last_character (char **string, + size_t *size) { - int length; - - if (n < 1) return -1; - if (string[0] == 0x00) length = 0; - else if ((string[0] & 0x80) == 0x00) length = 1; - else if ((string[0] & 0xE0) == 0xC0) length = 2; - else if ((string[0] & 0xF0) == 0xE0) length = 3; - else if ((string[0] & 0xF8) == 0xF0) length = 4; - else return -2; - if (length > (int) n) return -1; - return length; + char *bytes = *string; + size_t size_in = *size, end_offset; + + if (size_in == 0) + return; + + end_offset = size_in - 1; + do { + ply_utf8_character_byte_type_t byte_type; + + byte_type = ply_utf8_character_get_byte_type (bytes[end_offset]); + + if (byte_type != PLY_UTF8_CHARACTER_BYTE_TYPE_CONTINUATION) { + memset (bytes + end_offset, '\0', size_in - end_offset); + *size = end_offset; + break; + } + + if (end_offset == 0) + break; + + end_offset--; + } while (true); } int @@ -766,10 +838,16 @@ ply_utf8_string_get_length (const char *string, size_t count = 0; while (true) { - int charlen = ply_utf8_character_get_size (string, n); - if (charlen <= 0) break; - string += charlen; - n -= charlen; + size_t size = ply_utf8_character_get_size (string); + + if (size == 0) + break; + + if (size > n) + break; + + string += size; + n -= size; count++; } return count; @@ -783,7 +861,7 @@ ply_utf8_string_get_byte_offset_from_character_offset (const char *string, size_t i; for (i = 0; i < character_offset && string[byte_offset] != '\0'; i++) { - byte_offset += ply_utf8_character_get_size (string + byte_offset, PLY_UTF8_CHARACTER_SIZE_MAX); + byte_offset += ply_utf8_character_get_size (string + byte_offset); } return byte_offset; @@ -818,8 +896,7 @@ ply_utf8_string_iterator_next (ply_utf8_string_iterator_t *iterator, if (iterator->string[iterator->current_byte_offset] == '\0') return false; - size_of_current_character = ply_utf8_character_get_size (iterator->string + iterator->current_byte_offset, - PLY_UTF8_CHARACTER_SIZE_MAX); + size_of_current_character = ply_utf8_character_get_size (iterator->string + iterator->current_byte_offset); if (size_of_current_character == 0) return false; diff --git a/src/libply/ply-utils.h b/src/libply/ply-utils.h index b99d2b23..7cbbb2f4 100644 --- a/src/libply/ply-utils.h +++ b/src/libply/ply-utils.h @@ -55,6 +55,20 @@ typedef enum PLY_UNIX_SOCKET_TYPE_TRIMMED_ABSTRACT } ply_unix_socket_type_t; +typedef enum +{ + PLY_UTF8_CHARACTER_BYTE_TYPE_CONTINUATION = -2, + PLY_UTF8_CHARACTER_BYTE_TYPE_INVALID = -1, + PLY_UTF8_CHARACTER_BYTE_TYPE_END_OF_STRING = 0, + PLY_UTF8_CHARACTER_BYTE_TYPE_1_BYTE = 1, + PLY_UTF8_CHARACTER_BYTE_TYPE_2_BYTES = 2, + PLY_UTF8_CHARACTER_BYTE_TYPE_3_BYTES = 3, + PLY_UTF8_CHARACTER_BYTE_TYPE_4_BYTES = 4 +} ply_utf8_character_byte_type_t; + +#define PLY_UTF8_CHARACTER_BYTE_TYPE_IS_NOT_LEADING(t) ((t) == PLY_UTF8_CHARACTER_BYTE_TYPE_INVALID || (t) == PLY_UTF8_CHARACTER_BYTE_TYPE_CONTINUATION) +#define PLY_UTF8_CHARACTER_BYTE_TYPE_IS_MULTI_BYTE(t) (((t) == PLY_UTF8_CHARACTER_BYTE_TYPE_2_BYTES || (t) == PLY_UTF8_CHARACTER_BYTE_TYPE_3_BYTES || (t) == PLY_UTF8_CHARACTER_BYTE_TYPE_4_BYTES)) + typedef struct { const char *string; @@ -120,8 +134,12 @@ ply_daemon_handle_t *ply_create_daemon (void); bool ply_detach_daemon (ply_daemon_handle_t *handle, int exit_code); -int ply_utf8_character_get_size (const char *string, - size_t n); +ply_utf8_character_byte_type_t ply_utf8_character_get_byte_type (const char byte); +ssize_t ply_utf8_character_get_size_from_byte_type (ply_utf8_character_byte_type_t byte_type); +ssize_t ply_utf8_character_get_size (const char *bytes); + +void ply_utf8_string_remove_last_character (char **string, + size_t *n); int ply_utf8_string_get_length (const char *string, size_t n); @@ -1654,28 +1654,17 @@ on_keyboard_input (state_t *state, static void on_backspace (state_t *state) { - ssize_t bytes_to_remove; - ssize_t previous_character_size; - const char *bytes; + char *bytes; size_t size; + size_t capacity; ply_list_node_t *node = ply_list_get_first_node (state->entry_triggers); if (!node) return; - bytes = ply_buffer_get_bytes (state->entry_buffer); - size = ply_buffer_get_size (state->entry_buffer); - if (size == 0) - return; - - bytes_to_remove = MIN (size, PLY_UTF8_CHARACTER_SIZE_MAX); - while ((previous_character_size = ply_utf8_character_get_size (bytes + size - bytes_to_remove, bytes_to_remove)) < bytes_to_remove) { - if (previous_character_size > 0) - bytes_to_remove -= previous_character_size; - else - bytes_to_remove--; + ply_buffer_borrow_bytes (state->entry_buffer, &bytes, &size, &capacity) { + ply_utf8_string_remove_last_character (&bytes, &size); } - ply_buffer_remove_bytes_at_end (state->entry_buffer, bytes_to_remove); update_display (state); } |