diff options
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | TODO | 81 | ||||
-rw-r--r-- | binparser.c | 546 | ||||
-rw-r--r-- | binparser.h | 124 | ||||
-rw-r--r-- | elfparser.c | 417 |
5 files changed, 638 insertions, 536 deletions
@@ -1,3 +1,9 @@ +2007-02-24 Soren Sandmann <sandmann@daimi.au.dk> + + * binparser.[ch]: Switch to a simpler conceptual model. + * elfparser.c: Update to binparser API changes. + * TODO: updates + Fri Feb 9 16:53:29 2007 Søren Sandmann <sandmann@redhat.com> * Update copyright notices @@ -103,40 +103,21 @@ Before 1.2: * crc32 checking probably doesn't belong in elfparser.c -* Rethink binparser. Maybe the default mode should be: - - there is a current offset - - you can move the cursor - - _goto() - - _align() - - you can read structs with "begin_struct (format) / end_struct()" - Or maybe just "set_format()" that would accept NULL? - - when you are reading a struct, you can skip records with _index() - - you can read fields with get_string/get_uint by passing a name. - - you can read anonymous strings and uints by passing NULL for name - This is allowed even when not reading structs. Or maybe this - should be separate functions. Advantages: - - they can skip ahead, unlike fields accessors - - you can access specific types (8,16,32,64) - - there is no "name" field - Disadvantage: - - the field accesors would need renaming. - bin_parser_get_uint_field () - is not really that bad though. - Maybe begin_record() could return a structure you could - use to access that particular record? Would nicely solve - the problems with "goto" and "index". - bin_record_get_uint(); - What should begin/end be called? They will have different - objects passed. - bin_parser_get_record (parser) -> record - bin_record_free (record); - - Maybe support for indirect strings? Ie., get_string() in elfparser - - This will require endianness to be a per-parser property. Which is - probably just fine. Although d-bus actually has - per-message endianness. Maybe there could be a settable - "endianness" property. +* Missing things in binparser.[ch] + + - it's inconvenient that you have to pass in both a parser _and_ + a record. 
The record should just contain a pointer to the parser + + - the bin_parser_seek_record (..., 1); idiom is a little dubious + + - maybe convert BIN_UINT32 => { BIN_UINT, 4 } + we already have the width in the struct. - Also need to add error checking. + - Add error checking + Also need to add error checking. + + - "native endian" is probably not useful. Maybe go back to just + having big/little endian. * Rename stack_stash_foreach_by_address() to stack_stash_foreach_unique(), or maybe not ... @@ -694,6 +675,40 @@ Later: -=-=-=-=-=-=-=-=-=-=-=-=-=-=- ALREADY DONE -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- + +* Rethink binparser. Maybe the default mode should be: + - there is a current offset + - you can move the cursor + - _goto() + - _align() + - you can read structs with "begin_struct (format) / end_struct()" + Or maybe just "set_format()" that would accept NULL? + - when you are reading a struct, you can skip records with _index() + - you can read fields with get_string/get_uint by passing a name. + - you can read anonymous strings and uints by passing NULL for name + This is allowed even when not reading structs. Or maybe this + should be separate functions. Advantages: + - they can skip ahead, unlike field accessors + - you can access specific types (8,16,32,64) + - there is no "name" field + Disadvantage: + - the field accessors would need renaming. + bin_parser_get_uint_field () + is not really that bad though. + Maybe begin_record() could return a structure you could + use to access that particular record? Would nicely solve + the problems with "goto" and "index". + bin_record_get_uint(); + What should begin/end be called? They will have different + objects passed. + bin_parser_get_record (parser) -> record + bin_record_free (record); + - Maybe support for indirect strings? Ie., get_string() in elfparser + - This will require endianness to be a per-parser property. Which is + probably just fine. Although d-bus actually has + per-message endianness. 
Maybe there could be a settable + "endianness" property. + * Don't look in $(libdir) for separate debug files (since $libdir is the libdir for sysprof, not a system wide libdir). Tim Rowley. Fix is probably to hardcode /usr/lib, and also look in $libdir. diff --git a/binparser.c b/binparser.c index 51d9ecd..f2eac4e 100644 --- a/binparser.c +++ b/binparser.c @@ -16,57 +16,50 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include <string.h> -#include <stdlib.h> -#include <glib.h> -#include <stdarg.h> -#include "binparser.h" -typedef struct ParserFrame ParserFrame; +#include "binparser2.h" -struct BinRecord +typedef struct Field Field; + +struct BinParser { - BinFormat * format; - int index; + const guchar * data; + gsize length; + gsize offset; - BinParser * parser; -}; + const char * error_msg; + GList * records; + BinEndian endian; -struct BinField -{ - guint64 offset; - int width; - int align; - char * name; + gsize saved_offset; }; -struct BinFormat +struct Field { - gboolean big_endian; - - int n_fields; - BinField * fields; + char name[BIN_MAX_NAME]; + guint offset; /* from beginning of struct */ + guint width; + BinType type; }; -struct BinParser +struct BinRecord { - gsize offset; - const guchar * data; - gsize length; - - gboolean cache_in_use; - BinRecord cache; + int n_fields; + Field fields[1]; }; BinParser * -bin_parser_new (const guchar *data, - gsize length) +bin_parser_new (const guchar *data, + gsize length) { BinParser *parser = g_new0 (BinParser, 1); - parser->offset = 0; parser->data = data; parser->length = length; - parser->cache_in_use = FALSE; + parser->offset = 0; + parser->error_msg = NULL; + parser->records = NULL; + parser->endian = BIN_NATIVE_ENDIAN; return parser; } @@ -74,31 +67,28 @@ bin_parser_new (const guchar *data, void bin_parser_free (BinParser *parser) { - g_free (parser); -} + GList *list; -static GQueue * -read_varargs (va_list args, - const char * name, - BinField * field) -{ - GQueue 
*queue = g_queue_new (); - gpointer p; - - if (name) + for (list = parser->records; list != NULL; list = list->next) { - g_queue_push_tail (queue, (gpointer)name); - g_queue_push_tail (queue, field); + BinRecord *record = list->data; - p = va_arg (args, gpointer); - while (p) - { - g_queue_push_tail (queue, p); - p = va_arg (args, gpointer); - } + g_free (record); } - return queue; + g_free (parser); +} + +const guchar * +bin_parser_get_data (BinParser *parser) +{ + return parser->data; +} + +gsize +bin_parser_get_length (BinParser *parser) +{ + return parser->length; } static guint64 @@ -117,84 +107,168 @@ align (guint64 offset, int alignment) return offset; } -gsize -bin_format_get_size (BinFormat *format) +static int +get_field_width (const BinField *field) { - BinField *last_field = &(format->fields[format->n_fields - 1]); - BinField *first_field = &(format->fields[0]); + switch (field->type) + { + case BIN_UINT8: + return 1; + case BIN_UINT16: + return 2; + case BIN_UINT32: + return 4; + case BIN_UINT64: + return 8; + case BIN_UNINTERPRETED: + return field->n_bytes; + } - return align (last_field->offset + last_field->width, first_field->width); + g_assert_not_reached (); + return -1; } -BinFormat * -bin_format_new (gboolean big_endian, - const char *name, BinField *field, - ...) 
+static int +get_align (const BinField *field) { - GQueue *queue = g_queue_new (); - BinFormat *format = g_new0 (BinFormat, 1); - GList *list; - int i; - guint64 offset; - va_list args; - - format->big_endian = big_endian; - - /* Build queue of child types */ - va_start (args, field); - queue = read_varargs (args, name, field); - va_end (args); - - g_assert (queue->length % 2 == 0); - - format->n_fields = queue->length / 2; - format->fields = g_new (BinField, format->n_fields); - - i = 0; + if (field->type == BIN_UNINTERPRETED) + return 1; + else + return get_field_width (field); +} + +BinRecord * +bin_parser_create_record (BinParser *parser, + const BinField *fields) +{ + BinRecord *record; + int i, n_fields; + guint offset; + + n_fields = 0; + while (fields[n_fields].name[0] != '\0') + { + n_fields++; +#if 0 + g_print ("type: %d\n", fields[n_fields].type); +#endif + } + + record = g_malloc0 (sizeof (BinRecord) + + (n_fields - 1) * sizeof (Field)); + offset = 0; - for (list = queue->head; list != NULL; list = list->next->next) + record->n_fields = n_fields; + for (i = 0; i < n_fields; ++i) { - const char *name = list->data; - BinField *field = list->next->data; + const BinField *bin_field = &(fields[i]); + Field *field = &(record->fields[i]); - offset = align (offset, field->align); + offset = align (offset, get_align (bin_field)); - format->fields[i].name = g_strdup (name); - format->fields[i].width = field->width; - format->fields[i].offset = offset; - - offset += field->width; - ++i; + strncpy (field->name, bin_field->name, BIN_MAX_NAME - 1); + field->offset = offset; + field->type = bin_field->type; + field->width = get_field_width (bin_field); + +#if 0 + g_print ("created field %s with type %d\n", field->name, field->type); +#endif - g_free (field); + offset += record->fields[i].width; } + + parser->records = g_list_prepend (parser->records, record); - g_queue_free (queue); - - return format; + return record; } -static const BinField * -get_field (BinFormat 
*format, - const gchar *name) +gboolean +bin_parser_error (BinParser *parser) { - int i; - - for (i = 0; i < format->n_fields; ++i) - { - BinField *field = &(format->fields[i]); - - if (strcmp (field->name, name) == 0) - return field; - } + return parser->error_msg != NULL; +} + +void +bin_parser_clear_error (BinParser *parser) +{ + parser->error_msg = NULL; +} + +const gchar * +bin_parser_get_error_msg (BinParser *parser) +{ + return parser->error_msg; +} + +void +bin_parser_set_endian (BinParser *parser, + BinEndian endian) +{ + parser->endian = endian; +} + +/* Move current offset */ +gsize +bin_parser_get_offset (BinParser *parser) +{ + return parser->offset; +} + +void +bin_parser_set_offset (BinParser *parser, + gsize offset) +{ + parser->offset = offset; +} + +void +bin_parser_align (BinParser *parser, + gsize byte_width) +{ + parser->offset = align (parser->offset, byte_width); +} + +gsize +bin_record_get_size (BinRecord *record) +{ + Field *last_field = &(record->fields[record->n_fields - 1]); + Field *first_field = &(record->fields[0]); + + /* align to first field, since that's the alignment of the record + * following this one + */ - return NULL; + return align (last_field->offset + last_field->width, first_field->width); +} + +void +bin_parser_seek_record (BinParser *parser, + BinRecord *record, + int n_records) +{ + gsize record_size = bin_record_get_size (record); + + parser->offset += record_size * n_records; +} + +void +bin_parser_save (BinParser *parser) +{ + parser->saved_offset = parser->offset; +} + +void +bin_parser_restore (BinParser *parser) +{ + parser->offset = parser->saved_offset; } +/* retrieve data */ static guint64 convert_uint (const guchar *data, - gboolean big_endian, - int width) + BinEndian endian, + BinType type) { guint8 r8; guint16 r16; @@ -205,39 +279,41 @@ convert_uint (const guchar *data, if (width == 4) g_print ("converting at %p %d %d %d %d\n", data, data[0], data[1], data[2], data[3]); #endif + + /* FIXME: check that we 
are within the file */ - switch (width) + switch (type) { - case 1: + case BIN_UINT8: r8 = *(guint8 *)data; return r8; - case 2: + case BIN_UINT16: r16 = *(guint16 *)data; - if (big_endian) + if (endian == BIN_BIG_ENDIAN) r16 = GUINT16_FROM_BE (r16); - else + else if (endian == BIN_LITTLE_ENDIAN) r16 = GUINT16_FROM_LE (r16); return r16; - case 4: + case BIN_UINT32: r32 = *(guint32 *)data; - if (big_endian) + if (endian == BIN_BIG_ENDIAN) r32 = GUINT32_FROM_BE (r32); - else + else if (endian == BIN_LITTLE_ENDIAN) r32 = GUINT32_FROM_LE (r32); return r32; - case 8: + case BIN_UINT64: r64 = *(guint64 *)data; - if (big_endian) + if (endian == BIN_BIG_ENDIAN) r64 = GUINT64_FROM_BE (r64); - else + else if (endian == BIN_LITTLE_ENDIAN) r64 = GUINT64_FROM_LE (r64); return r64; @@ -248,61 +324,36 @@ convert_uint (const guchar *data, } } -guint32 -bin_parser_get_uint32 (BinParser *parser) -{ - guint32 result; - - /* FIXME: This is broken for two reasons: - * - * (1) It assumes file_endian==machine_endian - * - * (2) It doesn't check for file overrun. 
- * - */ - result = *(guint32 *)(parser->data + parser->offset); - - parser->offset += 4; - - return result; -} - -static BinField * -new_field_uint (int width) +static int +get_uint_width (BinType type) { - BinField *field = g_new0 (BinField, 1); - - field->width = width; - field->align = width; - - return field; -} - -BinField * -bin_field_new_uint8 (void) -{ - return new_field_uint (1); + switch (type) + { + case BIN_UINT8: + return 1; + case BIN_UINT16: + return 2; + case BIN_UINT32: + return 4; + case BIN_UINT64: + return 8; + default: + return -1; + } } -BinField * -bin_field_new_uint16 (void) +guint64 +bin_parser_get_uint (BinParser *parser, + BinType type) { - return new_field_uint (2); -} + guint64 r = convert_uint (parser->data + parser->offset, parser->endian, type); -BinField * -bin_field_new_uint32 (void) -{ - return new_field_uint (4); -} + parser->offset += get_uint_width (type); -BinField * -bin_field_new_uint64 (void) -{ - return new_field_uint (8); + return r; } -const gchar * +const char * bin_parser_get_string (BinParser *parser) { const char *result; @@ -314,116 +365,46 @@ bin_parser_get_string (BinParser *parser) parser->offset += strlen (result) + 1; return result; -} - -void -bin_parser_align (BinParser *parser, - gsize byte_width) -{ - parser->offset = align (parser->offset, byte_width); -} - -void -bin_parser_goto (BinParser *parser, - gsize offset) -{ - parser->offset = offset; -} - -BinParser * -bin_record_get_parser (BinRecord *record) -{ - return record->parser; -} - -const gchar * -bin_record_get_string_indirect (BinRecord *record, - const char *name, - gsize str_table) -{ - BinParser *parser = record->parser; - const char *result = NULL; - gsize index; - gsize saved_offset; - - saved_offset = bin_parser_get_offset (record->parser); - index = bin_record_get_uint (record, name); - - bin_parser_goto (record->parser, str_table + index); - - result = bin_parser_get_string (parser); - - bin_parser_goto (record->parser, saved_offset); - - 
return result; } -gsize -bin_parser_get_offset (BinParser *parser) -{ - g_return_val_if_fail (parser != NULL, 0); - - return parser->offset; -} - -const guchar * -bin_parser_get_data (BinParser *parser) -{ - return parser->data; -} - -gsize -bin_parser_get_length (BinParser *parser) -{ - return parser->length; -} - - -/* Record */ -BinRecord * -bin_parser_get_record (BinParser *parser, - BinFormat *format, - gsize offset) +static const Field * +get_field (BinRecord *format, + const gchar *name) { - BinRecord *record; + int i; - if (!parser->cache_in_use) - { - parser->cache_in_use = TRUE; - record = &(parser->cache); - } - else + for (i = 0; i < format->n_fields; ++i) { - record = g_new0 (BinRecord, 1); - } + Field *field = &(format->fields[i]); - record->parser = parser; - record->index = 0; - record->offset = offset; - record->format = format; - - return record; -} - -void -bin_record_free (BinRecord *record) -{ - if (record == &(record->parser->cache)) - record->parser->cache_in_use = FALSE; - else - g_free (record); + if (strcmp (field->name, name) == 0) + { +#if 0 + g_print ("found field: %s (offset: %d, type %d)\n", field->name, field->offset, field->type); +#endif + + + return field; + } + } + + return NULL; } guint64 -bin_record_get_uint (BinRecord *record, - const char *name) +bin_parser_get_uint_field (BinParser *parser, + BinRecord *record, + const char *name) { - const guint8 *pos; - const BinField *field; + const Field *field = get_field (record, name); + const guchar *pos; - field = get_field (record->format, name); - pos = record->parser->data + record->offset + field->offset; +#if 0 + g_print ("moving to %d (%d + %d)\n", parser->offset + field->offset, parser->offset, field->offset); +#endif + + pos = parser->data + parser->offset + field->offset; #if 0 g_print (" record offset: %d\n", record->offset); @@ -431,7 +412,7 @@ bin_record_get_uint (BinRecord *record, g_print (" field offset %d\n", field->offset); #endif - if (record->offset + 
field->offset + field->width > record->parser->length) + if (pos > parser->data + parser->length) { /* FIXME: generate error */ return 0; @@ -441,34 +422,5 @@ bin_record_get_uint (BinRecord *record, g_print (" uint %d at %p => %d\n", field->width, pos, convert_uint (pos, record->format->big_endian, field->width)); #endif - return convert_uint (pos, record->format->big_endian, field->width); -} - -void -bin_record_index (BinRecord *record, - int index) -{ - gsize format_size = bin_format_get_size (record->format); - - record->offset -= record->index * format_size; - record->offset += index * format_size; - record->index = index; -} - -gsize -bin_record_get_offset (BinRecord *record) -{ - return record->offset; -} - -/* Fields */ - -BinField * -bin_field_new_fixed_array (int n_elements, - int element_size) -{ - BinField *field = g_new0 (BinField, 1); - field->width = n_elements * element_size; - field->align = element_size; - return field; + return convert_uint (pos, parser->endian, field->type); } diff --git a/binparser.h b/binparser.h index 503e397..a0b423c 100644 --- a/binparser.h +++ b/binparser.h @@ -17,52 +17,98 @@ */ #include <glib.h> -typedef struct BinField BinField; -typedef struct BinFormat BinFormat; typedef struct BinParser BinParser; typedef struct BinRecord BinRecord; +typedef struct BinField BinField; + +/* The model is: + * + * BinParser has an offset associated with it. 
This offset can be + * manipulated with methods + * + * goto - go to absolute position from file start + * goto_rel - go to relative position + * goto_record_rel - skip the given number of records + * align - move forward until aligned to given width + * save/restore - save/restore the current offset (stack) + * + * and queried with + * + * get_offset - return current offset in bytes from start + * + * data can be retrieved with + * + * get_uint - return a uint of given width, and skip + * get_string - return a null terminated string, and skip + * get_pstring - return a 'pascal' string with given length + * + * get_uint_field - return the named field + * + * formats should probably be definable as static data. + * + * A bin parser also has an associated "status" with it. This can be + * OK, or error. It is ok to use a parser with an error status, but + * the data returned will not be meaningful. + * + * + */ + +#define BIN_MAX_NAME 52 + +typedef enum +{ + BIN_LITTLE_ENDIAN, + BIN_BIG_ENDIAN, + BIN_NATIVE_ENDIAN +} BinEndian; + +typedef enum +{ + /* More types can (and probably will) be added in the future */ + BIN_UINT8, + BIN_UINT16, + BIN_UINT32, + BIN_UINT64, + BIN_UNINTERPRETED +} BinType; + +struct BinField { + const char name[BIN_MAX_NAME]; + char type; + char n_bytes; /* number of bytes if type + * is UNINTERPRETED */ +}; -/* BinParser */ BinParser * bin_parser_new (const guchar *data, gsize length); void bin_parser_free (BinParser *parser); const guchar *bin_parser_get_data (BinParser *parser); gsize bin_parser_get_length (BinParser *parser); -gsize bin_parser_get_offset (BinParser *parser); -void bin_parser_align (BinParser *parser, - gsize byte_width); -void bin_parser_goto (BinParser *parser, - gsize offset); -const char * bin_parser_get_string (BinParser *parser); -guint32 bin_parser_get_uint32 (BinParser *parser); - -/* Record */ -BinRecord * bin_parser_get_record (BinParser *parser, - BinFormat *format, - gsize offset); -void bin_record_free
(BinRecord *record); -guint64 bin_record_get_uint (BinRecord *record, - const char *name); -void bin_record_index (BinRecord *record, - int index); -gsize bin_record_get_offset (BinRecord *record); -const gchar *bin_record_get_string_indirect (BinRecord *record, - const char *name, - gsize str_table); -BinParser * bin_record_get_parser (BinRecord *record); +void bin_parser_set_endian (BinParser *parser, + BinEndian endian); +gboolean bin_parser_error (BinParser *parser); +void bin_parser_clear_error (BinParser *parser); +const gchar * bin_parser_get_error_msg (BinParser *parser); +BinRecord * bin_parser_create_record (BinParser *parser, + const BinField *fields); +gsize bin_record_get_size (BinRecord *record); +/* Move current offset */ +gsize bin_parser_get_offset (BinParser *parser); +void bin_parser_set_offset (BinParser *parser, + gsize offset); +void bin_parser_align (BinParser *parser, + gsize byte_width); +void bin_parser_seek_record (BinParser *parser, + BinRecord *record, + int n_records); +void bin_parser_save (BinParser *parser); +void bin_parser_restore (BinParser *parser); -/* BinFormat */ -BinFormat *bin_format_new (gboolean big_endian, - const char *name, - BinField *field, - ...); -gsize bin_format_get_size (BinFormat *format); - -/* BinField */ -BinField *bin_field_new_uint8 (void); -BinField *bin_field_new_uint16 (void); -BinField *bin_field_new_uint32 (void); -BinField *bin_field_new_uint64 (void); -BinField *bin_field_new_fixed_array (int n_elements, - int element_size); +/* retrieve data */ +guint64 bin_parser_get_uint (BinParser *parser, + BinType type); +const char * bin_parser_get_string (BinParser *parser); +guint64 bin_parser_get_uint_field (BinParser *parser, + BinRecord *record, + const char *field); diff --git a/elfparser.c b/elfparser.c index a16be6d..d0e6273 100644 --- a/elfparser.c +++ b/elfparser.c @@ -44,51 +44,71 @@ struct ElfParser { BinParser * parser; - BinFormat * header; - BinFormat * strtab_format; - BinFormat * shn_entry; - 
BinFormat * sym_format; + BinRecord * header; + BinRecord * strtab_format; + BinRecord * shn_entry; + BinRecord * sym_format; int n_sections; Section ** sections; - + int n_symbols; ElfSym * symbols; gsize sym_strings; - + GMappedFile * file; - + const Section * text_section; }; static gboolean parse_elf_signature (const guchar *data, gsize length, gboolean *is_64, gboolean *is_be); -static void make_formats (ElfParser *parser, - gboolean is_64, - gboolean is_big_endian); +static void make_formats (ElfParser *parser, gboolean is_64); + +static const char * +get_string_indirect (BinParser *parser, + BinRecord *record, + const char *name, + gsize str_table) +{ + const char *result = NULL; + gsize index; + + bin_parser_save (parser); + + index = bin_parser_get_uint_field (parser, record, name); + + bin_parser_set_offset (parser, str_table + index); + + result = bin_parser_get_string (parser); + + bin_parser_restore (parser); + + return result; +} static Section * -section_new (BinRecord *record, +section_new (BinParser *parser, + BinRecord *record, gsize name_table) { Section *section = g_new (Section, 1); guint64 flags; - section->name = bin_record_get_string_indirect ( - record, "sh_name", name_table); - section->size = bin_record_get_uint (record, "sh_size"); - section->offset = bin_record_get_uint (record, "sh_offset"); - - flags = bin_record_get_uint (record, "sh_flags"); + section->name = get_string_indirect (parser, record, "sh_name", name_table); + section->size = bin_parser_get_uint_field (parser, record, "sh_size"); + section->offset = bin_parser_get_uint_field (parser, record, "sh_offset"); + + flags = bin_parser_get_uint_field (parser, record, "sh_flags"); section->allocated = !!(flags & SHF_ALLOC); - + if (section->allocated) - section->load_address = bin_record_get_uint (record, "sh_addr"); + section->load_address = bin_parser_get_uint_field (parser, record, "sh_addr"); else section->load_address = 0; - - section->type = bin_record_get_uint (record, 
"sh_type"); - + + section->type = bin_parser_get_uint_field (parser, record, "sh_type"); + return section; } @@ -104,7 +124,7 @@ find_section (ElfParser *parser, guint type) { int i; - + for (i = 0; i < parser->n_sections; ++i) { Section *section = parser->sections[i]; @@ -112,7 +132,7 @@ find_section (ElfParser *parser, if (strcmp (section->name, name) == 0 && section->type == type) return section; } - + return NULL; } @@ -126,7 +146,6 @@ elf_parser_new_from_data (const guchar *data, gsize section_names; gsize section_headers; int i; - BinRecord *elf_header, *shn_entry; if (!parse_elf_signature (data, length, &is_64, &is_big_endian)) { @@ -138,41 +157,47 @@ elf_parser_new_from_data (const guchar *data, parser->parser = bin_parser_new (data, length); - make_formats (parser, is_64, is_big_endian); + if (is_big_endian) + bin_parser_set_endian (parser->parser, BIN_BIG_ENDIAN); + else + bin_parser_set_endian (parser->parser, BIN_LITTLE_ENDIAN); - /* Read ELF header */ + make_formats (parser, is_64); - elf_header = bin_parser_get_record (parser->parser, parser->header, 0); - parser->n_sections = bin_record_get_uint (elf_header, "e_shnum"); - section_names_idx = bin_record_get_uint (elf_header, "e_shstrndx"); - section_headers = bin_record_get_uint (elf_header, "e_shoff"); + /* Read ELF header */ + + bin_parser_set_offset (parser->parser, 0); - bin_record_free (elf_header); + parser->n_sections = bin_parser_get_uint_field (parser->parser, parser->header, "e_shnum"); + section_names_idx = bin_parser_get_uint_field (parser->parser, parser->header, "e_shstrndx"); + section_headers = bin_parser_get_uint_field (parser->parser, parser->header, "e_shoff"); /* Read section headers */ parser->sections = g_new0 (Section *, parser->n_sections); - shn_entry = bin_parser_get_record (parser->parser, - parser->shn_entry, section_headers); - - bin_record_index (shn_entry, section_names_idx); - section_names = bin_record_get_uint (shn_entry, "sh_offset"); + bin_parser_set_offset 
(parser->parser, section_headers); + + bin_parser_seek_record (parser->parser, parser->shn_entry, + section_names_idx); + + section_names = bin_parser_get_uint_field (parser->parser, parser->shn_entry, "sh_offset"); for (i = 0; i < parser->n_sections; ++i) { - bin_record_index (shn_entry, i); + bin_parser_set_offset (parser->parser, section_headers); + bin_parser_seek_record (parser->parser, parser->shn_entry, i); - parser->sections[i] = section_new (shn_entry, section_names); + parser->sections[i] = section_new (parser->parser, + parser->shn_entry, + section_names); } - + /* Cache the text section */ parser->text_section = find_section (parser, ".text", SHT_PROGBITS); if (!parser->text_section) parser->text_section = find_section (parser, ".text", SHT_NOBITS); - bin_record_free (shn_entry); - return parser; } @@ -185,23 +210,23 @@ elf_parser_new (const char *filename, ElfParser *parser; GMappedFile *file = g_mapped_file_new (filename, FALSE, NULL); - + if (!file) return NULL; - + #if 0 g_print ("elf parser new : %s\n", filename); #endif data = (guchar *)g_mapped_file_get_contents (file); length = g_mapped_file_get_length (file); - + #if 0 g_print ("data %p: for %s\n", data, filename); #endif - + parser = elf_parser_new_from_data (data, length); - + if (!parser) { g_mapped_file_free (file); @@ -209,14 +234,14 @@ elf_parser_new (const char *filename, } parser->file = file; - + #if 0 g_print ("Elf file: %s (debug: %s)\n", filename, elf_parser_get_debug_link (parser, NULL)); #endif parser->file = file; - + #if 0 if (!parser->symbols) g_print ("at this point %s has no symbols\n", filename); @@ -277,15 +302,15 @@ elf_parser_get_crc32 (ElfParser *parser) gsize length; gulong crc; gsize i; - + data = bin_parser_get_data (parser->parser); length = bin_parser_get_length (parser->parser); - + crc = 0xffffffff; - + for (i = 0; i < length; ++i) crc = crc32_table[(crc ^ data[i]) & 0xff] ^ (crc >> 8); - + /* We just read the entire file into memory, but we only really * need the 
symbol table, so swap the whole thing out. * @@ -301,14 +326,14 @@ void elf_parser_free (ElfParser *parser) { int i; - + for (i = 0; i < parser->n_sections; ++i) section_free (parser->sections[i]); g_free (parser->sections); - + if (parser->file) g_mapped_file_free (parser->file); - + bin_parser_free (parser->parser); g_free (parser); @@ -323,7 +348,7 @@ elf_demangle (const char *name) #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ char *demangled = sysprof_cplus_demangle (name, DMGL_PARAMS | DMGL_ANSI); - + if (demangled) return demangled; else @@ -338,7 +363,7 @@ compare_sym (const void *a, const void *b) { const ElfSym *sym_a = a; const ElfSym *sym_b = b; - + if (sym_a->address < sym_b->address) return -1; else if (sym_a->address == sym_b->address) @@ -352,11 +377,11 @@ static void dump_symbols (ElfParser *parser, ElfSym *syms, guint n_syms) { int i; - + for (i = 0; i < n_syms; ++i) { ElfSym *s = &(syms[i]); - + g_print (" %s: %lx\n", elf_parser_get_sym_name (parser, s), s->address); } } @@ -367,28 +392,32 @@ read_table (ElfParser *parser, const Section *sym_table, const Section *str_table) { - int sym_size = bin_format_get_size (parser->sym_format); + int sym_size = bin_record_get_size (parser->sym_format); int i; int n_functions; - BinRecord *symbol; - + parser->n_symbols = sym_table->size / sym_size; parser->symbols = g_new (ElfSym, parser->n_symbols); - symbol = bin_parser_get_record (parser->parser, parser->sym_format, sym_table->offset); - +#if 0 + g_print ("sym table offset: %d\n", sym_table->offset); +#endif + + bin_parser_set_offset (parser->parser, sym_table->offset); + n_functions = 0; +#if 0 + g_print ("n syms: %d\n", parser->n_symbols); +#endif for (i = 0; i < parser->n_symbols; ++i) { guint info; gulong addr; gulong offset; - - bin_record_index (symbol, i); - info = bin_record_get_uint (symbol, "st_info"); - addr = bin_record_get_uint (symbol, "st_value"); - offset = bin_record_get_offset (symbol); + info = bin_parser_get_uint_field 
(parser->parser, parser->sym_format, "st_info"); + addr = bin_parser_get_uint_field (parser->parser, parser->sym_format, "st_value"); + offset = bin_parser_get_offset (parser->parser); if (addr != 0 && (info & 0xf) == STT_FUNC && @@ -399,15 +428,22 @@ read_table (ElfParser *parser, parser->symbols[n_functions].offset = offset; n_functions++; - } + } + +#if 0 + g_print ("read symbol: %s\n", get_string_indirect (parser->parser, + parser->sym_format, "st_name", + str_table->offset)); +#endif + + + bin_parser_seek_record (parser->parser, parser->sym_format, 1); } - - bin_record_free (symbol); - + parser->sym_strings = str_table->offset; parser->n_symbols = n_functions; parser->symbols = g_renew (ElfSym, parser->symbols, parser->n_symbols); - + qsort (parser->symbols, parser->n_symbols, sizeof (ElfSym), compare_sym); } @@ -418,7 +454,7 @@ read_symbols (ElfParser *parser) const Section *strtab = find_section (parser, ".strtab", SHT_STRTAB); const Section *dynsym = find_section (parser, ".dynsym", SHT_DYNSYM); const Section *dynstr = find_section (parser, ".dynstr", SHT_STRTAB); - + if (symtab && strtab) { read_table (parser, symtab, strtab); @@ -451,10 +487,10 @@ do_lookup (ElfSym *symbols, { if (address >= symbols[last].address) return &(symbols[last]); - + last--; } - + return NULL; } else @@ -476,38 +512,43 @@ elf_parser_lookup_symbol (ElfParser *parser, const ElfSym *result; if (!parser->symbols) + { +#if 0 + g_print ("reading symbols\n"); +#endif read_symbols (parser); - + } + if (parser->n_symbols == 0) return NULL; - + if (!parser->text_section) return NULL; address += parser->text_section->load_address; - + #if 0 g_print ("the address we are looking up is %p\n", address); #endif result = do_lookup (parser->symbols, address, 0, parser->n_symbols - 1); - + #if 0 if (result) { g_print ("found %s at %lx\n", elf_parser_get_sym_name (parser, result), result->address); } #endif - + if (result) { gulong size; - BinRecord *record; - record = bin_parser_get_record 
(parser->parser, parser->sym_format, result->offset); - size = bin_record_get_uint (record, "st_size"); - bin_record_free (record); - + bin_parser_set_offset (parser->parser, result->offset); + + size = bin_parser_get_uint_field (parser->parser, + parser->sym_format, "st_size"); + if (result->address + size <= address) result = NULL; } @@ -519,10 +560,10 @@ gulong elf_parser_get_text_offset (ElfParser *parser) { g_return_val_if_fail (parser != NULL, (gulong)-1); - + if (!parser->text_section) return (gulong)-1; - + return parser->text_section->offset; } @@ -532,18 +573,18 @@ elf_parser_get_debug_link (ElfParser *parser, guint32 *crc32) const Section *debug_link = find_section (parser, ".gnu_debuglink", SHT_PROGBITS); const gchar *result; - + if (!debug_link) return NULL; - - bin_parser_goto (parser->parser, debug_link->offset); - + + bin_parser_set_offset (parser->parser, debug_link->offset); + result = bin_parser_get_string (parser->parser); - + bin_parser_align (parser->parser, 4); - + if (crc32) - *crc32 = bin_parser_get_uint32 (parser->parser); + *crc32 = bin_parser_get_uint (parser->parser, BIN_UINT32); return result; } @@ -552,7 +593,7 @@ const guchar * elf_parser_get_eh_frame (ElfParser *parser) { const Section *eh_frame = find_section (parser, ".eh_frame", SHT_PROGBITS); - + if (eh_frame) return bin_parser_get_data (parser->parser) + eh_frame->offset; else @@ -564,16 +605,12 @@ elf_parser_get_sym_name (ElfParser *parser, const ElfSym *sym) { const char *result; - BinRecord *symbol; - + g_return_val_if_fail (parser != NULL, NULL); - - symbol = bin_parser_get_record (parser->parser, parser->sym_format, sym->offset); - - result = bin_record_get_string_indirect (symbol, "st_name", - parser->sym_strings); - bin_record_free (symbol); + bin_parser_set_offset (parser->parser, sym->offset); + result = get_string_indirect ( + parser->parser, parser->sym_format, "st_name", parser->sym_strings); return result; } @@ -616,81 +653,127 @@ parse_elf_signature (const guchar 
*data, } if (is_64) - *is_64 = (EI_CLASS == ELFCLASS64); + *is_64 = (data[EI_CLASS] == ELFCLASS64); if (is_be) - *is_be = (EI_DATA == ELFDATA2MSB); + *is_be = (data[EI_DATA] == ELFDATA2MSB); return TRUE; } -static BinField * -make_word (gboolean is_64) -{ - if (is_64) - return bin_field_new_uint64 (); - else - return bin_field_new_uint32 (); -} - static void -make_formats (ElfParser *parser, gboolean is_64, gboolean is_big_endian) +get_formats (gboolean is_64, + const BinField **elf_header, + const BinField **shn_entry, + const BinField **sym_format) { - parser->header = bin_format_new ( - is_big_endian, - "e_ident", bin_field_new_fixed_array (EI_NIDENT, 1), - "e_type", bin_field_new_uint16 (), - "e_machine", bin_field_new_uint16 (), - "e_version", bin_field_new_uint32 (), - "e_entry", make_word (is_64), - "e_phoff", make_word (is_64), - "e_shoff", make_word (is_64), - "e_flags", bin_field_new_uint32 (), - "e_ehsize", bin_field_new_uint16 (), - "e_phentsize", bin_field_new_uint16 (), - "e_phnum", bin_field_new_uint16 (), - "e_shentsize", bin_field_new_uint16 (), - "e_shnum", bin_field_new_uint16 (), - "e_shstrndx", bin_field_new_uint16 (), - NULL); - - parser->shn_entry = bin_format_new ( - is_big_endian, - "sh_name", bin_field_new_uint32 (), - "sh_type", bin_field_new_uint32 (), - "sh_flags", make_word (is_64), - "sh_addr", make_word (is_64), - "sh_offset", make_word (is_64), - "sh_size", make_word (is_64), - "sh_link", bin_field_new_uint32 (), - "sh_info", bin_field_new_uint32 (), - "sh_addralign", make_word (is_64), - "sh_entsize", make_word (is_64), - NULL); + static const BinField elf64_header[] = { + { "e_ident", BIN_UNINTERPRETED, EI_NIDENT }, + { "e_type", BIN_UINT16 }, + { "e_machine", BIN_UINT16 }, + { "e_version", BIN_UINT32 }, + { "e_entry", BIN_UINT64 }, + { "e_phoff", BIN_UINT64 }, + { "e_shoff", BIN_UINT64 }, + { "e_flags", BIN_UINT32 }, + { "e_ehsize", BIN_UINT16 }, + { "e_phentsize", BIN_UINT16 }, + { "e_phnum", BIN_UINT16 }, + { "e_shentsize", 
BIN_UINT16 }, + { "e_shnum", BIN_UINT16 }, + { "e_shstrndx", BIN_UINT16 }, + { "" }, + }; + + static const BinField elf32_header[] = { + { "e_ident", BIN_UNINTERPRETED, EI_NIDENT }, + { "e_type", BIN_UINT16 }, + { "e_machine", BIN_UINT16 }, + { "e_version", BIN_UINT32 }, + { "e_entry", BIN_UINT32 }, + { "e_phoff", BIN_UINT32 }, + { "e_shoff", BIN_UINT32 }, + { "e_flags", BIN_UINT32 }, + { "e_ehsize", BIN_UINT16 }, + { "e_phentsize", BIN_UINT16 }, + { "e_phnum", BIN_UINT16 }, + { "e_shentsize", BIN_UINT16 }, + { "e_shnum", BIN_UINT16 }, + { "e_shstrndx", BIN_UINT16 }, + { "" }, + }; + + static const BinField shn64_entry[] = { + { "sh_name", BIN_UINT32 }, + { "sh_type", BIN_UINT32 }, + { "sh_flags", BIN_UINT64 }, + { "sh_addr", BIN_UINT64 }, + { "sh_offset", BIN_UINT64 }, + { "sh_size", BIN_UINT64 }, + { "sh_link", BIN_UINT32 }, + { "sh_info", BIN_UINT32 }, + { "sh_addralign", BIN_UINT64 }, + { "sh_entsize", BIN_UINT64 }, + { "" } + }; + + static const BinField shn32_entry[] = { + { "sh_name", BIN_UINT32 }, + { "sh_type", BIN_UINT32 }, + { "sh_flags", BIN_UINT32 }, + { "sh_addr", BIN_UINT32 }, + { "sh_offset", BIN_UINT32 }, + { "sh_size", BIN_UINT32 }, + { "sh_link", BIN_UINT32 }, + { "sh_info", BIN_UINT32 }, + { "sh_addralign", BIN_UINT32 }, + { "sh_entsize", BIN_UINT32 }, + { "" } + }; + static const BinField sym64_format[] = { + { "st_name", BIN_UINT32 }, + { "st_info", BIN_UINT8 }, + { "st_other", BIN_UINT8 }, + { "st_shndx", BIN_UINT16 }, + { "st_value", BIN_UINT64 }, + { "st_size", BIN_UINT64 }, + { "" } + }; + + static const BinField sym32_format[] = { + { "st_name", BIN_UINT32 }, + { "st_value", BIN_UINT32 }, + { "st_size", BIN_UINT32 }, + { "st_info", BIN_UINT8 }, + { "st_other", BIN_UINT8 }, + { "st_shndx", BIN_UINT16 }, + { "" }, + }; + if (is_64) { - parser->sym_format = bin_format_new ( - is_big_endian, - "st_name", bin_field_new_uint32 (), - "st_info", bin_field_new_uint8 (), - "st_other", bin_field_new_uint8 (), - "st_shndx", bin_field_new_uint16 (), - 
"st_value", bin_field_new_uint64 (), - "st_size", bin_field_new_uint64 (), - NULL); + *elf_header = elf64_header; + *shn_entry = shn64_entry; + *sym_format = sym64_format; } else { - parser->sym_format = bin_format_new ( - is_big_endian, - "st_name", bin_field_new_uint32 (), - "st_value", bin_field_new_uint32 (), - "st_size", bin_field_new_uint32 (), - "st_info", bin_field_new_uint8 (), - "st_other", bin_field_new_uint8 (), - "st_shndx", bin_field_new_uint16 (), - NULL); + *elf_header = elf32_header; + *shn_entry = shn32_entry; + *sym_format = sym32_format; } } +static void +make_formats (ElfParser *parser, gboolean is_64) +{ + const BinField *elf_header, *shn_entry, *sym_format; + + get_formats (is_64, &elf_header, &shn_entry, &sym_format); + + parser->header = bin_parser_create_record (parser->parser, elf_header); + parser->shn_entry = bin_parser_create_record (parser->parser, shn_entry); + parser->sym_format = bin_parser_create_record (parser->parser, sym_format); +} + |