summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog6
-rw-r--r--TODO81
-rw-r--r--binparser.c546
-rw-r--r--binparser.h124
-rw-r--r--elfparser.c417
5 files changed, 638 insertions, 536 deletions
diff --git a/ChangeLog b/ChangeLog
index 3aa27f5..f58bbbf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2007-02-24 Soren Sandmann <sandmann@daimi.au.dk>
+
+ * binparser.[ch]: Switch to a simpler conceptual model.
+ * elfparser.c: Update to binparser API changes.
+ * TODO: updates
+
Fri Feb 9 16:53:29 2007 Søren Sandmann <sandmann@redhat.com>
* Update copyright notices
diff --git a/TODO b/TODO
index b1f3f30..9776bd3 100644
--- a/TODO
+++ b/TODO
@@ -103,40 +103,21 @@ Before 1.2:
* crc32 checking probably doesn't belong in elfparser.c
-* Rethink binparser. Maybe the default mode should be:
- - there is a current offset
- - you can move the cursor
- - _goto()
- - _align()
- - you can read structs with "begin_struct (format) / end_struct()"
- Or maybe just "set_format()" that would accept NULL?
- - when you are reading a struct, you can skip records with _index()
- - you can read fields with get_string/get_uint by passing a name.
- - you can read anonymous strings and uints by passing NULL for name
- This is allowed even when not reading structs. Or maybe this
- should be separate functions. Advantages:
- - they can skip ahead, unlike fields accessors
- - you can access specific types (8,16,32,64)
- - there is no "name" field
- Disadvantage:
- - the field accesors would need renaming.
- bin_parser_get_uint_field ()
- is not really that bad though.
- Maybe begin_record() could return a structure you could
- use to access that particular record? Would nicely solve
- the problems with "goto" and "index".
- bin_record_get_uint();
- What should begin/end be called? They will have different
- objects passed.
- bin_parser_get_record (parser) -> record
- bin_record_free (record);
- - Maybe support for indirect strings? Ie., get_string() in elfparser
- - This will require endianness to be a per-parser property. Which is
- probably just fine. Although d-bus actually has
- per-message endianness. Maybe there could be a settable
- "endianness" property.
+* Missing things in binparser.[ch]
+
+ - it's inconvenient that you have to pass in both a parser _and_
+ a record. The record should just contain a pointer to the parser
+
+ - the bin_parser_seek_record (..., 1); idiom is a little dubious
+
+ - maybe convert BIN_UINT32 => { BIN_UINT, 4 }
+ we already have the width in the struct.
- Also need to add error checking.
+ - Add error checking
+ Also need to add error checking.
+
+ - "native endian" is probably not useful. Maybe go back to just
+ having big/little endian.
* Rename stack_stash_foreach_by_address() to stack_stash_foreach_unique(),
or maybe not ...
@@ -694,6 +675,40 @@ Later:
-=-=-=-=-=-=-=-=-=-=-=-=-=-=- ALREADY DONE -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+
+* Rethink binparser. Maybe the default mode should be:
+ - there is a current offset
+ - you can move the cursor
+ - _goto()
+ - _align()
+ - you can read structs with "begin_struct (format) / end_struct()"
+ Or maybe just "set_format()" that would accept NULL?
+ - when you are reading a struct, you can skip records with _index()
+ - you can read fields with get_string/get_uint by passing a name.
+ - you can read anonymous strings and uints by passing NULL for name
+ This is allowed even when not reading structs. Or maybe this
+ should be separate functions. Advantages:
+ - they can skip ahead, unlike field accessors
+ - you can access specific types (8,16,32,64)
+ - there is no "name" field
+ Disadvantage:
+ - the field accessors would need renaming.
+ bin_parser_get_uint_field ()
+ is not really that bad though.
+ Maybe begin_record() could return a structure you could
+ use to access that particular record? Would nicely solve
+ the problems with "goto" and "index".
+ bin_record_get_uint();
+ What should begin/end be called? They will have different
+ objects passed.
+ bin_parser_get_record (parser) -> record
+ bin_record_free (record);
+ - Maybe support for indirect strings? Ie., get_string() in elfparser
+ - This will require endianness to be a per-parser property. Which is
+ probably just fine. Although d-bus actually has
+ per-message endianness. Maybe there could be a settable
+ "endianness" property.
+
* Don't look in $(libdir) for separate debug files (since $libdir is
the libdir for sysprof, not a system wide libdir). Tim Rowley.
Fix is probably to hardcode /usr/lib, and also look in $libdir.
diff --git a/binparser.c b/binparser.c
index 51d9ecd..f2eac4e 100644
--- a/binparser.c
+++ b/binparser.c
@@ -16,57 +16,50 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <string.h>
-#include <stdlib.h>
-#include <glib.h>
-#include <stdarg.h>
-#include "binparser.h"
-typedef struct ParserFrame ParserFrame;
+#include "binparser2.h"
-struct BinRecord
+typedef struct Field Field;
+
+struct BinParser
{
- BinFormat * format;
- int index;
+ const guchar * data;
+ gsize length;
+
gsize offset;
- BinParser * parser;
-};
+ const char * error_msg;
+ GList * records;
+ BinEndian endian;
-struct BinField
-{
- guint64 offset;
- int width;
- int align;
- char * name;
+ gsize saved_offset;
};
-struct BinFormat
+struct Field
{
- gboolean big_endian;
-
- int n_fields;
- BinField * fields;
+ char name[BIN_MAX_NAME];
+ guint offset; /* from beginning of struct */
+ guint width;
+ BinType type;
};
-struct BinParser
+struct BinRecord
{
- gsize offset;
- const guchar * data;
- gsize length;
-
- gboolean cache_in_use;
- BinRecord cache;
+ int n_fields;
+ Field fields[1];
};
BinParser *
-bin_parser_new (const guchar *data,
- gsize length)
+bin_parser_new (const guchar *data,
+ gsize length)
{
BinParser *parser = g_new0 (BinParser, 1);
- parser->offset = 0;
parser->data = data;
parser->length = length;
- parser->cache_in_use = FALSE;
+ parser->offset = 0;
+ parser->error_msg = NULL;
+ parser->records = NULL;
+ parser->endian = BIN_NATIVE_ENDIAN;
return parser;
}
@@ -74,31 +67,28 @@ bin_parser_new (const guchar *data,
void
bin_parser_free (BinParser *parser)
{
- g_free (parser);
-}
+ GList *list;
-static GQueue *
-read_varargs (va_list args,
- const char * name,
- BinField * field)
-{
- GQueue *queue = g_queue_new ();
- gpointer p;
-
- if (name)
+ for (list = parser->records; list != NULL; list = list->next)
{
- g_queue_push_tail (queue, (gpointer)name);
- g_queue_push_tail (queue, field);
+ BinRecord *record = list->data;
- p = va_arg (args, gpointer);
- while (p)
- {
- g_queue_push_tail (queue, p);
- p = va_arg (args, gpointer);
- }
+ g_free (record);
}
- return queue;
+ g_free (parser);
+}
+
+const guchar *
+bin_parser_get_data (BinParser *parser)
+{
+ return parser->data;
+}
+
+gsize
+bin_parser_get_length (BinParser *parser)
+{
+ return parser->length;
}
static guint64
@@ -117,84 +107,168 @@ align (guint64 offset, int alignment)
return offset;
}
-gsize
-bin_format_get_size (BinFormat *format)
+static int
+get_field_width (const BinField *field)
{
- BinField *last_field = &(format->fields[format->n_fields - 1]);
- BinField *first_field = &(format->fields[0]);
+ switch (field->type)
+ {
+ case BIN_UINT8:
+ return 1;
+ case BIN_UINT16:
+ return 2;
+ case BIN_UINT32:
+ return 4;
+ case BIN_UINT64:
+ return 8;
+ case BIN_UNINTERPRETED:
+ return field->n_bytes;
+ }
- return align (last_field->offset + last_field->width, first_field->width);
+ g_assert_not_reached ();
+ return -1;
}
-BinFormat *
-bin_format_new (gboolean big_endian,
- const char *name, BinField *field,
- ...)
+static int
+get_align (const BinField *field)
{
- GQueue *queue = g_queue_new ();
- BinFormat *format = g_new0 (BinFormat, 1);
- GList *list;
- int i;
- guint64 offset;
- va_list args;
-
- format->big_endian = big_endian;
-
- /* Build queue of child types */
- va_start (args, field);
- queue = read_varargs (args, name, field);
- va_end (args);
-
- g_assert (queue->length % 2 == 0);
-
- format->n_fields = queue->length / 2;
- format->fields = g_new (BinField, format->n_fields);
-
- i = 0;
+ if (field->type == BIN_UNINTERPRETED)
+ return 1;
+ else
+ return get_field_width (field);
+}
+
+BinRecord *
+bin_parser_create_record (BinParser *parser,
+ const BinField *fields)
+{
+ BinRecord *record;
+ int i, n_fields;
+ guint offset;
+
+ n_fields = 0;
+ while (fields[n_fields].name[0] != '\0')
+ {
+ n_fields++;
+#if 0
+ g_print ("type: %d\n", fields[n_fields].type);
+#endif
+ }
+
+ record = g_malloc0 (sizeof (BinRecord) +
+ (n_fields - 1) * sizeof (Field));
+
offset = 0;
- for (list = queue->head; list != NULL; list = list->next->next)
+ record->n_fields = n_fields;
+ for (i = 0; i < n_fields; ++i)
{
- const char *name = list->data;
- BinField *field = list->next->data;
+ const BinField *bin_field = &(fields[i]);
+ Field *field = &(record->fields[i]);
- offset = align (offset, field->align);
+ offset = align (offset, get_align (bin_field));
- format->fields[i].name = g_strdup (name);
- format->fields[i].width = field->width;
- format->fields[i].offset = offset;
-
- offset += field->width;
- ++i;
+ strncpy (field->name, bin_field->name, BIN_MAX_NAME - 1);
+ field->offset = offset;
+ field->type = bin_field->type;
+ field->width = get_field_width (bin_field);
+
+#if 0
+ g_print ("created field %s with type %d\n", field->name, field->type);
+#endif
- g_free (field);
+ offset += record->fields[i].width;
}
+
+ parser->records = g_list_prepend (parser->records, record);
- g_queue_free (queue);
-
- return format;
+ return record;
}
-static const BinField *
-get_field (BinFormat *format,
- const gchar *name)
+gboolean
+bin_parser_error (BinParser *parser)
{
- int i;
-
- for (i = 0; i < format->n_fields; ++i)
- {
- BinField *field = &(format->fields[i]);
-
- if (strcmp (field->name, name) == 0)
- return field;
- }
+ return parser->error_msg != NULL;
+}
+
+void
+bin_parser_clear_error (BinParser *parser)
+{
+ parser->error_msg = NULL;
+}
+
+const gchar *
+bin_parser_get_error_msg (BinParser *parser)
+{
+ return parser->error_msg;
+}
+
+void
+bin_parser_set_endian (BinParser *parser,
+ BinEndian endian)
+{
+ parser->endian = endian;
+}
+
+/* Move current offset */
+gsize
+bin_parser_get_offset (BinParser *parser)
+{
+ return parser->offset;
+}
+
+void
+bin_parser_set_offset (BinParser *parser,
+ gsize offset)
+{
+ parser->offset = offset;
+}
+
+void
+bin_parser_align (BinParser *parser,
+ gsize byte_width)
+{
+ parser->offset = align (parser->offset, byte_width);
+}
+
+gsize
+bin_record_get_size (BinRecord *record)
+{
+ Field *last_field = &(record->fields[record->n_fields - 1]);
+ Field *first_field = &(record->fields[0]);
+
+ /* align to first field, since that's the alignment of the record
+ * following this one
+ */
- return NULL;
+ return align (last_field->offset + last_field->width, first_field->width);
+}
+
+void
+bin_parser_seek_record (BinParser *parser,
+ BinRecord *record,
+ int n_records)
+{
+ gsize record_size = bin_record_get_size (record);
+
+ parser->offset += record_size * n_records;
+}
+
+void
+bin_parser_save (BinParser *parser)
+{
+ parser->saved_offset = parser->offset;
+}
+
+void
+bin_parser_restore (BinParser *parser)
+{
+ parser->offset = parser->saved_offset;
}
+/* retrieve data */
static guint64
convert_uint (const guchar *data,
- gboolean big_endian,
- int width)
+ BinEndian endian,
+ BinType type)
{
guint8 r8;
guint16 r16;
@@ -205,39 +279,41 @@ convert_uint (const guchar *data,
if (width == 4)
g_print ("converting at %p %d %d %d %d\n", data, data[0], data[1], data[2], data[3]);
#endif
+
+ /* FIXME: check that we are within the file */
- switch (width)
+ switch (type)
{
- case 1:
+ case BIN_UINT8:
r8 = *(guint8 *)data;
return r8;
- case 2:
+ case BIN_UINT16:
r16 = *(guint16 *)data;
- if (big_endian)
+ if (endian == BIN_BIG_ENDIAN)
r16 = GUINT16_FROM_BE (r16);
- else
+ else if (endian == BIN_LITTLE_ENDIAN)
r16 = GUINT16_FROM_LE (r16);
return r16;
- case 4:
+ case BIN_UINT32:
r32 = *(guint32 *)data;
- if (big_endian)
+ if (endian == BIN_BIG_ENDIAN)
r32 = GUINT32_FROM_BE (r32);
- else
+ else if (endian == BIN_LITTLE_ENDIAN)
r32 = GUINT32_FROM_LE (r32);
return r32;
- case 8:
+ case BIN_UINT64:
r64 = *(guint64 *)data;
- if (big_endian)
+ if (endian == BIN_BIG_ENDIAN)
r64 = GUINT64_FROM_BE (r64);
- else
+ else if (endian == BIN_LITTLE_ENDIAN)
r64 = GUINT64_FROM_LE (r64);
return r64;
@@ -248,61 +324,36 @@ convert_uint (const guchar *data,
}
}
-guint32
-bin_parser_get_uint32 (BinParser *parser)
-{
- guint32 result;
-
- /* FIXME: This is broken for two reasons:
- *
- * (1) It assumes file_endian==machine_endian
- *
- * (2) It doesn't check for file overrun.
- *
- */
- result = *(guint32 *)(parser->data + parser->offset);
-
- parser->offset += 4;
-
- return result;
-}
-
-static BinField *
-new_field_uint (int width)
+static int
+get_uint_width (BinType type)
{
- BinField *field = g_new0 (BinField, 1);
-
- field->width = width;
- field->align = width;
-
- return field;
-}
-
-BinField *
-bin_field_new_uint8 (void)
-{
- return new_field_uint (1);
+ switch (type)
+ {
+ case BIN_UINT8:
+ return 1;
+ case BIN_UINT16:
+ return 2;
+ case BIN_UINT32:
+ return 4;
+ case BIN_UINT64:
+ return 8;
+ default:
+ return -1;
+ }
}
-BinField *
-bin_field_new_uint16 (void)
+guint64
+bin_parser_get_uint (BinParser *parser,
+ BinType type)
{
- return new_field_uint (2);
-}
+ guint64 r = convert_uint (parser->data + parser->offset, parser->endian, type);
-BinField *
-bin_field_new_uint32 (void)
-{
- return new_field_uint (4);
-}
+ parser->offset += get_uint_width (type);
-BinField *
-bin_field_new_uint64 (void)
-{
- return new_field_uint (8);
+ return r;
}
-const gchar *
+const char *
bin_parser_get_string (BinParser *parser)
{
const char *result;
@@ -314,116 +365,46 @@ bin_parser_get_string (BinParser *parser)
parser->offset += strlen (result) + 1;
return result;
-}
-
-void
-bin_parser_align (BinParser *parser,
- gsize byte_width)
-{
- parser->offset = align (parser->offset, byte_width);
-}
-
-void
-bin_parser_goto (BinParser *parser,
- gsize offset)
-{
- parser->offset = offset;
-}
-
-BinParser *
-bin_record_get_parser (BinRecord *record)
-{
- return record->parser;
-}
-
-const gchar *
-bin_record_get_string_indirect (BinRecord *record,
- const char *name,
- gsize str_table)
-{
- BinParser *parser = record->parser;
- const char *result = NULL;
- gsize index;
- gsize saved_offset;
-
- saved_offset = bin_parser_get_offset (record->parser);
- index = bin_record_get_uint (record, name);
-
- bin_parser_goto (record->parser, str_table + index);
-
- result = bin_parser_get_string (parser);
-
- bin_parser_goto (record->parser, saved_offset);
-
- return result;
}
-gsize
-bin_parser_get_offset (BinParser *parser)
-{
- g_return_val_if_fail (parser != NULL, 0);
-
- return parser->offset;
-}
-
-const guchar *
-bin_parser_get_data (BinParser *parser)
-{
- return parser->data;
-}
-
-gsize
-bin_parser_get_length (BinParser *parser)
-{
- return parser->length;
-}
-
-
-/* Record */
-BinRecord *
-bin_parser_get_record (BinParser *parser,
- BinFormat *format,
- gsize offset)
+static const Field *
+get_field (BinRecord *format,
+ const gchar *name)
{
- BinRecord *record;
+ int i;
- if (!parser->cache_in_use)
- {
- parser->cache_in_use = TRUE;
- record = &(parser->cache);
- }
- else
+ for (i = 0; i < format->n_fields; ++i)
{
- record = g_new0 (BinRecord, 1);
- }
+ Field *field = &(format->fields[i]);
- record->parser = parser;
- record->index = 0;
- record->offset = offset;
- record->format = format;
-
- return record;
-}
-
-void
-bin_record_free (BinRecord *record)
-{
- if (record == &(record->parser->cache))
- record->parser->cache_in_use = FALSE;
- else
- g_free (record);
+ if (strcmp (field->name, name) == 0)
+ {
+#if 0
+ g_print ("found field: %s (offset: %d, type %d)\n", field->name, field->offset, field->type);
+#endif
+
+
+ return field;
+ }
+ }
+
+ return NULL;
}
guint64
-bin_record_get_uint (BinRecord *record,
- const char *name)
+bin_parser_get_uint_field (BinParser *parser,
+ BinRecord *record,
+ const char *name)
{
- const guint8 *pos;
- const BinField *field;
+ const Field *field = get_field (record, name);
+ const guchar *pos;
- field = get_field (record->format, name);
- pos = record->parser->data + record->offset + field->offset;
+#if 0
+ g_print ("moving to %d (%d + %d)\n", parser->offset + field->offset, parser->offset, field->offset);
+#endif
+
+ pos = parser->data + parser->offset + field->offset;
#if 0
g_print (" record offset: %d\n", record->offset);
@@ -431,7 +412,7 @@ bin_record_get_uint (BinRecord *record,
g_print (" field offset %d\n", field->offset);
#endif
- if (record->offset + field->offset + field->width > record->parser->length)
+ if (pos > parser->data + parser->length)
{
/* FIXME: generate error */
return 0;
@@ -441,34 +422,5 @@ bin_record_get_uint (BinRecord *record,
g_print (" uint %d at %p => %d\n", field->width, pos, convert_uint (pos, record->format->big_endian, field->width));
#endif
- return convert_uint (pos, record->format->big_endian, field->width);
-}
-
-void
-bin_record_index (BinRecord *record,
- int index)
-{
- gsize format_size = bin_format_get_size (record->format);
-
- record->offset -= record->index * format_size;
- record->offset += index * format_size;
- record->index = index;
-}
-
-gsize
-bin_record_get_offset (BinRecord *record)
-{
- return record->offset;
-}
-
-/* Fields */
-
-BinField *
-bin_field_new_fixed_array (int n_elements,
- int element_size)
-{
- BinField *field = g_new0 (BinField, 1);
- field->width = n_elements * element_size;
- field->align = element_size;
- return field;
+ return convert_uint (pos, parser->endian, field->type);
}
diff --git a/binparser.h b/binparser.h
index 503e397..a0b423c 100644
--- a/binparser.h
+++ b/binparser.h
@@ -17,52 +17,98 @@
*/
#include <glib.h>
-typedef struct BinField BinField;
-typedef struct BinFormat BinFormat;
typedef struct BinParser BinParser;
typedef struct BinRecord BinRecord;
+typedef struct BinField BinField;
+
+/* The model is:
+ *
+ * BinParser has an offset associated with it. This offset can be
+ * manipulated with methods
+ *
+ * goto - go to absolute position from file start
+ * goto_rel - go to relative position
+ * goto_record_rel - skip the given number of records
+ * align - move forward until aligned to given width
+ * save/restore - save/restore the current offset (single slot, not a stack)
+ *
+ * and queried with
+ *
+ * get_offset - return current offset in bytes from start
+ *
+ * data can be retrieved with
+ *
+ * get_uint - return a uint of given width, and skip
+ * get_string - return a null-terminated string, and skip
+ * get_pstring - return a 'pascal' string with given length
+ *
+ * get_uint_field - return the named field
+ *
+ * formats should probably be definable as static data.
+ *
+ * A bin parser also has an associated "status" with it. This can be
+ * OK, or error. It is ok to use a parser with an error status, but
+ * the data returned will not be meaningful.
+ *
+ *
+ */
+
+#define BIN_MAX_NAME 52
+
+typedef enum
+{
+ BIN_LITTLE_ENDIAN,
+ BIN_BIG_ENDIAN,
+ BIN_NATIVE_ENDIAN
+} BinEndian;
+
+typedef enum
+{
+ /* More types can (and probably will) be added in the future */
+ BIN_UINT8,
+ BIN_UINT16,
+ BIN_UINT32,
+ BIN_UINT64,
+ BIN_UNINTERPRETED
+} BinType;
+
+struct BinField {
+ const char name[BIN_MAX_NAME];
+ char type;
+ char n_bytes; /* number of bytes if type
+ * is UNINTERPRETED */
+};
-/* BinParser */
BinParser * bin_parser_new (const guchar *data,
gsize length);
void bin_parser_free (BinParser *parser);
const guchar *bin_parser_get_data (BinParser *parser);
gsize bin_parser_get_length (BinParser *parser);
-gsize bin_parser_get_offset (BinParser *parser);
-void bin_parser_align (BinParser *parser,
- gsize byte_width);
-void bin_parser_goto (BinParser *parser,
- gsize offset);
-const char * bin_parser_get_string (BinParser *parser);
-guint32 bin_parser_get_uint32 (BinParser *parser);
-
-/* Record */
-BinRecord * bin_parser_get_record (BinParser *parser,
- BinFormat *format,
- gsize offset);
-void bin_record_free (BinRecord *record);
-guint64 bin_record_get_uint (BinRecord *record,
- const char *name);
-void bin_record_index (BinRecord *record,
- int index);
-gsize bin_record_get_offset (BinRecord *record);
-const gchar *bin_record_get_string_indirect (BinRecord *record,
- const char *name,
- gsize str_table);
-BinParser * bin_record_get_parser (BinRecord *record);
+void bin_parser_set_endian (BinParser *parser,
+ BinEndian endian);
+gboolean bin_parser_error (BinParser *parser);
+void bin_parser_clear_error (BinParser *parser);
+const gchar * bin_parser_get_error_msg (BinParser *parser);
+BinRecord * bin_parser_create_record (BinParser *parser,
+ const BinField *fields);
+gsize bin_record_get_size (BinRecord *record);
+/* Move current offset */
+gsize bin_parser_get_offset (BinParser *parser);
+void bin_parser_set_offset (BinParser *parser,
+ gsize offset);
+void bin_parser_align (BinParser *parser,
+ gsize byte_width);
+void bin_parser_seek_record (BinParser *parser,
+ BinRecord *record,
+ int n_records);
+void bin_parser_save (BinParser *parser);
+void bin_parser_restore (BinParser *parser);
-/* BinFormat */
-BinFormat *bin_format_new (gboolean big_endian,
- const char *name,
- BinField *field,
- ...);
-gsize bin_format_get_size (BinFormat *format);
-
-/* BinField */
-BinField *bin_field_new_uint8 (void);
-BinField *bin_field_new_uint16 (void);
-BinField *bin_field_new_uint32 (void);
-BinField *bin_field_new_uint64 (void);
-BinField *bin_field_new_fixed_array (int n_elements,
- int element_size);
+/* retrieve data */
+guint64 bin_parser_get_uint (BinParser *parser,
+ BinType type);
+const char * bin_parser_get_string (BinParser *parser);
+guint64 bin_parser_get_uint_field (BinParser *parser,
+ BinRecord *record,
+ const char *field);
diff --git a/elfparser.c b/elfparser.c
index a16be6d..d0e6273 100644
--- a/elfparser.c
+++ b/elfparser.c
@@ -44,51 +44,71 @@ struct ElfParser
{
BinParser * parser;
- BinFormat * header;
- BinFormat * strtab_format;
- BinFormat * shn_entry;
- BinFormat * sym_format;
+ BinRecord * header;
+ BinRecord * strtab_format;
+ BinRecord * shn_entry;
+ BinRecord * sym_format;
int n_sections;
Section ** sections;
-
+
int n_symbols;
ElfSym * symbols;
gsize sym_strings;
-
+
GMappedFile * file;
-
+
const Section * text_section;
};
static gboolean parse_elf_signature (const guchar *data, gsize length,
gboolean *is_64, gboolean *is_be);
-static void make_formats (ElfParser *parser,
- gboolean is_64,
- gboolean is_big_endian);
+static void make_formats (ElfParser *parser, gboolean is_64);
+
+static const char *
+get_string_indirect (BinParser *parser,
+ BinRecord *record,
+ const char *name,
+ gsize str_table)
+{
+ const char *result = NULL;
+ gsize index;
+
+ bin_parser_save (parser);
+
+ index = bin_parser_get_uint_field (parser, record, name);
+
+ bin_parser_set_offset (parser, str_table + index);
+
+ result = bin_parser_get_string (parser);
+
+ bin_parser_restore (parser);
+
+ return result;
+}
static Section *
-section_new (BinRecord *record,
+section_new (BinParser *parser,
+ BinRecord *record,
gsize name_table)
{
Section *section = g_new (Section, 1);
guint64 flags;
- section->name = bin_record_get_string_indirect (
- record, "sh_name", name_table);
- section->size = bin_record_get_uint (record, "sh_size");
- section->offset = bin_record_get_uint (record, "sh_offset");
-
- flags = bin_record_get_uint (record, "sh_flags");
+ section->name = get_string_indirect (parser, record, "sh_name", name_table);
+ section->size = bin_parser_get_uint_field (parser, record, "sh_size");
+ section->offset = bin_parser_get_uint_field (parser, record, "sh_offset");
+
+ flags = bin_parser_get_uint_field (parser, record, "sh_flags");
section->allocated = !!(flags & SHF_ALLOC);
-
+
if (section->allocated)
- section->load_address = bin_record_get_uint (record, "sh_addr");
+ section->load_address = bin_parser_get_uint_field (parser, record, "sh_addr");
else
section->load_address = 0;
-
- section->type = bin_record_get_uint (record, "sh_type");
-
+
+ section->type = bin_parser_get_uint_field (parser, record, "sh_type");
+
return section;
}
@@ -104,7 +124,7 @@ find_section (ElfParser *parser,
guint type)
{
int i;
-
+
for (i = 0; i < parser->n_sections; ++i)
{
Section *section = parser->sections[i];
@@ -112,7 +132,7 @@ find_section (ElfParser *parser,
if (strcmp (section->name, name) == 0 && section->type == type)
return section;
}
-
+
return NULL;
}
@@ -126,7 +146,6 @@ elf_parser_new_from_data (const guchar *data,
gsize section_names;
gsize section_headers;
int i;
- BinRecord *elf_header, *shn_entry;
if (!parse_elf_signature (data, length, &is_64, &is_big_endian))
{
@@ -138,41 +157,47 @@ elf_parser_new_from_data (const guchar *data,
parser->parser = bin_parser_new (data, length);
- make_formats (parser, is_64, is_big_endian);
+ if (is_big_endian)
+ bin_parser_set_endian (parser->parser, BIN_BIG_ENDIAN);
+ else
+ bin_parser_set_endian (parser->parser, BIN_LITTLE_ENDIAN);
- /* Read ELF header */
+ make_formats (parser, is_64);
- elf_header = bin_parser_get_record (parser->parser, parser->header, 0);
- parser->n_sections = bin_record_get_uint (elf_header, "e_shnum");
- section_names_idx = bin_record_get_uint (elf_header, "e_shstrndx");
- section_headers = bin_record_get_uint (elf_header, "e_shoff");
+ /* Read ELF header */
+
+ bin_parser_set_offset (parser->parser, 0);
- bin_record_free (elf_header);
+ parser->n_sections = bin_parser_get_uint_field (parser->parser, parser->header, "e_shnum");
+ section_names_idx = bin_parser_get_uint_field (parser->parser, parser->header, "e_shstrndx");
+ section_headers = bin_parser_get_uint_field (parser->parser, parser->header, "e_shoff");
/* Read section headers */
parser->sections = g_new0 (Section *, parser->n_sections);
- shn_entry = bin_parser_get_record (parser->parser,
- parser->shn_entry, section_headers);
-
- bin_record_index (shn_entry, section_names_idx);
- section_names = bin_record_get_uint (shn_entry, "sh_offset");
+ bin_parser_set_offset (parser->parser, section_headers);
+
+ bin_parser_seek_record (parser->parser, parser->shn_entry,
+ section_names_idx);
+
+ section_names = bin_parser_get_uint_field (parser->parser, parser->shn_entry, "sh_offset");
for (i = 0; i < parser->n_sections; ++i)
{
- bin_record_index (shn_entry, i);
+ bin_parser_set_offset (parser->parser, section_headers);
+ bin_parser_seek_record (parser->parser, parser->shn_entry, i);
- parser->sections[i] = section_new (shn_entry, section_names);
+ parser->sections[i] = section_new (parser->parser,
+ parser->shn_entry,
+ section_names);
}
-
+
/* Cache the text section */
parser->text_section = find_section (parser, ".text", SHT_PROGBITS);
if (!parser->text_section)
parser->text_section = find_section (parser, ".text", SHT_NOBITS);
- bin_record_free (shn_entry);
-
return parser;
}
@@ -185,23 +210,23 @@ elf_parser_new (const char *filename,
ElfParser *parser;
GMappedFile *file = g_mapped_file_new (filename, FALSE, NULL);
-
+
if (!file)
return NULL;
-
+
#if 0
g_print ("elf parser new : %s\n", filename);
#endif
data = (guchar *)g_mapped_file_get_contents (file);
length = g_mapped_file_get_length (file);
-
+
#if 0
g_print ("data %p: for %s\n", data, filename);
#endif
-
+
parser = elf_parser_new_from_data (data, length);
-
+
if (!parser)
{
g_mapped_file_free (file);
@@ -209,14 +234,14 @@ elf_parser_new (const char *filename,
}
parser->file = file;
-
+
#if 0
g_print ("Elf file: %s (debug: %s)\n",
filename, elf_parser_get_debug_link (parser, NULL));
#endif
parser->file = file;
-
+
#if 0
if (!parser->symbols)
g_print ("at this point %s has no symbols\n", filename);
@@ -277,15 +302,15 @@ elf_parser_get_crc32 (ElfParser *parser)
gsize length;
gulong crc;
gsize i;
-
+
data = bin_parser_get_data (parser->parser);
length = bin_parser_get_length (parser->parser);
-
+
crc = 0xffffffff;
-
+
for (i = 0; i < length; ++i)
crc = crc32_table[(crc ^ data[i]) & 0xff] ^ (crc >> 8);
-
+
/* We just read the entire file into memory, but we only really
* need the symbol table, so swap the whole thing out.
*
@@ -301,14 +326,14 @@ void
elf_parser_free (ElfParser *parser)
{
int i;
-
+
for (i = 0; i < parser->n_sections; ++i)
section_free (parser->sections[i]);
g_free (parser->sections);
-
+
if (parser->file)
g_mapped_file_free (parser->file);
-
+
bin_parser_free (parser->parser);
g_free (parser);
@@ -323,7 +348,7 @@ elf_demangle (const char *name)
#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
char *demangled = sysprof_cplus_demangle (name, DMGL_PARAMS | DMGL_ANSI);
-
+
if (demangled)
return demangled;
else
@@ -338,7 +363,7 @@ compare_sym (const void *a, const void *b)
{
const ElfSym *sym_a = a;
const ElfSym *sym_b = b;
-
+
if (sym_a->address < sym_b->address)
return -1;
else if (sym_a->address == sym_b->address)
@@ -352,11 +377,11 @@ static void
dump_symbols (ElfParser *parser, ElfSym *syms, guint n_syms)
{
int i;
-
+
for (i = 0; i < n_syms; ++i)
{
ElfSym *s = &(syms[i]);
-
+
g_print (" %s: %lx\n", elf_parser_get_sym_name (parser, s), s->address);
}
}
@@ -367,28 +392,32 @@ read_table (ElfParser *parser,
const Section *sym_table,
const Section *str_table)
{
- int sym_size = bin_format_get_size (parser->sym_format);
+ int sym_size = bin_record_get_size (parser->sym_format);
int i;
int n_functions;
- BinRecord *symbol;
-
+
parser->n_symbols = sym_table->size / sym_size;
parser->symbols = g_new (ElfSym, parser->n_symbols);
- symbol = bin_parser_get_record (parser->parser, parser->sym_format, sym_table->offset);
-
+#if 0
+ g_print ("sym table offset: %d\n", sym_table->offset);
+#endif
+
+ bin_parser_set_offset (parser->parser, sym_table->offset);
+
n_functions = 0;
+#if 0
+ g_print ("n syms: %d\n", parser->n_symbols);
+#endif
for (i = 0; i < parser->n_symbols; ++i)
{
guint info;
gulong addr;
gulong offset;
-
- bin_record_index (symbol, i);
- info = bin_record_get_uint (symbol, "st_info");
- addr = bin_record_get_uint (symbol, "st_value");
- offset = bin_record_get_offset (symbol);
+ info = bin_parser_get_uint_field (parser->parser, parser->sym_format, "st_info");
+ addr = bin_parser_get_uint_field (parser->parser, parser->sym_format, "st_value");
+ offset = bin_parser_get_offset (parser->parser);
if (addr != 0 &&
(info & 0xf) == STT_FUNC &&
@@ -399,15 +428,22 @@ read_table (ElfParser *parser,
parser->symbols[n_functions].offset = offset;
n_functions++;
- }
+ }
+
+#if 0
+ g_print ("read symbol: %s\n", get_string_indirect (parser->parser,
+ parser->sym_format, "st_name",
+ str_table->offset));
+#endif
+
+
+ bin_parser_seek_record (parser->parser, parser->sym_format, 1);
}
-
- bin_record_free (symbol);
-
+
parser->sym_strings = str_table->offset;
parser->n_symbols = n_functions;
parser->symbols = g_renew (ElfSym, parser->symbols, parser->n_symbols);
-
+
qsort (parser->symbols, parser->n_symbols, sizeof (ElfSym), compare_sym);
}
@@ -418,7 +454,7 @@ read_symbols (ElfParser *parser)
const Section *strtab = find_section (parser, ".strtab", SHT_STRTAB);
const Section *dynsym = find_section (parser, ".dynsym", SHT_DYNSYM);
const Section *dynstr = find_section (parser, ".dynstr", SHT_STRTAB);
-
+
if (symtab && strtab)
{
read_table (parser, symtab, strtab);
@@ -451,10 +487,10 @@ do_lookup (ElfSym *symbols,
{
if (address >= symbols[last].address)
return &(symbols[last]);
-
+
last--;
}
-
+
return NULL;
}
else
@@ -476,38 +512,43 @@ elf_parser_lookup_symbol (ElfParser *parser,
const ElfSym *result;
if (!parser->symbols)
+ {
+#if 0
+ g_print ("reading symbols\n");
+#endif
read_symbols (parser);
-
+ }
+
if (parser->n_symbols == 0)
return NULL;
-
+
if (!parser->text_section)
return NULL;
address += parser->text_section->load_address;
-
+
#if 0
g_print ("the address we are looking up is %p\n", address);
#endif
result = do_lookup (parser->symbols, address, 0, parser->n_symbols - 1);
-
+
#if 0
if (result)
{
g_print ("found %s at %lx\n", elf_parser_get_sym_name (parser, result), result->address);
}
#endif
-
+
if (result)
{
gulong size;
- BinRecord *record;
- record = bin_parser_get_record (parser->parser, parser->sym_format, result->offset);
- size = bin_record_get_uint (record, "st_size");
- bin_record_free (record);
-
+ bin_parser_set_offset (parser->parser, result->offset);
+
+ size = bin_parser_get_uint_field (parser->parser,
+ parser->sym_format, "st_size");
+
if (result->address + size <= address)
result = NULL;
}
@@ -519,10 +560,10 @@ gulong
elf_parser_get_text_offset (ElfParser *parser)
{
g_return_val_if_fail (parser != NULL, (gulong)-1);
-
+
if (!parser->text_section)
return (gulong)-1;
-
+
return parser->text_section->offset;
}
@@ -532,18 +573,18 @@ elf_parser_get_debug_link (ElfParser *parser, guint32 *crc32)
const Section *debug_link = find_section (parser, ".gnu_debuglink",
SHT_PROGBITS);
const gchar *result;
-
+
if (!debug_link)
return NULL;
-
- bin_parser_goto (parser->parser, debug_link->offset);
-
+
+ bin_parser_set_offset (parser->parser, debug_link->offset);
+
result = bin_parser_get_string (parser->parser);
-
+
bin_parser_align (parser->parser, 4);
-
+
if (crc32)
- *crc32 = bin_parser_get_uint32 (parser->parser);
+ *crc32 = bin_parser_get_uint (parser->parser, BIN_UINT32);
return result;
}
@@ -552,7 +593,7 @@ const guchar *
elf_parser_get_eh_frame (ElfParser *parser)
{
const Section *eh_frame = find_section (parser, ".eh_frame", SHT_PROGBITS);
-
+
if (eh_frame)
return bin_parser_get_data (parser->parser) + eh_frame->offset;
else
@@ -564,16 +605,12 @@ elf_parser_get_sym_name (ElfParser *parser,
const ElfSym *sym)
{
const char *result;
- BinRecord *symbol;
-
+
g_return_val_if_fail (parser != NULL, NULL);
-
- symbol = bin_parser_get_record (parser->parser, parser->sym_format, sym->offset);
-
- result = bin_record_get_string_indirect (symbol, "st_name",
- parser->sym_strings);
- bin_record_free (symbol);
+ bin_parser_set_offset (parser->parser, sym->offset);
+ result = get_string_indirect (
+ parser->parser, parser->sym_format, "st_name", parser->sym_strings);
return result;
}
@@ -616,81 +653,127 @@ parse_elf_signature (const guchar *data,
}
if (is_64)
- *is_64 = (EI_CLASS == ELFCLASS64);
+ *is_64 = (data[EI_CLASS] == ELFCLASS64);
if (is_be)
- *is_be = (EI_DATA == ELFDATA2MSB);
+ *is_be = (data[EI_DATA] == ELFDATA2MSB);
return TRUE;
}
-static BinField *
-make_word (gboolean is_64)
-{
- if (is_64)
- return bin_field_new_uint64 ();
- else
- return bin_field_new_uint32 ();
-}
-
static void
-make_formats (ElfParser *parser, gboolean is_64, gboolean is_big_endian)
+get_formats (gboolean is_64,
+ const BinField **elf_header,
+ const BinField **shn_entry,
+ const BinField **sym_format)
{
- parser->header = bin_format_new (
- is_big_endian,
- "e_ident", bin_field_new_fixed_array (EI_NIDENT, 1),
- "e_type", bin_field_new_uint16 (),
- "e_machine", bin_field_new_uint16 (),
- "e_version", bin_field_new_uint32 (),
- "e_entry", make_word (is_64),
- "e_phoff", make_word (is_64),
- "e_shoff", make_word (is_64),
- "e_flags", bin_field_new_uint32 (),
- "e_ehsize", bin_field_new_uint16 (),
- "e_phentsize", bin_field_new_uint16 (),
- "e_phnum", bin_field_new_uint16 (),
- "e_shentsize", bin_field_new_uint16 (),
- "e_shnum", bin_field_new_uint16 (),
- "e_shstrndx", bin_field_new_uint16 (),
- NULL);
-
- parser->shn_entry = bin_format_new (
- is_big_endian,
- "sh_name", bin_field_new_uint32 (),
- "sh_type", bin_field_new_uint32 (),
- "sh_flags", make_word (is_64),
- "sh_addr", make_word (is_64),
- "sh_offset", make_word (is_64),
- "sh_size", make_word (is_64),
- "sh_link", bin_field_new_uint32 (),
- "sh_info", bin_field_new_uint32 (),
- "sh_addralign", make_word (is_64),
- "sh_entsize", make_word (is_64),
- NULL);
+ static const BinField elf64_header[] = {
+ { "e_ident", BIN_UNINTERPRETED, EI_NIDENT },
+ { "e_type", BIN_UINT16 },
+ { "e_machine", BIN_UINT16 },
+ { "e_version", BIN_UINT32 },
+ { "e_entry", BIN_UINT64 },
+ { "e_phoff", BIN_UINT64 },
+ { "e_shoff", BIN_UINT64 },
+ { "e_flags", BIN_UINT32 },
+ { "e_ehsize", BIN_UINT16 },
+ { "e_phentsize", BIN_UINT16 },
+ { "e_phnum", BIN_UINT16 },
+ { "e_shentsize", BIN_UINT16 },
+ { "e_shnum", BIN_UINT16 },
+ { "e_shstrndx", BIN_UINT16 },
+ { "" },
+ };
+
+ static const BinField elf32_header[] = {
+ { "e_ident", BIN_UNINTERPRETED, EI_NIDENT },
+ { "e_type", BIN_UINT16 },
+ { "e_machine", BIN_UINT16 },
+ { "e_version", BIN_UINT32 },
+ { "e_entry", BIN_UINT32 },
+ { "e_phoff", BIN_UINT32 },
+ { "e_shoff", BIN_UINT32 },
+ { "e_flags", BIN_UINT32 },
+ { "e_ehsize", BIN_UINT16 },
+ { "e_phentsize", BIN_UINT16 },
+ { "e_phnum", BIN_UINT16 },
+ { "e_shentsize", BIN_UINT16 },
+ { "e_shnum", BIN_UINT16 },
+ { "e_shstrndx", BIN_UINT16 },
+ { "" },
+ };
+
+ static const BinField shn64_entry[] = {
+ { "sh_name", BIN_UINT32 },
+ { "sh_type", BIN_UINT32 },
+ { "sh_flags", BIN_UINT64 },
+ { "sh_addr", BIN_UINT64 },
+ { "sh_offset", BIN_UINT64 },
+ { "sh_size", BIN_UINT64 },
+ { "sh_link", BIN_UINT32 },
+ { "sh_info", BIN_UINT32 },
+ { "sh_addralign", BIN_UINT64 },
+ { "sh_entsize", BIN_UINT64 },
+ { "" }
+ };
+
+ static const BinField shn32_entry[] = {
+ { "sh_name", BIN_UINT32 },
+ { "sh_type", BIN_UINT32 },
+ { "sh_flags", BIN_UINT32 },
+ { "sh_addr", BIN_UINT32 },
+ { "sh_offset", BIN_UINT32 },
+ { "sh_size", BIN_UINT32 },
+ { "sh_link", BIN_UINT32 },
+ { "sh_info", BIN_UINT32 },
+ { "sh_addralign", BIN_UINT32 },
+ { "sh_entsize", BIN_UINT32 },
+ { "" }
+ };
+ static const BinField sym64_format[] = {
+ { "st_name", BIN_UINT32 },
+ { "st_info", BIN_UINT8 },
+ { "st_other", BIN_UINT8 },
+ { "st_shndx", BIN_UINT16 },
+ { "st_value", BIN_UINT64 },
+ { "st_size", BIN_UINT64 },
+ { "" }
+ };
+
+ static const BinField sym32_format[] = {
+ { "st_name", BIN_UINT32 },
+ { "st_value", BIN_UINT32 },
+ { "st_size", BIN_UINT32 },
+ { "st_info", BIN_UINT8 },
+ { "st_other", BIN_UINT8 },
+ { "st_shndx", BIN_UINT16 },
+ { "" },
+ };
+
if (is_64)
{
- parser->sym_format = bin_format_new (
- is_big_endian,
- "st_name", bin_field_new_uint32 (),
- "st_info", bin_field_new_uint8 (),
- "st_other", bin_field_new_uint8 (),
- "st_shndx", bin_field_new_uint16 (),
- "st_value", bin_field_new_uint64 (),
- "st_size", bin_field_new_uint64 (),
- NULL);
+ *elf_header = elf64_header;
+ *shn_entry = shn64_entry;
+ *sym_format = sym64_format;
}
else
{
- parser->sym_format = bin_format_new (
- is_big_endian,
- "st_name", bin_field_new_uint32 (),
- "st_value", bin_field_new_uint32 (),
- "st_size", bin_field_new_uint32 (),
- "st_info", bin_field_new_uint8 (),
- "st_other", bin_field_new_uint8 (),
- "st_shndx", bin_field_new_uint16 (),
- NULL);
+ *elf_header = elf32_header;
+ *shn_entry = shn32_entry;
+ *sym_format = sym32_format;
}
}
+static void
+make_formats (ElfParser *parser, gboolean is_64)
+{
+ const BinField *elf_header, *shn_entry, *sym_format;
+
+ get_formats (is_64, &elf_header, &shn_entry, &sym_format);
+
+ parser->header = bin_parser_create_record (parser->parser, elf_header);
+ parser->shn_entry = bin_parser_create_record (parser->parser, shn_entry);
+ parser->sym_format = bin_parser_create_record (parser->parser, sym_format);
+}
+