diff options
author | sandmann <sandmann> | 2004-05-18 07:48:50 +0000 |
---|---|---|
committer | sandmann <sandmann> | 2004-05-18 07:48:50 +0000 |
commit | cc477a23dc9d7fb4b534d67265b3e3b2a442bb0a (patch) | |
tree | 291c79830282b84f849615afcd009684a62ce794 /tests/htmlparser.c |
Initial revision
Diffstat (limited to 'tests/htmlparser.c')
-rw-r--r-- | tests/htmlparser.c | 171 |
1 files changed, 171 insertions, 0 deletions
diff --git a/tests/htmlparser.c b/tests/htmlparser.c new file mode 100644 index 0000000..b36a862 --- /dev/null +++ b/tests/htmlparser.c @@ -0,0 +1,171 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- */ + +/* Lac - Library for asynchronous communication + * Copyright (C) 2002 Søren Sandmann (sandmann@daimi.au.dk) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include "htmlparser.h" +#include <libxml/HTMLparser.h> + +struct HtmlParser { + HtmlParserFunc callback; + gpointer data; + GString * unparsed; + xmlParserCtxt * xml_context; +}; + +static void +start_element_cb (void *ctx, + const xmlChar *name, + const xmlChar **atts) +{ + HtmlParser *parser = ctx; + HtmlParserEvent event; + GPtrArray *attributes = g_ptr_array_new (); + int i; + + event.type = HTML_PARSER_BEGIN_ELEMENT; + event.begin_element.name = name; + + if (atts) + { + const xmlChar **s; + for (s = atts; *s != NULL; ) + { + HtmlAttribute *attr = g_new (HtmlAttribute, 1); + + attr->name = *s++; + attr->content = *s++; + + g_ptr_array_add (attributes, attr); + } + } + + event.begin_element.attributes = attributes; + + parser->callback (parser, &event); + + for (i = 0; i < attributes->len; ++i) + g_free (attributes->pdata[i]); + g_ptr_array_free (attributes, TRUE); +} + +static void +end_element_cb (void *ctx, + const xmlChar *name) +{ + HtmlParser *parser = ctx; + HtmlParserEvent event; + + event.type = HTML_PARSER_END_ELEMENT; + event.end_element.name = name; + + parser->callback (parser, &event); +} + +static void +comment_cb (void *ctx, + const xmlChar *comment) +{ + HtmlParser *parser = ctx; + HtmlParserEvent event; + + event.type = HTML_PARSER_COMMENT; + event.comment.text = comment; + + parser->callback (parser, &event); +} + +static void +end_document_cb (void *ctx) +{ + HtmlParser *parser = ctx; + HtmlParserEvent event; + + event.type = HTML_PARSER_END_DOCUMENT; + + parser->callback (parser, &event); +} + +static htmlSAXHandler html_sax_handler = { + NULL, /* internalSubset */ + NULL, /* isStandalone */ + NULL, /* hasInternalSubset */ + NULL, /* hasExternalSubset */ + NULL, /* resolveEntity */ + NULL, /* getEntity */ + NULL, /* entityDecl */ + NULL, /* notationDecl */ + NULL, /* attributeDecl */ + NULL, /* elementDecl */ + NULL, /* unparsedEntityDecl */ + NULL, /* setDocumentLocator */ + + NULL, /* startDocument */ + end_document_cb, /* endDocument */ + start_element_cb, /* startElement */ + end_element_cb, /* endElement */ + NULL, /* reference */ + NULL, /* characters */ + NULL, /* ignorableWhitespace */ + NULL, /* processingInstruction */ + comment_cb, /* comment */ + NULL, /* warning */ + NULL, /* error */ + NULL, /* fatalError */ + NULL, /* getParameterEntity */ + NULL, /* cdataBlock */ + NULL /* externalSubset */ +}; + +HtmlParser * +html_parser_new (HtmlParserFunc f, + gpointer data) +{ + HtmlParser *html_parser = g_new (HtmlParser, 1); + + g_return_val_if_fail (f != NULL, NULL); + + html_parser->callback = f; + html_parser->data = data; + html_parser->unparsed = g_string_new (""); + html_parser->xml_context = htmlCreatePushParserCtxt ( + &html_sax_handler, html_parser, NULL, 0, NULL, -1); + + return html_parser; +} + +void +html_parser_push_data (HtmlParser *parser, + const gchar *data, + guint len) +{ + htmlParseChunk (parser->xml_context, data, len, 0); +} + +void +html_parser_push_eof (HtmlParser *parser) +{ + htmlParseChunk (parser->xml_context, NULL, 0, 1); +} + +gpointer +html_parser_get_data (HtmlParser *parser) +{ + return parser->data; +} |