summaryrefslogtreecommitdiff
path: root/glib
diff options
context:
space:
mode:
authorAdrian Perez de Castro <aperez@igalia.com>2013-05-09 12:01:59 +0300
committerCarlos Garcia Campos <carlosgc@gnome.org>2014-02-09 15:46:48 +0100
commit8072d4b0e3ea10b4308f8172891f769f30466133 (patch)
tree5a4d71bcf87f7704114d09d1146d27ac84f9e0a0 /glib
parent46b7470ae9846d7e6dbb72bbb3ff831acd954168 (diff)
glib: Expose the document structure tree
Implements a new PopplerStructureElement class, which builds upon StructTreeRoot and StructElement to expose the document structure of tagged PDFs in the GLib binding. Navigation of the structure tree is done by an iterator-based interface, using PopplerStructureElementIter. https://bugs.freedesktop.org/show_bug.cgi?id=64821
Diffstat (limited to 'glib')
-rw-r--r--glib/Makefile.am2
-rw-r--r--glib/poppler-private.h10
-rw-r--r--glib/poppler-structure-element.cc663
-rw-r--r--glib/poppler-structure-element.h112
-rw-r--r--glib/poppler.h3
-rw-r--r--glib/reference/poppler-docs.sgml2
-rw-r--r--glib/reference/poppler-sections.txt37
-rw-r--r--glib/reference/poppler.types2
8 files changed, 831 insertions, 0 deletions
diff --git a/glib/Makefile.am b/glib/Makefile.am
index a38e0523..040996ac 100644
--- a/glib/Makefile.am
+++ b/glib/Makefile.am
@@ -41,6 +41,7 @@ poppler_glib_public_headers = \
poppler-layer.h \
poppler-media.h \
poppler-movie.h \
+ poppler-structure-element.h \
poppler.h
poppler_glib_includedir = $(includedir)/poppler/glib
@@ -67,6 +68,7 @@ libpoppler_glib_la_SOURCES = \
poppler-cached-file-loader.h \
poppler-input-stream.cc \
poppler-input-stream.h \
+ poppler-structure-element.cc \
poppler.cc \
poppler-private.h
diff --git a/glib/poppler-private.h b/glib/poppler-private.h
index 93d0f230..874cfdba 100644
--- a/glib/poppler-private.h
+++ b/glib/poppler-private.h
@@ -17,6 +17,7 @@
#include <OptionalContent.h>
#include <CairoOutputDev.h>
#include <FileSpec.h>
+#include <StructElement.h>
#endif
struct _PopplerDocument
@@ -95,6 +96,15 @@ struct _PopplerLayer
gchar *title;
};
+
+/* Instance data of a PopplerStructureElement. */
+struct _PopplerStructureElement
+{
+ /*< private >*/
+ GObject parent_instance;
+ PopplerDocument *document; /* referenced when the element is created, released in finalize */
+ StructElement *elem; /* not owned by this object; never freed by it */
+};
+
GList *_poppler_document_get_layers (PopplerDocument *document);
GList *_poppler_document_get_layer_rbgroup (PopplerDocument *document,
Layer *layer);
diff --git a/glib/poppler-structure-element.cc b/glib/poppler-structure-element.cc
new file mode 100644
index 00000000..6b8778c1
--- /dev/null
+++ b/glib/poppler-structure-element.cc
@@ -0,0 +1,663 @@
+/* poppler-structure-element.cc: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include "config.h"
+
+#ifndef __GI_SCANNER__
+#include <StructTreeRoot.h>
+#include <StructElement.h>
+#include <GlobalParams.h>
+#include <UnicodeMap.h>
+#endif /* !__GI_SCANNER__ */
+
+#include "poppler.h"
+#include "poppler-private.h"
+#include "poppler-structure-element.h"
+
+
+/**
+ * SECTION:poppler-structure-element
+ * @short_description: Document structure element.
+ * @title: PopplerStructureElement
+ * @see_also: #PopplerStructureElementIter
+ *
+ * Instances of #PopplerStructureElement are used to describe the structure
+ * of a #PopplerDocument. To access the elements in the structure of the
+ * document, first use poppler_structure_element_iter_new() to obtain an
+ * iterator over the top level elements, and then use
+ * poppler_structure_element_iter_get_element() to retrieve each element.
+ */
+
+/* Class structure of PopplerStructureElement; it adds no members to GObjectClass. */
+typedef struct _PopplerStructureElementClass
+{
+ GObjectClass parent_class;
+} PopplerStructureElementClass;
+
+/* Registers the type as a direct subclass of GObject. */
+G_DEFINE_TYPE (PopplerStructureElement, poppler_structure_element, G_TYPE_OBJECT);
+
+/* Wraps @element in a new PopplerStructureElement.
+ *
+ * The wrapper takes its own reference on @document; @element is stored as a
+ * borrowed pointer and is never freed by the wrapper.  Both arguments must be
+ * valid, which is enforced with assertions.
+ */
+static PopplerStructureElement *
+_poppler_structure_element_new (PopplerDocument *document, StructElement *element)
+{
+  PopplerStructureElement *poppler_structure_element;
+
+  g_assert (POPPLER_IS_DOCUMENT (document));
+  g_assert (element);
+
+  /* No construction properties: a single NULL terminates the property list. */
+  poppler_structure_element = (PopplerStructureElement *) g_object_new (POPPLER_TYPE_STRUCTURE_ELEMENT, NULL);
+  poppler_structure_element->document = (PopplerDocument *) g_object_ref (document);
+  poppler_structure_element->elem = element;
+
+  return poppler_structure_element;
+}
+
+
+/* Instance initializer: intentionally empty.  The document and elem members
+ * are assigned by _poppler_structure_element_new() after construction. */
+static void
+poppler_structure_element_init (PopplerStructureElement *poppler_structure_element)
+{
+}
+
+
+/* GObject finalizer: releases the document reference, then chains up. */
+static void
+poppler_structure_element_finalize (GObject *object)
+{
+  PopplerStructureElement *self = POPPLER_STRUCTURE_ELEMENT (object);
+  GObjectClass *parent_class = G_OBJECT_CLASS (poppler_structure_element_parent_class);
+
+  /* Only the document reference belongs to us; self->elem is owned by the
+   * StructTreeRoot and must not be freed here. */
+  g_object_unref (self->document);
+
+  parent_class->finalize (object);
+}
+
+
+/* Class initializer: installs the finalizer. */
+static void
+poppler_structure_element_class_init (PopplerStructureElementClass *klass)
+{
+  G_OBJECT_CLASS (klass)->finalize = poppler_structure_element_finalize;
+}
+
+
+/**
+ * poppler_structure_element_get_kind:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Return value: A #PopplerStructureElementKind value.
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementKind
+poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), POPPLER_STRUCTURE_ELEMENT_UNKNOWN);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, POPPLER_STRUCTURE_ELEMENT_UNKNOWN);
+
+  /* One-to-one translation from the core element type to the public enum. */
+  switch (poppler_structure_element->elem->getType ())
+    {
+      case StructElement::Unknown:
+        return POPPLER_STRUCTURE_ELEMENT_UNKNOWN;
+      case StructElement::MCID:
+        return POPPLER_STRUCTURE_ELEMENT_CONTENT;
+      case StructElement::OBJR:
+        return POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE;
+      case StructElement::Document:
+        return POPPLER_STRUCTURE_ELEMENT_DOCUMENT;
+      case StructElement::Part:
+        return POPPLER_STRUCTURE_ELEMENT_PART;
+      /* Article elements were previously unmapped and fell through to the
+       * assertion in the default branch. */
+      case StructElement::Art:
+        return POPPLER_STRUCTURE_ELEMENT_ARTICLE;
+      case StructElement::Sect:
+        return POPPLER_STRUCTURE_ELEMENT_SECTION;
+      case StructElement::Div:
+        return POPPLER_STRUCTURE_ELEMENT_DIV;
+      case StructElement::Span:
+        return POPPLER_STRUCTURE_ELEMENT_SPAN;
+      case StructElement::Quote:
+        return POPPLER_STRUCTURE_ELEMENT_QUOTE;
+      case StructElement::Note:
+        return POPPLER_STRUCTURE_ELEMENT_NOTE;
+      case StructElement::Reference:
+        return POPPLER_STRUCTURE_ELEMENT_REFERENCE;
+      case StructElement::BibEntry:
+        return POPPLER_STRUCTURE_ELEMENT_BIBENTRY;
+      case StructElement::Code:
+        return POPPLER_STRUCTURE_ELEMENT_CODE;
+      case StructElement::Link:
+        return POPPLER_STRUCTURE_ELEMENT_LINK;
+      case StructElement::Annot:
+        return POPPLER_STRUCTURE_ELEMENT_ANNOT;
+      case StructElement::Ruby:
+        return POPPLER_STRUCTURE_ELEMENT_RUBY;
+      case StructElement::Warichu:
+        return POPPLER_STRUCTURE_ELEMENT_WARICHU;
+      case StructElement::BlockQuote:
+        return POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE;
+      case StructElement::Caption:
+        return POPPLER_STRUCTURE_ELEMENT_CAPTION;
+      case StructElement::NonStruct:
+        return POPPLER_STRUCTURE_ELEMENT_NONSTRUCT;
+      case StructElement::TOC:
+        return POPPLER_STRUCTURE_ELEMENT_TOC;
+      case StructElement::TOCI:
+        return POPPLER_STRUCTURE_ELEMENT_TOC_ITEM;
+      case StructElement::Index:
+        return POPPLER_STRUCTURE_ELEMENT_INDEX;
+      case StructElement::Private:
+        return POPPLER_STRUCTURE_ELEMENT_PRIVATE;
+      case StructElement::P:
+        return POPPLER_STRUCTURE_ELEMENT_PARAGRAPH;
+      case StructElement::H:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING;
+      case StructElement::H1:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_1;
+      case StructElement::H2:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_2;
+      case StructElement::H3:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_3;
+      case StructElement::H4:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_4;
+      case StructElement::H5:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_5;
+      case StructElement::H6:
+        return POPPLER_STRUCTURE_ELEMENT_HEADING_6;
+      case StructElement::L:
+        return POPPLER_STRUCTURE_ELEMENT_LIST;
+      case StructElement::LI:
+        return POPPLER_STRUCTURE_ELEMENT_LIST_ITEM;
+      case StructElement::Lbl:
+        return POPPLER_STRUCTURE_ELEMENT_LIST_LABEL;
+      case StructElement::LBody:
+        return POPPLER_STRUCTURE_ELEMENT_LIST_BODY;
+      case StructElement::Table:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE;
+      case StructElement::TR:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_ROW;
+      case StructElement::TH:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING;
+      case StructElement::TD:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_DATA;
+      case StructElement::THead:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER;
+      case StructElement::TFoot:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER;
+      case StructElement::TBody:
+        return POPPLER_STRUCTURE_ELEMENT_TABLE_BODY;
+      case StructElement::Figure:
+        return POPPLER_STRUCTURE_ELEMENT_FIGURE;
+      case StructElement::Formula:
+        return POPPLER_STRUCTURE_ELEMENT_FORMULA;
+      case StructElement::Form:
+        return POPPLER_STRUCTURE_ELEMENT_FORM;
+      default:
+        g_assert_not_reached ();
+    }
+
+  /* Only reachable when assertions are compiled out (G_DISABLE_ASSERT):
+   * return a defined value instead of falling off the end of the function. */
+  return POPPLER_STRUCTURE_ELEMENT_UNKNOWN;
+}
+
+/**
+ * poppler_structure_element_get_page:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the page number in which the element is contained.
+ *
+ * Return value: Number of the page that contains the element, of
+ * <code>-1</code> if not defined.
+ *
+ * Since: 0.26
+ */
+gint
+poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), -1);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, -1);
+
+  Ref page_ref;
+
+  /* Without a page reference there is nothing to look up. */
+  if (!poppler_structure_element->elem->getPageRef (page_ref))
+    return -1;
+
+  /* The value returned by findPage() is shifted down by one. */
+  return poppler_structure_element->document->doc->findPage(page_ref.num, page_ref.gen) - 1;
+}
+
+/**
+ * poppler_structure_element_is_content:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Checks whether an element is actual document content.
+ *
+ * Return value: %TRUE if the element is content, or %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, FALSE);
+
+  /* Plain forward to the wrapped core element. */
+  StructElement *core_element = poppler_structure_element->elem;
+  return core_element->isContent ();
+}
+
+/**
+ * poppler_structure_element_is_inline:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Checks whether an element is an inline element.
+ *
+ * Return value: %TRUE if the element is an inline element, or %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, FALSE);
+
+  /* Plain forward to the wrapped core element. */
+  StructElement *core_element = poppler_structure_element->elem;
+  return core_element->isInline ();
+}
+
+/**
+ * poppler_structure_element_is_block:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Checks whether an element is a block element.
+ *
+ * Return value: %TRUE if the element is a block element, or %FALSE otherwise.
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), FALSE);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, FALSE);
+
+  /* Plain forward to the wrapped core element. */
+  StructElement *core_element = poppler_structure_element->elem;
+  return core_element->isBlock ();
+}
+
+/**
+ * poppler_structure_element_get_id:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the identifier of an element.
+ *
+ * Return value: (transfer full): The identifier of the element (if
+ * defined), or %NULL.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *id = poppler_structure_element->elem->getID ();
+
+  /* No identifier defined for this element. */
+  if (id == NULL)
+    return NULL;
+
+  return _poppler_goo_string_to_utf8 (id);
+}
+
+/**
+ * poppler_structure_element_get_title:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the title of an element.
+ *
+ * Return value: (transfer full): The title of the element, or %NULL.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *title = poppler_structure_element->elem->getTitle ();
+
+  /* No title defined for this element. */
+  if (title == NULL)
+    return NULL;
+
+  return _poppler_goo_string_to_utf8 (title);
+}
+
+/**
+ * poppler_structure_element_get_abbreviation:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Acronyms and abbreviations contained in elements of type
+ * #POPPLER_STRUCTURE_ELEMENT_SPAN may have an associated expanded
+ * text form, which can be retrieved using this function.
+ *
+ * Return value: (transfer full): Text of the expanded abbreviation if the
+ * element text is an abbreviation or acronym, %NULL if not.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  StructElement *core_element = poppler_structure_element->elem;
+
+  /* Only span elements are queried for an expanded form. */
+  if (core_element->getType () == StructElement::Span)
+    {
+      GooString *expanded = core_element->getExpandedAbbr ();
+      if (expanded != NULL)
+        return _poppler_goo_string_to_utf8 (expanded);
+    }
+
+  return NULL;
+}
+
+/**
+ * poppler_structure_element_get_language:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the language and country code for the content in an element,
+ * in two-letter ISO format, e.g. <code>en_ES</code>, or %NULL if not
+ * defined.
+ *
+ * Return value: (transfer full): language and country code, or %NULL.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *language = poppler_structure_element->elem->getLanguage ();
+
+  /* No language defined for this element. */
+  if (language == NULL)
+    return NULL;
+
+  return _poppler_goo_string_to_utf8 (language);
+}
+
+/**
+ * poppler_structure_element_get_alt_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the “alternate” text representation of the element (and its child
+ * elements). This is mostly used for non-text elements like images and
+ * figures, to specify a textual description of the element.
+ *
+ * Note that for elements containing proper text, the function
+ * poppler_structure_element_get_text() must be used instead.
+ *
+ * Return value: (transfer full): The alternate text representation for the
+ * element, or %NULL if not defined.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *alt_text = poppler_structure_element->elem->getAltText ();
+
+  /* No alternate text defined for this element. */
+  if (alt_text == NULL)
+    return NULL;
+
+  return _poppler_goo_string_to_utf8 (alt_text);
+}
+
+/**
+ * poppler_structure_element_get_actual_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ *
+ * Obtains the actual text enclosed by the element (and its child elements).
+ * The actual text is mostly used for non-text elements like images and
+ * figures which <em>do</em> have the graphical appearance of text, like
+ * a logo. For those the actual text is the equivalent text to those
+ * graphical elements which look like text when rendered.
+ *
+ * Note that for elements containing proper text, the function
+ * poppler_structure_element_get_text() must be used instead.
+ *
+ * Return value: (transfer full): The actual text for the element, or %NULL
+ * if not defined.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *actual_text = poppler_structure_element->elem->getActualText ();
+
+  /* No actual text defined for this element. */
+  if (actual_text == NULL)
+    return NULL;
+
+  return _poppler_goo_string_to_utf8 (actual_text);
+}
+
+/**
+ * poppler_structure_element_get_text:
+ * @poppler_structure_element: A #PopplerStructureElement
+ * @recursive: If %TRUE, the text of child elements is gathered recursively
+ * in logical order and returned as part of the result.
+ *
+ * Obtains the text enclosed by an element, or the text enclosed by the
+ * elements in the subtree (including the element itself).
+ *
+ * Return value: (transfer full): A string.
+ *
+ * Since: 0.26
+ */
+gchar *
+poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element,
+                                    gboolean                 recursive)
+{
+  g_return_val_if_fail (POPPLER_IS_STRUCTURE_ELEMENT (poppler_structure_element), NULL);
+  g_return_val_if_fail (poppler_structure_element->elem != NULL, NULL);
+
+  GooString *text = poppler_structure_element->elem->getText (recursive);
+  if (text == NULL)
+    return NULL;
+
+  /* Unlike the other string getters, the GooString obtained here is
+   * deleted once it has been converted. */
+  gchar *utf8 = _poppler_goo_string_to_utf8 (text);
+  delete text;
+
+  return utf8;
+}
+
+/* Cursor over the children of one node of the structure tree. */
+struct _PopplerStructureElementIter
+{
+ PopplerDocument *document; /* referenced for as long as the iterator exists */
+ union {
+ StructElement *elem; /* parent node; read when is_root is FALSE */
+ StructTreeRoot *root; /* tree root; read when is_root is TRUE */
+ };
+ gboolean is_root; /* selects which member of the union is read */
+ unsigned index; /* position of the current child within the parent */
+};
+
+POPPLER_DEFINE_BOXED_TYPE (PopplerStructureElementIter,
+ poppler_structure_element_iter,
+ poppler_structure_element_iter_copy,
+ poppler_structure_element_iter_free)
+
+/**
+ * poppler_structure_element_iter_copy:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Creates a new #PopplerStructureElementIter as a copy of @iter. The
+ * returned value must be freed with poppler_structure_element_iter_free().
+ *
+ * Return value: (transfer full): a new #PopplerStructureElementIter
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementIter *
+poppler_structure_element_iter_copy (PopplerStructureElementIter *iter)
+{
+  g_return_val_if_fail (iter != NULL, NULL);
+
+  /* Bitwise duplicate of the iterator; the copy then takes its own
+   * reference on the document it points to. */
+  PopplerStructureElementIter *duplicate = g_slice_dup (PopplerStructureElementIter, iter);
+  duplicate->document = (PopplerDocument *) g_object_ref (duplicate->document);
+
+  return duplicate;
+}
+
+/**
+ * poppler_structure_element_iter_free:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Frees @iter.
+ *
+ * Since: 0.26
+ */
+void
+poppler_structure_element_iter_free (PopplerStructureElementIter *iter)
+{
+  /* Freeing a NULL iterator is a silent no-op. */
+  if (iter != NULL)
+    {
+      g_object_unref (iter->document);
+      g_slice_free (PopplerStructureElementIter, iter);
+    }
+}
+
+/**
+ * poppler_structure_element_iter_new:
+ * @poppler_document: a #PopplerDocument.
+ *
+ * Returns the root #PopplerStructureElementIter for @poppler_document, or %NULL. The
+ * returned value must be freed with poppler_structure_element_iter_free().
+ *
+ * Documents may have an associated structure tree &mdash;mostly, Tagged-PDF
+ * compliant documents&mdash; which can be used to obtain information about
+ * the document structure and its contents. Each node in the tree contains
+ * a #PopplerStructureElement.
+ *
+ * Here is a simple example that walks the whole tree:
+ *
+ * <informalexample><programlisting>
+ * static void
+ * walk_structure (PopplerStructureElementIter *iter)
+ * {
+ * do {
+ * /<!-- -->* Get the element and do something with it *<!-- -->/
+ * PopplerStructureElementIter *child = poppler_structure_element_iter_get_child (iter);
+ * if (child)
+ * walk_structure (child);
+ * poppler_structure_element_iter_free (child);
+ * } while (poppler_structure_element_iter_next (iter));
+ * }
+ * ...
+ * {
+ * iter = poppler_structure_element_iter_new (document);
+ * walk_structure (iter);
+ * poppler_structure_element_iter_free (iter);
+ * }
+ * </programlisting></informalexample>
+ *
+ * Return value: (transfer full): a new #PopplerStructureElementIter, or %NULL if document
+ * doesn't have structure tree.
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementIter *
+poppler_structure_element_iter_new (PopplerDocument *poppler_document)
+{
+  g_return_val_if_fail (POPPLER_IS_DOCUMENT (poppler_document), NULL);
+
+  StructTreeRoot *tree_root = poppler_document->doc->getStructTreeRoot ();
+
+  /* A missing tree and an empty tree are both reported as "no iterator". */
+  if (tree_root == NULL || tree_root->getNumElements () == 0)
+    return NULL;
+
+  /* Zero-filled allocation: the index starts at the first element. */
+  PopplerStructureElementIter *root_iter = g_slice_new0 (PopplerStructureElementIter);
+  root_iter->root = tree_root;
+  root_iter->is_root = TRUE;
+  root_iter->document = (PopplerDocument *) g_object_ref (poppler_document);
+
+  return root_iter;
+}
+
+/**
+ * poppler_structure_element_iter_next:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Sets @iter to point to the next structure element at the current level
+ * of the tree, if valid. See poppler_structure_element_iter_new() for more
+ * information.
+ *
+ * Return value: %TRUE, if @iter was set to the next structure element
+ *
+ * Since: 0.26
+ */
+gboolean
+poppler_structure_element_iter_next (PopplerStructureElementIter *iter)
+{
+  unsigned n_children;
+
+  g_return_val_if_fail (iter != NULL, FALSE);
+
+  /* The parent is either the tree root or a regular element. */
+  if (iter->is_root)
+    n_children = iter->root->getNumElements ();
+  else
+    n_children = iter->elem->getNumElements ();
+
+  /* The index is advanced unconditionally, even when that moves it past
+   * the last child. */
+  iter->index += 1;
+
+  return iter->index < n_children;
+}
+
+/**
+ * poppler_structure_element_iter_get_element:
+ * @iter: a #PopplerStructureElementIter
+ *
+ * Returns the #PopplerStructureElement associated with @iter.
+ *
+ * Return value: (transfer full): a new #PopplerStructureElement
+ *
+ * Since: 0.26
+ */
+PopplerStructureElement *
+poppler_structure_element_iter_get_element (PopplerStructureElementIter *iter)
+{
+  StructElement *elem;
+  unsigned elements;
+
+  g_return_val_if_fail (iter != NULL, NULL);
+
+  elements = iter->is_root
+    ? iter->root->getNumElements ()
+    : iter->elem->getNumElements ();
+
+  /* poppler_structure_element_iter_next() leaves the index one past the
+   * last child when it returns FALSE; refuse to look up such a position. */
+  g_return_val_if_fail (iter->index < elements, NULL);
+
+  elem = iter->is_root
+    ? iter->root->getElement (iter->index)
+    : iter->elem->getElement (iter->index);
+
+  /* The wrapper takes its own reference on the document. */
+  return _poppler_structure_element_new (iter->document, elem);
+}
+
+/**
+ * poppler_structure_element_iter_get_child:
+ * @parent: a #PopplerStructureElementIter
+ *
+ * Returns a new iterator to the children elements of the
+ * #PopplerStructureElement associated with @parent. The returned value must
+ * be freed with poppler_structure_element_iter_free().
+ *
+ * Return value: (transfer full): a new #PopplerStructureElementIter, or %NULL
+ * if the element has no children
+ *
+ * Since: 0.26
+ */
+PopplerStructureElementIter *
+poppler_structure_element_iter_get_child (PopplerStructureElementIter *parent)
+{
+  StructElement *elem;
+  unsigned elements;
+
+  g_return_val_if_fail (parent != NULL, NULL);
+
+  elements = parent->is_root
+    ? parent->root->getNumElements ()
+    : parent->elem->getNumElements ();
+
+  /* poppler_structure_element_iter_next() leaves the index one past the
+   * last child when it returns FALSE; refuse to look up such a position. */
+  g_return_val_if_fail (parent->index < elements, NULL);
+
+  elem = parent->is_root
+    ? parent->root->getElement (parent->index)
+    : parent->elem->getElement (parent->index);
+
+  /* Defensive: never dereference a missing element.  An element without
+   * children yields no iterator. */
+  if (elem == NULL || elem->getNumElements () == 0)
+    return NULL;
+
+  PopplerStructureElementIter *child = g_slice_new0 (PopplerStructureElementIter);
+  child->document = (PopplerDocument *) g_object_ref (parent->document);
+  child->elem = elem;
+  /* Explicit rather than relying on the zero-filled allocation: the child
+   * iterator always walks a regular element, never the tree root. */
+  child->is_root = FALSE;
+
+  return child;
+}
diff --git a/glib/poppler-structure-element.h b/glib/poppler-structure-element.h
new file mode 100644
index 00000000..a5ac04b5
--- /dev/null
+++ b/glib/poppler-structure-element.h
@@ -0,0 +1,112 @@
+/* poppler-structure-element.h: glib interface to poppler
+ *
+ * Copyright (C) 2013 Igalia S.L.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __POPPLER_STRUCTURE_ELEMENT_H__
+#define __POPPLER_STRUCTURE_ELEMENT_H__
+
+#include <glib-object.h>
+#include "poppler.h"
+
+G_BEGIN_DECLS
+
+#define POPPLER_TYPE_STRUCTURE_ELEMENT (poppler_structure_element_get_type ())
+#define POPPLER_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT, PopplerStructureElement))
+#define POPPLER_IS_STRUCTURE_ELEMENT(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), POPPLER_TYPE_STRUCTURE_ELEMENT))
+
+/**
+ * PopplerStructureElementKind:
+ */
+typedef enum {
+ POPPLER_STRUCTURE_ELEMENT_UNKNOWN,
+ POPPLER_STRUCTURE_ELEMENT_CONTENT,
+ POPPLER_STRUCTURE_ELEMENT_OBJECT_REFERENCE,
+ POPPLER_STRUCTURE_ELEMENT_DOCUMENT,
+ POPPLER_STRUCTURE_ELEMENT_PART,
+ POPPLER_STRUCTURE_ELEMENT_ARTICLE,
+ POPPLER_STRUCTURE_ELEMENT_SECTION,
+ POPPLER_STRUCTURE_ELEMENT_DIV,
+ POPPLER_STRUCTURE_ELEMENT_SPAN,
+ POPPLER_STRUCTURE_ELEMENT_QUOTE,
+ POPPLER_STRUCTURE_ELEMENT_NOTE,
+ POPPLER_STRUCTURE_ELEMENT_REFERENCE,
+ POPPLER_STRUCTURE_ELEMENT_BIBENTRY,
+ POPPLER_STRUCTURE_ELEMENT_CODE,
+ POPPLER_STRUCTURE_ELEMENT_LINK,
+ POPPLER_STRUCTURE_ELEMENT_ANNOT,
+ POPPLER_STRUCTURE_ELEMENT_RUBY,
+ POPPLER_STRUCTURE_ELEMENT_WARICHU,
+ POPPLER_STRUCTURE_ELEMENT_BLOCKQUOTE,
+ POPPLER_STRUCTURE_ELEMENT_CAPTION,
+ POPPLER_STRUCTURE_ELEMENT_NONSTRUCT,
+ POPPLER_STRUCTURE_ELEMENT_TOC,
+ POPPLER_STRUCTURE_ELEMENT_TOC_ITEM,
+ POPPLER_STRUCTURE_ELEMENT_INDEX,
+ POPPLER_STRUCTURE_ELEMENT_PRIVATE,
+ POPPLER_STRUCTURE_ELEMENT_PARAGRAPH,
+ POPPLER_STRUCTURE_ELEMENT_HEADING,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_1,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_2,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_3,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_4,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_5,
+ POPPLER_STRUCTURE_ELEMENT_HEADING_6,
+ POPPLER_STRUCTURE_ELEMENT_LIST,
+ POPPLER_STRUCTURE_ELEMENT_LIST_ITEM,
+ POPPLER_STRUCTURE_ELEMENT_LIST_LABEL,
+ POPPLER_STRUCTURE_ELEMENT_LIST_BODY,
+ POPPLER_STRUCTURE_ELEMENT_TABLE,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_ROW,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_HEADING,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_DATA,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_HEADER,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_FOOTER,
+ POPPLER_STRUCTURE_ELEMENT_TABLE_BODY,
+ POPPLER_STRUCTURE_ELEMENT_FIGURE,
+ POPPLER_STRUCTURE_ELEMENT_FORMULA,
+ POPPLER_STRUCTURE_ELEMENT_FORM,
+} PopplerStructureElementKind;
+
+
+GType poppler_structure_element_get_type (void) G_GNUC_CONST;
+PopplerStructureElementKind poppler_structure_element_get_kind (PopplerStructureElement *poppler_structure_element);
+gint poppler_structure_element_get_page (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_content (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_inline (PopplerStructureElement *poppler_structure_element);
+gboolean poppler_structure_element_is_block (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_id (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_title (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_abbreviation (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_language (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_text (PopplerStructureElement *poppler_structure_element,
+ gboolean recursive);
+gchar *poppler_structure_element_get_alt_text (PopplerStructureElement *poppler_structure_element);
+gchar *poppler_structure_element_get_actual_text (PopplerStructureElement *poppler_structure_element);
+
+#define POPPLER_TYPE_STRUCTURE_ELEMENT_ITER (poppler_structure_element_iter_get_type ())
+GType poppler_structure_element_iter_get_type (void) G_GNUC_CONST;
+PopplerStructureElementIter *poppler_structure_element_iter_new (PopplerDocument *poppler_document);
+PopplerStructureElementIter *poppler_structure_element_iter_get_child (PopplerStructureElementIter *parent);
+PopplerStructureElementIter *poppler_structure_element_iter_copy (PopplerStructureElementIter *iter);
+PopplerStructureElement *poppler_structure_element_iter_get_element (PopplerStructureElementIter *iter);
+gboolean poppler_structure_element_iter_next (PopplerStructureElementIter *iter);
+void poppler_structure_element_iter_free (PopplerStructureElementIter *iter);
+
+G_END_DECLS
+
+#endif /* !__POPPLER_STRUCTURE_ELEMENT_H__ */
diff --git a/glib/poppler.h b/glib/poppler.h
index 0db97d09..1da2af13 100644
--- a/glib/poppler.h
+++ b/glib/poppler.h
@@ -208,6 +208,8 @@ typedef struct _PopplerAnnotLine PopplerAnnotLine;
typedef struct _PopplerAnnotCircle PopplerAnnotCircle;
typedef struct _PopplerAnnotSquare PopplerAnnotSquare;
typedef struct _PopplerQuadrilateral PopplerQuadrilateral;
+typedef struct _PopplerStructureElement PopplerStructureElement;
+typedef struct _PopplerStructureElementIter PopplerStructureElementIter;
typedef enum
{
@@ -233,5 +235,6 @@ G_END_DECLS
#include "poppler-date.h"
#include "poppler-movie.h"
#include "poppler-media.h"
+#include "poppler-structure-element.h"
#endif /* __POPPLER_GLIB_H__ */
diff --git a/glib/reference/poppler-docs.sgml b/glib/reference/poppler-docs.sgml
index a9d5158e..b817a0d6 100644
--- a/glib/reference/poppler-docs.sgml
+++ b/glib/reference/poppler-docs.sgml
@@ -23,6 +23,8 @@
<xi:include href="xml/poppler-layer.xml"/>
<xi:include href="xml/poppler-media.xml"/>
<xi:include href="xml/poppler-movie.xml"/>
+ <xi:include href="xml/poppler-structure.xml"/>
+ <xi:include href="xml/poppler-structure-element.xml"/>
<xi:include href="xml/poppler-features.xml"/>
</chapter>
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index a954f64a..fff370b2 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -585,6 +585,43 @@ poppler_movie_get_type
</SECTION>
<SECTION>
+<FILE>poppler-structure-element</FILE>
+<TITLE>PopplerStructureElement</TITLE>
+PopplerStructureElement
+PopplerStructureElementKind
+PopplerStructureElementIter
+poppler_structure_element_iter_new
+poppler_structure_element_iter_next
+poppler_structure_element_iter_copy
+poppler_structure_element_iter_free
+poppler_structure_element_iter_get_child
+poppler_structure_element_iter_get_element
+poppler_structure_element_get_kind
+poppler_structure_element_get_page
+poppler_structure_element_is_content
+poppler_structure_element_is_inline
+poppler_structure_element_is_block
+poppler_structure_element_get_id
+poppler_structure_element_get_title
+poppler_structure_element_get_abbreviation
+poppler_structure_element_get_language
+poppler_structure_element_get_text
+poppler_structure_element_get_alt_text
+poppler_structure_element_get_actual_text
+
+<SUBSECTION Standard>
+POPPLER_STRUCTURE_ELEMENT
+POPPLER_IS_STRUCTURE_ELEMENT
+POPPLER_TYPE_STRUCTURE_ELEMENT
+POPPLER_TYPE_STRUCTURE_ELEMENT_ITER
+POPPLER_TYPE_STRUCTURE_ELEMENT_KIND
+
+<SUBSECTION Private>
+poppler_structure_element_get_type
+poppler_structure_element_iter_get_type
+</SECTION>
+
+<SECTION>
<FILE>poppler-features</FILE>
POPPLER_HAS_CAIRO
POPPLER_MAJOR_VERSION
diff --git a/glib/reference/poppler.types b/glib/reference/poppler.types
index eed98499..388852af 100644
--- a/glib/reference/poppler.types
+++ b/glib/reference/poppler.types
@@ -8,3 +8,5 @@ poppler_annot_get_type
poppler_layer_get_type
poppler_media_get_type
poppler_movie_get_type
+poppler_structure_element_get_type
+poppler_structure_element_iter_get_type