diff options
author | Carlos Garcia Campos <carlosgc@gnome.org> | 2013-11-25 09:04:30 +0100 |
---|---|---|
committer | Carlos Garcia Campos <carlosgc@gnome.org> | 2013-11-25 09:04:30 +0100 |
commit | bb2b7fb491fb72f0ea024d80df89680ede3457b4 (patch) | |
tree | 8cb439d08837eef37027fc5fc6024273e4875bf7 /glib | |
parent | f662973b0da52da84acc3668a0e037ee72498193 (diff) |
glib: Add API to get text, text layout and text attributes for a given area
Diffstat (limited to 'glib')
-rw-r--r-- | glib/poppler-page.cc | 107 | ||||
-rw-r--r-- | glib/poppler-page.h | 8 | ||||
-rw-r--r-- | glib/reference/poppler-sections.txt | 3 |
3 files changed, 113 insertions, 5 deletions
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc index 248649d3..bb324731 100644 --- a/glib/poppler-page.cc +++ b/glib/poppler-page.cc @@ -851,6 +851,28 @@ poppler_page_get_text (PopplerPage *page) } /** + * poppler_page_get_text_for_area: + * @page: a #PopplerPage + * @area: a #PopplerRectangle + * + * Retrieves the text of @page contained in @area. + * + * Return value: a pointer to the text as a string + * + * Since: 0.26 + **/ +char * +poppler_page_get_text_for_area (PopplerPage *page, + PopplerRectangle *area) +{ + g_return_val_if_fail (POPPLER_IS_PAGE (page), NULL); + g_return_val_if_fail (area != NULL, NULL); + + return poppler_page_get_selected_text (page, POPPLER_SELECTION_GLYPH, area); +} + + +/** * poppler_page_find_text_with_options: * @page: a #PopplerPage * @text: the text to search for (UTF-8 encoded) @@ -2018,14 +2040,16 @@ poppler_page_get_crop_box (PopplerPage *page, PopplerRectangle *rect) * poppler_page_get_text_layout: * @page: A #PopplerPage * @rectangles: (out) (array length=n_rectangles) (transfer container): return location for an array of #PopplerRectangle - * @n_rectangles: (out) length of returned array + * @n_rectangles: (out): length of returned array * * Obtains the layout of the text as a list of #PopplerRectangle - * This array must be freed with g_free () when done. + * This array must be freed with g_free() when done. * * The position in the array represents an offset in the text returned by * poppler_page_get_text() * + * See also poppler_page_get_text_layout_for_area(). + * * Return value: %TRUE if the page contains text, %FALSE otherwise * * Since: 0.16 @@ -2035,6 +2059,38 @@ poppler_page_get_text_layout (PopplerPage *page, PopplerRectangle **rectangles, guint *n_rectangles) { + PopplerRectangle selection = {0, 0, 0, 0}; + + g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE); + + poppler_page_get_size (page, &selection.x2, &selection.y2); + + return poppler_page_get_text_layout_for_area (page, &selection, rectangles, n_rectangles); +} + +/** + * poppler_page_get_text_layout_for_area: + * @page: A #PopplerPage + * @area: a #PopplerRectangle + * @rectangles: (out) (array length=n_rectangles) (transfer container): return location for an array of #PopplerRectangle + * @n_rectangles: (out): length of returned array + * + * Obtains the layout of the text contained in @area as a list of #PopplerRectangle + * This array must be freed with g_free() when done. + * + * The position in the array represents an offset in the text returned by + * poppler_page_get_text_for_area() + * + * Return value: %TRUE if the page contains text, %FALSE otherwise + * + * Since: 0.26 + **/ +gboolean +poppler_page_get_text_layout_for_area (PopplerPage *page, + PopplerRectangle *area, + PopplerRectangle **rectangles, + guint *n_rectangles) +{ TextPage *text; PopplerRectangle *rect; PDFRectangle selection; @@ -2047,10 +2103,15 @@ poppler_page_get_text_layout (PopplerPage *page, int n_lines; g_return_val_if_fail (POPPLER_IS_PAGE (page), FALSE); + g_return_val_if_fail (area != NULL, FALSE); *n_rectangles = 0; - poppler_page_get_size (page, &selection.x2, &selection.y2); + selection.x1 = area->x1; + selection.y1 = area->y1; + selection.x2 = area->x2; + selection.y2 = area->y2; + text = poppler_page_get_text_page (page); word_list = text->getSelectionWords (&selection, selectionStyleGlyph, &n_lines); if (!word_list) @@ -2170,13 +2231,15 @@ word_text_attributes_equal (TextWord *a, gint ai, TextWord *b, gint bi) * poppler_page_get_text_attributes: * @page: A #PopplerPage * - * Obtains the attributes of the text as a GList of #PopplerTextAttributes. + * Obtains the attributes of the text as a #GList of #PopplerTextAttributes. * This list must be freed with poppler_page_free_text_attributes() when done. * * Each list element is a #PopplerTextAttributes struct where start_index and * end_index indicates the range of text (as returned by poppler_page_get_text()) * to which text attributes apply. * + * See also poppler_page_get_text_attributes_for_area() + * * Return value: (element-type PopplerTextAttributes) (transfer full): A #GList of #PopplerTextAttributes * * Since: 0.18 @@ -2184,6 +2247,35 @@ word_text_attributes_equal (TextWord *a, gint ai, TextWord *b, gint bi) GList * poppler_page_get_text_attributes (PopplerPage *page) { + PopplerRectangle selection = {0, 0, 0, 0}; + + g_return_val_if_fail (POPPLER_IS_PAGE (page), NULL); + + poppler_page_get_size (page, &selection.x2, &selection.y2); + + return poppler_page_get_text_attributes_for_area (page, &selection); +} + +/** + * poppler_page_get_text_attributes_for_area: + * @page: A #PopplerPage + * @area: a #PopplerRectangle + * + * Obtains the attributes of the text in @area as a #GList of #PopplerTextAttributes. + * This list must be freed with poppler_page_free_text_attributes() when done. + * + * Each list element is a #PopplerTextAttributes struct where start_index and + * end_index indicates the range of text (as returned by poppler_page_get_text_for_area()) + * to which text attributes apply. + * + * Return value: (element-type PopplerTextAttributes) (transfer full): A #GList of #PopplerTextAttributes + * + * Since: 0.26 + **/ +GList * +poppler_page_get_text_attributes_for_area (PopplerPage *page, + PopplerRectangle *area) +{ TextPage *text; PDFRectangle selection; GooList **word_list; @@ -2196,8 +2288,13 @@ poppler_page_get_text_attributes (PopplerPage *page) GList *attributes = NULL; g_return_val_if_fail (POPPLER_IS_PAGE (page), NULL); + g_return_val_if_fail (area != NULL, FALSE); + + selection.x1 = area->x1; + selection.y1 = area->y1; + selection.x2 = area->x2; + selection.y2 = area->y2; - poppler_page_get_size (page, &selection.x2, &selection.y2); text = poppler_page_get_text_page (page); word_list = text->getSelectionWords (&selection, selectionStyleGlyph, &n_lines); if (!word_list) diff --git a/glib/poppler-page.h b/glib/poppler-page.h index c54eb9c6..68e2a1df 100644 --- a/glib/poppler-page.h +++ b/glib/poppler-page.h @@ -68,6 +68,8 @@ GList *poppler_page_find_text (PopplerPage *page, void poppler_page_render_to_ps (PopplerPage *page, PopplerPSFile *ps_file); char *poppler_page_get_text (PopplerPage *page); +char *poppler_page_get_text_for_area (PopplerPage *page, + PopplerRectangle *area); char *poppler_page_get_selected_text (PopplerPage *page, PopplerSelectionStyle style, PopplerRectangle *selection); @@ -99,8 +101,14 @@ void poppler_page_get_crop_box (PopplerPage *page, gboolean poppler_page_get_text_layout (PopplerPage *page, PopplerRectangle **rectangles, guint *n_rectangles); +gboolean poppler_page_get_text_layout_for_area (PopplerPage *page, + PopplerRectangle *area, + PopplerRectangle **rectangles, + guint *n_rectangles); GList *poppler_page_get_text_attributes (PopplerPage *page); void poppler_page_free_text_attributes (GList *list); +GList * poppler_page_get_text_attributes_for_area (PopplerPage *page, + PopplerRectangle *area); /* A rectangle on a page, with coordinates in PDF points. */ #define POPPLER_TYPE_RECTANGLE (poppler_rectangle_get_type ()) diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt index ce829e3d..62e0f772 100644 --- a/glib/reference/poppler-sections.txt +++ b/glib/reference/poppler-sections.txt @@ -35,8 +35,11 @@ poppler_page_get_selected_text poppler_page_find_text poppler_page_find_text_with_options poppler_page_get_text +poppler_page_get_text_for_area poppler_page_get_text_layout +poppler_page_get_text_layout_for_area poppler_page_get_text_attributes +poppler_page_get_text_attributes_for_area poppler_page_free_text_attributes poppler_page_get_link_mapping poppler_page_free_link_mapping |