diff options
author | Mathias Bauer <mba@openoffice.org> | 2011-02-28 17:35:51 +0100 |
---|---|---|
committer | Bjoern Michaelsen <bjoern.michaelsen@canonical.com> | 2011-06-16 13:40:40 +0200 |
commit | 586bed9157edc406fe83ee2c674f6e976d27b5be (patch) | |
tree | 19a63f13b3523974546b8b64fcf588c4a434dc3b /regexp/inc | |
parent | ba450e160447eb8108ab1dd548daccf86c6435d6 (diff) |
CWS gnumake4: convert regexp to new build system [hg:56e4316f8d74]
Diffstat (limited to 'regexp/inc')
-rw-r--r-- | regexp/inc/makefile.mk | 47 | ||||
-rw-r--r-- | regexp/inc/pch/precompiled_regexp.cxx | 29 | ||||
-rw-r--r-- | regexp/inc/pch/precompiled_regexp.hxx | 32 | ||||
-rw-r--r-- | regexp/inc/regexp/reclass.hxx | 391 |
4 files changed, 391 insertions, 108 deletions
diff --git a/regexp/inc/makefile.mk b/regexp/inc/makefile.mk deleted file mode 100644 index b65b126709e7..000000000000 --- a/regexp/inc/makefile.mk +++ /dev/null @@ -1,47 +0,0 @@ -#************************************************************************* -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# Copyright 2000, 2010 Oracle and/or its affiliates. -# -# OpenOffice.org - a multi-platform office productivity suite -# -# This file is part of OpenOffice.org. -# -# OpenOffice.org is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License version 3 -# only, as published by the Free Software Foundation. -# -# OpenOffice.org is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License version 3 for more details -# (a copy is included in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU Lesser General Public License -# version 3 along with OpenOffice.org. If not, see -# <http://www.openoffice.org/license.html> -# for a copy of the LGPLv3 License. -# -#************************************************************************* -PRJ=.. 
- -PRJNAME=regexp -TARGET=inc - -# --- Settings ----------------------------------------------------- - -.INCLUDE : settings.mk - -# --- Files -------------------------------------------------------- -# --- Targets ------------------------------------------------------- - -.INCLUDE : target.mk - -.IF "$(ENABLE_PCH)"!="" -ALLTAR : \ - $(SLO)$/precompiled.pch \ - $(SLO)$/precompiled_ex.pch - -.ENDIF # "$(ENABLE_PCH)"!="" - diff --git a/regexp/inc/pch/precompiled_regexp.cxx b/regexp/inc/pch/precompiled_regexp.cxx deleted file mode 100644 index 2adcb54dccdb..000000000000 --- a/regexp/inc/pch/precompiled_regexp.cxx +++ /dev/null @@ -1,29 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. 
- * - ************************************************************************/ - -#include "precompiled_regexp.hxx" - diff --git a/regexp/inc/pch/precompiled_regexp.hxx b/regexp/inc/pch/precompiled_regexp.hxx deleted file mode 100644 index 5b02201128a6..000000000000 --- a/regexp/inc/pch/precompiled_regexp.hxx +++ /dev/null @@ -1,32 +0,0 @@ -/************************************************************************* - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * Copyright 2000, 2010 Oracle and/or its affiliates. - * - * OpenOffice.org - a multi-platform office productivity suite - * - * This file is part of OpenOffice.org. - * - * OpenOffice.org is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License version 3 - * only, as published by the Free Software Foundation. - * - * OpenOffice.org is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License version 3 for more details - * (a copy is included in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU Lesser General Public License - * version 3 along with OpenOffice.org. If not, see - * <http://www.openoffice.org/license.html> - * for a copy of the LGPLv3 License. - * - ************************************************************************/ - -// MARKER(update_precomp.py): Generated on 2006-09-01 17:49:54.405006 - -#ifdef PRECOMPILED_HEADERS -#endif - diff --git a/regexp/inc/regexp/reclass.hxx b/regexp/inc/regexp/reclass.hxx new file mode 100644 index 000000000000..329ec2032adc --- /dev/null +++ b/regexp/inc/regexp/reclass.hxx @@ -0,0 +1,391 @@ +/* Definitions for data structures and routines for the regular + expression library, version 0.12. 
+ Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc. + + This file is part of the GNU C Library. Its master source is NOT part of + the C library, however. The master source lives in /gd/gnu/lib. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +/* + Modified for OpenOffice.org to use sal_Unicode and Transliteration service. + */ + +#ifndef INCLUDED_REGEXP_RECLASS_HXX +#define INCLUDED_REGEXP_RECLASS_HXX + +#include <i18nutil/unicode.hxx> +#include <com/sun/star/util/SearchFlags.hpp> +#include <com/sun/star/util/SearchOptions.hpp> +#include <sal/types.h> +#include <com/sun/star/i18n/XExtendedTransliteration.hpp> + +#if defined REGEXP_DLLIMPLEMENTATION +#define REGEXP_DLLPUBLIC SAL_DLLPUBLIC_EXPORT +#else +#define REGEXP_DLLPUBLIC SAL_DLLPUBLIC_IMPORT +#endif + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ +#ifdef _XOPEN_SOURCE + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Not implemented. 
*/ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', and `fastmap' + can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +struct REGEXP_DLLPUBLIC re_pattern_buffer +{ +/* [[[begin pattern_buffer]]] */ + /* Space that holds the compiled pattern. It is declared as + `sal_Unicode *' (`unsigned char *' in upstream GNU regex) because its elements are + sometimes used as array indexes. */ + sal_Unicode *buffer; + + /* Number of bytes to which `buffer' points. */ + sal_uInt32 allocated; + + /* Number of bytes actually used in `buffer'. */ + sal_uInt32 used; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses + the fastmap, if there is one, to skip over impossible + starting points for matches. */ + sal_Unicode *fastmap; + + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search2', to see + whether or not we should use the fastmap, so we don't set + this absolutely perfectly; see `re_compile_fastmap' (the + `duplicate' case). 
*/ + unsigned can_be_null : 1; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If set, a beginning-of-line anchor doesn't match at the + beginning of the string. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If true, an anchor at a newline matches. */ + unsigned newline_anchor : 1; + +/* [[[end pattern_buffer]]] */ +}; + +/* These are the command codes that appear in compiled regular + expressions. Some opcodes are followed by argument bytes. A + command code can specify any interpretation whatsoever for its + arguments. Zero bytes may appear in the compiled regular expression. */ + +typedef enum +{ + no_op = 0, + + /* Succeed right away--no more backtracking. */ + succeed, + + /* Followed by one byte giving n, then by n literal bytes. */ + exactn, + + /* Matches any (more or less) character. */ + anychar, + + /* Matches any one char belonging to specified set. First + following byte is number of bitmap bytes. Then come bytes + for a bitmap saying which chars are in. Bits in each byte + are ordered low-bit-first. A character is in the set if its + bit is 1. A character too large to have a bit in the map is + automatically not in the set. */ + charset, + + /* Same parameters as charset, but match any character that is + not one of those specified. */ + charset_not, + + /* Start remembering the text that is matched, for storing in a + register. Followed by one byte with the register number, in + the range 0 to one less than the pattern buffer's re_nsub + field. Then followed by one byte with the number of groups + inner to this one. (This last has to be part of the + start_memory only because we need it in the on_failure_jump + of re_match2.) */ + start_memory, + /* Stop remembering the text that is matched and store it in a + memory register. 
Followed by one byte with the register + number, in the range 0 to one less than `re_nsub' in the + pattern buffer, and one byte with the number of inner groups, + just like `start_memory'. (We need the number of inner + groups here because we don't have any easy way of finding the + corresponding start_memory when we're at a stop_memory.) */ + stop_memory, + + /* Match a duplicate of something remembered. Followed by one + byte containing the register number. */ + duplicate, + + /* Fail unless at beginning of line. */ + begline, + + /* Fail unless at end of line. */ + endline, + + /* Succeeds if at beginning of buffer (if emacs) or at beginning + of string to be matched (if not). */ + begbuf, + + /* Analogously, for end of buffer/string. */ + endbuf, + + /* Followed by two-byte relative address to which to jump. */ + jump, + + /* Same as jump, but marks the end of an alternative. */ + jump_past_alt, + + /* Followed by two-byte relative address of place to resume at + in case of failure. */ + on_failure_jump, + + /* Like on_failure_jump, but pushes a placeholder instead of the + current string position when executed. */ + on_failure_keep_string_jump, + + /* Throw away latest failure point and then jump to following + two-byte relative address. */ + pop_failure_jump, + + /* Change to pop_failure_jump if we know we won't have to backtrack to + match; otherwise change to jump. This is used to jump + back to the beginning of a repeat. If what follows this jump + clearly won't match what the repeat does, such that we can be + sure that there is no use backtracking out of repetitions + already matched, then we change it to a pop_failure_jump. + Followed by two-byte address. */ + maybe_pop_jump, + + /* Jump to following two-byte address, and push a dummy failure + point. This failure point will be thrown away if an attempt + is made to use it for a failure. A `+' construct makes this + before the first repeat. 
Also used as an intermediary kind + of jump when compiling an alternative. */ + dummy_failure_jump, + + /* Push a dummy failure point and continue. Used at the end of + alternatives. */ + push_dummy_failure, + + /* Followed by two-byte relative address and two-byte number n. + After matching N times, jump to the address upon failure. */ + succeed_n, + + /* Followed by two-byte relative address, and two-byte number n. + Jump to the address N times, then fail. */ + jump_n, + + /* Set the following two-byte relative address to the + subsequent two-byte number. The address *includes* the two + bytes of number. */ + set_number_at, + + wordbeg, /* Succeeds if at word beginning. */ + wordend /* Succeeds if at word end. */ + +} re_opcode_t; + +typedef struct re_pattern_buffer regex_t; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef sal_Int32 regoff_t; + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct REGEXP_DLLPUBLIC re_registers +{ + sal_uInt32 num_regs; + sal_Int32 *start; + sal_Int32 *end; + sal_Int32 num_of_match; +}; + +typedef struct { + sal_Int32 begalt_offset; + sal_Int32 fixup_alt_jump; + sal_Int32 inner_group_offset; + sal_Int32 laststart_offset; + sal_uInt32 regnum; +} compile_stack_elt_t; + +typedef struct { + compile_stack_elt_t *stack; + sal_uInt32 size; + sal_uInt32 avail; +} compile_stack_type; + +union REGEXP_DLLPUBLIC fail_stack_elt +{ + sal_Unicode *pointer; + sal_Int32 integer; +}; + +typedef union fail_stack_elt fail_stack_elt_t; + +typedef struct +{ + fail_stack_elt_t *stack; + sal_uInt32 size; + sal_uInt32 avail; /* Offset of next open position. */ +} fail_stack_type; + +typedef union +{ + fail_stack_elt_t word; + struct + { + /* This field is one if this group can match the empty string, + zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. 
*/ +#define MATCH_NULL_UNSET_VALUE 3 + unsigned match_null_string_p : 2; + unsigned is_active : 1; + unsigned matched_something : 1; + unsigned ever_matched_something : 1; + } bits; +} register_info_type; + + +class REGEXP_DLLPUBLIC Regexpr +{ + ::com::sun::star::uno::Reference< + ::com::sun::star::i18n::XExtendedTransliteration > translit; + + const sal_Unicode *line; // line to search in + sal_Int32 linelen; // length of search string + + sal_Unicode *pattern; // RE pattern to match + sal_Int32 patsize; // Length of pattern + + struct re_pattern_buffer *bufp; + + sal_Bool isIgnoreCase; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation + is applied to a pattern when it is compiled and to a string + when it is matched. */ + int translate; + + sal_uInt32 failure_id; + sal_uInt32 nfailure_points_pushed; + sal_uInt32 nfailure_points_popped; + /* Counts the total number of registers pushed. */ + sal_uInt32 num_regs_pushed; + + sal_uInt32 re_max_failures; + + /* Registers are set to a sentinel when they haven't yet matched. 
*/ + sal_Unicode reg_unset_dummy; + + // private instance functions + inline void store_number( sal_Unicode * destination, sal_Int32 number ); + inline void store_number_and_incr( sal_Unicode *& destination, sal_Int32 number ); + inline void extract_number(sal_Int32 & dest, sal_Unicode *source); + inline void extract_number_and_incr(sal_Int32 & destination, sal_Unicode *& source); + + sal_Bool group_match_null_string_p(sal_Unicode **p, sal_Unicode *end, + register_info_type *reg_info); + sal_Bool alt_match_null_string_p(sal_Unicode *p, sal_Unicode *end, + register_info_type *reg_info); + + sal_Bool common_op_match_null_string_p(sal_Unicode **p, sal_Unicode *end, + register_info_type *reg_info); + sal_Int32 bcmp_translate(const sal_Unicode *s1, + const sal_Unicode *s2, sal_Int32 len); + + sal_Int32 regcomp(void); + sal_Int32 regex_compile(void); + inline void store_op1(re_opcode_t op, sal_Unicode *loc, sal_Int32 arg); + inline void store_op2(re_opcode_t op, sal_Unicode *loc, sal_Int32 arg1, sal_Int32 arg2); + void insert_op1(re_opcode_t op, sal_Unicode *loc, sal_Int32 arg, + sal_Unicode *end); + void insert_op2(re_opcode_t op, sal_Unicode *loc, sal_Int32 arg1, + sal_Int32 arg2, sal_Unicode *end); + sal_Bool at_begline_loc_p(const sal_Unicode *local_pattern, + const sal_Unicode *p); + sal_Bool at_endline_loc_p(const sal_Unicode *p, const sal_Unicode *pend); + reg_errcode_t compile_range(sal_Unicode range_begin, sal_Unicode range_end, sal_Unicode *b); + sal_Bool group_in_compile_stack(compile_stack_type compile_stack, + sal_uInt32 regnum); + sal_Int32 re_match2(struct re_registers *regs, sal_Int32 pos, sal_Int32 range); + + sal_Bool iswordbegin(const sal_Unicode *d, sal_Unicode *string, sal_Int32 ssize); + sal_Bool iswordend(const sal_Unicode *d, sal_Unicode *string, sal_Int32 ssize); + void set_list_bit(sal_Unicode c, sal_Unicode *b); + +public: + // constructors + Regexpr( const ::com::sun::star::util::SearchOptions & rOptions, + ::com::sun::star::uno::Reference< + 
::com::sun::star::i18n::XExtendedTransliteration > XTrans ); + + // destructor + ~Regexpr(); + + void set_line( const sal_Unicode *line, sal_Int32 len ); + + // function returning pointers to occurrences in regs + sal_Int32 re_search(struct re_registers *regs, sal_Int32 pOffset); // find pattern in line +}; + +#endif |