diff options
Diffstat (limited to 'xmlreader')
-rw-r--r-- | xmlreader/Library_xmlreader.mk | 53 | ||||
-rw-r--r-- | xmlreader/Makefile | 39 | ||||
-rw-r--r-- | xmlreader/Module_xmlreader.mk | 36 | ||||
-rw-r--r-- | xmlreader/Package_inc.mk | 35 | ||||
-rw-r--r-- | xmlreader/inc/xmlreader/README | 34 | ||||
-rw-r--r-- | xmlreader/inc/xmlreader/detail/xmlreaderdllapi.hxx | 41 | ||||
-rw-r--r-- | xmlreader/inc/xmlreader/pad.hxx | 59 | ||||
-rw-r--r-- | xmlreader/inc/xmlreader/span.hxx | 69 | ||||
-rw-r--r-- | xmlreader/inc/xmlreader/xmlreader.hxx | 199 | ||||
-rw-r--r-- | xmlreader/prj/build.lst | 2 | ||||
-rw-r--r-- | xmlreader/prj/d.lst | 1 | ||||
-rw-r--r-- | xmlreader/prj/makefile.mk | 40 | ||||
-rw-r--r-- | xmlreader/source/pad.cxx | 83 | ||||
-rw-r--r-- | xmlreader/source/span.cxx | 64 | ||||
-rw-r--r-- | xmlreader/source/xmlreader.cxx | 1053 |
15 files changed, 0 insertions, 1808 deletions
diff --git a/xmlreader/Library_xmlreader.mk b/xmlreader/Library_xmlreader.mk deleted file mode 100644 index 8e2e04072..000000000 --- a/xmlreader/Library_xmlreader.mk +++ /dev/null @@ -1,53 +0,0 @@ -# -*- Mode: makefile; tab-width: 4; indent-tabs-mode: t -*- -#************************************************************************* -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# Copyright 2000, 2011 Oracle and/or its affiliates. -# -# OpenOffice.org - a multi-platform office productivity suite -# -# This file is part of OpenOffice.org. -# -# OpenOffice.org is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License version 3 -# only, as published by the Free Software Foundation. -# -# OpenOffice.org is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License version 3 for more details -# (a copy is included in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU Lesser General Public License -# version 3 along with OpenOffice.org. If not, see -# <http://www.openoffice.org/license.html> -# for a copy of the LGPLv3 License. -# -#************************************************************************* - -$(eval $(call gb_Library_Library,xmlreader)) - -$(eval $(call gb_Library_add_package_headers,xmlreader,xmlreader_inc)) - -$(eval $(call gb_Library_add_defs,xmlreader,\ - -DOOO_DLLIMPLEMENTATION_XMLREADER \ -)) - -$(eval $(call gb_Library_add_linked_libs,xmlreader,\ - sal \ - $(gb_STDLIBS) \ -)) - -$(eval $(call gb_Library_add_exception_objects,xmlreader,\ - xmlreader/source/pad \ - xmlreader/source/span \ - xmlreader/source/xmlreader \ -)) - -$(eval $(call gb_Library_add_api,xmlreader,\ - offapi \ - udkapi \ -)) - -# vim: set noet sw=4 ts=4: diff --git a/xmlreader/Makefile b/xmlreader/Makefile deleted file mode 100644 index b912f2f05..000000000 --- a/xmlreader/Makefile +++ /dev/null @@ -1,39 +0,0 @@ -# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*- -#************************************************************************* -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# Copyright 2000, 2011 Oracle and/or its affiliates. -# -# OpenOffice.org - a multi-platform office productivity suite -# -# This file is part of OpenOffice.org. -# -# OpenOffice.org is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License version 3 -# only, as published by the Free Software Foundation. -# -# OpenOffice.org is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License version 3 for more details -# (a copy is included in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU Lesser General Public License -# version 3 along with OpenOffice.org. If not, see -# <http://www.openoffice.org/license.html> -# for a copy of the LGPLv3 License. -# -#************************************************************************* - -ifeq ($(strip $(SOLARENV)),) -$(error No environment set!) -endif - -gb_PARTIALBUILD := T -GBUILDDIR := $(SOLARENV)/gbuild -include $(GBUILDDIR)/gbuild.mk - -$(eval $(call gb_Module_make_global_targets,$(shell ls $(dir $(realpath $(firstword $(MAKEFILE_LIST))))/Module*.mk))) - -# vim: set noet sw=4 ts=4: diff --git a/xmlreader/Module_xmlreader.mk b/xmlreader/Module_xmlreader.mk deleted file mode 100644 index f9d655d47..000000000 --- a/xmlreader/Module_xmlreader.mk +++ /dev/null @@ -1,36 +0,0 @@ -# -*- Mode: makefile; tab-width: 4; indent-tabs-mode: t -*- -#************************************************************************* -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# Copyright 2000, 2011 Oracle and/or its affiliates. -# -# OpenOffice.org - a multi-platform office productivity suite -# -# This file is part of OpenOffice.org. -# -# OpenOffice.org is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License version 3 -# only, as published by the Free Software Foundation. -# -# OpenOffice.org is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License version 3 for more details -# (a copy is included in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU Lesser General Public License -# version 3 along with OpenOffice.org. If not, see -# <http://www.openoffice.org/license.html> -# for a copy of the LGPLv3 License. -# -#************************************************************************* - -$(eval $(call gb_Module_Module,xmlreader)) - -$(eval $(call gb_Module_add_targets,xmlreader,\ - Library_xmlreader \ - Package_inc \ -)) - -# vim: set noet sw=4 ts=4: diff --git a/xmlreader/Package_inc.mk b/xmlreader/Package_inc.mk deleted file mode 100644 index 3c2bc604a..000000000 --- a/xmlreader/Package_inc.mk +++ /dev/null @@ -1,35 +0,0 @@ -# -*- Mode: makefile; tab-width: 4; indent-tabs-mode: t -*- -#************************************************************************* -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# Copyright 2000, 2011 Oracle and/or its affiliates. -# -# OpenOffice.org - a multi-platform office productivity suite -# -# This file is part of OpenOffice.org. -# -# OpenOffice.org is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License version 3 -# only, as published by the Free Software Foundation. -# -# OpenOffice.org is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License version 3 for more details -# (a copy is included in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU Lesser General Public License -# version 3 along with OpenOffice.org. If not, see -# <http://www.openoffice.org/license.html> -# for a copy of the LGPLv3 License. -# -#************************************************************************* - -$(eval $(call gb_Package_Package,xmlreader_inc,$(SRCDIR)/xmlreader/inc)) -$(eval $(call gb_Package_add_file,xmlreader_inc,inc/xmlreader/pad.hxx,xmlreader/pad.hxx)) -$(eval $(call gb_Package_add_file,xmlreader_inc,inc/xmlreader/span.hxx,xmlreader/span.hxx)) -$(eval $(call gb_Package_add_file,xmlreader_inc,inc/xmlreader/xmlreader.hxx,xmlreader/xmlreader.hxx)) -$(eval $(call gb_Package_add_file,xmlreader_inc,inc/xmlreader/detail/xmlreaderdllapi.hxx,xmlreader/detail/xmlreaderdllapi.hxx)) - -# vim: set noet sw=4 ts=4: diff --git a/xmlreader/inc/xmlreader/README b/xmlreader/inc/xmlreader/README deleted file mode 100644 index cf238a695..000000000 --- a/xmlreader/inc/xmlreader/README +++ /dev/null @@ -1,34 +0,0 @@ -#************************************************************************* -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# Copyright 2000, 2010 Oracle and/or its affiliates. -# -# OpenOffice.org - a multi-platform office productivity suite -# -# This file is part of OpenOffice.org. -# -# OpenOffice.org is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License version 3 -# only, as published by the Free Software Foundation. -# -# OpenOffice.org is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License version 3 for more details -# (a copy is included in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU Lesser General Public License -# version 3 along with OpenOffice.org. If not, see -# <http://www.openoffice.org/license.html> -# for a copy of the LGPLv3 License. -# -#***********************************************************************/ - -<http://qa.openoffice.org/issues/show_bug.cgi?id=115203>: "Issue 113189 -extracted xmlreader from configmgr, to make the former available within URE. -The xmlreader library is for now considered a private part of URE (cf. -ure/source/README), for simplicity uses OOO_DLLPUBLIC_XMLREADER-based symbol -visibility (and thus no symbol versioning), but is of course used from outside -URE in configmgr. This works as long as its ABI does not change. If it ever -changes, symbol versioning will have to be added (in some form or other)." diff --git a/xmlreader/inc/xmlreader/detail/xmlreaderdllapi.hxx b/xmlreader/inc/xmlreader/detail/xmlreaderdllapi.hxx deleted file mode 100644 index 238661a24..000000000 --- a/xmlreader/inc/xmlreader/detail/xmlreaderdllapi.hxx +++ /dev/null @@ -1,41 +0,0 @@ -/************************************************************************* -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* Copyright 2000, 2010 Oracle and/or its affiliates. -* -* OpenOffice.org - a multi-platform office productivity suite -* -* This file is part of OpenOffice.org. -* -* OpenOffice.org is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License version 3 -* only, as published by the Free Software Foundation. -* -* OpenOffice.org is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License version 3 for more details -* (a copy is included in the LICENSE file that accompanied this code). -* -* You should have received a copy of the GNU Lesser General Public License -* version 3 along with OpenOffice.org. If not, see -* <http://www.openoffice.org/license.html> -* for a copy of the LGPLv3 License. -* -************************************************************************/ - -#ifndef INCLUDED_XMLREADER_DETAIL_XMLREADERDLLAPI_HXX -#define INCLUDED_XMLREADER_DETAIL_XMLREADERDLLAPI_HXX - -#include "sal/config.h" - -#include "sal/types.h" - -#if defined OOO_DLLIMPLEMENTATION_XMLREADER -#define OOO_DLLPUBLIC_XMLREADER SAL_DLLPUBLIC_EXPORT -#else -#define OOO_DLLPUBLIC_XMLREADER SAL_DLLPUBLIC_IMPORT -#endif - -#endif diff --git a/xmlreader/inc/xmlreader/pad.hxx b/xmlreader/inc/xmlreader/pad.hxx deleted file mode 100644 index d01574147..000000000 --- a/xmlreader/inc/xmlreader/pad.hxx +++ /dev/null @@ -1,59 +0,0 @@ -/************************************************************************* -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* Copyright 2000, 2010 Oracle and/or its affiliates. -* -* OpenOffice.org - a multi-platform office productivity suite -* -* This file is part of OpenOffice.org. -* -* OpenOffice.org is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License version 3 -* only, as published by the Free Software Foundation. -* -* OpenOffice.org is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License version 3 for more details -* (a copy is included in the LICENSE file that accompanied this code). -* -* You should have received a copy of the GNU Lesser General Public License -* version 3 along with OpenOffice.org. If not, see -* <http://www.openoffice.org/license.html> -* for a copy of the LGPLv3 License. -* -************************************************************************/ - -#ifndef INCLUDED_XMLREADER_PAD_HXX -#define INCLUDED_XMLREADER_PAD_HXX - -#include "sal/config.h" - -#include "rtl/strbuf.hxx" -#include "sal/types.h" -#include "xmlreader/detail/xmlreaderdllapi.hxx" -#include "xmlreader/span.hxx" - -namespace xmlreader { - -class OOO_DLLPUBLIC_XMLREADER Pad { -public: - void add(char const * begin, sal_Int32 length); - - void addEphemeral(char const * begin, sal_Int32 length); - - void clear(); - - Span get() const; - -private: - SAL_DLLPRIVATE void flushSpan(); - - Span span_; - rtl::OStringBuffer buffer_; -}; - -} - -#endif diff --git a/xmlreader/inc/xmlreader/span.hxx b/xmlreader/inc/xmlreader/span.hxx deleted file mode 100644 index 4ec8b574f..000000000 --- a/xmlreader/inc/xmlreader/span.hxx +++ /dev/null @@ -1,69 +0,0 @@ -/************************************************************************* -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* Copyright 2000, 2010 Oracle and/or its affiliates. -* -* OpenOffice.org - a multi-platform office productivity suite -* -* This file is part of OpenOffice.org. -* -* OpenOffice.org is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License version 3 -* only, as published by the Free Software Foundation. -* -* OpenOffice.org is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License version 3 for more details -* (a copy is included in the LICENSE file that accompanied this code). -* -* You should have received a copy of the GNU Lesser General Public License -* version 3 along with OpenOffice.org. If not, see -* <http://www.openoffice.org/license.html> -* for a copy of the LGPLv3 License. -* -************************************************************************/ - -#ifndef INCLUDED_XMLREADER_SPAN_HXX -#define INCLUDED_XMLREADER_SPAN_HXX - -#include "sal/config.h" - -#include "rtl/string.h" -#include "sal/types.h" -#include "xmlreader/detail/xmlreaderdllapi.hxx" - -namespace rtl { class OUString; } - -namespace xmlreader { - -struct OOO_DLLPUBLIC_XMLREADER Span { - char const * begin; - sal_Int32 length; - - inline Span(): begin(0), length(0) {} - // init length to avoid compiler warnings - - inline Span(char const * theBegin, sal_Int32 theLength): - begin(theBegin), length(theLength) {} - - inline void clear() throw() { begin = 0; } - - inline bool is() const { return begin != 0; } - - inline bool equals(Span const & text) const { - return rtl_str_compare_WithLength( - begin, length, text.begin, text.length) == 0; - } - - inline bool equals(char const * textBegin, sal_Int32 textLength) const { - return equals(Span(textBegin, textLength)); - } - - rtl::OUString convertFromUtf8() const; -}; - -} - -#endif diff --git a/xmlreader/inc/xmlreader/xmlreader.hxx b/xmlreader/inc/xmlreader/xmlreader.hxx deleted file mode 100644 index 242af58c7..000000000 --- a/xmlreader/inc/xmlreader/xmlreader.hxx +++ /dev/null @@ -1,199 +0,0 @@ -/************************************************************************* -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* Copyright 2000, 2010 Oracle and/or its affiliates. -* -* OpenOffice.org - a multi-platform office productivity suite -* -* This file is part of OpenOffice.org. -* -* OpenOffice.org is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License version 3 -* only, as published by the Free Software Foundation. -* -* OpenOffice.org is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License version 3 for more details -* (a copy is included in the LICENSE file that accompanied this code). -* -* You should have received a copy of the GNU Lesser General Public License -* version 3 along with OpenOffice.org. If not, see -* <http://www.openoffice.org/license.html> -* for a copy of the LGPLv3 License. -* -************************************************************************/ - -#ifndef INCLUDED_XMLREADER_XMLREADER_HXX -#define INCLUDED_XMLREADER_XMLREADER_HXX - -#include "sal/config.h" - -#include <stack> -#include <vector> - -#include "boost/noncopyable.hpp" -#include "com/sun/star/container/NoSuchElementException.hpp" -#include "com/sun/star/uno/RuntimeException.hpp" -#include "osl/file.h" -#include "rtl/ustring.hxx" -#include "sal/types.h" -#include "xmlreader/detail/xmlreaderdllapi.hxx" -#include "xmlreader/pad.hxx" -#include "xmlreader/span.hxx" - -namespace xmlreader { - -class OOO_DLLPUBLIC_XMLREADER XmlReader: private boost::noncopyable { -public: - explicit XmlReader(rtl::OUString const & fileUrl) - SAL_THROW(( - com::sun::star::container::NoSuchElementException, - com::sun::star::uno::RuntimeException)); - - ~XmlReader(); - - enum { NAMESPACE_NONE = -2, NAMESPACE_UNKNOWN = -1, NAMESPACE_XML = 0 }; - - enum Text { TEXT_NONE, TEXT_RAW, TEXT_NORMALIZED }; - - enum Result { RESULT_BEGIN, RESULT_END, RESULT_TEXT, RESULT_DONE }; - - int registerNamespaceIri(Span const & iri); - - // RESULT_BEGIN: data = localName, ns = ns - // RESULT_END: data, ns unused - // RESULT_TEXT: data = text, ns unused - Result nextItem(Text reportText, Span * data, int * nsId); - - bool nextAttribute(int * nsId, Span * localName); - - // the span returned by getAttributeValue is only valid until the next call - // to nextItem or getAttributeValue - Span getAttributeValue(bool fullyNormalize); - - int getNamespaceId(Span const & prefix) const; - - rtl::OUString getUrl() const; - -private: - typedef std::vector< Span > NamespaceIris; - - // If NamespaceData (and similarly ElementData and AttributeData) is made - // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about - // "'xmlreader::XmlReader' declared with greater visibility than the type of - // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for - // elements_ and attributes_): - - struct NamespaceData { - Span prefix; - int nsId; - - NamespaceData() {} - - NamespaceData(Span const & thePrefix, int theNsId): - prefix(thePrefix), nsId(theNsId) {} - }; - - typedef std::vector< NamespaceData > NamespaceList; - - struct ElementData { - Span name; - NamespaceList::size_type inheritedNamespaces; - int defaultNamespaceId; - - ElementData( - Span const & theName, - NamespaceList::size_type theInheritedNamespaces, - int theDefaultNamespaceId): - name(theName), inheritedNamespaces(theInheritedNamespaces), - defaultNamespaceId(theDefaultNamespaceId) - {} - }; - - typedef std::stack< ElementData > ElementStack; - - struct AttributeData { - char const * nameBegin; - char const * nameEnd; - char const * nameColon; - char const * valueBegin; - char const * valueEnd; - - AttributeData( - char const * theNameBegin, char const * theNameEnd, - char const * theNameColon, char const * theValueBegin, - char const * theValueEnd): - nameBegin(theNameBegin), nameEnd(theNameEnd), - nameColon(theNameColon), valueBegin(theValueBegin), - valueEnd(theValueEnd) - {} - }; - - typedef std::vector< AttributeData > Attributes; - - enum State { - STATE_CONTENT, STATE_START_TAG, STATE_END_TAG, STATE_EMPTY_ELEMENT_TAG, - STATE_DONE }; - - SAL_DLLPRIVATE inline char read() { return pos_ == end_ ? '\0' : *pos_++; } - - SAL_DLLPRIVATE inline char peek() { return pos_ == end_ ? '\0' : *pos_; } - - SAL_DLLPRIVATE void normalizeLineEnds(Span const & text); - - SAL_DLLPRIVATE void skipSpace(); - - SAL_DLLPRIVATE bool skipComment(); - - SAL_DLLPRIVATE void skipProcessingInstruction(); - - SAL_DLLPRIVATE void skipDocumentTypeDeclaration(); - - SAL_DLLPRIVATE Span scanCdataSection(); - - SAL_DLLPRIVATE bool scanName(char const ** nameColon); - - SAL_DLLPRIVATE int scanNamespaceIri( - char const * begin, char const * end); - - SAL_DLLPRIVATE char const * handleReference( - char const * position, char const * end); - - SAL_DLLPRIVATE Span handleAttributeValue( - char const * begin, char const * end, bool fullyNormalize); - - SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName); - - SAL_DLLPRIVATE Result handleEndTag(); - - SAL_DLLPRIVATE void handleElementEnd(); - - SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId); - - SAL_DLLPRIVATE Result handleRawText(Span * text); - - SAL_DLLPRIVATE Result handleNormalizedText(Span * text); - - SAL_DLLPRIVATE int toNamespaceId(NamespaceIris::size_type pos); - - rtl::OUString fileUrl_; - oslFileHandle fileHandle_; - sal_uInt64 fileSize_; - void * fileAddress_; - NamespaceIris namespaceIris_; - NamespaceList namespaces_; - ElementStack elements_; - char const * pos_; - char const * end_; - State state_; - Attributes attributes_; - Attributes::iterator currentAttribute_; - bool firstAttribute_; - Pad pad_; -}; - -} - -#endif diff --git a/xmlreader/prj/build.lst b/xmlreader/prj/build.lst deleted file mode 100644 index 59b71b87d..000000000 --- a/xmlreader/prj/build.lst +++ /dev/null @@ -1,2 +0,0 @@ -xr xmlreader : BOOST:boost cppu offapi sal stlport NULL -xr xmlreader\prj nmake - all xr_prj NULL diff --git a/xmlreader/prj/d.lst b/xmlreader/prj/d.lst deleted file mode 100644 index a4c06081d..000000000 --- a/xmlreader/prj/d.lst +++ /dev/null @@ -1 +0,0 @@ -..\%__SRC%\lib\libxmlreader.dll.a %_DEST%\lib\libxmlreader.dll.a diff --git a/xmlreader/prj/makefile.mk b/xmlreader/prj/makefile.mk deleted file mode 100644 index 88cd9dfe0..000000000 --- a/xmlreader/prj/makefile.mk +++ /dev/null @@ -1,40 +0,0 @@ -#************************************************************************* -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# Copyright 2000, 2010 Oracle and/or its affiliates. -# -# OpenOffice.org - a multi-platform office productivity suite -# -# This file is part of OpenOffice.org. -# -# OpenOffice.org is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License version 3 -# only, as published by the Free Software Foundation. -# -# OpenOffice.org is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License version 3 for more details -# (a copy is included in the LICENSE file that accompanied this code). -# -# You should have received a copy of the GNU Lesser General Public License -# version 3 along with OpenOffice.org. If not, see -# <http://www.openoffice.org/license.html> -# for a copy of the LGPLv3 License. -# -#************************************************************************* - -PRJ=.. -TARGET=prj - -.INCLUDE : settings.mk - -.IF "$(VERBOSE)"!="" -VERBOSEFLAG := -.ELSE -VERBOSEFLAG := -s -.ENDIF - -all: - cd $(PRJ) && $(GNUMAKE) $(VERBOSEFLAG) -r -j$(MAXPROCESS) $(gb_MAKETARGET) && $(GNUMAKE) $(VERBOSEFLAG) -r deliverlog diff --git a/xmlreader/source/pad.cxx b/xmlreader/source/pad.cxx deleted file mode 100644 index 8932ee382..000000000 --- a/xmlreader/source/pad.cxx +++ /dev/null @@ -1,83 +0,0 @@ -/************************************************************************* -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* Copyright 2000, 2010 Oracle and/or its affiliates. -* -* OpenOffice.org - a multi-platform office productivity suite -* -* This file is part of OpenOffice.org. -* -* OpenOffice.org is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License version 3 -* only, as published by the Free Software Foundation. -* -* OpenOffice.org is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License version 3 for more details -* (a copy is included in the LICENSE file that accompanied this code). -* -* You should have received a copy of the GNU Lesser General Public License -* version 3 along with OpenOffice.org. If not, see -* <http://www.openoffice.org/license.html> -* for a copy of the LGPLv3 License. -* -************************************************************************/ - -#include "sal/config.h" -#include "osl/diagnose.h" -#include "rtl/string.h" -#include "sal/types.h" -#include "xmlreader/pad.hxx" -#include "xmlreader/span.hxx" - -namespace xmlreader { - -void Pad::add(char const * begin, sal_Int32 length) { - OSL_ASSERT( - begin != 0 && length >= 0 && !(span_.is() && buffer_.getLength() != 0)); - if (length != 0) { - flushSpan(); - if (buffer_.getLength() == 0) { - span_ = Span(begin, length); - } else { - buffer_.append(begin, length); - } - } -} - -void Pad::addEphemeral(char const * begin, sal_Int32 length) { - OSL_ASSERT( - begin != 0 && length >= 0 && !(span_.is() && buffer_.getLength() != 0)); - if (length != 0) { - flushSpan(); - buffer_.append(begin, length); - } -} - -void Pad::clear() { - OSL_ASSERT(!(span_.is() && buffer_.getLength() != 0)); - span_.clear(); - buffer_.setLength(0); -} - -Span Pad::get() const { - OSL_ASSERT(!(span_.is() && buffer_.getLength() != 0)); - if (span_.is()) { - return span_; - } else if (buffer_.getLength() == 0) { - return Span(RTL_CONSTASCII_STRINGPARAM("")); - } else { - return Span(buffer_.getStr(), buffer_.getLength()); - } -} - -void Pad::flushSpan() { - if (span_.is()) { - buffer_.append(span_.begin, span_.length); - span_.clear(); - } -} - -} diff --git a/xmlreader/source/span.cxx b/xmlreader/source/span.cxx deleted file mode 100644 index 5adfa0597..000000000 --- a/xmlreader/source/span.cxx +++ /dev/null @@ -1,64 +0,0 @@ -/************************************************************************* -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* Copyright 2000, 2010 Oracle and/or its affiliates. -* -* OpenOffice.org - a multi-platform office productivity suite -* -* This file is part of OpenOffice.org. -* -* OpenOffice.org is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License version 3 -* only, as published by the Free Software Foundation. -* -* OpenOffice.org is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License version 3 for more details -* (a copy is included in the LICENSE file that accompanied this code). -* -* You should have received a copy of the GNU Lesser General Public License -* version 3 along with OpenOffice.org. If not, see -* <http://www.openoffice.org/license.html> -* for a copy of the LGPLv3 License. -* -************************************************************************/ - -#include "sal/config.h" -#include "com/sun/star/uno/RuntimeException.hpp" -#include "com/sun/star/uno/XInterface.hpp" -#include "osl/diagnose.h" -#include "rtl/textcvt.h" -#include "rtl/textenc.h" -#include "rtl/ustring.h" -#include "rtl/ustring.hxx" -#include "sal/types.h" -#include "xmlreader/span.hxx" - -namespace xmlreader { - -namespace { - -namespace css = com::sun::star; - -} - -rtl::OUString Span::convertFromUtf8() const { - OSL_ASSERT(is()); - rtl_uString * s = 0; - if (!rtl_convertStringToUString( - &s, begin, length, RTL_TEXTENCODING_UTF8, - (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR | - RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR | - RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR))) - { - throw css::uno::RuntimeException( - rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM("cannot convert from UTF-8")), - css::uno::Reference< css::uno::XInterface >()); - } - return rtl::OUString(s, SAL_NO_ACQUIRE); -} - -} diff --git a/xmlreader/source/xmlreader.cxx b/xmlreader/source/xmlreader.cxx deleted file mode 100644 index 234eb71cb..000000000 --- a/xmlreader/source/xmlreader.cxx +++ /dev/null @@ -1,1053 +0,0 @@ -/************************************************************************* -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* Copyright 2000, 2010 Oracle and/or its affiliates. -* -* OpenOffice.org - a multi-platform office productivity suite -* -* This file is part of OpenOffice.org. -* -* OpenOffice.org is free software: you can redistribute it and/or modify -* it under the terms of the GNU Lesser General Public License version 3 -* only, as published by the Free Software Foundation. -* -* OpenOffice.org is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU Lesser General Public License version 3 for more details -* (a copy is included in the LICENSE file that accompanied this code). -* -* You should have received a copy of the GNU Lesser General Public License -* version 3 along with OpenOffice.org. If not, see -* <http://www.openoffice.org/license.html> -* for a copy of the LGPLv3 License. -* -************************************************************************/ - -#include "sal/config.h" - -#include <climits> -#include <cstddef> - -#include "com/sun/star/container/NoSuchElementException.hpp" -#include "com/sun/star/uno/Reference.hxx" -#include "com/sun/star/uno/RuntimeException.hpp" -#include "com/sun/star/uno/XInterface.hpp" -#include "osl/diagnose.h" -#include "osl/file.h" -#include "rtl/string.h" -#include "rtl/ustring.h" -#include "rtl/ustring.hxx" -#include "sal/types.h" -#include "xmlreader/pad.hxx" -#include "xmlreader/span.hxx" -#include "xmlreader/xmlreader.hxx" - -namespace xmlreader { - -namespace { - -namespace css = com::sun::star; - -bool isSpace(char c) { - switch (c) { - case '\x09': - case '\x0A': - case '\x0D': - case ' ': - return true; - default: - return false; - } -} - -} - -XmlReader::XmlReader(rtl::OUString const & fileUrl) - SAL_THROW(( - css::container::NoSuchElementException, css::uno::RuntimeException)): - fileUrl_(fileUrl) -{ - switch (osl_openFile(fileUrl_.pData, &fileHandle_, osl_File_OpenFlag_Read)) - { - case osl_File_E_None: - break; - case osl_File_E_NOENT: - throw css::container::NoSuchElementException( - fileUrl_, css::uno::Reference< css::uno::XInterface >()); - default: - throw css::uno::RuntimeException( - (rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("cannot open ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - oslFileError e = osl_getFileSize(fileHandle_, &fileSize_); - if (e == osl_File_E_None) { - e = osl_mapFile( - fileHandle_, &fileAddress_, fileSize_, 0, - osl_File_MapFlag_WillNeed); - } - if (e != osl_File_E_None) { - e = osl_closeFile(fileHandle_); - if (e != osl_File_E_None) { - OSL_TRACE("osl_closeFile failed with %ld", static_cast< long >(e)); - } - throw css::uno::RuntimeException( - (rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("cannot mmap ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - namespaceIris_.push_back( - Span( - RTL_CONSTASCII_STRINGPARAM( - "http://www.w3.org/XML/1998/namespace"))); - namespaces_.push_back( - NamespaceData(Span(RTL_CONSTASCII_STRINGPARAM("xml")), NAMESPACE_XML)); - pos_ = static_cast< char * >(fileAddress_); - end_ = pos_ + fileSize_; - state_ = STATE_CONTENT; -} - -XmlReader::~XmlReader() { - oslFileError e = osl_unmapFile(fileAddress_, fileSize_); - if (e != osl_File_E_None) { - OSL_TRACE("osl_unmapFile failed with %ld", static_cast< long >(e)); - } - e = osl_closeFile(fileHandle_); - if (e != osl_File_E_None) { - OSL_TRACE("osl_closeFile failed with %ld", static_cast< long >(e)); - } -} - -int XmlReader::registerNamespaceIri(Span const & iri) { - int id = toNamespaceId(namespaceIris_.size()); - namespaceIris_.push_back(iri); - if (iri.equals( - Span( - RTL_CONSTASCII_STRINGPARAM( - "http://www.w3.org/2001/XMLSchema-instance")))) - { - // Old user layer .xcu files used the xsi namespace prefix without - // declaring a corresponding namespace binding, see issue 77174; reading - // those files during migration would fail without this hack that can be - // removed once migration is no longer relevant (see - // configmgr::Components::parseModificationLayer): - namespaces_.push_back( - NamespaceData(Span(RTL_CONSTASCII_STRINGPARAM("xsi")), id)); - } - return id; -} - -XmlReader::Result XmlReader::nextItem(Text reportText, Span * data, int * nsId) -{ - switch (state_) { - case STATE_CONTENT: - switch (reportText) { - case TEXT_NONE: - return handleSkippedText(data, nsId); - case TEXT_RAW: - return handleRawText(data); - case TEXT_NORMALIZED: - return handleNormalizedText(data); - } - case STATE_START_TAG: - return handleStartTag(nsId, data); - case STATE_END_TAG: - return handleEndTag(); - case STATE_EMPTY_ELEMENT_TAG: - handleElementEnd(); - return RESULT_END; - default: // STATE_DONE - return RESULT_DONE; - } -} - -bool XmlReader::nextAttribute(int * nsId, Span * localName) { - OSL_ASSERT(nsId != 0 && localName != 0); - if (firstAttribute_) { - currentAttribute_ = attributes_.begin(); - firstAttribute_ = false; - } else { - ++currentAttribute_; - } - if (currentAttribute_ == attributes_.end()) { - return false; - } - if (currentAttribute_->nameColon == 0) { - *nsId = NAMESPACE_NONE; - *localName = Span( - currentAttribute_->nameBegin, - currentAttribute_->nameEnd - currentAttribute_->nameBegin); - } else { - *nsId = getNamespaceId( - Span( - currentAttribute_->nameBegin, - currentAttribute_->nameColon - currentAttribute_->nameBegin)); - *localName = Span( - currentAttribute_->nameColon + 1, - currentAttribute_->nameEnd - (currentAttribute_->nameColon + 1)); - } - return true; -} - -Span XmlReader::getAttributeValue(bool fullyNormalize) { - return handleAttributeValue( - currentAttribute_->valueBegin, currentAttribute_->valueEnd, - fullyNormalize); -} - -int XmlReader::getNamespaceId(Span const & prefix) const { - for (NamespaceList::const_reverse_iterator i(namespaces_.rbegin()); - i != namespaces_.rend(); ++i) - { - if (prefix.equals(i->prefix)) { - return i->nsId; - } - } - return NAMESPACE_UNKNOWN; -} - -rtl::OUString XmlReader::getUrl() const { - return fileUrl_; -} - -void XmlReader::normalizeLineEnds(Span const & text) { - char const * p = text.begin; - sal_Int32 n = text.length; - for (;;) { - sal_Int32 i = rtl_str_indexOfChar_WithLength(p, n, '\x0D'); - if (i < 0) { - break; - } - pad_.add(p, i); - p += i + 1; - n -= i + 1; - if (n == 0 || *p != '\x0A') { - pad_.add(RTL_CONSTASCII_STRINGPARAM("\x0A")); - } - } - pad_.add(p, n); -} - -void XmlReader::skipSpace() { - while (isSpace(peek())) { - ++pos_; - } -} - -bool XmlReader::skipComment() { - if (rtl_str_shortenedCompare_WithLength( - pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("--"), - RTL_CONSTASCII_LENGTH("--")) != - 0) - { - return false; - } - pos_ += RTL_CONSTASCII_LENGTH("--"); - sal_Int32 i = rtl_str_indexOfStr_WithLength( - pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("--")); - if (i < 0) { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "premature end (within comment) of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - pos_ += i + RTL_CONSTASCII_LENGTH("--"); - if (read() != '>') { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "illegal \"--\" within comment in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - return true; -} - -void XmlReader::skipProcessingInstruction() { - sal_Int32 i = rtl_str_indexOfStr_WithLength( - pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("?>")); - if (i < 0) { - throw css::uno::RuntimeException( - (rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("bad '<?' in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - pos_ += i + RTL_CONSTASCII_LENGTH("?>"); -} - -void XmlReader::skipDocumentTypeDeclaration() { - // Neither is it checked that the doctypedecl is at the correct position in - // the document, nor that it is well-formed: - for (;;) { - char c = read(); - switch (c) { - case '\0': // i.e., EOF - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "premature end (within DTD) of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - case '"': - case '\'': - { - sal_Int32 i = rtl_str_indexOfChar_WithLength( - pos_, end_ - pos_, c); - if (i < 0) { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "premature end (within DTD) of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - pos_ += i + 1; - } - break; - case '>': - return; - case '[': - for (;;) { - c = read(); - switch (c) { - case '\0': // i.e., EOF - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "premature end (within DTD) of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - case '"': - case '\'': - { - sal_Int32 i = rtl_str_indexOfChar_WithLength( - pos_, end_ - pos_, c); - if (i < 0) { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "premature end (within DTD) of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - pos_ += i + 1; - } - break; - case '<': - switch (read()) { - case '\0': // i.e., EOF - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "premature end (within DTD) of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - case '!': - skipComment(); - break; - case '?': - skipProcessingInstruction(); - break; - default: - break; - } - break; - case ']': - skipSpace(); - if (read() != '>') { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "missing \">\" of DTD in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - return; - default: - break; - } - } - default: - break; - } - } -} - -Span XmlReader::scanCdataSection() { - if (rtl_str_shortenedCompare_WithLength( - pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("[CDATA["), - RTL_CONSTASCII_LENGTH("[CDATA[")) != - 0) - { - return Span(); - } - pos_ += RTL_CONSTASCII_LENGTH("[CDATA["); - char const * begin = pos_; - sal_Int32 i = rtl_str_indexOfStr_WithLength( - pos_, end_ - pos_, RTL_CONSTASCII_STRINGPARAM("]]>")); - if (i < 0) { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "premature end (within CDATA section) of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - pos_ += i + RTL_CONSTASCII_LENGTH("]]>"); - return Span(begin, i); -} - -bool XmlReader::scanName(char const ** nameColon) { - OSL_ASSERT(nameColon != 0 && *nameColon == 0); - for (char const * begin = pos_;; ++pos_) { - switch (peek()) { - case '\0': // i.e., EOF - case '\x09': - case '\x0A': - case '\x0D': - case ' ': - case '/': - case '=': - case '>': - return pos_ != begin; - case ':': - *nameColon = pos_; - break; - default: - break; - } - } -} - -int XmlReader::scanNamespaceIri(char const * begin, char const * end) { - OSL_ASSERT(begin != 0 && begin <= end); - Span iri(handleAttributeValue(begin, end, false)); - for (NamespaceIris::size_type i = 0; i < namespaceIris_.size(); ++i) { - if (namespaceIris_[i].equals(iri)) { - return toNamespaceId(i); - } - } - return XmlReader::NAMESPACE_UNKNOWN; -} - -char const * XmlReader::handleReference(char const * position, char const * end) -{ - OSL_ASSERT(position != 0 && *position == '&' && position < end); - ++position; - if (*position == '#') { - ++position; - sal_Int32 val = 0; - char const * p; - if (*position == 'x') { - ++position; - p = position; - for (;; ++position) { - char c = *position; - if (c >= '0' && c <= '9') { - val = 16 * val + (c - '0'); - } else if (c >= 'A' && c <= 'F') { - val = 16 * val + (c - 'A') + 10; - } else if (c >= 'a' && c <= 'f') { - val = 16 * val + (c - 'a') + 10; - } else { - break; - } - if (val > 0x10FFFF) { // avoid overflow - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "'&#x...' too large in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - } - } else { - p = position; - for (;; ++position) { - char c = *position; - if (c >= '0' && c <= '9') { - val = 10 * val + (c - '0'); - } else { - break; - } - if (val > 0x10FFFF) { // avoid overflow - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "'&#...' too large in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - } - } - if (position == p || *position++ != ';') { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM("'&#...' missing ';' in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - OSL_ASSERT(val >= 0 && val <= 0x10FFFF); - if ((val < 0x20 && val != 0x9 && val != 0xA && val != 0xD) || - (val >= 0xD800 && val <= 0xDFFF) || val == 0xFFFE || val == 0xFFFF) - { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "character reference denoting invalid character in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - char buf[4]; - sal_Int32 len; - if (val < 0x80) { - buf[0] = static_cast< char >(val); - len = 1; - } else if (val < 0x800) { - buf[0] = static_cast< char >((val >> 6) | 0xC0); - buf[1] = static_cast< char >((val & 0x3F) | 0x80); - len = 2; - } else if (val < 0x10000) { - buf[0] = static_cast< char >((val >> 12) | 0xE0); - buf[1] = static_cast< char >(((val >> 6) & 0x3F) | 0x80); - buf[2] = static_cast< char >((val & 0x3F) | 0x80); - len = 3; - } else { - buf[0] = static_cast< char >((val >> 18) | 0xF0); - buf[1] = static_cast< char >(((val >> 12) & 0x3F) | 0x80); - buf[2] = static_cast< char >(((val >> 6) & 0x3F) | 0x80); - buf[3] = static_cast< char >((val & 0x3F) | 0x80); - len = 4; - } - pad_.addEphemeral(buf, len); - return position; - } else { - struct EntityRef { - char const * inBegin; - sal_Int32 inLength; - char const * outBegin; - sal_Int32 outLength; - }; - static EntityRef const refs[] = { - { RTL_CONSTASCII_STRINGPARAM("amp;"), - RTL_CONSTASCII_STRINGPARAM("&") }, - { RTL_CONSTASCII_STRINGPARAM("lt;"), - RTL_CONSTASCII_STRINGPARAM("<") }, - { RTL_CONSTASCII_STRINGPARAM("gt;"), - RTL_CONSTASCII_STRINGPARAM(">") }, - { RTL_CONSTASCII_STRINGPARAM("apos;"), - RTL_CONSTASCII_STRINGPARAM("'") }, - { RTL_CONSTASCII_STRINGPARAM("quot;"), - RTL_CONSTASCII_STRINGPARAM("\"") } }; - for (std::size_t i = 0; i < sizeof refs / sizeof refs[0]; ++i) { - if (rtl_str_shortenedCompare_WithLength( - position, end - position, refs[i].inBegin, refs[i].inLength, - refs[i].inLength) == - 0) - { - position += refs[i].inLength; - pad_.add(refs[i].outBegin, refs[i].outLength); - return position; - } - } - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM("unknown entity reference in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } -} - -Span XmlReader::handleAttributeValue( - char const * begin, char const * end, bool fullyNormalize) -{ - pad_.clear(); - if (fullyNormalize) { - while (begin != end && isSpace(*begin)) { - ++begin; - } - while (end != begin && isSpace(end[-1])) { - --end; - } - char const * p = begin; - enum Space { SPACE_NONE, SPACE_SPAN, SPACE_BREAK }; - // a single true space character can go into the current span, - // everything else breaks the span - Space space = SPACE_NONE; - while (p != end) { - switch (*p) { - case '\x09': - case '\x0A': - case '\x0D': - switch (space) { - case SPACE_NONE: - pad_.add(begin, p - begin); - pad_.add(RTL_CONSTASCII_STRINGPARAM(" ")); - space = SPACE_BREAK; - break; - case SPACE_SPAN: - pad_.add(begin, p - begin); - space = SPACE_BREAK; - break; - case SPACE_BREAK: - break; - } - begin = ++p; - break; - case ' ': - switch (space) { - case SPACE_NONE: - ++p; - space = SPACE_SPAN; - break; - case SPACE_SPAN: - pad_.add(begin, p - begin); - begin = ++p; - space = SPACE_BREAK; - break; - case SPACE_BREAK: - begin = ++p; - break; - } - break; - case '&': - pad_.add(begin, p - begin); - p = handleReference(p, end); - begin = p; - space = SPACE_NONE; - break; - default: - ++p; - space = SPACE_NONE; - break; - } - } - pad_.add(begin, p - begin); - } else { - char const * p = begin; - while (p != end) { - switch (*p) { - case '\x09': - case '\x0A': - pad_.add(begin, p - begin); - begin = ++p; - pad_.add(RTL_CONSTASCII_STRINGPARAM(" ")); - break; - case '\x0D': - pad_.add(begin, p - begin); - ++p; - if (peek() == '\x0A') { - ++p; - } - begin = p; - pad_.add(RTL_CONSTASCII_STRINGPARAM(" ")); - break; - case '&': - pad_.add(begin, p - begin); - p = handleReference(p, end); - begin = p; - break; - default: - ++p; - break; - } - } - pad_.add(begin, p - begin); - } - return pad_.get(); -} - -XmlReader::Result XmlReader::handleStartTag(int * nsId, Span * localName) { - OSL_ASSERT(nsId != 0 && localName); - char const * nameBegin = pos_; - char const * nameColon = 0; - if (!scanName(&nameColon)) { - throw css::uno::RuntimeException( - (rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("bad tag name in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - char const * nameEnd = pos_; - NamespaceList::size_type inheritedNamespaces = namespaces_.size(); - bool hasDefaultNs = false; - int defaultNsId = NAMESPACE_NONE; - attributes_.clear(); - for (;;) { - char const * p = pos_; - skipSpace(); - if (peek() == '/' || peek() == '>') { - break; - } - if (pos_ == p) { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "missing whitespace before attribute in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - char const * attrNameBegin = pos_; - char const * attrNameColon = 0; - if (!scanName(&attrNameColon)) { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM("bad attribute name in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - char const * attrNameEnd = pos_; - skipSpace(); - if (read() != '=') { - throw css::uno::RuntimeException( - (rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("missing '=' in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - skipSpace(); - char del = read(); - if (del != '\'' && del != '"') { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM("bad attribute value in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - char const * valueBegin = pos_; - sal_Int32 i = rtl_str_indexOfChar_WithLength(pos_, end_ - pos_, del); - if (i < 0) { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM( - "unterminated attribute value in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - char const * valueEnd = pos_ + i; - pos_ += i + 1; - if (attrNameColon == 0 && - Span(attrNameBegin, attrNameEnd - attrNameBegin).equals( - RTL_CONSTASCII_STRINGPARAM("xmlns"))) - { - hasDefaultNs = true; - defaultNsId = scanNamespaceIri(valueBegin, valueEnd); - } else if (attrNameColon != 0 && - Span(attrNameBegin, attrNameColon - attrNameBegin).equals( - RTL_CONSTASCII_STRINGPARAM("xmlns"))) - { - namespaces_.push_back( - NamespaceData( - Span(attrNameColon + 1, attrNameEnd - (attrNameColon + 1)), - scanNamespaceIri(valueBegin, valueEnd))); - } else { - attributes_.push_back( - AttributeData( - attrNameBegin, attrNameEnd, attrNameColon, valueBegin, - valueEnd)); - } - } - if (!hasDefaultNs && !elements_.empty()) { - defaultNsId = elements_.top().defaultNamespaceId; - } - firstAttribute_ = true; - if (peek() == '/') { - state_ = STATE_EMPTY_ELEMENT_TAG; - ++pos_; - } else { - state_ = STATE_CONTENT; - } - if (peek() != '>') { - throw css::uno::RuntimeException( - (rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("missing '>' in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - ++pos_; - elements_.push( - ElementData( - Span(nameBegin, nameEnd - nameBegin), inheritedNamespaces, - defaultNsId)); - if (nameColon == 0) { - *nsId = defaultNsId; - *localName = Span(nameBegin, nameEnd - nameBegin); - } else { - *nsId = getNamespaceId(Span(nameBegin, nameColon - nameBegin)); - *localName = Span(nameColon + 1, nameEnd - (nameColon + 1)); - } - return RESULT_BEGIN; -} - -XmlReader::Result XmlReader::handleEndTag() { - if (elements_.empty()) { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM("spurious end tag in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - char const * nameBegin = pos_; - char const * nameColon = 0; - if (!scanName(&nameColon) || - !elements_.top().name.equals(nameBegin, pos_ - nameBegin)) - { - throw css::uno::RuntimeException( - (rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("tag mismatch in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - handleElementEnd(); - skipSpace(); - if (peek() != '>') { - throw css::uno::RuntimeException( - (rtl::OUString(RTL_CONSTASCII_USTRINGPARAM("missing '>' in ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - ++pos_; - return RESULT_END; -} - -void XmlReader::handleElementEnd() { - OSL_ASSERT(!elements_.empty()); - namespaces_.resize(elements_.top().inheritedNamespaces); - elements_.pop(); - state_ = elements_.empty() ? STATE_DONE : STATE_CONTENT; -} - -XmlReader::Result XmlReader::handleSkippedText(Span * data, int * nsId) { - for (;;) { - sal_Int32 i = rtl_str_indexOfChar_WithLength(pos_, end_ - pos_, '<'); - if (i < 0) { - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM("premature end of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - } - pos_ += i + 1; - switch (peek()) { - case '!': - ++pos_; - if (!skipComment() && !scanCdataSection().is()) { - skipDocumentTypeDeclaration(); - } - break; - case '/': - ++pos_; - return handleEndTag(); - case '?': - ++pos_; - skipProcessingInstruction(); - break; - default: - return handleStartTag(nsId, data); - } - } -} - -XmlReader::Result XmlReader::handleRawText(Span * text) { - pad_.clear(); - for (char const * begin = pos_;;) { - switch (peek()) { - case '\0': // i.e., EOF - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM("premature end of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - case '\x0D': - pad_.add(begin, pos_ - begin); - ++pos_; - if (peek() != '\x0A') { - pad_.add(RTL_CONSTASCII_STRINGPARAM("\x0A")); - } - begin = pos_; - break; - case '&': - pad_.add(begin, pos_ - begin); - pos_ = handleReference(pos_, end_); - begin = pos_; - break; - case '<': - pad_.add(begin, pos_ - begin); - ++pos_; - switch (peek()) { - case '!': - ++pos_; - if (!skipComment()) { - Span cdata(scanCdataSection()); - if (cdata.is()) { - normalizeLineEnds(cdata); - } else { - skipDocumentTypeDeclaration(); - } - } - begin = pos_; - break; - case '/': - *text = pad_.get(); - ++pos_; - state_ = STATE_END_TAG; - return RESULT_TEXT; - case '?': - ++pos_; - skipProcessingInstruction(); - begin = pos_; - break; - default: - *text = pad_.get(); - state_ = STATE_START_TAG; - return RESULT_TEXT; - } - break; - default: - ++pos_; - break; - } - } -} - -XmlReader::Result XmlReader::handleNormalizedText(Span * text) { - pad_.clear(); - char const * flowBegin = pos_; - char const * flowEnd = pos_; - enum Space { SPACE_START, SPACE_NONE, SPACE_SPAN, SPACE_BREAK }; - // a single true space character can go into the current flow, - // everything else breaks the flow - Space space = SPACE_START; - for (;;) { - switch (peek()) { - case '\0': // i.e., EOF - throw css::uno::RuntimeException( - (rtl::OUString( - RTL_CONSTASCII_USTRINGPARAM("premature end of ")) + - fileUrl_), - css::uno::Reference< css::uno::XInterface >()); - case '\x09': - case '\x0A': - case '\x0D': - switch (space) { - case SPACE_START: - case SPACE_BREAK: - break; - case SPACE_NONE: - case SPACE_SPAN: - space = SPACE_BREAK; - break; - } - ++pos_; - break; - case ' ': - switch (space) { - case SPACE_START: - case SPACE_BREAK: - break; - case SPACE_NONE: - space = SPACE_SPAN; - break; - case SPACE_SPAN: - space = SPACE_BREAK; - break; - } - ++pos_; - break; - case '&': - switch (space) { - case SPACE_START: - break; - case SPACE_NONE: - case SPACE_SPAN: - pad_.add(flowBegin, pos_ - flowBegin); - break; - case SPACE_BREAK: - pad_.add(flowBegin, flowEnd - flowBegin); - pad_.add(RTL_CONSTASCII_STRINGPARAM(" ")); - break; - } - pos_ = handleReference(pos_, end_); - flowBegin = pos_; - flowEnd = pos_; - space = SPACE_NONE; - break; - case '<': - ++pos_; - switch (peek()) { - case '!': - ++pos_; - if (skipComment()) { - space = SPACE_BREAK; - } else { - Span cdata(scanCdataSection()); - if (cdata.is()) { - // CDATA is not normalized (similar to character - // references; it keeps the code simple), but it might - // arguably be better to normalize it: - switch (space) { - case SPACE_START: - break; - case SPACE_NONE: - case SPACE_SPAN: - pad_.add(flowBegin, pos_ - flowBegin); - break; - case SPACE_BREAK: - pad_.add(flowBegin, flowEnd - flowBegin); - pad_.add(RTL_CONSTASCII_STRINGPARAM(" ")); - break; - } - normalizeLineEnds(cdata); - flowBegin = pos_; - flowEnd = pos_; - space = SPACE_NONE; - } else { - skipDocumentTypeDeclaration(); - } - } - break; - case '/': - ++pos_; - pad_.add(flowBegin, flowEnd - flowBegin); - *text = pad_.get(); - state_ = STATE_END_TAG; - return RESULT_TEXT; - case '?': - ++pos_; - skipProcessingInstruction(); - space = SPACE_BREAK; - break; - default: - pad_.add(flowBegin, flowEnd - flowBegin); - *text = pad_.get(); - state_ = STATE_START_TAG; - return RESULT_TEXT; - } - break; - default: - switch (space) { - case SPACE_START: - flowBegin = pos_; - break; - case SPACE_NONE: - case SPACE_SPAN: - break; - case SPACE_BREAK: - pad_.add(flowBegin, flowEnd - flowBegin); - pad_.add(RTL_CONSTASCII_STRINGPARAM(" ")); - flowBegin = pos_; - break; - } - flowEnd = ++pos_; - space = SPACE_NONE; - break; - } - } -} - -int XmlReader::toNamespaceId(NamespaceIris::size_type pos) { - OSL_ASSERT(pos <= INT_MAX); - return static_cast< int >(pos); -} - -} |