diff options
author | Eike Rathke <erack@redhat.com> | 2017-02-28 22:14:08 +0100 |
---|---|---|
committer | Eike Rathke <erack@redhat.com> | 2017-02-28 22:29:36 +0100 |
commit | baca2ec8d5a457512e25b499c3cacc7a66ca853f (patch) | |
tree | 4bdce9be0ba8df9aa9d35255cbde403ea3ac67b5 /sax | |
parent | 6bb6ca1fb30f786385c2357e5435077066a49f82 (diff) |
FastSaxSerializer: SAL_WARN() when writing invalid XML characters
This catches things for OOXML, that could be escaped using _xHHHH_
Change-Id: I937f67dc5edd3c0e5727d74bebb736dc82bdc53d
Diffstat (limited to 'sax')
-rw-r--r-- | sax/source/tools/fastserializer.cxx | 55 |
1 files changed, 54 insertions, 1 deletions
diff --git a/sax/source/tools/fastserializer.cxx b/sax/source/tools/fastserializer.cxx
index 620fe68949ad..a571829112b1 100644
--- a/sax/source/tools/fastserializer.cxx
+++ b/sax/source/tools/fastserializer.cxx
@@ -101,6 +101,26 @@ namespace sax_fastparser {
         write( sOutput.getStr(), sOutput.getLength(), bEscape );
     }
 
+#if OSL_DEBUG_LEVEL > 0
+    /** Characters not allowed in XML 1.0
+        XML 1.1 would exclude only U+0000
+     */
+    bool invalidChar( char c )
+    {
+        if (static_cast<unsigned char>(c) >= 0x20)
+            return false;
+
+        switch (c)
+        {
+            case 0x09:
+            case 0x0a:
+            case 0x0d:
+                return false;
+        }
+        return true;
+    }
+#endif
+
     void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
     {
         if (nLen == -1)
@@ -112,6 +132,7 @@ namespace sax_fastparser {
             return;
         }
 
+        bool bGood = true;
         for (sal_Int32 i = 0; i < nLen; ++i)
         {
             char c = pStr[ i ];
@@ -124,9 +145,26 @@ namespace sax_fastparser {
                 case '"': writeBytes( "&quot;", 6 ); break;
                 case '\n': writeBytes( "&#10;", 5 ); break;
                 case '\r': writeBytes( "&#13;", 5 ); break;
-                default: writeBytes( &c, 1 ); break;
+                default:
+#if OSL_DEBUG_LEVEL > 0
+                    /* FIXME: we should escape such invalid characters
+                     * in the _xHHHH_ form OOXML uses. Note that also a
+                     * literal "_x0008_" would have to be escaped then
+                     * as _x005F_x0008_ (where only the leading '_' is
+                     * escaped as _x005F_). */
+                    if (invalidChar(pStr[i]))
+                    {
+                        bGood = false;
+                        // The SAL_WARN() for the single character is
+                        // issued in writeBytes(), just gather for the
+                        // SAL_WARN_IF() below.
+                    }
+#endif
+                    writeBytes( &c, 1 ); break;
             }
         }
+        SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
+        (void)bGood;
     }
 
     void FastSaxSerializer::endDocument()
@@ -496,6 +534,21 @@ namespace sax_fastparser {
 
     void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
     {
+#if OSL_DEBUG_LEVEL > 0
+        {
+            bool bGood = true;
+            for (size_t i=0; i < nLen; ++i)
+            {
+                if (invalidChar(pStr[i]))
+                {
+                    bGood = false;
+                    SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" <<
+                            std::hex << int(static_cast<unsigned char>(pStr[i])));
+                }
+            }
+            SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
+        }
+#endif
         maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
     }
 