summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author José Fonseca <jose.r.fonseca@gmail.com> 2011-10-08 10:22:08 +0100
committer José Fonseca <jose.r.fonseca@gmail.com> 2011-10-08 10:22:08 +0100
commit d92831ec64d9b29314d10e5cc226e6bcdf59475b (patch)
tree b1e8fc47cbc0a9a60c541526331a00285b81a3de
parent 18d094ea673e246a01dc318fd4d2dfd50ca2d630 (diff)
Upgrade bundled snappy to version 1.0.4.
-rw-r--r-- thirdparty/snappy/CMakeLists.txt 8
-rw-r--r-- thirdparty/snappy/ChangeLog 168
-rw-r--r-- thirdparty/snappy/NEWS 12
-rw-r--r-- thirdparty/snappy/config.h.in 9
-rw-r--r-- thirdparty/snappy/format_description.txt 8
-rw-r--r-- thirdparty/snappy/snappy-stubs-internal.h 42
-rw-r--r-- thirdparty/snappy/snappy-stubs-public.h 2
-rw-r--r-- thirdparty/snappy/snappy.cc 27
-rw-r--r-- thirdparty/snappy/snappy_unittest.cc 8
9 files changed, 264 insertions, 20 deletions
diff --git a/thirdparty/snappy/CMakeLists.txt b/thirdparty/snappy/CMakeLists.txt
index b727854..2ee896c 100644
--- a/thirdparty/snappy/CMakeLists.txt
+++ b/thirdparty/snappy/CMakeLists.txt
@@ -6,6 +6,9 @@ include (CheckCXXSourceCompiles)
include (TestBigEndian)
check_include_file_cxx (sys/mman.h HAVE_SYS_MMAN_H)
+check_include_file_cxx (sys/byteorder.h HAVE_SYS_BYTEORDER_H)
+check_include_file_cxx (sys/endian.h HAVE_SYS_ENDIAN_H)
+check_include_file_cxx (byteswap.h HAVE_BYTESWAP_H)
check_cxx_source_compiles (
"main() { return __builtin_expect(1, 1) ? 1 : 0; }"
@@ -23,6 +26,11 @@ configure_file (config.h.in config.h)
add_definitions (-DHAVE_CONFIG_H)
+# Adjust warnings
+if (CMAKE_COMPILER_IS_GNUCXX)
+ add_definitions (-Wno-unused-function)
+endif ()
+
include_directories (${CMAKE_CURRENT_BINARY_DIR})
diff --git a/thirdparty/snappy/ChangeLog b/thirdparty/snappy/ChangeLog
index a4e61bf..5e7cccc 100644
--- a/thirdparty/snappy/ChangeLog
+++ b/thirdparty/snappy/ChangeLog
@@ -1,4 +1,172 @@
------------------------------------------------------------------------
+r49 | snappy.mirrorbot@gmail.com | 2011-09-15 11:50:05 +0200 (Thu, 15 Sep 2011) | 5 lines
+
+Fix public issue #50: Include generic byteswap macros.
+Also include Solaris 10 and FreeBSD versions.
+
+R=csilvers
+
+------------------------------------------------------------------------
+r48 | snappy.mirrorbot@gmail.com | 2011-08-10 20:57:27 +0200 (Wed, 10 Aug 2011) | 5 lines
+
+Partially fix public issue 50: Remove an extra comma from the end of some
+enum declarations, as it seems the Sun compiler does not like it.
+
+Based on patch by Travis Vitek.
+
+------------------------------------------------------------------------
+r47 | snappy.mirrorbot@gmail.com | 2011-08-10 20:44:16 +0200 (Wed, 10 Aug 2011) | 4 lines
+
+Use the right #ifdef test for sys/mman.h.
+
+Based on patch by Travis Vitek.
+
+------------------------------------------------------------------------
+r46 | snappy.mirrorbot@gmail.com | 2011-08-10 03:22:09 +0200 (Wed, 10 Aug 2011) | 6 lines
+
+Fix public issue #47: Small comment cleanups in the unit test.
+
+Originally based on a patch by Patrick Pelletier.
+
+R=sanjay
+
+------------------------------------------------------------------------
+r45 | snappy.mirrorbot@gmail.com | 2011-08-10 03:14:43 +0200 (Wed, 10 Aug 2011) | 8 lines
+
+Fix public issue #46: Format description said "3-byte offset"
+instead of "4-byte offset" for the longest copies.
+
+Also fix an inconsistency in the heading for section 2.2.3.
+Both patches by Patrick Pelletier.
+
+R=csilvers
+
+------------------------------------------------------------------------
+r44 | snappy.mirrorbot@gmail.com | 2011-06-28 13:40:25 +0200 (Tue, 28 Jun 2011) | 8 lines
+
+Fix public issue #44: Make the definition and declaration of CompressFragment
+identical, even regarding cv-qualifiers.
+
+This is required to work around a bug in the Solaris Studio C++ compiler
+(it does not properly disregard cv-qualifiers when doing name mangling).
+
+R=sanjay
+
+------------------------------------------------------------------------
+r43 | snappy.mirrorbot@gmail.com | 2011-06-04 12:19:05 +0200 (Sat, 04 Jun 2011) | 7 lines
+
+Correct an inaccuracy in the Snappy format description.
+(I stumbled into this when changing the way we decompress literals.)
+
+R=csilvers
+
+Revision created by MOE tool push_codebase.
+
+------------------------------------------------------------------------
+r42 | snappy.mirrorbot@gmail.com | 2011-06-03 22:53:06 +0200 (Fri, 03 Jun 2011) | 50 lines
+
+Speed up decompression by removing a fast-path attempt.
+
+Whenever we try to enter a copy fast-path, there is a certain cost in checking
+that all the preconditions are in place, but it's normally offset by the fact
+that we can usually take the cheaper path. However, in a certain path we've
+already established that "avail < literal_length", which usually means that
+either the available space is small, or the literal is big. Both will disqualify
+us from taking the fast path, and thus we take the hit from the precondition
+checking without gaining much from having a fast path. Thus, simply don't try
+the fast path in this situation -- we're already on a slow path anyway
+(one where we need to refill more data from the reader).
+
+I'm a bit surprised at how much this gained; it could be that this path is
+more common than I thought, or that the simpler structure somehow makes the
+compiler happier. I haven't looked at the assembler, but it's a win across
+the board on both Core 2, Core i7 and Opteron, at least for the cases we
+typically care about. The gains seem to be the largest on Core i7, though.
+Results from my Core i7 workstation:
+
+
+ Benchmark Time(ns) CPU(ns) Iterations
+ ---------------------------------------------------
+ BM_UFlat/0 73337 73091 190996 1.3GB/s html [ +1.7%]
+ BM_UFlat/1 696379 693501 20173 965.5MB/s urls [ +2.7%]
+ BM_UFlat/2 9765 9734 1472135 12.1GB/s jpg [ +0.7%]
+ BM_UFlat/3 29720 29621 472973 3.0GB/s pdf [ +1.8%]
+ BM_UFlat/4 294636 293834 47782 1.3GB/s html4 [ +2.3%]
+ BM_UFlat/5 28399 28320 494700 828.5MB/s cp [ +3.5%]
+ BM_UFlat/6 12795 12760 1000000 833.3MB/s c [ +1.2%]
+ BM_UFlat/7 3984 3973 3526448 893.2MB/s lsp [ +5.7%]
+ BM_UFlat/8 991996 989322 14141 992.6MB/s xls [ +3.3%]
+ BM_UFlat/9 228620 227835 61404 636.6MB/s txt1 [ +4.0%]
+ BM_UFlat/10 197114 196494 72165 607.5MB/s txt2 [ +3.5%]
+ BM_UFlat/11 605240 603437 23217 674.4MB/s txt3 [ +3.7%]
+ BM_UFlat/12 804157 802016 17456 573.0MB/s txt4 [ +3.9%]
+ BM_UFlat/13 347860 346998 40346 1.4GB/s bin [ +1.2%]
+ BM_UFlat/14 44684 44559 315315 818.4MB/s sum [ +2.3%]
+ BM_UFlat/15 5120 5106 2739726 789.4MB/s man [ +3.3%]
+ BM_UFlat/16 76591 76355 183486 1.4GB/s pb [ +2.8%]
+ BM_UFlat/17 238564 237828 58824 739.1MB/s gaviota [ +1.6%]
+ BM_UValidate/0 42194 42060 333333 2.3GB/s html [ -0.1%]
+ BM_UValidate/1 433182 432005 32407 1.5GB/s urls [ -0.1%]
+ BM_UValidate/2 197 196 71428571 603.3GB/s jpg [ +0.5%]
+ BM_UValidate/3 14494 14462 972222 6.1GB/s pdf [ +0.5%]
+ BM_UValidate/4 168444 167836 83832 2.3GB/s html4 [ +0.1%]
+
+R=jeff
+
+Revision created by MOE tool push_codebase.
+
+------------------------------------------------------------------------
+r41 | snappy.mirrorbot@gmail.com | 2011-06-03 22:47:14 +0200 (Fri, 03 Jun 2011) | 43 lines
+
+Speed up decompression by not needing a lookup table for literal items.
+
+Looking up into and decoding the values from char_table has long shown up as a
+hotspot in the decompressor. While it turns out that it's hard to make a more
+efficient decoder for the copy ops, the literals are simple enough that we can
+decode them without needing a table lookup. (This means that 1/4 of the table
+is now unused, although that in itself doesn't buy us anything.)
+
+The gains are small, but definitely present; some tests win as much as 10%,
+but 1-4% is more typical. These results are from Core i7, in 64-bit mode;
+Core 2 and Opteron show similar results. (I've run with more iterations
+than usual to make sure the smaller gains don't drown entirely in noise.)
+
+ Benchmark Time(ns) CPU(ns) Iterations
+ ---------------------------------------------------
+ BM_UFlat/0 74665 74428 182055 1.3GB/s html [ +3.1%]
+ BM_UFlat/1 714106 711997 19663 940.4MB/s urls [ +4.4%]
+ BM_UFlat/2 9820 9789 1427115 12.1GB/s jpg [ -1.2%]
+ BM_UFlat/3 30461 30380 465116 2.9GB/s pdf [ +0.8%]
+ BM_UFlat/4 301445 300568 46512 1.3GB/s html4 [ +2.2%]
+ BM_UFlat/5 29338 29263 479452 801.8MB/s cp [ +1.6%]
+ BM_UFlat/6 13004 12970 1000000 819.9MB/s c [ +2.1%]
+ BM_UFlat/7 4180 4168 3349282 851.4MB/s lsp [ +1.3%]
+ BM_UFlat/8 1026149 1024000 10000 959.0MB/s xls [+10.7%]
+ BM_UFlat/9 237441 236830 59072 612.4MB/s txt1 [ +0.3%]
+ BM_UFlat/10 203966 203298 69307 587.2MB/s txt2 [ +0.8%]
+ BM_UFlat/11 627230 625000 22400 651.2MB/s txt3 [ +0.7%]
+ BM_UFlat/12 836188 833979 16787 551.0MB/s txt4 [ +1.3%]
+ BM_UFlat/13 351904 350750 39886 1.4GB/s bin [ +3.8%]
+ BM_UFlat/14 45685 45562 308370 800.4MB/s sum [ +5.9%]
+ BM_UFlat/15 5286 5270 2656546 764.9MB/s man [ +1.5%]
+ BM_UFlat/16 78774 78544 178117 1.4GB/s pb [ +4.3%]
+ BM_UFlat/17 242270 241345 58091 728.3MB/s gaviota [ +1.2%]
+ BM_UValidate/0 42149 42000 333333 2.3GB/s html [ -3.0%]
+ BM_UValidate/1 432741 431303 32483 1.5GB/s urls [ +7.8%]
+ BM_UValidate/2 198 197 71428571 600.7GB/s jpg [+16.8%]
+ BM_UValidate/3 14560 14521 965517 6.1GB/s pdf [ -4.1%]
+ BM_UValidate/4 169065 168671 83832 2.3GB/s html4 [ -2.9%]
+
+R=jeff
+
+Revision created by MOE tool push_codebase.
+
+------------------------------------------------------------------------
+r40 | snappy.mirrorbot@gmail.com | 2011-06-03 00:57:41 +0200 (Fri, 03 Jun 2011) | 2 lines
+
+Release Snappy 1.0.3.
+
+------------------------------------------------------------------------
r39 | snappy.mirrorbot@gmail.com | 2011-06-02 20:06:54 +0200 (Thu, 02 Jun 2011) | 11 lines
Remove an unneeded goto in the decompressor; it turns out that the
diff --git a/thirdparty/snappy/NEWS b/thirdparty/snappy/NEWS
index d514787..11d1e95 100644
--- a/thirdparty/snappy/NEWS
+++ b/thirdparty/snappy/NEWS
@@ -1,3 +1,15 @@
+Snappy v1.0.4, September 15th 2011:
+
+ * Speeded up the decompressor somewhat; typically about 2–8%
+ for Core i7, in 64-bit mode (comparable for Opteron).
+ Somewhat more for some tests, almost no gain for others.
+
+ * Make Snappy compile on certain platforms it didn't before
+ (Solaris with SunPro C++, HP-UX, AIX).
+
+ * Correct some minor errors in the format description.
+
+
Snappy v1.0.3, June 2nd 2011:
* Speeded up the decompressor somewhat; about 3-6% for Core 2,
diff --git a/thirdparty/snappy/config.h.in b/thirdparty/snappy/config.h.in
index e82da40..e9ccb58 100644
--- a/thirdparty/snappy/config.h.in
+++ b/thirdparty/snappy/config.h.in
@@ -9,6 +9,9 @@
/* Define to 1 if the compiler supports __builtin_expect. */
#cmakedefine HAVE_BUILTIN_EXPECT
+/* Define to 1 if you have the <byteswap.h> header file. */
+#cmakedefine HAVE_BYTESWAP_H
+
/* Define to 1 if you have the <dlfcn.h> header file. */
#cmakedefine HAVE_DLFCN_H
@@ -54,6 +57,12 @@
/* Define to 1 if you have the <string.h> header file. */
#cmakedefine HAVE_STRING_H
+/* Define to 1 if you have the <sys/byteorder.h> header file. */
+#cmakedefine HAVE_SYS_BYTEORDER_H
+
+/* Define to 1 if you have the <sys/endian.h> header file. */
+#cmakedefine HAVE_SYS_ENDIAN_H
+
/* Define to 1 if you have the <sys/mman.h> header file. */
#cmakedefine HAVE_SYS_MMAN_H
diff --git a/thirdparty/snappy/format_description.txt b/thirdparty/snappy/format_description.txt
index 943bfc2..43d7a98 100644
--- a/thirdparty/snappy/format_description.txt
+++ b/thirdparty/snappy/format_description.txt
@@ -1,5 +1,5 @@
Snappy compressed format description
-Last revised: 2011-05-16
+Last revised: 2011-08-09
This is not a formal specification, but should suffice to explain most
@@ -38,7 +38,7 @@ follow:
00: Literal
01: Copy with 1-byte offset
10: Copy with 2-byte offset
- 11: Copy with 3-byte offset
+ 11: Copy with 4-byte offset
The interpretation of the upper six bits are element-dependent.
@@ -52,7 +52,7 @@ of the literal:
- For literals up to and including 60 bytes in length, the upper
six bits of the tag byte contain (len-1). The literal follows
immediately thereafter in the bytestream.
- - For longer literals, the length is stored after the tag byte,
+ - For longer literals, the (len-1) value is stored after the tag byte,
little-endian. The upper six bits of the tag byte describe how
many bytes are used for the length; 60, 61, 62 or 63 for
1-4 bytes, respectively. The literal itself follows after the
@@ -103,7 +103,7 @@ six bits ([2..7]) of the tag byte. The offset is stored as a
little-endian 16-bit integer in the two bytes following the tag byte.
-2.2.3. Copy with 4-byte offsets (11)
+2.2.3. Copy with 4-byte offset (11)
These are like the copies with 2-byte offsets (see previous subsection),
except that the offset is stored as a 32-bit integer instead of a
diff --git a/thirdparty/snappy/snappy-stubs-internal.h b/thirdparty/snappy/snappy-stubs-internal.h
index 46ee235..0215288 100644
--- a/thirdparty/snappy/snappy-stubs-internal.h
+++ b/thirdparty/snappy/snappy-stubs-internal.h
@@ -42,7 +42,7 @@
#include <stdlib.h>
#include <string.h>
-#ifdef HAVE_SYS_MMAN
+#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
@@ -229,6 +229,14 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
// The following guarantees declaration of the byte swap functions.
#ifdef WORDS_BIGENDIAN
+#ifdef HAVE_SYS_BYTEORDER_H
+#include <sys/byteorder.h>
+#endif
+
+#ifdef HAVE_SYS_ENDIAN_H
+#include <sys/endian.h>
+#endif
+
#ifdef _MSC_VER
#include <stdlib.h>
#define bswap_16(x) _byteswap_ushort(x)
@@ -242,8 +250,38 @@ inline void UNALIGNED_STORE64(void *p, uint64 v) {
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)
-#else
+#elif defined(HAVE_BYTESWAP_H)
#include <byteswap.h>
+
+#elif defined(bswap32)
+// FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included).
+#define bswap_16(x) bswap16(x)
+#define bswap_32(x) bswap32(x)
+#define bswap_64(x) bswap64(x)
+
+#elif defined(BSWAP_64)
+// Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included).
+#define bswap_16(x) BSWAP_16(x)
+#define bswap_32(x) BSWAP_32(x)
+#define bswap_64(x) BSWAP_64(x)
+
+#else
+
+inline uint16 bswap_16(uint16 x) {  // Generic fallback: returns x with its two bytes exchanged.
+ return (x << 8) | (x >> 8);  // Converting to the uint16 return type drops bits shifted past bit 15 (assumes uint16 is 16 bits wide).
+}
+
+inline uint32 bswap_32(uint32 x) {  // Generic fallback: returns x with its four bytes in reverse order.
+ x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);  // Step 1: exchange the two bytes inside each 16-bit half.
+ return (x >> 16) | (x << 16);  // Step 2: exchange the two 16-bit halves.
+}
+
+inline uint64 bswap_64(uint64 x) {  // Generic fallback: returns x with its eight bytes in reverse order.
+ x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8);  // Step 1: exchange adjacent bytes.
+ x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16);  // Step 2: exchange adjacent 16-bit groups.
+ return (x >> 32) | (x << 32);  // Step 3: exchange the two 32-bit halves.
+}
+
#endif
#endif // WORDS_BIGENDIAN
diff --git a/thirdparty/snappy/snappy-stubs-public.h b/thirdparty/snappy/snappy-stubs-public.h
index b089bac..d439cb4 100644
--- a/thirdparty/snappy/snappy-stubs-public.h
+++ b/thirdparty/snappy/snappy-stubs-public.h
@@ -46,7 +46,7 @@
#define SNAPPY_MAJOR 1
#define SNAPPY_MINOR 0
-#define SNAPPY_PATCHLEVEL 3
+#define SNAPPY_PATCHLEVEL 4
#define SNAPPY_VERSION \
((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
diff --git a/thirdparty/snappy/snappy.cc b/thirdparty/snappy/snappy.cc
index a591aba..c79edb5 100644
--- a/thirdparty/snappy/snappy.cc
+++ b/thirdparty/snappy/snappy.cc
@@ -294,8 +294,8 @@ static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
// Returns an "end" pointer into "op" buffer.
// "end - op" is the compressed size of "input".
namespace internal {
-char* CompressFragment(const char* const input,
- const size_t input_size,
+char* CompressFragment(const char* input,
+ size_t input_size,
char* op,
uint16* table,
const int table_size) {
@@ -663,17 +663,21 @@ class SnappyDecompressor {
}
const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
- const uint32 entry = char_table[c];
- const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
- ip += entry >> 11;
- const uint32 length = entry & 0xff;
if ((c & 0x3) == LITERAL) {
- uint32 literal_length = length + trailer;
+ uint32 literal_length = c >> 2;
+ if (PREDICT_FALSE(literal_length >= 60)) {
+ // Long literal.
+ const uint32 literal_length_length = literal_length - 59;
+ literal_length =
+ LittleEndian::Load32(ip) & wordmask[literal_length_length];
+ ip += literal_length_length;
+ }
+ ++literal_length;
+
uint32 avail = ip_limit_ - ip;
while (avail < literal_length) {
- bool allow_fast_path = (avail >= 16);
- if (!writer->Append(ip, avail, allow_fast_path)) return;
+ if (!writer->Append(ip, avail, false)) return;
literal_length -= avail;
reader_->Skip(peeked_);
size_t n;
@@ -689,6 +693,11 @@ class SnappyDecompressor {
}
ip += literal_length;
} else {
+ const uint32 entry = char_table[c];
+ const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+ const uint32 length = entry & 0xff;
+ ip += entry >> 11;
+
// copy_offset/256 is encoded in bits 8..10. By just fetching
// those bits, we get copy_offset (since the bit-field starts at
// bit 8).
diff --git a/thirdparty/snappy/snappy_unittest.cc b/thirdparty/snappy/snappy_unittest.cc
index b2de246..6fff333 100644
--- a/thirdparty/snappy/snappy_unittest.cc
+++ b/thirdparty/snappy/snappy_unittest.cc
@@ -121,11 +121,11 @@ typedef string DataEndingAtUnreadablePage;
#endif
enum CompressorType {
- ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY,
+ ZLIB, LZO, LIBLZF, QUICKLZ, FASTLZ, SNAPPY
};
const char* names[] = {
- "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY",
+ "ZLIB", "LZO", "LIBLZF", "QUICKLZ", "FASTLZ", "SNAPPY"
};
static size_t MinimumRequiredOutputSpace(size_t input_size,
@@ -742,11 +742,11 @@ TEST(Snappy, FourByteOffset) {
// it chops up the input into 32KB pieces. So we hand-emit the
// copy manually.
- // The two fragments that make up the input string
+ // The two fragments that make up the input string.
string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
string fragment2 = "some other string";
- // How many times is each fragment emittedn
+ // How many times each fragment is emitted.
const int n1 = 2;
const int n2 = 100000 / fragment2.size();
const int length = n1 * fragment1.size() + n2 * fragment2.size();