summaryrefslogtreecommitdiff
path: root/lib/trace/trace_file_snappy.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/trace/trace_file_snappy.cpp')
-rw-r--r--lib/trace/trace_file_snappy.cpp353
1 files changed, 353 insertions, 0 deletions
diff --git a/lib/trace/trace_file_snappy.cpp b/lib/trace/trace_file_snappy.cpp
new file mode 100644
index 00000000..119e4147
--- /dev/null
+++ b/lib/trace/trace_file_snappy.cpp
@@ -0,0 +1,353 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Zack Rusin
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+/*
+ * Snappy file format.
+ * -------------------
+ *
+ * Snappy at its core is just a compressoin algorithm so we're
+ * creating a new file format which uses snappy compression
+ * to hold the trace data.
+ *
+ * The file is composed of a number of chunks, they are:
+ * chunk {
+ * uint32 - specifying the length of the compressed data
+ * compressed data, in little endian
+ * }
+ * File can contain any number of such chunks.
+ * The default size of an uncompressed chunk is specified in
+ * SNAPPY_CHUNK_SIZE.
+ *
+ * Note:
+ * Currently the default size for a a to-be-compressed data is
+ * 1mb, meaning that the compressed data will be <= 1mb.
+ * The reason it's 1mb is because it seems
+ * to offer a pretty good compression/disk io speed ratio
+ * but that might change.
+ *
+ */
+
+
+#include <snappy.h>
+#include <snappy-sinksource.h>
+
+#include <iostream>
+#include <algorithm>
+
+#include <assert.h>
+#include <string.h>
+
+#include "trace_file.hpp"
+#include "trace_snappy.hpp"
+
+
+#define SNAPPY_CHUNK_SIZE (1 * 1024 * 1024)
+
+
+
+using namespace trace;
+
+
+class SnappyFile : public File {
+public:
+ SnappyFile(void);
+ virtual ~SnappyFile();
+
+ virtual bool supportsOffsets(void) const override;
+ virtual File::Offset currentOffset(void) const override;
+ virtual void setCurrentOffset(const File::Offset &offset) override;
+protected:
+ virtual bool rawOpen(const char *filename) override;
+ virtual size_t rawRead(void *buffer, size_t length) override;
+ virtual int rawGetc(void) override;
+ virtual void rawClose(void) override;
+ virtual bool rawSkip(size_t length) override;
+ virtual int rawPercentRead(void) override;
+
+private:
+ inline size_t usedCacheSize(void) const
+ {
+ assert(m_cachePtr >= m_cache);
+ return m_cachePtr - m_cache;
+ }
+ inline size_t freeCacheSize(void) const
+ {
+ assert(m_cacheSize >= usedCacheSize());
+ if (m_cacheSize > 0) {
+ return m_cacheSize - usedCacheSize();
+ } else {
+ return 0;
+ }
+ }
+ inline bool endOfData(void) const
+ {
+ return m_stream.eof() && freeCacheSize() == 0;
+ }
+ void flushWriteCache(void);
+ void flushReadCache(size_t skipLength = 0);
+ void createCache(size_t size);
+ size_t readCompressedLength();
+private:
+ std::ifstream m_stream;
+ size_t m_cacheMaxSize;
+ size_t m_cacheSize;
+ char *m_cache;
+ char *m_cachePtr;
+
+ char *m_compressedCache;
+
+ uint64_t m_currentChunkOffset;
+ std::streampos m_endPos;
+};
+
+SnappyFile::SnappyFile(void)
+ : File(),
+ m_cacheMaxSize(SNAPPY_CHUNK_SIZE),
+ m_cacheSize(m_cacheMaxSize),
+ m_cache(new char [m_cacheMaxSize]),
+ m_cachePtr(m_cache)
+{
+ size_t maxCompressedLength =
+ snappy::MaxCompressedLength(SNAPPY_CHUNK_SIZE);
+ m_compressedCache = new char[maxCompressedLength];
+}
+
+SnappyFile::~SnappyFile()
+{
+ close();
+ delete [] m_compressedCache;
+ delete [] m_cache;
+}
+
+bool SnappyFile::rawOpen(const char *filename)
+{
+ std::ios_base::openmode fmode = std::fstream::binary
+ | std::fstream::in;
+
+ m_stream.open(filename, fmode);
+
+ //read in the initial buffer if we're reading
+ if (m_stream.is_open()) {
+ m_stream.seekg(0, std::ios::end);
+ m_endPos = m_stream.tellg();
+ m_stream.seekg(0, std::ios::beg);
+
+ // read the snappy file identifier
+ unsigned char byte1, byte2;
+ m_stream >> byte1;
+ m_stream >> byte2;
+ assert(byte1 == SNAPPY_BYTE1 && byte2 == SNAPPY_BYTE2);
+
+ flushReadCache();
+ }
+ return m_stream.is_open();
+}
+
+size_t SnappyFile::rawRead(void *buffer, size_t length)
+{
+ if (endOfData()) {
+ return 0;
+ }
+
+ if (freeCacheSize() >= length) {
+ memcpy(buffer, m_cachePtr, length);
+ m_cachePtr += length;
+ } else {
+ size_t sizeToRead = length;
+ size_t offset = 0;
+ while (sizeToRead) {
+ size_t chunkSize = std::min(freeCacheSize(), sizeToRead);
+ offset = length - sizeToRead;
+ memcpy((char*)buffer + offset, m_cachePtr, chunkSize);
+ m_cachePtr += chunkSize;
+ sizeToRead -= chunkSize;
+ if (sizeToRead > 0) {
+ flushReadCache();
+ }
+ if (!m_cacheSize) {
+ return length - sizeToRead;
+ }
+ }
+ }
+
+ return length;
+}
+
+int SnappyFile::rawGetc(void)
+{
+ unsigned char c = 0;
+ if (rawRead(&c, 1) != 1)
+ return -1;
+ return c;
+}
+
+void SnappyFile::rawClose(void)
+{
+ m_stream.close();
+ delete [] m_cache;
+ m_cache = NULL;
+ m_cachePtr = NULL;
+}
+
+void SnappyFile::flushReadCache(size_t skipLength)
+{
+ //assert(m_cachePtr == m_cache + m_cacheSize);
+ m_currentChunkOffset = m_stream.tellg();
+ size_t compressedLength;
+ compressedLength = readCompressedLength();
+ if (!compressedLength) {
+ // Reached end of file
+ createCache(0);
+ return;
+ }
+
+ m_stream.read((char*)m_compressedCache, compressedLength);
+ if (m_stream.fail()) {
+ std::cerr << "warning: unexpected end of file while reading trace\n";
+
+ compressedLength = m_stream.gcount();
+ if (!snappy::GetUncompressedLength(m_compressedCache, compressedLength,
+ &m_cacheSize)) {
+ createCache(0);
+ return;
+ }
+
+ createCache(m_cacheSize);
+ snappy::ByteArraySource source(m_compressedCache, compressedLength);
+
+ snappy::UncheckedByteArraySink sink(m_cache);
+ m_cacheSize = snappy::UncompressAsMuchAsPossible(&source, &sink);
+
+ return;
+ }
+
+ if (!snappy::GetUncompressedLength(m_compressedCache, compressedLength,
+ &m_cacheSize)) {
+ createCache(0);
+ return;
+ }
+
+ createCache(m_cacheSize);
+ if (skipLength < m_cacheSize) {
+ snappy::RawUncompress(m_compressedCache, compressedLength,
+ m_cache);
+ }
+}
+
+void SnappyFile::createCache(size_t size)
+{
+ if (size > m_cacheMaxSize) {
+ do {
+ m_cacheMaxSize <<= 1;
+ } while (size > m_cacheMaxSize);
+
+ delete [] m_cache;
+ m_cache = new char[size];
+ m_cacheMaxSize = size;
+ }
+
+ m_cachePtr = m_cache;
+ m_cacheSize = size;
+}
+
+size_t SnappyFile::readCompressedLength()
+{
+ unsigned char buf[4];
+ size_t length;
+ m_stream.read((char *)buf, sizeof buf);
+ if (m_stream.fail()) {
+ length = 0;
+ } else {
+ length = (size_t)buf[0];
+ length |= ((size_t)buf[1] << 8);
+ length |= ((size_t)buf[2] << 16);
+ length |= ((size_t)buf[3] << 24);
+ }
+ return length;
+}
+
+bool SnappyFile::supportsOffsets(void) const
+{
+ return true;
+}
+
+File::Offset SnappyFile::currentOffset(void) const
+{
+ File::Offset offset;
+ offset.chunk = m_currentChunkOffset;
+ offset.offsetInChunk = m_cachePtr - m_cache;
+ return offset;
+}
+
+void SnappyFile::setCurrentOffset(const File::Offset &offset)
+{
+ // to remove eof bit
+ m_stream.clear();
+ // seek to the start of a chunk
+ m_stream.seekg(offset.chunk, std::ios::beg);
+ // load the chunk
+ flushReadCache();
+ assert(m_cacheSize >= offset.offsetInChunk);
+ // seek within our cache to the correct location within the chunk
+ m_cachePtr = m_cache + offset.offsetInChunk;
+
+}
+
+bool SnappyFile::rawSkip(size_t length)
+{
+ if (endOfData()) {
+ return false;
+ }
+
+ if (freeCacheSize() >= length) {
+ m_cachePtr += length;
+ } else {
+ size_t sizeToRead = length;
+ while (sizeToRead) {
+ size_t chunkSize = std::min(freeCacheSize(), sizeToRead);
+ m_cachePtr += chunkSize;
+ sizeToRead -= chunkSize;
+ if (sizeToRead > 0) {
+ flushReadCache(sizeToRead);
+ }
+ if (!m_cacheSize) {
+ break;
+ }
+ }
+ }
+
+ return true;
+}
+
+int SnappyFile::rawPercentRead(void)
+{
+ return int(100 * (double(m_stream.tellg()) / double(m_endPos)));
+}
+
+
+File* File::createSnappy(void) {
+ return new SnappyFile;
+}