Implement tick events compression in a log file.

Two techniques are involved:
 - compress repeated line ends (common stack beginnings) by using back references;
 - do RLE compression of repeated tick events.

This gives only 5% size reduction on benchmarks run, but this is because tick events are only comprise 10% of file size. Under Chromium winnings are bigger because long repeated samples of idleness are now compressed into a single line.

Tickprocessor will be updated in the next patch.

Review URL: http://codereview.chromium.org/123012


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@2147 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent f1fcab31
......@@ -123,15 +123,22 @@ bool Log::is_stopped_ = false;
Log::WritePtr Log::Write = NULL;
FILE* Log::output_handle_ = NULL;
LogDynamicBuffer* Log::output_buffer_ = NULL;
// Must be the same message as in Logger::PauseProfiler
// Must be the same message as in Logger::PauseProfiler.
const char* Log::kDynamicBufferSeal = "profiler,\"pause\"\n";
Mutex* Log::mutex_ = NULL;
char* Log::message_buffer_ = NULL;
LogRecordCompressor* Log::record_compressor_ = NULL;
// Must be the same as compressed tick event name from Logger.
const char* Log::kCompressedTickEventName = "t,";
void Log::Init() {
mutex_ = OS::CreateMutex();
message_buffer_ = NewArray<char>(kMessageBufferSize);
if (FLAG_compress_log) {
record_compressor_ = new LogRecordCompressor(
kRecordCompressorWindow, strlen(kCompressedTickEventName));
}
}
......@@ -173,6 +180,12 @@ void Log::Close() {
}
Write = NULL;
delete record_compressor_;
record_compressor_ = NULL;
DeleteArray(message_buffer_);
message_buffer_ = NULL;
delete mutex_;
mutex_ = NULL;
......@@ -280,6 +293,29 @@ void LogMessageBuilder::AppendDetailed(String* str, bool show_impl_info) {
}
bool LogMessageBuilder::StoreInCompressor() {
if (!FLAG_compress_log) return true;
ASSERT(Log::record_compressor_ != NULL);
return Log::record_compressor_->Store(
Vector<const char>(Log::message_buffer_, pos_));
}
bool LogMessageBuilder::RetrieveCompressedPrevious(const char* prefix) {
if (!FLAG_compress_log) return true;
ASSERT(Log::record_compressor_ != NULL);
pos_ = 0;
if (prefix[0] != '\0') Append(prefix);
Vector<char> prev_record(Log::message_buffer_ + pos_,
Log::kMessageBufferSize - pos_);
const bool has_previous =
Log::record_compressor_->RetrievePreviousCompressed(&prev_record);
if (!has_previous) return false;
pos_ += prev_record.length();
return true;
}
void LogMessageBuilder::WriteToLogFile() {
ASSERT(pos_ <= Log::kMessageBufferSize);
const int written = Log::Write(Log::message_buffer_, pos_);
......@@ -297,6 +333,105 @@ void LogMessageBuilder::WriteCStringToLogFile(const char* str) {
}
}
// Formatting string for back references. E.g. "#2:10" means
// "the second line above, start from char 10 (0-based)".
const char* LogRecordCompressor::kBackwardReferenceFormat = "#%d:%d";
LogRecordCompressor::~LogRecordCompressor() {
for (int i = 0; i < buffer_.length(); ++i) {
buffer_[i].Dispose();
}
}
bool LogRecordCompressor::Store(const Vector<const char>& record) {
// Check if the record is the same as the last stored one.
if (curr_ != -1) {
Vector<const char>& curr = buffer_[curr_];
if (record.length() == curr.length()
&& strncmp(record.start(), curr.start(), record.length()) == 0) {
return false;
}
}
// buffer_ is circular.
prev_ = curr_++;
curr_ %= buffer_.length();
Vector<char> record_copy = Vector<char>::New(record.length());
memcpy(record_copy.start(), record.start(), record.length());
buffer_[curr_].Dispose();
buffer_[curr_] =
Vector<const char>(record_copy.start(), record_copy.length());
return true;
}
bool LogRecordCompressor::RetrievePreviousCompressed(
Vector<char>* prev_record) {
if (prev_ == -1) return false;
int index = prev_;
// Distance from prev_.
int distance = 0;
// Best compression result among records in the buffer.
struct {
intptr_t truncated_len;
int distance;
int copy_from_pos;
} best = {-1, 0, 0};
Vector<const char>& prev = buffer_[prev_];
const char* const prev_start = prev.start() + start_pos_;
const char* const prev_end = prev.start() + prev.length();
do {
// We're moving backwards until we reach the current record.
// Remember that buffer_ is circular.
if (--index == -1) index = buffer_.length() - 1;
++distance;
if (index == curr_) break;
Vector<const char>& data = buffer_[index];
if (data.start() == NULL) break;
const char* const data_end = data.start() + data.length();
const char* prev_ptr = prev_end;
const char* data_ptr = data_end;
// Compare strings backwards, stop on the last matching character.
while (prev_ptr != prev_start && data_ptr != data.start()
&& *(prev_ptr - 1) == *(data_ptr - 1)) {
--prev_ptr;
--data_ptr;
}
const intptr_t truncated_len = prev_end - prev_ptr;
if (truncated_len < kUncompressibleBound) continue;
// Record compression results.
if (truncated_len > best.truncated_len) {
best.truncated_len = truncated_len;
best.distance = distance;
best.copy_from_pos = data_ptr - data.start();
}
} while (true);
if (best.distance == 0) {
// Can't compress the previous record. Return as is.
ASSERT(prev_record->length() >= prev.length());
memcpy(prev_record->start(), prev.start(), prev.length());
prev_record->Truncate(prev.length());
} else {
// Copy the uncompressible part unchanged.
const intptr_t unchanged_len = prev.length() - best.truncated_len;
ASSERT(prev_record->length() >= unchanged_len + kUncompressibleBound);
memcpy(prev_record->start(), prev.start(), unchanged_len);
// Append the backward reference.
Vector<char> patch(prev_record->start() + unchanged_len,
kUncompressibleBound);
OS::SNPrintF(patch, kBackwardReferenceFormat,
best.distance, best.copy_from_pos);
prev_record->Truncate(unchanged_len + strlen(patch.start()));
}
return true;
}
#endif // ENABLE_LOGGING_AND_PROFILING
} } // namespace v8::internal
......@@ -88,6 +88,48 @@ class LogDynamicBuffer {
};
// An utility class for performing backward reference compression
// of string ends. It operates using a window of previous strings.
class LogRecordCompressor {
public:
// 'window_size' is the size of backward lookup window.
// 'start_pos' is the length of string prefix that must be left uncompressed.
LogRecordCompressor(int window_size, int start_pos)
: buffer_(window_size), start_pos_(start_pos), curr_(-1), prev_(-1) {
// Must have place for the current record and the previous.
ASSERT(window_size >= 2);
}
~LogRecordCompressor();
// Fills vector with a compressed version of the previous record.
// Returns false if there is no previous record.
bool RetrievePreviousCompressed(Vector<char>* prev_record);
// Stores a record if it differs from a previous one (or there's no previous).
// Returns true, if the record has been stored.
bool Store(const Vector<const char>& record);
private:
// Record field separator. It is assumed that it can't appear
// inside a field (a simplified version of CSV format).
static const char kFieldSeparator = ',';
// Formatting string for back references.
static const char* kBackwardReferenceFormat;
// A boundary value. If current record only differs in
// kUncompressibleBound chars, don't compress it.
// The value must be greater than the length of maximum
// projected backward reference length.
static const intptr_t kUncompressibleBound = 10;
ScopedVector< Vector<const char> > buffer_;
const int start_pos_;
int curr_;
int prev_;
};
// Functions and data for performing output of log messages.
class Log : public AllStatic {
public:
......@@ -114,6 +156,9 @@ class Log : public AllStatic {
return !is_stopped_ && (output_handle_ != NULL || output_buffer_ != NULL);
}
// Size of buffer used for record compression.
static const int kRecordCompressorWindow = 6;
private:
typedef int (*WritePtr)(const char* msg, int length);
......@@ -169,6 +214,11 @@ class Log : public AllStatic {
// mutex_ should be acquired before using it.
static char* message_buffer_;
// An utility object for compressing repeated parts of records.
static LogRecordCompressor* record_compressor_;
static const char* kCompressedTickEventName;
friend class LogMessageBuilder;
};
......@@ -196,6 +246,19 @@ class LogMessageBuilder BASE_EMBEDDED {
void AppendDetailed(String* str, bool show_impl_info);
// Stores log message into compressor, returns true if the message
// was stored (i.e. doesn't repeat the previous one). If log compression is
// disabled, does nothing and returns true.
bool StoreInCompressor();
// Sets log message to a previous version of compressed message.
// Returns false, if there is no previous message. If log compression
// is disabled, does nothing and retuns true.
bool RetrieveCompressedPrevious() { return RetrieveCompressedPrevious(""); }
// Does the same at the version without arguments, and sets a prefix.
bool RetrieveCompressedPrevious(const char* prefix);
// Write the log message to the log file currently opened.
void WriteToLogFile();
......
......@@ -303,6 +303,7 @@ VMState* Logger::current_state_ = NULL;
VMState Logger::bottom_state_(EXTERNAL);
SlidingStateWindow* Logger::sliding_state_window_ = NULL;
const char** Logger::log_events_ = NULL;
int Logger::tick_repeat_count_ = 0;
#define DECLARE_LONG_EVENT(ignore1, long_name, ignore2) long_name,
const char* kLongLogEventsNames[Logger::NUMBER_OF_LOG_EVENTS] = {
......@@ -326,6 +327,9 @@ void Logger::ProfilerBeginEvent() {
if (!Log::IsEnabled()) return;
LogMessageBuilder msg;
msg.Append("profiler,\"begin\",%d\n", kSamplingIntervalMs);
if (FLAG_compress_log) {
msg.Append("profiler,\"compression\",%d\n", Log::kRecordCompressorWindow);
}
msg.WriteToLogFile();
}
......@@ -848,6 +852,28 @@ void Logger::TickEvent(TickSample* sample, bool overflow) {
msg.Append(",0x%" V8PRIxPTR, sample->stack[i]);
}
}
// In case if log compression is disabled, the flow is straightforward,
// because both StoreInCompressor and RetrieveCompressedPrevious do nothing
// and just return 'true'. Otherwise, to perform compression of repeated
// tick events, instead of the current event, the previous one is written.
if (!msg.StoreInCompressor()) {
// Current message repeats the previous one, don't write it.
++tick_repeat_count_;
return;
}
bool has_previous_message;
if (tick_repeat_count_ > 0) {
EmbeddedVector<char, 20> prefix;
OS::SNPrintF(prefix, "%s,%d,",
log_events_[REPEAT_META_EVENT],
tick_repeat_count_ + 1);
tick_repeat_count_ = 0;
has_previous_message = msg.RetrieveCompressedPrevious(prefix.start());
} else {
has_previous_message = msg.RetrieveCompressedPrevious();
}
if (!has_previous_message) return;
msg.Append('\n');
msg.WriteToLogFile();
}
......
......@@ -107,6 +107,7 @@ class VMState BASE_EMBEDDED {
V(CODE_MOVE_EVENT, "code-move", "cm") \
V(CODE_DELETE_EVENT, "code-delete", "cd") \
V(TICK_EVENT, "tick", "t") \
V(REPEAT_META_EVENT, "repeat", "r") \
V(BUILTIN_TAG, "Builtin", "bi") \
V(CALL_DEBUG_BREAK_TAG, "CallDebugBreak", "cdb") \
V(CALL_DEBUG_PREPARE_STEP_IN_TAG, "CallDebugPrepareStepIn", "cdbsi") \
......@@ -299,6 +300,9 @@ class Logger {
// An array of log events names.
static const char** log_events_;
// Counter for repeated tick events.
static int tick_repeat_count_;
// Internal implementation classes with access to
// private members.
friend class EventLog;
......
......@@ -362,6 +362,11 @@ class Vector {
Sort(PointerValueCompare<T>);
}
void Truncate(int length) {
ASSERT(length <= length_);
length_ = length;
}
// Releases the array underlying this vector. Once disposed the
// vector is empty.
void Dispose() {
......
......@@ -9,8 +9,12 @@
#include "log-utils.h"
#include "cctest.h"
using v8::internal::CStrVector;
using v8::internal::EmbeddedVector;
using v8::internal::LogDynamicBuffer;
using v8::internal::LogRecordCompressor;
using v8::internal::MutableCStrVector;
using v8::internal::ScopedVector;
using v8::internal::Vector;
// Fills 'ref_buffer' with test data: a sequence of two-digit
......@@ -47,9 +51,13 @@ static inline void CheckEqualsHelper(const char* file, int line,
const Vector<V>& value) {
if (expected.length() != value.length()) {
V8_Fatal(file, line, "CHECK_EQ(%s, %s) failed\n"
"# Vectors lengths differ: %d expected, %d found",
"# Vectors lengths differ: %d expected, %d found\n"
"# Expected: %.*s\n"
"# Found: %.*s",
expected_source, value_source,
expected.length(), value.length());
expected.length(), value.length(),
expected.length(), expected.start(),
value.length(), value.start());
}
if (strncmp(expected.start(), value.start(), expected.length()) != 0) {
V8_Fatal(file, line, "CHECK_EQ(%s, %s) failed\n"
......@@ -124,9 +132,161 @@ TEST(DynaBufSealing) {
// Check the seal.
EmbeddedVector<char, 50> seal_buf;
CHECK_EQ(seal_size, ReadData(&dynabuf, 100, &seal_buf));
CHECK_EQ(v8::internal::CStrVector(seal), seal_buf.SubVector(0, seal_size));
CHECK_EQ(CStrVector(seal), seal_buf.SubVector(0, seal_size));
// Verify that there's no data beyond the seal.
CHECK_EQ(0, ReadData(&dynabuf, 100 + seal_size, &buf));
}
TEST(CompressorStore) {
LogRecordCompressor comp(2, 0);
const Vector<const char> empty = CStrVector("");
CHECK(comp.Store(empty));
CHECK(!comp.Store(empty));
CHECK(!comp.Store(empty));
const Vector<const char> aaa = CStrVector("aaa");
CHECK(comp.Store(aaa));
CHECK(!comp.Store(aaa));
CHECK(!comp.Store(aaa));
CHECK(comp.Store(empty));
CHECK(!comp.Store(empty));
CHECK(!comp.Store(empty));
}
void CheckCompression(LogRecordCompressor* comp,
const Vector<const char>& after) {
EmbeddedVector<char, 100> result;
CHECK(comp->RetrievePreviousCompressed(&result));
CHECK_EQ(after, result);
}
void CheckCompression(LogRecordCompressor* comp,
const char* after) {
CheckCompression(comp, CStrVector(after));
}
TEST(CompressorNonCompressed) {
LogRecordCompressor comp(2, 0);
CHECK(!comp.RetrievePreviousCompressed(NULL));
const Vector<const char> empty = CStrVector("");
CHECK(comp.Store(empty));
CHECK(!comp.RetrievePreviousCompressed(NULL));
const Vector<const char> a_x_20 = CStrVector("aaaaaaaaaaaaaaaaaaaa");
CHECK(comp.Store(a_x_20));
CheckCompression(&comp, empty);
CheckCompression(&comp, empty);
CHECK(comp.Store(empty));
CheckCompression(&comp, a_x_20);
CheckCompression(&comp, a_x_20);
}
TEST(CompressorSingleLine) {
LogRecordCompressor comp(3, strlen("xxx,"));
const Vector<const char> string_1 = CStrVector("eee,ddd,ccc,bbb,aaa");
CHECK(comp.Store(string_1));
const Vector<const char> string_2 = CStrVector("fff,ddd,ccc,bbb,aaa");
CHECK(comp.Store(string_2));
// string_1 hasn't been compressed.
CheckCompression(&comp, string_1);
CheckCompression(&comp, string_1);
const Vector<const char> string_3 = CStrVector("hhh,ggg,ccc,bbb,aaa");
CHECK(comp.Store(string_3));
// string_2 compressed using string_1.
CheckCompression(&comp, "fff,#1:4");
CheckCompression(&comp, "fff,#1:4");
CHECK(!comp.Store(string_3));
// Expecting no changes.
CheckCompression(&comp, "fff,#1:4");
CHECK(!comp.Store(string_3));
// Expecting no changes.
CheckCompression(&comp, "fff,#1:4");
const Vector<const char> string_4 = CStrVector("iii,hhh,ggg,ccc,bbb,aaa");
CHECK(comp.Store(string_4));
// string_3 compressed using string_2.
CheckCompression(&comp, "hhh,ggg#1:7");
const Vector<const char> string_5 = CStrVector("nnn,mmm,lll,kkk,jjj");
CHECK(comp.Store(string_5));
// string_4 compressed using string_3.
CheckCompression(&comp, "iii,#1:0");
const Vector<const char> string_6 = CStrVector("nnn,mmmmmm,lll,kkk,jjj");
CHECK(comp.Store(string_6));
// string_5 hasn't been compressed.
CheckCompression(&comp, string_5);
CHECK(comp.Store(string_5));
// string_6 compressed using string_5.
CheckCompression(&comp, "nnn,mmm#1:4");
const Vector<const char> string_7 = CStrVector("nnnnnn,mmm,lll,kkk,jjj");
CHECK(comp.Store(string_7));
// string_5 compressed using string_6.
CheckCompression(&comp, "nnn,#1:7");
const Vector<const char> string_8 = CStrVector("xxn,mmm,lll,kkk,jjj");
CHECK(comp.Store(string_8));
// string_7 compressed using string_5.
CheckCompression(&comp, "nnnn#1:1");
const Vector<const char> string_9 =
CStrVector("aaaaaaaaaaaaa,bbbbbbbbbbbbbbbbb");
CHECK(comp.Store(string_9));
// string_8 compressed using string_7.
CheckCompression(&comp, "xxn,#1:7");
const Vector<const char> string_10 =
CStrVector("aaaaaaaaaaaaa,cccccccbbbbbbbbbb");
CHECK(comp.Store(string_10));
// string_9 hasn't been compressed.
CheckCompression(&comp, string_9);
CHECK(comp.Store(string_1));
// string_10 compressed using string_9.
CheckCompression(&comp, "aaaaaaaaaaaaa,ccccccc#1:21");
}
TEST(CompressorMultiLines) {
const int kWindowSize = 5;
LogRecordCompressor comp(kWindowSize, strlen("xxx,"));
const Vector<const char> string_1 = CStrVector("eee,ddd,ccc,bbb,aaa");
CHECK(comp.Store(string_1));
const Vector<const char> string_2 = CStrVector("iii,hhh,ggg,fff,aaa");
CHECK(comp.Store(string_2));
const Vector<const char> string_3 = CStrVector("mmm,lll,kkk,jjj,aaa");
CHECK(comp.Store(string_3));
const Vector<const char> string_4 = CStrVector("nnn,hhh,ggg,fff,aaa");
CHECK(comp.Store(string_4));
const Vector<const char> string_5 = CStrVector("ooo,lll,kkk,jjj,aaa");
CHECK(comp.Store(string_5));
// string_4 compressed using string_2.
CheckCompression(&comp, "nnn,#2:4");
CHECK(comp.Store(string_1));
// string_5 compressed using string_3.
CheckCompression(&comp, "ooo,#2:4");
CHECK(comp.Store(string_4));
// string_1 is out of buffer by now, so it shouldn't be compressed.
CHECK_GE(5, kWindowSize);
CheckCompression(&comp, string_1);
CHECK(comp.Store(string_2));
// string_4 compressed using itself.
CheckCompression(&comp, "nnn,#3:4");
}
TEST(CompressorBestSelection) {
LogRecordCompressor comp(5, strlen("xxx,"));
const Vector<const char> string_1 = CStrVector("eee,ddd,ccc,bbb,aaa");
CHECK(comp.Store(string_1));
const Vector<const char> string_2 = CStrVector("ddd,ccc,bbb,aaa");
CHECK(comp.Store(string_2));
const Vector<const char> string_3 = CStrVector("fff,eee,ddd,ccc,bbb,aaa");
CHECK(comp.Store(string_3));
// string_2 compressed using string_1.
CheckCompression(&comp, "ddd,#1:8");
const Vector<const char> string_4 = CStrVector("nnn,hhh,ggg,fff,aaa");
CHECK(comp.Store(string_4));
// Compressing string_3 using string_1 gives a better compression than
// using string_2.
CheckCompression(&comp, "fff,#2:0");
}
#endif // ENABLE_LOGGING_AND_PROFILING
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment