public long WriteRecords(IEnumerable<KeyValuePair<InterKey, List<InterValue>>> sorted_pairs)
{
    Stopwatch watch = new Stopwatch();
    watch.Start();
    long written_bytes = 0;
    long written_records = 0;
    foreach (var pair in sorted_pairs)
    {
        var record_bytes = IntermediateRecord<InterKey, InterValue>.GetIntermediateRecordBytes(pair.Key, pair.Value);
        foreach (var bytes in record_bytes)
        {
            fileStream.Write(bytes, 0, bytes.Length);
            written_bytes += bytes.Length;
        }
        // Count records in this single pass instead of calling
        // sorted_pairs.Count() afterwards, which would enumerate the
        // (possibly one-shot) sequence a second time.
        written_records++;
    }
    watch.Stop();
    logger.Debug("Spilled {0} records summing to {1} bytes to disk in {2}.",
                 StringFormatter.DigitGrouped(written_records),
                 StringFormatter.HumanReadablePostfixs(written_bytes),
                 watch.Elapsed);
    return written_bytes;
}
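A minimal, self-contained sketch of the kind of length-prefixed spill format WriteRecords produces. The actual byte layout of GetIntermediateRecordBytes is not shown above, so the layout here (key, value count, values, continuation flag) is an assumption, with string keys and int values standing in for InterKey/InterValue:

using System.Collections.Generic;
using System.IO;

class SpillSketch
{
    // Hypothetical stand-in for WriteRecords: spill sorted (key, values)
    // pairs as length-prefixed binary records.
    static void Spill(string path, IEnumerable<KeyValuePair<string, List<int>>> sortedPairs)
    {
        using (var stream = File.Create(path))
        using (var writer = new BinaryWriter(stream))
        {
            foreach (var pair in sortedPairs)
            {
                writer.Write(pair.Key);          // key
                writer.Write(pair.Value.Count);  // value-list length prefix
                foreach (var v in pair.Value)
                    writer.Write(v);             // the values themselves
                writer.Write((byte)0);           // continuation flag: 0 = key finished
            }
        }
    }
}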
public void intermediateTest()
{
    string intStr = "QI70000000120507400740B T1 ";
    IntermediateRecord intermediate = new IntermediateRecord(intStr.ToCharArray());
    // Assert.AreEqual takes the expected value first, then the actual.
    Assert.AreEqual(new System.TimeSpan(07, 40, 0), intermediate.Time);
    Assert.AreEqual("700000001205", intermediate.Location);
    Assert.AreEqual('B', intermediate.Activity);
    Assert.AreEqual("Location: 700000001205\tTime: 07:40:00", intermediate.ToString());
}
public ReduceObject<InterKey, InterVal> GetNextReduceObject()
{
    var read_key = IntermediateRecord<InterKey, InterVal>.ReadKey(inputStream);
    List<InterVal> vals;
    var last_chunk = IntermediateRecord<InterKey, InterVal>.ReadValueList(inputStream, out vals);
    var iterator = new ReduceObject<InterKey, InterVal>(read_key, vals, this, last_chunk);
    return iterator;
}
public byte ReadNextChunk(InterKey key, out List<InterVal> list)
{
    var read_key = IntermediateRecord<InterKey, InterVal>.ReadKey(inputStream);
    // Validate the key before reading the value list, so a mismatch fails
    // fast; the original's redundant (InterKey) cast of key is dropped.
    if (!read_key.Equals(key))
    {
        throw new InvalidOperationException("We have reached a different key!");
    }
    var last_chunk = IntermediateRecord<InterKey, InterVal>.ReadValueList(inputStream, out list);
    return last_chunk;
}
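For symmetry, a reader-side sketch under the same assumed layout as the writing sketch above: it mirrors what ReadKey and ReadValueList appear to do, including the trailing continuation flag byte that ReadNextChunk returns. BinaryReader with string/int stand-ins is an assumption; the real record format is not shown here:

using System.Collections.Generic;
using System.IO;

class ReadSketch
{
    // Reads one record under the assumed layout and returns the continuation
    // flag: 1 = more chunks for this key follow, 0 = the key is finished.
    static byte ReadRecord(BinaryReader reader, out string key, out List<int> values)
    {
        key = reader.ReadString();           // key
        int count = reader.ReadInt32();      // value-list length prefix
        values = new List<int>(count);
        for (int i = 0; i < count; i++)
            values.Add(reader.ReadInt32());  // the values themselves
        return reader.ReadByte();            // continuation flag
    }
}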
public InterKey GetNextKey()
{
    var key = IntermediateRecord<InterKey, InterVal>.ReadKey(inputStream);
    // Return the key that was just read; the original returned
    // default(InterKey) and discarded it.
    return key;
}
public string Merge(bool keep_files = false)
{
    int memory_per_file = maxMemory / (concurrentFilesCount + 2);
    var fileQ = new Queue<string>(files);
    Stopwatch watch = new Stopwatch();
    while (fileQ.Count > 1)
    {
        watch.Restart();
        long total_records = 0; // reset per pass, to match the per-pass log below
        var destination_file = new IntermediateFile<InterKey, InterVal>(directory, ID, 2 * memory_per_file);
        var dest = destination_file.FileStream;
        // Open up to concurrentFilesCount inputs for this merge pass.
        var current_streams = new List<FileStream>();
        for (int i = 0; i < concurrentFilesCount && fileQ.Count > 0; i++)
        {
            current_streams.Add(new FileStream(fileQ.Dequeue(), FileMode.Open, FileAccess.Read, FileShare.Read, memory_per_file));
        }
        // Seed the priority queue with the first key of every input stream.
        PriorityQueue<InterKey, Stream> priorityQ = new PriorityQueue<InterKey, Stream>();
        var stream_len = new Dictionary<Stream, long>();
        foreach (var stream in current_streams)
        {
            stream_len[stream] = stream.Length;
            if (stream_len[stream] < sizeof(int))
            {
                throw new IOException("Malformed intermediate file: The file is too small!");
            }
            var key = IntermediateRecord<InterKey, InterVal>.ReadKey(stream);
            priorityQ.Enqueue(key, stream);
        }
        logger.Debug("Merging {0} files summing to {1} bytes",
                     current_streams.Count,
                     StringFormatter.HumanReadablePostfixs(stream_len.Values.Sum()));
        var last_key = priorityQ.Peek().Key;
        bool first_time = true;
        while (priorityQ.Count > 0)
        {
            total_records++;
            var best = priorityQ.Dequeue();
            // Before each record except the first, write a continuation flag:
            // 1 if this record holds more values for the previous key, else 0.
            if (!first_time)
            {
                dest.WriteByte(last_key.Equals(best.Key) ? (byte)1 : (byte)0);
            }
            last_key = best.Key;
            first_time = false;
            destination_file.WriteKey(best.Key);
            var current_stream = best.Value;
            // Copy the value list verbatim: its length prefix, then its bytes
            // minus the source's trailing continuation flag, which is consumed
            // here and rewritten at the top of the next iteration.
            var len = IntermediateRecord<InterKey, InterVal>.ReadValueListLength(current_stream);
            dest.Write(BitConverter.GetBytes(len), 0, sizeof(int));
            StreamUtils.Copy(current_stream, dest, len - sizeof(byte));
            current_stream.ReadByte();
            if (current_stream.Position >= stream_len[current_stream])
            {
                continue; // this input stream is exhausted
            }
            var new_key = IntermediateRecord<InterKey, InterVal>.ReadKey(current_stream);
            priorityQ.Enqueue(new_key, current_stream);
        }
        dest.WriteByte(0); // terminating flag for the final record
        dest.Close();
        fileQ.Enqueue(destination_file.Path);
        foreach (var stream in current_streams)
        {
            stream.Close();
            if (!keep_files)
            {
                File.Delete(stream.Name); // honor keep_files, which the original ignored
            }
        }
        watch.Stop();
        logger.Debug("Merged {0} records to {1} in {2}.",
                     StringFormatter.DigitGrouped(total_records),
                     destination_file.Path,
                     watch.Elapsed);
    }
    return fileQ.First();
}
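The heart of Merge is a standard k-way merge: keep the head record of each sorted input in a priority queue, repeatedly pop the smallest, and refill from the stream it came from. Below is a self-contained sketch of that pattern using .NET 6's built-in PriorityQueue (note its (element, priority) parameter order differs from the project's own PriorityQueue class) and in-memory enumerators in place of file streams:

using System.Collections.Generic;

static class KWayMergeSketch
{
    // Merges any number of individually sorted sequences into one sorted sequence.
    public static IEnumerable<int> Merge(IEnumerable<IEnumerable<int>> sortedSources)
    {
        var pq = new PriorityQueue<IEnumerator<int>, int>();
        foreach (var source in sortedSources)
        {
            var it = source.GetEnumerator();
            if (it.MoveNext())
                pq.Enqueue(it, it.Current);   // seed with each source's head element
        }
        while (pq.Count > 0)
        {
            var it = pq.Dequeue();            // source currently holding the smallest head
            yield return it.Current;
            if (it.MoveNext())
                pq.Enqueue(it, it.Current);   // push its next element back into the heap
        }
    }
}

For example, KWayMergeSketch.Merge(new[] { new[] { 1, 4, 7 }, new[] { 2, 5 }, new[] { 3, 6 } }) yields 1 through 7 in order, just as Merge interleaves records from its concurrently open spill files.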