Example No. 1
0
        /// <summary>
        /// Serializes the sorted key/value-list pairs to the intermediate file stream.
        /// </summary>
        /// <param name="sorted_pairs">Key/value-list pairs, already sorted by key.</param>
        /// <returns>Total number of bytes written to the file stream.</returns>
        public long WriteRecords(IEnumerable <KeyValuePair <InterKey, List <InterValue> > > sorted_pairs)
        {
            Stopwatch watch = new Stopwatch();

            watch.Start();
            long written_bytes = 0;
            long record_count  = 0;

            foreach (var pair in sorted_pairs)
            {
                record_count++;
                var record_bytes = IntermediateRecord <InterKey, InterValue> .GetIntermediateRecordBytes(pair.Key, pair.Value);

                foreach (var bytes in record_bytes)
                {
                    fileStream.Write(bytes, 0, bytes.Length);
                    written_bytes += bytes.Length;
                }
            }

            watch.Stop();
            // Count records during the single pass above instead of calling Count(),
            // which would enumerate the (possibly lazy) sequence a second time and
            // could observe a different or already-consumed sequence.
            logger.Debug("Spilled {0} records summing to {2} bytes to disk in {1}.", StringFormatter.DigitGrouped(record_count), watch.Elapsed, StringFormatter.HumanReadablePostfixs(written_bytes));

            return(written_bytes);
        }
Example No. 2
0
        /// <summary>
        /// Verifies that an IntermediateRecord parses the time, location and
        /// activity fields out of a fixed-width record string.
        /// </summary>
        public void intermediateTest()
        {
            string             intStr       = "QI70000000120507400740B   T1  ";
            IntermediateRecord intermediate = new IntermediateRecord(intStr.ToCharArray());

            // Assert.AreEqual takes (expected, actual); the original had them
            // reversed, which makes failure messages report the values backwards.
            Assert.AreEqual(new System.TimeSpan(07, 40, 0), intermediate.Time);
            Assert.AreEqual("700000001205", intermediate.Location);
            Assert.AreEqual('B', intermediate.Activity);
            Assert.AreEqual("Location: 700000001205\tTime: 07:40:00", intermediate.ToString());
        }
Example No. 3
0
        /// <summary>
        /// Reads the next key and its first value chunk from the input stream and
        /// wraps them in a ReduceObject for the reducer to iterate over.
        /// </summary>
        /// <returns>A ReduceObject positioned at the next intermediate record.</returns>
        public ReduceObject <InterKey, InterVal> GetNextReduceObject()
        {
            var key = IntermediateRecord <InterKey, InterVal> .ReadKey(inputStream);

            List <InterVal> values;
            var             lastChunkFlag = IntermediateRecord <InterKey, InterVal> .ReadValueList(inputStream, out values);

            return new ReduceObject <InterKey, InterVal>(key, values, this, lastChunkFlag);
        }
Example No. 4
0
        /// <summary>
        /// Reads the next value chunk for the given key from the input stream.
        /// </summary>
        /// <param name="key">The key the next chunk is expected to belong to.</param>
        /// <param name="list">Receives the values of the chunk that was read.</param>
        /// <returns>The last-chunk marker returned by ReadValueList.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when the key read from the stream differs from <paramref name="key"/>.
        /// </exception>
        public byte ReadNextChunk(InterKey key, out List <InterVal> list)
        {
            var read_key = IntermediateRecord <InterKey, InterVal> .ReadKey(inputStream);

            // Fail before consuming the value list, so that on a key mismatch the
            // stream is not advanced past another key's data. (The original also
            // cast key into a redundant local of the same type.)
            if (!read_key.Equals(key))
            {
                throw new InvalidOperationException("We have reached a different key!");
            }

            return IntermediateRecord <InterKey, InterVal> .ReadValueList(inputStream, out list);
        }
Example No. 5
0
        /// <summary>
        /// Reads and returns the next key from the input stream.
        /// </summary>
        /// <returns>The key that was read.</returns>
        public InterKey GetNextKey()
        {
            var key = IntermediateRecord <InterKey, InterVal> .ReadKey(inputStream);

            // Return the key that was actually read; the original returned
            // default(InterKey), discarding the value even though the stream
            // had already been advanced past it.
            return(key);
        }
Example No. 6
0
        /// <summary>
        /// Performs an iterative k-way merge of the sorted intermediate files until a
        /// single file remains, and returns that file's path. Each pass merges up to
        /// <c>concurrentFilesCount</c> files into one new intermediate file, deleting
        /// the inputs afterwards.
        /// </summary>
        /// <param name="keep_files">Currently unused in this body — inputs are always
        /// deleted after a pass. TODO(review): confirm whether this flag should guard
        /// the File.Delete call below.</param>
        /// <returns>Path of the final merged intermediate file.</returns>
        public string Merge(bool keep_files = false)
        {
            // Split the memory budget across the concurrent input streams plus the
            // (double-sized) output buffer; hence the "+ 2".
            int       memory_per_file = maxMemory / (concurrentFilesCount + 2);
            var       fileQ           = new Queue <string>(files);
            Stopwatch watch           = new Stopwatch();
            // NOTE(review): total_records accumulates across all merge passes, so the
            // per-pass log line below reports a running total, not this pass's count.
            long      total_records   = 0;

            while (fileQ.Count > 1)
            {
                watch.Restart();
                var destination_file = new IntermediateFile <InterKey, InterVal>(directory, ID, 2 * memory_per_file);
                var dest             = destination_file.FileStream;

                // Open up to concurrentFilesCount inputs for this pass; leftovers stay
                // queued for a later pass.
                var current_streams = new List <FileStream>();
                for (int i = 0; i < concurrentFilesCount && fileQ.Count > 0; i++)
                {
                    current_streams.Add(new FileStream(fileQ.Dequeue(), FileMode.Open, FileAccess.Read, FileShare.Read, memory_per_file));
                }

                // Min-heap of (next key, owning stream): the head is always the
                // smallest key among all open inputs.
                PriorityQueue <InterKey, Stream> priorityQ = new PriorityQueue <InterKey, Stream>();

                // Cached lengths used as end-of-stream sentinels, so we avoid
                // re-querying Stream.Length inside the merge loop.
                var stream_len = new Dictionary <Stream, long>();

                foreach (var stream in current_streams)
                {
                    stream_len[stream] = stream.Length;
                    if (stream_len[stream] < sizeof(int))
                    {
                        throw new IOException("Malformed intermediate file: The file is too small!");
                    }
                    // Prime the heap with the first key of each input.
                    var key = IntermediateRecord <InterKey, InterVal> .ReadKey(stream);

                    priorityQ.Enqueue(key, stream);
                }

                logger.Debug("Merging {0} files summing to {1} bytes", current_streams.Count, StringFormatter.HumanReadablePostfixs(stream_len.Values.Sum()));

                var  last_key   = priorityQ.Peek().Key;
                bool first_time = true;
                while (priorityQ.Count > 0)
                {
                    total_records++;
                    var best = priorityQ.Dequeue();

                    // Before each record (except the very first) emit a continuation
                    // byte: 1 = same key as the previous record (chunk continues),
                    // 0 = a new key begins. The reducer side presumably relies on
                    // this framing — TODO(review): confirm against ReadValueList.
                    if (!first_time)
                    {
                        if (last_key.Equals(best.Key))
                        {
                            dest.WriteByte(1);
                        }
                        else
                        {
                            dest.WriteByte(0);
                        }
                    }
                    last_key   = best.Key;
                    first_time = false;

                    destination_file.WriteKey(best.Key);
                    var current_stream = best.Value;
                    var len            = IntermediateRecord <InterKey, InterVal> .ReadValueListLength(current_stream);

                    // Copy the value list verbatim: length prefix, then len minus the
                    // trailing marker byte, which is consumed (and discarded) from the
                    // source since the merged output writes its own continuation bytes.
                    dest.Write(BitConverter.GetBytes(len), 0, sizeof(int));
                    StreamUtils.Copy(current_stream, dest, len - sizeof(byte));
                    current_stream.ReadByte();

                    // Stream exhausted: drop it from the merge; otherwise re-enqueue
                    // it keyed by its next record.
                    if (best.Value.Position >= stream_len[current_stream])
                    {
                        continue;
                    }
                    var new_key = IntermediateRecord <InterKey, InterVal> .ReadKey(current_stream);

                    priorityQ.Enqueue(new_key, current_stream);
                }
                // Terminating 0 byte marks the end of the last record's chunk chain.
                dest.WriteByte(0);
                dest.Close();
                // The merged file goes back on the queue so it can participate in the
                // next pass; the consumed inputs are closed and deleted.
                fileQ.Enqueue(destination_file.Path);
                foreach (var stream in current_streams)
                {
                    stream.Close();
                    File.Delete(stream.Name);
                }
                watch.Stop();
                logger.Debug("Merged {0} records to {1} in {2}.", StringFormatter.DigitGrouped(total_records), destination_file.Path, watch.Elapsed);
            }

            // NOTE(review): throws InvalidOperationException if 'files' was empty —
            // verify callers guarantee at least one input file.
            return(fileQ.First());
        }