Ejemplo n.º 1
0
        /// <summary>
        /// Sorts the records supplied by <paramref name="recordsReader"/> by a single field and
        /// writes the result to <paramref name="output"/>.
        /// </summary>
        /// <param name="recordsReader">Reader that supplies the blocks to sort.</param>
        /// <param name="field">Index of the field to sort by.</param>
        /// <param name="output">Writer that receives the sorted records.</param>
        /// <param name="workersCount">
        /// When greater than 1 the blocks are sorted with <c>SortBlocksParallel</c>,
        /// otherwise with <c>SortBlocksSequential</c>.
        /// </param>
        /// <param name="bufferSizeInBytesPerWorker">Buffer budget handed to the merge step.</param>
        private void Sort(RecordsReader recordsReader, int field, StreamWriter output,
                          int workersCount, long bufferSizeInBytesPerWorker)
        {
            Stopwatch sortTimer = Stopwatch.StartNew();

            RecordsBuffer singleBlock;
            int           sortedBlocks;
            if (workersCount > 1)
            {
                sortedBlocks = SortBlocksParallel(recordsReader, field, workersCount, out singleBlock);
            }
            else
            {
                sortedBlocks = SortBlocksSequential(recordsReader, field, out singleBlock);
            }

            sortTimer.Stop();

            // Exactly one block: it was handed back in memory, so write it straight to the
            // output and skip the merge phase.
            if (sortedBlocks == 1)
            {
                new RecordsWriter(output).WriteRecords(singleBlock);
                return;
            }

            Console.WriteLine($"{sortedBlocks} blocks sorted in {sortTimer.Elapsed}");
            Console.WriteLine("Merging...");

            Stopwatch mergeTimer = Stopwatch.StartNew();
            MergeBlocks(sortedBlocks, field, output, bufferSizeInBytesPerWorker);
            mergeTimer.Stop();

            Console.WriteLine($"{sortedBlocks} blocks merged in {mergeTimer.Elapsed}");
            _tempStreams.ClearBlocks();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Opens temporary blocks 0..<paramref name="blockCount"/>-1 and merges their records
        /// into <paramref name="output"/>.
        /// </summary>
        /// <param name="blockCount">Number of blocks to open through <c>_tempStreams.OpenBlockStream</c>.</param>
        /// <param name="field">Field index forwarded to the enumerator-based merge overload.</param>
        /// <param name="output">Writer that receives the merged records.</param>
        /// <param name="bufferSizeInBytes">
        /// Total buffer budget. It is divided into <c>blockCount + 1</c> equal shares: one per
        /// block reader plus one for the buffered output writer.
        /// </param>
        private void MergeBlocks(int blockCount, int field, StreamWriter output, long bufferSizeInBytes)
        {
            long blockSize = bufferSizeInBytes / (blockCount + 1);

            List<Stream>               blockStreams            = new List<Stream>();
            List<IEnumerator<Record>>  blockRecordsEnumerators = new List<IEnumerator<Record>>();

            try
            {
                for (int i = 0; i < blockCount; i++)
                {
                    Stream blockStream = _tempStreams.OpenBlockStream(i);
                    // Track the stream immediately so it is released even if a later step throws.
                    blockStreams.Add(blockStream);

                    StreamReader  blockStreamReader = new StreamReader(blockStream);
                    RecordsReader blockReader       = new RecordsReader(blockStreamReader, blockSize);
                    blockRecordsEnumerators.Add(blockReader.ReadRecords().GetEnumerator());
                }

                using (BufferedRecordsWriter recordsWriter = new BufferedRecordsWriter(output, blockSize))
                {
                    MergeBlocks(blockRecordsEnumerators, recordsWriter, field);
                }
            }
            finally
            {
                try
                {
                    // IEnumerator<T> is IDisposable; disposing lets any cleanup inside the
                    // ReadRecords iterator run. Done before the streams they read from are closed.
                    foreach (IEnumerator<Record> enumerator in blockRecordsEnumerators)
                    {
                        enumerator.Dispose();
                    }
                }
                finally
                {
                    // Nested finally: the streams are released even if an enumerator's
                    // Dispose throws.
                    foreach (Stream s in blockStreams)
                    {
                        s.Dispose();
                    }
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Sorts the reader's blocks one after another on the calling thread.
        /// </summary>
        /// <param name="recordsReader">Reader that supplies the blocks.</param>
        /// <param name="field">Index of the field to sort each block by.</param>
        /// <param name="firstBlock">
        /// Set to the sorted block when the very first block is also reported as the last one
        /// (<c>recordsReader.IsLastBlock</c>); otherwise <c>null</c>.
        /// </param>
        /// <returns>
        /// 1 when the single block is returned through <paramref name="firstBlock"/> (nothing is
        /// written in that case); otherwise the number of blocks passed to <c>WriteBlock</c>.
        /// </returns>
        private int SortBlocksSequential(RecordsReader recordsReader, int field, out RecordsBuffer firstBlock)
        {
            firstBlock = null;

            int writtenBlocks = 0;
            foreach (RecordsBuffer current in recordsReader.ReadBlocks())
            {
                current.Sort(field);

                // Only the first block can short-circuit: if it is also the last one, hand it
                // back in memory instead of writing it out as a block.
                bool isOnlyBlock = writtenBlocks == 0 && recordsReader.IsLastBlock;
                if (isOnlyBlock)
                {
                    firstBlock = current;
                    return 1;
                }

                WriteBlock(current, writtenBlocks);
                Console.WriteLine($"Block {writtenBlocks} sorted");
                writtenBlocks++;
            }

            return writtenBlocks;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads blocks on one task and dispatches each of them to a sorting task.
        /// </summary>
        /// <param name="recordsReader">Reader that supplies the blocks.</param>
        /// <param name="field">Index of the field to sort each block by.</param>
        /// <param name="workersCount">
        /// Bounded capacity of both the block queue and the completion-token queue.
        /// </param>
        /// <param name="firstBlock">
        /// Set to the sorted block when the first block is also reported as the last one
        /// (<c>recordsReader.IsLastBlock</c>); otherwise <c>null</c>.
        /// </param>
        /// <returns>
        /// 0 when the reader yields no block, 1 when the single block is returned through
        /// <paramref name="firstBlock"/>, otherwise the number of blocks queued for sorting.
        /// </returns>
        private int SortBlocksParallel(RecordsReader recordsReader, int field, int workersCount,
                                       out RecordsBuffer firstBlock)
        {
            firstBlock = null;

            // The enumerator is disposed on every exit path, matching what the foreach in the
            // sequential variant does implicitly.
            using (IEnumerator<RecordsBuffer> blocks = recordsReader.ReadBlocks().GetEnumerator())
            {
                if (!blocks.MoveNext())
                {
                    return 0;
                }

                if (recordsReader.IsLastBlock)
                {
                    firstBlock = blocks.Current;
                    firstBlock.Sort(field);
                    return 1;
                }

                int blockIndex = 0;

                using (var blockCollection = new BlockingCollection<Tuple<RecordsBuffer, int>>(workersCount))
                using (var sortCompletionCollection = new BlockingCollection<bool>(workersCount))
                {
                    Task blocksReadingTask = Task.Factory.StartNew(() =>
                    {
                        try
                        {
                            do
                            {
                                blockCollection.Add(new Tuple<RecordsBuffer, int>(blocks.Current, blockIndex++));
                                // One token per queued block; Add blocks once workersCount tokens
                                // are outstanding. NOTE(review): presumably StartBlockSortingTask
                                // removes a token when its sort finishes - confirm.
                                sortCompletionCollection.Add(true);
                            } while (blocks.MoveNext());
                        }
                        finally
                        {
                            // Always signal completion, even when reading fails; otherwise the
                            // consumer below would wait forever and Task.WaitAll would never return.
                            blockCollection.CompleteAdding();
                        }
                    });

                    Task blocksSortingTask = Task.Factory.StartNew(() =>
                    {
                        List<Task> sortTasks = new List<Task>();

                        // GetConsumingEnumerable ends once the collection is completed and drained,
                        // so no exception has to be caught to leave the loop (and an
                        // InvalidOperationException from StartBlockSortingTask is no longer swallowed).
                        foreach (Tuple<RecordsBuffer, int> blockAndIndex in blockCollection.GetConsumingEnumerable())
                        {
                            Task t = StartBlockSortingTask(blockAndIndex.Item1, blockAndIndex.Item2,
                                                           field, sortCompletionCollection);
                            sortTasks.Add(t);
                        }

                        Task.WaitAll(sortTasks.ToArray());
                    });

                    Task.WaitAll(blocksReadingTask, blocksSortingTask);
                }

                // Both tasks have finished, so the reader task's increments are complete.
                return blockIndex;
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Sorts big tabled data from stream by specified fields.
        /// </summary>
        /// <param name="input">
        /// Source data stream. Its <c>Position</c> is reset to 0 before each pass, so it must be seekable.
        /// </param>
        /// <param name="fields">Field indexes by which sorting will happen one after another.</param>
        /// <param name="output">
        /// Dest data stream. Its <c>Position</c> is reset to 0 before it is written, and with several
        /// fields it is also read back as the input of a later pass.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="input"/>, <paramref name="fields"/> or <paramref name="output"/> is <c>null</c>.
        /// </exception>
        public void Sort(Stream input, int[] fields, Stream output)
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }
            if (fields == null)
            {
                throw new ArgumentNullException(nameof(fields));
            }
            if (output == null)
            {
                throw new ArgumentNullException(nameof(output));
            }

            // Computed before the temp stream is created so a failing division cannot leak it.
            int  workersCount               = _maxWorkersCount;
            long bufferSizeInBytesPerWorker = _maxBufferSizeInBytes / workersCount;

            int    prevField      = -1;
            Stream originalOutput = output;
            // Passes alternate between the temp stream and the caller's stream. Choosing the
            // starting side from the parity of the pass count makes the final pass
            // (i == fields.Length - 1) always write to originalOutput.
            int    tempOutputFirst = fields.Length % 2;
            Stream tempOutput      = fields.Length > 1 ? _tempStreams.CreateTempOutputStream() : null;

            try
            {
                for (int i = 0; i < fields.Length; i++)
                {
                    Console.WriteLine($"Sorting by field {fields[i]}...");
                    Stopwatch stopwatch = new Stopwatch();
                    stopwatch.Start();

                    output          = (i % 2) == tempOutputFirst ? tempOutput : originalOutput;
                    input.Position  = 0;
                    output.Position = 0;
                    // The reader/writer are intentionally not disposed: by default that would
                    // close the underlying streams, which are reused by the following pass.
                    StreamReader  sr            = new StreamReader(input);
                    StreamWriter  sw            = new StreamWriter(output);
                    RecordsReader recordsReader = new RecordsReader(sr, bufferSizeInBytesPerWorker, prevField);
                    while (!recordsReader.IsEnd)
                    {
                        Sort(recordsReader, fields[i], sw, workersCount, bufferSizeInBytesPerWorker);
                    }
                    sw.Flush();
                    prevField = fields[i];
                    // The output of this pass becomes the input of the next one.
                    input     = output;

                    stopwatch.Stop();
                    Console.WriteLine($"Sorting by field {fields[i]} done in {stopwatch.Elapsed}");
                }
            }
            finally
            {
                if (tempOutput != null)
                {
                    tempOutput.Dispose();
                }
            }
        }