Beispiel #1
0
        /// <summary>
        /// Creates a task that sorts a list of records and saves them in a temporary file.
        /// The list is sorted in place. If writing the temporary file fails, the file is
        /// deleted (best effort) before the failure is propagated through the task.
        /// </summary>
        /// <param name="records">the records to sort; the list itself is reordered by the task</param>
        /// <returns>the task that will perform the sort and return the path to the temporary file</returns>
        private Task <string> SortAndSave(List <T> records)
        {
            return(Task.Run(() =>
            {
                // Sort the records
                if (StableSort)
                {
                    // List<T>.Sort is documented as unstable while OrderBy is stable,
                    // so order a copy and put the result back into the same list.
                    var listStableOrdered = records.OrderBy(x => x, Comparer).ToList();
                    records.Clear();
                    records.AddRange(listStableOrdered);
                }
                else
                {
                    records.Sort(Comparer);
                }

                // Save in temporary file (GetTempFileName already creates the file on disk)
                var tmpFilename = Path.GetTempFileName();
                _logger.Debug("Saving temp file: {0}", tmpFilename);

                try
                {
                    using (var writer = RecordAccessorFactory.CreateWriter(new FileStream(tmpFilename, FileMode.Create)))
                    {
                        foreach (var record in records)
                        {
                            writer.Write(record);
                        }
                    }
                }
                catch
                {
                    // The path is only handed out on success, so nobody else could
                    // remove a half-written file. A failed cleanup must not hide the
                    // original error, hence the deliberately ignored exceptions.
                    try
                    {
                        File.Delete(tmpFilename);
                    }
                    catch (IOException)
                    {
                    }
                    catch (System.UnauthorizedAccessException)
                    {
                    }

                    throw;
                }

                return tmpFilename;
            }));
        }
Beispiel #2
0
        /// <summary>
        /// Streams the records of every input file to the sum writers without sorting them.
        /// This method is used when there is no comparer. Records rejected by
        /// <c>Select</c> are skipped; the others are handed to <c>WriteRecord</c>.
        /// The header of each input file is read and then written through <c>WriteHeader</c>.
        /// </summary>
        private void Copy()
        {
            _logger.Info("No comparer: copying records to output using filter and formatter");
            var outputs = GetSumWriters();

            try
            {
                foreach (var inputFile in InputFiles)
                {
                    using (var input = RecordAccessorFactory.CreateReader(inputFile.OpenRead()))
                    {
                        // The header is consumed first so that record reading starts after it
                        _header = input.ReadHeader(HeaderSize);
                        WriteHeader(outputs);

                        for (var current = ReadRecord(input); current != null; current = ReadRecord(input))
                        {
                            if (!Select(current))
                            {
                                continue;
                            }

                            WriteRecord(outputs, current);
                        }
                    }
                }
            }
            finally
            {
                // The writers are released even when reading or writing failed
                foreach (var output in outputs)
                {
                    output.Dispose();
                }
            }
        }
Beispiel #3
0
        /// <summary>
        /// Merges the temporary files to the final output file, then deletes the temporary files.
        /// Readers opened on the temporary files and the output writer are released even when
        /// opening, header writing or merging fails; the temporary files are deleted in that
        /// case as well.
        /// </summary>
        /// <param name="tmpFiles">the paths of the temporary files to merge</param>
        /// <param name="outputFile">the file that receives the merged records</param>
        private void Merge(ICollection <string> tmpFiles, FileInfo outputFile)
        {
            _logger.Info("Merging temporary files");

            try
            {
                // we use a list of buffers to sort them by
                // their current record
                var buffers = new List<RecordReaderBuffer<T>>(tmpFiles.Count);
                SumWriter<T> writer = null;

                try
                {
                    // Opened one by one inside the try block so that a failure on a
                    // later file still closes the streams that are already open.
                    foreach (var tmpFile in tmpFiles)
                    {
                        buffers.Add(new RecordReaderBuffer<T>(RecordAccessorFactory.CreateReader(new FileStream(tmpFile, FileMode.Open)), Comparer));
                    }

                    writer = GetSumWriter(outputFile);
                    writer.WriteHeader(_header);
                    Merge(buffers, writer);
                }
                finally
                {
                    try
                    {
                        // Close the buffers
                        foreach (var buffer in buffers)
                        {
                            buffer.Dispose();
                        }
                    }
                    finally
                    {
                        // The writer is disposed after the buffers, as it was before
                        if (writer != null)
                        {
                            writer.Dispose();
                        }
                    }
                }
            }
            finally
            {
                // Delete temp files, whether or not the merge succeeded
                _logger.Info("Deleting temporary files");
                foreach (var file in tmpFiles)
                {
                    File.Delete(file);
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Sorts the input files in memory: loads every selected record of every input
        /// file, orders them with <c>Comparer</c> and writes them to the sum writers.
        /// The header written is the one read from the last input file.
        /// </summary>
        private void InMemorySort()
        {
            _logger.Info("Sorting in memory");
            var loaded = new List<T>();

            // Phase 1: load the records that pass the filter
            foreach (var inputFile in InputFiles)
            {
                using (var input = RecordAccessorFactory.CreateReader(inputFile.OpenRead()))
                {
                    _header = input.ReadHeader(HeaderSize);

                    for (var current = ReadRecord(input); current != null; current = ReadRecord(input))
                    {
                        if (Select(current))
                        {
                            loaded.Add(current);
                        }
                    }
                }
            }

            // Phase 2: order them
            if (!StableSort)
            {
                loaded.Sort(Comparer);
            }
            else
            {
                // OrderBy keeps equal records in their original relative order
                loaded = loaded.OrderBy(x => x, Comparer).ToList();
            }

            // Phase 3: write header and records
            var outputs = GetSumWriters();

            try
            {
                WriteHeader(outputs);
                foreach (var sorted in loaded)
                {
                    WriteRecord(outputs, sorted);
                }
            }
            finally
            {
                // The writers are released even when writing failed
                foreach (var output in outputs)
                {
                    output.Dispose();
                }
            }
        }
Beispiel #5
0
 /// <summary>
 /// Computes the maximum number of records in memory.
 /// The size of the first record is used as the average record size.
 /// </summary>
 /// <param name="record">the first record</param>
 /// <returns>the maximum number of records to store in memory; never less than 1</returns>
 private long ComputeMaxInMemoryRecords(T record)
 {
     // We write the record in a memory stream just to check its size,
     // then compare with the max in-memory size.
     // This is a very rough approximation, but it is fast to compute.
     using (var stream = new MemoryStream())
         using (var writer = RecordAccessorFactory.CreateWriter(stream))
         {
             writer.Write(record);

             // The stream can still be empty here (for instance if the writer has not
             // pushed its data to the stream yet); count the record as one byte
             // instead of dividing by zero.
             var recordSize = stream.Length;
             if (recordSize < 1)
             {
                 recordSize = 1;
             }

             // A record bigger than the whole budget would give 0, which makes the
             // caller start a new chunk before every single record.
             long maxRecords = _maxInMemorySize / recordSize;
             return(maxRecords < 1 ? 1 : maxRecords);
         }
 }
Beispiel #6
0
        /// <summary>
        /// Merges the temporary files to the final output, then deletes the temporary files.
        /// Readers opened on the temporary files and the sum writers are released even when
        /// opening, header writing or merging fails; the temporary files are deleted in that
        /// case as well.
        /// </summary>
        /// <param name="tmpFiles">the paths of the temporary files to merge, in the order they were produced</param>
        private void Merge(ICollection <string> tmpFiles)
        {
            _logger.Info("Merging temporary files");
            // we use a list of buffers to sort them by
            // their current record
            var buffers = new List<RecordReaderBuffer<T>>(tmpFiles.Count);

            try
            {
                // Each buffer receives the position of its file, which is passed on
                // together with StableSort (order the files to enable stable sort).
                // Opening happens inside the try block so that a failure on a later
                // file still closes the streams that are already open.
                var index = 0;
                foreach (var tmpFile in tmpFiles)
                {
                    buffers.Add(new RecordReaderBuffer<T>(RecordAccessorFactory.CreateReader(new FileStream(tmpFile, FileMode.Open)), Comparer, StableSort, index));
                    index++;
                }

                var sumWriters = GetSumWriters();

                try
                {
                    WriteHeader(sumWriters);
                    Merge(buffers, sumWriters);
                }
                finally
                {
                    // Dispose all the sum writers
                    foreach (var sumWriter in sumWriters)
                    {
                        sumWriter.Dispose();
                    }
                }
            }
            finally
            {
                try
                {
                    // Close the buffers
                    foreach (var buffer in buffers)
                    {
                        buffer.Dispose();
                    }
                }
                finally
                {
                    // Delete temp files, whether or not the merge succeeded
                    _logger.Info("Deleting temporary files");
                    foreach (var file in tmpFiles)
                    {
                        File.Delete(file);
                    }
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Performs the sort using the external merge sort algorithm: records are read
        /// in chunks, each chunk is sorted and saved to a temporary file by a background
        /// task, and the temporary files are finally merged.
        /// The chunk size is derived from the first record read.
        /// </summary>
        private void ExternalSort()
        {
            _logger.Info("Input files too big for memory sort, performing an external merge sort");

            const long notComputedYet = -1;
            var  pendingChunks = new List <Task <string> >();
            var  chunk         = new List <T>();
            long chunkCapacity = notComputedYet;

            foreach (var inputFile in InputFiles)
            {
                using (var input = RecordAccessorFactory.CreateReader(inputFile.OpenRead()))
                {
                    _header = input.ReadHeader(HeaderSize);

                    for (var current = ReadRecord(input); current != null; current = ReadRecord(input))
                    {
                        // The capacity is computed once, from the very first record
                        if (chunkCapacity == notComputedYet)
                        {
                            chunkCapacity = ComputeMaxInMemoryRecords(current);
                        }

                        // A full chunk is handed over to a task and a fresh list is started;
                        // the task keeps working on the old list.
                        if (chunk.Count >= chunkCapacity)
                        {
                            pendingChunks.Add(SortAndSave(chunk));
                            chunk = new List <T>();
                        }

                        if (Select(current))
                        {
                            chunk.Add(current);
                        }
                    }
                }
            }

            // Whatever is left forms the last chunk
            pendingChunks.Add(SortAndSave(chunk));

            // Wait for every chunk, keeping the temporary files in chunk order
            var tmpFiles = new List <string>(pendingChunks.Count);
            foreach (var pendingChunk in pendingChunks)
            {
                tmpFiles.Add(pendingChunk.Result);
            }

            Merge(tmpFiles);
        }
Beispiel #8
0
        /// <summary>
        /// Creates a task that sorts a list of records and saves them in a temporary file.
        /// The list is sorted in place. If writing the temporary file fails, the file is
        /// deleted (best effort) before the failure is propagated through the task.
        /// </summary>
        /// <param name="records">the records to sort; the list itself is reordered by the task</param>
        /// <returns>the task that will perform the sort and return the path to the temporary file</returns>
        private Task <string> SortAndSave(List <T> records)
        {
            return(Task.Run(() =>
            {
                // Sort the records
                records.Sort(Comparer);

                // Save in temporary file (GetTempFileName already creates the file on disk)
                var tmpFilename = Path.GetTempFileName();
                _logger.Debug("Saving temp file: {0}", tmpFilename);

                try
                {
                    using (var writer = RecordAccessorFactory.CreateWriter(new FileStream(tmpFilename, FileMode.Create)))
                    {
                        foreach (var record in records)
                        {
                            writer.Write(record);
                        }
                    }
                }
                catch
                {
                    // The path is only handed out on success, so nobody else could
                    // remove a half-written file. A failed cleanup must not hide the
                    // original error, hence the deliberately ignored exceptions.
                    try
                    {
                        File.Delete(tmpFilename);
                    }
                    catch (IOException)
                    {
                    }
                    catch (System.UnauthorizedAccessException)
                    {
                    }

                    throw;
                }

                return tmpFilename;
            }));
        }
Beispiel #9
0
        /// <summary>
        /// Sorts the input files in memory: loads every selected record of every input
        /// file, sorts them with <c>Comparer</c> and writes them to the output file.
        /// The header written is the one read from the last input file.
        /// </summary>
        /// <param name="inputFiles">the files to sort</param>
        /// <param name="outputFile">the output file</param>
        private void InMemorySort(IEnumerable <FileInfo> inputFiles, FileInfo outputFile)
        {
            _logger.Info("Sorting in memory");
            var loaded = new List <T>();

            // Phase 1: load the records that pass the filter
            foreach (var inputFile in inputFiles)
            {
                using (var input = RecordAccessorFactory.CreateReader(inputFile.OpenRead()))
                {
                    _header = input.ReadHeader(HeaderSize);

                    for (var current = ReadRecord(input); current != null; current = ReadRecord(input))
                    {
                        if (Select(current))
                        {
                            loaded.Add(current);
                        }
                    }
                }
            }

            // Phase 2: order them
            loaded.Sort(Comparer);

            // Phase 3: write header and records
            using (var output = GetSumWriter(outputFile))
            {
                output.WriteHeader(_header);
                foreach (var sorted in loaded)
                {
                    output.Write(sorted);
                }
            }
        }
Beispiel #10
0
 /// <summary>
 /// Streams the records of every input file to the output file without sorting them.
 /// This method is used when there is no comparer. Records rejected by
 /// <c>Select</c> are skipped. The header of each input file is written to the output.
 /// </summary>
 /// <param name="inputFiles">the files to copy</param>
 /// <param name="outputFile">the output file</param>
 private void Copy(IEnumerable <FileInfo> inputFiles, FileInfo outputFile)
 {
     _logger.Info("No comparer: copying records to output using filter and formatter");
     using (var output = GetSumWriter(outputFile))
     {
         foreach (var inputFile in inputFiles)
         {
             using (var input = RecordAccessorFactory.CreateReader(inputFile.OpenRead()))
             {
                 _logger.Debug("Writing header");
                 var header = input.ReadHeader(HeaderSize);
                 output.WriteHeader(header);

                 for (var current = ReadRecord(input); current != null; current = ReadRecord(input))
                 {
                     if (!Select(current))
                     {
                         continue;
                     }

                     output.Write(current);
                 }
             }
         }
     }
 }
Beispiel #11
0
 /// <summary>
 /// Builds a record writer on the given file that uses <see cref="Sum"/> to sum similar records.
 /// The directory of the file is created first, then the file itself is created.
 /// </summary>
 /// <param name="file">the file to write to</param>
 /// <returns>the sum writer</returns>
 private SumWriter <T> GetSumWriter(FileInfo file)
 {
     // Make sure the target directory is there before creating the file
     file.Directory.Create();

     var outputStream = file.Create();
     var recordWriter = RecordAccessorFactory.CreateWriter(outputStream);
     return new SumWriter <T>(recordWriter, Sum, Comparer, OutputFormatter);
 }