Example #1
0
        /// <summary>
        /// Merges a list of sorted temporary files (partitions) into a single output file,
        /// using a priority queue that holds the current entry of each partition.
        /// </summary>
        /// <param name="merges">The partition files to merge.</param>
        /// <param name="outputFile">The file that receives the merged sequence.</param>
        /// <remarks>
        /// <c>sortInfo.MergeTime</c> and <c>sortInfo.MergeRounds</c> are updated only when the
        /// merge loop completes without throwing.
        /// </remarks>
        internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
        {
            long startTicks = Environment.TickCount;

            var writer = new ByteSequencesWriter(outputFile);

            PriorityQueue<FileAndTop> heads = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);

            var readers = new ByteSequencesReader[merges.Count];

            try
            {
                // Open every partition and seed the queue with its first entry (if it has one).
                int fd = 0;
                while (fd < merges.Count)
                {
                    readers[fd] = new ByteSequencesReader(merges[fd]);
                    byte[] first = readers[fd].Read();
                    if (first != null)
                    {
                        heads.InsertWithOverflow(new FileAndTop(fd, first));
                    }
                    fd++;
                }

                // The Unix sort utility keeps an ordered array of files to pick the next line from;
                // a priority queue does the same job with a nicer theoretical bound. The whole
                // process is I/O bound anyway, so the difference is probably small (not measured).
                for (FileAndTop head = heads.Top; head != null; head = heads.Top)
                {
                    writer.Write(head.Current);

                    // Refill the entry in place from the partition it came from.
                    bool refilled = readers[head.Fd].Read(head.Current);
                    if (refilled)
                    {
                        heads.UpdateTop();
                    }
                    else
                    {
                        // That partition has nothing more to offer; drop it from the queue.
                        heads.Pop();
                    }
                }

                sortInfo.MergeTime += Environment.TickCount - startTicks;
                sortInfo.MergeRounds++;
            }
            finally
            {
                // Readers are disposed first; the writer is disposed in a nested finally so that it
                // is released even if disposing the readers throws. An exception thrown while
                // disposing the writer therefore takes priority over one from the readers.
                try
                {
                    IOUtils.Dispose(readers);
                }
                finally
                {
                    IOUtils.Dispose(writer);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Reads a single partition of data: appends entries from <paramref name="reader"/> to the
        /// in-memory buffer until <c>reader.Read()</c> returns <c>null</c> or the tracked buffer
        /// usage exceeds the configured RAM buffer size.
        /// </summary>
        /// <param name="reader">The source of byte sequences.</param>
        /// <returns>The value of <c>buffer.Length</c> once reading stops.</returns>
        internal int ReadPartition(ByteSequencesReader reader)
        {
            long startTicks = Environment.TickCount;
            var entry = new BytesRef();

            for (;;)
            {
                byte[] line = reader.Read();
                entry.Bytes = line;
                if (line == null)
                {
                    // Read() returned null: nothing more to append.
                    break;
                }

                entry.Length = entry.Bytes.Length;
                buffer.Append(entry);

                // Account for the created objects.
                // (Buffer slots do not count towards the buffer size.)
                if (ramBufferSize.bytes < bufferBytesUsed.Get())
                {
                    break;
                }
            }

            sortInfo.ReadTime += (Environment.TickCount - startTicks);
            return buffer.Length;
        }
Example #3
0
        /// <summary>
        /// Reads a single partition of data: appends entries from <paramref name="reader"/> to the
        /// in-memory buffer until <c>reader.Read()</c> returns <c>null</c> or the tracked buffer
        /// usage exceeds the configured RAM buffer size.
        /// </summary>
        /// <param name="reader">The source of byte sequences.</param>
        /// <returns>The value of <c>buffer.Length</c> once reading stops.</returns>
        internal int ReadPartition(ByteSequencesReader reader)
        {
            // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            long startMs = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;
            var entry = new BytesRef();

            bool keepReading = true;
            while (keepReading)
            {
                byte[] line = reader.Read();
                entry.Bytes = line;

                if (line == null)
                {
                    // Read() returned null: nothing more to append.
                    keepReading = false;
                }
                else
                {
                    entry.Length = entry.Bytes.Length;
                    buffer.Append(entry);

                    // Account for the created objects.
                    // (Buffer slots do not count towards the buffer size.)
                    keepReading = !(ramBufferSize.bytes < bufferBytesUsed);
                }
            }

            // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            long endMs = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;
            sortInfo.ReadTime += (endMs - startMs);
            return buffer.Length;
        }
Example #4
0
        /// <summary>
        /// Reads a single partition of data: appends entries from <paramref name="reader"/> to the
        /// in-memory buffer until <c>reader.Read()</c> returns <c>null</c> or the tracked buffer
        /// usage exceeds the configured RAM buffer size.
        /// </summary>
        /// <param name="reader">The source of byte sequences.</param>
        /// <returns>The value of <c>Buffer.Size()</c> once reading stops.</returns>
        internal int ReadPartition(ByteSequencesReader reader)
        {
            // Measure elapsed time with a Stopwatch. DateTime.Now.Millisecond is only the 0-999
            // millisecond component of the wall clock, so subtracting two such values does not
            // give a duration (and can be negative).
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            var scratch = new BytesRef();

            while ((scratch.Bytes = reader.Read()) != null)
            {
                scratch.Length = scratch.Bytes.Length;
                Buffer.Append(scratch);
                // Account for the created objects.
                // (Buffer slots do not count towards the buffer size.)
                if (RamBufferSize.Bytes < BufferBytesUsed.Get())
                {
                    break;
                }
            }

            sortInfo.ReadTime += stopwatch.ElapsedMilliseconds;
            return Buffer.Size();
        }
Example #5
0
        /// <summary>
        /// Sorts <paramref name="input"/> into <paramref name="output"/>: reads the input one
        /// partition at a time, sorts each partition into a temporary file, and merges the
        /// temporary files into the output.
        /// </summary>
        /// <param name="input">The file to sort.</param>
        /// <param name="output">
        /// The destination file. It is deleted before sorting starts and deleted again if the
        /// sort does not complete.
        /// </param>
        /// <returns>The <c>sortInfo</c> instance populated during this run.</returns>
        public SortInfo Sort(FileInfo input, FileInfo output)
        {
            var info = new SortInfo(this);
            info.TotalTime = Environment.TickCount; // start stamp; replaced by the elapsed time at the end
            sortInfo = info;

            output.Delete();

            var partitions = new List<FileInfo>();
            bool completed = false;

            try
            {
                var source = new ByteSequencesReader(input);
                bool readOk = false;
                try
                {
                    for (int lines = ReadPartition(source); lines > 0; lines = ReadPartition(source))
                    {
                        partitions.Add(SortPartition(lines));
                        sortInfo.TempMergeFiles++;
                        sortInfo.Lines += lines;

                        if (partitions.Count != maxTempFiles)
                        {
                            continue;
                        }

                        // Too many temporary files: collapse them into one intermediate file.
                        var merged = new FileInfo(Path.GetTempFileName());
                        try
                        {
                            MergePartitions(partitions, merged);
                        }
                        finally
                        {
                            // Whether or not the merge succeeded, the old partitions are removed and
                            // the intermediate file becomes the only tracked partition.
                            for (int i = 0; i < partitions.Count; i++)
                            {
                                partitions[i].Delete();
                            }
                            partitions.Clear();
                            partitions.Add(merged);
                        }
                        sortInfo.TempMergeFiles++;
                    }
                    readOk = true;
                }
                finally
                {
                    if (!readOk)
                    {
                        IOUtils.DisposeWhileHandlingException(source);
                    }
                    else
                    {
                        IOUtils.Dispose(source);
                    }
                }

                if (partitions.Count != 1)
                {
                    // Merge the partitions with a priority queue.
                    MergePartitions(partitions, output);
                }
                else
                {
                    // Exactly one partition: copy it to the output, then try to delete it.
                    FileInfo only = partitions[0];
                    Copy(only, output);
                    try
                    {
                        File.Delete(only.FullName);
                    }
                    catch (Exception)
                    {
                        // ignored
                    }
                }
                completed = true;
            }
            finally
            {
                // Remove whatever temporary files are still tracked.
                for (int i = 0; i < partitions.Count; i++)
                {
                    partitions[i].Delete();
                }
                if (!completed)
                {
                    output.Delete();
                }
            }

            sortInfo.TotalTime = (Environment.TickCount - sortInfo.TotalTime);
            return sortInfo;
        }
Example #6
0
        /// <summary>
        /// Sorts <paramref name="input"/> into <paramref name="output"/>: reads the input one
        /// partition at a time, sorts each partition into a temporary file, and merges the
        /// temporary files into the output.
        /// </summary>
        /// <param name="input">The file to sort.</param>
        /// <param name="output">
        /// The destination file. It is deleted before sorting starts and deleted again if the
        /// sort does not complete.
        /// </param>
        /// <returns>The <c>sortInfo</c> instance populated during this run.</returns>
        public SortInfo Sort(FileInfo input, FileInfo output)
        {
            // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            sortInfo = new SortInfo(this)
            {
                TotalTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond
            };

            output.Delete();

            var tempFiles = new JCG.List<FileInfo>();
            bool sortSucceeded = false;

            try
            {
                var reader = new ByteSequencesReader(input);
                bool readSucceeded = false;
                try
                {
                    while (true)
                    {
                        int lines = ReadPartition(reader);
                        if (lines <= 0)
                        {
                            break;
                        }

                        tempFiles.Add(SortPartition(/*lines*/)); // LUCENENET specific - removed unused parameter
                        sortInfo.TempMergeFiles++;
                        sortInfo.Lines += lines;

                        // Handle intermediate merges once the temp-file limit is reached.
                        if (tempFiles.Count == maxTempFiles)
                        {
                            var intermediate = new FileInfo(Path.GetTempFileName());
                            try
                            {
                                MergePartitions(tempFiles, intermediate);
                            }
                            finally
                            {
                                // Whether or not the merge succeeded, the old partitions are removed
                                // and the intermediate file becomes the only tracked partition.
                                foreach (FileInfo stale in tempFiles)
                                {
                                    stale.Delete();
                                }
                                tempFiles.Clear();
                                tempFiles.Add(intermediate);
                            }
                            sortInfo.TempMergeFiles++;
                        }
                    }
                    readSucceeded = true;
                }
                finally
                {
                    if (!readSucceeded)
                    {
                        IOUtils.DisposeWhileHandlingException(reader);
                    }
                    else
                    {
                        IOUtils.Dispose(reader);
                    }
                }

                if (tempFiles.Count != 1)
                {
                    // Merge the partitions with a priority queue.
                    MergePartitions(tempFiles, output);
                }
                else
                {
                    // Exactly one partition: copy it to the output, then try to delete it.
                    FileInfo single = tempFiles[0];
                    Copy(single, output);
                    try
                    {
                        File.Delete(single.FullName);
                    }
#pragma warning disable CA1031 // Do not catch general exception types
                    catch
                    {
                        // ignored
                    }
#pragma warning restore CA1031 // Do not catch general exception types
                }
                sortSucceeded = true;
            }
            finally
            {
                // Remove whatever temporary files are still tracked.
                foreach (FileInfo leftover in tempFiles)
                {
                    leftover.Delete();
                }
                if (!sortSucceeded)
                {
                    output.Delete();
                }
            }

            // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
            long finishedMs = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond;
            sortInfo.TotalTime = (finishedMs - sortInfo.TotalTime);
            return sortInfo;
        }
Example #7
0
        /// <summary>
        /// Sorts <paramref name="input"/> into <paramref name="output"/>: reads the input one
        /// partition at a time, sorts each partition into a temporary file, and merges the
        /// temporary files into the output.
        /// </summary>
        /// <param name="input">The file to sort.</param>
        /// <param name="output">
        /// The destination file. It is deleted if the sort does not complete.
        /// </param>
        /// <returns>
        /// The <c>sortInfo</c> instance populated during this run; its <c>TotalTime</c> is the
        /// elapsed time of the call in milliseconds.
        /// </returns>
        public SortInfo Sort(FileInfo input, FileInfo output)
        {
            // Measure elapsed time with a Stopwatch. DateTime.Now.Millisecond is only the 0-999
            // millisecond component of the wall clock, so subtracting two such values does not
            // give a duration (and can be negative).
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            sortInfo = new SortInfo(this);

            // LUCENENET NOTE: Can't do this because another thread could recreate the file before we are done here.
            // and cause this to bomb. We use the existence of the file as an indicator that we are done using it.
            //output.Delete();

            var  merges   = new List<FileInfo>();
            bool success2 = false;

            try
            {
                var  inputStream = new ByteSequencesReader(input);
                bool success     = false;
                try
                {
                    int lines = 0;
                    while ((lines = ReadPartition(inputStream)) > 0)
                    {
                        merges.Add(SortPartition(lines));
                        sortInfo.TempMergeFiles++;
                        sortInfo.Lines += lines;

                        // Handle intermediate merges once the temp-file limit is reached.
                        if (merges.Count == MaxTempFiles)
                        {
                            var intermediate = new FileInfo(Path.GetTempFileName());
                            try
                            {
                                MergePartitions(merges, intermediate);
                            }
                            finally
                            {
                                // Whether or not the merge succeeded, the old partitions are removed
                                // and the intermediate file becomes the only tracked partition.
                                foreach (var file in merges)
                                {
                                    file.Delete();
                                }
                                merges.Clear();
                                merges.Add(intermediate);
                            }
                            sortInfo.TempMergeFiles++;
                        }
                    }
                    success = true;
                }
                finally
                {
                    if (success)
                    {
                        IOUtils.Close(inputStream);
                    }
                    else
                    {
                        IOUtils.CloseWhileHandlingException(inputStream);
                    }
                }

                // Exactly one partition: copy it to the output, then try to delete it.
                if (merges.Count == 1)
                {
                    FileInfo single = merges[0];
                    Copy(single, output);
                    try
                    {
                        File.Delete(single.FullName);
                    }
                    catch (Exception)
                    {
                        // Deliberately ignored: the finally block below attempts the delete again.
                    }
                }
                else
                {
                    // otherwise merge the partitions with a priority queue.
                    MergePartitions(merges, output);
                }
                success2 = true;
            }
            finally
            {
                // Remove whatever temporary files are still tracked.
                foreach (FileInfo file in merges)
                {
                    file.Delete();
                }
                if (!success2)
                {
                    output.Delete();
                }
            }

            sortInfo.TotalTime = stopwatch.ElapsedMilliseconds;
            return sortInfo;
        }