/// <summary>
/// Merge a list of sorted temporary files (partitions) into an output file.
/// </summary>
internal void MergePartitions(IList<FileInfo> merges, FileInfo outputFile)
{
    long start = Environment.TickCount;

    var @out = new ByteSequencesWriter(outputFile);
    PriorityQueue<FileAndTop> queue = new PriorityQueueAnonymousInnerClassHelper(this, merges.Count);
    var streams = new ByteSequencesReader[merges.Count];
    try
    {
        // Open streams and read the top for each file
        for (int i = 0; i < merges.Count; i++)
        {
            streams[i] = new ByteSequencesReader(merges[i]);
            byte[] line = streams[i].Read();
            if (line != null)
            {
                queue.InsertWithOverflow(new FileAndTop(i, line));
            }
        }

        // Unix utility sort() uses ordered array of files to pick the next line from, updating
        // it as it reads new lines. The PQ used here is a more elegant solution and has
        // a nicer theoretical complexity bound :) The entire sorting process is I/O bound anyway
        // so it shouldn't make much of a difference (didn't check).
        FileAndTop top;
        while ((top = queue.Top) != null)
        {
            @out.Write(top.Current);
            if (!streams[top.Fd].Read(top.Current))
            {
                queue.Pop();
            }
            else
            {
                queue.UpdateTop();
            }
        }

        sortInfo.MergeTime += Environment.TickCount - start;
        sortInfo.MergeRounds++;
    }
    finally
    {
        // The logic below is: if an exception occurs in closing out, it has a priority over exceptions
        // happening in closing streams.
        try
        {
            IOUtils.Dispose(streams);
        }
        finally
        {
            IOUtils.Dispose(@out);
        }
    }
}
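The comment inside MergePartitions contrasts the ordered-array approach of the Unix sort utility with a priority-queue-driven k-way merge: each input file contributes only its current head entry to the queue, so memory stays proportional to the number of inputs while the next-smallest entry is found in logarithmic time. Below is a minimal, self-contained sketch of that same idea using .NET 6+'s built-in PriorityQueue<TElement, TPriority> over plain text lines. It is an illustration only, not the implementation above; the names KWayMergeSketch and Merge, and the use of StreamReader/StreamWriter, are assumptions made for this example.

using System;
using System.Collections.Generic;
using System.IO;

internal static class KWayMergeSketch
{
    // Merges already-sorted text files into a single sorted output file.
    internal static void Merge(IList<FileInfo> sortedInputs, FileInfo output)
    {
        var readers = new List<StreamReader>();
        try
        {
            // Priority = the source's current head line; ordinal comparison keeps the order byte-like.
            var queue = new PriorityQueue<int, string>(StringComparer.Ordinal);
            foreach (FileInfo file in sortedInputs)
            {
                var reader = new StreamReader(file.FullName);
                readers.Add(reader);
                string first = reader.ReadLine();
                if (first != null)
                {
                    queue.Enqueue(readers.Count - 1, first);
                }
            }

            using var writer = new StreamWriter(output.FullName);
            // Repeatedly emit the smallest head, then refill the queue from the same source.
            while (queue.TryDequeue(out int source, out string line))
            {
                writer.WriteLine(line);
                string next = readers[source].ReadLine();
                if (next != null)
                {
                    queue.Enqueue(source, next);
                }
            }
        }
        finally
        {
            foreach (StreamReader reader in readers)
            {
                reader.Dispose();
            }
        }
    }
}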
/// <summary>
/// Read in a single partition of data.
/// </summary>
internal int ReadPartition(ByteSequencesReader reader)
{
    long start = Environment.TickCount;
    var scratch = new BytesRef();
    while ((scratch.Bytes = reader.Read()) != null)
    {
        scratch.Length = scratch.Bytes.Length;
        buffer.Append(scratch);

        // Account for the created objects.
        // (buffer slots do not account to buffer size.)
        if (ramBufferSize.bytes < bufferBytesUsed.Get())
        {
            break;
        }
    }
    sortInfo.ReadTime += (Environment.TickCount - start);
    return buffer.Length;
}
/// <summary>
/// Read in a single partition of data.
/// </summary>
internal int ReadPartition(ByteSequencesReader reader)
{
    long start = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    var scratch = new BytesRef();
    while ((scratch.Bytes = reader.Read()) != null)
    {
        scratch.Length = scratch.Bytes.Length;
        buffer.Append(scratch);

        // Account for the created objects.
        // (buffer slots do not account to buffer size.)
        if (ramBufferSize.bytes < bufferBytesUsed)
        {
            break;
        }
    }
    sortInfo.ReadTime += ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - start); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    return buffer.Length;
}
/// <summary>
/// Read in a single partition of data.
/// </summary>
internal int ReadPartition(ByteSequencesReader reader)
{
    // NOTE: DateTime.Now.Millisecond yields only the millisecond component (0-999) of the
    // current time, so this measurement is unreliable for tracking elapsed read time.
    long start = DateTime.Now.Millisecond;
    var scratch = new BytesRef();
    while ((scratch.Bytes = reader.Read()) != null)
    {
        scratch.Length = scratch.Bytes.Length;
        Buffer.Append(scratch);

        // Account for the created objects.
        // (buffer slots do not account to buffer size.)
        if (RamBufferSize.Bytes < BufferBytesUsed.Get())
        {
            break;
        }
    }
    sortInfo.ReadTime += (DateTime.Now.Millisecond - start);
    return Buffer.Size();
}
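All three ReadPartition variants follow the same pattern: append incoming entries to an in-memory buffer while tracking approximate RAM usage, and stop as soon as the configured budget is exceeded so the caller can sort and flush that partition. The sketch below shows the pattern in isolation with plain strings; the helper name, the per-entry overhead constant, and the byte-accounting rule are illustrative assumptions, not values taken from the code above.

using System.Collections.Generic;
using System.IO;

internal static class PartitionReaderSketch
{
    // Reads lines until roughly ramBudgetBytes are buffered; returns the number of lines read.
    internal static int ReadPartition(TextReader reader, long ramBudgetBytes, List<string> buffer)
    {
        long bytesUsed = 0;
        int lines = 0;
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            buffer.Add(line);
            lines++;
            // Rough accounting: UTF-16 payload plus a small assumed per-entry overhead.
            bytesUsed += line.Length * 2L + 32;
            if (bytesUsed >= ramBudgetBytes)
            {
                break;
            }
        }
        return lines;
    }
}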
/// <summary>
/// Sort input to output, explicit hint for the buffer size. The amount of allocated
/// memory may deviate from the hint (may be smaller or larger).
/// </summary>
public SortInfo Sort(FileInfo input, FileInfo output)
{
    sortInfo = new SortInfo(this) { TotalTime = Environment.TickCount };

    output.Delete();

    var merges = new List<FileInfo>();
    bool success2 = false;
    try
    {
        var inputStream = new ByteSequencesReader(input);
        bool success = false;
        try
        {
            int lines = 0;
            while ((lines = ReadPartition(inputStream)) > 0)
            {
                merges.Add(SortPartition(lines));
                sortInfo.TempMergeFiles++;
                sortInfo.Lines += lines;

                // Handle intermediate merges.
                if (merges.Count == maxTempFiles)
                {
                    var intermediate = new FileInfo(Path.GetTempFileName());
                    try
                    {
                        MergePartitions(merges, intermediate);
                    }
                    finally
                    {
                        foreach (var file in merges)
                        {
                            file.Delete();
                        }
                        merges.Clear();
                        merges.Add(intermediate);
                    }
                    sortInfo.TempMergeFiles++;
                }
            }
            success = true;
        }
        finally
        {
            if (success)
            {
                IOUtils.Dispose(inputStream);
            }
            else
            {
                IOUtils.DisposeWhileHandlingException(inputStream);
            }
        }

        // One partition, try to rename or copy if unsuccessful.
        if (merges.Count == 1)
        {
            FileInfo single = merges[0];
            Copy(single, output);
            try
            {
                File.Delete(single.FullName);
            }
            catch (Exception)
            {
                // ignored
            }
        }
        else
        {
            // otherwise merge the partitions with a priority queue.
            MergePartitions(merges, output);
        }
        success2 = true;
    }
    finally
    {
        foreach (FileInfo file in merges)
        {
            file.Delete();
        }
        if (!success2)
        {
            output.Delete();
        }
    }

    sortInfo.TotalTime = (Environment.TickCount - sortInfo.TotalTime);
    return sortInfo;
}
/// <summary>
/// Sort input to output, explicit hint for the buffer size. The amount of allocated
/// memory may deviate from the hint (may be smaller or larger).
/// </summary>
public SortInfo Sort(FileInfo input, FileInfo output)
{
    sortInfo = new SortInfo(this) { TotalTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond }; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results

    output.Delete();

    var merges = new JCG.List<FileInfo>();
    bool success2 = false;
    try
    {
        var inputStream = new ByteSequencesReader(input);
        bool success = false;
        try
        {
            int lines = 0;
            while ((lines = ReadPartition(inputStream)) > 0)
            {
                merges.Add(SortPartition(/*lines*/)); // LUCENENET specific - removed unused parameter
                sortInfo.TempMergeFiles++;
                sortInfo.Lines += lines;

                // Handle intermediate merges.
                if (merges.Count == maxTempFiles)
                {
                    var intermediate = new FileInfo(Path.GetTempFileName());
                    try
                    {
                        MergePartitions(merges, intermediate);
                    }
                    finally
                    {
                        foreach (var file in merges)
                        {
                            file.Delete();
                        }
                        merges.Clear();
                        merges.Add(intermediate);
                    }
                    sortInfo.TempMergeFiles++;
                }
            }
            success = true;
        }
        finally
        {
            if (success)
            {
                IOUtils.Dispose(inputStream);
            }
            else
            {
                IOUtils.DisposeWhileHandlingException(inputStream);
            }
        }

        // One partition, try to rename or copy if unsuccessful.
        if (merges.Count == 1)
        {
            FileInfo single = merges[0];
            Copy(single, output);
            try
            {
                File.Delete(single.FullName);
            }
#pragma warning disable CA1031 // Do not catch general exception types
            catch
            {
                // ignored
            }
#pragma warning restore CA1031 // Do not catch general exception types
        }
        else
        {
            // otherwise merge the partitions with a priority queue.
            MergePartitions(merges, output);
        }
        success2 = true;
    }
    finally
    {
        foreach (FileInfo file in merges)
        {
            file.Delete();
        }
        if (!success2)
        {
            output.Delete();
        }
    }

    sortInfo.TotalTime = ((J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - sortInfo.TotalTime); // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
    return sortInfo;
}
/// <summary>
/// Sort input to output, explicit hint for the buffer size. The amount of allocated
/// memory may deviate from the hint (may be smaller or larger).
/// </summary>
public SortInfo Sort(FileInfo input, FileInfo output)
{
    // NOTE: DateTime.Now.Millisecond yields only the millisecond component (0-999) of the
    // current time, so this measurement is unreliable for tracking total elapsed time.
    sortInfo = new SortInfo(this) { TotalTime = DateTime.Now.Millisecond };

    // LUCENENET NOTE: Can't do this because another thread could recreate the file before we
    // are done here and cause this to bomb. We use the existence of the file as an indicator
    // that we are done using it.
    //output.Delete();

    var merges = new List<FileInfo>();
    bool success2 = false;
    try
    {
        var inputStream = new ByteSequencesReader(input);
        bool success = false;
        try
        {
            int lines = 0;
            while ((lines = ReadPartition(inputStream)) > 0)
            {
                merges.Add(SortPartition(lines));
                sortInfo.TempMergeFiles++;
                sortInfo.Lines += lines;

                // Handle intermediate merges.
                if (merges.Count == MaxTempFiles)
                {
                    var intermediate = new FileInfo(Path.GetTempFileName());
                    try
                    {
                        MergePartitions(merges, intermediate);
                    }
                    finally
                    {
                        foreach (var file in merges)
                        {
                            file.Delete();
                        }
                        merges.Clear();
                        merges.Add(intermediate);
                    }
                    sortInfo.TempMergeFiles++;
                }
            }
            success = true;
        }
        finally
        {
            if (success)
            {
                IOUtils.Close(inputStream);
            }
            else
            {
                IOUtils.CloseWhileHandlingException(inputStream);
            }
        }

        // One partition, try to rename or copy if unsuccessful.
        if (merges.Count == 1)
        {
            FileInfo single = merges[0];
            Copy(single, output);
            try
            {
                File.Delete(single.FullName);
            }
            catch (Exception)
            {
                // ignored
            }
        }
        else
        {
            // otherwise merge the partitions with a priority queue.
            MergePartitions(merges, output);
        }
        success2 = true;
    }
    finally
    {
        foreach (FileInfo file in merges)
        {
            file.Delete();
        }
        if (!success2)
        {
            output.Delete();
        }
    }

    sortInfo.TotalTime = (DateTime.Now.Millisecond - sortInfo.TotalTime);
    return sortInfo;
}
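For context, a typical call site for the Sort variants above might look roughly like the sketch below. It assumes the methods live on an OfflineSorter-style class with a parameterless constructor; that class name and constructor are assumptions for illustration, while the SortInfo members used here (Lines, TotalTime, TempMergeFiles) are the ones the methods above populate.

using System;
using System.IO;

internal static class SortUsageSketch
{
    internal static void Run()
    {
        var input = new FileInfo("unsorted.bin");
        var output = new FileInfo("sorted.bin");

        var sorter = new OfflineSorter();           // hypothetical enclosing class
        SortInfo info = sorter.Sort(input, output); // the method shown above

        Console.WriteLine($"Sorted {info.Lines} lines in {info.TotalTime} ms " +
                          $"using {info.TempMergeFiles} temporary merge files.");
    }
}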