/// <summary>
/// Lazily produces the pieces of work for a backward read of the media stream.
/// The read position starts one alignment block past the block-aligned start
/// position and moves toward the beginning of the requested range in steps of
/// <c>owner.BytesToParsePerThread</c>.
/// </summary>
/// <remarks>
/// Every piece is yielded one iteration late: the following (earlier in the stream)
/// chunk is read first so that it can be attached to the pending piece as
/// <c>prevStreamData</c>, while the pending chunk becomes the new piece's
/// <c>nextStreamData</c>.
/// </remarks>
/// <param name="cancellationToken">Checked at the top of every loop iteration.</param>
/// <returns>Pieces of work in the order they are read. Nothing is yielded if the first chunk is empty.</returns>
/// <exception cref="OperationCanceledException">Cancellation was requested between two pieces.</exception>
IEnumerable<PieceOfWork> ReadRawDataFromMedia_Backward(CancellationToken cancellationToken)
{
    Stream dataStream = owner.media.DataStream;
    CreateParserParams currentParams = owner.currentParams;
    var requestedRange = currentParams.Range.Value;
    var origin = new TextStreamPosition(currentParams.StartPosition, owner.textStreamPositioningParams);

    long lowerBound =
        new TextStreamPosition(requestedRange.Begin, owner.textStreamPositioningParams).StreamPositionAlignedToBlockSize;
    long cursor =
        origin.StreamPositionAlignedToBlockSize + owner.textStreamPositioningParams.AlignmentBlockSize;

    // For multi-byte encodings the lower bound is moved back by the maximum
    // size of one character, unless the range already starts at stream offset 0.
    if (lowerBound != 0 && !owner.encoding.IsSingleByte)
    {
        lowerBound -= owner.encoding.GetMaxByteCount(1);
    }

    // The first piece starts at the exact requested text position rather than at a chunk boundary.
    var pending = new PieceOfWork(Interlocked.Increment(ref owner.nextPieceOfWorkId), owner.tracer);
    pending.streamData = AllocateAndReadStreamData_Backward(dataStream, cursor);
    if (pending.streamData.IsEmpty)
    {
        yield break;
    }
    pending.startTextPosition = origin.Value;
    pending.stopTextPosition = cursor - owner.BytesToParsePerThread;
    pending.outputBuffer = owner.AllocateOutputBuffer();
    cursor -= owner.BytesToParsePerThread;

    while (true)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Read one chunk ahead so the pending piece gets its neighbour before it is handed out.
        var upcoming = new PieceOfWork(Interlocked.Increment(ref owner.nextPieceOfWorkId), owner.tracer)
        {
            streamData = AllocateAndReadStreamData_Backward(dataStream, cursor),
            nextStreamData = pending.streamData,
            startTextPosition = cursor,
            stopTextPosition = cursor - owner.BytesToParsePerThread,
            outputBuffer = owner.AllocateOutputBuffer()
        };
        pending.prevStreamData = upcoming.streamData;

        yield return pending;

        // Stop once the cursor has passed the lower bound or the stream has no more data.
        if (cursor < lowerBound || upcoming.streamData.IsEmpty)
        {
            yield break;
        }

        pending = upcoming;
        cursor -= owner.BytesToParsePerThread;
    }
}
/// <summary>
/// Reads every message from the file at <c>@params.Location</c> with the reader
/// produced by the factory named in <c>@params.Argument</c>, passes the messages
/// through a bounded priority queue ordered by <c>MessagesComparer</c>, and writes
/// their raw text to a newly generated temporary file.
/// </summary>
/// <param name="callback">
/// Supplies the temp-file manager, the cleanup list, the cancellation token and
/// receives step-description updates.
/// </param>
/// <returns>
/// Parameters that point at the temporary file, with " (reordered)" appended to the
/// full path and a new entry added to the preprocessing history.
/// </returns>
/// <exception cref="InvalidFormatException">The factory name does not consist of exactly two '\'-separated parts.</exception>
/// <exception cref="InvalidDataException">
/// The factory is not registered, is not an <c>IMediaBasedReaderFactory</c>, or its
/// reader is not a <c>MediaBasedPositionedMessagesReader</c>.
/// </exception>
async Task<PreprocessingStepParams> ExecuteInternal(IPreprocessingStepCallback callback)
{
    await callback.BecomeLongRunning();

    string factoryName = @params.Argument;
    callback.TempFilesCleanupList.Add(@params.Location);

    // Publishes the step description, optionally with a whole-number percentage.
    Action<double?> reportStatus = fraction =>
    {
        var text = new StringBuilder()
            .Append(@params.FullPath)
            .Append(": fixing timestamp anomalies...");
        if (fraction.HasValue)
        {
            text.AppendFormat(" {0}%", (int)(fraction.Value * 100));
        }
        callback.SetStepDescription(text.ToString());
    };
    reportStatus(null);

    string tmpFileName = callback.TempFilesManager.GenerateNewName();

    // The factory name must have exactly two parts separated by a backslash.
    var nameParts = factoryName.Split('\\');
    if (nameParts.Length != 2)
    {
        throw new InvalidFormatException();
    }

    var factory = logProviderFactoryRegistry.Find(nameParts[0], nameParts[1]);
    if (factory == null)
    {
        throw new InvalidDataException("factory not found: " + factoryName);
    }

    var mediaReaderFactory = factory as IMediaBasedReaderFactory;
    if (mediaReaderFactory == null)
    {
        throw new InvalidDataException("bad factory: " + factoryName);
    }

    using (ILogMedia fileMedia = await SimpleFileMedia.Create(
        fileSystem, SimpleFileMedia.CreateConnectionParamsFromFileName(@params.Location)))
    using (ILogSourceThreadsInternal threads = new LogSourceThreads())
    using (var reader = mediaReaderFactory.CreateMessagesReader(
        new MediaBasedReaderParams(threads, fileMedia)))
    {
        // TODO: do not use real classes; have stream encoding in an interface.
        var positionedReader = reader as MediaBasedPositionedMessagesReader;
        if (positionedReader == null)
        {
            throw new InvalidDataException("bad reader was made by factory " + factoryName);
        }

        await reader.UpdateAvailableBounds(false);
        var range = new FileRange.Range(reader.BeginPosition, reader.EndPosition);
        double rangeLen = range.Length;

        using (var progress = progressAggregator.CreateProgressSink())
        using (var writer = new StreamWriter(tmpFileName, false, positionedReader.StreamEncoding))
        {
            var parserParams = new CreateParserParams(
                reader.BeginPosition,
                flags: MessagesParserFlag.DisableDejitter | MessagesParserFlag.HintParserWillBeUsedForMassiveSequentialReading);
            var createdParser = await reader.CreateParser(parserParams);

            await DisposableAsync.Using(createdParser, async parser =>
            {
                var pendingMessages = new VCSKicksCollection.PriorityQueue<IMessage>(
                    new MessagesComparer(ignoreConnectionIds: true));
                Action writeNext = () => writer.WriteLine(pendingMessages.Dequeue().RawText.ToString());

                double lastReported = 0;
                var cancellation = callback.Cancellation;
                long msgIdx = 0;

                // On cancellation the loop just stops; whatever is queued is still flushed below.
                while (!cancellation.IsCancellationRequested)
                {
                    var msg = await parser.ReadNext();
                    if (msg == null)
                    {
                        break;
                    }

                    bool shouldReport = (msgIdx % progressUpdateThreshold) == 0 && rangeLen > 0;
                    if (shouldReport)
                    {
                        double done = (double)(msg.Position - range.Begin) / rangeLen;
                        progress.SetValue(done);
                        // The textual description is refreshed only after more than 5% of additional progress.
                        if (done - lastReported > 0.05)
                        {
                            reportStatus(done);
                            lastReported = done;
                        }
                    }

                    // Keep at most queueSize messages buffered; write one out whenever the queue overflows.
                    pendingMessages.Enqueue(msg);
                    if (pendingMessages.Count > queueSize)
                    {
                        writeNext();
                    }

                    ++msgIdx;
                }

                while (pendingMessages.Count > 0)
                {
                    writeNext();
                }
            });
        }
    }

    return new PreprocessingStepParams(
        tmpFileName,
        @params.FullPath + " (reordered)",
        @params.PreprocessingHistory.Add(new PreprocessingHistoryItem(name, factoryName)));
}
/// <summary>
/// Lazily produces the pieces of work for a forward read of the media stream.
/// Reading starts at the block-aligned start position and advances in steps of
/// <c>owner.BytesToParsePerThread</c> until the position passes the block-aligned
/// end of the requested range (plus one alignment block) or a chunk comes back empty.
/// </summary>
/// <remarks>
/// Every piece is yielded one iteration late: the following chunk is read first so
/// that it can be attached to the pending piece as <c>nextStreamData</c>, while the
/// pending chunk becomes the new piece's <c>prevStreamData</c>.
/// </remarks>
/// <param name="cancellationToken">Checked at the top of every loop iteration.</param>
/// <returns>Pieces of work in stream order. Nothing is yielded if the first chunk is empty.</returns>
/// <exception cref="OperationCanceledException">Cancellation was requested between two pieces.</exception>
IEnumerable<PieceOfWork> ReadRawDataFromMedia_Forward(CancellationToken cancellationToken)
{
    Stream stream = owner.media.DataStream;
    CreateParserParams parserParams = owner.currentParams;
    FileRange.Range range = parserParams.Range.Value;
    TextStreamPosition startPosition = new TextStreamPosition(parserParams.StartPosition, owner.textStreamPositioningParams);

    long beginStreamPos = startPosition.StreamPositionAlignedToBlockSize;
    long endStreamPos =
        new TextStreamPosition(range.End, owner.textStreamPositioningParams).StreamPositionAlignedToBlockSize
        + owner.textStreamPositioningParams.AlignmentBlockSize;

    PieceOfWork firstPieceOfWork = new PieceOfWork(Interlocked.Increment(ref owner.nextPieceOfWorkId), owner.tracer);

    if (beginStreamPos != 0 && !owner.encoding.IsSingleByte)
    {
        // For multi-byte encodings, give the first piece the bytes that immediately
        // precede its chunk (up to one maximum-size character) as prevStreamData.
        int maxBytesPerCharacter = owner.encoding.GetMaxByteCount(1);
        firstPieceOfWork.prevStreamData = new StreamData(
            beginStreamPos - maxBytesPerCharacter, new byte[maxBytesPerCharacter]);
        stream.Position = beginStreamPos - maxBytesPerCharacter;

        // Stream.Read may return fewer bytes than requested. Keep reading until the
        // prefix is complete so that the stream ends up exactly at beginStreamPos;
        // otherwise the main chunk below would be read from the wrong offset.
        int prefixBytesRead = 0;
        while (prefixBytesRead < maxBytesPerCharacter)
        {
            int justRead = stream.Read(
                firstPieceOfWork.prevStreamData.Bytes,
                prefixBytesRead,
                maxBytesPerCharacter - prefixBytesRead);
            if (justRead <= 0)
            {
                // End of stream: keep the bytes read so far, as the original single read did.
                break;
            }
            prefixBytesRead += justRead;
        }
    }
    else
    {
        stream.Position = beginStreamPos;
    }

    // The first piece starts at the exact requested text position rather than at a chunk boundary.
    firstPieceOfWork.streamData = AllocateAndReadStreamData(stream);
    if (firstPieceOfWork.streamData.IsEmpty)
    {
        yield break;
    }
    firstPieceOfWork.startTextPosition = startPosition.Value;
    firstPieceOfWork.stopTextPosition = beginStreamPos + owner.BytesToParsePerThread;
    firstPieceOfWork.outputBuffer = owner.AllocateOutputBuffer();
    beginStreamPos += owner.BytesToParsePerThread;

    PieceOfWork pieceOfWorkToYieldNextTime = firstPieceOfWork;
    for (; ; )
    {
        cancellationToken.ThrowIfCancellationRequested();

        // Read one chunk ahead so the pending piece gets its neighbour before it is handed out.
        PieceOfWork nextPieceOfWork = new PieceOfWork(Interlocked.Increment(ref owner.nextPieceOfWorkId), owner.tracer);
        nextPieceOfWork.streamData = AllocateAndReadStreamData(stream);
        nextPieceOfWork.prevStreamData = pieceOfWorkToYieldNextTime.streamData;
        nextPieceOfWork.startTextPosition = beginStreamPos;
        nextPieceOfWork.stopTextPosition = beginStreamPos + owner.BytesToParsePerThread;
        nextPieceOfWork.outputBuffer = owner.AllocateOutputBuffer();
        pieceOfWorkToYieldNextTime.nextStreamData = nextPieceOfWork.streamData;

        owner.tracer.Info("Start processing new peice of work. Currently being processed: {0}",
            Interlocked.Increment(ref owner.peicesOfWorkBeingProgressed));

        yield return pieceOfWorkToYieldNextTime;

        // Stop once the position has passed the end bound or the stream has no more data.
        if (beginStreamPos > endStreamPos)
        {
            break;
        }
        if (nextPieceOfWork.streamData.IsEmpty)
        {
            break;
        }

        pieceOfWorkToYieldNextTime = nextPieceOfWork;
        beginStreamPos += owner.BytesToParsePerThread;
    }
}