/// <summary>
/// Produces a sequence of checkpoints while walking the text exposed by <paramref name="tai"/>.
/// Every match found by <paramref name="matcher"/> in the current buffer yields a checkpoint with
/// <c>IsMatch = true</c>; every successful buffer advance yields a checkpoint with <c>IsMatch = false</c>
/// whose <c>EndPosition</c> is the position of the first character of the new buffer.
/// </summary>
/// <param name="tai">Text iterator supplying the current buffer and char-index-to-position mapping.</param>
/// <param name="matcher">Matcher applied to each buffer.</param>
/// <param name="progressAndCancellation">Checked for cancellation after every produced checkpoint.</param>
/// <param name="trace">Receives timing statistics once the iteration is finished.</param>
static IEnumerableAsync<Checkpoint> EnumCheckpoints(
    ITextAccessIterator tai,
    PlainTextMatcher matcher,
    ProgressAndCancellation progressAndCancellation,
    LJTraceSource trace)
{
    return EnumerableAsync.Produce<Checkpoint>(async yieldAsync =>
    {
        var advanceWatch = new Stopwatch();
        long advanceCalls = 0;
        var matchWatch = new Stopwatch();
        long matchCalls = 0;

        while (true)
        {
            var buffer = new StringSlice(tai.CurrentBuffer);

            // Report every match in the current buffer, resuming the search right after the previous hit.
            int searchFrom = 0;
            while (true)
            {
                matchWatch.Start();
                var found = matcher.Match(buffer, searchFrom);
                matchWatch.Stop();
                matchCalls++;

                if (!found.HasValue)
                {
                    break;
                }

                await yieldAsync.YieldAsync(new Checkpoint()
                {
                    Position = tai.CharIndexToPosition(found.Value.MatchBegin),
                    EndPosition = tai.CharIndexToPosition(found.Value.MatchEnd),
                    IsMatch = true
                });
                searchFrom = found.Value.MatchEnd;
                progressAndCancellation.CheckTextIterationCancellation();
            }

            // Keep the last MaxMatchLength characters of the buffer when advancing,
            // clamped at zero for buffers shorter than that.
            advanceWatch.Start();
            bool advanced = await tai.Advance(
                Math.Max(0, tai.CurrentBuffer.Length - matcher.MaxMatchLength));
            advanceWatch.Stop();
            advanceCalls++;

            if (!advanced)
            {
                break;
            }

            await yieldAsync.YieldAsync(new Checkpoint()
            {
                EndPosition = tai.CharIndexToPosition(0),
                IsMatch = false
            });
            progressAndCancellation.CheckTextIterationCancellation();
        }

        trace.Info("Stats: text buffer matching time: {0} ({1} times)",
            matchWatch.Elapsed, matchCalls);
        trace.Info("Stats: text buffer advance time: {0}/{1}={2}",
            advanceWatch.Elapsed, advanceCalls,
            TimeSpan.FromTicks(advanceWatch.ElapsedTicks / Math.Max(1, advanceCalls)));
    });
}
/// <summary>
/// Folds a stream of checkpoints into file ranges. A matching checkpoint opens a range; subsequent
/// matching checkpoints that start less than <paramref name="threshhold"/> after the end of the open
/// range extend it. A checkpoint at or beyond that distance closes the range, which is then produced.
/// The range still open when the checkpoints run out is produced last.
/// </summary>
/// <param name="checkpoints">Checkpoints to consume.</param>
/// <param name="threshhold">Maximum gap (exclusive) between an open range and a checkpoint that keeps the range open.</param>
/// <param name="progressAndCancellation">
/// Receives the continuation position and progress notification whenever a checkpoint is processed
/// without being merged into an open range (a lone non-match, or a checkpoint that closes a range).
/// </param>
static IEnumerableAsync<FileRange.Range> IterateMatchRanges(
    IEnumerableAsync<Checkpoint> checkpoints,
    long threshhold,
    ProgressAndCancellation progressAndCancellation)
{
    return EnumerableAsync.Produce<FileRange.Range>(async yieldAsync =>
    {
        FileRange.Range? openRange = null;

        await checkpoints.ForEach(async cp =>
        {
            // No range is open: a match opens one, anything else is pure progress.
            if (!openRange.HasValue)
            {
                if (cp.IsMatch)
                {
                    openRange = new FileRange.Range(cp.Position, cp.EndPosition);
                    return true;
                }
                progressAndCancellation.continuationToken.NextPosition = cp.EndPosition;
                progressAndCancellation.HandleTextIterationProgress(cp.EndPosition);
                return true;
            }

            FileRange.Range current = openRange.Value;

            // Close enough to the open range: extend on a match, otherwise leave it untouched.
            if (cp.Position - current.End < threshhold)
            {
                if (cp.IsMatch)
                {
                    openRange = new FileRange.Range(current.Begin, cp.EndPosition);
                }
                return true;
            }

            // Too far away: emit the open range, report progress and start over from this checkpoint.
            await yieldAsync.YieldAsync(current);
            progressAndCancellation.continuationToken.NextPosition = cp.EndPosition;
            progressAndCancellation.HandleTextIterationProgress(cp.EndPosition);
            openRange = cp.IsMatch
                ? new FileRange.Range(cp.Position, cp.EndPosition)
                : (FileRange.Range?)null;
            return true;
        });

        if (openRange.HasValue)
        {
            await yieldAsync.YieldAsync(openRange.Value);
        }
    });
}
/// <summary>
/// Produces the messages read by a parser created from <c>originalParams</c>, followed by up to
/// <c>jitterBufferSize</c> extra messages read by a second parser that starts at the far edge of the
/// original range (range end when reading forward, range begin when reading backward).
/// </summary>
/// <param name="underlyingParserFactory">Creates the parser for the given parameters.</param>
/// <remarks>
/// <c>originalParams.Range</c> must have a value; it is dereferenced to compute the start position
/// of the second parser.
/// NOTE(review): the parameter copies below assume <c>CreateParserParams</c> is a value type, so that
/// modifying the copy leaves <c>originalParams</c> untouched - confirm.
/// </remarks>
IEnumerableAsync<PostprocessedMessage> ReadAddMessagesFromRangeCompleteJitterBuffer(
    Func<CreateParserParams, Task<IPositionedMessagesParser>> underlyingParserFactory)
{
    return EnumerableAsync.Produce<PostprocessedMessage>(async yieldAsync =>
    {
        CreateParserParams mainParserParams = originalParams;
        // Kept on its own line: as a trailing comment on a collapsed line it disabled all code after it.
        //mainParserParams.Range = null;
        await DisposableAsync.Using(await underlyingParserFactory(mainParserParams), async mainParser =>
        {
            for (; ; )
            {
                var msg = await mainParser.ReadNextAndPostprocess();
                if (msg.Message == null)
                {
                    break;
                }
                if (!await yieldAsync.YieldAsync(msg))
                {
                    break;
                }
            }
        });

        // Second pass: unrestricted range, single-threaded, starting where the original range stops
        // in the reading direction.
        CreateParserParams jitterBufferCompletionParams = originalParams;
        jitterBufferCompletionParams.Flags |= MessagesParserFlag.DisableMultithreading;
        jitterBufferCompletionParams.Range = null;
        jitterBufferCompletionParams.StartPosition =
            originalParams.Direction == MessagesParserDirection.Forward
                ? originalParams.Range.Value.End
                : originalParams.Range.Value.Begin;
        await DisposableAsync.Using(await underlyingParserFactory(jitterBufferCompletionParams), async completionParser =>
        {
            // Read at most jitterBufferSize messages past the range edge.
            for (int i = 0; i < jitterBufferSize; ++i)
            {
                var msg = await completionParser.ReadNextAndPostprocess();
                if (msg.Message == null)
                {
                    break;
                }
                if (!await yieldAsync.YieldAsync(msg))
                {
                    break;
                }
            }
        });
    });
}
/// <summary>
/// Returns an asynchronous sequence of message batches read from the stream supplied by
/// <paramref name="getStream"/>.
/// </summary>
/// <param name="getStream">Provides the stream to read.</param>
/// <param name="releaseStream">Passed to the reading context together with <paramref name="getStream"/>.</param>
/// <param name="logFileNameHint">Optional hint forwarded to the reading context.</param>
/// <param name="progressHandler">Optional progress callback forwarded to the reading context.</param>
/// <remarks>
/// The <c>Context</c> is created and disposed inside the producer delegate. Wrapping the
/// <c>Produce</c> call itself in <c>using</c> disposed the context as soon as this method returned,
/// while the producer that captures the context still had work to do.
/// </remarks>
public IEnumerableAsync<Message[]> Read(Func<Stream> getStream, Action<Stream> releaseStream, string logFileNameHint = null, Action<double> progressHandler = null)
{
    return EnumerableAsync.Produce<Message[]>(async yieldAsync =>
    {
        // The context must stay alive until the read has completed.
        using (var ctx = new Context())
        {
            await ctx.Read(yieldAsync, getStream, releaseStream, logFileNameHint, cancellation, progressHandler);
        }
    }, false);
}
/// <summary>
/// Returns an asynchronous sequence of message batches read from the stream supplied asynchronously
/// by <paramref name="getStream"/>.
/// </summary>
/// <param name="getStream">Asynchronously provides the stream to read.</param>
/// <param name="releaseStream">Passed to the reading context together with <paramref name="getStream"/>.</param>
/// <param name="progressHandler">Optional progress callback forwarded to the reading context.</param>
/// <remarks>
/// The <c>Context</c> is created and disposed inside the producer delegate. Wrapping the
/// <c>Produce</c> call itself in <c>using</c> disposed the context as soon as this method returned,
/// while the producer that captures the context still had work to do.
/// </remarks>
public IEnumerableAsync<Message[]> Read(Func<Task<Stream>> getStream, Action<Stream> releaseStream, Action<double> progressHandler = null)
{
    return EnumerableAsync.Produce<Message[]>(async yieldAsync =>
    {
        // The context must stay alive until the read has completed.
        using (var ctx = new Context())
        {
            await ctx.Read(yieldAsync, getStream, releaseStream, textLogParser, cancellation, progressHandler);
        }
    }, false);
}
/// <summary>
/// Translates a postprocessing result into a filtering result.
/// A null result maps to <c>FilterAction.Exclude</c>, the <paramref name="dummyFilter"/> instance maps
/// to <c>FilterAction.Include</c>, and any other filter contributes its own action and itself.
/// </summary>
/// <param name="obj">Postprocessing result; cast to <c>IFilter</c>.</param>
/// <param name="dummyFilter">Filter instance that stands for an unconditional include.</param>
public static MessageFilteringResult GetFilteringResultFromPostprocessorResult(object obj, IFilter dummyFilter)
{
    var f = (IFilter)obj;
    if (f == null)
    {
        return new MessageFilteringResult { Action = FilterAction.Exclude };
    }
    if (f == dummyFilter)
    {
        return new MessageFilteringResult { Action = FilterAction.Include };
    }
    return new MessageFilteringResult { Action = f.Action, Filter = f };
}
// Closes the enclosing type whose declaration begins before this excerpt.
};

/// <summary>
/// Produces the messages of all searchable ranges whose filtering result is not
/// <c>FilterAction.Exclude</c>, then one final item with a null message.
/// Progress, the continuation position and cancellation are updated after every message read.
/// </summary>
IEnumerableAsync<SearchResultMessage> Enum()
{
    return EnumerableAsync.Produce<SearchResultMessage>(async yieldAsync =>
    {
        Func<IMessagesPostprocessor> postprocessor = () => new MessagesPostprocessor(parserParams.SearchParams, trace, dummyFilter);
        long searchableRangesLength = 0;
        int searchableRangesCount = 0;
        long totalMessagesCount = 0;
        long totalHitsCount = 0;
        await EnumSearchableRanges().ForEach(async currentSearchableRange =>
        {
            searchableRangesLength += currentSearchableRange.Length;
            ++searchableRangesCount;
            await DisposableAsync.Using(await CreateParserForSearchableRange(currentSearchableRange, postprocessor), async parser =>
            {
                long messagesCount = 0;
                long hitsCount = 0;
                for (; ; )
                {
                    var tmp = await parser.ReadNextAndPostprocess();
                    if (tmp.Message == null)
                    {
                        break;
                    }
                    ++messagesCount;
                    var msg = tmp.Message;
                    var filteringResult = MessagesPostprocessor.GetFilteringResultFromPostprocessorResult(
                        tmp.PostprocessingResult, dummyFilter);
                    if (filteringResult.Action != FilterAction.Exclude)
                    {
                        ++hitsCount;
                        await yieldAsync.YieldAsync(new SearchResultMessage(msg, filteringResult));
                    }
                    progressAndCancellation.HandleMessageReadingProgress(msg.Position);
                    progressAndCancellation.continuationToken.NextPosition = msg.EndPosition;
                    progressAndCancellation.CheckTextIterationCancellation();
                }
                PrintPctStats(string.Format("hits pct in range {0}", currentSearchableRange), hitsCount, messagesCount);
                totalMessagesCount += messagesCount;
                totalHitsCount += hitsCount;
            });
            return true;
        });
        trace.Info("Stats: searchable ranges count: {0}", searchableRangesCount);
        trace.Info("Stats: ave searchable range len: {0}", searchableRangesCount != 0 ? searchableRangesLength / searchableRangesCount : 0);
        PrintPctStats("searchable ranges coverage pct", searchableRangesLength, requestedRange.Length);
        PrintPctStats("hits pct overall", totalHitsCount, totalMessagesCount);
        // Trailing item with a null message; presumably an end-of-results marker - confirm with consumers.
        await yieldAsync.YieldAsync(new SearchResultMessage(null, new MessageFilteringResult()));
    });
}

/// <summary>
/// Traces <paramref name="num"/> as a percentage of <paramref name="denum"/> (0 when the denominator is 0).
/// </summary>
void PrintPctStats(string name, long num, long denum)
{
    trace.Info("Stats: {0}: {1:F4}%", name, denum != 0 ? num * 100d / denum : 0d);
}

/// <summary>
/// Produces the ranges that have to be parsed. When the matcher reports that the plain-text
/// optimization is not possible, the whole <c>requestedRange</c> is produced as the single range.
/// </summary>
IEnumerableAsync<FileRange.Range> EnumSearchableRanges()
{
    return EnumerableAsync.Produce<FileRange.Range>(async yieldAsync =>
    {
        var matcher = new PlainTextMatcher(parserParams, textStreamPositioningParams, plainTextSearchOptimizationAllowed, regexFactory);
        if (!matcher.PlainTextSearchOptimizationPossible)
        {
            await yieldAsync.YieldAsync(requestedRange);
            return;
        }
        // Nothing in this method assigns a non-null value, so only the first branch below is taken.
        long? skipRangesDownThisPosition = null;
        await EnumSearchableRangesCore(matcher).ForEach(async currentRange =>
        {
            if (skipRangesDownThisPosition == null)
            {
                await yieldAsync.YieldAsync(currentRange);
            }
            else
            {
                long skipRangesDownThisPositionVal = skipRangesDownThisPosition.Value;
                if (currentRange.End < skipRangesDownThisPositionVal) // todo: < or <= ?
                {
                    return true;
                }
                skipRangesDownThisPosition = null;
                if (currentRange.Begin < skipRangesDownThisPositionVal) // todo: < or <= ?
                {
                    await yieldAsync.YieldAsync(new FileRange.Range(skipRangesDownThisPositionVal, currentRange.End));
                }
                else
                {
                    await yieldAsync.YieldAsync(currentRange);
                }
            }
            return true;
        });
    });
}

/// <summary>
/// Creates a forward parser that starts at the beginning of <paramref name="searchableRange"/> and is
/// limited to that range.
/// </summary>
/// <param name="searchableRange">Range to parse.</param>
/// <param name="messagesPostprocessor">Postprocessor factory handed to the parser.</param>
async Task<IPositionedMessagesParser> CreateParserForSearchableRange(
    FileRange.Range searchableRange,
    Func<IMessagesPostprocessor> messagesPostprocessor)
{
    // Hard-wired switch: multithreading stays enabled unless this is changed to true.
    bool disableMultithreading = false;
    return await owner.CreateParser(new CreateParserParams(
        searchableRange.Begin,
        searchableRange,
        MessagesParserFlag.HintParserWillBeUsedForMassiveSequentialReading
            | (disableMultithreading ? MessagesParserFlag.DisableMultithreading : MessagesParserFlag.None),
        MessagesParserDirection.Forward,
        messagesPostprocessor));
}

/// <summary>
/// Scans the raw stream from the beginning of <c>requestedRange</c>, groups the matcher's hits into
/// ranges and produces each range after adjusting it with <c>PostprocessHintRange</c>.
/// </summary>
/// <param name="matcher">Matcher used to find hits in the text.</param>
IEnumerableAsync<FileRange.Range> EnumSearchableRangesCore(PlainTextMatcher matcher)
{
    return EnumerableAsync.Produce<FileRange.Range>(async yieldAsync =>
    {
        ITextAccess ta = new StreamTextAccess(rawStream, streamEncoding, textStreamPositioningParams);
        using (var tai = await ta.OpenIterator(requestedRange.Begin, TextAccessDirection.Forward))
        {
            var lastRange = new FileRange.Range();
            await IterateMatchRanges(
                EnumCheckpoints(tai, matcher, progressAndCancellation, trace),
                // todo: tune next parameter to find the value giving max performance.
                // On one sample log bigger block was better than many small ones.
                // Hence quite big threshold.
                textStreamPositioningParams.AlignmentBlockSize * 8,
                progressAndCancellation
            ).ForEach(async r =>
            {
                var postprocessedRange = await PostprocessHintRange(r, lastRange);
                lastRange = postprocessedRange;
                await yieldAsync.YieldAsync(postprocessedRange);
                return true;
            });
        }
    });
}

/// <summary>
/// Adjusts a hint range to message boundaries reported by <c>aligmentSplitter</c>, optionally widens it
/// by the jitter buffer size on both sides, clips it to <c>requestedRange</c> and trims it against
/// <paramref name="lastRange"/>.
/// </summary>
/// <param name="r">Range to adjust.</param>
/// <param name="lastRange">Previously returned range.</param>
/// <returns>The adjusted range.</returns>
async Task<FileRange.Range> PostprocessHintRange(FileRange.Range r, FileRange.Range lastRange)
{
    long fixedBegin = r.Begin;
    long fixedEnd = r.End;

    // Widening applies only when dejittering is configured and not disabled by the parser flags.
    int? inflateRangeBy = null;
    if (dejitteringParams != null && (parserParams.Flags & MessagesParserFlag.DisableDejitter) == 0)
    {
        inflateRangeBy = dejitteringParams.Value.JitterBufferSize;
    }

    // Forward from the range end: move the end to the end of the captured message,
    // then further by up to inflateRangeBy messages.
    await aligmentSplitter.BeginSplittingSession(requestedRange, r.End, MessagesParserDirection.Forward);
    if (await aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
    {
        fixedEnd = aligmentCapture.EndPosition;
        if (inflateRangeBy != null)
        {
            for (int i = 0; i < inflateRangeBy.Value; ++i)
            {
                if (!await aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
                {
                    break;
                }
                fixedEnd = aligmentCapture.EndPosition;
            }
        }
    }
    else
    {
        // No message captured going forward: extend to the end of the requested range.
        fixedEnd = requestedRange.End;
    }
    aligmentSplitter.EndSplittingSession();

    // Backward from the range begin: move the begin to the begin of the captured message,
    // then further by up to inflateRangeBy messages.
    await aligmentSplitter.BeginSplittingSession(requestedRange, fixedBegin, MessagesParserDirection.Backward);
    if (await aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
    {
        fixedBegin = aligmentCapture.BeginPosition;
        if (inflateRangeBy != null)
        {
            for (int i = 0; i < inflateRangeBy.Value; ++i)
            {
                if (!await aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
                {
                    break;
                }
                fixedBegin = aligmentCapture.BeginPosition;
            }
        }
    }
    aligmentSplitter.EndSplittingSession();

    var ret = new FileRange.Range(fixedBegin, fixedEnd);
    ret = FileRange.Range.Intersect(ret, requestedRange).Common;

    // NOTE(review): RelativePosition == 0 presumably means the two ranges overlap; in that case only
    // the part of the new range to the right of lastRange is kept - confirm against FileRange.Range.Intersect.
    var lastRangeIntersection = FileRange.Range.Intersect(ret, lastRange);
    if (lastRangeIntersection.RelativePosition == 0)
    {
        ret = lastRangeIntersection.Leftover1Right;
    }
    return ret;
}