/// <summary>
/// Walks the text exposed by <paramref name="tai"/> one buffer at a time and lazily yields
/// checkpoints: one with <c>IsMatch = true</c> for every match reported by
/// <paramref name="matcher"/>, and one with <c>IsMatch = false</c> after each successful
/// advance of the iterator.
/// </summary>
/// <param name="tai">Text iterator; it is advanced by this method as enumeration proceeds.</param>
/// <param name="matcher">Matcher applied to each buffer; also supplies <c>MaxMatchLength</c>.</param>
/// <param name="progressAndCancellation">Its cancellation check is invoked after every yielded checkpoint.</param>
/// <param name="trace">Receives timing statistics once the text has been fully iterated.</param>
/// <returns>A deferred sequence of checkpoints in iteration order.</returns>
/// <remarks>
/// The statistics are written only when the sequence is enumerated to its end; a consumer
/// that stops early (or a cancellation exception) skips them.
/// </remarks>
static IEnumerable<Checkpoint> EnumCheckpoints(
    ITextAccessIterator tai,
    PlainTextMatcher matcher,
    ProgressAndCancellation progressAndCancellation,
    LJTraceSource trace)
{
    var advanceTime = new Stopwatch();
    long advancesCount = 0;
    var matchingTime = new Stopwatch();
    long matchCount = 0;

    for (;;)
    {
        StringSlice buf = new StringSlice(tai.CurrentBuffer);

        // Find all matches in the current buffer, resuming each search right after the previous match.
        for (int startIdx = 0; ;)
        {
            matchingTime.Start();
            var match = matcher.Match(buf, startIdx);
            matchingTime.Stop();
            ++matchCount;

            if (!match.HasValue)
            {
                break;
            }

            yield return new Checkpoint()
            {
                Position = tai.CharIndexToPosition(match.Value.MatchBegin),
                EndPosition = tai.CharIndexToPosition(match.Value.MatchEnd),
                IsMatch = true
            };

            // NOTE(review): assumes the matcher never reports an empty match (MatchEnd > startIdx);
            // otherwise this loop would not make progress - confirm against PlainTextMatcher.
            startIdx = match.Value.MatchEnd;
            progressAndCancellation.CheckTextIterationCancellation();
        }

        // Advance by the buffer length minus MaxMatchLength (never negative) - presumably so that
        // a match straddling the buffer boundary is still visible in the next buffer.
        advanceTime.Start();
        bool stop = !tai.Advance(Math.Max(0, tai.CurrentBuffer.Length - matcher.MaxMatchLength));
        advanceTime.Stop();
        ++advancesCount;

        if (stop)
        {
            break;
        }

        // Non-match checkpoint located at char index 0 of the iterator after the advance.
        yield return new Checkpoint()
        {
            EndPosition = tai.CharIndexToPosition(0),
            IsMatch = false
        };
        progressAndCancellation.CheckTextIterationCancellation();
    }

    trace.Info("Stats: text buffer matching time: {0} ({1} times)", matchingTime.Elapsed, matchCount);

    // Use Elapsed.Ticks (TimeSpan ticks, 100 ns each). Stopwatch.ElapsedTicks is expressed in
    // Stopwatch.Frequency units and gives a wrong duration when passed to TimeSpan.FromTicks.
    trace.Info("Stats: text buffer advance time: {0}/{1}={2}",
        advanceTime.Elapsed,
        advancesCount,
        TimeSpan.FromTicks(advanceTime.Elapsed.Ticks / Math.Max(1, advancesCount)));
}
/// <summary>
/// Converts the object produced by message postprocessing into a
/// <see cref="MessageFilteringResult"/>.
/// </summary>
/// <param name="obj">Postprocessing result; must be null or an <c>IFilter</c> (it is cast directly).</param>
/// <returns>
/// Exclude when <paramref name="obj"/> is null, Include when it is the <c>dummyFilter</c> instance,
/// otherwise the filter's own action together with the filter itself.
/// </returns>
public static MessageFilteringResult GetFilteringResultFromPostprocessorResult(object obj)
{
    var f = (IFilter)obj;
    if (f == null)
    {
        return new MessageFilteringResult { Action = FilterAction.Exclude };
    }
    if (f == dummyFilter)
    {
        return new MessageFilteringResult { Action = FilterAction.Include };
    }
    return new MessageFilteringResult { Action = f.Action, Filter = f };
}
}; // closes an enclosing declaration whose header lies above this span

/// <summary>
/// Lazily enumerates search hits: every message read from the searchable ranges whose
/// filtering action is not Exclude, followed by one final item carrying a null message.
/// </summary>
/// <remarks>
/// Progress is reported and cancellation is checked after every message read.
/// Statistics are traced once all ranges have been processed.
/// </remarks>
IEnumerable<SearchResultMessage> Enum()
{
    using (var threadsBulkProcessing = threads.UnderlyingThreadsContainer.StartBulkProcessing())
    {
        Func<IMessagesPostprocessor> postprocessor =
            () => new MessagesPostprocessor(parserParams.SearchParams, trace);

        long searchableRangesLength = 0;
        int searchableRangesCount = 0;
        long totalMessagesCount = 0;
        long totalHitsCount = 0;

        foreach (var currentSearchableRange in EnumSearchableRanges())
        {
            searchableRangesLength += currentSearchableRange.Length;
            ++searchableRangesCount;

            using (var parser = CreateParserForSearchableRange(currentSearchableRange, postprocessor))
            {
                long messagesCount = 0;
                long hitsCount = 0;

                for (;;)
                {
                    var tmp = parser.ReadNextAndPostprocess();
                    if (tmp.Message == null)
                    {
                        // No more messages in this range.
                        break;
                    }

                    ++messagesCount;

                    var msg = tmp.Message;
                    var filteringResult = MessagesPostprocessor.GetFilteringResultFromPostprocessorResult(
                        tmp.PostprocessingResult);

                    if (filteringResult.Action != FilterAction.Exclude)
                    {
                        ++hitsCount;
                        yield return new SearchResultMessage(msg, filteringResult);
                    }

                    progressAndCancellation.HandleMessageReadingProgress(msg.Position);
                    // Record where reading would resume after this message.
                    progressAndCancellation.continuationToken.NextPosition = msg.EndPosition;
                    progressAndCancellation.CheckTextIterationCancellation();
                }

                PrintPctStats(
                    string.Format("hits pct in range {0}", currentSearchableRange),
                    hitsCount,
                    messagesCount);
                totalMessagesCount += messagesCount;
                totalHitsCount += hitsCount;
            }
        }

        trace.Info("Stats: searchable ranges count: {0}", searchableRangesCount);
        trace.Info("Stats: ave searchable range len: {0}",
            searchableRangesCount != 0 ? searchableRangesLength / searchableRangesCount : 0);
        PrintPctStats("searchable ranges coverage pct", searchableRangesLength, requestedRange.Length);
        PrintPctStats("hits pct overall", totalHitsCount, totalMessagesCount);
    }

    // Final item with a null message - presumably an end-of-results marker for the consumer.
    yield return new SearchResultMessage(null, new MessageFilteringResult());
}

/// <summary>
/// Traces <paramref name="num"/> as a percentage of <paramref name="denum"/>;
/// reports 0 when <paramref name="denum"/> is zero.
/// </summary>
void PrintPctStats(string name, long num, long denum)
{
    trace.Info("Stats: {0}: {1:F4}%", name, denum != 0 ? num * 100d / denum : 0d);
}

/// <summary>
/// Enumerates the file ranges that have to be parsed. When the plain-text search
/// optimization is not possible the whole requested range is returned as a single item;
/// otherwise the ranges come from <see cref="EnumSearchableRangesCore"/>.
/// </summary>
IEnumerable<FileRange.Range> EnumSearchableRanges()
{
    var matcher = new PlainTextMatcher(parserParams, textStreamPositioningParams, plainTextSearchOptimizationAllowed);
    if (!matcher.PlainTextSearchOptimizationPossible)
    {
        yield return requestedRange;
        yield break;
    }

    // NOTE(review): this local is never assigned a non-null value in this method, so the
    // skipping branch below is currently unreachable. Kept as in the original.
    long? skipRangesDownThisPosition = null;
    foreach (var currentRange in EnumSearchableRangesCore(matcher))
    {
        if (skipRangesDownThisPosition == null)
        {
            yield return currentRange;
        }
        else
        {
            long skipRangesDownThisPositionVal = skipRangesDownThisPosition.Value;
            if (currentRange.End < skipRangesDownThisPositionVal) // todo: < or <= ?
            {
                continue;
            }
            skipRangesDownThisPosition = null;
            if (currentRange.Begin < skipRangesDownThisPositionVal) // todo: < or <= ?
            {
                yield return new FileRange.Range(skipRangesDownThisPositionVal, currentRange.End);
            }
            else
            {
                yield return currentRange;
            }
        }
    }
}

/// <summary>
/// Creates a forward parser over <paramref name="searchableRange"/>, starting at the range's
/// beginning and hinted for massive sequential reading.
/// </summary>
IPositionedMessagesParser CreateParserForSearchableRange(
    FileRange.Range searchableRange,
    Func<IMessagesPostprocessor> messagesPostprocessor)
{
    // Switch left in code; multithreading is currently never disabled here.
    bool disableMultithreading = false;

    var flags = MessagesParserFlag.HintParserWillBeUsedForMassiveSequentialReading
        | (disableMultithreading ? MessagesParserFlag.DisableMultithreading : MessagesParserFlag.None);

    return owner.CreateParser(new CreateParserParams(
        searchableRange.Begin,
        searchableRange,
        flags,
        MessagesParserDirection.Forward,
        messagesPostprocessor));
}

/// <summary>
/// Scans the raw stream with <paramref name="matcher"/> from the beginning of the requested
/// range and yields the resulting match ranges after <see cref="PostprocessHintRange"/> has
/// adjusted each of them against the previously yielded range.
/// </summary>
IEnumerable<FileRange.Range> EnumSearchableRangesCore(PlainTextMatcher matcher)
{
    ITextAccess ta = new StreamTextAccess(rawStream, streamEncoding, textStreamPositioningParams);
    using (var tai = ta.OpenIterator(requestedRange.Begin, TextAccessDirection.Forward))
    {
        // Captured by the lambda below; because the query is lazy, each hint range is
        // postprocessed against the range yielded just before it.
        var lastRange = new FileRange.Range();

        var matchRanges = IterateMatchRanges(
            EnumCheckpoints(tai, matcher, progressAndCancellation, trace),
            // todo: tune next parameter to find the value giving max performance.
            // On one sample log bigger block was better than many small ones.
            // Hence quite big threshold.
            textStreamPositioningParams.AlignmentBlockSize * 8,
            progressAndCancellation
        ).Select(hint => PostprocessHintRange(hint, lastRange));

        foreach (var r in matchRanges)
        {
            lastRange = r;
            yield return r;
        }
    }
}

/// <summary>
/// Expands hint range <paramref name="r"/> to the message boundaries reported by the alignment
/// splitter, optionally inflates it by the jitter buffer size, clips it to the requested range
/// and adjusts it against <paramref name="lastRange"/>.
/// </summary>
/// <param name="r">Range produced by the plain-text matching pass.</param>
/// <param name="lastRange">Range returned by the previous call (default range for the first call).</param>
/// <returns>The adjusted range.</returns>
FileRange.Range PostprocessHintRange(FileRange.Range r, FileRange.Range lastRange)
{
    long fixedBegin = r.Begin;
    long fixedEnd = r.End;

    // Number of extra messages to include on each side; set only when dejittering is configured and not disabled.
    int? inflateRangeBy = null;
    if (dejitteringParams != null && (parserParams.Flags & MessagesParserFlag.DisableDejitter) == 0)
    {
        inflateRangeBy = dejitteringParams.Value.JitterBufferSize;
    }

    // Forward pass from r.End: move the end to the end of the captured message(s).
    aligmentSplitter.BeginSplittingSession(requestedRange, r.End, MessagesParserDirection.Forward);
    if (aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
    {
        fixedEnd = aligmentCapture.EndPosition;
        if (inflateRangeBy != null)
        {
            for (int i = 0; i < inflateRangeBy.Value; ++i)
            {
                if (!aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
                {
                    break;
                }
                fixedEnd = aligmentCapture.EndPosition;
            }
        }
    }
    else
    {
        // Nothing captured going forward: extend to the end of the requested range.
        fixedEnd = requestedRange.End;
    }
    aligmentSplitter.EndSplittingSession();

    // Backward pass from the original begin: move the begin to the start of the captured message(s).
    aligmentSplitter.BeginSplittingSession(requestedRange, fixedBegin, MessagesParserDirection.Backward);
    if (aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
    {
        fixedBegin = aligmentCapture.BeginPosition;
        if (inflateRangeBy != null)
        {
            for (int i = 0; i < inflateRangeBy.Value; ++i)
            {
                if (!aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
                {
                    break;
                }
                fixedBegin = aligmentCapture.BeginPosition;
            }
        }
    }
    aligmentSplitter.EndSplittingSession();

    var ret = new FileRange.Range(fixedBegin, fixedEnd);
    ret = FileRange.Range.Intersect(ret, requestedRange).Common;

    // When the intersection with the previous range reports RelativePosition == 0, keep only the
    // right-hand leftover - presumably the part not already covered by the previous range.
    var lastRangeIntersection = FileRange.Range.Intersect(ret, lastRange);
    if (lastRangeIntersection.RelativePosition == 0)
    {
        ret = lastRangeIntersection.Leftover1Right;
    }

    return ret;
}