/// <summary>
/// Initializes a searching parser that works on <paramref name="rawStream"/>.
/// </summary>
/// <remarks>
/// The range to search is taken from <c>p.Range</c>. When <c>p.ContinuationToken</c> is a
/// <c>ContinuationToken</c>, the range starts at that token's <c>NextPosition</c> instead
/// (the end of the range is kept). The result sequence is obtained from <c>Enum()</c> and
/// stored in <c>impl</c>.
/// </remarks>
/// <param name="owner">Reader that owns this parser; stored in the <c>owner</c> field.</param>
/// <param name="p">Search parameters: range, flags, continuation token, progress handler and cancellation.</param>
/// <param name="textStreamPositioningParams">Positioning parameters; stored and passed to the alignment text access.</param>
/// <param name="dejitteringParams">Optional dejittering parameters; stored as is.</param>
/// <param name="rawStream">Stream with the raw log data.</param>
/// <param name="streamEncoding">Encoding of <paramref name="rawStream"/>.</param>
/// <param name="allowPlainTextSearchOptimization">
/// Enables the plain text search optimization unless
/// <c>MessagesParserFlag.DisablePlainTextSearchOptimization</c> is set in <c>p.Flags</c>.
/// </param>
/// <param name="headerRe">Header regex; a clone of it is given to the alignment splitter.</param>
/// <param name="threads">Log source threads; stored in the <c>threads</c> field.</param>
/// <param name="traceSourceFactory">Factory used to create the "LogSource" trace source.</param>
/// <param name="regexFactory">Regex factory; stored and passed to the dummy filter.</param>
public SearchingParser(
    IPositionedMessagesReader owner,
    CreateSearchingParserParams p,
    TextStreamPositioningParams textStreamPositioningParams,
    DejitteringParams? dejitteringParams,
    Stream rawStream,
    Encoding streamEncoding,
    bool allowPlainTextSearchOptimization,
    LoadedRegex headerRe,
    ILogSourceThreads threads,
    ITraceSourceFactory traceSourceFactory,
    RegularExpressions.IRegexFactory regexFactory
)
{
    this.owner = owner;
    parserParams = p;
    plainTextSearchOptimizationAllowed =
        allowPlainTextSearchOptimization
        && ((p.Flags & MessagesParserFlag.DisablePlainTextSearchOptimization) == 0);
    this.threads = threads;
    requestedRange = p.Range;
    this.textStreamPositioningParams = textStreamPositioningParams;
    this.dejitteringParams = dejitteringParams;
    this.rawStream = rawStream;
    this.streamEncoding = streamEncoding;
    this.regexFactory = regexFactory;

    // Trace source name carries the hash code of this instance in hex.
    var traceSourceId = "srchp." + GetHashCode().ToString("x");
    trace = traceSourceFactory.CreateTraceSource("LogSource", traceSourceId);
    dummyFilter = new Filter(FilterAction.Include, "", true, new Search.Options(), null, regexFactory);

    // Resume from the position recorded in the continuation token, if one was supplied.
    var resumeToken = p.ContinuationToken as ContinuationToken;
    if (resumeToken != null)
    {
        requestedRange = new FileRange.Range(resumeToken.NextPosition, requestedRange.End);
    }

    // Objects used later to align hint ranges to message boundaries.
    aligmentTextAccess = new StreamTextAccess(rawStream, streamEncoding, textStreamPositioningParams);
    var splitterRegex = headerRe.Clone().Regex;
    var splitterFlags = headerRe.GetHeaderReSplitterFlags();
    aligmentSplitter = new MessagesSplitter(aligmentTextAccess, splitterRegex, splitterFlags);
    aligmentCapture = new TextMessageCapture();

    // Progress/cancellation state; its continuation token starts at the beginning of the range.
    var progressState = new ProgressAndCancellation();
    progressState.progressHandler = p.ProgressHandler;
    progressState.cancellationToken = p.Cancellation;
    var initialToken = new ContinuationToken();
    initialToken.NextPosition = requestedRange.Begin;
    progressState.continuationToken = initialToken;
    progressAndCancellation = progressState;

    impl = Enum();
}
/// <summary>
/// Creates a valid TextStreamPosition object that points to the character that starts at or contains
/// the byte defined by <paramref name="streamPosition"/>.
/// </summary>
/// <param name="streamPosition">Stream position. In other words, 0-based byte index in the stream's data.</param>
/// <param name="streamEncoding">Mandatory encoding information of the stream.</param>
/// <param name="stream">
/// Stream to read from. Required only when the position cannot be computed arithmetically,
/// i.e. the encoding is neither single-byte, nor Encoding.Unicode / Encoding.BigEndianUnicode,
/// nor Encoding.UTF32.
/// </param>
/// <param name="textStreamPositioningParams">
/// Positioning parameters passed to every TextStreamPosition and StreamTextAccess created here.
/// </param>
/// <returns>Valid TextStreamPosition object.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="streamEncoding"/> is null, or <paramref name="stream"/> is null
/// while the encoding requires reading the stream.
/// </exception>
public static async Task<TextStreamPosition> StreamPositionToTextStreamPosition(
    long streamPosition,
    Encoding streamEncoding,
    Stream stream,
    TextStreamPositioningParams textStreamPositioningParams)
{
    if (streamEncoding == null)
    {
        throw new ArgumentNullException("streamEncoding");
    }

    TextStreamPosition tmp = new TextStreamPosition(streamPosition, textStreamPositioningParams);

#if !SILVERLIGHT
    // Single-byte encodings need no conversion: tmp is returned unchanged.
    if (streamEncoding.IsSingleByte)
    {
        return tmp;
    }
#endif

    // UTF-16: the in-buffer position is divided by the 2-byte code unit size.
    if (streamEncoding == Encoding.Unicode || streamEncoding == Encoding.BigEndianUnicode)
    {
        return new TextStreamPosition(
            tmp.StreamPositionAlignedToBlockSize, tmp.CharPositionInsideBuffer / 2, textStreamPositioningParams);
    }

#if !SILVERLIGHT
    // UTF-32: the in-buffer position is divided by the 4-byte code unit size.
    if (streamEncoding == Encoding.UTF32)
    {
        return new TextStreamPosition(
            tmp.StreamPositionAlignedToBlockSize, tmp.CharPositionInsideBuffer / 4, textStreamPositioningParams);
    }
#endif

    if (stream == null)
    {
        // ArgumentNullException(paramName, message): the parameter name goes first.
        throw new ArgumentNullException(
            "stream",
            "stream object is required to determine text stream position with given encoding");
    }

    // Any other encoding: read the block through a stream bounded at streamPosition
    // (presumably so that decoding stops there - confirm against BoundedStream) and
    // convert the length of the decoded text into a position.
    var boundedStream = new BoundedStream();
    boundedStream.SetStream(stream, false);
    boundedStream.SetBounds(null, streamPosition);
    StreamTextAccess tmpTextAccess = new StreamTextAccess(boundedStream, streamEncoding, textStreamPositioningParams);
    await tmpTextAccess.BeginReading(tmp.StreamPositionAlignedToBlockSize, TextAccessDirection.Forward);
    try
    {
        tmp = tmpTextAccess.CharIndexToPosition(tmpTextAccess.BufferString.Length);
    }
    finally
    {
        // Always close the reading session that was opened above.
        tmpTextAccess.EndReading();
    }
    return tmp;
}
/// <summary>
/// Converts a postprocessing result object into a <c>MessageFilteringResult</c>.
/// </summary>
/// <param name="obj">Postprocessing result; it is cast to <c>IFilter</c> and may be null.</param>
/// <returns>
/// An Exclude result when <paramref name="obj"/> is null, an Include result when it is the
/// dummy filter, otherwise a result carrying the filter and the filter's own action.
/// </returns>
public static MessageFilteringResult GetFilteringResultFromPostprocessorResult(object obj)
{
    var f = (IFilter)obj;
    if (f == null)
    {
        return new MessageFilteringResult { Action = FilterAction.Exclude };
    }
    if (f == dummyFilter)
    {
        return new MessageFilteringResult { Action = FilterAction.Include };
    }
    return new MessageFilteringResult { Action = f.Action, Filter = f };
}
};

/// <summary>
/// Enumerates search hits in the requested range.
/// </summary>
/// <remarks>
/// For every searchable range a parser is created and read until it returns a null message.
/// Messages whose filtering action is not Exclude are yielded. After each message the progress
/// handler is notified, the continuation token is advanced to the end of the message and
/// cancellation is checked. Statistics are written to the trace at the end.
/// The last element of the sequence always has a null message (presumably an
/// end-of-results marker for the consumer - confirm against the caller).
/// </remarks>
IEnumerable<SearchResultMessage> Enum()
{
    using (var threadsBulkProcessing = threads.UnderlyingThreadsContainer.StartBulkProcessing())
    {
        Func<IMessagesPostprocessor> postprocessor =
            () => new MessagesPostprocessor(parserParams.SearchParams, trace);
        long searchableRangesLength = 0;
        int searchableRangesCount = 0;
        long totalMessagesCount = 0;
        long totalHitsCount = 0;
        foreach (var currentSearchableRange in EnumSearchableRanges())
        {
            searchableRangesLength += currentSearchableRange.Length;
            ++searchableRangesCount;
            using (var parser = CreateParserForSearchableRange(currentSearchableRange, postprocessor))
            {
                long messagesCount = 0;
                long hitsCount = 0;
                for (;;)
                {
                    var tmp = parser.ReadNextAndPostprocess();
                    if (tmp.Message == null)
                    {
                        break;
                    }

                    ++messagesCount;

                    var msg = tmp.Message;
                    var filteringResult = MessagesPostprocessor.GetFilteringResultFromPostprocessorResult(
                        tmp.PostprocessingResult);

                    if (filteringResult.Action != FilterAction.Exclude)
                    {
                        ++hitsCount;
                        yield return new SearchResultMessage(msg, filteringResult);
                    }

                    progressAndCancellation.HandleMessageReadingProgress(msg.Position);
                    // Remember where to resume from if the search is continued later.
                    progressAndCancellation.continuationToken.NextPosition = msg.EndPosition;

                    progressAndCancellation.CheckTextIterationCancellation();
                }
                PrintPctStats(string.Format("hits pct in range {0}", currentSearchableRange), hitsCount, messagesCount);
                totalMessagesCount += messagesCount;
                totalHitsCount += hitsCount;
            }
        }
        trace.Info("Stats: searchable ranges count: {0}", searchableRangesCount);
        trace.Info("Stats: ave searchable range len: {0}",
            searchableRangesCount != 0 ? searchableRangesLength / searchableRangesCount : 0);
        PrintPctStats("searchable ranges coverage pct", searchableRangesLength, requestedRange.Length);
        PrintPctStats("hits pct overall", totalHitsCount, totalMessagesCount);
    }

    yield return new SearchResultMessage(null, new MessageFilteringResult());
}

/// <summary>
/// Writes a percentage statistic to the trace.
/// </summary>
/// <param name="name">Name of the statistic.</param>
/// <param name="num">Numerator.</param>
/// <param name="denum">Denominator; when it is zero the reported percentage is 0.</param>
void PrintPctStats(string name, long num, long denum)
{
    trace.Info("Stats: {0}: {1:F4}%", name, denum != 0 ? num * 100d / denum : 0d);
}

/// <summary>
/// Enumerates the ranges that have to be parsed to find the search hits.
/// </summary>
/// <remarks>
/// When the plain text search optimization is not possible, the whole requested range is
/// returned as a single range. Otherwise the ranges come from <c>EnumSearchableRangesCore</c>.
/// </remarks>
IEnumerable<FileRange.Range> EnumSearchableRanges()
{
    var matcher = new PlainTextMatcher(parserParams, textStreamPositioningParams, plainTextSearchOptimizationAllowed);
    if (!matcher.PlainTextSearchOptimizationPossible)
    {
        yield return requestedRange;
        yield break;
    }
    // NOTE(review): this local is never assigned a non-null value in this method,
    // so the else branch below is currently unreachable.
    long? skipRangesDownThisPosition = null;
    foreach (var currentRange in EnumSearchableRangesCore(matcher))
    {
        if (skipRangesDownThisPosition == null)
        {
            yield return currentRange;
        }
        else
        {
            long skipRangesDownThisPositionVal = skipRangesDownThisPosition.Value;
            if (currentRange.End < skipRangesDownThisPositionVal) // todo: < or <= ?
            {
                continue;
            }
            skipRangesDownThisPosition = null;
            if (currentRange.Begin < skipRangesDownThisPositionVal) // todo: < or <= ?
            {
                yield return new FileRange.Range(skipRangesDownThisPositionVal, currentRange.End);
            }
            else
            {
                yield return currentRange;
            }
        }
    }
}

/// <summary>
/// Asks the owner reader to create a forward parser for the given range.
/// </summary>
/// <param name="searchableRange">Range to parse; parsing starts at its beginning.</param>
/// <param name="messagesPostprocessor">Factory of postprocessors passed to the parser.</param>
/// <returns>The parser created by <c>owner.CreateParser</c>.</returns>
IPositionedMessagesParser CreateParserForSearchableRange(
    FileRange.Range searchableRange,
    Func<IMessagesPostprocessor> messagesPostprocessor)
{
    // Hard-coded switch: multithreading is left enabled.
    bool disableMultithreading = false;
    return owner.CreateParser(new CreateParserParams(
        searchableRange.Begin, searchableRange,
        MessagesParserFlag.HintParserWillBeUsedForMassiveSequentialReading
            | (disableMultithreading ? MessagesParserFlag.DisableMultithreading : MessagesParserFlag.None),
        MessagesParserDirection.Forward,
        messagesPostprocessor)));
}

/// <summary>
/// Enumerates the ranges hinted by the plain text matcher, each one post-processed by
/// <c>PostprocessHintRange</c>.
/// </summary>
/// <param name="matcher">Plain text matcher passed to <c>EnumCheckpoints</c>.</param>
IEnumerable<FileRange.Range> EnumSearchableRangesCore(PlainTextMatcher matcher)
{
    ITextAccess ta = new StreamTextAccess(rawStream, streamEncoding, textStreamPositioningParams);
    using (var tai = ta.OpenIterator(requestedRange.Begin, TextAccessDirection.Forward))
    {
        // The lambda below reads lastRange lazily, so it always sees the range
        // returned on the previous iteration.
        var lastRange = new FileRange.Range();
        foreach (var r in
            IterateMatchRanges(
                EnumCheckpoints(tai, matcher, progressAndCancellation, trace),
                // todo: tune next parameter to find the value giving max performance.
                // On one sample log bigger block was better than many small ones.
                // Hence quite big threshold.
                textStreamPositioningParams.AlignmentBlockSize * 8,
                progressAndCancellation
            )
            .Select(hint => PostprocessHintRange(hint, lastRange))
        )
        {
            lastRange = r;
            yield return r;
        }
    }
}

/// <summary>
/// Expands a hint range to message boundaries and clips it.
/// </summary>
/// <remarks>
/// The end is moved to the end of the message found by a forward splitting session started
/// at <c>r.End</c>; if no message is found there, the end of the requested range is used.
/// The beginning is moved to the beginning of the message found by a backward session started
/// at <c>r.Begin</c>. When dejittering parameters are present and dejitter is not disabled by
/// the parser flags, each side is additionally extended by up to <c>JitterBufferSize</c> messages.
/// The result is intersected with the requested range.
/// </remarks>
/// <param name="r">Hint range to process.</param>
/// <param name="lastRange">Previously returned range.</param>
/// <returns>The adjusted range.</returns>
FileRange.Range PostprocessHintRange(FileRange.Range r, FileRange.Range lastRange)
{
    long fixedBegin = r.Begin;
    long fixedEnd = r.End;

    int? inflateRangeBy = null;
    if (dejitteringParams != null && (parserParams.Flags & MessagesParserFlag.DisableDejitter) == 0)
    {
        inflateRangeBy = dejitteringParams.Value.JitterBufferSize;
    }

    // Align the end of the range going forward from r.End.
    aligmentSplitter.BeginSplittingSession(requestedRange, r.End, MessagesParserDirection.Forward);
    if (aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
    {
        fixedEnd = aligmentCapture.EndPosition;
        if (inflateRangeBy != null)
        {
            for (int i = 0; i < inflateRangeBy.Value; ++i)
            {
                if (!aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
                {
                    break;
                }
                fixedEnd = aligmentCapture.EndPosition;
            }
        }
    }
    else
    {
        fixedEnd = requestedRange.End;
    }
    aligmentSplitter.EndSplittingSession();

    // Align the beginning of the range going backward from r.Begin.
    aligmentSplitter.BeginSplittingSession(requestedRange, fixedBegin, MessagesParserDirection.Backward);
    if (aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
    {
        fixedBegin = aligmentCapture.BeginPosition;
        if (inflateRangeBy != null)
        {
            for (int i = 0; i < inflateRangeBy.Value; ++i)
            {
                if (!aligmentSplitter.GetCurrentMessageAndMoveToNextOne(aligmentCapture))
                {
                    break;
                }
                fixedBegin = aligmentCapture.BeginPosition;
            }
        }
    }
    aligmentSplitter.EndSplittingSession();

    var ret = new FileRange.Range(fixedBegin, fixedEnd);
    ret = FileRange.Range.Intersect(ret, requestedRange).Common;
    // When the intersection with the previous range reports relative position 0
    // (presumably "the ranges overlap" - confirm against FileRange.Range.Intersect),
    // keep only the right leftover of the new range.
    var lastRangeIntersection = FileRange.Range.Intersect(ret, lastRange);
    if (lastRangeIntersection.RelativePosition == 0)
    {
        ret = lastRangeIntersection.Leftover1Right;
    }

    return ret;
}
/// <summary>
/// Converts <c>mediaSize</c> into a text stream position by delegating to
/// <c>StreamTextAccess.StreamPositionToTextStreamPosition</c>.
/// </summary>
/// <returns>The value returned by that helper for <c>mediaSize</c>.</returns>
private TextStreamPosition DetectEndPositionFromMediaSize()
{
    // NOTE(review): a method named StreamPositionToTextStreamPosition that is visible
    // elsewhere in this file is async and returns Task<TextStreamPosition>; confirm that
    // this call binds to a synchronous overload.
    var endPosition = StreamTextAccess.StreamPositionToTextStreamPosition(
        mediaSize, StreamEncoding, VolatileStream, textStreamPositioningParams);
    return endPosition;
}
/// <summary>
/// Initializes the iterator with the <c>StreamTextAccess</c> instance it keeps in its
/// <c>impl</c> field.
/// </summary>
/// <param name="impl">Text access object to store; it is not validated here.</param>
public TextAccessIterator(StreamTextAccess impl)
{
    this.impl = impl;
}