/// <summary>
/// Registers a pattern matcher on this node by pushing a new reference onto the
/// front of the <c>FirstExpression</c> list.
/// </summary>
/// <param name="caseSensitive">Not read by this overload.</param>
/// <param name="needSeparators">Stored on the new reference as <c>NeedSeparators</c>.</param>
/// <param name="matcher">Matcher wrapped by the new reference.</param>
/// <param name="tags">Stored on the new reference as <c>Tags</c>.</param>
public void AddPattern(bool caseSensitive, bool needSeparators, IPatternMatcher matcher, object[] tags)
{
    var reference = new PatternMatchReference(matcher);
    reference.NextSibling = FirstExpression;
    reference.Tags = tags;
    reference.NeedSeparators = needSeparators;

    // The new reference becomes the head; the previous head is now its sibling.
    FirstExpression = reference;
}
/// <summary>
/// Adds an expression that has no text prefix by forwarding to the prefix-taking
/// overload with a <c>null</c> prefix.
/// </summary>
/// <param name="caseSensitive">Forwarded unchanged.</param>
/// <param name="needSeparators">Forwarded unchanged.</param>
/// <param name="matcher">Matcher to register; must not be <c>null</c>.</param>
/// <param name="tag">Forwarded unchanged.</param>
/// <exception cref="ArgumentNullException"><paramref name="matcher"/> is <c>null</c>.</exception>
public void AddExpression(bool caseSensitive, bool needSeparators, IPatternMatcher matcher, object tag)
{
    if (matcher == null)
    {
        throw new ArgumentNullException("matcher"); // do not localize
    }

    AddExpression(null, caseSensitive, needSeparators, matcher, tag);
}
/// <summary>
/// Adds a pattern that has no explicit prefix by forwarding to the prefix-taking
/// overload with a <c>null</c> prefix.
/// </summary>
/// <param name="matcher">Matcher to register; must not be <c>null</c>.</param>
/// <param name="caseSensitive">Forwarded unchanged.</param>
/// <param name="needSeparators">Forwarded unchanged.</param>
/// <param name="tags">Forwarded unchanged.</param>
/// <exception cref="ArgumentNullException"><paramref name="matcher"/> is <c>null</c>.</exception>
public void AddPattern(IPatternMatcher matcher, bool caseSensitive, bool needSeparators, object[] tags)
{
    if (matcher == null)
    {
        throw new ArgumentNullException("matcher");
    }

    AddPattern(null, matcher, caseSensitive, needSeparators, tags);
}
/// <summary>
/// Adds a pattern to the underlying token tree and returns this tokenizer so that
/// calls can be chained.
/// </summary>
/// <param name="matcher">Forwarded to <c>tree.AddPattern</c>.</param>
/// <param name="caseSensitive">Forwarded to <c>tree.AddPattern</c>.</param>
/// <param name="needsSeparators">Forwarded to <c>tree.AddPattern</c>.</param>
/// <param name="tags">Forwarded to <c>tree.AddPattern</c>.</param>
/// <returns>This instance.</returns>
public Tokenizer AddPattern(IPatternMatcher matcher, bool caseSensitive, bool needsSeparators, params object[] tags)
{
    // Must run first: ThrowIfImmutable is expected to reject the change before the tree is touched.
    ThrowIfImmutable();

    tree.AddPattern(matcher, caseSensitive, needsSeparators, tags);

    return this;
}
/// <summary>
/// Inserts <paramref name="text"/> into the token tree rooted at this node, one
/// character per node, and attaches <paramref name="matcher"/> to the node that
/// holds the last character.
/// </summary>
/// <param name="text">Remaining text to insert; its first character is stored on this node.</param>
/// <param name="caseSensitive">When <c>false</c>, this node is flagged as containing case-insensitive data.</param>
/// <param name="needSeparators">Passed down the recursion unchanged.</param>
/// <param name="matcher">Matcher wrapped in the reference stored on the final node.</param>
/// <param name="tag">Stored on the reference as <c>Tag</c>.</param>
/// <exception cref="ArgumentException"><paramref name="text"/> is null or empty.</exception>
public void AddExpression(string text, bool caseSensitive, bool needSeparators, IPatternMatcher matcher, object tag)
{
    if (StringUtils.IsNullOrEmpty(text))
    {
        throw new ArgumentException("text may not be empty", "text"); // do not localize
    }

    Char = text[0];
    if (!caseSensitive)
    {
        ContainsCaseInsensitiveData = true;
    }

    // Last character reached: push the matcher onto the front of this node's list.
    if (text.Length == 1)
    {
        PatternMatchReference reference = new PatternMatchReference(matcher);
        reference.NextSibling = FirstExpression;
        reference.Tag = tag;
        FirstExpression = reference;
        return;
    }

    string remainder = text.Substring(1);
    char nextChar = remainder[0];
    int slot = nextChar & 0xff; // low byte of the character selects the child slot
    TokenTreeNode current = ChildNodes[slot];

    // Empty slot: create the first node for it.
    if (current == null)
    {
        TokenTreeNode created = new TokenTreeNode();
        ChildNodes[slot] = created;
        created.AddExpression(remainder, caseSensitive, needSeparators, matcher, tag);

        if (created.Char == ' ')
        {
            // A whitespace node is registered as its own child so that a run of
            // spaces in the input can be consumed where the pattern has a space
            // (e.g. the words of "end sub" separated by several spaces).
            created.ChildNodes[(int)' '] = created;
        }

        return;
    }

    // Occupied slot: different characters can share a slot, so walk the sibling
    // chain until the matching character or the end of the chain is found.
    while (current.NextSibling != null && current.Char != nextChar)
    {
        current = current.NextSibling;
    }

    if (current.Char == nextChar)
    {
        current.AddExpression(remainder, caseSensitive, needSeparators, matcher, tag);
    }
    else
    {
        TokenTreeNode sibling = new TokenTreeNode();
        current.NextSibling = sibling;
        sibling.AddExpression(remainder, caseSensitive, needSeparators, matcher, tag);
    }
}
/// <summary>
/// Adds a pattern that has no explicit prefix by forwarding to the prefix-taking
/// overload with a <c>null</c> prefix.
/// </summary>
/// <param name="matcher">Matcher to register; validated with <c>Require(...).NotNull()</c>.</param>
/// <param name="caseSensitive">Forwarded unchanged.</param>
/// <param name="needSeparators">Forwarded unchanged.</param>
/// <param name="tags">Forwarded unchanged.</param>
public void AddPattern(IPatternMatcher matcher, bool caseSensitive, bool needSeparators, object[] tags)
{
    // Argument validation happens before anything is registered.
    matcher.Require("matcher").NotNull();

    AddPattern(null, matcher, caseSensitive, needSeparators, tags);
}
/// <summary>
/// Registers a case-sensitive prefixed pattern on the start node selected by the
/// first character of <paramref name="prefix"/>, creating that node on demand.
/// </summary>
/// <param name="prefix">Prefix text; its first character is used as the index into <c>nodes</c>.</param>
/// <param name="matcher">Matcher to register.</param>
/// <param name="needSeparators">Forwarded to the node.</param>
/// <param name="tags">Forwarded to the node.</param>
private void AddPatternWithCaseSensitivePrefix(string prefix, IPatternMatcher matcher, bool needSeparators, object[] tags)
{
    int slot = prefix[0];

    var startNode = nodes[slot];
    if (startNode == null)
    {
        startNode = new TokenTreeNode();
        nodes[slot] = startNode;
    }

    startNode.AddPattern(prefix, true, needSeparators, matcher, tags);
}
/// <summary>
/// Wraps <paramref name="matcher"/> in a new reference and makes it the head of
/// this node's <c>FirstExpression</c> list.
/// </summary>
/// <param name="caseSensitive">Not read by this overload.</param>
/// <param name="needSeparators">Copied to the reference's <c>NeedSeparators</c>.</param>
/// <param name="matcher">Matcher to wrap.</param>
/// <param name="tags">Copied to the reference's <c>Tags</c>.</param>
public void AddPattern(bool caseSensitive, bool needSeparators, IPatternMatcher matcher, object[] tags)
{
    var head = new PatternMatchReference(matcher);

    // Link the previous head behind the new one before replacing it.
    head.NextSibling = FirstExpression;
    head.Tags = tags;
    head.NeedSeparators = needSeparators;

    FirstExpression = head;
}
/// <summary>
/// Adds a prefix-less pattern; delegates to the overload that takes a prefix,
/// passing <c>null</c> for it.
/// </summary>
/// <param name="matcher">Matcher to register; must not be <c>null</c>.</param>
/// <param name="caseSensitive">Forwarded unchanged.</param>
/// <param name="needSeparators">Forwarded unchanged.</param>
/// <param name="tags">Forwarded unchanged.</param>
/// <exception cref="ArgumentNullException"><paramref name="matcher"/> is <c>null</c>.</exception>
public void AddPattern(IPatternMatcher matcher, bool caseSensitive, bool needSeparators, object[] tags)
{
    if (matcher == null)
        throw new ArgumentNullException("matcher");

    AddPattern(null, matcher, caseSensitive, needSeparators, tags);
}
/// <summary>
/// Throws when <c>_matchers</c> already holds a matcher whose <c>Pattern</c> equals
/// the pattern of <paramref name="matcher"/>, compared with
/// <see cref="StringComparison.InvariantCultureIgnoreCase"/>.
/// </summary>
/// <param name="matcher">Matcher whose pattern is checked against the registered ones.</param>
/// <exception cref="ArgumentException">A registered matcher has the same pattern.</exception>
static void VerifySymbol(IPatternMatcher matcher)
{
    foreach (var registered in _matchers)
    {
        bool samePattern = registered.Pattern.Equals(matcher.Pattern, StringComparison.InvariantCultureIgnoreCase);
        if (!samePattern)
        {
            continue;
        }

        throw new ArgumentException("A matcher with same pattern already exists!");
    }
}
/// <summary>
/// Adds a pattern, choosing the registration path from the prefix and the
/// case-sensitivity flag.
/// </summary>
/// <param name="prefix">Optional prefix; null or empty selects the prefix-less path.</param>
/// <param name="matcher">Matcher to register.</param>
/// <param name="caseSensitive">Selects the case-sensitive or case-insensitive prefix path.</param>
/// <param name="needSeparators">Forwarded unchanged.</param>
/// <param name="tags">Forwarded unchanged.</param>
public void AddPattern(string prefix, IPatternMatcher matcher, bool caseSensitive, bool needSeparators, object[] tags)
{
    if (string.IsNullOrEmpty(prefix))
    {
        AddPatternWithoutPrefix(matcher, caseSensitive, needSeparators, tags);
        return;
    }

    if (caseSensitive)
    {
        AddPatternWithCaseSensitivePrefix(prefix, matcher, needSeparators, tags);
        return;
    }

    AddPatternWithCaseInsensitivePrefix(prefix, matcher, needSeparators, tags);
}
/// <summary>
/// Adds a case-sensitive prefixed pattern to the start node indexed by the first
/// character of <paramref name="prefix"/>. The node is created if the slot is empty.
/// </summary>
/// <param name="prefix">Prefix text; <c>prefix[0]</c> is the index into <c>nodes</c>.</param>
/// <param name="matcher">Matcher to register.</param>
/// <param name="needSeparators">Forwarded to the node.</param>
/// <param name="tags">Forwarded to the node.</param>
private void AddPatternWithCaseSensitivePrefix(string prefix, IPatternMatcher matcher, bool needSeparators, object[] tags)
{
    int firstCharIndex = prefix[0];

    var target = nodes[firstCharIndex];
    if (target == null)
    {
        // First pattern starting with this character: allocate its start node.
        target = new TokenTreeNode();
        nodes[firstCharIndex] = target;
    }

    target.AddPattern(prefix, true, needSeparators, matcher, tags);
}
/// <summary>
/// Converts classified text parts into run infos, splitting each part that the
/// pattern matcher hits into unmatched and matched runs.
/// </summary>
/// <param name="parts">Text parts to convert.</param>
/// <param name="patternMatcher">Matcher to apply; when <c>null</c> every part becomes one unmatched run.</param>
/// <param name="hasMatch">Set to <c>true</c> when at least one part produced matched spans.</param>
/// <returns>The runs in source order.</returns>
IList <RunInfo> ToRunInfo(IReadOnlyCollection <ClassifiedText> parts, [CanBeNull] IPatternMatcher patternMatcher, out bool hasMatch)
{
    hasMatch = false;

    var result = new List <RunInfo>();

    if (patternMatcher == null)
    {
        foreach (var part in parts)
        {
            result.Add(new RunInfo(part, isMatch: false));
        }

        return(result);
    }

    foreach (var part in parts)
    {
        var candidate = patternMatcher.TryMatch(part.Text);
        if (candidate == null || candidate.Value.MatchedSpans.Length <= 0)
        {
            // No hit in this part: keep it as a single unmatched run.
            result.Add(new RunInfo(part, false));
            continue;
        }

        string source = part.Text;
        int cursor = 0;

        foreach (var span in candidate.Value.MatchedSpans)
        {
            // The text before the matched text
            if (span.Start > cursor)
            {
                string before = source.Substring(cursor, span.Start - cursor);
                result.Add(new RunInfo(new ClassifiedText(before, part.Classification), isMatch: false));
            }

            // The matched text
            string hit = source.Substring(span.Start, span.Length);
            result.Add(new RunInfo(new ClassifiedText(hit, part.Classification), isMatch: true));

            cursor = span.End;
        }

        // The text after the last matched text
        if (cursor < source.Length)
        {
            string after = source.Substring(cursor);
            result.Add(new RunInfo(new ClassifiedText(after, part.Classification), isMatch: false));
        }

        hasMatch = true;
    }

    return(result);
}
/// <summary>
/// Registers a prefixed pattern: inserts <paramref name="prefix"/> into the token
/// tree via <c>AddTokenInternal</c> and pushes a reference to
/// <paramref name="matcher"/> onto the front of the returned node's expression list.
/// </summary>
/// <param name="prefix">Prefix text; must not be null or empty.</param>
/// <param name="caseSensitive">Forwarded to <c>AddTokenInternal</c>.</param>
/// <param name="needSeparators">Stored on the new reference as <c>NeedSeparators</c>.</param>
/// <param name="matcher">Matcher wrapped by the new reference.</param>
/// <param name="tags">Stored on the new reference as <c>Tags</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="prefix"/> is null or empty.</exception>
public void AddPattern(string prefix, bool caseSensitive, bool needSeparators, IPatternMatcher matcher, object[] tags)
{
    if (string.IsNullOrEmpty(prefix))
        throw new ArgumentNullException("prefix");

    TokenTreeNode node = AddTokenInternal(prefix, caseSensitive);

    // Chain onto the list of the node that receives the reference. Linking to this
    // node's own FirstExpression instead would discard the references already
    // stored on the target node and graft this node's list onto it.
    var patternMatcherReference = new PatternMatchReference(matcher)
    {
        NextSibling = node.FirstExpression,
        Tags = tags,
        NeedSeparators = needSeparators
    };

    node.FirstExpression = patternMatcherReference;
}
/// <summary>
/// Builds a plain endpoint that only groups the child endpoints found below
/// <paramref name="tree"/>.
/// </summary>
/// <param name="tree">Path tree whose child endpoints are collected.</param>
/// <param name="patternMatcher">Matcher used to obtain the child endpoints.</param>
/// <returns>The grouping endpoint, or <c>null</c> when there are no children.</returns>
public IEndpoint? TryGetEndpoint(PathTree tree, IPatternMatcher patternMatcher)
{
    var childEndpoints = patternMatcher.GetEndpoints(tree);

    // Plain endpoint with no children serves no purpose
    bool hasChildren = childEndpoints.Count != 0;
    if (!hasChildren)
    {
        return null;
    }

    var result = new Endpoint();
    result.Children.AddRange(childEndpoints);
    return result;
}
/// <summary>
/// Creates a search context. A pattern matcher is created only for a non-empty
/// search string; otherwise <c>_patternMatcher</c> is left unassigned.
/// </summary>
/// <param name="searchString">Search text; <c>null</c> is stored as the empty string.</param>
/// <param name="patternMatcherFactory">Factory used to create the matcher when the search string is non-empty.</param>
public SearchContext(string searchString, IPatternMatcherFactory patternMatcherFactory)
{
    SearchString = searchString ?? String.Empty;

    if (SearchString.Length == 0)
    {
        return;
    }

    // Matched spans are requested along with the match itself.
    var options = new PatternMatcherCreationOptions(
        cultureInfo: CultureInfo.CurrentCulture,
        flags: PatternMatcherCreationFlags.IncludeMatchedSpans);

    _patternMatcher = patternMatcherFactory.CreatePatternMatcher(searchString, options);
}
/// <summary>
/// Builds an endpoint for <paramref name="tree"/> when its item offers every
/// operation listed in <c>RequiredOperations</c>, and attaches the child endpoints.
/// </summary>
/// <param name="tree">Path tree to inspect; a missing item counts as having no operations.</param>
/// <param name="patternMatcher">Matcher used to obtain the child endpoints.</param>
/// <returns>
/// The endpoint returned by <c>BuildEndpoint</c> (possibly <c>null</c>), or <c>null</c>
/// when a required operation is missing.
/// </returns>
public IEndpoint TryGetEndpoint(PathTree tree, IPatternMatcher patternMatcher)
{
    var available = tree.Item?.Operations.Keys ?? new OperationType[0];

    bool missingRequired = RequiredOperations.Any(required => !available.Contains(required));
    if (missingRequired)
    {
        return(null);
    }

    var endpoint = BuildEndpoint(tree.Item);
    if (endpoint != null)
    {
        // Children are only looked up when there is an endpoint to attach them to.
        endpoint.Children.AddRange(patternMatcher.GetEndpoints(tree));
    }

    return(endpoint);
}
/// <summary>
/// Builds an endpoint for <paramref name="tree"/> when it has an item that offers
/// every operation listed in <c>RequiredOperations</c>, and attaches the child endpoints.
/// </summary>
/// <param name="tree">Path tree to inspect.</param>
/// <param name="patternMatcher">Matcher used to obtain the child endpoints.</param>
/// <returns>
/// The endpoint returned by <c>BuildEndpoint</c> (possibly <c>null</c>), or <c>null</c>
/// when the tree has no item or a required operation is missing.
/// </returns>
public IEndpoint? TryGetEndpoint(PathTree tree, IPatternMatcher patternMatcher)
{
    var pathItem = tree.Item;
    if (pathItem == null)
    {
        return null;
    }

    bool hasAllRequired = RequiredOperations.All(pathItem.Operations.Keys.Contains);
    if (!hasAllRequired)
    {
        return null;
    }

    var endpoint = BuildEndpoint(pathItem);
    if (endpoint != null)
    {
        // Children are only looked up when there is an endpoint to attach them to.
        endpoint.Children.AddRange(patternMatcher.GetEndpoints(tree));
    }

    return endpoint;
}
/// <summary>
/// Creates a text filter backed by a pattern matcher for <paramref name="pattern"/>.
/// The matcher uses the current culture, allows simple substring matching and
/// includes matched spans.
/// </summary>
/// <param name="pattern">Pattern passed to the factory.</param>
/// <param name="factory">Factory that creates the matcher; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="factory"/> is <c>null</c>.</exception>
public PatternTextFilter(string pattern, IPatternMatcherFactory factory)
{
    if (factory is null)
    {
        throw new ArgumentNullException(nameof(factory));
    }

    var flags = PatternMatcherCreationFlags.AllowSimpleSubstringMatching
                | PatternMatcherCreationFlags.IncludeMatchedSpans;
    var options = new PatternMatcherCreationOptions(CultureInfo.CurrentCulture, flags);

    _matcher = factory.CreatePatternMatcher(pattern, options);
}
/// <summary>
/// Adds an expression, choosing the registration path from the prefix and the
/// case-sensitivity flag.
/// </summary>
/// <param name="prefix">Optional prefix; null or empty selects the prefix-less path.</param>
/// <param name="caseSensitive">Selects the case-sensitive or case-insensitive prefix path.</param>
/// <param name="needSeparators">Forwarded unchanged.</param>
/// <param name="matcher">Matcher to register.</param>
/// <param name="tag">Forwarded unchanged.</param>
public void AddExpression(string prefix, bool caseSensitive, bool needSeparators, IPatternMatcher matcher, object tag)
{
    if (StringUtils.IsNullOrEmpty(prefix))
    {
        AddExpressionWithoutPrefix(matcher, caseSensitive, needSeparators, tag);
        return;
    }

    if (caseSensitive)
    {
        AddExpressionWithCaseSensitivePrefix(prefix, needSeparators, matcher, tag);
        return;
    }

    AddExpressionWithCaseInsensitivePrefix(prefix, needSeparators, matcher, tag);
}
/// <summary>
/// Registers a prefixed pattern: inserts <paramref name="prefix"/> into the token
/// tree via <c>AddTokenInternal</c> and pushes a reference to
/// <paramref name="matcher"/> onto the front of the returned node's expression list.
/// </summary>
/// <param name="prefix">Prefix text; validated with <c>Require(...).NotNullOrEmpty()</c>.</param>
/// <param name="caseSensitive">Forwarded to <c>AddTokenInternal</c>.</param>
/// <param name="needSeparators">Stored on the new reference as <c>NeedSeparators</c>.</param>
/// <param name="matcher">Matcher wrapped by the new reference.</param>
/// <param name="tags">Stored on the new reference as <c>Tags</c>.</param>
public void AddPattern(string prefix, bool caseSensitive, bool needSeparators, IPatternMatcher matcher, object[] tags)
{
    prefix.Require("prefix")
        .NotNullOrEmpty();

    TokenTreeNode node = AddTokenInternal(prefix, caseSensitive);

    // Chain onto the list of the node that receives the reference. Linking to this
    // node's own FirstExpression instead would discard the references already
    // stored on the target node and graft this node's list onto it.
    var patternMatcherReference = new PatternMatchReference(matcher)
    {
        NextSibling = node.FirstExpression,
        Tags = tags,
        NeedSeparators = needSeparators
    };

    node.FirstExpression = patternMatcherReference;
}
/// <summary>
/// Rebuilds the outline tree from <paramref name="outlineData"/> and shows either
/// the tree view or the watermark, depending on whether the tree has any items.
/// Must be called on the UI thread.
/// </summary>
/// <param name="outlineData">Outline to show; may be <c>null</c>.</param>
/// <param name="patternMatcher">Optional matcher forwarded to <c>AddOutlineElement</c>.</param>
internal void ShowOutline([CanBeNull] OutlineData outlineData, [CanBeNull] IPatternMatcher patternMatcher)
{
    ThreadHelper.ThrowIfNotOnUIThread();

    AddOutlineElement(null, outlineData?.OutlineElement, patternMatcher);

    // Exactly one of the two controls is visible at a time.
    bool hasItems = TreeView.Items.Count != 0;
    TreeView.Visibility = hasItems ? Visibility.Visible : Visibility.Collapsed;
    Watermark.Visibility = hasItems ? Visibility.Collapsed : Visibility.Visible;
}
/// <summary>
/// Removes the first entry of <c>_matchers</c> that is the same instance as
/// <paramref name="matcher"/>. Does nothing when the instance is not registered.
/// </summary>
/// <param name="matcher">Matcher instance to remove; validated by <c>VerifyParameter</c>.</param>
internal static void Remove(IPatternMatcher matcher)
{
    VerifyParameter(matcher);

    lock (_matchers)
    {
        for (int index = 0; index < _matchers.Count; index++)
        {
            // Identity comparison: an equal-looking but distinct instance is left alone.
            if (ReferenceEquals(_matchers[index], matcher))
            {
                _matchers.RemoveAt(index);
                break;
            }
        }
    }
}
/// <summary>
/// Adds an expression for which no prefix was supplied. When the matcher provides
/// default prefixes, the expression is added once per default prefix; otherwise a
/// reference is pushed onto the front of the root node's expression list.
/// </summary>
/// <param name="matcher">Matcher to register.</param>
/// <param name="caseSensitive">Forwarded when adding by default prefix.</param>
/// <param name="needSeparators">Forwarded when adding by default prefix.</param>
/// <param name="tag">Stored on the reference, or forwarded when adding by default prefix.</param>
private void AddExpressionWithoutPrefix(IPatternMatcher matcher, bool caseSensitive, bool needSeparators, object tag)
{
    if (matcher.DefaultPrefixes == null)
    {
        PatternMatchReference reference = new PatternMatchReference(matcher);
        reference.Tag = tag;
        reference.NextSibling = root.FirstExpression;
        root.FirstExpression = reference;
        return;
    }

    foreach (string defaultPrefix in matcher.DefaultPrefixes)
    {
        AddExpression(defaultPrefix, caseSensitive, needSeparators, matcher, tag);
    }
}
/// <summary>
/// Registers a case-insensitive prefixed pattern under two spellings of the
/// prefix: all lower case, and lower case with an upper-case first character.
/// Each spelling is added to the start node indexed by its first character,
/// creating that node on demand.
/// </summary>
/// <param name="prefix">Prefix text.</param>
/// <param name="matcher">Matcher to register.</param>
/// <param name="needSeparators">Forwarded to the node.</param>
/// <param name="tags">Forwarded to the node.</param>
private void AddPatternWithCaseInsensitivePrefix(string prefix, IPatternMatcher matcher, bool needSeparators, object[] tags)
{
    string lowered = prefix.ToLower();
    string capitalized = char.ToUpper(lowered[0]) + lowered.Substring(1);

    // Lower-case spelling first, then the spelling with an upper-case start char.
    foreach (string spelling in new[] { lowered, capitalized })
    {
        int slot = spelling[0];
        if (nodes[slot] == null)
        {
            nodes[slot] = new TokenTreeNode();
        }

        nodes[slot].AddPattern(spelling, false, needSeparators, matcher, tags);
    }
}
/// <summary>
/// Adds a pattern for which no prefix was supplied. When the matcher provides
/// default prefixes, the pattern is added once per default prefix; otherwise a
/// reference is pushed onto the front of the root node's expression list.
/// </summary>
/// <param name="matcher">Matcher to register.</param>
/// <param name="caseSensitive">Forwarded when adding by default prefix.</param>
/// <param name="needSeparators">Stored on the reference, or forwarded when adding by default prefix.</param>
/// <param name="tags">Stored on the reference, or forwarded when adding by default prefix.</param>
private void AddPatternWithoutPrefix(IPatternMatcher matcher, bool caseSensitive, bool needSeparators, object[] tags)
{
    if (matcher.DefaultPrefixes == null)
    {
        var reference = new PatternMatchReference(matcher);
        reference.Tags = tags;
        reference.NextSibling = root.FirstExpression;
        reference.NeedSeparators = needSeparators;

        root.FirstExpression = reference;
        return;
    }

    foreach (string defaultPrefix in matcher.DefaultPrefixes)
    {
        AddPattern(defaultPrefix, matcher, caseSensitive, needSeparators, tags);
    }
}
/// <summary>
/// Builds a collection endpoint for <paramref name="tree"/> when its item has a GET
/// operation whose 200 response is a JSON array with the same item schema reference
/// as the element endpoint found among the children.
/// </summary>
/// <param name="tree">Path tree to inspect.</param>
/// <param name="patternMatcher">Matcher used to obtain the child endpoints.</param>
/// <returns>
/// The collection endpoint, or <c>null</c> when the item, the GET operation or the
/// element endpoint is missing, or the schemas do not match.
/// </returns>
public override IEndpoint? TryGetEndpoint(PathTree tree, IPatternMatcher patternMatcher)
{
    var pathItem = tree.Item;
    if (pathItem == null)
    {
        return null;
    }

    if (!pathItem.Operations.TryGetValue(OperationType.Get, out var getOperation))
    {
        return null;
    }

    var childEndpoints = patternMatcher.GetEndpoints(tree);
    var element = ExtractElement<ElementEndpoint>(childEndpoints);
    if (element == null)
    {
        return null;
    }

    var okResponse = getOperation.Get200Response();
    var schema = okResponse?.GetJsonSchema();

    // Ensure collection and element schemas match
    if (schema?.Type != "array" || schema.Items?.Reference?.Id != element.Schema?.Reference?.Id)
    {
        return null;
    }

    // The schema now lives on the collection endpoint, so it is cleared on the element.
    element.Schema = null;

    var collection = new CollectionEndpoint
    {
        Schema = schema.Items,
        Element = (element.Children.Count == 0) ? null : element, // Trim trivial element endpoint
        Description = pathItem.Description
                      ?? getOperation.Description
                      ?? getOperation.Summary
                      ?? okResponse?.Description
                      ?? schema.Description
    };
    collection.Children.AddRange(childEndpoints);

    return collection;
}
/// <summary>
/// Builds an indexer endpoint for <paramref name="tree"/> when an element endpoint
/// can be extracted from its children. The description comes from the item, or
/// from the GET operation's description or summary.
/// </summary>
/// <param name="tree">Path tree to inspect; its item may be missing.</param>
/// <param name="patternMatcher">Matcher used to obtain the child endpoints.</param>
/// <returns>The indexer endpoint, or <c>null</c> when no element endpoint was found.</returns>
public virtual IEndpoint? TryGetEndpoint(PathTree tree, IPatternMatcher patternMatcher)
{
    var pathItem = tree.Item;

    // The GET operation is optional; it is only used as a description source.
    OpenApiOperation? getOperation = null;
    if (pathItem != null)
    {
        pathItem.Operations.TryGetValue(OperationType.Get, out getOperation);
    }

    var childEndpoints = patternMatcher.GetEndpoints(tree);
    var element = ExtractElement<IEndpoint>(childEndpoints);
    if (element == null)
    {
        return null;
    }

    var indexer = new IndexerEndpoint
    {
        Element = element,
        Description = pathItem?.Description ?? getOperation?.Description ?? getOperation?.Summary
    };
    indexer.Children.AddRange(childEndpoints);

    return indexer;
}
/// <summary>
/// Creates the filter and stores the pattern matcher it will use.
/// </summary>
/// <param name="patternMatcher">Matcher stored in the <c>patternMatcher</c> field; not validated here.</param>
public FuzzyMatchCommandsFilter(IPatternMatcher patternMatcher)
{
    this.patternMatcher = patternMatcher;
}
/// <summary>
/// Registers a case-insensitive prefixed expression under two spellings of the
/// prefix: all lower case, and lower case with an upper-case first character.
/// Each spelling is added to the start node indexed by its first character,
/// creating that node on demand.
/// </summary>
/// <param name="prefix">Prefix text.</param>
/// <param name="needSeparators">Forwarded to the node.</param>
/// <param name="matcher">Matcher to register.</param>
/// <param name="tag">Forwarded to the node.</param>
private void AddExpressionWithCaseInsensitivePrefix(string prefix, bool needSeparators, IPatternMatcher matcher, object tag)
{
    string lowered = prefix.ToLower();
    string capitalized = char.ToUpper(lowered[0]) + lowered.Substring(1);

    // Lower-case spelling first, then the spelling with an upper-case start char.
    foreach (string spelling in new string[] { lowered, capitalized })
    {
        int slot = (int)spelling[0];
        if (nodes[slot] == null)
        {
            nodes[slot] = new TokenTreeNode();
        }

        nodes[slot].AddExpression(spelling, false, needSeparators, matcher, tag);
    }
}
/// <summary>
/// Extracts the text rows of a Word document by converting it to PDF in memory and
/// reading the PDF page by page.
/// </summary>
/// <param name="documentPath">Path of the document to load.</param>
/// <param name="stopWords">
/// Optional stop words. When given, rows are kept only up to the first row that
/// contains a stop word, then combined with the rows reported by the pattern matcher.
/// </param>
/// <param name="skipWords">Optional words; a page whose text contains any of them is skipped entirely.</param>
/// <returns>
/// The extracted rows, or an empty list when any exception occurred (the exception
/// is logged, not rethrown).
/// </returns>
public List <string> GetRows(string documentPath, ICollection <string> stopWords = null, string[] skipWords = null)
{
    using (var memoryStream = new MemoryStream())
    {
        try
        {
            _logger.Info("=== ENTERING WORD DOCUMENT EXTRACTOR ===");
            _logger.Debug("Retrieving document stored at : " + documentPath);

            Document document = new Document(documentPath);
            _logger.Info(documentPath + " successfully retrieved.");

            _logger.Debug("Converting and saving document " + documentPath + " as PDF in memory.");
            ThrowIfTimedOut(
                () => document.SaveToFile(memoryStream, FileFormat.PDF),
                TimeSpan.FromSeconds(10)
            );
            _logger.Info(documentPath + " successfully converted to PDF.");

            // Rewind so the reader starts at the beginning of the PDF that was just written.
            memoryStream.Position = 0;

            using (PdfReader reader = new PdfReader(memoryStream))
            {
                _logger.Debug("Preparing to read and process PDF content of " + documentPath);
                List <string> parsedLines = new List <string>();
                _logger.Info("PDF stream successfully read: " + documentPath);

                for (int i = 1; i <= reader.NumberOfPages; i++)
                {
                    // A fresh strategy per page: the extraction strategy accumulates the
                    // text it has seen, so a shared instance would return the text of all
                    // previous pages again with every later page.
                    ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();
                    string page = PdfTextExtractor.GetTextFromPage(reader, i, strategy);

                    if (skipWords != null && skipWords.Any(s => page.Contains(s)))
                    {
                        continue;
                    }

                    parsedLines.AddRange(page.Split('\n'));
                }

                if (_patternMatcher == null)
                {
                    _patternMatcher = new NullPatternMatcher();
                }

                if (stopWords != null)
                {
                    // Note: Union has set semantics, so duplicate rows are removed here.
                    parsedLines = parsedLines.TakeWhile(line => !stopWords.Any(line.Contains))
                                  .Union(_patternMatcher.GetMatchedRows(parsedLines))
                                  .ToList();
                }

                _logger.Info(documentPath + " PDF stream successfully processed");
                _logger.Info(parsedLines.Count + " rows processed and retrieved.");

                return(parsedLines);
            }
        }
        catch (ArgumentOutOfRangeException ex)
        {
            _logger.Error("ArgumentOutOfRangeException occurred: " + ex);
        }
        catch (Exception exception)
        {
            // Best effort: failures are logged and reported as "no rows".
            _logger.Error("Unknown exception occurred: " + exception);
        }
    }

    return(new List <string>());
}
/// <summary>
/// Adds <paramref name="text"/> to the token tree below this node, character by
/// character, and stores a reference to <paramref name="matcher"/> on the node of
/// the final character.
/// </summary>
/// <param name="text">Text still to be inserted; its first character becomes this node's <c>Char</c>.</param>
/// <param name="caseSensitive">When <c>false</c>, <c>ContainsCaseInsensitiveData</c> is set on this node.</param>
/// <param name="needSeparators">Handed on to the recursive calls unchanged.</param>
/// <param name="matcher">Matcher wrapped in the stored reference.</param>
/// <param name="tag">Stored on the reference as <c>Tag</c>.</param>
/// <exception cref="ArgumentException"><paramref name="text"/> is null or empty.</exception>
public void AddExpression(string text, bool caseSensitive, bool needSeparators, IPatternMatcher matcher, object tag)
{
    if (StringUtils.IsNullOrEmpty(text))
    {
        throw new ArgumentException("text may not be empty", "text"); // do not localize
    }

    Char = text[0];
    if (!caseSensitive)
    {
        ContainsCaseInsensitiveData = true;
    }

    // End of the text: the matcher reference goes to the front of this node's list.
    if (text.Length == 1)
    {
        PatternMatchReference newHead = new PatternMatchReference(matcher);
        newHead.NextSibling = FirstExpression;
        newHead.Tag = tag;
        FirstExpression = newHead;
        return;
    }

    string tail = text.Substring(1);
    char tailChar = tail[0];
    int lookupIndex = tailChar & 0xff; // only the low byte is used as the lookup index
    TokenTreeNode candidate = ChildNodes[lookupIndex];

    if (candidate == null)
    {
        // Nothing registered under this index yet.
        TokenTreeNode firstChild = new TokenTreeNode();
        ChildNodes[lookupIndex] = firstChild;
        firstChild.AddExpression(tail, caseSensitive, needSeparators, matcher, tag);

        if (firstChild.Char == ' ')
        {
            // A whitespace node becomes a child of itself, which lets repeated
            // spaces in the input be consumed by a single space in the pattern
            // (e.g. the words of "end sub" separated by several spaces).
            firstChild.ChildNodes[(int)' '] = firstChild;
        }

        return;
    }

    // Several characters can map to the same index; search the sibling chain for
    // the node of this character, stopping at the last sibling if there is none.
    while (candidate.NextSibling != null && candidate.Char != tailChar)
    {
        candidate = candidate.NextSibling;
    }

    if (candidate.Char == tailChar)
    {
        candidate.AddExpression(tail, caseSensitive, needSeparators, matcher, tag);
        return;
    }

    TokenTreeNode appended = new TokenTreeNode();
    candidate.NextSibling = appended;
    appended.AddExpression(tail, caseSensitive, needSeparators, matcher, tag);
}
/// <summary>
/// Creates a reference that wraps the given pattern matcher.
/// </summary>
/// <param name="matcher">Matcher stored in <c>Matcher</c>; not validated here.</param>
public PatternMatchReference(IPatternMatcher matcher)
{
    Matcher = matcher;
}
#pragma warning disable CA1822
/// <summary>
/// Creates a pattern matcher for <paramref name="pattern"/>. A simple matcher is
/// created when the options define no container split characters; otherwise the
/// pattern is split on those characters and a container matcher is created.
/// </summary>
/// <param name="pattern">Pattern text; must not be null, empty or whitespace.</param>
/// <param name="creationOptions">Culture, flags and optional container split characters; must not be <c>null</c>.</param>
/// <param name="linkedMatcher">
/// Optional linked matcher. It is passed on only when it is a <c>PatternMatcher</c>;
/// any other implementation is passed on as <c>null</c>.
/// </param>
/// <returns>The created matcher.</returns>
/// <exception cref="ArgumentException"><paramref name="pattern"/> is null, empty or whitespace.</exception>
/// <exception cref="ArgumentNullException"><paramref name="creationOptions"/> is <c>null</c>.</exception>
public IPatternMatcher CreatePatternMatcher(string pattern, PatternMatcherCreationOptions creationOptions, IPatternMatcher linkedMatcher)
{
    if (string.IsNullOrWhiteSpace(pattern))
    {
        throw new ArgumentException("A non-empty pattern is required to create a pattern matcher", nameof(pattern));
    }

    if (creationOptions == null)
    {
        throw new ArgumentNullException(nameof(creationOptions));
    }

    var linked = linkedMatcher as PatternMatcher;

    var flags = creationOptions.Flags;
    bool includeSpans = flags.HasFlag(IncludeMatchedSpans);
    bool allowFuzzy = flags.HasFlag(AllowFuzzyMatching);
    bool allowSubstring = flags.HasFlag(AllowSimpleSubstringMatching);

    var splitCharacters = creationOptions.ContainerSplitCharacters;
    if (splitCharacters == null)
    {
        return PatternMatcher.CreateSimplePatternMatcher(
            pattern,
            creationOptions.CultureInfo,
            includeSpans,
            allowFuzzy,
            allowSubstring,
            linked);
    }

    // Note the argument order: the container factory takes the span flag last.
    return PatternMatcher.CreateContainerPatternMatcher(
        pattern.Split(splitCharacters.ToArray()),
        splitCharacters,
        creationOptions.CultureInfo,
        allowFuzzy,
        allowSubstring,
        includeSpans,
        linked);
}
/// <summary>
/// Registers a prefixed pattern: inserts <paramref name="prefix"/> into the token
/// tree via <c>AddTokenInternal</c> and pushes a reference to
/// <paramref name="matcher"/> onto the front of the returned node's expression list.
/// </summary>
/// <param name="prefix">Prefix text; must not be null or empty.</param>
/// <param name="caseSensitive">Forwarded to <c>AddTokenInternal</c>.</param>
/// <param name="needSeparators">Stored on the new reference as <c>NeedSeparators</c>.</param>
/// <param name="matcher">Matcher wrapped by the new reference.</param>
/// <param name="tags">Stored on the new reference as <c>Tags</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="prefix"/> is null or empty.</exception>
public void AddPattern(string prefix, bool caseSensitive, bool needSeparators, IPatternMatcher matcher, object[] tags)
{
    if (string.IsNullOrEmpty(prefix))
    {
        throw new ArgumentNullException("prefix");
    }

    TokenTreeNode node = AddTokenInternal(prefix, caseSensitive);

    // Chain onto the list of the node that receives the reference. Linking to this
    // node's own FirstExpression instead would discard the references already
    // stored on the target node and graft this node's list onto it.
    var patternMatcherReference = new PatternMatchReference(matcher)
    {
        NextSibling = node.FirstExpression,
        Tags = tags,
        NeedSeparators = needSeparators
    };

    node.FirstExpression = patternMatcherReference;
}
/// <summary>
/// Removes a pattern matcher by delegating to <c>PatternRepository.Remove</c>.
/// </summary>
/// <param name="matcher">Matcher to remove.</param>
public void RemovePatternMatcher(IPatternMatcher matcher)
{
    PatternRepository.Remove(matcher);
}
/// <summary>
/// Builds a text block for a single classified text part by delegating to the
/// overload that takes several parts. Must be called on the UI thread.
/// </summary>
/// <param name="part">The part to render.</param>
/// <param name="patternMatcher">Optional matcher forwarded to the multi-part overload.</param>
/// <param name="hasMatch">Set by the multi-part overload.</param>
/// <returns>The text block produced by the multi-part overload.</returns>
public TextBlock ToTextBlock(ClassifiedText part, [CanBeNull] IPatternMatcher patternMatcher, out bool hasMatch)
{
    ThreadHelper.ThrowIfNotOnUIThread();

    var singlePart = new[] { part };
    return ToTextBlock(singlePart, patternMatcher, out hasMatch);
}
/// <summary>
/// Validates a matcher argument: first the matcher itself, then its <c>Pattern</c>,
/// are checked with <c>Requires.Instance.NotNull</c>.
/// </summary>
/// <param name="matcher">Matcher to validate.</param>
static void VerifyParameter(IPatternMatcher matcher)
{
    // Order matters: the matcher must be checked before its Pattern is read.
    Requires.Instance.NotNull(matcher, "matcher");

    Requires.Instance.NotNull(matcher.Pattern, "matcher.Pattern");
}
/// <summary>
/// Registers a pattern matcher by delegating to <c>PatternRepository.Add</c>.
/// </summary>
/// <param name="matcher">Matcher to register.</param>
public void AddPatternMatcher(IPatternMatcher matcher)
{
    PatternRepository.Add(matcher);
}
private void AddOutlineElement(TreeViewItem parent, [CanBeNull] OutlineElement outline, [CanBeNull] IPatternMatcher patternMatcher) { ThreadHelper.ThrowIfNotOnUIThread(); if (parent == null) { TreeView.Items.Clear(); _flattenTree.Clear(); } if (outline == null) { return; } var itemCollection = parent?.Items ?? TreeView.Items; var item = new TreeViewItem { Header = new OutlineItemControl { CrispImage = { Moniker = GdImageMonikers.GetMoniker(outline.Glyph) },