/// <summary>
/// Reads text from an OCR result in the form selected by the text definition and then
/// passes it through each of the definition's replace definitions in order.
/// </summary>
/// <param name="textDefinition">Supplies the <c>GetAs</c> selector and the optional <c>Replace</c> list.</param>
/// <param name="ocrResult">OCR result the text is read from.</param>
/// <returns>
/// The text after all replace definitions were applied. The starting text is empty when
/// <c>GetAs</c> is neither <c>Text</c> nor <c>Continuous</c>.
/// </returns>
private string ParseTextDefinition(TextDefinition textDefinition, OcrResult ocrResult)
{
    var mode = textDefinition.GetAs;

    string result;
    if (mode == GetTextAs.Text)
    {
        result = ocrResult.AsString();
    }
    else if (mode == GetTextAs.Continuous)
    {
        result = ocrResult.AsContinuousText();
    }
    else
    {
        // Any other selector leaves the text empty before replacements run.
        result = string.Empty;
    }

    var replacements = textDefinition.Replace;
    if (replacements == null || replacements.Length == 0)
    {
        return result;
    }

    // Each replace definition operates on the output of the previous one.
    foreach (var replacement in replacements)
    {
        result = ParseReplaceDefinition(replacement, result);
    }

    return result;
}
/// <summary>
/// Finds every match of the configured letter regexes in the OCR result's text and wraps
/// each matched value in an <c>AnalyzedText</c>.
/// </summary>
/// <param name="ocrResult">OCR result whose string form is searched and whose bounding box is copied to every item.</param>
/// <returns>
/// A lazily evaluated sequence with one item per regex match, typed as
/// <c>TextType.Letters</c> and carrying the bounding box of <paramref name="ocrResult"/>.
/// </returns>
public IEnumerable<AnalyzedText> Analyze(OcrResult ocrResult)
{
    var letterPatterns = Constants.TextAnalysisConfiguration.LettersRegexes;

    // Query form of: for each pattern, for each match of that pattern, project the match.
    return from pattern in letterPatterns
           from Match hit in Regex.Matches(ocrResult.AsString(), pattern)
           select new AnalyzedText()
           {
               Text = hit.Value,
               TextType = TextType.Letters.ToString(),
               BoundingBox = ocrResult.BoundingBox
           };
}
/// <summary>
/// Collects every value matched by a custom text type definition's regexes
/// (case-insensitive) in the text of an OCR result.
/// </summary>
/// <param name="textTypeDefinition">
/// Definition to evaluate. It must have a non-blank <c>Name</c>. When its <c>Text</c> is set,
/// the searched text is produced by <c>ParseTextDefinition</c>; otherwise the OCR result's
/// plain string form is searched.
/// </param>
/// <param name="ocrResult">OCR result providing the text to search.</param>
/// <returns>All matched values in regex order, or an empty array when the definition has no regexes.</returns>
/// <exception cref="Exception">The definition's <c>Name</c> is null, empty or whitespace.</exception>
private string[] ParseFromTextTypeDefintion(TextTypeDefinition textTypeDefinition, OcrResult ocrResult)
{
    if (string.IsNullOrWhiteSpace(textTypeDefinition.Name))
    {
        throw new Exception("Could not analyze custom text type definition, a definition is missing a name");
    }

    // A missing regex list is treated like an empty one instead of failing with a
    // NullReferenceException on the Length access.
    var patterns = textTypeDefinition.Regexes;
    if (patterns == null || patterns.Length == 0)
    {
        return new string[] { };
    }

    // Only read the raw OCR string when no text definition overrides it.
    string text = textTypeDefinition.Text != null
        ? ParseTextDefinition(textTypeDefinition.Text, ocrResult)
        : ocrResult.AsString();

    return patterns
        .SelectMany(pattern => Regex.Matches(text, pattern, RegexOptions.IgnoreCase).Cast<Match>())
        .Select(match => match.Value)
        .ToArray();
}
/// <summary>
/// Splits the OCR result's text into words. Commas are treated as separators in
/// addition to whitespace.
/// </summary>
/// <param name="ocrResult">OCR result whose string form is split.</param>
/// <returns>
/// The non-empty words in their original order. Text that is empty or consists only of
/// separators yields an empty sequence.
/// </returns>
public static IEnumerable<string> AllWords(this OcrResult ocrResult)
{
    string normalized = ocrResult.AsString().Replace(',', ' ');

    // Regex.Split emits an empty string when the input starts or ends with a separator
    // (for example a trailing comma) and for empty input; those are not words.
    var words = new List<string>();
    foreach (string candidate in Regex.Split(normalized, @"\s+"))
    {
        if (candidate.Length > 0)
        {
            words.Add(candidate);
        }
    }

    return words.ToArray();
}