/// <summary>
/// Computes the index of the first hash segment that <paramref name="word"/> is mapped to.
/// </summary>
/// <param name="word">Word whose <c>BoundingBox.YMin</c> is used.</param>
/// <returns>
/// <c>(YMin - yEpsilon) / segmentSize</c>, raised to 0 when the quotient is negative.
/// No upper bound is applied here.
/// </returns>
private int GetStartIndex(GMWord word)
{
    // yEpsilon is presumably a vertical tolerance that widens the word's span -- confirm at its declaration.
    int rawIndex = (word.BoundingBox.YMin - yEpsilon) / segmentSize;
    return rawIndex < 0 ? 0 : rawIndex;
}
/// <summary>
/// Computes the index of the last hash segment that <paramref name="word"/> is mapped to.
/// </summary>
/// <param name="word">Word whose <c>BoundingBox.YMax</c> is used.</param>
/// <returns>
/// <c>(YMax + yEpsilon) / segmentSize</c>, lowered to the last valid index of <c>hash</c>
/// when the quotient would run past the end. For an empty <c>hash</c> this yields -1.
/// </returns>
private int GetEndIndex(GMWord word)
{
    int endIndex = (word.BoundingBox.YMax + yEpsilon) / segmentSize;

    // Use the Length property (as Contains does) rather than enumerating through LINQ Count().
    int lastIndex = hash.Length - 1;
    if (endIndex > lastIndex)
    {
        endIndex = lastIndex;
    }

    return endIndex;
}
/// <summary>
/// Adds <paramref name="word"/> to every hash segment from <c>GetStartIndex(word)</c>
/// through <c>GetEndIndex(word)</c>, inclusive.
/// </summary>
/// <param name="word">Word to register in the hash.</param>
private void AddWord(GMWord word)
{
    int firstSegment = GetStartIndex(word);
    int lastSegment = GetEndIndex(word);

    // When firstSegment is greater than lastSegment the loop body never runs and nothing is added.
    int segment = firstSegment;
    while (segment <= lastSegment)
    {
        hash[segment].Add(word);
        segment++;
    }
}
/// <summary>
/// Copies the block / paragraph / line / word structure of <paramref name="fineReaderModel"/>
/// into <paramref name="sampleModel"/> and yields each source word for which
/// <c>tesseractCache.Contains(word, String.Compare)</c> returns true.
/// </summary>
/// <param name="fineReaderModel">Model whose text blocks are traversed.</param>
/// <param name="sampleModel">Model that receives the copied blocks, paragraphs, lines and words.</param>
/// <param name="tesseractCache">Lookup used to decide whether a word counts as matched.</param>
/// <returns>A lazily evaluated sequence of the matched source words.</returns>
/// <remarks>
/// Matched words are copied with accuracy 100, all other words with accuracy 50.
/// Because this is an iterator, <paramref name="sampleModel"/> is only filled while the
/// result is being enumerated; the copy of a matched word is added after the consumer
/// resumes enumeration past that word.
/// </remarks>
private static IEnumerable<GMWord> EnumerateWordsFromFineReader(TextGeometryModel fineReaderModel, TextGeometryModel sampleModel, LinesHashInGeometryModel tesseractCache)
{
    foreach (var sourceBlock in fineReaderModel.TextBlocks())
    {
        var blockBounds = sourceBlock.BoundingBox;
        var targetBlock = sampleModel.AddTextBlock(
            new BoundingBox(blockBounds.XMin, blockBounds.YMin, blockBounds.XMax, blockBounds.YMax));

        foreach (var sourceParagraph in sourceBlock.Paragraphs())
        {
            var paragraphBounds = sourceParagraph.BoundingBox;
            var targetParagraph = targetBlock.AddParagraph(
                new BoundingBox(paragraphBounds.XMin, paragraphBounds.YMin, paragraphBounds.XMax, paragraphBounds.YMax));

            foreach (var sourceLine in sourceParagraph.Lines())
            {
                var lineBounds = sourceLine.BoundingBox;
                var targetLine = targetParagraph.AddLine(
                    new BoundingBox(lineBounds.XMin, lineBounds.YMin, lineBounds.XMax, lineBounds.YMax));

                foreach (var candidate in sourceLine.Words())
                {
                    // Capture the box before yielding, exactly as the copy is built from this snapshot.
                    var candidateBounds = candidate.BoundingBox;
                    bool matched = tesseractCache.Contains(candidate, String.Compare);
                    if (matched)
                    {
                        yield return(candidate);
                    }

                    targetLine.AddWord(new GMWord(candidateBounds, candidate.Text, matched ? 100 : 50));
                }
            }
        }

        // Words attached directly to the block get the same treatment as words inside lines.
        foreach (var candidate in sourceBlock.StandaloneWords())
        {
            var candidateBounds = candidate.BoundingBox;
            bool matched = tesseractCache.Contains(candidate, String.Compare);
            if (matched)
            {
                yield return(candidate);
            }

            targetBlock.AddStandaloneWord(new GMWord(candidateBounds, candidate.Text, matched ? 100 : 50));
        }
    }
}
/// <summary>
/// Determines whether the hash holds a word whose bounding box matches that of
/// <paramref name="word"/> (as decided by <c>EqualsBoxes</c>) and whose text compares equal.
/// </summary>
/// <param name="word">Word to look up; it selects the hash segments to scan via
/// <c>GetStartIndex</c> and <c>GetEndIndex</c>.</param>
/// <param name="comparator">
/// Text comparison that returns 0 for equal strings. When null, an ordinal,
/// case-insensitive <c>String.Compare</c> is used.
/// </param>
/// <returns>true if any scanned segment contains a matching word; otherwise false.</returns>
public bool Contains(GMWord word, Func<string, string, int> comparator = null)
{
    if (comparator == null)
    {
        comparator = (w1, w2) => String.Compare(w1, w2, StringComparison.OrdinalIgnoreCase);
    }

    int startIndex = GetStartIndex(word);
    int endIndex = GetEndIndex(word);

    // This range check does not depend on the loop variable, so it is evaluated once here
    // instead of on every iteration; an out-of-range span can never produce a match.
    if (startIndex >= hash.Length || endIndex >= hash.Length)
    {
        return false;
    }

    for (int i = startIndex; i <= endIndex; i++)
    {
        if (hash[i].Any(w => EqualsBoxes(w.BoundingBox, word.BoundingBox) && comparator(w.Text, word.Text) == 0))
        {
            return true;
        }
    }

    return false;
}