コード例 #1
0
        /// <summary>
        /// Computes the index of the first hash segment that <paramref name="word"/> falls into.
        /// </summary>
        /// <param name="word">Word whose bounding box top edge (<c>YMin</c>) is used.</param>
        /// <returns>
        /// <c>(YMin - yEpsilon) / segmentSize</c>, raised to 0 when the result is negative.
        /// The value is not limited from above.
        /// </returns>
        private int GetStartIndex(GMWord word)
        {
            int firstSegment = (word.BoundingBox.YMin - yEpsilon) / segmentSize;

            // The tolerance can push the top edge above the first segment; fall back to segment 0.
            return firstSegment < 0 ? 0 : firstSegment;
        }
コード例 #2
0
        /// <summary>
        /// Computes the index of the last hash segment that <paramref name="word"/> falls into.
        /// </summary>
        /// <param name="word">Word whose bounding box bottom edge (<c>YMax</c>) is used.</param>
        /// <returns>
        /// <c>(YMax + yEpsilon) / segmentSize</c>, lowered to the last valid index of <c>hash</c>
        /// when it would run past the end. The value is not limited from below, so it is -1 for an
        /// empty <c>hash</c> and may be negative for a word lying entirely above the first segment.
        /// </returns>
        private int GetEndIndex(GMWord word)
        {
            // hash is an array: read Length directly instead of going through the LINQ Count() extension.
            int lastValidIndex = hash.Length - 1;
            int endIndex = (word.BoundingBox.YMax + yEpsilon) / segmentSize;

            return endIndex > lastValidIndex ? lastValidIndex : endIndex;
        }
コード例 #3
0
        /// <summary>
        /// Registers <paramref name="word"/> in every hash segment between the indices returned by
        /// <c>GetStartIndex</c> and <c>GetEndIndex</c>, both inclusive.
        /// </summary>
        /// <param name="word">Word to register.</param>
        private void AddWord(GMWord word)
        {
            int firstSegment = GetStartIndex(word);
            int lastSegment = GetEndIndex(word);

            // When the range is empty (first > last) the word is not stored anywhere.
            int segment = firstSegment;
            while (segment <= lastSegment)
            {
                hash[segment].Add(word);
                segment++;
            }
        }
コード例 #4
0
        /// <summary>
        /// Mirrors the block / paragraph / line / word structure of <paramref name="fineReaderModel"/> into
        /// <paramref name="sampleModel"/> and lazily yields every source word that
        /// <paramref name="tesseractCache"/> reports as contained.
        /// </summary>
        /// <remarks>
        /// Words found in the cache are copied with accuracy 100, all other words with accuracy 50.
        /// This is an iterator: nothing is copied until the result is enumerated, and a matched word is
        /// yielded before its copy is added to the sample model.
        /// </remarks>
        /// <param name="fineReaderModel">Model whose text blocks are traversed.</param>
        /// <param name="sampleModel">Model that receives the copied blocks, paragraphs, lines and words.</param>
        /// <param name="tesseractCache">Lookup queried, with <c>String.Compare</c> as text comparator, for each word.</param>
        /// <returns>The words of <paramref name="fineReaderModel"/> that the cache contains.</returns>
        private static IEnumerable <GMWord> EnumerateWordsFromFineReader(TextGeometryModel fineReaderModel, TextGeometryModel sampleModel,
                                                                         LinesHashInGeometryModel tesseractCache)
        {
            foreach (var sourceBlock in fineReaderModel.TextBlocks())
            {
                var sourceBlockBox = sourceBlock.BoundingBox;
                var targetBlock = sampleModel.AddTextBlock(
                    new BoundingBox(sourceBlockBox.XMin, sourceBlockBox.YMin, sourceBlockBox.XMax, sourceBlockBox.YMax));

                // Words that belong to a line inside a paragraph of this block.
                foreach (var sourceParagraph in sourceBlock.Paragraphs())
                {
                    var sourceParagraphBox = sourceParagraph.BoundingBox;
                    var targetParagraph = targetBlock.AddParagraph(
                        new BoundingBox(sourceParagraphBox.XMin, sourceParagraphBox.YMin,
                                        sourceParagraphBox.XMax, sourceParagraphBox.YMax));

                    foreach (var sourceLine in sourceParagraph.Lines())
                    {
                        var sourceLineBox = sourceLine.BoundingBox;
                        var targetLine = targetParagraph.AddLine(
                            new BoundingBox(sourceLineBox.XMin, sourceLineBox.YMin, sourceLineBox.XMax, sourceLineBox.YMax));

                        foreach (var lineWord in sourceLine.Words())
                        {
                            var lineWordBox = lineWord.BoundingBox;
                            bool foundInCache = tesseractCache.Contains(lineWord, String.Compare);
                            if (foundInCache)
                            {
                                yield return lineWord;
                            }

                            int lineWordAccuracy = foundInCache ? 100 : 50;
                            targetLine.AddWord(new GMWord(lineWordBox, lineWord.Text, lineWordAccuracy));
                        }
                    }
                }

                // Words attached directly to the block, outside any paragraph.
                foreach (var looseWord in sourceBlock.StandaloneWords())
                {
                    var looseWordBox = looseWord.BoundingBox;
                    bool foundInCache = tesseractCache.Contains(looseWord, String.Compare);
                    if (foundInCache)
                    {
                        yield return looseWord;
                    }

                    int looseWordAccuracy = foundInCache ? 100 : 50;
                    targetBlock.AddStandaloneWord(new GMWord(looseWordBox, looseWord.Text, looseWordAccuracy));
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Checks whether the hash already holds a word matching <paramref name="word"/>.
        /// </summary>
        /// <param name="word">
        /// Word to look for; only the segments between <c>GetStartIndex</c> and <c>GetEndIndex</c>
        /// for this word are searched.
        /// </param>
        /// <param name="comparator">
        /// Text comparison that returns 0 for a match. When <c>null</c>, an ordinal case-insensitive
        /// comparison is used.
        /// </param>
        /// <returns>
        /// <c>true</c> if a searched segment holds a word whose bounding box satisfies <c>EqualsBoxes</c>
        /// and whose text compares equal; otherwise <c>false</c>.
        /// </returns>
        public bool Contains(GMWord word, Func <string, string, int> comparator = null)
        {
            if (comparator == null)
            {
                comparator = (w1, w2) => String.Compare(w1, w2, StringComparison.OrdinalIgnoreCase);
            }

            var startIndex = GetStartIndex(word);
            var endIndex   = GetEndIndex(word);

            // The range does not change while scanning, so reject an out-of-range request once
            // up front instead of re-testing it (and skipping) on every iteration.
            if (startIndex >= hash.Length || endIndex >= hash.Length)
            {
                return false;
            }

            for (int i = startIndex; i <= endIndex; i++)
            {
                if (hash[i].Any(w => EqualsBoxes(w.BoundingBox, word.BoundingBox) && comparator(w.Text, word.Text) == 0))
                {
                    return true;
                }
            }

            return false;
        }