Exemplo n.º 1
0
        /// <summary>
        /// Counts the total and the suspicious recognized characters contained in a block.
        /// </summary>
        /// <remarks>
        /// Table blocks are handled by recursing into the block of every cell and summing the
        /// per-cell results. Text blocks are scanned paragraph by paragraph. Any other block
        /// type yields a freshly constructed (untouched) statistics object.
        /// </remarks>
        /// <param name="pageIndex">Index of the page; only forwarded to the recursive calls.</param>
        /// <param name="block">Block whose characters are counted.</param>
        /// <returns>The accumulated statistics for <paramref name="block"/>.</returns>
        private RecognitionStatistics computeStatisticsForBlock(int pageIndex, FREngine.IBlock block)
        {
            RecognitionStatistics stats = new RecognitionStatistics();

            switch (block.Type)
            {
                case FREngine.BlockTypeEnum.BT_Table:
                {
                    FREngine.TableBlock table = block.GetAsTableBlock();
                    int cellIndex = 0;
                    while (cellIndex < table.Cells.Count)
                    {
                        // A cell wraps its own block, which may itself be text or a nested table.
                        stats += computeStatisticsForBlock(pageIndex, table.Cells[cellIndex].Block);
                        cellIndex++;
                    }
                    break;
                }

                case FREngine.BlockTypeEnum.BT_Text:
                {
                    FREngine.ITextBlock textBlock = block.GetAsTextBlock();
                    int paragraphTotal = textBlock.Text.Paragraphs.Count;

                    for (int parIndex = 0; parIndex < paragraphTotal; parIndex++)
                    {
                        FREngine.IParagraph paragraph = textBlock.Text.Paragraphs[parIndex];
                        string paragraphText = paragraph.Text;

                        stats.TotalCharsCount += paragraphText.Length;

                        // One parameter object per paragraph, refilled for every character.
                        FREngine.CharParams parameters = engine.CreateCharParams();
                        for (int charIndex = 0; charIndex < paragraphText.Length; charIndex++)
                        {
                            paragraph.GetCharParams(charIndex, parameters);
                            if (!parameters.IsSuspicious)
                            {
                                continue;
                            }

                            stats.SuspiciousCharsCount++;
                        }
                    }
                    break;
                }
            }

            return stats;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Splits a recognized paragraph into whitespace-separated words and distributes those
        /// words over one <see cref="CustomLine"/> per line of the paragraph.
        /// </summary>
        /// <remarks>
        /// Side effects on members declared outside this method: every non-whitespace character
        /// is appended to <c>Text</c>, every word to <c>words</c> and <c>listWords</c>, and every
        /// produced line to <c>lineAll</c>.
        /// A word is attached to a line when <c>IsCollucion(line, word)</c> returns true
        /// (presumably a rectangle-overlap test; the helper is defined elsewhere - confirm).
        /// Line breaks and hyphens are not treated as word separators (still a TODO).
        /// </remarks>
        /// <param name="par">Paragraph to convert.</param>
        /// <param name="iBlock">Index of the owning block. Not used by this method; kept for callers.</param>
        /// <returns>
        /// The lines of the paragraph in line order, or an empty list when the paragraph contains no words.
        /// </returns>
        private List <CustomLine> buildTextFromParagraph(FREngine.IParagraph par, int iBlock)
        {
            List <CustomWord> wordItems     = new List <CustomWord>();
            List <Character>  charaterItems = new List <Character>();

            string text = par.Text;
            FREngine.CharParams charParams = par.Application.CreateCharParams();

            int wordStart       = 0;
            int wordStartInText = Text.Count;

            for (int iChar = 0; iChar < text.Length; iChar++)
            {
                char ch = text[iChar];
                par.GetCharParams(iChar, charParams);

                if (!char.IsWhiteSpace(ch))
                {
                    // The same instance serves the global text and the bounds of the current word;
                    // the per-word list never leaves this method.
                    Character character = new Character(ch, charParams);
                    Text.Add(character);
                    charaterItems.Add(character);
                    continue;
                }

                // Splitting words by whitespaces
                // TODO: line break, hyphen
                if (wordStart < iChar)
                {
                    wordItems.Add(buildWordFromCharacters(wordStartInText, charaterItems));
                    charaterItems.Clear();
                }

                wordStart       = iChar + 1;
                wordStartInText = Text.Count;
            }

            // Add last word if the paragraph does not end with whitespace
            if (wordStart < text.Length)
            {
                wordItems.Add(buildWordFromCharacters(wordStartInText, charaterItems));
            }

            List <CustomLine> csLineItems = new List <CustomLine>();
            if (!wordItems.Any())
            {
                return(csLineItems);
            }

            int iLine = 0;
            foreach (FREngine.IParagraphLine item in par.Lines)
            {
                CustomLine lineItem = new CustomLine()
                {
                    X           = item.Left,
                    Y           = item.Top,
                    Height      = item.Bottom - item.Top,
                    Width       = item.Right - item.Left,
                    ParentIndex = 0,
                    Index       = iLine,
                };

                // Collect the words belonging to this line, numbering them in paragraph order.
                List <CustomWord> lineWords = new List <CustomWord>();
                for (int iWord = 0; iWord < wordItems.Count; iWord++)
                {
                    CustomWord candidate = wordItems[iWord];
                    if (!IsCollucion(lineItem, candidate))
                    {
                        continue;
                    }

                    candidate.Index       = lineWords.Count;
                    candidate.ParentIndex = iLine;
                    lineWords.Add(candidate);
                }
                lineItem.WordItems = lineWords;

                // Text of the line reads left to right, so order by X once and reuse the result.
                var orderedValues = lineWords.OrderBy(x => x.X).Select(x => x.Value).ToList();
                lineItem.Value             = string.Join(" ", orderedValues);
                lineItem.ValueWithoutSpace = string.Join("", orderedValues);

                csLineItems.Add(lineItem);
                lineAll.Add(lineItem);
                iLine++;
            }

            // Lines were appended in increasing Index order, so no additional sort is needed.
            return(csLineItems);
        }

        /// <summary>
        /// Creates the word that spans from <paramref name="wordStartInText"/> to the current end of
        /// <c>Text</c>, registers it in <c>words</c> and <c>listWords</c>, and returns its
        /// <see cref="CustomWord"/> with a bounding box computed from its characters.
        /// </summary>
        /// <param name="wordStartInText">Index in <c>Text</c> of the first character of the word.</param>
        /// <param name="wordCharacters">Characters of the word; must contain at least one element.</param>
        /// <returns>The newly created word.</returns>
        private CustomWord buildWordFromCharacters(int wordStartInText, List <Character> wordCharacters)
        {
            int  wordEndInText = Text.Count - 1;
            Word recognized    = new Word(wordStartInText, wordEndInText, Text);
            words.Add(recognized);

            CustomWord word = new CustomWord();
            word.X     = wordCharacters.Min(x => x.Rect.X);
            word.Y     = wordCharacters.Min(x => x.Rect.Y);
            word.Value = recognized.ToString();

            // Width runs from the leftmost X to the right edge of the character with the largest X.
            int xMax = wordCharacters.Max(x => x.Rect.X);
            word.Width  = xMax + wordCharacters.First(x => x.Rect.X == xMax).Rect.Width - word.X;
            word.Height = wordCharacters.Max(x => x.Rect.Height);

            listWords.Add(word);
            return(word);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Converts a recognized layout block into the custom page model.
        /// </summary>
        /// <remarks>
        /// A text block becomes a <see cref="CustomBlock"/> holding the lines of all its paragraphs.
        /// A table block is converted into a <see cref="CustomTable"/> whose rows are the cell
        /// blocks grouped by identical Y coordinate; the table is appended to
        /// <c>Page.TableItems</c>, <c>tableIndex</c> is advanced, and <c>null</c> is returned.
        /// Every other block type is ignored.
        /// </remarks>
        /// <param name="block">Block to convert.</param>
        /// <param name="iBlock">
        /// Index stored on the resulting text block, and used as the parent index of table rows.
        /// </param>
        /// <returns>The converted text block, or <c>null</c> for table blocks and unsupported types.</returns>
        private CustomBlock buildTextFromBlock(FREngine.IBlock block, int iBlock)
        {
            FREngine.BlockTypeEnum blockType = block.Type;

            if (blockType == FREngine.BlockTypeEnum.BT_Text)
            {
                FREngine.ITextBlock textBlock = block.GetAsTextBlock();
                var textBounds = textBlock.Region.BoundingRectangle;

                CustomBlock converted = new CustomBlock();
                converted.Index  = iBlock;
                converted.X      = textBounds.Left;
                converted.Y      = textBounds.Top;
                converted.Width  = textBounds.Width;
                converted.Height = textBounds.Height;

                var paragraphs = textBlock.Text.Paragraphs;
                for (int parIndex = 0; parIndex < paragraphs.Count; parIndex++)
                {
                    converted.LineItems.AddRange(buildTextFromParagraph(paragraphs[parIndex], iBlock));
                }

                converted.Value = string.Join("\n", converted.LineItems.Select(line => line.Value));
                return converted;
            }

            if (blockType != FREngine.BlockTypeEnum.BT_Table)
            {
                return null;
            }

            FREngine.ITableBlock tableBlock = block.GetAsTableBlock();
            var tableBounds = tableBlock.Region.BoundingRectangle;

            CustomTable table = new CustomTable();
            table.X           = tableBounds.Left;
            table.Y           = tableBounds.Top;
            table.Width       = tableBounds.Width;
            table.Height      = tableBounds.Height;
            table.ParentIndex = Page.Index;
            table.Index       = tableIndex;
            // Advance before visiting the cells so that nested tables receive later indices.
            tableIndex++;

            // Convert every cell; cells that do not yield a text block are skipped.
            List<CustomBlock> cellBlocks = new List<CustomBlock>();
            var cells = tableBlock.Cells;
            for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
            {
                CustomBlock cellBlock = buildTextFromBlock(cells[cellIndex].Block, cellIndex);
                if (cellBlock == null)
                {
                    continue;
                }

                cellBlocks.Add(cellBlock);
            }

            // Cells sharing the same top coordinate form one row.
            int rowIndex = 0;
            foreach (var rowCells in cellBlocks.GroupBy(cell => cell.Y))
            {
                CustomRow row = new CustomRow();
                row.Width       = table.Width;
                row.Height      = rowCells.Max(cell => cell.Height);
                row.BlockItems  = rowCells.ToList();
                row.X           = rowCells.Min(cell => cell.X);
                row.Y           = rowCells.Min(cell => cell.Y);
                row.Value       = string.Join("\t", rowCells.Select(cell => cell.Value));
                row.Index       = rowIndex;
                row.ParentIndex = iBlock;

                table.RowItems.Add(row);
                rowIndex++;
            }

            table.Value = string.Join("\n", table.RowItems.Select(row => row.Value));
            Page.TableItems.Add(table);

            // Tables are published through Page.TableItems rather than returned.
            return null;
        }