/// <summary>
/// Collects character statistics for a single block, descending into table cells.
/// </summary>
/// <param name="pageIndex">Page index; it is only forwarded to the recursive calls made for table cells.</param>
/// <param name="block">Block to inspect. Only table and text blocks contribute to the result.</param>
/// <returns>
/// For a table block, the sum of the statistics of every cell block. For a text block, the total
/// character count of all paragraphs plus the number of characters flagged as suspicious.
/// For any other block type, a freshly constructed <c>RecognitionStatistics</c>.
/// </returns>
private RecognitionStatistics computeStatisticsForBlock(int pageIndex, FREngine.IBlock block)
{
    RecognitionStatistics stats = new RecognitionStatistics();

    switch (block.Type)
    {
        case FREngine.BlockTypeEnum.BT_Table:
        {
            // A table carries no text of its own here: its statistics are the sum over its cells.
            var cells = block.GetAsTableBlock().Cells;
            int cellCount = cells.Count;
            for (int cellIndex = 0; cellIndex < cellCount; cellIndex++)
            {
                stats += computeStatisticsForBlock(pageIndex, cells[cellIndex].Block);
            }
            break;
        }

        case FREngine.BlockTypeEnum.BT_Text:
        {
            var paragraphs = block.GetAsTextBlock().Text.Paragraphs;
            int paragraphCount = paragraphs.Count;
            for (int paragraphIndex = 0; paragraphIndex < paragraphCount; paragraphIndex++)
            {
                FREngine.IParagraph paragraph = paragraphs[paragraphIndex];
                string paragraphText = paragraph.Text;
                stats.TotalCharsCount += paragraphText.Length;

                // One parameter object per paragraph; GetCharParams refills it for every character.
                FREngine.CharParams charInfo = engine.CreateCharParams();
                for (int charIndex = 0; charIndex < paragraphText.Length; charIndex++)
                {
                    paragraph.GetCharParams(charIndex, charInfo);
                    if (charInfo.IsSuspicious)
                    {
                        stats.SuspiciousCharsCount++;
                    }
                }
            }
            break;
        }
    }

    return stats;
}
/// <summary>
/// Splits a recognized paragraph into whitespace-separated words and distributes those words
/// over the paragraph's lines.
/// </summary>
/// <remarks>
/// Side effects: every non-whitespace character is appended to <c>Text</c>, every word span to
/// <c>words</c>, every word to <c>listWords</c>, and every produced line to <c>lineAll</c>.
/// A word is attached to each line for which <c>IsCollucion</c> returns true; its <c>Index</c>
/// and <c>ParentIndex</c> are overwritten by the last line that accepted it.
/// </remarks>
/// <param name="par">Paragraph whose text, per-character parameters and lines are read.</param>
/// <param name="iBlock">Index of the block that owns the paragraph; stored as <c>ParentIndex</c> of every line.</param>
/// <returns>
/// One <c>CustomLine</c> per paragraph line, in enumeration order. The list is empty when the
/// paragraph contains no words.
/// </returns>
private List<CustomLine> buildTextFromParagraph(FREngine.IParagraph par, int iBlock)
{
    List<CustomWord> wordItems = new List<CustomWord>();
    string text = par.Text;
    FREngine.CharParams charParams = par.Application.CreateCharParams();

    int wordStart = 0;                      // position in the paragraph text where the current word starts
    int wordStartInText = Text.Count;       // position in the shared Text list where the current word starts
    List<Character> wordChars = new List<Character>();

    for (int iChar = 0; iChar < text.Length; iChar++)
    {
        char ch = text[iChar];
        par.GetCharParams(iChar, charParams);

        if (!char.IsWhiteSpace(ch))
        {
            // Only visible characters are stored; whitespace merely delimits words.
            Text.Add(new Character(ch, charParams));
            wordChars.Add(new Character(ch, charParams));
            continue;
        }

        // Splitting words by whitespaces
        // TODO: line break, hyphen
        if (wordStart < iChar)
        {
            wordItems.Add(registerWord(wordStartInText, Text.Count - 1, wordChars));
            wordChars = new List<Character>();
        }
        wordStart = iChar + 1;
        wordStartInText = Text.Count;
    }

    // Add last word if necessary
    if (wordStart < text.Length)
    {
        wordItems.Add(registerWord(wordStartInText, Text.Count - 1, wordChars));
    }

    List<CustomLine> csLineItems = new List<CustomLine>();
    if (!wordItems.Any())
    {
        return csLineItems;
    }

    int iLine = 0;
    foreach (FREngine.IParagraphLine item in par.Lines)
    {
        CustomLine lineItem = new CustomLine()
        {
            X = item.Left,
            Y = item.Top,
            Height = item.Bottom - item.Top,
            Width = item.Right - item.Left,
            // The line belongs to the block being processed (previously hard-coded to 0,
            // which left the iBlock parameter unused).
            ParentIndex = iBlock,
            Index = iLine,
        };

        int lineIndex = iLine;
        // NOTE(review): IsCollucion presumably tests geometric overlap between line and word - confirm.
        List<CustomWord> lineWords = wordItems
            .Where(w => IsCollucion(lineItem, w))
            .Select((w, position) =>
            {
                w.Index = position;
                w.ParentIndex = lineIndex;
                return w;
            })
            .ToList();
        lineItem.WordItems = lineWords;

        // Words are emitted left to right regardless of the order in which they were recognized.
        var orderedValues = lineWords.OrderBy(w => w.X).Select(w => w.Value).ToList();
        lineItem.Value = string.Join(" ", orderedValues);
        lineItem.ValueWithoutSpace = string.Join("", orderedValues);

        csLineItems.Add(lineItem);
        lineAll.Add(lineItem);
        iLine++;
    }

    // Lines were appended with strictly increasing Index, so the list is already ordered.
    return csLineItems;
}

/// <summary>
/// Builds the <c>CustomWord</c> for one whitespace-delimited word and records it in
/// <c>words</c> and <c>listWords</c>.
/// </summary>
/// <param name="wordStartInText">Index in <c>Text</c> of the word's first character.</param>
/// <param name="wordEndInText">Index in <c>Text</c> of the word's last character.</param>
/// <param name="wordChars">The word's characters; must contain at least one element.</param>
/// <returns>The newly created word.</returns>
private CustomWord registerWord(int wordStartInText, int wordEndInText, List<Character> wordChars)
{
    Word span = new Word(wordStartInText, wordEndInText, Text);
    words.Add(span);

    CustomWord word = new CustomWord();
    word.Value = span.ToString();
    word.X = wordChars.Min(c => c.Rect.X);
    word.Y = wordChars.Min(c => c.Rect.Y);

    // Width runs from the smallest X to the right edge of the character with the largest X.
    int xMax = wordChars.Max(c => c.Rect.X);
    word.Width = xMax + wordChars.First(c => c.Rect.X == xMax).Rect.Width - word.X;

    // Height is the height of the tallest character, not (max bottom - min top).
    word.Height = wordChars.Max(c => c.Rect.Height);

    listWords.Add(word);
    return word;
}
/// <summary>
/// Converts a recognized block into the custom document model.
/// </summary>
/// <remarks>
/// A table block is converted into a <c>CustomTable</c> that is appended to <c>Page.TableItems</c>.
/// Its cells are converted recursively and grouped into rows by identical <c>Y</c> value.
/// A text block is converted into a <c>CustomBlock</c> whose lines come from
/// <c>buildTextFromParagraph</c>.
/// </remarks>
/// <param name="block">Block to convert.</param>
/// <param name="iBlock">
/// Index stored as <c>Index</c> of a returned text block and as <c>ParentIndex</c> of the rows of a table.
/// </param>
/// <returns>
/// The populated <c>CustomBlock</c> for a text block; <c>null</c> for a table block (which is
/// stored on the page instead) and for every other block type.
/// </returns>
private CustomBlock buildTextFromBlock(FREngine.IBlock block, int iBlock)
{
    switch (block.Type)
    {
        case FREngine.BlockTypeEnum.BT_Table:
        {
            FREngine.ITableBlock table = block.GetAsTableBlock();
            var tableBounds = table.Region.BoundingRectangle;

            CustomTable customTable = new CustomTable();
            customTable.X = tableBounds.Left;
            customTable.Y = tableBounds.Top;
            customTable.Width = tableBounds.Width;
            customTable.Height = tableBounds.Height;
            customTable.ParentIndex = Page.Index;
            // The index is claimed before the cells are visited, so tables nested in cells get later indices.
            customTable.Index = tableIndex;
            tableIndex++;

            // Convert every cell; cells that are not text blocks yield null and are skipped.
            List<CustomBlock> cellBlocks = new List<CustomBlock>();
            var cells = table.Cells;
            int cellCount = cells.Count;
            for (int cellIndex = 0; cellIndex < cellCount; cellIndex++)
            {
                CustomBlock cellBlock = buildTextFromBlock(cells[cellIndex].Block, cellIndex);
                if (cellBlock != null)
                {
                    cellBlocks.Add(cellBlock);
                }
            }

            // Cells sharing exactly the same Y coordinate form one row.
            int rowIndex = 0;
            foreach (var rowCells in cellBlocks.GroupBy(cell => cell.Y))
            {
                CustomRow row = new CustomRow();
                row.Width = customTable.Width;
                row.Height = rowCells.Max(cell => cell.Height);
                row.BlockItems = rowCells.ToList();
                row.X = rowCells.Min(cell => cell.X);
                row.Y = rowCells.Min(cell => cell.Y);
                row.Value = string.Join("\t", rowCells.Select(cell => cell.Value));
                row.Index = rowIndex;
                row.ParentIndex = iBlock;

                customTable.RowItems.Add(row);
                rowIndex++;
            }

            customTable.Value = string.Join("\n", customTable.RowItems.Select(row => row.Value));
            Page.TableItems.Add(customTable);

            // Tables live on the page, not in the block list.
            return null;
        }

        case FREngine.BlockTypeEnum.BT_Text:
        {
            FREngine.ITextBlock textBlock = block.GetAsTextBlock();
            var textBounds = textBlock.Region.BoundingRectangle;

            CustomBlock textResult = new CustomBlock();
            textResult.Index = iBlock;
            textResult.X = textBounds.Left;
            textResult.Y = textBounds.Top;
            textResult.Width = textBounds.Width;
            textResult.Height = textBounds.Height;

            var paragraphs = textBlock.Text.Paragraphs;
            int paragraphCount = paragraphs.Count;
            for (int paragraphIndex = 0; paragraphIndex < paragraphCount; paragraphIndex++)
            {
                FREngine.IParagraph paragraph = paragraphs[paragraphIndex];
                textResult.LineItems.AddRange(buildTextFromParagraph(paragraph, iBlock));
            }

            textResult.Value = string.Join("\n", textResult.LineItems.Select(line => line.Value));
            return textResult;
        }

        default:
            // Block types other than table and text are not converted.
            return null;
    }
}