/// <summary>
/// Creates a character from its symbol and the recognition parameters reported for it.
/// </summary>
/// <param name="ch">The character value to store.</param>
/// <param name="charParams">
/// Source of the suspicious flag and of the Left/Top/Right/Bottom edges that form the
/// character rectangle.
/// </param>
public Character(char ch, FREngine.CharParams charParams)
{
    _ch = ch;
    _isSuspicious = charParams.IsSuspicious;

    // Read the four edges once, then build the rectangle from them.
    int left = charParams.Left;
    int top = charParams.Top;
    int right = charParams.Right;
    int bottom = charParams.Bottom;
    _rect = System.Drawing.Rectangle.FromLTRB(left, top, right, bottom);
}
/// <summary>
/// Counts the total and the suspicious characters contained in a block.
/// Table blocks are handled by recursing into the block of every cell; text blocks are
/// scanned paragraph by paragraph. Any other block type contributes nothing.
/// </summary>
/// <param name="pageIndex">Page index; only forwarded to the recursive calls.</param>
/// <param name="block">The block to inspect.</param>
/// <returns>The accumulated statistics for the block.</returns>
private RecognitionStatistics computeStatisticsForBlock(int pageIndex, FREngine.IBlock block)
{
    RecognitionStatistics stats = new RecognitionStatistics();

    switch (block.Type)
    {
        case FREngine.BlockTypeEnum.BT_Table:
        {
            FREngine.TableBlock table = block.GetAsTableBlock();
            int cellIndex = 0;
            while (cellIndex < table.Cells.Count)
            {
                // Each cell wraps a nested block; fold its statistics into ours.
                stats += computeStatisticsForBlock(pageIndex, table.Cells[cellIndex].Block);
                cellIndex++;
            }
            break;
        }

        case FREngine.BlockTypeEnum.BT_Text:
        {
            FREngine.ITextBlock textBlock = block.GetAsTextBlock();
            var paragraphs = textBlock.Text.Paragraphs;
            int paragraphTotal = paragraphs.Count;

            for (int parIndex = 0; parIndex < paragraphTotal; parIndex++)
            {
                FREngine.IParagraph paragraph = paragraphs[parIndex];
                string content = paragraph.Text;
                stats.TotalCharsCount += content.Length;

                // A fresh parameters object per paragraph, refilled for every character.
                FREngine.CharParams symbolParams = engine.CreateCharParams();
                for (int pos = 0; pos < content.Length; pos++)
                {
                    paragraph.GetCharParams(pos, symbolParams);
                    if (!symbolParams.IsSuspicious)
                    {
                        continue;
                    }
                    stats.SuspiciousCharsCount++;
                }
            }
            break;
        }
    }

    return stats;
}
/// <summary>
/// Splits a paragraph into whitespace-separated words and distributes those words over
/// the paragraph's lines.
/// </summary>
/// <remarks>
/// Side effects: every non-whitespace character is appended to <c>Text</c>, every word is
/// appended to <c>words</c> and <c>listWords</c>, and every produced line is appended to
/// <c>lineAll</c>.
/// </remarks>
/// <param name="par">Paragraph whose text, per-character parameters and lines are read.</param>
/// <param name="iBlock">
/// Index of the block the paragraph belongs to.
/// NOTE(review): this value is not used by the method; confirm whether it was meant to be
/// stored on the produced lines.
/// </param>
/// <returns>
/// One <c>CustomLine</c> per paragraph line, in enumeration order; an empty list when the
/// paragraph contains no words.
/// </returns>
private List<CustomLine> buildTextFromParagraph(FREngine.IParagraph par, int iBlock)
{
    List<CustomWord> wordItems = new List<CustomWord>();
    string text = par.Text;
    FREngine.CharParams charParams = par.Application.CreateCharParams();

    int wordStart = 0;
    int wordStartInText = Text.Count;
    List<Character> wordCharacters = new List<Character>();

    for (int iChar = 0; iChar < text.Length; iChar++)
    {
        char ch = text[iChar];
        par.GetCharParams(iChar, charParams);

        if (!char.IsWhiteSpace(ch))
        {
            // Only visible characters are stored; one instance serves both lists.
            Character character = new Character(ch, charParams);
            Text.Add(character);
            wordCharacters.Add(character);
            continue;
        }

        // Splitting words by whitespaces
        // TODO: line break, hyphen
        if (wordStart < iChar)
        {
            appendWordFromCharacters(wordStartInText, wordCharacters, wordItems);
            wordCharacters = new List<Character>();
        }

        wordStart = iChar + 1;
        wordStartInText = Text.Count;
    }

    // Add last word if necessary
    if (wordStart < text.Length)
    {
        appendWordFromCharacters(wordStartInText, wordCharacters, wordItems);
    }

    List<CustomLine> csLineItems = new List<CustomLine>();
    if (!wordItems.Any())
    {
        // No words: no lines are produced and the paragraph lines are not queried.
        return csLineItems;
    }

    int iLine = 0;
    foreach (FREngine.IParagraphLine paragraphLine in par.Lines)
    {
        CustomLine lineItem = new CustomLine()
        {
            X = paragraphLine.Left,
            Y = paragraphLine.Top,
            Height = paragraphLine.Bottom - paragraphLine.Top,
            Width = paragraphLine.Right - paragraphLine.Left,
            // NOTE(review): hard-coded to 0 while iBlock is left unused - confirm intent.
            ParentIndex = 0,
            Index = iLine,
        };

        // Collect the words that IsCollucion matches to this line. Word indexes follow
        // text order, not X order.
        List<CustomWord> lineWords = new List<CustomWord>();
        for (int iWord = 0; iWord < wordItems.Count; iWord++)
        {
            CustomWord word = wordItems[iWord];
            if (!IsCollucion(lineItem, word))
            {
                continue;
            }

            word.Index = lineWords.Count;
            word.ParentIndex = iLine;
            lineWords.Add(word);
        }
        lineItem.WordItems = lineWords;

        // The line text is assembled left to right, so order by X before joining.
        var orderedValues = lineItem.WordItems.OrderBy(w => w.X).Select(w => w.Value).ToList();
        lineItem.Value = string.Join(" ", orderedValues);
        lineItem.ValueWithoutSpace = string.Join("", orderedValues);

        csLineItems.Add(lineItem);
        lineAll.Add(lineItem);
        iLine++;
    }

    // Lines were appended with increasing Index, so the list is already ordered.
    return csLineItems;
}

/// <summary>
/// Builds the word that ends at the last character currently in <c>Text</c>, registers it
/// in <c>words</c> and <c>listWords</c>, and appends it to <paramref name="wordItems"/>.
/// </summary>
/// <param name="wordStartInText">Index in <c>Text</c> of the word's first character.</param>
/// <param name="wordCharacters">The word's characters; must not be empty.</param>
/// <param name="wordItems">Paragraph-level word list that receives the new word.</param>
private void appendWordFromCharacters(int wordStartInText, List<Character> wordCharacters, List<CustomWord> wordItems)
{
    int wordEndInText = Text.Count - 1;
    Word textWord = new Word(wordStartInText, wordEndInText, Text);
    words.Add(textWord);

    CustomWord word = new CustomWord();
    word.Value = textWord.ToString();
    word.X = wordCharacters.Min(c => c.Rect.X);
    word.Y = wordCharacters.Min(c => c.Rect.Y);

    // Width runs from the leftmost X to the right edge of the character with the largest X.
    int xMax = wordCharacters.Max(c => c.Rect.X);
    word.Width = xMax + wordCharacters.First(c => c.Rect.X == xMax).Rect.Width - word.X;

    // Height is the tallest single character, not the span of the characters' bounds.
    word.Height = wordCharacters.Max(c => c.Rect.Height);

    wordItems.Add(word);
    listWords.Add(word);
}