/// <summary>
/// Collects the text lines of the current field from PDF char boxes.
/// </summary>
/// <returns>
/// One string per detected line; null when there is no actual rectangle or,
/// for a table column field, when the table field was not found.
/// </returns>
List<string> getPdfTextLines()
{
    if (ActualRectangle == null)
    {
        return null;
    }
    RectangleF ar = (RectangleF)ActualRectangle;

    // The field's own setting takes precedence; the active template supplies the fallback.
    TextAutoInsertSpace textAutoInsertSpace = ActualField.TextAutoInsertSpace ?? page.PageCollection.ActiveTemplate.TextAutoInsertSpace;

    if (ActualField.ColumnOfTable == null)
    {
        return Pdf.GetTextLinesSurroundedByRectangle(page.PdfCharBoxs, ar, textAutoInsertSpace);
    }

    // Same guard as getOcrTextLines(): without a found table field there are no char boxes to split.
    if (!TableFieldActualInfo.Found)
    {
        return null;
    }

    List<Pdf.CharBox> cbs = (List<Pdf.CharBox>)TableFieldActualInfo.GetValue(Template.Field.Types.PdfCharBoxs);
    List<string> ls = new List<string>();
    // Lines are built from the table's char boxes so that every column yields the same
    // number of lines; only the chars lying entirely within this column's rectangle are kept.
    foreach (Line<Pdf.CharBox> l in GetLines(cbs, textAutoInsertSpace, null))
    {
        StringBuilder sb = new StringBuilder();
        foreach (Pdf.CharBox cb in l.CharBoxs)
        {
            if (cb.R.Left >= ar.Left && cb.R.Right <= ar.Right && cb.R.Top >= ar.Top && cb.R.Bottom <= ar.Bottom)
            {
                sb.Append(cb.Char);
            }
        }
        ls.Add(sb.ToString());
    }
    return ls;
}
/// <summary>
/// Groups char boxes into text lines ordered top to bottom, optionally inserting
/// space char boxes into wide horizontal gaps.
/// </summary>
/// <param name="cbs">Char boxes to split; they are processed in ascending X order.</param>
/// <param name="textAutoInsertSpace">
/// Space auto-insertion settings; insertion is active only when it is not null and its Threshold is positive.
/// </param>
/// <returns>The detected lines, each holding its char boxes left to right.</returns>
public static List<Line> GetLines(IEnumerable<CharBox> cbs, TextAutoInsertSpace textAutoInsertSpace)
{
    bool spaceAutoInsert = textAutoInsertSpace != null && textAutoInsertSpace.Threshold > 0;
    cbs = cbs.OrderBy(a => a.R.X).ToList();
    List<Line> lines = new List<Line>();
    foreach (CharBox cb in cbs)
    {
        bool placed = false;
        for (int i = 0; i < lines.Count && !placed; i++)
        {
            if (cb.R.Bottom < lines[i].Top)
            {
                // The char lies entirely above this line: open a new line in front of it.
                Line l = new Line { Top = cb.R.Top, Bottom = cb.R.Bottom };
                l.CharBoxs.Add(cb);
                lines.Insert(i, l);
                placed = true;
            }
            else if (cb.R.Bottom - cb.R.Height / 2 <= lines[i].Bottom)
            {
                // The char's vertical middle is not below the line's bottom: it belongs to this line.
                if (spaceAutoInsert && lines[i].CharBoxs.Count > 0)
                {
                    CharBox cb0 = lines[i].CharBoxs[lines[i].CharBoxs.Count - 1];
                    float gap = cb.R.Left - cb0.R.Right;
                    if (gap > (cb.R.Width + cb.R.Height) / textAutoInsertSpace.Threshold)
                    {
                        // Average width of the two neighbours (was mistakenly cb averaged with itself).
                        float spaceWidth = (cb0.R.Width + cb.R.Width) / 2;
                        // A non-positive width would make the division below meaningless.
                        if (spaceWidth > 0)
                        {
                            int spaceNumber = (int)Math.Ceiling(gap / spaceWidth);
                            for (int j = 0; j < spaceNumber; j++)
                            {
                                // Spaces are laid out inside the gap, starting at the previous char's right edge.
                                lines[i].CharBoxs.Add(new CharBox { Char = textAutoInsertSpace.Representative, R = new System.Drawing.RectangleF(cb0.R.Right + spaceWidth * j, 0, 0, 0) });
                            }
                        }
                    }
                }
                lines[i].CharBoxs.Add(cb);
                if (lines[i].Top > cb.R.Top)
                {
                    lines[i].Top = cb.R.Top;
                }
                if (lines[i].Bottom < cb.R.Bottom)
                {
                    lines[i].Bottom = cb.R.Bottom;
                }
                placed = true;
            }
        }
        if (!placed)
        {
            // Below every existing line (or the very first char): open a new last line.
            Line l = new Line { Top = cb.R.Top, Bottom = cb.R.Bottom };
            l.CharBoxs.Add(cb);
            lines.Add(l);
        }
    }
    return lines;
}
/// <summary>
/// Renders the char boxes as text: the chars of each detected line are concatenated
/// and the lines are separated with "\r\n".
/// </summary>
/// <param name="cbs">Char boxes to render.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to GetLines.</param>
/// <returns>The text of all lines; an empty string when there are no lines.</returns>
public static string GetText(IEnumerable<CharBox> cbs, TextAutoInsertSpace textAutoInsertSpace)
{
    StringBuilder text = new StringBuilder();
    bool isFirstLine = true;
    foreach (Line line in GetLines(cbs, textAutoInsertSpace))
    {
        if (!isFirstLine)
        {
            text.Append("\r\n");
        }
        isFirstLine = false;
        foreach (CharBox charBox in line.CharBoxs)
        {
            text.Append(charBox.Char);
        }
    }
    return text.ToString();
}
/// <summary>
/// Creates the control and wires the search-rectangle-margin checkbox to its value editor.
/// </summary>
/// <param name="textAutoInsertSpace">Space auto-insertion settings stored for later use by the control.</param>
public AnchorPdfTextControl(TextAutoInsertSpace textAutoInsertSpace)
{
    InitializeComponent();

    this.textAutoInsertSpace = textAutoInsertSpace;

    cSearchRectangleMargin.CheckedChanged += (sender, args) =>
    {
        // The editor is usable only while the checkbox is ticked.
        SearchRectangleMargin.Enabled = cSearchRectangleMargin.Checked;

        // A non-negative value is already in place and is left untouched.
        if (SearchRectangleMargin.Value >= 0)
        {
            return;
        }

        decimal margin;
        if (!cSearchRectangleMargin.Checked)
        {
            margin = -1;
        }
        else if (_object == null || _object.ParentAnchorId != null)
        {
            margin = (decimal)Settings.Constants.CoordinateDeviationMargin;
        }
        else
        {
            margin = 100;
        }
        SearchRectangleMargin.Value = margin;
    };
}
/// <summary>
/// Renders the char boxes as a single string with the text lines separated by "\r\n".
/// </summary>
/// <param name="cbs">Char boxes to render.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to GetTextLines.</param>
/// <returns>The joined text of all lines.</returns>
public static string GetText(IEnumerable<CharBox> cbs, TextAutoInsertSpace textAutoInsertSpace)
{
    IEnumerable<string> textLines = GetTextLines(cbs, textAutoInsertSpace);
    return string.Join("\r\n", textLines);
}
/// <summary>
/// Splits the char boxes into lines and returns the text of each line.
/// </summary>
/// <typeparam name="CharBoxT">Concrete char box type.</typeparam>
/// <param name="cbs">Char boxes to split.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to GetLines.</param>
/// <returns>One string per line, built by concatenating the chars of the line's char boxes.</returns>
public static List<string> GetTextLines<CharBoxT>(IEnumerable<CharBoxT> cbs, TextAutoInsertSpace textAutoInsertSpace) where CharBoxT : CharBox, new()
{
    List<string> textLines = new List<string>();
    foreach (Line<CharBoxT> line in GetLines(cbs, textAutoInsertSpace))
    {
        StringBuilder lineText = new StringBuilder();
        for (int k = 0; k < line.CharBoxs.Count; k++)
        {
            CharBox charBox = line.CharBoxs[k];
            lineText.Append(charBox.Char);
        }
        textLines.Add(lineText.ToString());
    }
    return textLines;
}
/// <summary>
/// Returns the text lines formed by the char boxes selected by
/// GetCharBoxsSurroundedByRectangle for the given rectangle.
/// </summary>
/// <param name="cbs">Candidate char boxes.</param>
/// <param name="r">Rectangle used to select char boxes.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to GetLines.</param>
/// <returns>One string per line.</returns>
public static List<string> GetTextLinesSurroundedByRectangle(IEnumerable<CharBox> cbs, System.Drawing.RectangleF r, TextAutoInsertSpace textAutoInsertSpace)
{
    IEnumerable<CharBox> enclosed = GetCharBoxsSurroundedByRectangle(cbs, r);
    List<string> textLines = new List<string>();
    // A single buffer is reused: it is emptied at the start of every line.
    StringBuilder buffer = new StringBuilder();
    foreach (Line line in GetLines(enclosed, textAutoInsertSpace))
    {
        buffer.Length = 0;
        foreach (CharBox charBox in line.CharBoxs)
        {
            buffer.Append(charBox.Char);
        }
        textLines.Add(buffer.ToString());
    }
    return textLines;
}
/// <summary>
/// Returns the string form of each line built from the char boxes selected by
/// GetCharBoxsSurroundedByRectangle for the given rectangle.
/// </summary>
/// <param name="cbs">Candidate char boxes.</param>
/// <param name="r">Rectangle used to select char boxes.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to GetLines.</param>
/// <returns>One string per line, as produced by the line's ToString().</returns>
public static List<string> GetTextLinesSurroundedByRectangle(IEnumerable<CharBox> cbs, System.Drawing.RectangleF r, TextAutoInsertSpace textAutoInsertSpace)
{
    IEnumerable<CharBox> enclosed = GetCharBoxsSurroundedByRectangle(cbs, r);
    List<string> textLines = new List<string>();
    foreach (Line line in GetLines(enclosed, textAutoInsertSpace))
    {
        string lineText = line.ToString();
        textLines.Add(lineText);
    }
    return textLines;
}
/// <summary>
/// Splits the char boxes into lines and returns the string of each line.
/// </summary>
/// <typeparam name="CharBoxT">Concrete char box type.</typeparam>
/// <param name="cbs">Char boxes to split.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to GetLines.</param>
/// <param name="charFilter">Char size filter passed on to GetLines.</param>
/// <returns>One string per line, as produced by the line's GetString().</returns>
public static List<string> GetTextLines<CharBoxT>(IEnumerable<CharBoxT> cbs, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter) where CharBoxT : CharBox, new()
{
    List<string> textLines = new List<string>();
    foreach (Line<CharBoxT> line in GetLines(cbs, textAutoInsertSpace, charFilter))
    {
        textLines.Add(line.GetString());
    }
    return textLines;
}
//public static string GetTextByTopLeftCoordinates(List<CharBox> orderedCbs, RectangleF r)
//{
//    orderedCbs = orderedCbs.Where(a => (r.Contains(a.R) /*|| d.IntersectsWith(a.R)*/)).ToList();
//    return orderedCbs.Aggregate(new StringBuilder(), (sb, n) => sb.Append(n)).ToString();
//}

/// <summary>
/// Returns the text selected by the rectangle as a single string with lines separated by "\r\n".
/// </summary>
/// <param name="cbs">Candidate char boxes.</param>
/// <param name="r">Rectangle used to select char boxes.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to GetTextLinesSurroundedByRectangle.</param>
/// <returns>The joined text lines.</returns>
public static string GetTextSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace)
{
    IEnumerable<string> textLines = GetTextLinesSurroundedByRectangle(cbs, r, textAutoInsertSpace);
    return string.Join("\r\n", textLines);
}
/// <summary>
/// Returns the text lines formed by the char boxes selected by
/// GetCharBoxsSurroundedByRectangle for the given rectangle.
/// </summary>
/// <param name="cbs">Candidate char boxes.</param>
/// <param name="r">Rectangle used to select char boxes.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to Page.GetLines.</param>
/// <returns>One string per line, as produced by the line's GetString().</returns>
public static List<string> GetTextLinesSurroundedByRectangle(IEnumerable<CharBox> cbs, System.Drawing.RectangleF r, TextAutoInsertSpace textAutoInsertSpace)
{
    IEnumerable<CharBox> enclosed = GetCharBoxsSurroundedByRectangle(cbs, r);
    // No char filter is applied here (null is passed).
    return (from line in Page.GetLines(enclosed, textAutoInsertSpace, null)
            select line.GetString()).ToList();
}
/// <summary>
/// Collects the text lines of the current field from OCR data.
/// </summary>
/// <remarks>
/// Settings are read from the field when it is an OCR field that defines them,
/// otherwise from the active template.
/// </remarks>
/// <returns>
/// One string per line; null when there is no actual rectangle, when the field image
/// yielded no char boxes, or when the table field was not found.
/// </returns>
List<string> getOcrTextLines()
{
    if (ActualRectangle == null)
    {
        return null;
    }
    RectangleF ar = (RectangleF)ActualRectangle;
    Template.Field.Ocr aof = ActualField as Template.Field.Ocr;
    TextAutoInsertSpace textAutoInsertSpace = aof?.TextAutoInsertSpace ?? page.PageCollection.ActiveTemplate.TextAutoInsertSpace;

    // ---- Stand-alone field (not a table column) ----
    if (ActualField.ColumnOfTable == null)
    {
        if (!(aof?.SingleFieldFromFieldImage ?? page.PageCollection.ActiveTemplate.SingleFieldFromFieldImage))
        {
            // Use the char boxes already recognized for the page.
            return Ocr.GetTextLinesSurroundedByRectangle(page.ActiveTemplateOcrCharBoxs, ar, textAutoInsertSpace, ActualField.CharFilter ?? page.PageCollection.ActiveTemplate.CharFilter);
        }

        // Run OCR on the field's rectangle of the bitmap.
        List<Ocr.CharBox> fieldCharBoxs = Ocr.This.GetCharBoxsSurroundedByRectangle(page.ActiveTemplateBitmap, ar, aof?.TesseractPageSegMode ?? page.PageCollection.ActiveTemplate.TesseractPageSegMode);
        if (fieldCharBoxs == null)
        {
            return null;
        }
        return GetTextLines(fieldCharBoxs, textAutoInsertSpace, ActualField.CharFilter ?? page.PageCollection.ActiveTemplate.CharFilter);
    }

    // ---- Table column ----
    if (!TableFieldActualInfo.Found)
    {
        return null;
    }
    List<Ocr.CharBox> tableCharBoxs = (List<Ocr.CharBox>)TableFieldActualInfo.GetValue(Template.Field.Types.OcrCharBoxs);
    List<string> result = new List<string>();

    if (aof?.ColumnCellFromCellImage ?? page.PageCollection.ActiveTemplate.ColumnCellFromCellImage)
    {
        // The table's lines define the cell rows; each cell is then OCR'ed from its own image.
        List<Line<Ocr.CharBox>> tableLines = GetLines(tableCharBoxs, null, ActualField.CharFilter ?? page.PageCollection.ActiveTemplate.CharFilter);
        if (aof?.AdjustLineBorders ?? page.PageCollection.ActiveTemplate.AdjustLineBorders)
        {
            AdjustBorders(tableLines, TableFieldActualInfo.ActualRectangle.Value);
        }
        else
        {
            PadLines(tableLines, ActualField.LinePaddingY ?? page.PageCollection.ActiveTemplate.LinePaddingY);
        }

        foreach (Line<Ocr.CharBox> tableLine in tableLines)
        {
            var tableRectangle = TableFieldActualInfo.ActualRectangle.Value;

            // Horizontal extent of the cell: the overlap of the column rectangle and the table rectangle.
            float left = tableRectangle.X;
            if (ar.X > tableRectangle.X)
            {
                left = ar.X;
            }
            float right = tableRectangle.Right;
            if (ar.Right < tableRectangle.Right)
            {
                right = ar.Right;
            }
            RectangleF cell = new RectangleF(left, tableLine.Top, right - left, tableLine.Bottom - tableLine.Top);

            List<Ocr.CharBox> cellCharBoxs = Ocr.This.GetCharBoxsSurroundedByRectangle(page.ActiveTemplateBitmap, cell, aof?.TesseractPageSegMode ?? page.PageCollection.ActiveTemplate.TesseractPageSegMode);
            if (cellCharBoxs != null)
            {
                // Whatever lines OCR finds inside the cell are glued together without a separator.
                result.Add(string.Join("", GetTextLines(cellCharBoxs, textAutoInsertSpace, ActualField.CharFilter ?? page.PageCollection.ActiveTemplate.CharFilter)));
            }
            else
            {
                result.Add("");
            }
        }
    }
    else
    {
        // Lines come from the table's char boxes; only chars entirely within the column rectangle are kept.
        foreach (Line<Ocr.CharBox> tableLine in GetLines(tableCharBoxs, textAutoInsertSpace, ActualField.CharFilter ?? page.PageCollection.ActiveTemplate.CharFilter))
        {
            StringBuilder cellText = new StringBuilder();
            foreach (Ocr.CharBox charBox in tableLine.CharBoxs)
            {
                bool insideColumn = charBox.R.Left >= ar.Left && charBox.R.Right <= ar.Right && charBox.R.Top >= ar.Top && charBox.R.Bottom <= ar.Bottom;
                if (!insideColumn)
                {
                    continue;
                }
                cellText.Append(charBox.Char);
            }
            result.Add(cellText.ToString());
        }
    }
    return result;
}
/// <summary>
/// Passes the char boxes through removeDuplicates and splits the result into lines.
/// </summary>
/// <param name="cbs">Char boxes to process.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to GetLines.</param>
/// <returns>The lines built from the de-duplicated char boxes.</returns>
public static List<Line> RemoveDuplicatesAndGetLines(IEnumerable<CharBox> cbs, TextAutoInsertSpace textAutoInsertSpace)
{
    var uniqueCharBoxs = removeDuplicates(cbs);
    return GetLines(uniqueCharBoxs, textAutoInsertSpace);
}
/// <summary>
/// Groups char boxes into text lines ordered top to bottom, sorts each line left to right
/// and optionally inserts space char boxes into wide horizontal gaps.
/// </summary>
/// <typeparam name="CharBoxT">Concrete char box type; inserted spaces are created as this type.</typeparam>
/// <param name="cbs">Char boxes to split.</param>
/// <param name="textAutoInsertSpace">
/// Space auto-insertion settings, may be null. When IgnoreSourceSpaces is true, source " " chars are dropped;
/// when Threshold is positive, spaces are inserted into gaps.
/// </param>
/// <param name="charFilter">
/// Optional size filter, may be null. A non-positive MaxWidth/MaxHeight means "no upper limit".
/// </param>
/// <returns>The detected lines.</returns>
public static List<Line<CharBoxT>> GetLines<CharBoxT>(IEnumerable<CharBoxT> cbs, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter) where CharBoxT : CharBox, new()
{
    if (textAutoInsertSpace?.IgnoreSourceSpaces == true)
    {
        cbs = cbs.Where(a => a.Char != " ");
    }
    if (charFilter != null)//to filter out wrong OCR chars like borders etc. which break lines
    {
        float maxWidth = charFilter.MaxWidth <= 0 ? float.MaxValue : charFilter.MaxWidth;
        float maxHeight = charFilter.MaxHeight <= 0 ? float.MaxValue : charFilter.MaxHeight;
        cbs = cbs.Where(a => a.R.Width >= charFilter.MinWidth && a.R.Width <= maxWidth && a.R.Height >= charFilter.MinHeight && a.R.Height <= maxHeight);
    }

    // Pass 1: assign every char to a line by the vertical position of its middle.
    List<Line<CharBoxT>> lines = new List<Line<CharBoxT>>();
    foreach (CharBoxT cb in cbs)
    {
        float mY = cb.R.Bottom - cb.R.Height / 2;
        bool placed = false;
        for (int i = 0; i < lines.Count && !placed; i++)
        {
            if (mY < lines[i].Top)
            {
                // The char's middle is above this line: open a new line in front of it.
                Line<CharBoxT> l = new Line<CharBoxT> { Top = cb.R.Top, Bottom = cb.R.Bottom };
                l.CharBoxs.Add(cb);
                lines.Insert(i, l);
                placed = true;
            }
            else if (mY <= lines[i].Bottom)//the char's center is in the line
            {
                lines[i].CharBoxs.Add(cb);
                if (lines[i].Top > cb.R.Top)
                {
                    lines[i].Top = cb.R.Top;
                }
                if (lines[i].Bottom < cb.R.Bottom)
                {
                    lines[i].Bottom = cb.R.Bottom;
                }
                placed = true;
            }
        }
        if (!placed)
        {
            Line<CharBoxT> l = new Line<CharBoxT> { Top = cb.R.Top, Bottom = cb.R.Bottom };
            l.CharBoxs.Add(cb);
            lines.Add(l);
        }
    }

    // Pass 2: merge neighbouring lines whose overlap exceeds half the height of either of them.
    for (int i = 1; i < lines.Count; i++)
    {
        float intersectionH2 = (lines[i - 1].Bottom - lines[i].Top) * 2;
        if (intersectionH2 > lines[i - 1].Height || intersectionH2 > lines[i].Height)
        {
            lines[i - 1].CharBoxs.AddRange(lines[i].CharBoxs);
            if (lines[i - 1].Top > lines[i].Top)
            {
                lines[i - 1].Top = lines[i].Top;
            }
            if (lines[i - 1].Bottom < lines[i].Bottom)
            {
                lines[i - 1].Bottom = lines[i].Bottom;
            }
            lines.RemoveAt(i);
            i--; // re-examine the merged line against the next one
        }
    }

    lines.ForEach(a => a.CharBoxs = a.CharBoxs.OrderBy(b => b.R.X).ToList());

    // Pass 3: insert space char boxes into wide gaps.
    if (textAutoInsertSpace?.Threshold > 0)
    {
        foreach (Line<CharBoxT> l in lines)
        {
            for (int i = 1; i < l.CharBoxs.Count; i++)
            {
                CharBox cb0 = l.CharBoxs[i - 1];
                CharBox cb = l.CharBoxs[i];
                float gap = cb.R.Left - cb0.R.Right;
                // NOTE(review): the limit is built from 0.8 / Height, so it shrinks for taller chars - confirm this is intended.
                if (gap > (0.8 / cb0.R.Height + 0.8 / cb.R.Height) * textAutoInsertSpace.Threshold)
                {
                    float spaceWidth = (cb0.R.Width + cb.R.Width) / 2;
                    if (spaceWidth <= 0)
                    {
                        // Dividing by a non-positive width gives an infinite or meaningless space count.
                        continue;
                    }
                    int spaceNumber = (int)Math.Ceiling(gap / spaceWidth);
                    for (int j = 0; j < spaceNumber; j++)
                    {
                        // Insert at i + j so that the spaces stay in ascending X order like the rest of the line.
                        l.CharBoxs.Insert(i + j, new CharBoxT { Char = textAutoInsertSpace.Representative, R = new RectangleF(cb0.R.Right + spaceWidth * j, cb0.R.Y, spaceWidth, cb.R.Height) });
                    }
                    i += spaceNumber; // skip the inserted spaces; i now points at cb again
                }
            }
        }
    }
    return lines;
}
//public static string GetTextSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace, Template.CharFilter charFilter)
//{
//    return string.Join("\r\n", GetTextLinesSurroundedByRectangle(cbs, r, textAutoInsertSpace, charFilter));
//}

/// <summary>
/// Returns the lines built from the char boxes selected by
/// GetCharBoxsSurroundedByRectangle for the given rectangle.
/// </summary>
/// <param name="cbs">Candidate char boxes.</param>
/// <param name="r">Rectangle used to select char boxes.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to Page.GetLines.</param>
/// <param name="charFilter">Char size filter passed on to Page.GetLines.</param>
/// <returns>The lines produced by Page.GetLines.</returns>
public static List<Page.Line<CharBox>> GetLinesSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter)
{
    List<CharBox> enclosed = GetCharBoxsSurroundedByRectangle(cbs, r);
    return Page.GetLines(enclosed, textAutoInsertSpace, charFilter);
}
/// <summary>
/// Returns the text lines formed by the char boxes selected by
/// GetCharBoxsSurroundedByRectangle for the given rectangle.
/// </summary>
/// <param name="cbs">Candidate char boxes.</param>
/// <param name="r">Rectangle used to select char boxes.</param>
/// <param name="textAutoInsertSpace">Space auto-insertion settings passed on to Page.GetLines.</param>
/// <param name="charFilter">Char size filter passed on to Page.GetLines.</param>
/// <returns>One string per line, as produced by the line's GetString().</returns>
public static List<string> GetTextLinesSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter)
{
    List<CharBox> enclosed = GetCharBoxsSurroundedByRectangle(cbs, r);
    List<string> textLines = new List<string>();
    foreach (var line in Page.GetLines(enclosed, textAutoInsertSpace, charFilter))
    {
        textLines.Add(line.GetString());
    }
    return textLines;
}
/// <summary>
/// Splits the chars into non-intersecting lines ordered top to bottom, sorts each line
/// left to right and optionally inserts space char boxes into wide horizontal gaps.
/// </summary>
/// <typeparam name="CharBoxT">Concrete char box type; inserted spaces are created as this type.</typeparam>
/// <param name="cbs">Char boxes to split.</param>
/// <param name="textAutoInsertSpace">
/// Space auto-insertion settings, may be null. When IgnoreSourceSpaces is true, source " " chars are dropped;
/// when Threshold is positive, spaces are inserted into gaps.
/// </param>
/// <returns>The detected lines.</returns>
public static List<Line<CharBoxT>> GetLines2<CharBoxT>(IEnumerable<CharBoxT> cbs, TextAutoInsertSpace textAutoInsertSpace) where CharBoxT : CharBox, new()
{//!!!no line must intersect with another!!!
    bool spaceAutoInsert = textAutoInsertSpace?.Threshold > 0;
    if (textAutoInsertSpace?.IgnoreSourceSpaces == true)
    {
        cbs = cbs.Where(a => a.Char != " ");
    }

    // Pass 1: put every char on the first line it vertically touches, merging lines it bridges.
    List<Line<CharBoxT>> lines = new List<Line<CharBoxT>>();
    foreach (CharBoxT cb in cbs)
    {
        bool placed = false;
        for (int i = 0; i < lines.Count && !placed; i++)
        {
            if (cb.R.Bottom < lines[i].Top)
            {
                // Entirely above this line (and below the previous one): open a new line here.
                Line<CharBoxT> l = new Line<CharBoxT> { Top = cb.R.Top, Bottom = cb.R.Bottom };
                l.CharBoxs.Add(cb);
                lines.Insert(i, l);
                placed = true;
            }
            else if (cb.R.Top <= lines[i].Bottom) //the char is on the line
            {
                // Absorb every following line the char also reaches, not only the first one,
                // so that no two lines are left intersecting through this char.
                while (i + 1 < lines.Count && cb.R.Bottom >= lines[i + 1].Top)
                {
                    lines[i].CharBoxs.AddRange(lines[i + 1].CharBoxs);
                    if (lines[i].Top > lines[i + 1].Top)
                    {
                        lines[i].Top = lines[i + 1].Top;
                    }
                    if (lines[i].Bottom < lines[i + 1].Bottom)
                    {
                        lines[i].Bottom = lines[i + 1].Bottom;
                    }
                    lines.RemoveAt(i + 1);
                }
                lines[i].CharBoxs.Add(cb);
                if (lines[i].Top > cb.R.Top)
                {
                    lines[i].Top = cb.R.Top;
                }
                if (lines[i].Bottom < cb.R.Bottom)
                {
                    lines[i].Bottom = cb.R.Bottom;
                }
                placed = true;
            }
        }
        if (!placed)
        {
            Line<CharBoxT> l = new Line<CharBoxT> { Top = cb.R.Top, Bottom = cb.R.Bottom };
            l.CharBoxs.Add(cb);
            lines.Add(l);
        }
    }

    // Pass 2: merge neighbouring lines whose overlap exceeds half the height of either of them.
    for (int i = 1; i < lines.Count; i++)
    {
        float intersectionH2 = (lines[i - 1].Bottom - lines[i].Top) * 2;
        if (intersectionH2 > lines[i - 1].Height || intersectionH2 > lines[i].Height)
        {
            lines[i - 1].CharBoxs.AddRange(lines[i].CharBoxs);
            if (lines[i - 1].Top > lines[i].Top)
            {
                lines[i - 1].Top = lines[i].Top;
            }
            if (lines[i - 1].Bottom < lines[i].Bottom)
            {
                lines[i - 1].Bottom = lines[i].Bottom;
            }
            lines.RemoveAt(i);
            i--; // re-examine the merged line against the next one
        }
    }

    lines.ForEach(a => a.CharBoxs = a.CharBoxs.OrderBy(b => b.R.X).ToList());

    // Pass 3: insert space char boxes into wide gaps.
    if (spaceAutoInsert)
    {
        foreach (Line<CharBoxT> l in lines)
        {
            for (int i = 1; i < l.CharBoxs.Count; i++)
            {
                CharBox cb0 = l.CharBoxs[i - 1];
                CharBox cb = l.CharBoxs[i];
                float gap = cb.R.Left - cb0.R.Right;
                // NOTE(review): the limit is built from 0.8 / Height, so it shrinks for taller chars - confirm this is intended.
                if (gap > (0.8 / cb0.R.Height + 0.8 / cb.R.Height) * textAutoInsertSpace.Threshold)
                {
                    float spaceWidth = (cb0.R.Width + cb.R.Width) / 2;
                    if (spaceWidth <= 0)
                    {
                        // Dividing by a non-positive width gives an infinite or meaningless space count.
                        continue;
                    }
                    int spaceNumber = (int)Math.Ceiling(gap / spaceWidth);
                    for (int j = 0; j < spaceNumber; j++)
                    {
                        // Insert at i + j so that the spaces stay in ascending X order like the rest of the line.
                        l.CharBoxs.Insert(i + j, new CharBoxT { Char = textAutoInsertSpace.Representative, R = new RectangleF(cb0.R.Right + spaceWidth * j, cb0.R.Y, spaceWidth, cb.R.Height) });
                    }
                    i += spaceNumber; // skip the inserted spaces; i now points at cb again
                }
            }
        }
    }
    return lines;
}