/// <summary>
/// Returns the text of every line built from the char boxes that
/// <c>GetCharBoxsSurroundedByRectangle</c> selects for the given rectangle.
/// </summary>
/// <param name="cbs">Candidate char boxes.</param>
/// <param name="r">Rectangle passed to <c>GetCharBoxsSurroundedByRectangle</c> (presumably the boxes lying inside it are kept; the exact rule is defined by that helper).</param>
/// <param name="textAutoInsertSpace">Space handling options forwarded to <c>Page.GetLines</c>.</param>
/// <param name="charFilter">Char size filter forwarded to <c>Page.GetLines</c>.</param>
/// <returns>One string per line, in the order <c>Page.GetLines</c> returns the lines.</returns>
public static List<string> GetTextLinesSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter)
{
    List<CharBox> enclosed = GetCharBoxsSurroundedByRectangle(cbs, r);

    List<string> texts = new List<string>();
    foreach (var line in Page.GetLines(enclosed, textAutoInsertSpace, charFilter))
    {
        texts.Add(line.GetString());
    }
    return texts;
}
//public static string GetTextSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace, Template.CharFilter charFilter)
//{
//    return string.Join("\r\n", GetTextLinesSurroundedByRectangle(cbs, r, textAutoInsertSpace, charFilter));
//}

/// <summary>
/// Builds lines from the char boxes that <c>GetCharBoxsSurroundedByRectangle</c>
/// selects for the given rectangle.
/// </summary>
/// <param name="cbs">Candidate char boxes.</param>
/// <param name="r">Rectangle passed to <c>GetCharBoxsSurroundedByRectangle</c> (presumably the boxes lying inside it are kept; the exact rule is defined by that helper).</param>
/// <param name="textAutoInsertSpace">Space handling options forwarded to <c>Page.GetLines</c>.</param>
/// <param name="charFilter">Char size filter forwarded to <c>Page.GetLines</c>.</param>
/// <returns>The lines produced by <c>Page.GetLines</c> for the selected char boxes.</returns>
public static List<Page.Line<CharBox>> GetLinesSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter)
{
    // Kept on its own lines: when fused onto the line of the commented-out method above,
    // the "//" marker turns this whole method into a comment.
    List<CharBox> enclosed = GetCharBoxsSurroundedByRectangle(cbs, r);
    return Page.GetLines(enclosed, textAutoInsertSpace, charFilter);
}
/// <summary>
/// Arranges the char boxes into lines and returns their text as a single string,
/// with the lines separated by CR LF.
/// </summary>
/// <param name="cbs">Char boxes to convert to text.</param>
/// <param name="textAutoInsertSpace">Space handling options forwarded to <c>GetTextLines</c>.</param>
/// <param name="charFilter">Char size filter forwarded to <c>GetTextLines</c>.</param>
/// <returns>The line strings joined with "\r\n".</returns>
public static string GetText(IEnumerable<CharBox> cbs, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter)
{
    var textLines = GetTextLines(cbs, textAutoInsertSpace, charFilter);
    return string.Join("\r\n", textLines);
}
/// <summary>
/// Groups char boxes into lines by vertical position, merges lines that overlap strongly,
/// orders the boxes of each line by X and, when requested, inserts generated space boxes
/// into wide horizontal gaps.
/// </summary>
/// <typeparam name="CharBoxT">Char box type; it needs a parameterless constructor because space boxes are created here.</typeparam>
/// <param name="cbs">Char boxes to arrange. The sequence is enumerated once and is not modified.</param>
/// <param name="textAutoInsertSpace">
/// Optional. When <c>IgnoreSourceSpaces</c> is true, boxes whose <c>Char</c> is " " are dropped first.
/// When <c>Threshold</c> is greater than 0, boxes carrying <c>Representative</c> are inserted into gaps.
/// </param>
/// <param name="charFilter">
/// Optional size filter. Boxes narrower/lower than <c>MinWidth</c>/<c>MinHeight</c> or wider/higher than
/// <c>MaxWidth</c>/<c>MaxHeight</c> are dropped; a maximum that is 0 or negative means "no upper limit".
/// </param>
/// <returns>The lines, kept in ascending Y order while they are built; each line's boxes are ordered by X.</returns>
public static List<Line<CharBoxT>> GetLines<CharBoxT>(IEnumerable<CharBoxT> cbs, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter) where CharBoxT : CharBox, new()
{
    if (textAutoInsertSpace?.IgnoreSourceSpaces == true)
    {
        cbs = cbs.Where(a => a.Char != " ");
    }

    // Filter out wrong OCR chars (borders etc.) which would otherwise break the lines.
    if (charFilter != null)
    {
        float maxWidth = charFilter.MaxWidth <= 0 ? float.MaxValue : charFilter.MaxWidth;
        float maxHeight = charFilter.MaxHeight <= 0 ? float.MaxValue : charFilter.MaxHeight;
        cbs = cbs.Where(a => a.R.Width >= charFilter.MinWidth && a.R.Width <= maxWidth && a.R.Height >= charFilter.MinHeight && a.R.Height <= maxHeight);
    }

    // Pass 1: put every char into the first line that contains its vertical center,
    // creating new lines where no existing line matches.
    List<Line<CharBoxT>> lines = new List<Line<CharBoxT>>();
    foreach (CharBoxT cb in cbs)
    {
        float mY = cb.R.Bottom - cb.R.Height / 2; // vertical center of the char
        int insertAt = lines.Count;               // default: append after all existing lines
        bool joined = false;
        for (int i = 0; i < lines.Count; i++)
        {
            Line<CharBoxT> line = lines[i];
            if (mY < line.Top)
            {
                // The center lies above line i: a new line goes right before it.
                insertAt = i;
                break;
            }
            if (mY <= line.Bottom)
            {
                // The char's center is in the line: join it and grow the line to cover the char.
                line.CharBoxs.Add(cb);
                if (line.Top > cb.R.Top)
                {
                    line.Top = cb.R.Top;
                }
                if (line.Bottom < cb.R.Bottom)
                {
                    line.Bottom = cb.R.Bottom;
                }
                joined = true;
                break;
            }
        }
        if (!joined)
        {
            Line<CharBoxT> newLine = new Line<CharBoxT> { Top = cb.R.Top, Bottom = cb.R.Bottom };
            newLine.CharBoxs.Add(cb);
            lines.Insert(insertAt, newLine);
        }
    }

    // Pass 2: merge neighbouring lines whose vertical overlap exceeds half the height of either one.
    for (int i = 1; i < lines.Count; i++)
    {
        Line<CharBoxT> upper = lines[i - 1];
        Line<CharBoxT> lower = lines[i];
        float intersectionH2 = (upper.Bottom - lower.Top) * 2;
        if (intersectionH2 > upper.Height || intersectionH2 > lower.Height)
        {
            upper.CharBoxs.AddRange(lower.CharBoxs);
            if (upper.Top > lower.Top)
            {
                upper.Top = lower.Top;
            }
            if (upper.Bottom < lower.Bottom)
            {
                upper.Bottom = lower.Bottom;
            }
            lines.RemoveAt(i);
            i--; // re-check the merged line against the line that moved into position i
        }
    }

    // Pass 3: order the chars of every line by X.
    foreach (Line<CharBoxT> line in lines)
    {
        line.CharBoxs = line.CharBoxs.OrderBy(b => b.R.X).ToList();
    }

    // Pass 4: insert space boxes into wide gaps between neighbouring chars.
    if (textAutoInsertSpace?.Threshold > 0)
    {
        foreach (Line<CharBoxT> l in lines)
        {
            for (int i = 1; i < l.CharBoxs.Count; i++)
            {
                CharBox cb0 = l.CharBoxs[i - 1];
                CharBox cb = l.CharBoxs[i];
                float gap = cb.R.Left - cb0.R.Right;
                // NOTE(review): the limit divides 0.8 by the box heights; leftover comments in the
                // previous revision suggest an earlier width-based formula. Confirm this is intended.
                if (gap > (0.8 / cb0.R.Height + 0.8 / cb.R.Height) * textAutoInsertSpace.Threshold)
                {
                    float spaceWidth = (cb0.R.Width + cb.R.Width) / 2;
                    int spaceNumber = 1;
                    if (spaceWidth > 0)
                    {
                        spaceNumber = (int)Math.Ceiling(gap / spaceWidth);
                    }
                    else
                    {
                        // Without a positive average width the quotient is infinite or negative and the
                        // cast to int yields an unusable count; fall back to one space spanning the gap.
                        spaceWidth = gap;
                    }
                    for (int j = 0; j < spaceNumber; j++)
                    {
                        // Insert at i + j so that the generated boxes keep ascending X order in the list.
                        l.CharBoxs.Insert(i + j, new CharBoxT { Char = textAutoInsertSpace.Representative, R = new RectangleF(cb0.R.Right + spaceWidth * j, cb0.R.Y, spaceWidth, cb.R.Height) });
                    }
                    i += spaceNumber; // skip the inserted boxes; i now points at cb again
                }
            }
        }
    }
    return lines;
}
/// <summary>
/// Arranges the char boxes into lines with <c>GetLines</c> and returns the text of each line.
/// </summary>
/// <typeparam name="CharBoxT">Char box type accepted by <c>GetLines</c>.</typeparam>
/// <param name="cbs">Char boxes to convert to text.</param>
/// <param name="textAutoInsertSpace">Space handling options forwarded to <c>GetLines</c>.</param>
/// <param name="charFilter">Char size filter forwarded to <c>GetLines</c>.</param>
/// <returns>One string per line (the result of the line's <c>GetString()</c>), in line order.</returns>
public static List<string> GetTextLines<CharBoxT>(IEnumerable<CharBoxT> cbs, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter) where CharBoxT : CharBox, new()
{
    List<string> texts = new List<string>();
    foreach (var line in GetLines(cbs, textAutoInsertSpace, charFilter))
    {
        texts.Add(line.GetString());
    }
    return texts;
}