Пример #1
0
 /// <summary>
 /// Returns the text of each line built from the char boxes selected by
 /// <c>GetCharBoxsSurroundedByRectangle</c> for the given rectangle.
 /// </summary>
 /// <param name="cbs">Candidate char boxes.</param>
 /// <param name="r">Rectangle passed to <c>GetCharBoxsSurroundedByRectangle</c>.</param>
 /// <param name="textAutoInsertSpace">Space-insertion settings forwarded to <c>Page.GetLines</c>.</param>
 /// <param name="charFilter">Char-size filter forwarded to <c>Page.GetLines</c>.</param>
 /// <returns>One string per line, obtained from each line's <c>GetString()</c>.</returns>
 public static List<string> GetTextLinesSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter)
 {
     List<CharBox> enclosed = GetCharBoxsSurroundedByRectangle(cbs, r);
     var lines = Page.GetLines(enclosed, textAutoInsertSpace, charFilter);
     var texts = from line in lines
                 select line.GetString();
     return texts.ToList();
 }
Пример #2
0
        //public static string GetTextSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace, Template.CharFilter charFilter)
        //{
        //    return string.Join("\r\n", GetTextLinesSurroundedByRectangle(cbs, r, textAutoInsertSpace, charFilter));
        //}

        /// <summary>
        /// Builds lines from the char boxes selected by
        /// <c>GetCharBoxsSurroundedByRectangle</c> for the given rectangle.
        /// </summary>
        /// <param name="cbs">Candidate char boxes.</param>
        /// <param name="r">Rectangle passed to <c>GetCharBoxsSurroundedByRectangle</c>.</param>
        /// <param name="textAutoInsertSpace">Space-insertion settings forwarded to <c>Page.GetLines</c>.</param>
        /// <param name="charFilter">Char-size filter forwarded to <c>Page.GetLines</c>.</param>
        /// <returns>The lines produced by <c>Page.GetLines</c> for the selected char boxes.</returns>
        public static List<Page.Line<CharBox>> GetLinesSurroundedByRectangle(List<CharBox> cbs, RectangleF r, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter)
        {
            List<CharBox> enclosed = GetCharBoxsSurroundedByRectangle(cbs, r);
            var lines = Page.GetLines(enclosed, textAutoInsertSpace, charFilter);
            return lines;
        }
Пример #3
0
 /// <summary>
 /// Returns the text of the given char boxes as a single string: the lines
 /// produced by <c>GetTextLines</c> joined with CR LF.
 /// </summary>
 /// <param name="cbs">Char boxes to convert to text.</param>
 /// <param name="textAutoInsertSpace">Space-insertion settings forwarded to <c>GetTextLines</c>.</param>
 /// <param name="charFilter">Char-size filter forwarded to <c>GetTextLines</c>.</param>
 /// <returns>All text lines separated by <c>"\r\n"</c>.</returns>
 public static string GetText(IEnumerable<CharBox> cbs, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter)
 {
     const string lineSeparator = "\r\n";
     var textLines = GetTextLines(cbs, textAutoInsertSpace, charFilter);
     return string.Join(lineSeparator, textLines);
 }
Пример #4
0
        /// <summary>
        /// Groups char boxes into lines by vertical position, orders the boxes of every line by X and,
        /// when requested, inserts synthetic space boxes into wide horizontal gaps.
        /// </summary>
        /// <typeparam name="CharBoxT">Char box type; needs a parameterless constructor because inserted spaces are created with <c>new CharBoxT</c>.</typeparam>
        /// <param name="cbs">Char boxes to group.</param>
        /// <param name="textAutoInsertSpace">
        /// Optional. If <c>IgnoreSourceSpaces</c> is true, boxes whose <c>Char</c> is " " are dropped first.
        /// If <c>Threshold</c> is greater than 0, boxes with <c>Char = Representative</c> are inserted into gaps wider than the threshold-scaled limit.
        /// </param>
        /// <param name="charFilter">
        /// Optional. When not null, only boxes whose width and height lie within
        /// [MinWidth, MaxWidth] and [MinHeight, MaxHeight] are kept; a non-positive maximum means no upper bound.
        /// </param>
        /// <returns>The list of lines; each line's <c>CharBoxs</c> is ordered by <c>R.X</c>.</returns>
        public static List<Line<CharBoxT>> GetLines<CharBoxT>(IEnumerable<CharBoxT> cbs, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter) where CharBoxT : CharBox, new()
        {
            if (textAutoInsertSpace?.IgnoreSourceSpaces == true)
            {
                cbs = cbs.Where(a => a.Char != " ");
            }
            if (charFilter != null)// filters out wrong OCR chars (borders etc.) which would break lines
            {
                float maxWidth  = charFilter.MaxWidth <= 0 ? float.MaxValue : charFilter.MaxWidth;
                float maxHeight = charFilter.MaxHeight <= 0 ? float.MaxValue : charFilter.MaxHeight;
                cbs = cbs.Where(a => a.R.Width >= charFilter.MinWidth && a.R.Width <= maxWidth && a.R.Height >= charFilter.MinHeight && a.R.Height <= maxHeight);
            }

            List<Line<CharBoxT>> lines = new List<Line<CharBoxT>>();

            // Pass 1: assign every box to a line using the vertical center of the box.
            foreach (CharBoxT cb in cbs)
            {
                float mY = cb.R.Bottom - cb.R.Height / 2;
                int insertAt = lines.Count;// default: append after all existing lines
                bool joined = false;
                for (int i = 0; i < lines.Count; i++)
                {
                    Line<CharBoxT> line = lines[i];
                    if (mY < line.Top)
                    {
                        // the center lies above this line: a new line has to be inserted before it
                        insertAt = i;
                        break;
                    }
                    if (mY <= line.Bottom)// the char's center is in the line
                    {
                        line.CharBoxs.Add(cb);
                        if (line.Top > cb.R.Top)
                        {
                            line.Top = cb.R.Top;
                        }
                        if (line.Bottom < cb.R.Bottom)
                        {
                            line.Bottom = cb.R.Bottom;
                        }
                        joined = true;
                        break;
                    }
                }
                if (!joined)
                {
                    Line<CharBoxT> l = new Line<CharBoxT> {
                        Top = cb.R.Top, Bottom = cb.R.Bottom
                    };
                    l.CharBoxs.Add(cb);
                    lines.Insert(insertAt, l);
                }
            }

            // Pass 2: merge neighbouring lines when twice their vertical overlap exceeds the height of either line.
            for (int i = 1; i < lines.Count;)
            {
                Line<CharBoxT> upper = lines[i - 1];
                Line<CharBoxT> lower = lines[i];
                float intersectionH2 = (upper.Bottom - lower.Top) * 2;
                if (intersectionH2 > upper.Height || intersectionH2 > lower.Height)
                {
                    upper.CharBoxs.AddRange(lower.CharBoxs);
                    if (upper.Top > lower.Top)
                    {
                        upper.Top = lower.Top;
                    }
                    if (upper.Bottom < lower.Bottom)
                    {
                        upper.Bottom = lower.Bottom;
                    }
                    lines.RemoveAt(i);// stay on the same index: the merged line is compared with its new neighbour
                }
                else
                {
                    i++;
                }
            }

            // Pass 3: order the boxes of every line by X (OrderBy is stable, so equal X keeps the arrival order).
            lines.ForEach(a => a.CharBoxs = a.CharBoxs.OrderBy(b => b.R.X).ToList());

            // Pass 4: insert synthetic spaces into wide gaps.
            if (textAutoInsertSpace?.Threshold > 0)
            {
                foreach (Line<CharBoxT> l in lines)
                {
                    for (int i = 1; i < l.CharBoxs.Count; i++)
                    {
                        CharBox cb0 = l.CharBoxs[i - 1];
                        CharBox cb  = l.CharBoxs[i];
                        float gap = cb.R.Left - cb0.R.Right;
                        // NOTE(review): the constant 0.8 appears to have replaced the box width used by an earlier version of this formula - confirm.
                        if (gap > (0.8 / cb0.R.Height + 0.8 / cb.R.Height) * textAutoInsertSpace.Threshold)
                        {
                            float spaceWidth = (cb0.R.Width + cb.R.Width) / 2;
                            int spaceNumber;
                            if (spaceWidth > 0)
                            {
                                spaceNumber = (int)Math.Ceiling(gap / spaceWidth);
                            }
                            else
                            {
                                // Both neighbours have no usable width: gap / spaceWidth would be infinite or NaN and
                                // its conversion to int is unspecified, so fall back to one space spanning the gap.
                                spaceWidth  = gap;
                                spaceNumber = 1;
                            }
                            for (int j = 0; j < spaceNumber; j++)
                            {
                                // i + j keeps the inserted spaces in ascending X order
                                l.CharBoxs.Insert(i + j, new CharBoxT {
                                    Char = textAutoInsertSpace.Representative, R = new RectangleF(cb0.R.Right + spaceWidth * j, cb0.R.Y, spaceWidth, cb.R.Height)
                                });
                            }
                            if (spaceNumber > 0)
                            {
                                i += spaceNumber;// skip the spaces just inserted; never move the index backwards
                            }
                        }
                    }
                }
            }

            return lines;
        }
Пример #5
0
 /// <summary>
 /// Groups the char boxes into lines with <c>GetLines</c> and returns the text of each line.
 /// </summary>
 /// <typeparam name="CharBoxT">Char box type accepted by <c>GetLines</c>.</typeparam>
 /// <param name="cbs">Char boxes to convert to text lines.</param>
 /// <param name="textAutoInsertSpace">Space-insertion settings forwarded to <c>GetLines</c>.</param>
 /// <param name="charFilter">Char-size filter forwarded to <c>GetLines</c>.</param>
 /// <returns>One string per line, obtained from each line's <c>GetString()</c>, in the order returned by <c>GetLines</c>.</returns>
 public static List<string> GetTextLines<CharBoxT>(IEnumerable<CharBoxT> cbs, TextAutoInsertSpace textAutoInsertSpace, CharFilter charFilter) where CharBoxT : CharBox, new()
 {
     var textLines = new List<string>();
     foreach (var line in GetLines(cbs, textAutoInsertSpace, charFilter))
     {
         textLines.Add(line.GetString());
     }
     return textLines;
 }