Esempio n. 1
0
        /// <summary>
        /// Builds a new <see cref="TableLine"/> by running every text chunk of
        /// <paramref name="line"/> through <c>TextChunk.Squeeze</c> and appending the
        /// chunks it yields, in order.
        /// </summary>
        /// <param name="line">Line whose <c>TextElements</c> are processed.</param>
        /// <param name="c">Character forwarded to <c>Squeeze</c>.</param>
        /// <param name="minRunLength">Run-length threshold forwarded to <c>Squeeze</c>.</param>
        /// <returns>A freshly created line holding the chunks produced by <c>Squeeze</c>.</returns>
        internal static TableLine RemoveRepeatedCharacters(TableLine line, char c, int minRunLength)
        {
            TableLine squeezedLine = new TableLine();

            foreach (TextChunk chunk in line.TextElements)
            {
                // Squeeze may yield several chunks for one input chunk; keep all of them, in order.
                foreach (TextChunk piece in chunk.Squeeze(c, minRunLength)) { squeezedLine.AddTextChunk(piece); }
            }

            return squeezedLine;
        }
Esempio n. 2
0
        /// <summary>
        /// Groups text chunks into <see cref="TableLine"/>s: consecutive chunks (after sorting)
        /// stay on the current line until a chunk's vertical overlap ratio with that line
        /// drops below 0.1, at which point a new line is started.
        /// </summary>
        /// <remarks>
        /// <para>
        /// <paramref name="textChunks"/> is sorted in place via <c>Utils.Sort</c>, so the caller's
        /// list may be reordered. No element is removed from it.
        /// </para>
        /// <para>
        /// A finished line is dropped when it spans more than 90% of the width of the bounding
        /// box of all chunks and <c>AllSameChar</c> holds for its text elements
        /// (presumably separator/rule lines such as "-----" - confirm against <c>AllSameChar</c>).
        /// </para>
        /// </remarks>
        /// <param name="textChunks">Chunks to group; must not be null.</param>
        /// <returns>
        /// The retained lines, each passed through
        /// <c>TableLine.RemoveRepeatedCharacters(line, ' ', 3)</c>. Empty when the input is empty.
        /// </returns>
        public static List <TableLine> GroupByLines(List <TextChunk> textChunks)
        {
            Utils.Sort(textChunks); // added by bobLd: force re-sorting

            List <TableLine> lines = new List <TableLine>();

            if (textChunks.Count == 0)
            {
                return(lines);
            }

            // Width of the box enclosing every chunk; used as the reference for the 90% test below.
            double bbwidth = BoundingBoxOf(textChunks).Width;

            // Seed the first line with the first chunk. The chunk is read by index rather than
            // removed from the list, so the caller's list keeps all of its elements.
            TableLine last = new TableLine();

            last.AddTextChunk(textChunks[0]);
            lines.Add(last);

            for (int i = 1; i < textChunks.Count; i++)
            {
                TextChunk te = textChunks[i];

                if (last.VerticalOverlapRatio(te) < 0.1)
                {
                    // The current line is complete: discard it if it is a wide run of one character.
                    if (last.Width / bbwidth > 0.9 && AllSameChar(last.TextElements))
                    {
                        lines.RemoveAt(lines.Count - 1);
                    }

                    last = new TableLine();
                    lines.Add(last);
                }

                last.AddTextChunk(te);
            }

            // The final line never hits the "new line" branch above, so apply the same filter here.
            if (last.Width / bbwidth > 0.9 && AllSameChar(last.TextElements))
            {
                lines.RemoveAt(lines.Count - 1);
            }

            List <TableLine> rv = new List <TableLine>(lines.Count);

            foreach (TableLine line in lines)
            {
                rv.Add(TableLine.RemoveRepeatedCharacters(line, ' ', 3));
            }

            return(rv);
        }