/// <summary>
/// Builds a new <see cref="TableLine"/> from <paramref name="line"/> by calling
/// <c>Squeeze(c, minRunLength)</c> on each of its text chunks and appending every
/// chunk that call yields, in order. The input line itself is not modified here.
/// </summary>
/// <param name="line">The line whose text chunks are processed.</param>
/// <param name="c">The character forwarded to <c>TextChunk.Squeeze</c>.</param>
/// <param name="minRunLength">
/// The run length forwarded to <c>TextChunk.Squeeze</c>.
/// NOTE(review): presumably the minimum number of consecutive <paramref name="c"/>
/// characters that triggers squeezing - confirm against TextChunk.Squeeze.
/// </param>
/// <returns>A freshly created line holding the squeezed chunks.</returns>
internal static TableLine RemoveRepeatedCharacters(TableLine line, char c, int minRunLength)
{
    TableLine squeezedLine = new TableLine();

    foreach (TextChunk chunk in line.TextElements)
    {
        // One source chunk may yield several chunks; keep them all, in order.
        foreach (TextChunk piece in chunk.Squeeze(c, minRunLength))
        {
            squeezedLine.AddTextChunk(piece);
        }
    }

    return squeezedLine;
}
/// <summary>
/// Groups text chunks into lines. After sorting, the first chunk opens the first
/// line; each following chunk is appended to the current line unless its vertical
/// overlap ratio with that line is below 0.1, in which case a new line is started.
/// Lines wider than 90% of the overall bounding box for which <c>AllSameChar</c>
/// returns true are dropped (presumably separator rows such as runs of dashes or
/// underscores - confirm against AllSameChar). Finally every remaining line is passed
/// through <c>TableLine.RemoveRepeatedCharacters(line, ' ', 3)</c>.
/// </summary>
/// <param name="textChunks">
/// The chunks to group. The list is handed to <c>Utils.Sort</c>, so its order may
/// change, but no element is removed from it.
/// </param>
/// <returns>The grouped lines; an empty list when <paramref name="textChunks"/> is empty.</returns>
public static List<TableLine> GroupByLines(List<TextChunk> textChunks)
{
    Utils.Sort(textChunks); // added by bobLd: force re-sorting

    List<TableLine> lines = new List<TableLine>();
    if (textChunks.Count == 0)
    {
        return lines;
    }

    // Width of the box enclosing all chunks; used to detect near-full-width lines.
    double bbwidth = BoundingBoxOf(textChunks).Width;

    // Seed the first line with the first chunk. The chunk is read by index rather
    // than removed from the list, so the caller's collection keeps all its elements.
    TableLine last = new TableLine();
    last.AddTextChunk(textChunks[0]);
    lines.Add(last);

    for (int i = 1; i < textChunks.Count; i++)
    {
        TextChunk te = textChunks[i];

        if (last.VerticalOverlapRatio(te) < 0.1)
        {
            // The current line is complete: discard it if it spans (almost) the
            // whole width and AllSameChar reports true for its elements.
            if (last.Width / bbwidth > 0.9 && AllSameChar(last.TextElements))
            {
                lines.RemoveAt(lines.Count - 1);
            }

            last = new TableLine();
            lines.Add(last);
        }

        last.AddTextChunk(te);
    }

    // Apply the same discard rule to the final line, which the loop never closes.
    if (last.Width / bbwidth > 0.9 && AllSameChar(last.TextElements))
    {
        lines.RemoveAt(lines.Count - 1);
    }

    List<TableLine> rv = new List<TableLine>(lines.Count);
    foreach (TableLine line in lines)
    {
        rv.Add(TableLine.RemoveRepeatedCharacters(line, ' ', 3));
    }

    return rv;
}
/// <summary>
/// Tests whether this object's bounding box contains the bounding box of the given line.
/// </summary>
/// <param name="tableLine">The line whose bounding box is tested.</param>
/// <returns>
/// The result of <c>BoundingBox.Contains(tableLine.BoundingBox, true)</c>.
/// NOTE(review): the <c>true</c> flag presumably makes the test border-inclusive -
/// confirm against the Contains overload being called.
/// </returns>
public bool Contains(TableLine tableLine)
{
    var ownBox = this.BoundingBox;
    var lineBox = tableLine.BoundingBox;
    return ownBox.Contains(lineBox, true);
}