/// <summary>
/// Appends four <see cref="Line"/> commands that trace a rectangle, starting and finishing at (<paramref name="x"/>, <paramref name="y"/>).
/// </summary>
/// <param name="x">X coordinate of the corner the outline starts from.</param>
/// <param name="y">Y coordinate of the corner the outline starts from.</param>
/// <param name="width">Offset added to <paramref name="x"/> to reach the opposite side.</param>
/// <param name="height">Offset added to <paramref name="y"/> to reach the opposite side.</param>
public void Rectangle(double x, double y, double width, double height)
{
    double oppositeX = x + width;
    double oppositeY = y + height;

    // The starting corner is assigned directly: no Move command is added to the path here.
    currentPosition = new PdfPoint(x, y);

    LineTo(oppositeX, y);
    LineTo(oppositeX, oppositeY);
    LineTo(x, oppositeY);
    LineTo(x, y);

    // Flag that this path was produced by the rectangle helper.
    IsDrawnAsRectangle = true;
}
/// <summary>
/// Finds the points where horizontal and vertical rulings intersect by sweeping over their sorted positions.
/// Based on http://people.csail.mit.edu/indyk/6.838-old/handouts/lec2.pdf
/// </summary>
/// <param name="horizontals">The horizontal rulings to test.</param>
/// <param name="verticals">The vertical rulings to test.</param>
/// <returns>
/// A sorted map from each intersection point to a two-element array holding the expanded horizontal ruling
/// followed by the expanded vertical ruling that meet at that point.
/// </returns>
public static SortedDictionary<PdfPoint, Ruling[]> FindIntersections(IReadOnlyList<Ruling> horizontals, IReadOnlyList<Ruling> verticals)
{
    // Ported from https://github.com/tabulapdf/tabula-java/blob/master/src/main/java/technology/tabula/Ruling.java#L312
    var sweepEvents = new List<SortObject>();
    var activeHorizontals = new SortedDictionary<Ruling, bool>(new TreeMapRulingComparer());
    var intersections = new SortedDictionary<PdfPoint, Ruling[]>(new TreeMapPdfPointComparer());

    // Every horizontal contributes an "enter" and a "leave" event, widened by the expand amount.
    foreach (Ruling horizontal in horizontals)
    {
        sweepEvents.Add(new SortObject(SOType.HLEFT, horizontal.Left - PERPENDICULAR_PIXEL_EXPAND_AMOUNT, horizontal));
        sweepEvents.Add(new SortObject(SOType.HRIGHT, horizontal.Right + PERPENDICULAR_PIXEL_EXPAND_AMOUNT, horizontal));
    }

    // Every vertical contributes a single event at its left position.
    foreach (Ruling vertical in verticals)
    {
        sweepEvents.Add(new SortObject(SOType.VERTICAL, vertical.Left, vertical));
    }

    sweepEvents.Sort(new SortObjectComparer());

    foreach (SortObject current in sweepEvents)
    {
        if (current.type == SOType.HLEFT)
        {
            // The horizontal becomes active.
            activeHorizontals[current.ruling] = true;
        }
        else if (current.type == SOType.HRIGHT)
        {
            // The horizontal is no longer active.
            activeHorizontals.Remove(current.ruling);
        }
        else if (current.type == SOType.VERTICAL)
        {
            // Test the vertical against every horizontal that is currently active.
            foreach (Ruling candidate in activeHorizontals.Keys)
            {
                PdfPoint? crossing = candidate.IntersectionPoint(current.ruling);
                if (crossing.HasValue)
                {
                    intersections[crossing.Value] = new Ruling[]
                    {
                        candidate.Expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT),
                        current.ruling.Expand(PERPENDICULAR_PIXEL_EXPAND_AMOUNT)
                    };
                }
            }
        }
    }

    return intersections;
}
/// <summary>
/// Adds a <see cref="Line"/> from the current position to (x, y) and makes (x, y) the current position.
/// When there is no current position the call is forwarded to <c>MoveTo</c> instead.
/// </summary>
internal void LineTo(decimal x, decimal y)
{
    if (!currentPosition.HasValue)
    {
        MoveTo(x, y);
        return;
    }

    var destination = new PdfPoint(x, y);
    var segment = new Line(currentPosition.Value, destination);
    commands.Add(segment);
    currentPosition = destination;
}
/// <summary>
/// Adds a <see cref="BezierCurve"/> from the current position to (x3, y3) using (x1, y1) and (x2, y2)
/// as control points, then makes (x3, y3) the current position.
/// When there is no current position the call is forwarded to <c>MoveTo(x3, y3)</c> instead.
/// </summary>
internal void BezierCurveTo(decimal x1, decimal y1, decimal x2, decimal y2, decimal x3, decimal y3)
{
    if (!currentPosition.HasValue)
    {
        MoveTo(x3, y3);
        return;
    }

    var endPoint = new PdfPoint(x3, y3);
    var firstControl = new PdfPoint(x1, y1);
    var secondControl = new PdfPoint(x2, y2);

    commands.Add(new BezierCurve(currentPosition.Value, firstControl, secondControl, endPoint));
    currentPosition = endPoint;
}
/// <summary>
/// Adds a <see cref="Line"/> from the current position to (x, y) and makes (x, y) the current position.
/// When there is no current position the call is forwarded to <c>MoveTo</c> instead.
/// </summary>
internal void LineTo(decimal x, decimal y)
{
    if (!currentPosition.HasValue)
    {
        // TODO: probably the wrong behaviour here, maybe line starts from (0, 0)?
        MoveTo(x, y);
        return;
    }

    PdfPoint origin = currentPosition.Value;
    var target = new PdfPoint(x, y);
    commands.Add(new Line(origin, target));
    currentPosition = target;
}
/// <summary>
/// Adds a <see cref="BezierCurve"/> from the current position to (x3, y3) using (x1, y1) and (x2, y2)
/// as control points. All three supplied points are passed through the current transformation matrix
/// before being stored, and the transformed end point becomes the current position.
/// When there is no current position the call is forwarded to <c>MoveTo(x3, y3)</c> instead.
/// </summary>
internal void BezierCurveTo(decimal x1, decimal y1, decimal x2, decimal y2, decimal x3, decimal y3)
{
    if (!currentPosition.HasValue)
    {
        MoveTo(x3, y3);
        return;
    }

    // The end point is transformed first, then the control points, as in the original sequence.
    var transformedEnd = currentTransformationMatrix.Transform(new PdfPoint(x3, y3));
    var transformedControl1 = currentTransformationMatrix.Transform(new PdfPoint(x1, y1));
    var transformedControl2 = currentTransformationMatrix.Transform(new PdfPoint(x2, y2));

    commands.Add(new BezierCurve(currentPosition.Value, transformedControl1, transformedControl2, transformedEnd));
    currentPosition = transformedEnd;
}
/// <summary>
/// Add a <see cref="Line"/> command to the path, from the current position to (<paramref name="x"/>, <paramref name="y"/>).
/// The edge's contribution is also added to the running shoelace sum, and (x, y) becomes the current position.
/// </summary>
/// <param name="x">X coordinate of the line's end point.</param>
/// <param name="y">Y coordinate of the line's end point.</param>
/// <exception cref="ArgumentNullException">Thrown when the path has no current position to start the line from.</exception>
public void LineTo(double x, double y)
{
    if (!currentPosition.HasValue)
    {
        // PDF Reference 1.7 p226
        // The single-string ArgumentNullException constructor interprets its argument as the parameter
        // name, not the message. Use the (paramName, message) overload so the text is reported as the
        // message. The exception type is kept so existing catch blocks continue to work.
        throw new ArgumentNullException(nameof(currentPosition), "LineTo(): currentPosition is null.");
    }

    PdfPoint from = currentPosition.Value;

    // Shoelace term for the edge from -> (x, y).
    shoeLaceSum += ((x - from.X) * (y + from.Y));

    var to = new PdfPoint(x, y);
    commands.Add(new Line(from, to));
    currentPosition = to;
}
/// <summary>
/// Appends a <see cref="Line"/> command running from the current position to (<paramref name="x"/>, <paramref name="y"/>),
/// accumulating the edge into the shoelace sum. Falls back to <c>MoveTo</c> when no current position exists.
/// </summary>
/// <param name="x">X coordinate of the line's end point.</param>
/// <param name="y">Y coordinate of the line's end point.</param>
public void LineTo(double x, double y)
{
    if (!currentPosition.HasValue)
    {
        // TODO: probably the wrong behaviour here, maybe line starts from (0, 0)?
        MoveTo(x, y);
        return;
    }

    PdfPoint origin = currentPosition.Value;

    // Shoelace term for the edge origin -> (x, y).
    shoeLaceSum += ((x - origin.X) * (y + origin.Y));

    var destination = new PdfPoint(x, y);
    commands.Add(new Line(origin, destination));
    currentPosition = destination;
}
/// <summary>
/// Adds a <see cref="BezierCurve"/> from the current position to (x3, y3) with control points (x1, y1) and (x2, y2).
/// The three edges of the control polygon are added to the shoelace sum, and (x3, y3) becomes the current position.
/// When there is no current position the call is forwarded to <c>MoveTo(x3, y3)</c> instead.
/// </summary>
internal void BezierCurveTo(decimal x1, decimal y1, decimal x2, decimal y2, decimal x3, decimal y3)
{
    if (!currentPosition.HasValue)
    {
        MoveTo(x3, y3);
        return;
    }

    var origin = currentPosition.Value;

    // One shoelace term per control-polygon edge, accumulated one at a time.
    shoeLaceSum += (double)((x1 - origin.X) * (y1 + origin.Y));
    shoeLaceSum += (double)((x2 - x1) * (y2 + y1));
    shoeLaceSum += (double)((x3 - x2) * (y3 + y2));

    var endPoint = new PdfPoint(x3, y3);
    var firstControl = new PdfPoint(x1, y1);
    var secondControl = new PdfPoint(x2, y2);

    commands.Add(new BezierCurve(origin, firstControl, secondControl, endPoint));
    currentPosition = endPoint;
}
/// <summary>
/// Appends a <see cref="BezierCurve"/> to the path, running from the current position to (x3, y3).
/// The three edges of the control polygon are added to the shoelace sum.
/// Falls back to <c>MoveTo(x3, y3)</c> when no current position exists.
/// </summary>
/// <param name="x1">X coordinate of the first control point.</param>
/// <param name="y1">Y coordinate of the first control point.</param>
/// <param name="x2">X coordinate of the second control point.</param>
/// <param name="y2">Y coordinate of the second control point.</param>
/// <param name="x3">X coordinate of the end point.</param>
/// <param name="y3">Y coordinate of the end point.</param>
public void BezierCurveTo(double x1, double y1, double x2, double y2, double x3, double y3)
{
    if (!currentPosition.HasValue)
    {
        MoveTo(x3, y3);
        return;
    }

    PdfPoint origin = currentPosition.Value;

    // One shoelace term per control-polygon edge, accumulated one at a time.
    shoeLaceSum += (x1 - origin.X) * (y1 + origin.Y);
    shoeLaceSum += (x2 - x1) * (y2 + y1);
    shoeLaceSum += (x3 - x2) * (y3 + y2);

    var endPoint = new PdfPoint(x3, y3);
    var curve = new BezierCurve(origin, new PdfPoint(x1, y1), new PdfPoint(x2, y2), endPoint);
    commands.Add(curve);
    currentPosition = endPoint;
}
/// <summary>
/// Add a <see cref="BezierCurve"/> to the path, running from the current position to (x3, y3).
/// The three edges of the control polygon are added to the running shoelace sum, and (x3, y3)
/// becomes the current position.
/// </summary>
/// <param name="x1">X coordinate of the first control point.</param>
/// <param name="y1">Y coordinate of the first control point.</param>
/// <param name="x2">X coordinate of the second control point.</param>
/// <param name="y2">Y coordinate of the second control point.</param>
/// <param name="x3">X coordinate of the end point.</param>
/// <param name="y3">Y coordinate of the end point.</param>
/// <exception cref="ArgumentNullException">Thrown when the path has no current position to start the curve from.</exception>
public void BezierCurveTo(double x1, double y1, double x2, double y2, double x3, double y3)
{
    if (!currentPosition.HasValue)
    {
        // PDF Reference 1.7 p226
        // The single-string ArgumentNullException constructor interprets its argument as the parameter
        // name, not the message. Use the (paramName, message) overload so the text is reported as the
        // message. The exception type is kept so existing catch blocks continue to work.
        throw new ArgumentNullException(nameof(currentPosition), "BezierCurveTo(): currentPosition is null.");
    }

    PdfPoint from = currentPosition.Value;

    // One shoelace term per control-polygon edge, accumulated one at a time.
    shoeLaceSum += (x1 - from.X) * (y1 + from.Y);
    shoeLaceSum += (x2 - x1) * (y2 + y1);
    shoeLaceSum += (x3 - x2) * (y3 + y2);

    var to = new PdfPoint(x3, y3);
    commands.Add(new BezierCurve(from, new PdfPoint(x1, y1), new PdfPoint(x2, y2), to));
    currentPosition = to;
}
/// <summary>
/// Extract the <see cref="PageArea"/>, with its text elements (letters) and rulings (processed PdfPath and PdfSubpath).
/// </summary>
/// <param name="pageNumber">The page number to extract. Must be between 1 and the document's number of pages.</param>
/// <returns>A <see cref="PageArea"/> built from the page's crop box, rotation, text elements and extracted rulings.</returns>
/// <exception cref="IndexOutOfRangeException">Thrown when <paramref name="pageNumber"/> is below 1 or above the number of pages.</exception>
public PageArea ExtractPage(int pageNumber)
{
    if (pageNumber > this.pdfDocument.NumberOfPages || pageNumber < 1)
    {
        throw new IndexOutOfRangeException("Page number does not exist");
    }

    Page p = this.pdfDocument.GetPage(pageNumber);

    /**************** ObjectExtractorStreamEngine(PDPage page)*******************/
    var rulings = new List<Ruling>();

    // Orders the two points with the supplied comparer, builds a ruling between them and keeps it
    // only when it is longer than 0.01.
    void AddRuling(PointComparer comparer, PdfPoint from, PdfPoint to)
    {
        PdfLine line = comparer.Compare(from, to) == -1
            ? new PdfLine(from, to)
            : new PdfLine(to, from);

        // already clipped
        Ruling ruling = new Ruling(line.Point1, line.Point2);
        if (ruling.Length > 0.01)
        {
            rulings.Add(ruling);
        }
    }

    // NOTE: the page images were previously enumerated here and TryGetPng was called on each one,
    // but the result was never used; that loop has been removed.

    foreach (var path in p.ExperimentalAccess.Paths)
    {
        if (!path.IsFilled && !path.IsStroked)
        {
            continue; // strokeOrFillPath operator => filter stroke and filled
        }

        foreach (var subpath in path)
        {
            // skip paths whose first operation is not a MOVETO
            // (FirstOrDefault also covers a subpath with no commands at all)
            if (!(subpath.Commands.FirstOrDefault() is Move first))
            {
                continue;
            }

            if (subpath.Commands.Any(c => c is BezierCurve))
            {
                // or contains operations other than LINETO, MOVETO or CLOSE
                // bobld: skip at subpath or path level?
                continue;
            }

            // TODO: how to implement color filter?

            PdfPoint? start_pos = RoundPdfPoint(first.Location, rounding);
            PdfPoint? last_move = start_pos;
            PdfPoint? end_pos = null;
            PointComparer pc = new PointComparer();

            foreach (var command in subpath.Commands)
            {
                if (command is Line linePath)
                {
                    end_pos = RoundPdfPoint(linePath.To, rounding);
                    if (!start_pos.HasValue || !end_pos.HasValue)
                    {
                        break;
                    }

                    AddRuling(pc, start_pos.Value, end_pos.Value);
                }
                else if (command is Move move)
                {
                    // Remember the most recent MOVETO so that a later CLOSE returns to it,
                    // rather than to the first move of the subpath.
                    last_move = RoundPdfPoint(move.Location, rounding);
                    end_pos = last_move;
                }
                else if (command is Close)
                {
                    // according to PathIterator docs:
                    // "the preceding subpath should be closed by appending a line
                    // segment
                    // back to the point corresponding to the most recent
                    // SEG_MOVETO."
                    if (!start_pos.HasValue || !end_pos.HasValue)
                    {
                        break;
                    }

                    AddRuling(pc, end_pos.Value, last_move.Value); //.intersect(this.currentClippingPath());
                }

                // The next segment starts where this one ended.
                start_pos = end_pos;
            }
        }
    }
    /****************************************************************************/

    TextStripper pdfTextStripper = new TextStripper(this.pdfDocument, pageNumber);
    pdfTextStripper.Process();
    Utils.Sort(pdfTextStripper.textElements, new TableRectangle.ILL_DEFINED_ORDER());

    return new PageArea(
        p.CropBox.Bounds,
        p.Rotation.Value,
        pageNumber,
        p,
        this.pdfDocument,
        pdfTextStripper.textElements,
        rulings,
        pdfTextStripper.minCharWidth,
        pdfTextStripper.minCharHeight,
        pdfTextStripper.spatialIndex);
}
/// <summary>
/// Sets the current position to (x, y) and records a <see cref="Move"/> command to that point.
/// </summary>
internal void MoveTo(decimal x, decimal y)
{
    var destination = new PdfPoint(x, y);
    currentPosition = destination;
    commands.Add(new Move(destination));
}
/// <summary>
/// Appends a <see cref="Move"/> command to the path and makes (<paramref name="x"/>, <paramref name="y"/>) the current position.
/// </summary>
/// <param name="x">X coordinate to move to.</param>
/// <param name="y">Y coordinate to move to.</param>
public void MoveTo(double x, double y)
{
    var destination = new PdfPoint(x, y);
    currentPosition = destination;
    commands.Add(new Move(destination));
}
/// <summary>
/// Passes (x, y) through the current transformation matrix, makes the result the current position
/// and records a <see cref="Move"/> command to it.
/// </summary>
internal void MoveTo(decimal x, decimal y)
{
    var transformed = currentTransformationMatrix.Transform(new PdfPoint(x, y));
    currentPosition = transformed;
    commands.Add(new Move(transformed));
}