/// <summary>
/// Builds the region filter used to extract text from one column of a page.
/// The rectangle handed to <c>System.util.RectangleJ</c> is (x, y, width, height):
/// the column starts at x = 0 or x = 300, always starts at y = 0, and is 536 / 2 wide.
/// Page 1 uses a height of 500 instead of 830, so less of that page is read
/// (original note: the first page is read without its header).
/// </summary>
/// <param name="page">Page number the filter is built for; only the value 1 is treated specially.</param>
/// <param name="right">0 selects the column starting at x = 0; any other value selects the column starting at x = 300.</param>
/// <returns>A one-element array holding the <c>RegionTextRenderFilter</c> for the chosen region.</returns>
private static RenderFilter[] get_render(int page, int right)
{
    // Only the first page gets the shorter region.
    int regionHeight = (page == 1) ? 500 : 830;

    // The two columns differ only in their horizontal origin.
    int originX = (right == 0) ? 0 : 300;

    var region = new System.util.RectangleJ(originX, 0, 536 / 2, regionHeight);
    return new RenderFilter[] { new RegionTextRenderFilter(region) };
}
/// <summary>
/// Creates a text extraction strategy that only accepts text inside the given rectangle.
/// </summary>
/// <param name="pixelDistanceFromLeft">First rectangle argument (x).</param>
/// <param name="pixelDistanceFromBottom">Second rectangle argument (y).</param>
/// <param name="pixelDistanceWidth">Third rectangle argument (width).</param>
/// <param name="pixelDistanceHeight">Fourth rectangle argument (height).</param>
/// <returns>
/// A <c>FilteredTextRenderListener</c> wrapping a new <c>LocationTextExtractionStrategy</c>
/// with a single <c>RegionTextRenderFilter</c>.
/// </returns>
public static ITextExtractionStrategy MakeRectangle(float pixelDistanceFromLeft, float pixelDistanceFromBottom, float pixelDistanceWidth, float pixelDistanceHeight)
{
    RenderFilter regionFilter = new RegionTextRenderFilter(
        new System.util.RectangleJ(
            pixelDistanceFromLeft,
            pixelDistanceFromBottom,
            pixelDistanceWidth,
            pixelDistanceHeight));

    return new FilteredTextRenderListener(
        new LocationTextExtractionStrategy(),
        new RenderFilter[] { regionFilter });
}
/// <summary>
/// Grows <c>textRectangle</c> so that it also covers the bounding rectangles of the
/// descent line and the ascent line of the rendered text.
/// </summary>
/// <param name="renderInfo">Render information for the text that was just drawn.</param>
public virtual void RenderText(TextRenderInfo renderInfo)
{
    var descentBounds = renderInfo.GetDescentLine().GetBoundingRectange();

    if (textRectangle != null)
    {
        textRectangle.Add(descentBounds);
    }
    else
    {
        // First content seen: the descent bounds become the initial rectangle.
        textRectangle = descentBounds;
    }

    textRectangle.Add(renderInfo.GetAscentLine().GetBoundingRectange());
}
/// <summary>
/// Reads "settings.toml", extracts text from a fixed region of every page of the
/// configured input PDF (separate regions for the first, middle and last pages),
/// passes each page's text to <c>Write</c>, and finally warns on the console when
/// <c>totalNumSponsors</c> does not match the expected total from the settings.
/// </summary>
public void Run()
{
    var settingsStr = File.ReadAllText("settings.toml");
    var settings = Toml.Toml.Parse(settingsStr);

    var reader = new PdfReader(settings.input);
    try
    {
        writer = new StreamWriter(settings.output);
        manualMode = settings.manualMode;
        totalNumSponsors = 0;

        // NOTE(review): for a single-page document, page 1 is handled by both the
        // first-page and the last-page step below — confirm inputs always have >= 2 pages.

        // first page
        {
            // Parameters: distanceInPixelsFromLeft, distanceInPixelsFromBottom, width, height
            var rect = new System.util.RectangleJ(24, 34, 326, 348);
            var strategy = CreateStrategy(rect);
            var text = PdfTextExtractor.GetTextFromPage(reader, 1, strategy);
            Write(text);
        }

        // middle pages (2 .. NumberOfPages - 1); the last page is handled separately below
        {
            var rect = new System.util.RectangleJ(24, 0, 326, 569);
            for (var i = 2; i < reader.NumberOfPages; i++)
            {
                // A new strategy is created for every page.
                var strategy = CreateStrategy(rect);
                var text = PdfTextExtractor.GetTextFromPage(reader, i, strategy);
                // Drop the column header line from the extracted text.
                text = text.Replace("Organisation Name\n", "");
                Write(text);
            }
        }

        // last page
        {
            var rect = new System.util.RectangleJ(24, 229, 326, 339);
            var strategy = CreateStrategy(rect);
            var text = PdfTextExtractor.GetTextFromPage(reader, reader.NumberOfPages, strategy);
            Write(text);
        }

        writer.Flush();
    }
    finally
    {
        // Release the input PDF even when extraction fails part-way through.
        reader.Close();
    }

    // presumably Write() updates totalNumSponsors as it processes text — verify against Write
    if (totalNumSponsors != settings.totalNumSponsors)
    {
        Console.WriteLine(
            $@"Warning: mismatched total number of sponsors: expected `{settings.totalNumSponsors}`, found `{totalNumSponsors}`");
        Console.ReadLine();
    }
}
/// <summary>
/// Extracts the text found inside the given rectangle on page 1 of <c>reader</c>
/// and converts it to lines via <c>stockLine</c>.
/// </summary>
/// <param name="x">Rectangle x argument.</param>
/// <param name="y">Rectangle y argument.</param>
/// <param name="w">Rectangle width argument.</param>
/// <param name="h">Rectangle height argument.</param>
/// <returns>The result of <c>stockLine</c> for the extracted text followed by a line terminator.</returns>
public List<Line> getTextFromRectangle(int x, int y, int w, int h)
{
    var region = new System.util.RectangleJ(x, y, w, h);
    var regionFilters = new RenderFilter[] { new RegionTextRenderFilter(region) };

    ITextExtractionStrategy regionStrategy =
        new FilteredTextRenderListener(new LocationTextExtractionStrategy(), regionFilters);

    // AppendLine keeps the trailing line terminator that stockLine has always received.
    var pageText = new StringBuilder();
    pageText.AppendLine(PdfTextExtractor.GetTextFromPage(reader, 1, regionStrategy));

    return stockLine(pageText.ToString());
}
/// <summary>
/// Folds the rectangle accumulated for the current path into <c>textRectangle</c>,
/// unless the painting operation is <c>NO_OP</c>, then resets the current path rectangle.
/// </summary>
/// <param name="renderInfo">Information about the path-painting operation.</param>
/// <returns>Always <c>null</c>.</returns>
public Path RenderPath(PathPaintingRenderInfo renderInfo)
{
    bool isPainted = renderInfo.Operation != PathPaintingRenderInfo.NO_OP;

    if (isPainted && textRectangle == null)
    {
        // Nothing accumulated yet: the path rectangle becomes the starting rectangle.
        textRectangle = currentPathRectangle;
    }
    else if (isPainted)
    {
        textRectangle.Add(currentPathRectangle);
    }

    // The path is finished either way; start fresh for the next one.
    currentPathRectangle = null;
    return null;
}
/// <summary>
/// Grows <c>textRectangle</c> to cover an image: the four corners of the unit square
/// are transformed with the image matrix, and the bounding rectangles of the two
/// resulting edges ((0,0)-(1,0) and (0,1)-(1,1)) are added.
/// </summary>
/// <param name="renderInfo">Render information for the image that was just drawn.</param>
public virtual void RenderImage(ImageRenderInfo renderInfo)
{
    Matrix ctm = renderInfo.GetImageCTM();

    // Unit-square corners mapped through the image transformation matrix.
    Vector corner00 = new Vector(0, 0, 1).Cross(ctm);
    Vector corner10 = new Vector(1, 0, 1).Cross(ctm);
    Vector corner01 = new Vector(0, 1, 1).Cross(ctm);
    Vector corner11 = new Vector(1, 1, 1).Cross(ctm);

    var firstEdgeBounds = new LineSegment(corner00, corner10).GetBoundingRectange();
    var secondEdgeBounds = new LineSegment(corner01, corner11).GetBoundingRectange();

    if (textRectangle != null)
    {
        textRectangle.Add(firstEdgeBounds);
    }
    else
    {
        textRectangle = firstEdgeBounds;
    }

    textRectangle.Add(secondEdgeBounds);
}
// ---------------------------------------------------------------------------
/// <summary>
/// Writes a zip archive to <paramref name="stream"/> containing the PREFACE file and
/// a RESULT entry holding the text extracted from a fixed region of each of its pages.
/// </summary>
/// <param name="stream">Destination stream the zip archive is saved to.</param>
public void Write(Stream stream)
{
    using (ZipFile zip = new ZipFile())
    {
        zip.AddFile(PREFACE, "");

        StringBuilder sb = new StringBuilder();
        PdfReader reader = new PdfReader(PREFACE);
        try
        {
            // Region passed as (x, y, width, height); the same filter is reused for all pages.
            System.util.RectangleJ rect = new System.util.RectangleJ(70, 80, 420, 500);
            RenderFilter[] filter = { new RegionTextRenderFilter(rect) };

            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                // A new strategy is created for every page.
                ITextExtractionStrategy strategy = new FilteredTextRenderListener(
                    new LocationTextExtractionStrategy(), filter
                );
                sb.AppendLine(
                    PdfTextExtractor.GetTextFromPage(reader, i, strategy)
                );
            }
        }
        finally
        {
            // Release the reader even if extraction throws.
            reader.Close();
        }

        zip.AddEntry(RESULT, sb.ToString());
        zip.Save(stream);
    }
}
// ---------------------------------------------------------------------------
/// <summary>
/// Saves a zip archive to <paramref name="stream"/>: the PREFACE file itself plus a
/// RESULT entry with the text found inside a fixed rectangle on every page of PREFACE.
/// </summary>
/// <param name="stream">Stream that receives the finished zip archive.</param>
public void Write(Stream stream)
{
    using (ZipFile zip = new ZipFile())
    {
        zip.AddFile(PREFACE, "");

        StringBuilder sb = new StringBuilder();
        PdfReader reader = new PdfReader(PREFACE);
        try
        {
            // Rectangle arguments are (x, y, width, height); one filter serves all pages.
            System.util.RectangleJ rect = new System.util.RectangleJ(70, 80, 420, 500);
            RenderFilter[] filter = { new RegionTextRenderFilter(rect) };

            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                // Each page gets its own strategy instance.
                ITextExtractionStrategy strategy = new FilteredTextRenderListener(
                    new LocationTextExtractionStrategy(), filter
                );
                sb.AppendLine(
                    PdfTextExtractor.GetTextFromPage(reader, i, strategy)
                );
            }
        }
        finally
        {
            // Always close the reader, including when a page fails to parse.
            reader.Close();
        }

        zip.AddEntry(RESULT, sb.ToString());
        zip.Save(stream);
    }
}
/// <summary>
/// Extends <c>currentPathRectangle</c> with the points of a path-construction operation.
/// For a RECT operation the four corners are derived from (x, y, w, h); for any other
/// operation the segment data, when present, is read as consecutive (x, y) pairs.
/// Each point is transformed with the operation's CTM before it is added.
/// </summary>
/// <param name="renderInfo">Information about the path-construction operation.</param>
public void ModifyPath(PathConstructionRenderInfo renderInfo)
{
    var segment = renderInfo.SegmentData;
    var pathPoints = new List<Vector>();

    if (renderInfo.Operation == PathConstructionRenderInfo.RECT)
    {
        float x0 = segment[0];
        float y0 = segment[1];
        float x1 = x0 + segment[2];
        float y1 = y0 + segment[3];

        pathPoints.Add(new Vector(x0, y0, 1));
        pathPoints.Add(new Vector(x1, y0, 1));
        pathPoints.Add(new Vector(x0, y1, 1));
        pathPoints.Add(new Vector(x1, y1, 1));
    }
    else if (segment != null)
    {
        // Walk the data two values at a time; a trailing unpaired value is ignored.
        int k = 0;
        while (k < segment.Count - 1)
        {
            pathPoints.Add(new Vector(segment[k], segment[k + 1], 1));
            k += 2;
        }
    }

    foreach (Vector rawPoint in pathPoints)
    {
        Vector transformed = rawPoint.Cross(renderInfo.Ctm);
        // Zero-sized rectangle located at the transformed point.
        Rectangle2D pointBounds = new Rectangle2D(transformed[Vector.I1], transformed[Vector.I2], 0, 0);

        if (currentPathRectangle != null)
        {
            currentPathRectangle.Add(pointBounds);
        }
        else
        {
            currentPathRectangle = pointBounds;
        }
    }
}
/// <summary>
/// Initializes a TextInfo from a single text chunk: the top-left point is the start
/// of the chunk's ascent line, the bottom-right point is the end of its descent line,
/// the rectangle is the ascent line's bounding rectangle, and the text is the chunk's text.
/// </summary>
/// <param name="initialTextChunk">The chunk that supplies the initial geometry and text.</param>
public TextInfo(TextChunk initialTextChunk)
{
    var ascentLine = initialTextChunk.AscentLine;

    TopLeft = ascentLine.GetStartPoint();
    BottomRight = initialTextChunk.DecentLine.GetEndPoint();
    rectangle = ascentLine.GetBoundingRectange();
    m_Text = initialTextChunk.Text;
}
/// <summary>
/// Walks every page of the PDF named by <c>_sourcefilename</c>, extracts the text of
/// the two regions described by rows 0 and 1 of <c>_dtt</c>, and hands both texts to
/// <c>parseaddress</c>. Every address item for which <c>parseaddress</c> returns true is
/// collected, and the results are pushed to the UI through <c>Invoke</c> calls.
/// </summary>
public void verify()
{
    addresscollection aic = new addresscollection();
    addressitem ai = null;

    // The reader is still used after the stream is closed (as the original code did),
    // so the stream only has to live for the duration of the constructor.
    iTextSharp.text.pdf.PdfReader reader;
    using (FileStream x = new FileStream(_sourcefilename, FileMode.Open))
    {
        reader = new iTextSharp.text.pdf.PdfReader(x);
    }

    try
    {
        System.Xml.XmlNode ep = null;
        System.Xml.XmlNode batch = null;
        System.Xml.XmlNode startingpage = null;
        System.Xml.XmlNode envelope = null;

        int pages = reader.NumberOfPages;
        for (int i = 0; i <= pages - 1; i++)
        {
            int pageNumber = i + 1;
            this.Label2.Invoke(new updatetext(updatelabel1text), new object[] { "Processing Page " + Convert.ToString(pageNumber) + " of " + pages });

            // New strategies are created for every page.
            var textExtractionStrategy = BuildRegionStrategyFromRow(_dtt.Rows[0]);
            var textExtractionStrategy1 = BuildRegionStrategyFromRow(_dtt.Rows[1]);

            string s = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, pageNumber, textExtractionStrategy);
            if (!string.IsNullOrEmpty(s))
            {
                // The second region is only read when the first one produced text.
                string s1 = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, pageNumber, textExtractionStrategy1);
                if (parseaddress(s, s1, ref batch, pageNumber, ref ep, ref envelope, ref startingpage, ref ai))
                {
                    // Does not catch a single first page; any time this is true it is a new first page.
                    aic.Add(ai);
                }
            }

            // On the final page, close out whatever node / item is still open.
            bool isLastPage = i == pages - 1;
            if (isLastPage && ep != null)
            {
                ep.InnerText = Convert.ToString(pageNumber);
            }
            if (isLastPage && ai != null)
            {
                ai.endpage = pageNumber;
            }
        }
    }
    finally
    {
        // Close the reader on failure as well as on success.
        reader.Close();
    }

    this.RichTextBox1.Invoke(new updatert(updaterichtext), new object[] { XMLDOC, badaddress });
    Invoke(new updatedatagrid(updatedatagridonMail), new object[] { aic });
    Invoke(new updatecomplete(updatecompleted), new object[] { });
}

/// <summary>
/// Builds a location-based extraction strategy restricted to the rectangle stored in
/// the "x", "y", "width" and "height" columns of <paramref name="row"/>.
/// </summary>
private static iTextSharp.text.pdf.parser.FilteredTextRenderListener BuildRegionStrategyFromRow(DataRow row)
{
    System.util.RectangleJ region = new System.util.RectangleJ(
        Convert.ToInt32(row["x"]),
        Convert.ToInt32(row["y"]),
        Convert.ToInt32(row["width"]),
        Convert.ToInt32(row["height"]));

    // Exactly one slot: the array is handed to the listener as its filter list, and an
    // unused null slot risks a NullReferenceException when the listener walks its filters.
    iTextSharp.text.pdf.parser.RegionTextRenderFilter[] filters =
    {
        new iTextSharp.text.pdf.parser.RegionTextRenderFilter(region)
    };

    return new iTextSharp.text.pdf.parser.FilteredTextRenderListener(
        new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy(), filters);
}
/// <summary>
/// Opens <paramref name="sourceFileName"/>, extracts the text inside the currently
/// selected rectangle (<c>Rect</c>) on the current page, and asks the user to confirm it.
/// On confirmation the rectangle is stored in the current row of <c>_dtt</c> and the
/// template step counter <c>_CurrentCount</c> is advanced. When <c>_mode</c> is not 1,
/// only the "completed" label text is shown.
/// </summary>
/// <param name="sourceFileName">Path of the PDF file to read.</param>
/// <returns>Always an empty string.</returns>
public string ExtractTextFromRegionOfPdf(string sourceFileName)
{
    // The stream only has to live for the duration of the PdfReader constructor;
    // the original code also closed it before using the reader.
    iTextSharp.text.pdf.PdfReader reader;
    using (FileStream x = new FileStream(sourceFileName, FileMode.Open))
    {
        reader = new iTextSharp.text.pdf.PdfReader(x);
    }

    try
    {
        // The y value is derived by flipping Rect.Y against the picture box height.
        System.util.RectangleJ rect1 = new System.util.RectangleJ(Rect.X, System.Math.Abs(this.PictureBox1.Height - Rect.Y) - Rect.Height, Rect.Width, Rect.Height);

        // Exactly one slot: an unused null slot in the filter array risks a
        // NullReferenceException when the listener walks its filters.
        iTextSharp.text.pdf.parser.RegionTextRenderFilter[] rtrf =
        {
            new iTextSharp.text.pdf.parser.RegionTextRenderFilter(rect1)
        };
        iTextSharp.text.pdf.parser.FilteredTextRenderListener textExtractionStrategy =
            new iTextSharp.text.pdf.parser.FilteredTextRenderListener(
                new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy(), rtrf);

        if (_mode != 1)
        {
            this.Label2.Text = "OK, you have completed the template, if you wish to start over simply do it again and start by selecting the area with:" + _dtt.Rows[0]["fieldname"];
            return "";
        }

        if (!this.loadedbool)
        {
            return "";
        }

        // The reader must still be open here; it is closed in the finally block below.
        string s = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, CurrentPage + 1, textExtractionStrategy);
        DialogResult y = MessageBox.Show("This field is showing : " + s + "\r\n" + "Is this the correct variable on this page?", "Confirm", MessageBoxButtons.YesNo);
        if (y != DialogResult.Yes)
        {
            return "";
        }

        // Persist the confirmed rectangle for the current template field.
        _dtt.Rows[_CurrentCount]["x"] = Rect.X;
        _dtt.Rows[_CurrentCount]["y"] = System.Math.Abs(this.PictureBox1.Height - Rect.Y) - Rect.Height;
        _dtt.Rows[_CurrentCount]["width"] = Rect.Width;
        _dtt.Rows[_CurrentCount]["height"] = Rect.Height;

        if (_CurrentCount == 0)
        {
            DialogResult xx = MessageBox.Show("There is an optional Parimeter where you can select something that only appears on the first page, do you want to add this. It can be part of a string like Page 1 of XX?", "Confirm", MessageBoxButtons.YesNo);
            if (xx == DialogResult.No)
            {
                // Jump to step 2 so the optional step is not offered.
                _CurrentCount = 2;
            }
        }

        // Compare the cell's string VALUE; `object == string` would only compare references.
        if (string.Equals(_dtt.Rows[_CurrentCount]["FieldName"] as string, "FirstPageConstant"))
        {
            _validatetext = Interaction.InputBox("Enter Charectors to match, if you enter \"1 of \" it will be true for anything after the of");
            _dtt.Rows[_CurrentCount + 1]["misc"] = _validatetext;
            _CurrentCount += 1;
        }

        _CurrentCount += 1;
        if (_CurrentCount == 3)
        {
            // All steps done: reset the counter and redraw.
            _CurrentCount = 0;
            this.Label2.Text = "OK, you have completed the template, if you wish to start over simply do it again and start by selecting the area with:" + _dtt.Rows[0]["fieldname"];
            startover = 1;
            drawrectangles();
            MessageBox.Show("Make sure you save this if you want to use it in the future.");
        }
        else
        {
            this.Label2.Text = "Now Select : " + _dtt.Rows[_CurrentCount]["fieldname"];
        }

        return "";
    }
    finally
    {
        // Close only after the page text has been read (previously closed before use).
        reader.Close();
    }
}