예제 #1
0
 /// <summary>
 /// Builds the render filter used for data extraction from one half of a page.
 /// The first page gets a shorter region (height 500 instead of 830) so that unnecessary
 /// information can be skipped (original note: "first page without header").
 /// NOTE(review): the previous comment described the rectangle arguments as
 /// (lower left x, lower left y, upper right x, upper right y); the values are presumably
 /// consumed by RectangleJ as (x, y, width, height) - confirm against the RectangleJ constructor.
 /// </summary>
 /// <param name="page">Page the data is extracted from; 1 selects the shorter region.</param>
 /// <param name="right">0 selects the region starting at x = 0; any other value selects the region starting at x = 300.</param>
 /// <returns>A single-element array holding a RegionTextRenderFilter for the selected region.</returns>
 private static RenderFilter[] get_render(int page, int right)
 {
     // Only the first argument and the last argument differ between the four cases.
     int originX      = (right == 0) ? 0 : 300;
     int regionHeight = (page == 1) ? 500 : 830;

     var region = new System.util.RectangleJ(originX, 0, 536 / 2, regionHeight);

     return new RenderFilter[] { new RegionTextRenderFilter(region) };
 }
예제 #2
0
        /// <summary>
        /// Creates a text extraction strategy that only receives text accepted by a
        /// RegionTextRenderFilter built from the given rectangle values.
        /// </summary>
        /// <param name="pixelDistanceFromLeft">First value passed to the RectangleJ constructor.</param>
        /// <param name="pixelDistanceFromBottom">Second value passed to the RectangleJ constructor.</param>
        /// <param name="pixelDistanceWidth">Third value passed to the RectangleJ constructor.</param>
        /// <param name="pixelDistanceHeight">Fourth value passed to the RectangleJ constructor.</param>
        /// <returns>A FilteredTextRenderListener wrapping a new LocationTextExtractionStrategy.</returns>
        public static ITextExtractionStrategy MakeRectangle(float pixelDistanceFromLeft, float pixelDistanceFromBottom, float pixelDistanceWidth, float pixelDistanceHeight)
        {
            RenderFilter[] regionFilters =
            {
                new RegionTextRenderFilter(
                    new System.util.RectangleJ(pixelDistanceFromLeft, pixelDistanceFromBottom, pixelDistanceWidth, pixelDistanceHeight))
            };

            return new FilteredTextRenderListener(new LocationTextExtractionStrategy(), regionFilters);
        }
예제 #3
0
 /// <summary>
 /// Grows the accumulated textRectangle so that it includes the bounding rectangles of the
 /// descent line and the ascent line of the rendered text.
 /// </summary>
 /// <param name="renderInfo">Render information supplying the descent and ascent lines.</param>
 public virtual void RenderText(TextRenderInfo renderInfo)
 {
     var descentBounds = renderInfo.GetDescentLine().GetBoundingRectange();

     if (textRectangle != null)
     {
         textRectangle.Add(descentBounds);
     }
     else
     {
         // First contribution: the descent bounds become the starting rectangle.
         textRectangle = descentBounds;
     }

     var ascentBounds = renderInfo.GetAscentLine().GetBoundingRectange();
     textRectangle.Add(ascentBounds);
 }
        /// <summary>
        /// Reads "settings.toml", extracts a fixed region of text from the first page, every
        /// middle page and the last page of the configured input PDF, hands each extract to
        /// Write, flushes the output writer and prints a console warning when the counted
        /// number of sponsors differs from the configured total.
        /// </summary>
        /// <remarks>
        /// NOTE(review): for a single-page document, page 1 is processed by both the
        /// first-page and the last-page step; confirm whether such input can occur.
        /// </remarks>
        public void Run()
        {
            var settingsStr = File.ReadAllText("settings.toml");
            var settings = Toml.Toml.Parse(settingsStr);

            var reader = new PdfReader(settings.input);
            try
            {
                writer = new StreamWriter(settings.output);
                manualMode = settings.manualMode;
                totalNumSponsors = 0;

                // first page
                {
                    // Parameters: distanceInPixelsFromLeft, distanceInPixelsFromBottom, width, height
                    var rect = new System.util.RectangleJ(24, 34, 326, 348);
                    var strategy = CreateStrategy(rect);
                    var text = PdfTextExtractor.GetTextFromPage(reader, 1, strategy);
                    Write(text);
                }

                // middle pages: 2 .. NumberOfPages - 1 (the last page is handled separately below)
                {
                    var rect = new System.util.RectangleJ(24, 0, 326, 569);

                    for (var i = 2; i < reader.NumberOfPages; i++)
                    {
                        // A new strategy is created for every page.
                        var strategy = CreateStrategy(rect);
                        var text = PdfTextExtractor.GetTextFromPage(reader, i, strategy);
                        // Drop the column heading line from the extracted text.
                        text = text.Replace("Organisation Name\n", "");
                        Write(text);
                    }
                }

                // last page
                {
                    var rect = new System.util.RectangleJ(24, 229, 326, 339);
                    var strategy = CreateStrategy(rect);
                    var text = PdfTextExtractor.GetTextFromPage(reader, reader.NumberOfPages, strategy);
                    Write(text);
                }
            }
            finally
            {
                // Release the PDF even when extraction or writing fails.
                reader.Close();
            }

            writer.Flush();

            if (totalNumSponsors != settings.totalNumSponsors)
            {
                Console.WriteLine(
            $@"Warning: mismatched total number of sponsors:
             expected `{settings.totalNumSponsors}`,
            found `{totalNumSponsors}`");
                // Wait for the user so the warning is not missed.
                Console.ReadLine();
            }
        }
예제 #5
0
        /// <summary>
        /// Extracts the text of page 1 that passes a RegionTextRenderFilter built from the given
        /// rectangle values and converts it into lines via stockLine.
        /// </summary>
        /// <param name="x">First value passed to the RectangleJ constructor.</param>
        /// <param name="y">Second value passed to the RectangleJ constructor.</param>
        /// <param name="w">Third value passed to the RectangleJ constructor.</param>
        /// <param name="h">Fourth value passed to the RectangleJ constructor.</param>
        /// <returns>The result of stockLine applied to the extracted text followed by a line terminator.</returns>
        public List <Line> getTextFromRectangle(int x, int y, int w, int h)
        {
            RenderFilter[] regionFilters =
            {
                new RegionTextRenderFilter(new System.util.RectangleJ(x, y, w, h))
            };
            ITextExtractionStrategy regionStrategy =
                new FilteredTextRenderListener(new LocationTextExtractionStrategy(), regionFilters);

            // AppendLine adds the trailing line terminator that stockLine receives.
            string pageText = new StringBuilder()
                              .AppendLine(PdfTextExtractor.GetTextFromPage(reader, 1, regionStrategy))
                              .ToString();

            return stockLine(pageText);
        }
예제 #6
0
 /// <summary>
 /// Merges the rectangle collected for the current path into textRectangle unless the
 /// painting operation is NO_OP, then resets the current path rectangle.
 /// </summary>
 /// <param name="renderInfo">Path painting information; only its Operation is inspected.</param>
 /// <returns>Always null.</returns>
 public Path RenderPath(PathPaintingRenderInfo renderInfo)
 {
     bool pathIsPainted = renderInfo.Operation != PathPaintingRenderInfo.NO_OP;

     if (pathIsPainted && textRectangle == null)
     {
         // Nothing accumulated yet: the path rectangle becomes the starting rectangle.
         textRectangle = currentPathRectangle;
     }
     else if (pathIsPainted)
     {
         textRectangle.Add(currentPathRectangle);
     }

     // The path has been consumed either way.
     currentPathRectangle = null;
     return null;
 }
예제 #7
0
        /// <summary>
        /// Grows the accumulated textRectangle so that it includes the image: the four unit-square
        /// corner vectors are crossed with the image CTM, and the bounding rectangles of the
        /// resulting bottom and top edges are added.
        /// </summary>
        /// <param name="renderInfo">Image render information supplying the image CTM.</param>
        public virtual void RenderImage(ImageRenderInfo renderInfo)
        {
            Matrix ctm = renderInfo.GetImageCTM();

            // Bottom edge: (0,0) -> (1,0); top edge: (0,1) -> (1,1), each crossed with the CTM.
            var bottomBounds = new LineSegment(
                new Vector(0, 0, 1).Cross(ctm),
                new Vector(1, 0, 1).Cross(ctm)).GetBoundingRectange();
            var topBounds = new LineSegment(
                new Vector(0, 1, 1).Cross(ctm),
                new Vector(1, 1, 1).Cross(ctm)).GetBoundingRectange();

            if (textRectangle != null)
            {
                textRectangle.Add(bottomBounds);
            }
            else
            {
                textRectangle = bottomBounds;
            }

            textRectangle.Add(topBounds);
        }
예제 #8
0
// --------------------------------------------------------------------------- 
    /// <summary>
    /// Writes a zip archive to <paramref name="stream"/> containing the PREFACE file and a
    /// RESULT entry holding the text extracted from a fixed region of every PREFACE page.
    /// </summary>
    /// <param name="stream">Destination the zip archive is saved to.</param>
    public void Write(Stream stream) {
      using (ZipFile zip = new ZipFile()) {
        zip.AddFile(PREFACE, "");
        System.util.RectangleJ rect = new System.util.RectangleJ(
          70, 80, 420, 500
        );
        RenderFilter[] filter = {new RegionTextRenderFilter(rect)};
        StringBuilder sb = new StringBuilder();
        PdfReader reader = new PdfReader(PREFACE);
        try {
          for (int i = 1; i <= reader.NumberOfPages; i++) {
            // A new strategy is created for every page; only the filter is shared.
            ITextExtractionStrategy strategy = new FilteredTextRenderListener(
              new LocationTextExtractionStrategy(), filter
            );
            sb.AppendLine(
              PdfTextExtractor.GetTextFromPage(reader, i, strategy)
            );
          }
        }
        finally {
          // Release the PDF even when extraction fails.
          reader.Close();
        }
        zip.AddEntry(RESULT, sb.ToString());
        zip.Save(stream);
      }
    }
예제 #9
0
// ---------------------------------------------------------------------------
        /// <summary>
        /// Writes a zip archive to <paramref name="stream"/> containing the PREFACE file and a
        /// RESULT entry holding the text extracted from a fixed region of every PREFACE page.
        /// </summary>
        /// <param name="stream">Destination the zip archive is saved to.</param>
        public void Write(Stream stream)
        {
            using (ZipFile zip = new ZipFile()) {
                zip.AddFile(PREFACE, "");
                System.util.RectangleJ rect = new System.util.RectangleJ(
                    70, 80, 420, 500
                    );
                RenderFilter[] filter = { new RegionTextRenderFilter(rect) };
                StringBuilder  sb     = new StringBuilder();
                PdfReader      reader = new PdfReader(PREFACE);
                try
                {
                    for (int i = 1; i <= reader.NumberOfPages; i++)
                    {
                        // A new strategy is created for every page; only the filter is shared.
                        ITextExtractionStrategy strategy = new FilteredTextRenderListener(
                            new LocationTextExtractionStrategy(), filter
                            );
                        sb.AppendLine(
                            PdfTextExtractor.GetTextFromPage(reader, i, strategy)
                            );
                    }
                }
                finally
                {
                    // Release the PDF even when extraction fails.
                    reader.Close();
                }
                zip.AddEntry(RESULT, sb.ToString());
                zip.Save(stream);
            }
        }
예제 #10
0
        /// <summary>
        /// Collects the points described by a path construction operation, crosses each with the
        /// current transformation matrix and extends currentPathRectangle to include them.
        /// </summary>
        /// <param name="renderInfo">
        /// Path construction information. For a RECT operation the segment data is read as
        /// x, y, width, height; for any other operation it is read as consecutive coordinate pairs.
        /// </param>
        public void ModifyPath(PathConstructionRenderInfo renderInfo)
        {
            var pathPoints  = new List <Vector>();
            var segmentData = renderInfo.SegmentData;

            if (renderInfo.Operation == PathConstructionRenderInfo.RECT)
            {
                // Expand the rectangle operator into its four corners.
                float left   = segmentData[0];
                float bottom = segmentData[1];
                float width  = segmentData[2];
                float height = segmentData[3];

                pathPoints.Add(new Vector(left, bottom, 1));
                pathPoints.Add(new Vector(left + width, bottom, 1));
                pathPoints.Add(new Vector(left, bottom + height, 1));
                pathPoints.Add(new Vector(left + width, bottom + height, 1));
            }
            else if (segmentData != null)
            {
                // Walk complete coordinate pairs only; a trailing unpaired value is ignored.
                for (int yIndex = 1; yIndex < segmentData.Count; yIndex += 2)
                {
                    pathPoints.Add(new Vector(segmentData[yIndex - 1], segmentData[yIndex], 1));
                }
            }

            foreach (Vector rawPoint in pathPoints)
            {
                Vector      transformed = rawPoint.Cross(renderInfo.Ctm);
                Rectangle2D pointBounds = new Rectangle2D(transformed[Vector.I1], transformed[Vector.I2], 0, 0);

                if (currentPathRectangle != null)
                {
                    currentPathRectangle.Add(pointBounds);
                }
                else
                {
                    // First point of the path starts the rectangle.
                    currentPathRectangle = pointBounds;
                }
            }
        }
예제 #11
0
 /// <summary>
 /// Creates a TextInfo from a single text chunk.
 /// </summary>
 /// <param name="initialTextChunk">
 /// Chunk supplying the text, the ascent line (start point and bounding rectangle) and the
 /// descent line (end point).
 /// </param>
 public TextInfo(TextChunk initialTextChunk)
 {
     var ascent = initialTextChunk.AscentLine;

     m_Text      = initialTextChunk.Text;
     // The rectangle starts out as the bounds of the ascent line only.
     rectangle   = ascent.GetBoundingRectange();
     TopLeft     = ascent.GetStartPoint();
     BottomRight = initialTextChunk.DecentLine.GetEndPoint();
 }
        /// <summary>
        /// Walks every page of the source PDF, extracts the text of the two regions stored in
        /// rows 0 and 1 of the template table, feeds non-empty results to parseaddress and
        /// collects the address items it reports. When finished, the results are pushed to the
        /// UI through the control Invoke calls at the end.
        /// </summary>
        public void verify()
        {
            addresscollection aic = new addresscollection();
            addressitem ai = null;
            System.Xml.XmlNode ep = null;
            System.Xml.XmlNode batch = null;
            System.Xml.XmlNode startingpage = null;
            System.Xml.XmlNode envelope = null;

            iTextSharp.text.pdf.PdfReader reader;
            // The stream is only needed while the reader is constructed (the original closed it
            // immediately afterwards as well); 'using' also releases it when construction throws.
            using (FileStream x = new FileStream(_sourcefilename, FileMode.Open))
            {
                reader = new iTextSharp.text.pdf.PdfReader(x);
            }

            try
            {
                int pages = reader.NumberOfPages;
                for (int pageNumber = 1; pageNumber <= pages; pageNumber++)
                {
                    this.Label2.Invoke(new updatetext(updatelabel1text), new object[] { "Processing Page " + Convert.ToString(pageNumber) + " of " + pages });

                    // New strategies are built for every page from the two template rows.
                    iTextSharp.text.pdf.parser.FilteredTextRenderListener textExtractionStrategy = BuildRegionStrategyFromRow(_dtt.Rows[0]);
                    iTextSharp.text.pdf.parser.FilteredTextRenderListener textExtractionStrategy1 = BuildRegionStrategyFromRow(_dtt.Rows[1]);

                    string s = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, pageNumber, textExtractionStrategy);

                    if (!string.IsNullOrEmpty(s))
                    {
                        // The second region is only read when the first one produced text.
                        string s1 = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, pageNumber, textExtractionStrategy1);

                        if (parseaddress(s, s1, ref batch, pageNumber, ref ep, ref envelope, ref startingpage, ref ai))
                        {
                            // Does not catch a single first page; whenever this is true it is a new first page.
                            aic.Add(ai);
                        }
                    }

                    if (pageNumber == pages)
                    {
                        // On the last page, record it as the end page of whatever is still open.
                        if (ep != null)
                        {
                            ep.InnerText = Convert.ToString(pageNumber);
                        }

                        if (ai != null)
                        {
                            ai.endpage = pageNumber;
                        }
                    }
                }
            }
            finally
            {
                // Release the PDF even when extraction or parsing fails.
                reader.Close();
            }

            this.RichTextBox1.Invoke(new updatert(updaterichtext), new object[] {
                XMLDOC,
                badaddress
            });
            Invoke(new updatedatagrid(updatedatagridonMail), new object[] { aic });
            Invoke(new updatecomplete(updatecompleted), new object[]{});
        }

        // Builds a new location-based extraction strategy limited to the rectangle stored in the
        // "x", "y", "width" and "height" columns of the given row. The filter array holds exactly
        // one element: the previous two-slot array left a null entry that could be dereferenced
        // when the listener walks its filters.
        private static iTextSharp.text.pdf.parser.FilteredTextRenderListener BuildRegionStrategyFromRow(DataRow row)
        {
            System.util.RectangleJ region = new System.util.RectangleJ(Convert.ToInt32(row["x"]), Convert.ToInt32(row["y"]), Convert.ToInt32(row["width"]), Convert.ToInt32(row["height"]));
            iTextSharp.text.pdf.parser.RegionTextRenderFilter[] filters = { new iTextSharp.text.pdf.parser.RegionTextRenderFilter(region) };
            return new iTextSharp.text.pdf.parser.FilteredTextRenderListener(new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy(), filters);
        }
        /// <summary>
        /// Opens the given PDF and, when in template mode (_mode == 1) with a loaded document,
        /// extracts the text under the currently selected rectangle on the current page, asks
        /// the user to confirm it and stores the region in the current row of the template table.
        /// In any other mode only the completion hint is shown in Label2.
        /// </summary>
        /// <param name="sourceFileName">Path of the PDF file to open.</param>
        /// <returns>Always an empty string.</returns>
        public string ExtractTextFromRegionOfPdf(string sourceFileName)
        {
            // Y is flipped against the picture box height (presumably converting top-based
            // screen coordinates into bottom-based PDF coordinates - confirm). Computed once so
            // the stored region is exactly the one that was extracted and confirmed.
            var regionY = System.Math.Abs(this.PictureBox1.Height - Rect.Y) - Rect.Height;

            System.util.RectangleJ rect1 = new System.util.RectangleJ(Rect.X, regionY, Rect.Width, Rect.Height);
            // Exactly one filter: a larger array would leave null entries that could be
            // dereferenced when the listener walks its filters.
            iTextSharp.text.pdf.parser.RegionTextRenderFilter[] rtrf = { new iTextSharp.text.pdf.parser.RegionTextRenderFilter(rect1) };
            iTextSharp.text.pdf.parser.FilteredTextRenderListener textExtractionStrategy = new iTextSharp.text.pdf.parser.FilteredTextRenderListener(new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy(), rtrf);

            bool confirmSelection = _mode == 1 && this.loadedbool;
            string s = null;

            using (FileStream x = new FileStream(sourceFileName, FileMode.Open))
            {
                iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(x);
                try
                {
                    if (confirmSelection)
                    {
                        // Extract while the reader is still open; it used to be closed first.
                        s = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, CurrentPage + 1, textExtractionStrategy);
                    }
                }
                finally
                {
                    reader.Close();
                }
            }

            if (_mode != 1)
            {
                this.Label2.Text = "OK, you have completed the template, if you wish to start over simply do it again and start by selecting the area with:" + _dtt.Rows[0]["fieldname"];
                return "";
            }

            if (!confirmSelection)
            {
                return "";
            }

            DialogResult y = MessageBox.Show("This field is showing : " + s + "\r\n" + "Is this the correct variable on this page?", "Confirm", MessageBoxButtons.YesNo);
            if (y != DialogResult.Yes)
            {
                return "";
            }

            // Persist the confirmed region in the current template row.
            _dtt.Rows[_CurrentCount]["x"] = Rect.X;
            _dtt.Rows[_CurrentCount]["y"] = regionY;
            _dtt.Rows[_CurrentCount]["width"] = Rect.Width;
            _dtt.Rows[_CurrentCount]["height"] = Rect.Height;

            if (_CurrentCount == 0)
            {
                DialogResult xx = MessageBox.Show("There is an optional Parimeter where you can select something that only appears on the first page, do you want to add this.  It can be part of a string like Page 1 of XX?", "Confirm", MessageBoxButtons.YesNo);
                if (xx == DialogResult.No)
                {
                    // Skip the optional first-page step.
                    _CurrentCount = 2;
                }
            }

            // Compare by value: the row indexer returns object, so '==' against a string
            // literal would only compare references.
            if (string.Equals(Convert.ToString(_dtt.Rows[_CurrentCount]["FieldName"]), "FirstPageConstant", StringComparison.Ordinal))
            {
                _validatetext = Interaction.InputBox("Enter Charectors to match, if you enter \"1 of \" it will be true for anything after the of");
                _dtt.Rows[_CurrentCount + 1]["misc"] = _validatetext;
                _CurrentCount += 1;
            }
            _CurrentCount += 1;

            if (_CurrentCount == 3)
            {
                // All template steps are done: reset so the user can start over.
                _CurrentCount = 0;
                this.Label2.Text = "OK, you have completed the template, if you wish to start over simply do it again and start by selecting the area with:" + _dtt.Rows[0]["fieldname"];
                startover = 1;
                drawrectangles();

                MessageBox.Show("Make sure you save this if you want to use it in the future.");
            }
            else
            {
                this.Label2.Text = "Now Select : " + _dtt.Rows[_CurrentCount]["fieldname"];
            }

            return "";
        }