/// <summary>
        /// Extracts the text of every page of a PDF file and writes it, page after page,
        /// to a single output file.
        /// </summary>
        /// <remarks>
        /// The output contains the extracted plain text only; no Word document structure
        /// is generated, whatever extension <paramref name="OutPutFileFullPath"/> carries.
        /// </remarks>
        /// <param name="FileFullPath">Path of the PDF file to read.</param>
        /// <param name="OutPutFileFullPath">Path of the file to create; an existing file is overwritten.</param>
        public static void PDFToWord(string FileFullPath, string OutPutFileFullPath)
        {
            PdfReader pdfReader = new PdfReader(FileFullPath);
            try
            {
                // Open the output once for the whole document. Re-creating it for each page
                // would truncate the file every time and keep only the last write.
                using (FileStream outputStream = new FileStream(OutPutFileFullPath, FileMode.Create, FileAccess.Write, FileShare.None))
                using (StreamWriter writer = new StreamWriter(outputStream))
                {
                    for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                    {
                        // A fresh strategy per page: LocationTextExtractionStrategy keeps the
                        // chunks of every page it has processed, so a shared instance would
                        // mix text from earlier pages into the result of later ones.
                        iTextSharp.text.pdf.parser.ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
                        string pageText = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);
                        writer.WriteLine(pageText);
                    }
                }
            }
            finally
            {
                // Release the reader even when extraction or writing fails.
                pdfReader.Close();
            }
        }
Example #2
0
        /// <summary>
        /// Extracts the text of every page of <paramref name="reader"/>, passes each page's
        /// text through <c>Convert</c> and returns all results, each followed by a line break.
        /// </summary>
        /// <param name="reader">An open PDF reader; it is not closed by this method.</param>
        /// <returns>The converted text of all pages in page order.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="reader"/> is null.</exception>
        public static string GetTextFromAllPages(iTextSharp.text.pdf.PdfReader reader)
        {
            if (reader == null)
            {
                throw new System.ArgumentNullException("reader");
            }

            var output = new System.IO.StringWriter();
            int pageCount = reader.NumberOfPages;

            for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
            {
                // A fresh strategy per page: LocationTextExtractionStrategy keeps the chunks
                // of every page it has processed, so reusing one instance would repeat the
                // text of earlier pages in the output of each later page.
                var strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
                var text = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, pageNumber, strategy);

                // Convert is defined outside this block; its transformation is not visible here.
                text = Convert(text);
                output.WriteLine(text);
            }

            return output.ToString();
        }
        /// <summary>
        /// Walks every page of the PDF named by <c>_sourcefilename</c>, extracts the text of
        /// the two regions described by the first two rows of <c>_dtt</c> and hands it to
        /// <c>parseaddress</c>. Items reported by <c>parseaddress</c> are collected and then
        /// pushed to the UI through the <c>Invoke</c> calls at the end.
        /// </summary>
        /// <remarks>
        /// Progress and results are delivered with <c>Invoke</c>, so this method is presumably
        /// meant to run on a worker thread; confirm against the caller.
        /// </remarks>
        public void verify()
        {
            addresscollection aic = new addresscollection();
            addressitem ai = null;
            System.Xml.XmlNode ep = null;
            System.Xml.XmlNode batch = null;
            System.Xml.XmlNode startingpage = null;
            System.Xml.XmlNode envelope = null;

            // The stream is released right after the reader is constructed, exactly as before,
            // but now also when the constructor throws. Read-only access is enough for parsing
            // and lets read-only files be opened.
            iTextSharp.text.pdf.PdfReader reader;
            using (FileStream pdfStream = new FileStream(_sourcefilename, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                reader = new iTextSharp.text.pdf.PdfReader(pdfStream);
            }

            try
            {
                int pages = reader.NumberOfPages;

                for (int pageNumber = 1; pageNumber <= pages; pageNumber++)
                {
                    this.Label2.Invoke(new updatetext(updatelabel1text), new object[] { "Processing Page " + Convert.ToString(pageNumber) + " of " + pages });

                    // Listeners are rebuilt for every page so that no text collected on an
                    // earlier page leaks into this page's result.
                    iTextSharp.text.pdf.parser.FilteredTextRenderListener primaryListener = CreateRegionListener(_dtt.Rows[0]);
                    iTextSharp.text.pdf.parser.FilteredTextRenderListener secondaryListener = CreateRegionListener(_dtt.Rows[1]);

                    string primaryText = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, pageNumber, primaryListener);

                    // The second region is only read when the first one produced text.
                    if (!string.IsNullOrEmpty(primaryText))
                    {
                        string secondaryText = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, pageNumber, secondaryListener);

                        // Original note: a true result always means a new first page was found
                        // (the note also says a single first page is not caught).
                        if (parseaddress(primaryText, secondaryText, ref batch, pageNumber, ref ep, ref envelope, ref startingpage, ref ai))
                        {
                            aic.Add(ai);
                        }
                    }
                }

                // After the last page, stamp the final page number on whatever node and
                // item are still current.
                if (pages > 0)
                {
                    if (ep != null)
                    {
                        ep.InnerText = Convert.ToString(pages);
                    }

                    if (ai != null)
                    {
                        ai.endpage = pages;
                    }
                }
            }
            finally
            {
                // Close the reader even when a page fails to parse.
                reader.Close();
            }

            this.RichTextBox1.Invoke(new updatert(updaterichtext), new object[] {
                XMLDOC,
                badaddress
            });
            Invoke(new updatedatagrid(updatedatagridonMail), new object[] { aic });
            Invoke(new updatecomplete(updatecompleted), new object[]{});
        }

        /// <summary>
        /// Builds a text listener that only accepts text inside the rectangle stored in the
        /// "x", "y", "width" and "height" columns of <paramref name="row"/>.
        /// </summary>
        private static iTextSharp.text.pdf.parser.FilteredTextRenderListener CreateRegionListener(DataRow row)
        {
            System.util.RectangleJ region = new System.util.RectangleJ(Convert.ToInt32(row["x"]), Convert.ToInt32(row["y"]), Convert.ToInt32(row["width"]), Convert.ToInt32(row["height"]));
            iTextSharp.text.pdf.parser.RegionTextRenderFilter regionFilter = new iTextSharp.text.pdf.parser.RegionTextRenderFilter(region);
            iTextSharp.text.pdf.parser.LocationTextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

            // The array holds exactly the one filter in use, so the listener never
            // receives a null entry.
            iTextSharp.text.pdf.parser.RegionTextRenderFilter[] filters = new iTextSharp.text.pdf.parser.RegionTextRenderFilter[] { regionFilter };
            return new iTextSharp.text.pdf.parser.FilteredTextRenderListener(strategy, filters);
        }
        /// <summary>
        /// Reads the text inside the currently selected rectangle (<c>Rect</c>) on the current
        /// page of a PDF and, when <c>_mode</c> is 1 and <c>loadedbool</c> is set, asks the user
        /// to confirm it and stores the rectangle in the current row of <c>_dtt</c>.
        /// When <c>_mode</c> is not 1, only the "template completed" label text is shown.
        /// </summary>
        /// <param name="sourceFileName">Path of the PDF file to read.</param>
        /// <returns>Always an empty string.</returns>
        public string ExtractTextFromRegionOfPdf(string sourceFileName)
        {
            bool templateMode = _mode == 1;
            bool promptUser = templateMode && this.loadedbool;
            string regionText = null;

            // The file is opened on every call, as before, so a missing or unreadable file
            // still fails here. The stream is released even when the reader constructor throws.
            iTextSharp.text.pdf.PdfReader reader;
            using (FileStream pdfStream = new FileStream(sourceFileName, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                reader = new iTextSharp.text.pdf.PdfReader(pdfStream);
            }

            try
            {
                if (promptUser)
                {
                    // The y coordinate is flipped against the picture box height before the
                    // rectangle is handed to the region filter.
                    System.util.RectangleJ region = new System.util.RectangleJ(Rect.X, System.Math.Abs(this.PictureBox1.Height - Rect.Y) - Rect.Height, Rect.Width, Rect.Height);
                    iTextSharp.text.pdf.parser.RegionTextRenderFilter regionFilter = new iTextSharp.text.pdf.parser.RegionTextRenderFilter(region);
                    iTextSharp.text.pdf.parser.LocationTextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                    // Exactly one filter, so the listener never receives a null entry.
                    iTextSharp.text.pdf.parser.RegionTextRenderFilter[] filters = new iTextSharp.text.pdf.parser.RegionTextRenderFilter[] { regionFilter };
                    iTextSharp.text.pdf.parser.FilteredTextRenderListener listener = new iTextSharp.text.pdf.parser.FilteredTextRenderListener(strategy, filters);

                    // Extract while the reader is still open; it is closed only afterwards.
                    regionText = iTextSharp.text.pdf.parser.PdfTextExtractor.GetTextFromPage(reader, CurrentPage + 1, listener);
                }
            }
            finally
            {
                reader.Close();
            }

            if (!templateMode)
            {
                this.Label2.Text = "OK, you have completed the template, if you wish to start over simply do it again and start by selecting the area with:" + _dtt.Rows[0]["fieldname"];
                return "";
            }

            if (!promptUser)
            {
                return "";
            }

            DialogResult confirmation = MessageBox.Show("This field is showing : " + regionText + "\r\n" + "Is this the correct variable on this page?", "Confirm", MessageBoxButtons.YesNo);
            if (confirmation != DialogResult.Yes)
            {
                return "";
            }

            // Store the confirmed rectangle in the row of the field being defined.
            _dtt.Rows[_CurrentCount]["x"] = Rect.X;
            _dtt.Rows[_CurrentCount]["y"] = System.Math.Abs(this.PictureBox1.Height - Rect.Y) - Rect.Height;
            _dtt.Rows[_CurrentCount]["width"] = Rect.Width;
            _dtt.Rows[_CurrentCount]["height"] = Rect.Height;

            if (_CurrentCount == 0)
            {
                DialogResult wantsFirstPageMarker = MessageBox.Show("There is an optional Parimeter where you can select something that only appears on the first page, do you want to add this.  It can be part of a string like Page 1 of XX?", "Confirm", MessageBoxButtons.YesNo);
                if (wantsFirstPageMarker == DialogResult.No)
                {
                    // Skip ahead so the increment below lands on 3 and completes the template.
                    _CurrentCount = 2;
                }
            }

            // Compare the cell's text by value. The cell is typed as object, so "==" would
            // only compare references and depend on how the string instance was created.
            string fieldName = _dtt.Rows[_CurrentCount]["FieldName"] as string;
            if (string.Equals(fieldName, "FirstPageConstant", System.StringComparison.Ordinal))
            {
                _validatetext = Interaction.InputBox("Enter Charectors to match, if you enter \"1 of \" it will be true for anything after the of");
                _dtt.Rows[_CurrentCount + 1]["misc"] = _validatetext;
                _CurrentCount += 1;
            }
            _CurrentCount += 1;

            if (_CurrentCount == 3)
            {
                // All fields are defined: reset the counter and tell the user.
                _CurrentCount = 0;
                this.Label2.Text = "OK, you have completed the template, if you wish to start over simply do it again and start by selecting the area with:" + _dtt.Rows[0]["fieldname"];
                startover = 1;
                drawrectangles();

                MessageBox.Show("Make sure you save this if you want to use it in the future.");
            }
            else
            {
                this.Label2.Text = "Now Select : " + _dtt.Rows[_CurrentCount]["fieldname"];
            }

            return "";
        }