Exemple #1
        /// <summary>
        /// Extracts the text of the first page of the template PDF, saves it to
        /// "ExtractTextFromParticularPage_out.txt" and shows the output path.
        /// </summary>
        /// <param name="sender">Control that raised the click event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            String input  = @"..\..\..\..\..\..\Data\PDFTemplate-Az.pdf";
            String result = Path.GetFullPath("ExtractTextFromParticularPage_out.txt");

            PdfDocument doc = new PdfDocument();
            try
            {
                // Read the pdf file. The document must be loaded before its
                // pages are indexed; a freshly constructed one has no content.
                doc.LoadFromFile(input);

                // Get the first page
                PdfPageBase page = doc.Pages[0];

                // Extract text from page keeping white space
                // (pass false instead to extract without keeping white space).
                String text = page.ExtractText(true);

                // Create a writer to put the extracted text. The using block
                // flushes and closes the stream even when the write throws.
                using (TextWriter tw = new StreamWriter(result))
                {
                    // Write a line of text to the file
                    tw.WriteLine(text);
                }
            }
            finally
            {
                // Release the resources held by the document.
                doc.Close();
            }

            MessageBox.Show("\nText extracted successfully from particular pages of PDF Document.\nFile saved at " + result);
        }
        /// <summary>
        /// Loads Invoice.pdf, collects the words of its first page together with
        /// their bounds, and writes the text of the word that intersects a fixed
        /// rectangle to "data.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Rectangle that the wanted word must intersect.
            RectangleF textBounds = new RectangleF(474, 161, 50, 9);

            string invoiceNumber = "";

            //Load the PDF document
            PdfLoadedDocument loadedDocument = new PdfLoadedDocument("../../../../../../Data/Invoice.pdf");
            try
            {
                // Get the first page of the loaded PDF document
                PdfPageBase page = loadedDocument.Pages[0];

                // Extract text from the first page with bounds. The collection is
                // produced through the out parameter, so nothing is allocated here.
                TextLines lineCollection;
                page.ExtractText(out lineCollection);

                //Get the text provided in the bounds. When several words intersect
                //the rectangle the last one visited wins, as in the original loop.
                foreach (TextLine txtLine in lineCollection)
                {
                    foreach (TextWord word in txtLine.WordCollection)
                    {
                        if (textBounds.IntersectsWith(word.Bounds))
                        {
                            invoiceNumber = word.Text;
                        }
                    }
                }
            }
            finally
            {
                //Close the PDF document, also when extraction throws
                loadedDocument.Close(true);
            }

            File.WriteAllText("data.txt", invoiceNumber);
        }
Exemple #3
        /// <summary>
        /// Extracts the text inside a fixed rectangle on the first page of
        /// ExtractTextFromSpecificArea.pdf and saves it to "ExtractText_result.txt".
        /// </summary>
        /// <param name="sender">Control that raised the click event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            string input  = @"..\..\..\..\..\..\Data\ExtractTextFromSpecificArea.pdf";
            string result = "ExtractText_result.txt";

            PdfDocument pdf = new PdfDocument();
            try
            {
                //Load the PDF file; the pages can only be indexed once it is loaded
                pdf.LoadFromFile(input);

                //Get the first page
                PdfPageBase page = pdf.Pages[0];

                //Extract text from a specific rectangular area within the page
                string text = page.ExtractText(new RectangleF(80, 180, 500, 200));

                //Save the text to a .txt file. The extracted text has to be appended
                //to the builder, otherwise an empty file is written.
                StringBuilder sb = new StringBuilder();
                sb.AppendLine(text);

                File.WriteAllText(result, sb.ToString());
            }
            finally
            {
                // Release the resources held by the document.
                pdf.Close();
            }
        }

Exemple #4
        /// <summary>
        /// Click handler that walks the annotation widgets on the first page of a
        /// <c>PdfDocument</c>, picks out the text-markup annotations, and writes the
        /// accumulated <c>StringBuilder</c> content to "ExtractHighlightedText.txt".
        /// </summary>
        /// <param name="sender">Event source; not read in the visible body.</param>
        /// <param name="e">Event data; not read in the visible body.</param>
        // NOTE(review): block braces are absent from this listing and no load call
        // follows the "Load a pdf file" comment, so doc.Pages[0] is indexed on a
        // document that was only constructed. Lines were presumably lost when the
        // sample was copied; confirm against the original before reuse.
        private void button1_Click(object sender, EventArgs e)
            //Create a pdf document
            PdfDocument doc = new PdfDocument();

            //Load a pdf file

            PdfPageBase page = doc.Pages[0];
            PdfTextMarkupAnnotationWidget textMarkupAnnotation;
            StringBuilder stringBuilder = new StringBuilder();

            // Heading line of the report; everything written to the file comes
            // from this builder.
            stringBuilder.AppendLine("Extracted hightlighted text:");
            //Get PdfTextMarkupAnnotationWidget objects
            for (int i = 0; i < page.AnnotationsWidget.Count; i++)
                // Only widgets of the text-markup type are processed.
                if (page.AnnotationsWidget[i] is PdfTextMarkupAnnotationWidget)
                    textMarkupAnnotation = page.AnnotationsWidget[i] as PdfTextMarkupAnnotationWidget;
                    //Get the highlighted text
                    // NOTE(review): no statement follows this comment, so nothing
                    // per-annotation is appended to stringBuilder here; presumably
                    // an append of the annotation's text is missing. TODO confirm.

                    //Get the highlighted color
                    // NOTE(review): color is assigned but not read again in the visible lines.
                    Color color = textMarkupAnnotation.TextMarkupColor;
            String result = "ExtractHighlightedText.txt";

            // Writes whatever the builder holds to the result file.
            File.WriteAllText(result, stringBuilder.ToString());
Exemple #5
        /// <summary>
        /// Extracts the text of the first page of Invoice.pdf, preserving its
        /// layout, and writes it to "data.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            //Load the PDF document
            PdfLoadedDocument loadedDocument = new PdfLoadedDocument("../../../../../../Data/Invoice.pdf");
            try
            {
                //Get the first page of the loaded PDF document
                PdfPageBase page = loadedDocument.Pages[0];

                //Extract text with layout
                string extractedText = page.ExtractText(true);

                //Save text to file
                File.WriteAllText("data.txt", extractedText);
            }
            finally
            {
                //Close the PDF document, also when extraction or the write throws
                loadedDocument.Close(true);
            }
        }
Exemple #6
        /// <summary>
        /// Loads the PDF selected by <c>currentPageNum</c> from <c>pdfFileName</c>,
        /// extracts the text inside a rectangle derived from the <c>x</c> and
        /// <c>y</c> members, strips the Spire.PDF evaluation banner from it and
        /// saves the result to "Extract.txt".
        /// </summary>
        private void getPDFMsg()
        {
            PdfDocument pdf = new PdfDocument();
            try
            {
                // currentPageNum is used as a 1-based index into pdfFileName.
                pdf.LoadFromFile(pdfFileName[currentPageNum - 1]);

                PdfPageBase page = pdf.Pages[0];

                // Each coordinate is truncated to a whole number before the
                // rectangle is built, exactly as in the original expression.
                string text = page.ExtractText(new RectangleF((int)(x.X - 70), (int)(x.Y - 30), (int)(y.X - 80), (int)(y.Y - 135)));

                // The extracted text has to be appended to the builder; writing
                // the untouched builder would always produce an empty file.
                StringBuilder sb = new StringBuilder();
                sb.AppendLine(text);

                File.WriteAllText("Extract.txt", sb.ToString().Replace("Evaluation Warning : The document was created with Spire.PDF for .NET.", ""));
            }
            finally
            {
                // Release the resources held by the document.
                pdf.Close();
            }
        }
        /// <summary>
        /// Records <paramref name="openFile"/> in <c>SaveFilePdf.openFile</c> and, when
        /// it is not null, opens it as a <c>PdfLoadedDocument</c>, extracts the text of
        /// the first page and adds the file to the most-recently-used list.
        /// </summary>
        /// <param name="openFile">File to read; a null value skips the extraction.</param>
        // NOTE(review): the listing shows neither braces nor a return statement
        // although the method is declared as Task<string>; presumably it ends with
        // "return extractedText;". TODO confirm against the original file.
        // NOTE(review): no Close/Dispose call on loadedDocument is visible.
        public static async Task <string> Read(StorageFile openFile)
            // Stored in a static member of SaveFilePdf before the null check.
            SaveFilePdf.openFile = openFile;
            string extractedText = "";

            if (openFile != null)
                PdfLoadedDocument loadedDocument = new PdfLoadedDocument();
                // ConfigureAwait(true) asks for the continuation to run on the captured context.
                await loadedDocument.OpenAsync(openFile).ConfigureAwait(true);

                // Only the first page is read.
                PdfPageBase page = loadedDocument.Pages[0];
                extractedText = page.ExtractText();
                var    mru      = Windows.Storage.AccessCache.StorageApplicationPermissions.MostRecentlyUsedList;
                // NOTE(review): mruToken is not read again in the visible lines.
                string mruToken = mru.Add(openFile, "Pdf file");
        /// <summary>
        /// Opens Invoice.pdf through a read-only stream, extracts the text of its
        /// first page and writes it to "data.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string extractedText;

            // The file is only read, so request read access explicitly; the using
            // block releases the file handle on every path.
            using (FileStream inputStream = new FileStream("../../../../../../../Data/Invoice.pdf", FileMode.Open, FileAccess.Read))
            {
                //Load the PDF document
                PdfLoadedDocument loadedDocument = new PdfLoadedDocument(inputStream);
                try
                {
                    // Get the first page of the loaded PDF document
                    PdfPageBase page = loadedDocument.Pages[0];

                    // Extract text from the first page
                    extractedText = page.ExtractText();
                }
                finally
                {
                    //Close the document, also when extraction throws
                    loadedDocument.Close(true);
                }
            }

            //Save the text to file
            File.WriteAllText("data.txt", extractedText);
        }
Exemple #9
        /// <summary>
        /// Extracts the text of the first page of the PDF located at
        /// <c>filename.Path</c>.
        /// </summary>
        /// <param name="filename">Object whose <c>Path</c> member names the PDF to load.</param>
        /// <returns>The text extracted from the first page.</returns>
        public string StripPDF(PDF filename)
        {
            //Load an existing PDF.
            PdfLoadedDocument loadedDocument = new PdfLoadedDocument(filename.Path);
            try
            {
                //Load the first page.
                PdfPageBase page = loadedDocument.Pages[0];

                //Extract text from first page and hand it back to the caller;
                //the method is declared to return a string.
                return page.ExtractText();
            }
            finally
            {
                //Close the document, also when extraction throws
                loadedDocument.Close(true);
            }
        }


Exemple #10
        /// <summary>
        /// Extracts the text of the first page of a <c>PdfDocument</c> with a
        /// <c>SimpleTextExtractionStrategy</c> and creates "result_spire.txt" in the
        /// directory of <paramref name="fileName"/>.
        /// </summary>
        /// <param name="fileName">Path whose directory receives the result file.</param>
        // NOTE(review): the listing is incomplete. The extra indentation and the
        // trailing catch suggest a try block whose "try" line, braces and catch
        // body were lost; no call loads fileName into doc, and nothing writes
        // text to sw or closes sw/fs in the visible lines. Confirm against the
        // original before relying on any of this.
        public static void GetElements(string fileName)
                PdfDocument doc = new PdfDocument();
                PdfPageBase page = doc.Pages[0];

                SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                string       text = page.ExtractText(strategy);
                // FileMode.Create makes a new file or overwrites an existing one.
                FileStream   fs   = new FileStream(Path.GetDirectoryName(fileName) + "\\result_spire.txt", FileMode.Create);
                StreamWriter sw   = new StreamWriter(fs);
            catch (Exception e)
Exemple #11
        /// <summary>
        /// Click handler that, when <c>fileDialog.FileName</c> is not empty, extracts
        /// the text of the page at index 2 of a <c>PdfDocument</c>, opens
        /// "Result_PDF.txt" for writing, then reads that file back into
        /// <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">Event source; not read in the visible body.</param>
        /// <param name="e">Event data; not read in the visible body.</param>
        // NOTE(review): braces are missing from this listing. No call loads
        // fileDialog.FileName into doc, and nothing writes text to sw or closes
        // sw/fs before the file is read back, so as shown the read would race an
        // open, empty stream. Presumably these lines were lost. TODO confirm.
        private void button2_Click(object sender, EventArgs e)
            if (fileDialog.FileName.Length > 0)
                PdfDocument doc = new PdfDocument();

                // Index 2 is the third page; the document needs at least three pages.
                PdfPageBase page = doc.Pages[2];
                SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                string       text = page.ExtractText(strategy);
                FileStream   fs   = new FileStream("Result_PDF.txt", FileMode.Create);
                StreamWriter sw   = new StreamWriter(fs);

                // The text box is filled from the file on disk, not from the text variable.
                string textValue = System.IO.File.ReadAllText("Result_PDF.txt");
                textBox1.Text = textValue;
                // NOTE(review): the message asks the user to select a PDF file, so
                // this presumably belongs to a lost else branch of the check above.
                MessageBox.Show("PDF 파일을 선택해 주세요.");
Exemple #12
        /// <summary>
        /// Click handler that lets the user pick a file, extracts the text of the
        /// first page of a <c>PdfDocument</c>, and for every extracted line that
        /// contains "$" sends the line to <c>APIRequest</c> and appends a
        /// Success/Failure entry to <c>displayText</c>. Progress is appended to
        /// <c>statusBox</c> along the way.
        /// </summary>
        /// <param name="sender">Event source; not read in the visible body.</param>
        /// <param name="e">Event data; not read in the visible body.</param>
        // NOTE(review): braces and any else keywords are missing from this
        // listing, and no call loads filePath into doc before doc.Pages[0] is
        // indexed. Presumably those lines were lost. TODO confirm.
        private void button1_Click(object sender, EventArgs e)
            var fileContent = string.Empty;
            var filePath    = string.Empty;

            using (OpenFileDialog openFileDialog = new OpenFileDialog())
                this.statusBox.Text            += Environment.NewLine + "Selecting file..." + Environment.NewLine;
                openFileDialog.InitialDirectory = "c:\\";
                openFileDialog.Filter           = "pdf files (*.pdf)|*.pdf|All files (*.*)|*.*";
                // NOTE(review): FilterIndex is documented as 1-based, which would make
                // 2 the "All files" entry rather than the pdf filter; confirm intent.
                openFileDialog.FilterIndex      = 2;
                openFileDialog.RestoreDirectory = true;

                if (openFileDialog.ShowDialog() == DialogResult.OK)
                    this.messageBox.Text = " ";
                    //Get the path of specified file
                    filePath = openFileDialog.FileName;

                    //Read the contents of the file into a stream
                    var fileStream = openFileDialog.OpenFile();

                    // fileContent is only referenced by commented-out message boxes below.
                    using (StreamReader reader = new StreamReader(fileStream))
                        fileContent = reader.ReadToEnd();

            this.statusBox.Text += "Processing PDF file..." + Environment.NewLine;
            this.fileName.Text   = filePath;

            PdfDocument doc = new PdfDocument();

            PdfPageBase page = doc.Pages[0];
            SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            string text = page.ExtractText(strategy);

            //PdfTextFind[] results = null;

            //results = page.FindText("MS61").Finds;

            //foreach (PdfTextFind texts in results)
            //    PointF p = texts.Position;
            //    float x = texts.Position.X;
            //    float y = texts.Position.Y;

            //   // this.displayText.Text += p + Environment.NewLine;
            //    string textArea = page.ExtractText(new RectangleF(x-30, y, 80, 180));
            //  //  this.displayText.Text += Environment.NewLine + textArea + Environment.NewLine;

            //MessageBox.Show(text, " ", MessageBoxButtons.OK);

            //Document pdfDocument = new Document(filePath);

            //TextAbsorber textAbsorber = new TextAbsorber();


            //string extractedText = textAbsorber.Text;


            //MessageBox.Show(extractedText, "Upstate Gold Sheet", MessageBoxButtons.OK);

            //PdfDocument PDF = PdfDocument.FromFile(filePath);
            //string AllText = PDF.ExtractAllText();

            // One array entry per extracted line, split on the platform newline.
            string[] stringArray = text.Split(Environment.NewLine);

            //string[] stringArray = text.Split("$");
            this.statusBox.Text += "API Request/Response..." + Environment.NewLine;

            foreach (string line in stringArray)
                // Only lines containing a dollar sign are sent to the API.
                bool stringExists = line.Contains("$");
                if (stringExists)
                    string[] lineSplit = line.Split(' ');
                    //  foreach ( string word in lineSplit)
                    //                    {
                    //   this.displayText.Text += word + Environment.NewLine;
                    //                    }
                    //                   this.displayText.Text += line + Environment.NewLine;
                    //  this.displayText.Text += " = " + lineSplit[3] + Environment.NewLine;

                    // The raw line is concatenated into the request string without any encoding.
                    string requestString = "method=test&requestString=" + line;
                    // Indexes 0 and 1 are read without a length check, so a line with
                    // fewer than two space-separated parts would throw here.
                    this.displayText.Text += "--> " + lineSplit[0] + " " + lineSplit[1];
                    string   responseString = APIRequest(requestString);
                    // The text after the first ':' with every '}' removed is treated
                    // as the status value; index 1 is read without a length check.
                    string[] responseArray  = responseString.Split(':');
                    responseArray[1] = responseArray[1].Replace("}", string.Empty);
                    // MessageBox.Show(responseArray[1], "api response");
                    // NOTE(review): both appends below appear under this test; presumably
                    // the Failure line belongs to a lost else branch. TODO confirm.
                    if (responseArray[1] == "0")
                        this.displayText.Text += ": Success" + Environment.NewLine;
                        //   this.displayText.ForeColor
                        this.displayText.Text += ": Failure" + Environment.NewLine;
//                    this.displayText.Text += responseString + Environment.NewLine;

            this.messageBox.Text = "Click the 'Exit' button to end the application.";
            this.statusBox.Text += "Job End...";
            // MessageBox.Show(fileContent, "Unconverted Content " + filePath, MessageBoxButtons.OK);

            // MessageBox.Show(AllText, "Upstate Gold Sheet", MessageBoxButtons.OK);
            // MessageBox.Show(fileContent, "File Content at path: " + filePath, MessageBoxButtons.OK);

            // Show the dialog and get result.
            //DialogResult result = openFileDialog1.ShowDialog();
            //if (result == DialogResult.OK) // Test result.
            // Console.WriteLine(result); // <-- For debugging use.
        /// <summary>
        /// Builds text statistics (sentence count and average length, paragraph
        /// count, unique-word count and average length, ten most frequent words)
        /// from the text held in a <c>StringBuilder</c> and stores them on
        /// <c>chartObj</c>.
        /// </summary>
        /// <param name="pages">Pages to report on; iterated only when not null.</param>
        /// <exception cref="Exception">
        /// Any exception caught by the handler is rethrown wrapped in a new
        /// <see cref="Exception"/> carrying the original as inner exception.
        /// </exception>
        // NOTE(review): the listing is incomplete. The "try" line, the braces, the
        // declaration of chartObj, the closing parentheses of the three query
        // expressions and the return statement are not visible, and the page loop
        // never appends to extractedText, so as shown every statistic would be
        // computed on an empty string. Presumably an ExtractText/Append call was
        // lost. TODO confirm against the original view model.
        public ChartData GeneratePDFReport(PdfLoadedPageCollection pages)
                StringBuilder extractedText = new StringBuilder();
                if (pages != null)
                    //Parallel.ForEach<PdfPageBase>(pages.AsParallel(), page =>


                    foreach (PdfPageBase pageT in pages)
                        PdfPageBase page = pageT;
                // Counts runs of non-whitespace characters; wordsCount is not read
                // again in the visible lines.
                var wordsCount     = System.Text.RegularExpressions.Regex.Matches(extractedText.ToString(), "\\S+").Count;
                // Sentence separators are ". " and CR LF followed by a literal backslash.
                var sentences      = extractedText.ToString().Split(new string[] { ". ", "\r\n\\" }, StringSplitOptions.None);
                // Grouping by the sentence text yields one entry per distinct sentence.
                var sentenceReport = (from sentence in sentences
                                      where sentence != string.Empty
                                      group sentence by sentence into tempBag
                                      //let count = tempBag.Count()
                                      //orderby count descending
                                      select new { Value = tempBag.Key, Length = tempBag.Key.Length }
                chartObj.NumberOfSentences = sentenceReport.Distinct().Count();
                // Guard against dividing by zero when no sentence was found.
                if (chartObj.NumberOfSentences > 0)
                    chartObj.AvgSetenceLength = sentenceReport.Sum(x => x.Length) / chartObj.NumberOfSentences;

                // Tokenises on punctuation, space and underscore; CR and LF are not
                // separators, so a "\r\n" token can survive the split.
                string[] source     = extractedText.ToString().Split(new char[] { '.', '?', '!', ' ', ';', ':', ',', '_' }, StringSplitOptions.RemoveEmptyEntries);
                // Tokens that are exactly "\r\n" are counted as paragraph breaks.
                var      matchQuery = from word in source
                                      where word.ToLowerInvariant() == "\r\n".ToLowerInvariant()
                                      select word;

                chartObj.ParagraphCount = matchQuery.Count();

                // One entry per distinct token, ordered by how often it occurs.
                var wordReport = (from word in source
                                  where word != string.Empty
                                  group word by word into tempBag
                                  let count = tempBag.Count()
                                              orderby count descending
                                              select new { Value = tempBag.Key, Count = count, Length = tempBag.Key.Length }
                chartObj.TotalUniqueWords = wordReport.Count();
                // Guard against dividing by zero when no word was found.
                if (chartObj.TotalUniqueWords > 0)
                    chartObj.AverageWordLength = wordReport.Sum(x => x.Length) / chartObj.TotalUniqueWords;

                // Takes the ten most frequent tokens first and only then drops "\r\n",
                // so fewer than ten entries can remain.
                var topTenOccuringWords = (from obj in wordReport.OrderByDescending(x => x.Count).ToList().Take(10)
                                           where obj.Value != "\r\n"
                                           select new ChartData
                    Word = obj.Value,
                    Count = obj.Count
                chartObj.Top10WordsFromPDFLoaded = new ObservableCollection <ChartData>(topTenOccuringWords);
                chartObj.ListOfDetailsToPrint    = GetOtherPDFReportData(chartObj);
            catch (Exception ex)
                // The original exception is kept as InnerException of the new one.
                throw new Exception("Error while generating PDF Reports in PdfViewer.ViewModels.PDFViewModel.GeneratePDFReport", ex);
