Esempio n. 1
0
        /// <summary>
        /// Extracts the text of the first page of the sample PDF, writes it to
        /// "ExtractTextFromParticularPage_out.txt" and shows the resulting path.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            String input  = @"..\..\..\..\..\..\Data\PDFTemplate-Az.pdf";
            String result = Path.GetFullPath("ExtractTextFromParticularPage_out.txt");
            String text;

            PdfDocument doc = new PdfDocument();
            try
            {
                // Read the pdf file and take its first page
                doc.LoadFromFile(input);
                PdfPageBase page = doc.Pages[0];

                // Extract text keeping white space (pass false to drop it)
                text = page.ExtractText(true);
            }
            finally
            {
                // Release the document even when loading or extraction fails
                doc.Close();
            }

            // The using block closes the output file even if the write throws
            using (TextWriter tw = new StreamWriter(result))
            {
                tw.WriteLine(text);
            }

            MessageBox.Show("\nText extracted successfully from particular pages of PDF Document.\nFile saved at " + result);
        }
        /// <summary>
        /// Reads the word located inside a fixed rectangle on the first page of
        /// Invoice.pdf and writes it to "data.txt" (an empty file when nothing matches).
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            RectangleF textBounds   = new RectangleF(474, 161, 50, 9);
            string     invoiceNumer = "";

            //Load the PDF document
            PdfLoadedDocument loadedDocument = new PdfLoadedDocument("../../../../../../Data/Invoice.pdf");
            try
            {
                // Get the first page of the loaded PDF document
                PdfPageBase page = loadedDocument.Pages[0];

                // Extract the text lines of the first page; the 'out' argument is
                // assigned by the call, so no instance needs to be created up front
                TextLines lineCollection;
                page.ExtractText(out lineCollection);

                // Take the first word that intersects the bounds. A single 'break'
                // would only leave the inner loop and let a later line overwrite the
                // match, so the outer loop is stopped explicitly as well.
                bool found = false;
                foreach (TextLine txtLine in lineCollection)
                {
                    foreach (TextWord word in txtLine.WordCollection)
                    {
                        if (textBounds.IntersectsWith(word.Bounds))
                        {
                            invoiceNumer = word.Text;
                            found        = true;
                            break;
                        }
                    }

                    if (found)
                    {
                        break;
                    }
                }
            }
            finally
            {
                //Close the PDF document, also when extraction fails
                loadedDocument.Close(true);
            }

            File.WriteAllText("data.txt", invoiceNumer);
        }
Esempio n. 3
0
        /// <summary>
        /// Extracts the text found inside a fixed rectangle on the first page of the
        /// sample PDF, saves it to "ExtractText_result.txt" and passes that file to Viewer.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            const string outputFile = "ExtractText_result.txt";

            // Area of the page whose text is wanted
            RectangleF region = new RectangleF(80, 180, 500, 200);

            // Open the sample document
            PdfDocument document = new PdfDocument();
            document.LoadFromFile(@"..\..\..\..\..\..\Data\ExtractTextFromSpecificArea.pdf");

            // Only the first page is inspected
            PdfPageBase firstPage  = document.Pages[0];
            string      regionText = firstPage.ExtractText(region);

            // The saved text is terminated by a single line break
            File.WriteAllText(outputFile, regionText + Environment.NewLine);

            Viewer(outputFile);
        }
Esempio n. 4
0
        /// <summary>
        /// Collects the text covered by every text-markup annotation on the first page
        /// of the sample PDF, saves it to "ExtractHighlightedText.txt" and passes that
        /// file to DocumentViewer.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            //Create a pdf document and load the sample file
            PdfDocument doc = new PdfDocument();
            doc.LoadFromFile(@"..\..\..\..\..\..\Data\ExtractHighlightedText.pdf");

            PdfPageBase   page          = doc.Pages[0];
            StringBuilder stringBuilder = new StringBuilder();

            stringBuilder.AppendLine("Extracted hightlighted text:");

            for (int i = 0; i < page.AnnotationsWidget.Count; i++)
            {
                // One cast replaces the former 'is' check followed by 'as';
                // annotations of any other type yield null and are skipped
                PdfTextMarkupAnnotationWidget textMarkupAnnotation =
                    page.AnnotationsWidget[i] as PdfTextMarkupAnnotationWidget;
                if (textMarkupAnnotation == null)
                {
                    continue;
                }

                //Get the text lying under the annotation
                stringBuilder.AppendLine(page.ExtractText(textMarkupAnnotation.Bounds));
            }

            String result = "ExtractHighlightedText.txt";

            File.WriteAllText(result, stringBuilder.ToString());
            DocumentViewer(result);
        }
Esempio n. 5
0
        /// <summary>
        /// Writes the text of the first page of Invoice.pdf, extracted with layout, to "data.txt".
        /// </summary>
        static void Main(string[] args)
        {
            // Open the sample invoice
            PdfLoadedDocument invoice = new PdfLoadedDocument("../../../../../../Data/Invoice.pdf");

            // Only page one is read; 'true' asks for extraction with layout
            PdfPageBase firstPage = invoice.Pages[0];
            string      pageText  = firstPage.ExtractText(true);

            // Persist the result
            File.WriteAllText("data.txt", pageText);

            // Release the document
            invoice.Close(true);
        }
Esempio n. 6
0
        /// <summary>
        /// Extracts the text inside a rectangle derived from the fields x and y on the
        /// first page of the currently selected PDF, saves it to "Extract.txt" with the
        /// Spire evaluation banner removed, and opens that file.
        /// </summary>
        private void getPDFMsg()
        {
            PdfDocument document = new PdfDocument();
            document.LoadFromFile(pdfFileName[currentPageNum - 1]);

            PdfPageBase firstPage = document.Pages[0];

            // Extract text from the specified rectangular area of the first page
            RectangleF area = new RectangleF(
                (int)(x.X - 70),
                (int)(x.Y - 30),
                (int)(y.X - 80),
                (int)(y.Y - 135));
            string extracted = firstPage.ExtractText(area);

            // The saved text ends with a line break; the evaluation notice is stripped
            string cleaned = (extracted + Environment.NewLine)
                .Replace("Evaluation Warning : The document was created with Spire.PDF for .NET.", "");

            File.WriteAllText("Extract.txt", cleaned);
            Process.Start("Extract.txt");
        }
Esempio n. 7
0
        /// <summary>
        /// Remembers the given file on SaveFilePdf, then returns the text of its first
        /// page and adds the file to the most-recently-used list.
        /// </summary>
        /// <param name="openFile">File to read; may be null.</param>
        /// <returns>The extracted text, or an empty string when <paramref name="openFile"/> is null.</returns>
        public static async Task<string> Read(StorageFile openFile)
        {
            SaveFilePdf.openFile = openFile;

            // Nothing to read
            if (openFile == null)
            {
                return "";
            }

            PdfLoadedDocument document = new PdfLoadedDocument();
            await document.OpenAsync(openFile).ConfigureAwait(true);

            PdfPageBase firstPage = document.Pages[0];
            string      pageText  = firstPage.ExtractText();

            document.Close(true);
            document.Dispose();

            // Register the file as recently used; the returned token is not needed here
            Windows.Storage.AccessCache.StorageApplicationPermissions.MostRecentlyUsedList.Add(openFile, "Pdf file");

            return pageText;
        }
        /// <summary>
        /// Extracts the text of the first page of Invoice.pdf through a file stream
        /// and writes it to "data.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string extractedText;

            // The stream is only read, so it is opened read-only (works for write-protected
            // files too) and disposed deterministically by the using block.
            using (FileStream inputStream = new FileStream("../../../../../../../Data/Invoice.pdf", FileMode.Open, FileAccess.Read))
            {
                //Load the PDF document
                PdfLoadedDocument loadedDocument = new PdfLoadedDocument(inputStream);
                try
                {
                    // Get the first page of the loaded PDF document
                    PdfPageBase page = loadedDocument.Pages[0];

                    // Extract the whole text of the first page
                    extractedText = page.ExtractText();
                }
                finally
                {
                    //Close the document, also when extraction fails
                    loadedDocument.Close(true);
                }
            }

            //Save the text to file
            File.WriteAllText("data.txt", extractedText);
        }
Esempio n. 9
0
        /// <summary>
        /// Returns the text of the first page of the PDF located at <c>filename.Path</c>.
        /// </summary>
        /// <param name="filename">Descriptor whose Path property names the PDF to open.</param>
        /// <returns>The text extracted from the first page.</returns>
        public string StripPDF(PDF filename)
        {
            // Open the existing document
            PdfLoadedDocument document = new PdfLoadedDocument(filename.Path);

            // Read the text of page one
            PdfPageBase firstPage     = document.Pages[0];
            string      firstPageText = firstPage.ExtractText();

            // Release the document before handing the text back
            document.Close(true);

            return firstPageText;
        }
Esempio n. 10
0
        /// <summary>
        /// Extracts the text of the first page of <paramref name="fileName"/> and writes
        /// it to "result_spire.txt" in the same directory as the PDF.
        /// Any failure is reported on the console instead of being thrown.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to read.</param>
        public static void GetElements(string fileName)
        {
            try
            {
                PdfDocument doc = new PdfDocument();
                doc.LoadFromFile(fileName);
                PdfPageBase page = doc.Pages[0];

                SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                string text = page.ExtractText(strategy);

                // Path.Combine keeps the result next to the PDF. Plain concatenation
                // turned a bare file name (empty directory part) into "\result_spire.txt",
                // i.e. the root of the current drive.
                string directory  = Path.GetDirectoryName(fileName) ?? string.Empty;
                string outputPath = Path.Combine(directory, "result_spire.txt");

                // Creates or overwrites the file; the using block closes it even if the write fails
                using (StreamWriter sw = new StreamWriter(outputPath))
                {
                    sw.Write(text);
                }
            }
            catch (Exception e)
            {
                // Best effort: report the problem and carry on
                Console.WriteLine(e.Message);
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Extracts the text of the third page of the PDF chosen in fileDialog, saves it
        /// to "Result_PDF.txt" and shows it in textBox1. Asks the user to pick a file
        /// when none is selected.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            if (fileDialog.FileName.Length > 0)
            {
                PdfDocument doc = new PdfDocument();
                doc.LoadFromFile(fileDialog.FileName);

                // Index 2 is the third page.
                // NOTE(review): documents with fewer than three pages presumably fail here - confirm and guard if needed.
                PdfPageBase page = doc.Pages[2];
                SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                string text = page.ExtractText(strategy);

                // WriteAllText creates/overwrites the file and always releases the handle,
                // unlike the manual FileStream/StreamWriter pair which leaked on a failed write
                File.WriteAllText("Result_PDF.txt", text);

                // Show the extracted text directly instead of re-reading the file just written
                textBox1.Text = text;
            }
            else
            {
                MessageBox.Show("PDF 파일을 선택해 주세요.");
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Lets the user pick a PDF, extracts the text of its first page and submits
        /// every line containing a '$' to APIRequest, logging "Success" or "Failure"
        /// per line in displayText. Does nothing further when the dialog is cancelled.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            string filePath;

            using (OpenFileDialog openFileDialog = new OpenFileDialog())
            {
                this.statusBox.Text            += Environment.NewLine + "Selecting file..." + Environment.NewLine;
                openFileDialog.InitialDirectory = "c:\\";
                openFileDialog.Filter           = "pdf files (*.pdf)|*.pdf|All files (*.*)|*.*";
                // FilterIndex is 1-based, so 2 preselects the "All files" entry
                openFileDialog.FilterIndex      = 2;
                openFileDialog.RestoreDirectory = true;

                if (openFileDialog.ShowDialog() != DialogResult.OK)
                {
                    // No file chosen: previously this fell through and tried to load an empty path
                    return;
                }

                this.messageBox.Text = " ";
                filePath = openFileDialog.FileName;
            }

            this.statusBox.Text += "Processing PDF file..." + Environment.NewLine;
            this.fileName.Text   = filePath;

            string text;
            PdfDocument doc = new PdfDocument();
            try
            {
                doc.LoadFromFile(filePath);
                PdfPageBase page = doc.Pages[0];
                SimpleTextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                text = page.ExtractText(strategy);
            }
            finally
            {
                // Release the document even when loading or extraction fails
                doc.Close();
            }

            string[] stringArray = text.Split(Environment.NewLine);

            this.statusBox.Text += "API Request/Response..." + Environment.NewLine;

            foreach (string line in stringArray)
            {
                // Only lines carrying a '$' are submitted
                if (!line.Contains("$"))
                {
                    continue;
                }

                // Label the entry with the first two space-separated tokens,
                // or with the single token when the line contains no space
                string[] lineSplit = line.Split(' ');
                string   label     = lineSplit.Length > 1
                    ? lineSplit[0] + " " + lineSplit[1]
                    : lineSplit[0];
                this.displayText.Text += "--> " + label;

                // NOTE(review): 'line' is appended unescaped; confirm APIRequest encodes it if it builds a URL/form body
                string   requestString  = "method=test&requestString=" + line;
                string   responseString = APIRequest(requestString);
                string[] responseArray  = responseString.Split(':');

                // The value after the first ':' (closing braces removed) must be "0";
                // a response without any ':' is treated as a failure instead of throwing
                bool succeeded = responseArray.Length > 1
                                 && responseArray[1].Replace("}", string.Empty) == "0";

                this.displayText.Text += (succeeded ? ": Success" : ": Failure") + Environment.NewLine;
            }

            this.messageBox.Text = "Click the 'Exit' button to end the application.";
            this.statusBox.Text += "Job End...";
        }
        /// <summary>
        /// Computes sentence and word statistics over the text of the given pages and
        /// stores them on <c>chartObj</c>: distinct sentence count, average sentence
        /// length, paragraph count, unique word count, average word length, the most
        /// frequent words and the details produced by GetOtherPDFReportData.
        /// </summary>
        /// <param name="pages">Pages to analyse; when null the statistics are computed over empty text.</param>
        /// <returns>The <c>chartObj</c> instance updated by this call.</returns>
        /// <exception cref="Exception">Wraps any failure raised while building the report.</exception>
        public ChartData GeneratePDFReport(PdfLoadedPageCollection pages)
        {
            try
            {
                StringBuilder extractedText = new StringBuilder();
                if (pages != null)
                {
                    foreach (PdfPageBase page in pages)
                    {
                        extractedText.Append(page.ExtractText());
                    }
                }

                // Materialise the text once instead of calling ToString() for every pass
                string fullText = extractedText.ToString();

                // Distinct, non-empty sentences in order of first appearance.
                // NOTE(review): the second separator is CR LF followed by a literal backslash;
                // it may have been meant as "\r\n" - confirm before changing.
                var sentences = fullText
                                .Split(new string[] { ". ", "\r\n\\" }, StringSplitOptions.None)
                                .Where(sentence => sentence != string.Empty)
                                .Distinct()
                                .ToList();

                chartObj.NumberOfSentences = sentences.Count;
                if (chartObj.NumberOfSentences > 0)
                {
                    chartObj.AvgSetenceLength = sentences.Sum(sentence => sentence.Length) / chartObj.NumberOfSentences;
                }

                string[] source = fullText.Split(new char[] { '.', '?', '!', ' ', ';', ':', ',', '_' }, StringSplitOptions.RemoveEmptyEntries);

                // A token that consists solely of a line break counts as a paragraph boundary
                chartObj.ParagraphCount = source.Count(word => word == "\r\n");

                // One entry per distinct word, most frequent first (ties keep first-seen order)
                var wordReport = source
                                 .GroupBy(word => word)
                                 .Select(bag => new { Value = bag.Key, Count = bag.Count(), Length = bag.Key.Length })
                                 .OrderByDescending(entry => entry.Count)
                                 .ToList();

                chartObj.TotalUniqueWords = wordReport.Count;
                if (chartObj.TotalUniqueWords > 0)
                {
                    chartObj.AverageWordLength = wordReport.Sum(entry => entry.Length) / chartObj.TotalUniqueWords;
                }

                // wordReport is already sorted by frequency. The line-break token is dropped
                // after taking ten entries, so fewer than ten words may be reported.
                var topTenOccuringWords = wordReport
                                          .Take(10)
                                          .Where(entry => entry.Value != "\r\n")
                                          .Select(entry => new ChartData
                                          {
                                              Word  = entry.Value,
                                              Count = entry.Count
                                          })
                                          .ToList();

                chartObj.Top10WordsFromPDFLoaded = new ObservableCollection<ChartData>(topTenOccuringWords);
                chartObj.ListOfDetailsToPrint    = GetOtherPDFReportData(chartObj);
            }
            catch (Exception ex)
            {
                // Exception type and message are kept as-is for existing callers; the cause travels as InnerException
                throw new Exception("Error while generating PDF Reports in PdfViewer.ViewModels.PDFViewModel.GeneratePDFReport", ex);
            }

            return(chartObj);
        }