Ejemplo n.º 1
0
        /// <summary>
        /// Sample entry point: opens the sample PDF and writes three text files
        /// (plain text, formatted text, and visible first-page text), then prints
        /// the current directory to the console.
        /// </summary>
        public static void Main()
        {
            // NOTE:
            // The library imposes some restrictions when it runs in trial mode.
            // See http://bitmiracle.com/pdf-library/trial-restrictions.aspx
            // for more information.

            const string plainTextPath     = "Document text.txt";
            const string formattedTextPath = "Document text with formatting.txt";
            const string firstPagePath     = "First page text.txt";

            using (var document = new PdfDocument(@"..\Sample data\jfif3.pdf"))
            {
                // 1. Plain text of the whole document.
                //    The writer is opened before extraction starts, so the output
                //    file exists even if the extraction call fails.
                using (var output = new StreamWriter(plainTextPath))
                {
                    output.Write(document.GetText());
                }

                // 2. Text of the whole document with formatting.
                using (var output = new StreamWriter(formattedTextPath))
                {
                    output.Write(document.GetTextWithFormatting());
                }

                // 3. First page only: no formatting, invisible text skipped.
                using (var output = new StreamWriter(firstPagePath))
                {
                    var visiblePlainText = new PdfTextExtractionOptions
                    {
                        WithFormatting    = false,
                        SkipInvisibleText = true
                    };
                    var firstPage = document.Pages[0];
                    output.Write(firstPage.GetText(visiblePlainText));
                }
            }

            Console.WriteLine($"The output is located in {Environment.CurrentDirectory}");
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Extracts the formatted, visible text of the PDF at
        /// <c>FileTable.FileLocation</c>, splits it into whitespace-separated
        /// tokens, and appends one <c>Output</c> entry per item in
        /// <c>Mappings</c> to <c>OutputData</c>.
        /// </summary>
        public void ProcessFile()
        {
            string sourcePath = FileTable.FileLocation;

            using (var document = new PdfDocument(sourcePath))
            {
                var extraction = new PdfTextExtractionOptions
                {
                    SkipInvisibleText = true,
                    WithFormatting    = true
                };

                string rawText = document.GetText(extraction);

                // Drop a single space-separator character that has a word
                // character directly on each side, joining the two neighbours.
                string joinedText = Regex.Replace(rawText, @"(?<=\b)\p{Zs}(?=\b)", string.Empty);

                // Tokenize on runs of whitespace.
                string[] tokens = Regex.Split(joinedText, @"\s+");

                foreach (var mapping in Mappings)
                {
                    // The value lookup is delegated to GetNextIndexValue, which
                    // receives the token array and the mapping's name.
                    var entry = new Output
                    {
                        FileId   = FileTable.FileId,
                        DataType = mapping.MapType,
                        Value    = GetNextIndexValue(tokens, mapping.MapName),
                    };
                    OutputData.Add(entry);
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Opens the PDF at <paramref name="filePath"/> and returns its text,
        /// extracted without formatting and with invisible text skipped.
        /// </summary>
        /// <param name="filePath">Path of the PDF file to read.</param>
        /// <returns>The text returned by the document's <c>GetText</c> call.</returns>
        private string GetTextFromPdf(string filePath)
        {
            using (var document = new PdfDocument(filePath))
            {
                var extraction = new PdfTextExtractionOptions
                {
                    SkipInvisibleText = true,
                    WithFormatting    = false
                };

                // The document is disposed on the way out of the using block.
                return document.GetText(extraction);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads the text inside a fixed rectangle on <c>Pages[1]</c> of the PDF
        /// at <paramref name="path"/>, echoes it to the console, and returns it.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>The unformatted text found in the hard-coded region.</returns>
        static string getNomeFantasia(string path)
        {
            using (var document = new PdfDocument(path))
            {
                // Index 1: presumably the second page (zero-based) - confirm against the form layout.
                var targetPage = document.Pages[1];

                // Hard-coded region; judging by the method name it should hold the
                // "nome fantasia" (trade name) field - verify against a sample file.
                var region = new PdfTextExtractionOptions
                {
                    Rectangle      = new PdfRectangle(230.30, 285.33, 11.06, 13.61),
                    WithFormatting = false
                };

                string extracted = targetPage.GetText(region);
                Console.WriteLine(extracted);
                return extracted;
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Reads the text inside a fixed rectangle on <c>Pages[1]</c> of the PDF
        /// at <paramref name="path"/>, echoes it to the console, and returns it.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>The unformatted text found in the hard-coded region.</returns>
        static string getIdentificadoMatriz(string path)
        {
            using (var document = new PdfDocument(path))
            {
                // Index 1: presumably the second page (zero-based) - confirm against the form layout.
                var targetPage = document.Pages[1];

                // Hard-coded region; judging by the method name it should hold the
                // head-office ("matriz") identifier - verify against a sample file.
                var region = new PdfTextExtractionOptions
                {
                    Rectangle      = new PdfRectangle(227.50, 205.33, 5.56, 13.61),
                    WithFormatting = false
                };

                string extracted = targetPage.GetText(region);
                Console.WriteLine(extracted);
                return extracted;
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Reads the text inside a fixed rectangle on <c>Pages[1]</c> of the PDF
        /// at <paramref name="path"/>, echoes it to the console, and returns it.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>The unformatted text found in the hard-coded region.</returns>
        static string getCNPJ(string path)
        {
            using (var document = new PdfDocument(path))
            {
                // Index 1: presumably the second page (zero-based) - confirm against the form layout.
                var targetPage = document.Pages[1];

                // Hard-coded region expected to contain the CNPJ field.
                var region = new PdfTextExtractionOptions
                {
                    Rectangle      = new PdfRectangle(227.50, 162.33, 11.06, 13.61),
                    WithFormatting = false
                };

                string extracted = targetPage.GetText(region);
                Console.WriteLine(extracted);
                return extracted;
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Reads the text inside a fixed rectangle on <c>Pages[4]</c> of the PDF
        /// at <paramref name="path"/>, echoes it to the console, and returns it.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>The unformatted text found in the hard-coded region.</returns>
        static string getcnpjSocio(string path)
        {
            using (var document = new PdfDocument(path))
            {
                // Index 4: presumably the fifth page (zero-based) - confirm against the form layout.
                var targetPage = document.Pages[4];

                // Hard-coded region; judging by the method name it should hold the
                // partner's ("socio") CNPJ - verify against a sample file.
                var region = new PdfTextExtractionOptions
                {
                    Rectangle      = new PdfRectangle(212.20, 686.23, 11.15, 13.61),
                    WithFormatting = false
                };

                string extracted = targetPage.GetText(region);
                Console.WriteLine(extracted);
                return extracted;
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Reads the text inside a fixed rectangle on <c>Pages[4]</c> of the PDF
        /// at <paramref name="path"/>, echoes it to the console, and returns it.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>The unformatted text found in the hard-coded region.</returns>
        static string getidentificadoSocio(string path)
        {
            using (var document = new PdfDocument(path))
            {
                // Index 4: presumably the fifth page (zero-based) - confirm against the form layout.
                var targetPage = document.Pages[4];

                // Hard-coded region; judging by the method name it should hold the
                // partner's ("socio") identifier - verify against a sample file.
                var region = new PdfTextExtractionOptions
                {
                    Rectangle      = new PdfRectangle(212.20, 552.23, 5.56, 13.61),
                    WithFormatting = false
                };

                string extracted = targetPage.GetText(region);
                Console.WriteLine(extracted);
                return extracted;
            }
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Extracts the text of the PDF contained in <paramref name="fileStream"/>,
 /// without formatting and with invisible text skipped.
 /// </summary>
 /// <param name="fileStream">Stream holding the PDF document to read.</param>
 /// <param name="cancellationToken">
 /// Accepted for interface compatibility; the extraction runs synchronously
 /// and does not observe the token.
 /// </param>
 /// <returns>
 /// A completed task carrying the extracted text, or an empty string when
 /// any exception is thrown while opening or reading the document (the
 /// exception is logged, not rethrown).
 /// </returns>
 public Task<string> GetAsText(Stream fileStream, CancellationToken cancellationToken = default)
 {
     try
     {
         using (var pdf = new PdfDocument(fileStream))
         {
             var options = new PdfTextExtractionOptions
             {
                 SkipInvisibleText = true,
                 WithFormatting    = false
             };

             // Fix: hand the options to GetText. The previous code built them
             // but called the parameterless overload, so they were ignored.
             return(Task.FromResult(pdf.GetText(options)));
         }
     }
     catch (Exception ex)
     {
         // Best-effort contract: log the failure and fall back to an empty result.
         _logger.LogError(ex, ex.Message);
         return(Task.FromResult(string.Empty));
     }
 }