Пример #1
0
        /// <summary>
        /// Click event handler: runs <c>ExtractTextFromPdf</c> on every file found in
        /// <c>InnPath</c>, serializes the collected items to JSON and writes the result to
        /// <c>inn\output\output&lt;date&gt;.json</c> under the directory of <c>FilePath</c>,
        /// then shows a message box with the path of the created file.
        /// </summary>
        /// <param name="sender">Event source (not used by the handler).</param>
        /// <param name="e">Event data (not used by the handler).</param>
        private void ParsePDF_Click(object sender, EventArgs e)
        {
            string[] fileEntries = Directory.GetFiles(InnPath);
            string   outputDir   = Path.GetDirectoryName(FilePath) + "\\inn\\output";

            // ToShortDateString() is culture-dependent and may contain characters that are
            // not allowed in a file name (e.g. '/' in "10/6/2019"), which would be read as
            // directory separators and make the write fail. Replace any such character;
            // date strings that are already valid file-name parts are left untouched.
            string dateStamp = string.Join("_", DateTime.Now.ToShortDateString().Split(Path.GetInvalidFileNameChars()));
            string jsonPath  = outputDir + "\\output" + dateStamp + ".json";

            List<EgrulItem> listEgrul = new List<EgrulItem>(fileEntries.Length);

            foreach (string fileEntry in fileEntries)
            {
                listEgrul.Add(ExtractTextFromPdf(fileEntry));
            }

            string output = JsonConvert.SerializeObject(listEgrul);

            // CreateDirectory does nothing when the directory already exists,
            // so no separate existence check is needed.
            Directory.CreateDirectory(outputDir);

            File.WriteAllText(jsonPath, output);
            MessageBox.Show("Готово! Создан файл " + jsonPath);
        }
Пример #2
0
        /// <summary>
        /// Reads the PDF at <paramref name="path"/>, concatenates the text of all its pages
        /// in page order (no separator is inserted between pages), echoes the combined text
        /// to the console and builds an <c>EgrulItem</c> from it.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>An <c>EgrulItem</c> constructed from the full extracted text.</returns>
        private static EgrulItem ExtractTextFromPdf(string path)
        {
            using (PdfReader pdf = new PdfReader(path))
            {
                StringBuilder buffer = new StringBuilder();

                // Page numbers passed to the extractor start at 1.
                int pageNumber = 1;
                while (pageNumber <= pdf.NumberOfPages)
                {
                    string pageText = PdfTextExtractor.GetTextFromPage(pdf, pageNumber);
                    buffer.Append(pageText);
                    pageNumber++;
                }

                string fullText = buffer.ToString();
                Console.Write(fullText);

                return new EgrulItem(fullText);
            }
        }