Exemplo n.º 1
0
        /// <summary>
        /// Runs OCR over every page of a PDF file and writes the recognised text,
        /// page by page, to <c>output_{file name without extension}.txt</c>.
        /// The output path is relative, so it resolves against the current directory.
        /// </summary>
        /// <param name="path">Path to the PDF file. Its extension must be <c>.pdf</c> (any letter case).</param>
        /// <returns>Always an empty string; the recognised text is only written to the output file.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="path"/> does not have a <c>.pdf</c> extension.
        /// </exception>
        public static String Consume(String path)
        {
            // Ordinal, case-insensitive comparison so that "scan.PDF" is accepted too.
            string extension = Path.GetExtension(path);
            if (!string.Equals(extension, ".pdf", StringComparison.OrdinalIgnoreCase))
            {
                // The second constructor argument is the parameter *name*, not its value.
                throw new ArgumentException("Path does not lead to a PDF file type", "path");
            }

            string outputPath = String.Format("output_{0}.txt", Path.GetFileNameWithoutExtension(path));

            using (PDFiumEngine pdfEngine = new PDFiumEngine(path))
            // append: false -> an existing output file is overwritten.
            using (StreamWriter sw = new StreamWriter(outputPath, false))
            {
                for (int pageIndex = 0; pageIndex < pdfEngine.PageCount; pageIndex++)
                {
                    // NOTE(review): 4000 and 0 are passed straight to the PDF engine; presumably
                    // they control the rendered size - confirm against PDFiumEngine.
                    // NOTE(review): assumes the caller owns the returned image and may dispose it - confirm.
                    using (Image pageImage = pdfEngine.GetImageFromPageNumber(pageIndex, 4000, 0))
                    // NOTE(review): the engine is acquired and disposed once per page, as in the
                    // original; whether getInstance() hands out a fresh engine each time is not
                    // visible from here.
                    using (TesseractEngine engine = Tesseract.getInstance())
                    using (Bitmap pageBitmap = new Bitmap(pageImage))
                    using (Page ocrPage = engine.Process(pageBitmap, PageSegMode.AutoOsd))
                    {
                        sw.WriteLine(ocrPage.GetText());

                        // Flush after every page so finished pages reach the file
                        // even if a later page fails.
                        sw.Flush();
                    }
                }
            }

            return "";
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs OCR over a single image file and writes the recognised text to
        /// <c>output_{file name without extension}.txt</c>.
        /// The output path is relative, so it resolves against the current directory.
        /// </summary>
        /// <param name="path">Path to the image file to recognise.</param>
        /// <returns>Always an empty string; the recognised text is only written to the output file.</returns>
        public static String Consume_Image(String path)
        {
            string outputPath = String.Format("output_{0}.txt", Path.GetFileNameWithoutExtension(path));

            // append: false -> an existing output file is overwritten.
            using (StreamWriter sw = new StreamWriter(outputPath, false))
            using (TesseractEngine engine = Tesseract.getInstance())
            // Dispose the bitmap when done: a Bitmap created from a file keeps
            // that file locked until it is disposed.
            using (Bitmap sourceBitmap = new Bitmap(path))
            using (Page ocrPage = engine.Process(sourceBitmap, PageSegMode.AutoOsd))
            {
                sw.WriteLine(ocrPage.GetText());
            }

            return "";
        }