/// <summary>
/// Runs OCR over every page of a PDF file and writes the recognized text, page by page,
/// to <c>output_{file name without extension}.txt</c>. The output path is relative, so the
/// file is created in the current working directory; an existing file is overwritten.
/// </summary>
/// <param name="path">Path to the PDF file. Its extension must be <c>.pdf</c> (any letter case).</param>
/// <returns>Always an empty string; the recognized text is only written to the output file.</returns>
/// <exception cref="ArgumentException">
/// The extension of <paramref name="path"/> is not <c>.pdf</c> (this includes a null path).
/// </exception>
public static String Consume(String path)
{
    // Ordinal, case-insensitive comparison so that "scan.PDF" is accepted as well.
    string extension = Path.GetExtension(path);
    if (!string.Equals(extension, ".pdf", StringComparison.OrdinalIgnoreCase))
    {
        // The second constructor argument is the parameter *name*, not its value.
        throw new ArgumentException("Path does not lead to a PDF file type", "path");
    }

    string outputPath = String.Format("output_{0}.txt", Path.GetFileNameWithoutExtension(path));

    using (PDFiumEngine pdfEngine = new PDFiumEngine(path))
    using (StreamWriter sw = new StreamWriter(outputPath, false)) // false = overwrite, do not append
    {
        for (int i = 0; i < pdfEngine.PageCount; i++)
        {
            // NOTE(review): the meaning of the 4000 and 0 arguments is defined by PDFiumEngine
            // (presumably target width/height) - confirm against that class.
            // NOTE(review): the engine is acquired and disposed once per page, exactly as before;
            // whether Tesseract.getInstance() returns a fresh or shared engine is not visible here.
            using (Image img = pdfEngine.GetImageFromPageNumber(i, 4000, 0))
            using (TesseractEngine engine = Tesseract.getInstance())
            using (Bitmap bit = new Bitmap(img))
            using (Page p = engine.Process(bit, PageSegMode.AutoOsd))
            {
                sw.WriteLine(p.GetText());

                // Flush after each page so text already recognized reaches the file
                // even if a later page fails.
                sw.Flush();
            }
        }
    }

    return "";
}
/// <summary>
/// Runs OCR over a single image file and writes the recognized text to
/// <c>output_{file name without extension}.txt</c>. The output path is relative, so the
/// file is created in the current working directory; an existing file is overwritten.
/// </summary>
/// <param name="path">Path to an image file that the <c>Bitmap(string)</c> constructor can load.</param>
/// <returns>Always an empty string; the recognized text is only written to the output file.</returns>
public static String Consume_Image(String path)
{
    string outputPath = String.Format("output_{0}.txt", Path.GetFileNameWithoutExtension(path));

    // The writer is opened first, as before, so the output file exists even if OCR fails later.
    using (StreamWriter sw = new StreamWriter(outputPath, false)) // false = overwrite, do not append
    using (TesseractEngine engine = Tesseract.getInstance())
    using (Bitmap bit = new Bitmap(path)) // disposed here so the image file handle is released
    using (Page p = engine.Process(bit, PageSegMode.AutoOsd))
    {
        sw.WriteLine(p.GetText());
        sw.Flush();
    }

    return "";
}