/// <summary>
/// Click handler for the object-analysis button. Extracts the objects of the
/// PDF named by <c>PDFFileName</c>, runs keyword location, renders the first
/// page of the PDF to <c>PDFFileName + ".png"</c>, then calls
/// <c>LoadImageBox</c> and <c>PopulateGridView("Text")</c>.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnObjectAnalysis_Click(object sender, EventArgs e)
{
    // Use the PDF library to break out all of the PDF objects.
    // The page dimensions are written to the PDFPageWidth / PDFPageHeight members.
    Readers.PDFReader Rdr = new Readers.PDFReader();
    Rdr.getObjectsFromPDF(PDFFileName, out PDFPageWidth, out PDFPageHeight);

    // Break up text objects with multiple words into a single word per object.
    DataBase.DB DocObjects = new DataBase.DB();
    DocObjects.LocateKeywords();

    // Only the first page (index 0) of the document is rendered.
    PdfFixedDocument doc = new PdfFixedDocument(PDFFileName);
    PdfPageRenderer renderer = new PdfPageRenderer(doc.Pages[0]);

    // FileMode.Create truncates an existing file. File.OpenWrite does not, so
    // re-rendering over a larger, older PNG would leave stale bytes after the
    // new image data. The using block flushes and closes the stream even when
    // rendering throws, so the file is never left locked.
    using (FileStream PNGStream = new FileStream(PDFFileName + ".png", FileMode.Create, FileAccess.Write, FileShare.None))
    {
        renderer.ConvertPageToImage(dpi, PNGStream, PdfPageImageFormat.Png);
    }

    LoadImageBox();
    PopulateGridView("Text");
}
/// <summary>
/// Runs the automatic processing pipeline for one PDF: extracts its objects,
/// locates keywords, detects black boxes and black blobs in the page image
/// <c>Filename + ".png"</c>, extracts the text under each detected region,
/// translates the text, archives the training data and calls
/// <c>PopulateGridView("Extract")</c>.
/// </summary>
/// <param name="Filename">
/// Path of the PDF to process. The page image is read from
/// <c>Filename + ".png"</c>; this method does not create that image, so it is
/// presumably rendered beforehand (TODO confirm against callers).
/// </param>
private void AutoprocessDocs(string Filename)
{
    // Record the file being processed in the PDFFileName member.
    PDFFileName = Filename;

    // Use the PDF library to break out all of the PDF objects.
    // The page dimensions are written to the PDFPageWidth / PDFPageHeight members.
    Readers.PDFReader Rdr = new Readers.PDFReader();
    Rdr.getObjectsFromPDF(Filename, out PDFPageWidth, out PDFPageHeight);

    // Break up text objects with multiple words into a single word per object.
    DataBase.DB DocObjects = new DataBase.DB();
    DocObjects.LocateKeywords();

    ImageProcessor EvalImage = new ImageProcessor();
    int ImageWidth = 0;
    int ImageHeight = 0;
    string imagePath = Filename + ".png";

    // Pass 1: black boxes. The scale is the ratio of PDF page width to image
    // width. The explicit cast keeps the division in floating point should
    // PDFPageWidth be an integral type; it is a no-op if it is already double.
    List<Rectangle> BlackBoxes = EvalImage.BlackBox(imagePath, out ImageWidth, out ImageHeight, true);
    double ImageScale = (double)PDFPageWidth / ImageWidth;
    DocObjects.ExtractTextFromRect(BlackBoxes, ImageScale);

    // Pass 2: black blobs. The original passed BlackBoxes here a second time,
    // so the blob detection result was computed and then discarded.
    List<Rectangle> BlackBlobs = EvalImage.BlackBlob(imagePath, out ImageWidth, out ImageHeight, true);
    ImageScale = (double)PDFPageWidth / ImageWidth;
    DocObjects.ExtractTextFromRect(BlackBlobs, ImageScale);

    DocObjects.TranslateText();
    ArchiveTrainingData();
    PopulateGridView("Extract");
}