Пример #1
0
        /// <summary>
        /// Click handler: extracts the objects of the PDF named by <c>PDFFileName</c>,
        /// runs keyword location, renders the first page to "<c>PDFFileName</c>.png",
        /// then reloads the image box and fills the grid view with the "Text" view.
        /// </summary>
        /// <param name="sender">Source of the event (not used).</param>
        /// <param name="e">Event data (not used).</param>
        private void btnObjectAnalysis_Click(object sender, EventArgs e)
        {
            //use the PDF library to break out all of the PDF Objects
            Readers.PDFReader Rdr = new Readers.PDFReader();
            Rdr.getObjectsFromPDF(PDFFileName, out PDFPageWidth, out PDFPageHeight);

            //break up text objects with multiple words into a single word per object
            DataBase.DB DocObjects = new DataBase.DB();
            DocObjects.LocateKeywords();

            // Only the first page of the document is rendered.
            PdfFixedDocument doc      = new PdfFixedDocument(PDFFileName);
            PdfPageRenderer  renderer = new PdfPageRenderer(doc.Pages[0]);

            // File.Create truncates an existing file (File.OpenWrite does not, which would
            // leave stale bytes after the new image when the old PNG was larger). The using
            // block flushes and closes the stream even if rendering throws.
            using (FileStream PNGStream = File.Create(PDFFileName + ".png"))
            {
                renderer.ConvertPageToImage(dpi, PNGStream, PdfPageImageFormat.Png);
            }

            // The PNG must be fully written and closed before the image box loads it.
            LoadImageBox();

            PopulateGridView("Text");
        }
Пример #2
0
        /// <summary>
        /// Runs the automatic processing pipeline for one PDF: extracts its objects,
        /// locates keywords, detects black boxes and black blobs in the page image
        /// "<paramref name="Filename"/>.png", extracts the text under those rectangles,
        /// translates it, archives the training data and shows the "Extract" grid view.
        /// </summary>
        /// <param name="Filename">
        /// Path of the PDF to process. The page image is read from this path with ".png"
        /// appended; this method does not create that image.
        /// </param>
        private void AutoprocessDocs(string Filename)
        {
            // Remember the file being processed in the PDFFileName member.
            PDFFileName = Filename;

            //Autoprocess
            //use the PDF library to break out all of the PDF Objects
            Readers.PDFReader Rdr = new Readers.PDFReader();
            Rdr.getObjectsFromPDF(Filename, out PDFPageWidth, out PDFPageHeight);

            //break up text objects with multiple words into a single word per object
            DataBase.DB DocObjects = new DataBase.DB();
            DocObjects.LocateKeywords();

            ImageProcessor EvalImage   = new ImageProcessor();
            string         ImagePath   = Filename + ".png";
            int            ImageWidth  = 0;
            int            ImageHeight = 0;

            // Pass 1: rectangles reported by BlackBox.
            List <Rectangle> BlackBoxes = EvalImage.BlackBox(ImagePath, out ImageWidth, out ImageHeight, true);

            // The explicit double cast avoids integer truncation of the scale factor
            // should PDFPageWidth be declared as an integral type.
            double ImageScale = (double)PDFPageWidth / ImageWidth;

            DocObjects.ExtractTextFromRect(BlackBoxes, ImageScale);

            // Pass 2: rectangles reported by BlackBlob. The width is re-read, so the
            // scale is recomputed before use.
            List <Rectangle> BlackBlobs = EvalImage.BlackBlob(ImagePath, out ImageWidth, out ImageHeight, true);

            ImageScale = (double)PDFPageWidth / ImageWidth;

            // Use the blob rectangles here; previously BlackBoxes was passed a second
            // time and the BlackBlob result was never used.
            DocObjects.ExtractTextFromRect(BlackBlobs, ImageScale);

            DocObjects.TranslateText();

            ArchiveTrainingData();

            PopulateGridView("Extract");
        }