Esempio n. 1
0
        /// <summary>
        /// Extracts the MuPDF meta info for the LDA-paper fixture PDF, checks a few fixed
        /// properties of the result, approval-tests the re-indented JSON output and finally
        /// verifies that <c>ClearRawContent()</c> nulls both raw content members.
        /// </summary>
        public void TestGetDocumentMetaInfo_on_doc1()
        {
            string fixture_path = MiscTestHelpers.GetNormalizedPathToAnyTestDataTestFile(@"fixtures/1.Doc-Many.Metadata.Formats/0001-LDA-paper/2004.04.PNAS.ef997ae1b01762b57b75d8c22fb8cec87406.pdf");
            ASSERT.FileExists(fixture_path);

            // No password is supplied (null) for this fixture.
            var meta = MuPDFRenderer.GetDocumentMetaInfo(fixture_path, null, ProcessPriorityClass.Normal);

            // Fixed expectations for this document.
            ASSERT.AreEqual<int>(8, meta.PageCount);
            ASSERT.AreEqual<bool>(false, meta.DocumentIsCorrupted);
            ASSERT.IsLessOrEqual(10000, meta.raw_multipurp_text.Length);
            TestJSONoutputIsCorrectForPDFdoc1(meta.raw_decoded_json);

            // Round-trip the raw text through Json.NET to get an indented rendering,
            // then normalize line endings to "\n" before comparing.
            object parsed = JsonConvert.DeserializeObject(meta.raw_multipurp_text);
            string indented = JsonConvert.SerializeObject(parsed, Formatting.Indented);
            string normalized_json = indented.Replace("\r\n", "\n");

            // Approval test: the normalized JSON is verified through the project's own
            // approver (used here in place of ApprovalTests.Approvals.VerifyJson) together
            // with whatever reporter ApprovalTests has configured.
            var approver = new QiqqaApprover(normalized_json, fixture_path);
            ApprovalTests.Approvals.Verify(approver, ApprovalTests.Approvals.GetReporter());

            // After clearing, both raw content members must be null.
            meta.ClearRawContent();
            ASSERT.IsNull(meta.raw_multipurp_text);
            ASSERT.IsNull(meta.raw_decoded_json);
        }
        /// <summary>
        /// Fetches the text embedded in a PDF via <c>MuPDFRenderer.GetEmbeddedText</c> and
        /// converts the resulting text chunks into word lists.
        /// </summary>
        /// <param name="pdf_filename">Path of the PDF file, forwarded to the renderer.</param>
        /// <param name="page_numbers">Page selection, forwarded unchanged to the renderer.</param>
        /// <param name="pdf_user_password">Password forwarded unchanged to the renderer.</param>
        /// <returns>
        /// The dictionary produced by <c>ConvertToWordList</c>
        /// (keys are presumably page numbers — confirm against that helper).
        /// </returns>
        public static Dictionary<int, WordList> DoOCR(string pdf_filename, string page_numbers, string pdf_user_password)
        {
            // The extraction is requested at below-normal process priority.
            List<MuPDFRenderer.TextChunk> chunks = MuPDFRenderer.GetEmbeddedText(
                pdf_filename,
                page_numbers,
                pdf_user_password,
                ProcessPriorityClass.BelowNormal);

            return ConvertToWordList(chunks);
        }
Esempio n. 3
0
        /// <summary>
        /// Extracts the MuPDF meta info for one PDF below <c>fixtures/PDF/</c> and
        /// approval-tests the JSON text derived from it.
        /// </summary>
        /// <param name="filepath">
        /// Path of the PDF relative to <c>fixtures/PDF/</c>; every <c>"./"</c> occurrence is stripped first.
        /// </param>
        public void Test_PDF_metadata_extraction_via_multipurp_chunk0070_qpdf(string filepath)
        {
            // Remove "./" segments before composing the fixture path.
            string relative_path = filepath.Replace("./", "");
            string fixture_path = MiscTestHelpers.GetNormalizedPathToAnyTestDataTestFile("fixtures/PDF/" + relative_path);
            ASSERT.FileExists(fixture_path);

            // No password is supplied (null).
            var meta = MuPDFRenderer.GetDocumentMetaInfo(fixture_path, null, ProcessPriorityClass.Normal);
            string comparable_json = ProduceJSONtext4Comparison(meta);

            // Approval test: verified through the project's own approver (used here in place
            // of ApprovalTests.Approvals.VerifyJson) with the configured ApprovalTests reporter.
            var approver = new QiqqaApprover(comparable_json, fixture_path);
            ApprovalTests.Approvals.Verify(approver, ApprovalTests.Approvals.GetReporter());
        }
        /// <summary>
        /// Renders one page of a PDF to an image byte array via
        /// <c>MuPDFRenderer.RenderPDFPageAsByteArray</c>, at below-normal process priority.
        /// </summary>
        /// <param name="filename">Path of the PDF file.</param>
        /// <param name="pdf_user_password">Password forwarded to the renderer.</param>
        /// <param name="page">Page to render.</param>
        /// <param name="dpi">Resolution forwarded to the renderer.</param>
        /// <param name="height">Height forwarded to the renderer.</param>
        /// <param name="width">Width forwarded to the renderer.</param>
        /// <returns>The byte array returned by the renderer.</returns>
        /// <exception cref="GenericException">
        /// Wraps any exception raised while rendering, adding page, dpi, size and file name to the message.
        /// </exception>
        private static byte[] GetPageByDPIAsImage_LOCK(string filename, string pdf_user_password, int page, int dpi, int height, int width)
        {
            // Must not be invoked from the UI thread.
            WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

            try
            {
                // Equivalent sample command line (PNG to stdout for page #2; -w/-h limit the
                // output size while -r, the dpi resolution, drives it):
                //
                //      mudraw -q -o - -F png -r 600 -w 1920 -h 1280 G:\Qiqqa\evil\Guest\documents\1\1A9760F3917A107AC46E6E292B9C839364F09E73.pdf  2
                return MuPDFRenderer.RenderPDFPageAsByteArray(
                    filename,
                    page,
                    dpi,
                    height,
                    width,
                    pdf_user_password,
                    ProcessPriorityClass.BelowNormal);
            }
            catch (Exception ex)
            {
                // Re-throw with the render parameters attached; the original exception is kept as the cause.
                throw new GenericException(ex, $"PDF Render: Error while rasterising page {page} at {dpi}dpi / {height}x{width} pixels of '{filename}'");
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Determines the number of pages of a PDF from the meta info reported by
        /// <c>MuPDFRenderer.GetDocumentMetaInfo</c>.
        /// </summary>
        /// <param name="filename">Path of the PDF file to inspect.</param>
        /// <param name="password">Password forwarded to the renderer.</param>
        /// <returns>
        /// The reported page count when one is available. Otherwise -3 when the meta info
        /// flags the document as corrupted, and -1 when no meta info / page count is available
        /// or when any exception occurred (the exception is logged as a warning).
        /// </returns>
        public static int CountPDFPages(string filename, string password)
        {
            // Must not be invoked from the UI thread.
            WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

            try
            {
                Logging.Debug("+CountPDFPages_MuPDF: {0}", filename);
                var metadata = MuPDFRenderer.GetDocumentMetaInfo(filename, password, ProcessPriorityClass.Normal);

                // The fallback is evaluated when `metadata?.PageCount` yields null, which includes
                // the case where `metadata` itself is null. It therefore has to be null-safe too:
                // dereferencing `metadata` directly here raised a NullReferenceException that was
                // only rescued by the catch-all below, producing a misleading warning in the log.
                // NOTE(review): if PageCount is a non-nullable int, the -3 branch can never be
                // reached for a non-null metadata object — confirm whether corrupted documents
                // are meant to report -3 regardless of their page count.
                int page_count = metadata?.PageCount ?? (metadata?.DocumentIsCorrupted == true ? -3 : -1);

                Logging.Debug("-CountPDFPages_MuPDF '{1}' -> ({0} pages)", page_count, filename);
                return page_count;
            }
            catch (Exception ex)
            {
                // Best effort: any failure is reported as "unknown page count".
                Logging.Warn(ex, "Error while counting pages in CountPDFPages_MuPDF for file: {0}", filename);
                return -1;
            }
        }
        /// <summary>
        /// Click handler: renders the page selected through the two text boxes from
        /// <c>C:\temp\{doc}.pdf</c>, shows it, and runs the region locator on the rendered bitmap.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        void ObjButtonGO_Click(object sender, RoutedEventArgs e)
        {
            // Both text boxes are parsed as integers; non-numeric text makes Convert.ToInt32 throw.
            int doc_id = Convert.ToInt32(ObjTextDoc.Text);
            int page_no = Convert.ToInt32(ObjTextPage.Text);
            string source_path = String.Format(@"C:\temp\{0}.pdf", doc_id);

            Logging.Info("+Rendering page");

            // The same rendered stream feeds both the WPF image (via a byte copy) and the GDI+ bitmap.
            // The stream is intentionally not disposed here: the Bitmap is constructed on top of it.
            MemoryStream rendered = MuPDFRenderer.RenderPDFPage(source_path, page_no, 200, null, ProcessPriorityClass.Normal);
            byte[] rendered_bytes = rendered.ToArray();
            BitmapSource wpf_image = BitmapImageTools.LoadFromBytes(rendered_bytes);
            Bitmap gdi_bitmap = new Bitmap(rendered);

            Logging.Info("-Rendering page");

            this.Image = wpf_image;

            Logging.Info("+Finding regions");
            this.region_locator = new PDFRegionLocator(gdi_bitmap);
            Logging.Info("-Finding regions");

            Recalc();
        }