/// <summary>
/// Extracts the MuPDF meta info for the LDA-paper fixture PDF, checks a few key fields,
/// verifies the re-serialized JSON through ApprovalTests and finally confirms that
/// <c>ClearRawContent()</c> nulls both raw content members.
/// </summary>
public void TestGetDocumentMetaInfo_on_doc1()
{
    string pdf_filename = MiscTestHelpers.GetNormalizedPathToAnyTestDataTestFile(
        @"fixtures/1.Doc-Many.Metadata.Formats/0001-LDA-paper/2004.04.PNAS.ef997ae1b01762b57b75d8c22fb8cec87406.pdf");
    ASSERT.FileExists(pdf_filename);

    PDFDocumentMuPDFMetaInfo meta = MuPDFRenderer.GetDocumentMetaInfo(
        pdf_filename,
        null,
        ProcessPriorityClass.Normal);

    // Basic sanity checks on the extracted meta info.
    ASSERT.AreEqual <int>(8, meta.PageCount);
    ASSERT.AreEqual <bool>(false, meta.DocumentIsCorrupted);
    // NOTE(review): size check of the raw text against 10000; which side is the bound
    // depends on the argument order of ASSERT.IsLessOrEqual -- confirm against the helper.
    ASSERT.IsLessOrEqual(10000, meta.raw_multipurp_text.Length);

    TestJSONoutputIsCorrectForPDFdoc1(meta.raw_decoded_json);

    // Round-trip the raw text through the JSON (de)serializer so the approved text is
    // indented and uses '\n' line endings only.
    object parsed = JsonConvert.DeserializeObject(meta.raw_multipurp_text);
    string indented = JsonConvert.SerializeObject(parsed, Formatting.Indented);
    string json_text = indented.Replace("\r\n", "\n");

    // Approval comparison (ApprovalTests -> BeyondCompare); this replaces a plain
    // ApprovalTests.Approvals.VerifyJson(...) call with the project's own approver.
    ApprovalTests.Approvals.Verify(
        new QiqqaApprover(json_text, pdf_filename),
        ApprovalTests.Approvals.GetReporter());

    // After clearing, neither raw member may still hold content.
    meta.ClearRawContent();
    ASSERT.IsNull(meta.raw_multipurp_text);
    ASSERT.IsNull(meta.raw_decoded_json);
}
/// <summary>
/// Fetches the embedded text chunks of the given PDF pages via MuPDF (at below-normal
/// process priority) and converts them with <c>ConvertToWordList</c>.
/// </summary>
/// <param name="pdf_filename">Path of the PDF file, passed through to MuPDF.</param>
/// <param name="page_numbers">Page selection, passed through to MuPDF unchanged.</param>
/// <param name="pdf_user_password">PDF user password, passed through to MuPDF.</param>
/// <returns>The dictionary produced by <c>ConvertToWordList</c> for the extracted chunks.</returns>
public static Dictionary <int, WordList> DoOCR(string pdf_filename, string page_numbers, string pdf_user_password)
{
    List <MuPDFRenderer.TextChunk> chunks = MuPDFRenderer.GetEmbeddedText(
        pdf_filename,
        page_numbers,
        pdf_user_password,
        ProcessPriorityClass.BelowNormal);

    return ConvertToWordList(chunks);
}
/// <summary>
/// Extracts the MuPDF meta info for one PDF fixture and verifies its JSON rendition
/// through ApprovalTests.
/// </summary>
/// <param name="filepath">
/// Fixture path relative to <c>fixtures/PDF/</c>; every "./" occurrence is stripped first.
/// </param>
public void Test_PDF_metadata_extraction_via_multipurp_chunk0070_qpdf(string filepath)
{
    string relative_path = filepath.Replace("./", "");
    string pdf_filename = MiscTestHelpers.GetNormalizedPathToAnyTestDataTestFile($"fixtures/PDF/{relative_path}");
    ASSERT.FileExists(pdf_filename);

    PDFDocumentMuPDFMetaInfo meta = MuPDFRenderer.GetDocumentMetaInfo(
        pdf_filename,
        null,
        ProcessPriorityClass.Normal);

    string json_text = ProduceJSONtext4Comparison(meta);

    // Approval comparison (ApprovalTests -> BeyondCompare); this replaces a plain
    // ApprovalTests.Approvals.VerifyJson(...) call with the project's own approver.
    ApprovalTests.Approvals.Verify(
        new QiqqaApprover(json_text, pdf_filename),
        ApprovalTests.Approvals.GetReporter());
}
/// <summary>
/// Renders a single PDF page to an image byte array via MuPDF (at below-normal process
/// priority). Must not be called on the UI thread.
/// </summary>
/// <param name="filename">Path of the PDF file to render.</param>
/// <param name="pdf_user_password">PDF user password, passed through to the renderer.</param>
/// <param name="page">Page to render.</param>
/// <param name="dpi">Render resolution in dpi.</param>
/// <param name="height">Height passed through to the renderer.</param>
/// <param name="width">Width passed through to the renderer.</param>
/// <returns>The byte array returned by <c>MuPDFRenderer.RenderPDFPageAsByteArray</c>.</returns>
/// <exception cref="GenericException">
/// Wraps any exception raised while rendering; the message names page, dpi, size and file.
/// </exception>
private static byte[] GetPageByDPIAsImage_LOCK(string filename, string pdf_user_password, int page, int dpi, int height, int width)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

    try
    {
        // Equivalent command line, as noted by the original author (PNG of page #2 written
        // to stdout; width and height limit/reduce the output, the dpi resolution drives it):
        //
        //   mudraw -q -o - -F png -r 600 -w 1920 -h 1280 G:\Qiqqa\evil\Guest\documents\1\1A9760F3917A107AC46E6E292B9C839364F09E73.pdf 2
        return MuPDFRenderer.RenderPDFPageAsByteArray(
            filename,
            page,
            dpi,
            height,
            width,
            pdf_user_password,
            ProcessPriorityClass.BelowNormal);
    }
    catch (Exception ex)
    {
        // Re-throw with the render parameters attached so the failing request is identifiable.
        throw new GenericException(ex, $"PDF Render: Error while rasterising page {page} at {dpi}dpi / {height}x{width} pixels of '{filename}'");
    }
}
/// <summary>
/// Counts the pages of a PDF file by querying MuPDF for the document's meta info.
/// Must not be called on the UI thread.
/// </summary>
/// <param name="filename">Path of the PDF file to inspect.</param>
/// <param name="password">Password handed to the MuPDF meta-info query.</param>
/// <returns>
/// The page count reported by the meta info. When no page count is available the
/// fallback is -3 if the meta info flags the document as corrupted and -1 otherwise.
/// -1 is also returned when the query throws (the exception is logged as a warning).
/// </returns>
public static int CountPDFPages(string filename, string password)
{
    WPFDoEvents.AssertThisCodeIs_NOT_RunningInTheUIThread();

    try
    {
        Logging.Debug("+CountPDFPages_MuPDF: {0}", filename);

        var metadata = MuPDFRenderer.GetDocumentMetaInfo(filename, password, ProcessPriorityClass.Normal);

        // The fallback must not dereference `metadata` directly: it is evaluated exactly
        // when `metadata?.PageCount` yielded null, which includes `metadata` itself being
        // null. Dereferencing it there raised a NullReferenceException that was then
        // swallowed and logged as a misleading warning by the catch block below.
        bool is_corrupted = metadata?.DocumentIsCorrupted ?? false;
        int page_count = metadata?.PageCount ?? (is_corrupted ? -3 : -1);

        Logging.Debug("-CountPDFPages_MuPDF '{1}' -> ({0} pages)", page_count, filename);
        return page_count;
    }
    catch (Exception ex)
    {
        Logging.Warn(ex, "Error while counting pages in CountPDFPages_MuPDF for file: {0}", filename);
        return -1;
    }
}
/// <summary>
/// Click handler: renders the requested page of <c>C:\temp\{document number}.pdf</c> at
/// 200 dpi, shows it, runs the region locator on the rendered bitmap and recalculates.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
void ObjButtonGO_Click(object sender, RoutedEventArgs e)
{
    // The numbers come straight from text boxes: validate them instead of letting
    // Convert.ToInt32 throw a FormatException/OverflowException out of the click handler.
    int pdf_number;
    int page_number;
    if (!int.TryParse(ObjTextDoc.Text, out pdf_number) || !int.TryParse(ObjTextPage.Text, out page_number))
    {
        Logging.Info("Rendering page skipped: document number and page number must both be integers");
        return;
    }

    string pdf_filename = String.Format(@"C:\temp\{0}.pdf", pdf_number);

    Logging.Info("+Rendering page");
    // Deliberately NOT wrapped in `using`: a Bitmap created from a stream requires that
    // stream to stay open for the Bitmap's lifetime, and `bitmap` is handed on to the
    // region locator below.
    MemoryStream ms = MuPDFRenderer.RenderPDFPage(pdf_filename, page_number, 200, null, ProcessPriorityClass.Normal);
    BitmapSource bitmap_image = BitmapImageTools.LoadFromBytes(ms.ToArray());
    Bitmap bitmap = new Bitmap(ms);
    Logging.Info("-Rendering page");

    this.Image = bitmap_image;

    Logging.Info("+Finding regions");
    this.region_locator = new PDFRegionLocator(bitmap);
    Logging.Info("-Finding regions");

    Recalc();
}