// Runs MuPDFRenderer.GetDocumentMetaInfo on the LDA-paper fixture PDF and checks:
//  - the reported page count (8) and that the document is not flagged as corrupted,
//  - the decoded JSON, via TestJSONoutputIsCorrectForPDFdoc1,
//  - the raw JSON text (re-serialized, indented) against the approved output,
//  - that ClearRawContent() nulls both raw content fields.
public void TestGetDocumentMetaInfo_on_doc1()
{
    string pdf_path = MiscTestHelpers.GetNormalizedPathToAnyTestDataTestFile(@"fixtures/1.Doc-Many.Metadata.Formats/0001-LDA-paper/2004.04.PNAS.ef997ae1b01762b57b75d8c22fb8cec87406.pdf");
    ASSERT.FileExists(pdf_path);

    PDFDocumentMuPDFMetaInfo meta = MuPDFRenderer.GetDocumentMetaInfo(pdf_path, null, ProcessPriorityClass.Normal);

    ASSERT.AreEqual<int>(8, meta.PageCount);
    ASSERT.AreEqual<bool>(false, meta.DocumentIsCorrupted);
    // Size sanity check on the raw text.
    // NOTE(review): argument order of ASSERT.IsLessOrEqual is not visible here - confirm the intended bound.
    ASSERT.IsLessOrEqual(10000, meta.raw_multipurp_text.Length);
    TestJSONoutputIsCorrectForPDFdoc1(meta.raw_decoded_json);

    // Round-trip the raw JSON through Json.NET so the compared text is indented,
    // then normalize CRLF to LF.
    object parsed = JsonConvert.DeserializeObject(meta.raw_multipurp_text);
    string indented = JsonConvert.SerializeObject(parsed, Formatting.Indented);
    string normalized = indented.Replace("\r\n", "\n");

    // Approval comparison through the project's QiqqaApprover instead of Approvals.VerifyJson.
    ApprovalTests.Approvals.Verify(new QiqqaApprover(normalized, pdf_path), ApprovalTests.Approvals.GetReporter());

    // Clearing the raw content must leave both raw fields null.
    meta.ClearRawContent();
    ASSERT.IsNull(meta.raw_multipurp_text);
    ASSERT.IsNull(meta.raw_decoded_json);
}
// Imports a RIS test file: splits it into per-record line sets, maps every set to a
// RISRecord, converts each record to BibTeX and compares the collected Result
// (serialized as indented JSON) against the approved output.
//
// ris_filepath: RIS test file path, resolved via GetNormalizedPathToRISTestFile.
public void Basic_Import_Test(string ris_filepath)
{
    string ris_path = GetNormalizedPathToRISTestFile(ris_filepath);
    ASSERT.FileExists(ris_path);
    string content = GetTestFileContent(ris_path);

    Result outcome = new Result();
    outcome.lines_set = SplitMultipleRISLines(content);

    foreach (List<string> record_lines in outcome.lines_set)
    {
        RISRecord ris_record = MapRISLinesToDictionary(record_lines);
        outcome.records.Add(ris_record);
        outcome.bibtex_items.Add(ris_record.ToBibTeX());
    }

    // Indented JSON with CRLF normalized to LF is what gets compared.
    string serialized = JsonConvert.SerializeObject(outcome, Newtonsoft.Json.Formatting.Indented);
    string normalized = serialized.Replace("\r\n", "\n");

    // Approval comparison through the project's QiqqaApprover instead of Approvals.VerifyJson.
    ApprovalTests.Approvals.Verify(new QiqqaApprover(normalized, ris_filepath), ApprovalTests.Approvals.GetReporter());
}
// Rough throughput measurement for BibTexCharacterMap.ASCIIToBibTex and
// BibTexCharacterMap.BibTexToASCII on the content of one test data file.
// Each direction is run up to 1,000,000 times; the elapsed time is polled once every
// 10,000 iterations and the loop stops once 2000 ms have passed. The resulting figure
// is logged; nothing is asserted about the speed itself.
//
// filepath: test data file path, resolved via GetNormalizedPathToAnyTestDataTestFile.
public void Test_SPEED(string filepath)
{
    const int MAX_ITERATIONS = 1000000;
    const int POLL_INTERVAL = 10000;

    string path = GetNormalizedPathToAnyTestDataTestFile(filepath);
    ASSERT.FileExists(path);

    string ascii_text = GetTestFileContent(path);
    string bibtex_text = BibTexCharacterMap.ASCIIToBibTex(ascii_text);
    string roundtrip_text = BibTexCharacterMap.BibTexToASCII(bibtex_text);

    // --- ASCII -> BibTeX ---
    Stopwatch timer = Stopwatch.StartNew();
    int done = 0;
    while (done < MAX_ITERATIONS)
    {
        // Only look at the clock on the last iteration of every chunk.
        bool chunk_end = (done % POLL_INTERVAL == POLL_INTERVAL - 1);
        if (chunk_end && timer.ElapsedMilliseconds >= 2000)
        {
            break;
        }
        bibtex_text = BibTexCharacterMap.ASCIIToBibTex(ascii_text);
        ++done;
    }
    double time_a2b = done * 1.0E-3 * ascii_text.Length / timer.ElapsedMilliseconds;
    Logging.Info("ASCIIToBibTex can do {0:0.000}M operations per second per character", time_a2b);

    // --- BibTeX -> ASCII ---
    timer = Stopwatch.StartNew();
    done = 0;
    while (done < MAX_ITERATIONS)
    {
        bool chunk_end = (done % POLL_INTERVAL == POLL_INTERVAL - 1);
        if (chunk_end && timer.ElapsedMilliseconds >= 2000)
        {
            break;
        }
        roundtrip_text = BibTexCharacterMap.BibTexToASCII(bibtex_text);
        ++done;
    }
    // The figure is scaled by the length of the ASCII input, as in the first measurement.
    double time_b2a = done * 1.0E-3 * ascii_text.Length / timer.ElapsedMilliseconds;
    Logging.Info("BibTexToASCII can do {0:0.000}M operations per second per character", time_b2a);

    // Final call whose result is discarded; it consumes the last round-trip value.
    BibTexCharacterMap.ASCIIToBibTex(roundtrip_text);
}
// Reads the formatted multipurp JSON snippet fixture, deserializes it into a list of
// MultiPurpDocumentInfoObject and runs the shared TestJSONoutputIsCorrectForPDFdoc1
// checks on the result.
public void TestMuPDF_multipurp_JSON_formatted_snippet1_parses_okay()
{
    string snippet_path = MiscTestHelpers.GetNormalizedPathToAnyTestDataTestFile(@"fixtures/mutool/multipurp/json-snippets/pdf-info1-formatted.json");
    ASSERT.FileExists(snippet_path);

    string snippet_text = File.ReadAllText(snippet_path);
    ASSERT.IsNotNull(snippet_text);

    List<MultiPurpDocumentInfoObject> parsed_infos =
        JsonConvert.DeserializeObject<List<MultiPurpDocumentInfoObject>>(snippet_text);

    TestJSONoutputIsCorrectForPDFdoc1(parsed_infos);
}
// Deserializes a serialized record test file into a PDFDocument via Json.NET.
// The test fails if the file is missing, if deserialization throws, or if
// deserialization yields no object at all.
//
// input_path: serialization test file path, resolved via
//             GetNormalizedPathToSerializationTestFile.
public void Deserialize_v80_ClassicalQiqqaRecord(string input_path)
{
    string path = GetNormalizedPathToSerializationTestFile(input_path);
    ASSERT.FileExists(path);
    string input = GetTestFileContent(path);

    PDFDocument record = JsonConvert.DeserializeObject<PDFDocument>(input);

    // A real check instead of the former placeholder ASSERT.IsTrue(true):
    // JsonConvert.DeserializeObject<T> returns null for empty or "null" input,
    // which previously went unnoticed.
    ASSERT.IsNotNull(record);
}
// Converts the content of a test data file with BibTexCharacterMap.ASCIIToBibTex and
// compares the converted text against the approved output.
//
// filepath: test data file path, resolved via GetNormalizedPathToAnyTestDataTestFile.
public void Test_Conversion_To_BibTeX_Text(string filepath)
{
    string resolved_path = GetNormalizedPathToAnyTestDataTestFile(filepath);
    ASSERT.FileExists(resolved_path);

    string ascii_text = GetTestFileContent(resolved_path);
    string bibtex_text = BibTexCharacterMap.ASCIIToBibTex(ascii_text);

    // Note: the approver receives the original (unresolved) filepath argument.
    ApprovalTests.Approvals.Verify(new QiqqaApprover(bibtex_text, filepath), ApprovalTests.Approvals.GetReporter());
}
// Extracts the MuPDF metadata for one PDF under fixtures/PDF/ and compares the JSON text
// produced by ProduceJSONtext4Comparison against the approved output.
//
// filepath: PDF path relative to fixtures/PDF/; every "./" occurrence in it is removed
//           before the path is resolved.
public void Test_PDF_metadata_extraction_via_multipurp_chunk0070_qpdf(string filepath)
{
    string relative_path = filepath.Replace("./", "");
    string pdf_path = MiscTestHelpers.GetNormalizedPathToAnyTestDataTestFile($"fixtures/PDF/{relative_path}");
    ASSERT.FileExists(pdf_path);

    PDFDocumentMuPDFMetaInfo meta = MuPDFRenderer.GetDocumentMetaInfo(pdf_path, null, ProcessPriorityClass.Normal);
    string comparison_text = ProduceJSONtext4Comparison(meta);

    // Approval comparison through the project's QiqqaApprover instead of Approvals.VerifyJson.
    ApprovalTests.Approvals.Verify(new QiqqaApprover(comparison_text, pdf_path), ApprovalTests.Approvals.GetReporter());
}
// Scrapes a stored sample page and compares the scraped papers against the approved output.
// The sample file is expected to contain an HTML comment holding the source URL
// ("<!-- url -->"); everything after that comment is treated as the HTML document.
//
// sample_filepath: sample file path, resolved via GetNormalizedPathToAnyTestDataTestFile.
public void Basic_Test(string sample_filepath)
{
    string path = GetNormalizedPathToAnyTestDataTestFile(sample_filepath);
    ASSERT.FileExists(path);
    string sample_text = GetTestFileContent(path);

    // Extract URL and HTML from the sample file.
    // RegexOptions.Singleline is the (counter-intuitively named) flag that lets '.' match
    // newlines as well:
    // https://stackoverflow.com/questions/159118/how-do-i-match-any-character-across-multiple-lines-in-a-regular-expression
    Match match = Regex.Match(sample_text, @"<!--(.*?)-->(.*)", RegexOptions.Singleline);

    // Fail with a clear assertion when the sample lacks the URL comment, instead of
    // passing a null document on to HtmlDocument.LoadHtml below.
    ASSERT.IsTrue(match.Success);

    string url = match.Groups[1].Value.Trim();
    string docHtml = match.Groups[2].Value;

    List<GoogleScholarScrapePaper> gssps = new List<GoogleScholarScrapePaper>();
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(docHtml);

    ScrapeDoc(doc, url, ref gssps);

    ASSERT.IsGreaterOrEqual(gssps.Count, 1);

    // Indented JSON with CRLF normalized to LF is what gets compared.
    string json_out = JsonConvert.SerializeObject(gssps, Newtonsoft.Json.Formatting.Indented).Replace("\r\n", "\n");

    // Approval comparison through the project's QiqqaApprover instead of Approvals.VerifyJson.
    ApprovalTests.Approvals.Verify(
        new QiqqaApprover(json_out, sample_filepath),
        ApprovalTests.Approvals.GetReporter()
    );
}
// Feeds the content of a test data file to StringTools.GetFirstWord and compares the
// input/result pair (serialized as indented JSON) against the approved output.
//
// data_filepath: test data file path, resolved via GetNormalizedPathToAnyTestDataTestFile.
public void GetFirstWord_Test(string data_filepath)
{
    string resolved_path = GetNormalizedPathToAnyTestDataTestFile(data_filepath);
    ASSERT.FileExists(resolved_path);
    string content = GetTestFileContent(resolved_path);

    Result outcome = new Result();
    outcome.input = content;
    outcome.result = StringTools.GetFirstWord(content);

    // Indented JSON with CRLF normalized to LF is what gets compared.
    string serialized = JsonConvert.SerializeObject(outcome, Newtonsoft.Json.Formatting.Indented);
    string normalized = serialized.Replace("\r\n", "\n");

    // Approval comparison through the project's QiqqaApprover instead of Approvals.VerifyJson.
    ApprovalTests.Approvals.Verify(new QiqqaApprover(normalized, data_filepath), ApprovalTests.Approvals.GetReporter());
}
// Converts a PubMed XML test file with PubMedXMLToBibTex.TryConvert and compares the
// success flag, BibTeX output and messages (serialized as indented JSON) against the
// approved output.
// Element reference: http://www.nlm.nih.gov/bsd/licensee/elements_descriptions.html
//
// pubmed_filepath: PubMed XML test file path, resolved via GetNormalizedPathToPubMedXMLTestFile.
public void Basic_Import_Test(string pubmed_filepath)
{
    string xml_path = GetNormalizedPathToPubMedXMLTestFile(pubmed_filepath);
    ASSERT.FileExists(xml_path);
    string xml_text = GetTestFileContent(xml_path);

    // TryConvert writes its outputs straight into the Result fields.
    Result outcome = new Result();
    outcome.success = PubMedXMLToBibTex.TryConvert(xml_text, out outcome.bibtex, out outcome.messages);

    // Indented JSON with CRLF normalized to LF is what gets compared.
    string serialized = JsonConvert.SerializeObject(outcome, Newtonsoft.Json.Formatting.Indented);
    string normalized = serialized.Replace("\r\n", "\n");

    // Approval comparison through the project's QiqqaApprover instead of Approvals.VerifyJson.
    ApprovalTests.Approvals.Verify(new QiqqaApprover(normalized, pubmed_filepath), ApprovalTests.Approvals.GetReporter());
}