// Regression test for bug 55733: extracting text from "55733.docx" must
// complete without throwing.
public void TestBug55733()
{
    XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("55733.docx");
    XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
    try
    {
        // Check it gives text without error
        string text = extractor.Text;
        Assert.IsNotNull(text);
    }
    finally
    {
        // Close the extractor even when extraction or the assertion fails.
        extractor.Close();
    }
}
// Extracts the text of "ExternalEntityInText.docx" and checks that the
// extracted text is exactly the visible sentence followed by an empty pair
// of quotes, i.e. nothing was substituted between the quotes.
public void TestFile()
{
    XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("ExternalEntityInText.docx");
    XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
    try
    {
        String text = extractor.Text;
        Assert.IsTrue(text.Length > 0);

        // Check contents, they should not contain the text from POI web site after the colon!
        Assert.AreEqual("Here should not be the POI web site: \"\"", text.Trim());
    }
    finally
    {
        // Close the extractor even when one of the assertions fails.
        extractor.Close();
    }
}
// Extracts the text of "checkboxes.docx" and compares it against the full
// expected output, in which unchecked boxes appear as "|_|" and checked
// boxes as "|X|" (in body text, inside a textbox, in a table and in sequence).
public void TestCheckboxes()
{
    XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("checkboxes.docx");
    XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
    try
    {
        Assert.AreEqual("This is a small test for checkboxes \nunchecked: |_| \n" +
                        "Or checked: |X|\n\n\n\n\n" +
                        "Test a checkbox within a textbox: |_| -> |X|\n\n\n" +
                        "In Table:\n|_|\t|X|\n\n\n" +
                        "In Sequence:\n|X||_||X|\n", extractor.Text);
    }
    finally
    {
        // Close the extractor even when the comparison fails.
        extractor.Close();
    }
}
// Checks text extraction against two sample documents:
//  * "Bug54849.docx": every expected fragment must occur in the lower-cased
//    extracted text.
//  * "Bug54771a.docx": each expected fragment must occur exactly once, and
//    "test\n" exactly twice.
public void TestSimpleControlContent()
{
    XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("Bug54849.docx");
    String[] targs = new String[] {
        "header_rich_text",
        "rich_text",
        "rich_text_pre_table\nrich_text_cell1\t\t\t\n\t\t\t\n\t\t\t\n\nrich_text_post_table",
        "plain_text_no_newlines",
        "plain_text_with_newlines1\nplain_text_with_newlines2\n",
        "watermelon\n",
        "dirt\n",
        "4/16/2013\n",
        "rich_text_in_cell",
        "abc",
        "rich_text_in_paragraph_in_cell",
        "footer_rich_text",
        "footnote_sdt",
        "endnote_sdt"
    };
    XWPFWordExtractor ex = new XWPFWordExtractor(doc);
    try
    {
        // Invariant lower-casing keeps the comparison independent of the
        // culture the tests happen to run under.
        String s = ex.Text.ToLowerInvariant();
        foreach (String targ in targs)
        {
            Assert.IsTrue(s.Contains(targ), "controlled content loading-" + targ);
        }
    }
    finally
    {
        // Close the extractor even when an assertion fails.
        ex.Close();
    }

    doc = XWPFTestDataSamples.OpenSampleDocument("Bug54771a.docx");
    targs = new String[] {
        "bb",
        "test subtitle\n",
        "test user\n",
    };
    ex = new XWPFWordExtractor(doc);
    try
    {
        String s = ex.Text.ToLowerInvariant();

        //At one point in development there were three copies of the text.
        //This ensures that there is only one copy.
        foreach (String targ in targs)
        {
            // Targets are literal text, so escape them before using them as a pattern.
            int hit = Regex.Matches(s, Regex.Escape(targ)).Count;
            Assert.AreEqual(1, hit, "controlled content loading-" + targ);
        }

        //"test\n" appears twice: once as the "title" and once in the text.
        //This also happens when you save this document as text from MSWord.
        int testHits = Regex.Matches(s, Regex.Escape("test\n")).Count;
        Assert.AreEqual(2, testHits, "test<N>");
    }
    finally
    {
        // Close the extractor even when an assertion fails.
        ex.Close();
    }
}