XWPFWordExtractor, NPOI.XWPF.Extractor C# (CSharp)のコード例

コード例 #1

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: ssor/NPOI

        // Extracts the text of IllustrativeCases.docx and checks how it starts,
        // a passage it must contain, how it ends, and its total newline count.
        public void TestGetComplexText()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("IllustrativeCases.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            String text = extractor.Text;
            Assert.IsTrue(text.Length > 0);

            char euro = '\u20ac';

            // Dump (at most) the last 40 characters as a debugging aid for the
            // EndsWith check below. The start index is clamped so that a text
            // shorter than 40 characters cannot make Substring throw.
            int tailStart = Math.Max(0, text.Length - 40);
            Debug.WriteLine("'" + text.Substring(tailStart) + "'");

            // Check contents. StartsWith/EndsWith are given an explicit ordinal
            // comparison so whitespace and control characters are matched
            // exactly instead of through culture-specific collation rules.
            Assert.IsTrue(text.StartsWith(
                    "  \n(V) ILLUSTRATIVE CASES\n\n",
                    StringComparison.Ordinal
            ));
            Assert.IsTrue(text.Contains(
                    "As well as gaining " + euro + "90 from child benefit increases, he will also receive the early childhood supplement of " + euro + "250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n"
            ));
            Assert.IsTrue(text.EndsWith(
                    "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n \n\n\n",
                    StringComparison.Ordinal
            ));

            // Check number of paragraphs, measured as the number of '\n' characters.
            int ps = 0;
            foreach (char c in text)
            {
                if (c == '\n')
                {
                    ps++;
                }
            }
            Assert.AreEqual(134, ps);
        }

コード例 #2

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

        // Extracts the text of IllustrativeCases.docx and checks how it starts,
        // a passage it must contain, how it ends, and its total newline count.
        public void TestGetComplexText()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("IllustrativeCases.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            String text = extractor.Text;
            Assert.IsTrue(text.Length > 0);

            char euro = '\u20ac';

            // Check contents. StartsWith/EndsWith are given an explicit ordinal
            // comparison so whitespace and control characters are matched
            // exactly instead of through culture-specific collation rules.
            Assert.IsTrue(text.StartsWith(
                    "  \n(V) ILLUSTRATIVE CASES\n\n",
                    StringComparison.Ordinal
            ));
            Assert.IsTrue(text.Contains(
                    "As well as gaining " + euro + "90 from child benefit increases, he will also receive the early childhood supplement of " + euro + "250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
            ));
            Assert.IsTrue(text.EndsWith(
                    "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n",
                    StringComparison.Ordinal
            ));

            // Check number of paragraphs, measured as the number of '\n' characters.
            int ps = 0;
            foreach (char c in text)
            {
                if (c == '\n')
                {
                    ps++;
                }
            }
            Assert.AreEqual(134, ps);
        }

コード例 #3

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hiodava/Romero

        // Checks that the text extracted from table_footnotes.docx contains "snoska".
        public void TestTableFootnotes()
        {
            XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("table_footnotes.docx");
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

            string extracted = wordExtractor.Text;
            bool containsFootnoteWord = extracted.Contains("snoska");

            Assert.IsTrue(containsFootnoteWord);
        }

コード例 #4

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hiodava/Romero

        // Extracts TestDocument.docx twice, first with hyperlink fetching switched
        // off and then switched on, and compares each result with the full
        // expected text.
        public void TestGetWithHyperlinks()
        {
            XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("TestDocument.docx");
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

            // The leading lines are the same in both expected outputs.
            string sharedLines =
                "This is a test document.\nThis bit is in bold and italic\n" +
                "Back to normal\n" +
                "This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.\n";
            string expectedWithoutLinks = sharedLines +
                "We have a hyperlink here, and another.\n";
            string expectedWithLinks = sharedLines +
                "We have a hyperlink <http://poi.apache.org/> here, and another.\n";

            // Hyperlink targets disabled: only the link text is expected.
            wordExtractor.SetFetchHyperlinks(false);
            string plainOutput = wordExtractor.Text;
            Assert.AreEqual(expectedWithoutLinks, plainOutput);

            // Hyperlink targets enabled: one link is a real URL, the other just
            // points to the top of the page, so only one URL is expected.
            wordExtractor.SetFetchHyperlinks(true);
            string linkedOutput = wordExtractor.Text;
            Assert.AreEqual(expectedWithLinks, linkedOutput);
        }

コード例 #5

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: zbl960/npoi

        // Checks that the text extracted from endnotes.docx contains "XXX".
        public void TestEndnotes()
        {
            XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("endnotes.docx");
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

            string extracted = wordExtractor.Text;
            bool containsMarker = extracted.Contains("XXX");

            Assert.IsTrue(containsMarker);
        }

コード例 #6

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

        // Extracts the text of sample.docx and checks how it starts, how it
        // ends, and its total newline count.
        public void TestGetSimpleText()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("sample.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            String text = extractor.Text;
            Assert.IsTrue(text.Length > 0);

            // Check contents. An explicit ordinal comparison is used so the
            // match is exact (including the trailing '\n') rather than governed
            // by culture-specific collation rules.
            Assert.IsTrue(text.StartsWith(
                    "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio.",
                    StringComparison.Ordinal
            ));
            Assert.IsTrue(text.EndsWith(
                    "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n",
                    StringComparison.Ordinal
            ));

            // Check number of paragraphs, measured as the number of '\n' characters.
            int ps = 0;
            foreach (char c in text)
            {
                if (c == '\n')
                {
                    ps++;
                }
            }
            Assert.AreEqual(3, ps);
        }

コード例 #7

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: ssor/NPOI

 // Checks that extracting drawing.docx produces a non-empty text.
 public void TestDrawings()
 {
     XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("drawing.docx");
     XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

     string extracted = wordExtractor.Text;
     bool hasContent = extracted.Length > 0;

     Assert.IsTrue(hasContent);
 }

コード例 #8

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hanwangkun/npoi

 // Checks that extracting drawing.docx produces a non-empty text.
 public void TestDrawings()
 {
     XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("drawing.docx");
     XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

     string extracted = wordExtractor.Text;
     bool hasContent = extracted.Length > 0;

     Assert.IsTrue(hasContent);
 }

コード例 #9

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hiodava/Romero

        // Extracts the text of sample.docx and checks how it starts, how it
        // ends, and its total newline count.
        public void TestGetSimpleText()
        {
            XWPFDocument      doc       = XWPFTestDataSamples.OpenSampleDocument("sample.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            String text = extractor.Text;

            Assert.IsTrue(text.Length > 0);

            // Check contents. An explicit ordinal comparison is used so the
            // match is exact (including the trailing '\n') rather than governed
            // by culture-specific collation rules.
            Assert.IsTrue(text.StartsWith(
                              "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio.",
                              StringComparison.Ordinal
                              ));
            Assert.IsTrue(text.EndsWith(
                              "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n",
                              StringComparison.Ordinal
                              ));

            // Check number of paragraphs, measured as the number of '\n' characters.
            int ps = 0;

            foreach (char c in text)
            {
                if (c == '\n')
                {
                    ps++;
                }
            }
            Assert.AreEqual(3, ps);
        }

コード例 #10

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hiodava/Romero

        // Checks that the text extracted from delins.docx contains both
        // "pendant worn" and "extremely well".
        public void TestInsertedDeletedText()
        {
            XWPFDocument      doc       = XWPFTestDataSamples.OpenSampleDocument("delins.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            // Read the text once and reuse it for every assertion instead of
            // going back to the extractor each time.
            // NOTE(review): assumes the Text getter is deterministic - confirm.
            string text = extractor.Text;

            Assert.IsTrue(text.Contains("pendant worn"));
            Assert.IsTrue(text.Contains("extremely well"));
        }

コード例 #11

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: ssor/NPOI

 // Checks that the text extracted from endnotes.docx contains "XXX" and the
 // inline endnote reference marker "[endnoteRef:2]" in its expected context.
 public void TestEndnotes()
 {
     XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("endnotes.docx");
     XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);
     string extracted = wordExtractor.Text;

     // Each fragment must occur somewhere in the extracted text.
     string[] requiredFragments = new string[]
     {
         "XXX",
         "tilaka [endnoteRef:2]or 'tika'"
     };
     foreach (string fragment in requiredFragments)
     {
         Assert.IsTrue(extracted.Contains(fragment));
     }
 }

コード例 #12

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: ssor/NPOI

 // Checks that the text extracted from footnotes.docx contains "snoska" and
 // the inline footnote reference marker "[footnoteRef:1]" in its expected
 // context.
 public void TestFootnotes()
 {
     XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("footnotes.docx");
     XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

     // Both assertions use the same extracted text rather than reading the
     // extractor a second time.
     String text = extractor.Text;
     Assert.IsTrue(text.Contains("snoska"));
     Assert.IsTrue(text.Contains("Eto ochen prostoy[footnoteRef:1] text so snoskoy"));
 }

コード例 #13

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: ssor/NPOI

 // Checks that extracting FldSimple.docx produces a non-empty text that
 // contains the string "FldSimple.docx".
 public void TestFldSimpleContent()
 {
     XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("FldSimple.docx");
     XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);
     string extracted = wordExtractor.Text;

     bool hasContent = extracted.Length > 0;
     Assert.IsTrue(hasContent);

     bool containsFileName = extracted.Contains("FldSimple.docx");
     Assert.IsTrue(containsFileName);
 }

コード例 #14

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hiodava/Romero

        // Checks that the text extracted from Headers.docx contains
        // "Section 1", "Section 2" and "Section 3".
        public void TestParagraphHeader()
        {
            XWPFDocument      doc       = XWPFTestDataSamples.OpenSampleDocument("Headers.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            // Read the text once and reuse it for every assertion instead of
            // going back to the extractor each time.
            // NOTE(review): assumes the Text getter is deterministic - confirm.
            string text = extractor.Text;

            Assert.IsTrue(text.Contains("Section 1"));
            Assert.IsTrue(text.Contains("Section 2"));
            Assert.IsTrue(text.Contains("Section 3"));
        }

コード例 #15

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hiodava/Romero

        // Checks that a .docm sample (45690.docm) can be extracted and that the
        // resulting text contains "2004", "2008" and "(120 ".
        public void TestDOCMFiles()
        {
            XWPFDocument      doc       = XWPFTestDataSamples.OpenSampleDocument("45690.docm");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            // Read the text once and reuse it for every assertion instead of
            // going back to the extractor each time.
            // NOTE(review): assumes the Text getter is deterministic - confirm.
            string text = extractor.Text;

            Assert.IsTrue(text.Contains("2004"));
            Assert.IsTrue(text.Contains("2008"));
            Assert.IsTrue(text.Contains("(120 "));
        }

コード例 #16

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: ssor/NPOI

        // Regression test for bug 55733: reading the text of 55733.docx must
        // complete without throwing. The extracted value itself is not checked.
        public void TestBug55733()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("55733.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            try
            {
                // Check it gives text without error
                string text = extractor.Text;
            }
            finally
            {
                // Close even when the extraction throws, so a failing run
                // does not leave the extractor open.
                extractor.Close();
            }
        }

コード例 #17

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: ssor/NPOI

 // Checks that extracting FieldCodes.docx produces a non-empty text that
 // contains neither "AUTHOR" nor "CREATEDATE".
 public void TestNoFieldCodes()
 {
     XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("FieldCodes.docx");
     XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);
     string extracted = wordExtractor.Text;

     bool hasContent = extracted.Length > 0;
     Assert.IsTrue(hasContent);

     // None of these strings may appear in the extracted text.
     string[] forbiddenFragments = new string[] { "AUTHOR", "CREATEDATE" };
     foreach (string fragment in forbiddenFragments)
     {
         Assert.IsFalse(extracted.Contains(fragment));
     }
 }

コード例 #18

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hanwangkun/npoi

        // Checks that a .docm sample (45690.docm) can be extracted and that the
        // resulting text contains "2004", "2008" and "(120 ".
        public void TestDOCMFiles()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("45690.docm");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            // Read the text once and reuse it for every assertion instead of
            // going back to the extractor each time.
            // NOTE(review): assumes the Text getter is deterministic - confirm.
            string text = extractor.Text;

            Assert.IsTrue(text.Contains("2004"));
            Assert.IsTrue(text.Contains("2008"));
            Assert.IsTrue(text.Contains("(120 "));
        }

コード例 #19

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: ssor/NPOI

        // Checks that the text extracted from form_footnotes.docx contains
        // "testdoc" and "test phrase"; on failure the whole extracted text is
        // included in the assertion message.
        public void TestFormFootnotes()
        {
            XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("form_footnotes.docx");
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

            String extracted = wordExtractor.Text;
            String failureMessage = "Unable to find expected word in text\n" + extracted;

            String[] expectedWords = new String[] { "testdoc", "test phrase" };
            foreach (String word in expectedWords)
            {
                Assert.IsTrue(extracted.Contains(word), failureMessage);
            }
        }

コード例 #20

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: ssor/NPOI

        // Extracts checkboxes.docx and compares the result with the full
        // expected text, in which the expected output shows an unchecked box
        // as "|_|" and a checked box as "|X|".
        public void TestCheckboxes()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("checkboxes.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            try
            {
                Assert.AreEqual("This is a small test for checkboxes \nunchecked: |_| \n" +
                             "Or checked: |X|\n\n\n\n\n" +
                             "Test a checkbox within a textbox: |_| -> |X|\n\n\n" +
                             "In Table:\n|_|\t|X|\n\n\n" +
                             "In Sequence:\n|X||_||X|\n", extractor.Text);
            }
            finally
            {
                // Close even when the assertion fails, so a failing run does
                // not leave the extractor open.
                extractor.Close();
            }
        }

コード例 #21

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hanwangkun/npoi

        // Checks that tab characters survive extraction of WithTabs.docx:
        // the text must contain "a", a tab and "b", and also the complete
        // sequence "a\tb\n".
        public void TestDocTabs()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("WithTabs.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            // Read the text once and reuse it for every assertion instead of
            // going back to the extractor each time.
            // NOTE(review): assumes the Text getter is deterministic - confirm.
            string text = extractor.Text;

            // Check bits
            Assert.IsTrue(text.Contains("a"));
            Assert.IsTrue(text.Contains("\t"));
            Assert.IsTrue(text.Contains("b"));

            // Now check the first paragraph in total
            Assert.IsTrue(text.Contains("a\tb\n"));
        }

コード例 #22

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hiodava/Romero

        // Checks that tab characters survive extraction of WithTabs.docx:
        // the text must contain "a", a tab and "b", and also the complete
        // sequence "a\tb\n".
        public void TestDocTabs()
        {
            XWPFDocument      doc       = XWPFTestDataSamples.OpenSampleDocument("WithTabs.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            // Read the text once and reuse it for every assertion instead of
            // going back to the extractor each time.
            // NOTE(review): assumes the Text getter is deterministic - confirm.
            string text = extractor.Text;

            // Check bits
            Assert.IsTrue(text.Contains("a"));
            Assert.IsTrue(text.Contains("\t"));
            Assert.IsTrue(text.Contains("b"));

            // Now check the first paragraph in total
            Assert.IsTrue(text.Contains("a\tb\n"));
        }

コード例 #23

0

ファイルを表示

        // Extracts ExternalEntityInText.docx and checks that nothing appears
        // between the quotes after the colon, i.e. that no text from the POI
        // web site was pulled into the extracted output.
        public void TestFile()
        {
            XWPFDocument      doc       = XWPFTestDataSamples.OpenSampleDocument("ExternalEntityInText.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            try
            {
                String text = extractor.Text;

                Assert.IsTrue(text.Length > 0);

                // Check contents, they should not contain the text from POI web site after the colon!
                Assert.AreEqual("Here should not be the POI web site: \"\"", text.Trim());
            }
            finally
            {
                // Close even when an assertion fails, so a failing run does
                // not leave the extractor open.
                extractor.Close();
            }
        }

コード例 #24

0

ファイルを表示

ファイル: TestExternalEntities.cs プロジェクト: newlysoft/npoi

        // Extracts ExternalEntityInText.docx and checks that nothing appears
        // between the quotes after the colon, i.e. that no text from the POI
        // web site was pulled into the extracted output.
        public void TestFile()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("ExternalEntityInText.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            try
            {
                String text = extractor.Text;

                Assert.IsTrue(text.Length > 0);

                // Check contents, they should not contain the text from POI web site after the colon!
                Assert.AreEqual("Here should not be the POI web site: \"\"", text.Trim());
            }
            finally
            {
                // Close even when an assertion fails, so a failing run does
                // not leave the extractor open.
                extractor.Close();
            }
        }

コード例 #25

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hiodava/Romero

        // Compares the full extracted text of two sample documents with the
        // expected output, in which header lines come first, then the body,
        // then footer lines. The second document has a different first-page
        // header and footer, so two headers and two footers are expected.
        public void TestHeadersFooters()
        {
            XWPFDocument      doc       = XWPFTestDataSamples.OpenSampleDocument("ThreeColHeadFoot.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            Assert.AreEqual(
                "First header column!\tMid header\tRight header!\n" +
                "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
                "\n" +
                "HEADING TEXT\n" +
                "\n" +
                "More on page one\n" +
                "\n\n" +
                "End of page 1\n\n\n" +
                "This is page two. It also has a three column heading, and a three column footer.\n" +
                "Footer Left\tFooter Middle\tFooter Right\n",
                extractor.Text
                );

            // Now another file, expect multiple headers
            //  and multiple footers
            doc       = XWPFTestDataSamples.OpenSampleDocument("DiffFirstPageHeadFoot.docx");
            extractor = new XWPFWordExtractor(doc);

            Assert.AreEqual(
                "I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
                "First header column!\tMid header\tRight header!\n" +
                "This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
                "\n" +
                "HEADING TEXT\n" +
                "\n" +
                "More on page one\n" +
                "\n\n" +
                "End of page 1\n\n\n" +
                "This is page two. It also has a three column heading, and a three column footer.\n" +
                "The footer of the first page\n" +
                "Footer Left\tFooter Middle\tFooter Right\n",
                extractor.Text
                );
        }

コード例 #26

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: flowbywind/npoi

        // Checks that every expected fragment appears in the lower-cased text
        // extracted from Bug54849.docx. The fragment names suggest content
        // from headers, footers, tables, footnotes and endnotes.
        public void TestSimpleControlContent()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("Bug54849.docx");

            String[] targs = new String[] {
                "header_rich_text",
                "rich_text",
                "rich_text_pre_table\nrich_text_cell1\t\t\t\n\nrich_text_post_table",
                "plain_text_no_newlines",
                "plain_text_with_newlines1\nplain_text_with_newlines2\n",
                "watermelon\n",
                "dirt\n",
                "4/16/2013\n",
                "rich_text_in_paragraph_in_cell",
                "footer_rich_text",
                "footnote_sdt",
                "endnote_sdt"
            };
            XWPFWordExtractor ex = new XWPFWordExtractor(doc);

            try
            {
                // Lower-case with the invariant culture and search ordinally so
                // the result does not depend on the machine's current culture.
                String s    = ex.Text.ToLowerInvariant();
                int    hits = 0;

                foreach (String targ in targs)
                {
                    bool hit = s.IndexOf(targ, StringComparison.Ordinal) > -1;
                    if (hit)
                    {
                        hits++;
                    }
                    Assert.AreEqual(true, hit, "controlled content loading-" + targ);
                }
                Assert.AreEqual(targs.Length, hits, "controlled content loading hit count");
            }
            finally
            {
                // Close even when an assertion fails.
                ex.Close();
            }
        }

コード例 #27

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: Reinakumiko/npoi

        // Part 1: checks that every expected fragment appears in the lower-cased
        // text extracted from Bug54849.docx.
        // Part 2: checks that fragments of Bug54771a.docx appear exactly once
        // (and "test\n" exactly twice), guarding against duplicated text.
        public void TestSimpleControlContent()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("Bug54849.docx");
            String[] targs = new String[]{
                "header_rich_text",
                "rich_text",
                "rich_text_pre_table\nrich_text_cell1\t\t\t\n\t\t\t\n\t\t\t\n\nrich_text_post_table",
                "plain_text_no_newlines",
                "plain_text_with_newlines1\nplain_text_with_newlines2\n",
                "watermelon\n",
                "dirt\n",
                "4/16/2013\n",
                "rich_text_in_cell",
                "abc",
                "rich_text_in_paragraph_in_cell",
                "footer_rich_text",
                "footnote_sdt",
                "endnote_sdt"
            };
            XWPFWordExtractor ex = new XWPFWordExtractor(doc);
            try
            {
                // Lower-case with the invariant culture and search ordinally so
                // the result does not depend on the machine's current culture.
                String s = ex.Text.ToLowerInvariant();
                int hits = 0;

                foreach (String targ in targs)
                {
                    bool hitted = s.IndexOf(targ, StringComparison.Ordinal) > -1;
                    if (hitted)
                    {
                        hits++;
                    }
                    Assert.AreEqual(true, hitted, "controlled content loading-" + targ);
                }
                Assert.AreEqual(targs.Length, hits, "controlled content loading hit count");
            }
            finally
            {
                // Close even when an assertion fails.
                ex.Close();
            }

            doc = XWPFTestDataSamples.OpenSampleDocument("Bug54771a.docx");
            targs = new String[]{
                "bb",
                "test subtitle\n",
                "test user\n",
            };
            ex = new XWPFWordExtractor(doc);
            try
            {
                String s = ex.Text.ToLowerInvariant();

                //At one point in development there were three copies of the text.
                //This ensures that there is only one copy.
                foreach (String targ in targs)
                {
                    // The targets are literal text, so escape them before using
                    // them as a regex pattern; every entry of the returned
                    // collection is a successful match, so Count is the hit count.
                    int hit = Regex.Matches(s, Regex.Escape(targ)).Count;
                    Assert.AreEqual(1, hit, "controlled content loading-" + targ);
                }

                //"test\n" appears twice: once as the "title" and once in the text.
                //This also happens when you save this document as text from MSWord.
                int testHits = Regex.Matches(s, Regex.Escape("test\n")).Count;
                Assert.AreEqual(2, testHits, "test<N>");
            }
            finally
            {
                // Close even when an assertion fails.
                ex.Close();
            }
        }

コード例 #28

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

        // Compares the full extracted text of two sample documents with the
        // expected output, in which header lines come first, then the body,
        // then footer lines. The second document has a different first-page
        // header and footer, so two headers and two footers are expected.
        public void TestHeadersFooters()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("ThreeColHeadFoot.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            Assert.AreEqual(
                    "First header column!\tMid header\tRight header!\n" +
                            "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
                            "\n" +
                            "HEADING TEXT\n" +
                            "\n" +
                            "More on page one\n" +
                            "\n\n" +
                            "End of page 1\n\n\n" +
                            "This is page two. It also has a three column heading, and a three column footer.\n" +
                            "Footer Left\tFooter Middle\tFooter Right\n",
                    extractor.Text
            );

            // Now another file, expect multiple headers
            //  and multiple footers
            doc = XWPFTestDataSamples.OpenSampleDocument("DiffFirstPageHeadFoot.docx");
            extractor = new XWPFWordExtractor(doc);

            Assert.AreEqual(
                    "I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
                            "First header column!\tMid header\tRight header!\n" +
                            "This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
                            "\n" +
                            "HEADING TEXT\n" +
                            "\n" +
                            "More on page one\n" +
                            "\n\n" +
                            "End of page 1\n\n\n" +
                            "This is page two. It also has a three column heading, and a three column footer.\n" +
                            "The footer of the first page\n" +
                            "Footer Left\tFooter Middle\tFooter Right\n",
                    extractor.Text
            );
        }

コード例 #29

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

 // Checks that extracting FieldCodes.docx produces a non-empty text that
 // contains neither "AUTHOR" nor "CREATEDATE".
 public void TestNoFieldCodes()
 {
     XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("FieldCodes.docx");
     XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);
     string extracted = wordExtractor.Text;

     bool hasContent = extracted.Length > 0;
     Assert.IsTrue(hasContent);

     // None of these strings may appear in the extracted text.
     string[] forbiddenFragments = new string[] { "AUTHOR", "CREATEDATE" };
     foreach (string fragment in forbiddenFragments)
     {
         Assert.IsFalse(extracted.Contains(fragment));
     }
 }

コード例 #30

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

 // Checks that extracting FldSimple.docx produces a non-empty text that
 // contains the string "FldSimple.docx".
 public void TestFldSimpleContent()
 {
     XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("FldSimple.docx");
     XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);
     string extracted = wordExtractor.Text;

     bool hasContent = extracted.Length > 0;
     Assert.IsTrue(hasContent);

     bool containsFileName = extracted.Contains("FldSimple.docx");
     Assert.IsTrue(containsFileName);
 }

コード例 #31

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

        // Checks that the text extracted from delins.docx contains both
        // "pendant worn" and "extremely well".
        public void TestInsertedDeletedText()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("delins.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            // Read the text once and reuse it for every assertion instead of
            // going back to the extractor each time.
            // NOTE(review): assumes the Text getter is deterministic - confirm.
            string text = extractor.Text;

            Assert.IsTrue(text.Contains("pendant worn"));
            Assert.IsTrue(text.Contains("extremely well"));
        }

コード例 #32

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

        // Checks that the text extracted from Headers.docx contains
        // "Section 1", "Section 2" and "Section 3".
        public void TestParagraphHeader()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("Headers.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            // Read the text once and reuse it for every assertion instead of
            // going back to the extractor each time.
            // NOTE(review): assumes the Text getter is deterministic - confirm.
            string text = extractor.Text;

            Assert.IsTrue(text.Contains("Section 1"));
            Assert.IsTrue(text.Contains("Section 2"));
            Assert.IsTrue(text.Contains("Section 3"));
        }

コード例 #33

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

        // Checks that the text extracted from form_footnotes.docx contains
        // "testdoc" and "test phrase"; on failure the whole extracted text is
        // included in the assertion message.
        public void TestFormFootnotes()
        {
            XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("form_footnotes.docx");
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

            String extracted = wordExtractor.Text;
            String failureMessage = "Unable to find expected word in text\n" + extracted;

            String[] expectedWords = new String[] { "testdoc", "test phrase" };
            foreach (String word in expectedWords)
            {
                Assert.IsTrue(extracted.Contains(word), failureMessage);
            }
        }

コード例 #34

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

        // Checks that the text extracted from endnotes.docx contains "XXX".
        public void TestEndnotes()
        {
            XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("endnotes.docx");
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

            string extracted = wordExtractor.Text;
            bool containsMarker = extracted.Contains("XXX");

            Assert.IsTrue(containsMarker);
        }

コード例 #35

0

ファイルを表示

        // Part 1: checks that every expected fragment appears in the lower-cased
        // text extracted from Bug54849.docx.
        // Part 2: checks that fragments of Bug54771a.docx appear exactly once
        // (and "test\n" exactly twice), guarding against duplicated text.
        public void TestSimpleControlContent()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("Bug54849.docx");

            String[] targs = new String[] {
                "header_rich_text",
                "rich_text",
                "rich_text_pre_table\nrich_text_cell1\t\t\t\n\t\t\t\n\t\t\t\n\nrich_text_post_table",
                "plain_text_no_newlines",
                "plain_text_with_newlines1\nplain_text_with_newlines2\n",
                "watermelon\n",
                "dirt\n",
                "4/16/2013\n",
                "rich_text_in_cell",
                "abc",
                "rich_text_in_paragraph_in_cell",
                "footer_rich_text",
                "footnote_sdt",
                "endnote_sdt"
            };
            XWPFWordExtractor ex = new XWPFWordExtractor(doc);

            try
            {
                // Lower-case with the invariant culture so the result does not
                // depend on the machine's current culture.
                String s    = ex.Text.ToLowerInvariant();
                int    hits = 0;

                foreach (String targ in targs)
                {
                    bool hitted = s.Contains(targ);
                    if (hitted)
                    {
                        hits++;
                    }
                    Assert.AreEqual(true, hitted, "controlled content loading-" + targ);
                }
                Assert.AreEqual(targs.Length, hits, "controlled content loading hit count");
            }
            finally
            {
                // Close even when an assertion fails.
                ex.Close();
            }

            doc   = XWPFTestDataSamples.OpenSampleDocument("Bug54771a.docx");
            targs = new String[] {
                "bb",
                "test subtitle\n",
                "test user\n",
            };
            ex = new XWPFWordExtractor(doc);

            try
            {
                String s = ex.Text.ToLowerInvariant();

                //At one point in development there were three copies of the text.
                //This ensures that there is only one copy.
                foreach (String targ in targs)
                {
                    // The targets are literal text, so escape them before using
                    // them as a regex pattern; every entry of the returned
                    // collection is a successful match, so Count is the hit count.
                    int hit = Regex.Matches(s, Regex.Escape(targ)).Count;
                    Assert.AreEqual(1, hit, "controlled content loading-" + targ);
                }

                //"test\n" appears twice: once as the "title" and once in the text.
                //This also happens when you save this document as text from MSWord.
                int testHits = Regex.Matches(s, Regex.Escape("test\n")).Count;
                Assert.AreEqual(2, testHits, "test<N>");
            }
            finally
            {
                // Close even when an assertion fails.
                ex.Close();
            }
        }

コード例 #36

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

        // Checks that the text extracted from table_footnotes.docx contains "snoska".
        public void TestTableFootnotes()
        {
            XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("table_footnotes.docx");
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

            string extracted = wordExtractor.Text;
            bool containsFootnoteWord = extracted.Contains("snoska");

            Assert.IsTrue(containsFootnoteWord);
        }

コード例 #37

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: xoposhiy/npoi

        // Extracts TestDocument.docx twice, first with hyperlink fetching switched
        // off and then switched on, and compares each result with the full
        // expected text.
        public void TestGetWithHyperlinks()
        {
            XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("TestDocument.docx");
            XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);

            // The leading lines are the same in both expected outputs.
            string sharedLines =
                    "This is a test document.\nThis bit is in bold and italic\n" +
                    "Back to normal\n" +
                    "This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.\n";
            string expectedWithoutLinks = sharedLines +
                    "We have a hyperlink here, and another.\n";
            string expectedWithLinks = sharedLines +
                    "We have a hyperlink <http://poi.apache.org/> here, and another.\n";

            // Hyperlink targets disabled: only the link text is expected.
            wordExtractor.SetFetchHyperlinks(false);
            string plainOutput = wordExtractor.Text;
            Assert.AreEqual(expectedWithoutLinks, plainOutput);

            // Hyperlink targets enabled: one link is a real URL, the other just
            // points to the top of the page, so only one URL is expected.
            wordExtractor.SetFetchHyperlinks(true);
            string linkedOutput = wordExtractor.Text;
            Assert.AreEqual(expectedWithLinks, linkedOutput);
        }

コード例 #38

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: hijson/npoi

        // Regression test for bug 55733: reading the text of 55733.docx must
        // complete without throwing. The extracted value itself is not checked.
        public void TestBug55733()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("55733.docx");
            XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

            try
            {
                // Check it gives text without error
                string text = extractor.Text;
            }
            finally
            {
                // Close even when the extraction throws, so a failing run
                // does not leave the extractor open.
                extractor.Close();
            }
        }

コード例 #39

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: mdjasim/npoi

        // Checks that every expected fragment appears in the lower-cased text
        // extracted from Bug54849.docx. The fragment names suggest content
        // from headers, footers, tables, footnotes and endnotes.
        public void TestSimpleControlContent()
        {
            XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("Bug54849.docx");
            String[] targs = new String[]{
                "header_rich_text",
                "rich_text",
                "rich_text_pre_table\nrich_text_cell1\t\t\t\n\nrich_text_post_table",
                "plain_text_no_newlines",
                "plain_text_with_newlines1\nplain_text_with_newlines2\n",
                "watermelon\n",
                "dirt\n",
                "4/16/2013\n",
                "rich_text_in_paragraph_in_cell",
                "footer_rich_text",
                "footnote_sdt",
                "endnote_sdt"
            };
            XWPFWordExtractor ex = new XWPFWordExtractor(doc);

            // Lower-case with the invariant culture and search ordinally so
            // the result does not depend on the machine's current culture.
            String s = ex.Text.ToLowerInvariant();
            int hits = 0;

            foreach (String targ in targs)
            {
                bool hit = s.IndexOf(targ, StringComparison.Ordinal) > -1;
                if (hit)
                {
                    hits++;
                }
                Assert.AreEqual(true, hit, "controlled content loading-" + targ);
            }
            Assert.AreEqual(targs.Length, hits, "controlled content loading hit count");
        }

コード例 #40

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: mdjasim/npoi

 // Checks that the text extracted from footnotes.docx contains "snoska" and
 // the inline footnote reference marker "[footnoteRef:1]" in its expected
 // context.
 public void TestFootnotes()
 {
     XWPFDocument doc = XWPFTestDataSamples.OpenSampleDocument("footnotes.docx");
     XWPFWordExtractor extractor = new XWPFWordExtractor(doc);

     // Both assertions use the same extracted text rather than reading the
     // extractor a second time.
     String text = extractor.Text;
     Assert.IsTrue(text.Contains("snoska"));
     Assert.IsTrue(text.Contains("Eto ochen prostoy[footnoteRef:1] text so snoskoy"));
 }

コード例 #41

0

ファイルを表示

ファイル: TestXWPFWordExtractor.cs プロジェクト: mdjasim/npoi

 // Checks that the text extracted from endnotes.docx contains "XXX" and the
 // inline endnote reference marker "[endnoteRef:2]" in its expected context.
 public void TestEndnotes()
 {
     XWPFDocument sample = XWPFTestDataSamples.OpenSampleDocument("endnotes.docx");
     XWPFWordExtractor wordExtractor = new XWPFWordExtractor(sample);
     string extracted = wordExtractor.Text;

     // Each fragment must occur somewhere in the extracted text.
     string[] requiredFragments = new string[]
     {
         "XXX",
         "tilaka [endnoteRef:2]or 'tika'"
     };
     foreach (string fragment in requiredFragments)
     {
         Assert.IsTrue(extracted.Contains(fragment));
     }
 }

C# (CSharp) NPOI.XWPF.Extractor XWPFWordExtractorの例