Пример #1
0
        /// <summary>
        /// Parses SampleDoc.docx with the document parser and checks that the
        /// resulting paragraphs match the expected texts, in order.
        /// </summary>
        public void TestParseSimpleDocumentFromWord()
        {
            // Expected paragraph texts in document order; the fourth entry is a
            // paragraph whose text is a single line feed.
            string[] expectedTexts =
            {
                "I am a test document",
                "This is page 1",
                "I am Calibri (Body) in font size 11",
                "\n",
                "This is page two",
                "It’s Arial Black in 16 point",
                "It’s also in blue",
            };

            string wordPath = TestDataSample.GetWordPath("SampleDoc.docx");
            IDocumentParser documentParser = ParserFactory.CreateDocument(new ParserContext(wordPath));
            ToxyDocument parsed = documentParser.Parse();

            // Check the count first so the indexed comparisons below stay in range.
            Assert.AreEqual(expectedTexts.Length, parsed.Paragraphs.Count);
            for (int i = 0; i < expectedTexts.Length; i++)
            {
                Assert.AreEqual(expectedTexts[i], parsed.Paragraphs[i].Text);
            }
        }
Пример #2
0
        /// <summary>
        /// Parses SampleDoc.docx with the text parser, splits the result on
        /// <see cref="Environment.NewLine"/> (dropping empty entries) and checks
        /// that the remaining lines match the expected texts, in order.
        /// </summary>
        public void TestParseTextFromWord()
        {
            string[] expectedLines =
            {
                "I am a test document",
                "This is page 1",
                "I am Calibri (Body) in font size 11",
                "This is page two",
                "It’s Arial Black in 16 point",
                "It’s also in blue",
            };

            string wordPath = TestDataSample.GetWordPath("SampleDoc.docx");
            ITextParser textParser = ParserFactory.CreateText(new ParserContext(wordPath));
            string text = textParser.Parse();

            Assert.IsNotNull(text);

            // Empty entries are removed, so blank lines in the output are not counted.
            string[] separators = { Environment.NewLine };
            string[] actualLines = text.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            Assert.AreEqual(expectedLines.Length, actualLines.Length);
            for (int i = 0; i < expectedLines.Length; i++)
            {
                Assert.AreEqual(expectedLines[i], actualLines[i]);
            }
        }
Пример #3
0
        /// <summary>
        /// Parses simple-table.docx with the document parser and checks that the
        /// resulting paragraphs match the expected texts, in order.
        /// </summary>
        public void TestParseDocumentWithTable()
        {
            // Expected order as asserted here: the two body paragraphs come first,
            // followed by the six "Cell r,c" texts.
            string[] expectedTexts =
            {
                "This is a Word document that was created using Word 97 – SR2.  It contains a paragraph, a table consisting of 2 rows and 3 columns and a final paragraph.",
                "This text is below the table.",
                "Cell 1,1",
                "Cell 1,2",
                "Cell 1,3",
                "Cell 2,1",
                "Cell 2,2",
                "Cell 2,3",
            };

            string wordPath = TestDataSample.GetWordPath("simple-table.docx");
            IDocumentParser documentParser = ParserFactory.CreateDocument(new ParserContext(wordPath));
            ToxyDocument parsed = documentParser.Parse();

            // Check the count first so the indexed comparisons below stay in range.
            Assert.AreEqual(expectedTexts.Length, parsed.Paragraphs.Count);
            for (int i = 0; i < expectedTexts.Length; i++)
            {
                Assert.AreEqual(expectedTexts[i], parsed.Paragraphs[i].Text);
            }
        }
Пример #4
0
        /// <summary>
        /// Parses SampleDoc.doc with the text parser, splits the result on
        /// carriage returns and checks every resulting segment, including the
        /// form-feed segment and the trailing empty segment.
        /// </summary>
        public void TestParseTextFromWord()
        {
            // Splitting on '\r' keeps empty entries, so the expected list contains
            // the "\f" segment between the two pages and a final empty string.
            string[] expectedSegments =
            {
                "I am a test document",
                "This is page 1",
                "I am Calibri (Body) in font size 11",
                "\f",
                "This is page two",
                "It’s Arial Black in 16 point",
                "It’s also in blue",
                "",
            };

            string wordPath = TestDataSample.GetWordPath("SampleDoc.doc");
            ITextParser textParser = ParserFactory.CreateText(new ParserContext(wordPath));
            string text = textParser.Parse();

            Assert.IsNotNull(text);

            string[] actualSegments = text.Split('\r');

            Assert.AreEqual(expectedSegments.Length, actualSegments.Length);
            for (int i = 0; i < expectedSegments.Length; i++)
            {
                Assert.AreEqual(expectedSegments[i], actualSegments[i]);
            }
        }