Exemple #1
0
        /**
         * Not all documents have all the images concatenated in the data stream
         * although MS claims so. The best approach is to scan all character Runs.
         *
         * @return a list of Picture objects found in current document
         */
        public List <Picture> GetAllPictures()
        {
            List <Picture> found = new List <Picture>();
            Range wholeDocument = _document.GetOverallRange();

            // Visit every character run of the overall range; a run contributes
            // to the result only when it exists and a picture can be extracted from it.
            for (int runIndex = 0; runIndex < wholeDocument.NumCharacterRuns; runIndex++)
            {
                CharacterRun candidate = wholeDocument.GetCharacterRun(runIndex);
                if (candidate != null)
                {
                    Picture extracted = ExtractPicture(candidate, false);
                    if (extracted != null)
                    {
                        found.Add(extracted);
                    }
                }
            }

            // Hand the same list to the Escher record search
            // (presumably it appends any pictures it finds there - confirm in SearchForPictures).
            SearchForPictures(_dgg.EscherRecords, found);

            return found;
        }
Exemple #2
0
        public void TestContents()
        {
            const int totalPasses = 3;

            // Expected text for the main body, the header/footer story and the
            // overall range; these do not change between passes.
            String expectedBody = a_page_1 + page_break + "\r" + a_page_2;
            String expectedHeaderStory = headerDef + a_header + footerDef + a_footer + endHeaderFooter;
            String expectedOverall = expectedBody + headerDef + a_header + footerDef + a_footer + endHeaderFooter + "\r";

            HWPFDocument current = docAscii;

            for (int pass = 0; pass < totalPasses; pass++)
            {
                // Verify the three ranges against the document used for this pass
                Assert.AreEqual(expectedBody, current.GetRange().Text);
                Assert.AreEqual(expectedHeaderStory, current.GetHeaderStoryRange().Text);
                Assert.AreEqual(expectedOverall, current.GetOverallRange().Text);

                // Only the first pass is followed by a write-out / read-back;
                // later passes re-check that same round-tripped document.
                // TODO run more than once
                if (pass == 0)
                {
                    current = HWPFTestDataSamples.WriteOutAndReadBack(current);
                }
            }
        }
Exemple #3
0
        public void TestRangeDeleteOne()
        {
            HWPFDocument document = HWPFTestDataSamples.OpenSampleFile(illustrativeDocFile);

            // Drill down to the third paragraph of the single section and
            // confirm it holds the untouched text.
            Range overall = document.GetOverallRange();
            Assert.AreEqual(1, overall.NumSections);

            Section onlySection = overall.GetSection(0);
            Assert.AreEqual(5, onlySection.NumParagraphs);

            Paragraph target = onlySection.GetParagraph(2);
            String before = target.Text;
            Assert.AreEqual(originalText, before);

            // Find the search text inside that paragraph
            int relativeOffset = before.IndexOf(searchText);
            Assert.AreEqual(192, relativeOffset);

            // Build a range covering exactly the search text, using
            // offsets relative to the paragraph's start offset.
            int from = target.StartOffset + relativeOffset;
            int to = from + searchText.Length;
            Range doomed = new Range(from, to, target.GetDocument());
            Assert.AreEqual(searchText, doomed.Text);

            doomed.Delete();

            // Fetch a fresh range so the model re-calculates it before we evaluate it
            Range refreshed = document.GetRange();
            Assert.AreEqual(1, refreshed.NumSections);

            Section refreshedSection = refreshed.GetSection(0);
            Assert.AreEqual(5, refreshedSection.NumParagraphs);

            Paragraph edited = refreshedSection.GetParagraph(2);
            Assert.AreEqual(expectedText2, edited.Text);

            // Reading the full text after the delete can lead to an out-of-bounds
            // exception, so it is exercised here even though nothing is asserted on it.
            String wholeText = document.GetRange().Text;
        }
Exemple #4
0
        public void TestContentsUnicode()
        {
            // Expected text for each of the three ranges of the Unicode sample
            String expectedBody = u_page_1 + page_break + "\r" + u_page_2;
            String expectedHeaderStory = headerDef + u_header + footerDef + u_footer + endHeaderFooter;
            String expectedOverall = expectedBody + headerDef + u_header + footerDef + u_footer + endHeaderFooter + "\r";

            // Main body range
            Range body = docUnicode.GetRange();
            Assert.AreEqual(expectedBody, body.Text);

            // Header / footer story range
            Range headerStory = docUnicode.GetHeaderStoryRange();
            Assert.AreEqual(expectedHeaderStory, headerStory.Text);

            // Overall range: body, then header story, then a trailing "\r"
            Range overall = docUnicode.GetOverallRange();
            Assert.AreEqual(expectedOverall, overall.Text);
        }
        public void TestUnicodeParagraphDefInitions()
        {
            // Length of the main body text that the assertions below expect
            const int bodyLength = 408;

            Range body = u.GetRange();

            String[] firstPageLines = u_page_1.Split('\r');
            // Split as well, although no assertion below reads it
            String[] secondPageLines = u_page_2.Split('\r');

            String expectedBody = u_page_1 + page_break + "\r" + u_page_2;

            Assert.AreEqual(expectedBody, body.Text);
            Assert.AreEqual(bodyLength, body.Text.Length);

            // A single section is expected, spanning the whole body
            Assert.AreEqual(1, body.NumSections);
            Assert.AreEqual(1, u.SectionTable.GetSections().Count);

            Section onlySection = body.GetSection(0);
            Assert.AreEqual(expectedBody, onlySection.Text);
            Assert.AreEqual(0, onlySection.StartOffset);
            Assert.AreEqual(bodyLength, onlySection.EndOffset);

            List <PAPX> definitions = body._paragraphs;
            Assert.AreEqual(35, definitions.Count);

            // Overall text length and header textbox end, then fetch the last definition.
            Assert.AreEqual(531, u.GetOverallRange().Text.Length);
            Assert.AreEqual(530, u.GetCPSplitCalculator().GetHeaderTextboxEnd());
            PropertyNode lastDefinition = (PropertyNode)definitions[34];
            // Disabled check: the last definition's End was expected to be 530.

            // Only the first five definitions are examined in detail
            PropertyNode[] leading = new PropertyNode[5];
            for (int i = 0; i < leading.Length; i++)
            {
                leading[i] = (PropertyNode)definitions[i];
            }

            // Each of them must start and end before the end of the body text
            foreach (PropertyNode node in leading)
            {
                Assert.IsTrue(node.Start < bodyLength);
                Assert.IsTrue(node.End < bodyLength);
            }

            // The first three must line up with the lines of page one; each
            // line is followed by one '\r', hence the "+ 1" per line.
            int boundary = 0;
            Assert.AreEqual(boundary, leading[0].Start);

            boundary += firstPageLines[0].Length + 1;
            Assert.AreEqual(boundary, leading[0].End);
            Assert.AreEqual(boundary, leading[1].Start);

            boundary += firstPageLines[1].Length + 1;
            Assert.AreEqual(boundary, leading[1].End);
            Assert.AreEqual(boundary, leading[2].Start);

            boundary += firstPageLines[2].Length + 1;
            Assert.AreEqual(boundary, leading[2].End);
        }
Exemple #6
0
        public void TestDocStructure()
        {
            HWPFDocument document = HWPFTestDataSamples.OpenSampleFile(illustrativeDocFile);

            // The overall range first
            Range everything = document.GetOverallRange();
            Assert.AreEqual(1, everything.NumSections);
            Assert.AreEqual(5, everything.NumParagraphs);

            // Then the main document range only
            Range mainText = document.GetRange();
            Assert.AreEqual(1, mainText.NumSections);
            Assert.AreEqual(1, document.SectionTable.GetSections().Count);

            Section onlySection = mainText.GetSection(0);
            Assert.AreEqual(5, onlySection.NumParagraphs);

            Paragraph intro = onlySection.GetParagraph(0);
            Assert.AreEqual(1, intro.NumCharacterRuns);
            Assert.AreEqual(introText, intro.Text);

            Paragraph filler = onlySection.GetParagraph(1);
            Assert.AreEqual(5, filler.NumCharacterRuns);
            Assert.AreEqual(fillerText, filler.Text);

            // Third paragraph: check its definition in the paragraph table, then its content
            PAPX thirdDefinition = (PAPX)document.ParagraphTable.GetParagraphs()[2];
            Assert.AreEqual(132, thirdDefinition.Start);
            Assert.AreEqual(400, thirdDefinition.End);

            Paragraph third = onlySection.GetParagraph(2);
            Assert.AreEqual(5, third.NumCharacterRuns);
            Assert.AreEqual(originalText, third.Text);

            // Fourth paragraph: same two checks
            PAPX fourthDefinition = (PAPX)document.ParagraphTable.GetParagraphs()[3];
            Assert.AreEqual(400, fourthDefinition.Start);
            Assert.AreEqual(438, fourthDefinition.End);

            Paragraph fourth = onlySection.GetParagraph(3);
            Assert.AreEqual(1, fourth.NumCharacterRuns);
            Assert.AreEqual(lastText, fourth.Text);

            // Range, section and the sum of all five paragraphs must agree on text length
            Assert.AreEqual(439, mainText.Text.Length);
            Assert.AreEqual(439, onlySection.Text.Length);

            int combinedLength = 0;
            for (int index = 0; index < 5; index++)
            {
                combinedLength += onlySection.GetParagraph(index).Text.Length;
            }
            Assert.AreEqual(439, combinedLength);
        }