Example #1
0
        /// <summary>
        /// Checks the ordered-list part of the "Lists.doc" sample: the text of
        /// paragraphs 5 through 9 and the values their GetLvl() and GetIlvl()
        /// calls return.
        /// </summary>
        public void TestOrderedLists()
        {
            Range range = HWPFTestDataSamples.OpenSampleFile("Lists.doc").GetRange();

            Assert.AreEqual(40, range.NumParagraphs);

            const int firstParagraph = 5;
            const int lastParagraph  = 9;

            // Expected text of paragraphs 5..9, in document order.
            string[] expectedTexts =
            {
                "Next up is an ordered list:\r",
                "Ordered list 1\r",
                "OL 2\r",
                "OL 3\r",
                "Now for an un-ordered list with a different bullet style:\r"
            };

            for (int index = firstParagraph; index <= lastParagraph; index++)
            {
                Assert.AreEqual(expectedTexts[index - firstParagraph], range.GetParagraph(index).Text);
            }

            // All five paragraphs are expected to report 9 from GetLvl() ...
            for (int index = firstParagraph; index <= lastParagraph; index++)
            {
                Assert.AreEqual(9, range.GetParagraph(index).GetLvl());
            }

            // ... and 0 from GetIlvl().
            for (int index = firstParagraph; index <= lastParagraph; index++)
            {
                Assert.AreEqual(0, range.GetParagraph(index).GetIlvl());
            }
        }
Example #2
0
        /// <summary>
        /// Regression test against the "Bug49820.doc" sample. Delegates the level
        /// checks for paragraphs 0 to 11 to assertLevels and then writes the style
        /// level and paragraph level of every paragraph to the console.
        /// </summary>
        public void Test49820()
        {
            HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Bug49820.doc");
            Range        body     = document.GetRange();
            StyleSheet   styles   = document.GetStyleSheet();

            // Paragraphs 0..9: the same number is passed for all three numeric arguments.
            for (int level = 0; level <= 9; level++)
            {
                assertLevels(body, styles, level, level, level);
            }

            // Paragraphs 10 and 11 use distinct expected values.
            assertLevels(body, styles, 10, 9, 0);
            assertLevels(body, styles, 11, 9, 4);

            // Dump the levels of every paragraph to the console.
            for (int index = 0; index < body.NumParagraphs; index++)
            {
                Paragraph paragraph      = body.GetParagraph(index);
                int       styleLevel     = styles.GetParagraphStyle(paragraph.GetStyleIndex()).GetLvl();
                int       paragraphLevel = paragraph.GetLvl();
                string    line           = "Style level: " + styleLevel + ", paragraph level: " + paragraphLevel + ", text: " + paragraph.Text;

                Console.WriteLine(line);
            }
        }
Example #3
0
        /// <summary>
        /// Converts a sample Word document to HTML with WordToHtmlConverter and
        /// returns the generated markup.
        /// </summary>
        /// <param name="sampleFileName">Name of the sample resource to open.</param>
        /// <param name="emulatePictureStorage">
        /// Currently has no effect: the picture-manager emulation it was meant to
        /// enable is not implemented in this method.
        /// </param>
        /// <returns>The InnerXml of the converter's output document.</returns>
        private static String GetHtmlText(String sampleFileName,
                                          bool emulatePictureStorage)
        {
            HWPFDocument hwpfDocument;

            // Release the resource stream as soon as the document has been built from it;
            // previously the stream was never closed.
            using (var sampleStream = POIDataSamples.GetDocumentInstance().OpenResourceAsStream(sampleFileName))
            {
                hwpfDocument = new HWPFDocument(sampleStream);
            }

            XmlDocument         newDocument         = new XmlDocument();
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(newDocument);

            // TODO: when emulatePictureStorage is true, install a pictures manager whose
            // SavePicture simply returns the suggested name (not yet implemented).

            wordToHtmlConverter.ProcessDocument(hwpfDocument);

            return(wordToHtmlConverter.Document.InnerXml);
        }
Example #4
0
        /// <summary>
        /// Checks that the "ThreeColHeadFoot.doc" sample is stored as a single text
        /// piece of 339 characters / 339 bytes, both when first opened and after a
        /// SaveAndReload round trip.
        /// </summary>
        public void TestAsciiParts()
        {
            HWPFDocument current = HWPFTestDataSamples.OpenSampleFile("ThreeColHeadFoot.doc");

            // Pass 0 inspects the freshly opened sample; pass 1 inspects it after save and re-load.
            for (int pass = 0; pass < 2; pass++)
            {
                if (pass == 1)
                {
                    current = SaveAndReload(current);
                }

                TextPieceTable table = current.TextTable;

                // All ascii, so stored in one big lump
                Assert.AreEqual(1, table.TextPieces.Count);
                TextPiece piece = (TextPiece)table.TextPieces[0];

                Assert.AreEqual(0, piece.Start);
                Assert.AreEqual(339, piece.End);
                Assert.AreEqual(339, piece.CharacterLength);
                Assert.AreEqual(339, piece.BytesLength);

                string pieceText = piece.GetStringBuilder().ToString();
                Assert.IsTrue(pieceText.StartsWith("This is a sample word document"));
            }
        }
Example #5
0
        /// <summary>
        /// Checks the structure of the illustrative sample document: one section,
        /// three paragraphs, and a third paragraph whose text equals originalText,
        /// is split into three character runs and starts at insertionPoint.
        /// </summary>
        public void TestDocStructure()
        {
            Range documentRange = HWPFTestDataSamples.OpenSampleFile(illustrativeDocFile).GetRange();

            Assert.AreEqual(1, documentRange.NumSections);
            Section onlySection = documentRange.GetSection(0);

            Assert.AreEqual(3, onlySection.NumParagraphs);
            Paragraph target = onlySection.GetParagraph(2);

            Assert.AreEqual(originalText, target.Text);

            Assert.AreEqual(3, target.NumCharacterRuns);

            // The three runs concatenated must reproduce the paragraph text.
            String joinedRuns = String.Concat(
                target.GetCharacterRun(0).Text,
                target.GetCharacterRun(1).Text,
                target.GetCharacterRun(2).Text);

            Assert.AreEqual(originalText, joinedRuns);

            Assert.AreEqual(insertionPoint, target.StartOffset);
        }
Example #6
0
        /// <summary>
        /// Loads the text of a Word document into <c>textBody</c>.
        /// Files ending in ".doc" are read with HWPFDocument and files ending in
        /// ".docx" with XWPFDocument / XWPFWordExtractor. The extension match is
        /// ordinal and case-insensitive. Any other extension leaves
        /// <c>textBody</c> as it was.
        /// </summary>
        /// <param name="path">Path of the document to read.</param>
        public void Init(string path)
        {
            if (path.EndsWith(".doc", System.StringComparison.OrdinalIgnoreCase))
            {
                HWPFDocument hwpf;
                using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
                {
                    hwpf = new HWPFDocument(file);
                }

                this.textBody = hwpf.Text.ToString();
            }
            else if (path.EndsWith(".docx", System.StringComparison.OrdinalIgnoreCase))
            {
                XWPFDocument xwpf;
                using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
                {
                    xwpf = new XWPFDocument(file);
                }

                XWPFWordExtractor ex = new XWPFWordExtractor(xwpf);
                this.textBody = ex.Text;
            }

            // An unsupported extension may leave textBody unset; guard so the
            // normalization below cannot throw a NullReferenceException.
            if (textBody != null)
            {
                // NOTE(review): as written both replacements map a character to itself.
                // Presumably one side was meant to be a full-width parenthesis; confirm
                // against the original source encoding before changing the literals.
                textBody = textBody.Replace("(", "(").Replace(")", ")");
            }
        }
Example #7
0
        /// <summary>
        /// Checks the indented-text part of the "Lists.doc" sample: the text of
        /// paragraphs 34 through 38 and the GetLvl() / GetIlvl() values of
        /// paragraphs 34 through 39.
        /// </summary>
        public void TestIndentedText()
        {
            Range range = HWPFTestDataSamples.OpenSampleFile("Lists.doc").GetRange();

            Assert.AreEqual(40, range.NumParagraphs);

            const int firstParagraph = 34;
            const int lastParagraph  = 39;

            // Expected text of paragraphs 34..38 (paragraph 39 has no text assertion).
            string[] expectedTexts =
            {
                "Finally we want some indents, to tell the difference\r",
                "Indented once\r",
                "Indented twice\r",
                "Indented three times\r",
                "The end!\r"
            };

            for (int offset = 0; offset < expectedTexts.Length; offset++)
            {
                Assert.AreEqual(expectedTexts[offset], range.GetParagraph(firstParagraph + offset).Text);
            }

            for (int index = firstParagraph; index <= lastParagraph; index++)
            {
                Assert.AreEqual(9, range.GetParagraph(index).GetLvl());
            }

            for (int index = firstParagraph; index <= lastParagraph; index++)
            {
                Assert.AreEqual(0, range.GetParagraph(index).GetIlvl());
            }

            // TODO Test the indent
        }
Example #8
0
        /// <summary>
        /// Emits an auto-numbered endnote: appends a numbered internal link to
        /// <paramref name="block"/>, builds a separate endnote block that starts with
        /// a link pointing back, fills it with the characters of
        /// <paramref name="endnoteTextRange"/> and adds it to the endnotes collection.
        /// </summary>
        /// <param name="wordDocument">Document passed through to ProcessCharacters.</param>
        /// <param name="noteIndex">Not used by this implementation.</param>
        /// <param name="block">Element that receives the forward link.</param>
        /// <param name="endnoteTextRange">Range holding the endnote text.</param>
        protected override void ProcessEndnoteAutonumbered(HWPFDocument wordDocument,
                                                           int noteIndex, XmlElement block, Range endnoteTextRange)
        {
            String linkNumber;

            // Bump the shared link counter and capture its new value under the lock.
            lock (objLinkCounter)
            {
                linkNumber = (++internalLinkCounter).ToString();
            }

            String forwardLinkName  = "endnote_" + linkNumber;
            String backwardLinkName = "endnote_back_" + linkNumber;

            // Forward link: targets the endnote and carries the id the back link points to.
            XmlElement forwardLink = foDocumentFacade.CreateBasicLinkInternal(forwardLinkName);
            forwardLink.AppendChild(CreateNoteInline(linkNumber));
            SetId(forwardLink, backwardLinkName);
            block.AppendChild(forwardLink);

            // Endnote block: starts with the backward link, which carries the forward id.
            XmlElement endnote      = foDocumentFacade.CreateBlock();
            XmlElement backwardLink = foDocumentFacade.CreateBasicLinkInternal(backwardLinkName);
            backwardLink.AppendChild(CreateNoteInline(linkNumber + " "));
            SetId(backwardLink, forwardLinkName);
            endnote.AppendChild(backwardLink);

            ProcessCharacters(wordDocument, int.MinValue, endnoteTextRange, endnote);
            WordToFoUtils.CompactInlines(endnote);

            this.endnotes.Add(endnote);
        }
Example #9
0
        /// <summary>
        /// Checks that "testPictures.doc" yields seven pictures, that each one has a
        /// suggested extension and file name, and that pictures 0, 1, 3, 4, 5 and 6
        /// report the expected extension and MIME type.
        /// </summary>
        public void TestDifferentImages()
        {
            HWPFDocument   document = HWPFTestDataSamples.OpenSampleFile("testPictures.doc");
            List <Picture> pictures = document.GetPicturesTable().GetAllPictures();

            Assert.IsNotNull(pictures);
            Assert.AreEqual(7, pictures.Count);
            foreach (Picture picture in pictures)
            {
                Assert.IsNotNull(picture.SuggestFileExtension());
                Assert.IsNotNull(picture.SuggestFullFileName());
            }

            // Parallel tables: picture index -> expected extension / MIME type.
            // Index 2 is deliberately absent; the original test does not check it.
            int[]    checkedIndexes     = { 0, 1, 3, 4, 5, 6 };
            string[] expectedExtensions = { "jpg", "jpg", "png", "png", "wmf", "jpg" };
            string[] expectedMimeTypes  =
            {
                "image/jpeg", "image/jpeg", "image/png", "image/png", "image/x-wmf", "image/jpeg"
            };

            for (int k = 0; k < checkedIndexes.Length; k++)
            {
                Picture picture = pictures[checkedIndexes[k]];

                Assert.AreEqual(expectedExtensions[k], picture.SuggestFileExtension());
                Assert.AreEqual(expectedMimeTypes[k], picture.MimeType);
            }
        }
Example #10
0
        /// <summary>
        /// Checks the single picture found in the "Bug41898.doc" sample: it must have
        /// a suggested extension and file name, non-null content, a Size of 4, and
        /// content whose first unsigned int is 0x80000000.
        /// </summary>
        public void TestEmfComplexImage()
        {
            // NOTE(review): an earlier comment here said this test was temporarily
            // commented out because the file "emf_2003_image" holds an office drawing
            // object rather than a picture. The assertions below are active and use
            // "Bug41898.doc"; confirm which statement is current.

            List <Picture> pictures = HWPFTestDataSamples.OpenSampleFile("Bug41898.doc")
                                      .GetPicturesTable().GetAllPictures();

            Assert.IsNotNull(pictures);
            Assert.AreEqual(1, pictures.Count);

            Picture picture = pictures[0];

            Assert.IsNotNull(picture.SuggestFileExtension());
            Assert.IsNotNull(picture.SuggestFullFileName());

            // This one's tricky
            // TODO: Fix once we've sorted bug #41898
            Assert.IsNotNull(picture.GetContent());
            Assert.IsNotNull(picture.GetRawContent());

            // These are probably some sort of offset, need to figure them out
            const uint expectedMarker = (uint)0x80000000;

            Assert.AreEqual(4, picture.Size);
            Assert.AreEqual(expectedMarker, LittleEndian.GetUInt(picture.GetContent()));
            Assert.AreEqual(expectedMarker, LittleEndian.GetUInt(picture.GetRawContent()));
        }
Example #11
0
        /// <summary>
        /// Reads the saved-by table of "saved-by-table.doc", compares its entries
        /// with the expected list, then writes the document to memory, reads it
        /// back and compares again.
        /// </summary>
        public void TestReadWrite()
        {
            // This document is widely available on the internet as "blair.doc".
            // Stripping the content and re-saving was not an option because
            // Word (from Office XP) strips this table out.
            HWPFDocument original = HWPFTestDataSamples.OpenSampleFile("saved-by-table.doc");

            // Verify the entries as read from the sample.
            for (int index = 0; index < expected.Count; index++)
            {
                var actualEntry = original.GetSavedByTable().GetEntries()[index];

                Assert.AreEqual(expected[index], actualEntry, "List of saved-by entries was not as expected");
            }

            // Round-trip the whole document through an in-memory stream.
            MemoryStream written = new MemoryStream();
            original.Write(written);
            byte[]       serialized = written.ToArray();
            Stream       reread     = new MemoryStream(serialized);
            HWPFDocument roundTrip  = new HWPFDocument(reread);

            // Verify the entries again on the re-read copy.
            for (int index = 0; index < expected.Count; index++)
            {
                var actualEntry = roundTrip.GetSavedByTable().GetEntries()[index];

                Assert.AreEqual(expected[index], actualEntry, "List of saved-by entries was incorrect after writing");
            }
        }
Example #12
0
        /**
         * Checks that Range.ReplaceText swaps searchText for ReplacementText:
         * the section and paragraph counts stay the same, and paragraphs 2 and 3
         * end up with expectedText2 and expectedText3.
         */
        public void TestRangeReplacementAll()
        {
            Range documentRange = HWPFTestDataSamples.OpenSampleFile(illustrativeDocFile).GetRange();

            // State before the replacement.
            Assert.AreEqual(1, documentRange.NumSections);

            Section currentSection = documentRange.GetSection(0);
            Assert.AreEqual(5, currentSection.NumParagraphs);

            Paragraph currentParagraph = currentSection.GetParagraph(2);
            Assert.AreEqual(originalText, currentParagraph.Text);

            documentRange.ReplaceText(searchText, ReplacementText);

            // State after the replacement: same structure, new text.
            Assert.AreEqual(1, documentRange.NumSections);
            currentSection = documentRange.GetSection(0);
            Assert.AreEqual(5, currentSection.NumParagraphs);

            currentParagraph = currentSection.GetParagraph(2);
            Assert.AreEqual(expectedText2, currentParagraph.Text);

            currentParagraph = currentSection.GetParagraph(3);
            Assert.AreEqual(expectedText3, currentParagraph.Text);
        }
Example #13
0
        /// <summary>
        /// Looks up the office drawing anchored at the start offset of
        /// <paramref name="characterRun"/>, saves its picture data through the
        /// pictures manager and forwards the saved path to the five-argument
        /// ProcessDrawnObject overload. Returns without doing anything when there is
        /// no pictures manager, no drawing (a warning is logged) or no picture data.
        /// </summary>
        /// <param name="doc">Document whose main office drawings are searched.</param>
        /// <param name="characterRun">Run that references the drawn object.</param>
        /// <param name="block">Element passed on to the overload.</param>
        protected void ProcessDrawnObject(HWPFDocument doc,
                                          CharacterRun characterRun, XmlElement block)
        {
            if (GetPicturesManager() == null)
            {
                return;
            }

            // TODO: support headers
            OfficeDrawing drawing = doc.GetOfficeDrawingsMain().GetOfficeDrawingAt(characterRun.StartOffset);
            if (drawing == null)
            {
                logger.Log(POILogger.WARN, "Characters #" + characterRun
                           + " references missing drawn object");
                return;
            }

            byte[] content = drawing.GetPictureData();
            if (content == null)
            {
                // No picture data: possibly an ordinary shape.
                return;
            }

            PictureType pictureType   = PictureType.FindMatchingType(content);
            String      suggestedName = "s" + characterRun.StartOffset + "." + pictureType;
            String      savedPath     = GetPicturesManager().SavePicture(content, pictureType, suggestedName);

            ProcessDrawnObject(doc, characterRun, drawing, savedPath, block);
        }
Example #14
0
        /// <summary>
        /// Writes the document to an in-memory buffer and builds a new
        /// HWPFDocument from the written bytes.
        /// </summary>
        /// <param name="hwpfDocument">Document to serialize.</param>
        /// <returns>A new document read from the serialized bytes.</returns>
        private static HWPFDocument Reload(HWPFDocument hwpfDocument)
        {
            MemoryStream written = new MemoryStream();
            hwpfDocument.Write(written);

            byte[]       serialized = written.ToArray();
            MemoryStream source     = new MemoryStream(serialized);

            return new HWPFDocument(source);
        }
Example #15
0
        /// <summary>
        /// Checks that "HeaderFooterUnicode.doc" is stored as three Unicode text
        /// pieces with the expected character lengths, byte lengths and start/end
        /// positions, both when first opened and after a SaveAndReload round trip.
        /// </summary>
        public void TestUnicodeParts()
        {
            HWPFDocument current = HWPFTestDataSamples.OpenSampleFile("HeaderFooterUnicode.doc");

            // Expected values for pieces A, B and C.
            int[] characterLengths = { 256, 256, 19 };
            int[] byteLengths      = { 512, 512, 38 };
            int[] starts           = { 0, 256, 512 };
            int[] ends             = { 256, 512, 531 };

            // Pass 0 inspects the freshly opened sample; pass 1 inspects it after save and re-load.
            for (int pass = 0; pass < 2; pass++)
            {
                if (pass == 1)
                {
                    current = SaveAndReload(current);
                }

                TextPieceTable table = current.TextTable;

                // In three bits, split every 512 bytes
                Assert.AreEqual(3, table.TextPieces.Count);
                TextPiece[] pieces =
                {
                    (TextPiece)table.TextPieces[0],
                    (TextPiece)table.TextPieces[1],
                    (TextPiece)table.TextPieces[2]
                };

                foreach (TextPiece piece in pieces)
                {
                    Assert.IsTrue(piece.IsUnicode);
                }

                for (int k = 0; k < pieces.Length; k++)
                {
                    Assert.AreEqual(characterLengths[k], pieces[k].CharacterLength);
                }

                for (int k = 0; k < pieces.Length; k++)
                {
                    Assert.AreEqual(byteLengths[k], pieces[k].BytesLength);
                }

                for (int k = 0; k < pieces.Length; k++)
                {
                    Assert.AreEqual(starts[k], pieces[k].Start);
                    Assert.AreEqual(ends[k], pieces[k].End);
                }
            }
        }
Example #16
0
        /// <summary>
        /// Emits a note in text output: appends a "[n]" marker wrapped in
        /// zero-width-space constants to <paramref name="block"/>, then adds a new
        /// note block (prefixed with "^n" and a tab) holding the characters of
        /// <paramref name="noteTextRange"/> to the shared notes element.
        /// </summary>
        /// <param name="wordDocument">Document passed through to ProcessCharacters.</param>
        /// <param name="block">Element that receives the inline marker.</param>
        /// <param name="noteTextRange">Range holding the note text.</param>
        protected void ProcessNote(HWPFDocument wordDocument, XmlElement block,
                                   Range noteTextRange)
        {
            int noteIndex;

            // Take the current counter value, then advance it, under the lock.
            lock (objCounters)
            {
                noteIndex = noteCounters;
                noteCounters++;
            }

            String marker = UNICODECHAR_ZERO_WIDTH_SPACE + "[" + noteIndex
                            + "]" + UNICODECHAR_ZERO_WIDTH_SPACE;
            block.AppendChild(textDocumentFacade.CreateText(marker));

            // Create the container for all notes on first use.
            notes = notes ?? textDocumentFacade.CreateBlock();

            XmlElement note = textDocumentFacade.CreateBlock();
            notes.AppendChild(note);

            String prefix = "^" + noteIndex + "\t ";
            note.AppendChild(textDocumentFacade.CreateText(prefix));
            ProcessCharacters(wordDocument, int.MinValue, noteTextRange, note);
            note.AppendChild(textDocumentFacade.CreateText("\n"));
        }
Example #17
0
        /// <summary>
        /// Checks the aspect ratios, scaling factors, goal sizes and crop values
        /// reported for the two pictures in "testCroppedPictures.doc".
        /// </summary>
        public void TestCroppedPictures()
        {
            List <Picture> pictures = HWPFTestDataSamples.OpenSampleFile("testCroppedPictures.doc")
                                      .GetPicturesTable().GetAllPictures();

            Assert.IsNotNull(pictures);
            Assert.AreEqual(2, pictures.Count);

            // Goal size used by both pictures:
            //   21.17 cm / 2.54 cm/inch * 72dpi * 20 = 12000
            //   15.88 cm / 2.54 cm/inch * 72dpi * 20 = 9000
            const int goalWidth  = 12000;
            const int goalHeight = 9000;

            Picture first = pictures[0];

            Assert.AreEqual(27, first.AspectRatioX);
            Assert.AreEqual(270, first.HorizontalScalingFactor);
            Assert.AreEqual(27, first.AspectRatioY);
            Assert.AreEqual(271, first.VerticalScalingFactor);
            Assert.AreEqual(goalWidth, first.DxaGoal);
            Assert.AreEqual(goalHeight, first.DyaGoal);
            Assert.AreEqual(0, first.DxaCropLeft);
            Assert.AreEqual(0, first.DxaCropRight);
            Assert.AreEqual(0, first.DyaCropTop);
            Assert.AreEqual(0, first.DyaCropBottom);

            Picture second = pictures[1];

            Assert.AreEqual(76, second.AspectRatioX);
            Assert.AreEqual(764, second.HorizontalScalingFactor);
            Assert.AreEqual(68, second.AspectRatioY);
            Assert.AreEqual(685, second.VerticalScalingFactor);
            Assert.AreEqual(goalWidth, second.DxaGoal);
            Assert.AreEqual(goalHeight, second.DyaGoal);

            // TODO YK: The Picture is cropped but HWPF reads the crop parameters all zeros
            Assert.AreEqual(0, second.DxaCropLeft);
            Assert.AreEqual(0, second.DxaCropRight);
            Assert.AreEqual(0, second.DyaCropTop);
            Assert.AreEqual(0, second.DyaCropBottom);
        }
Example #18
0
        /// <summary>
        /// Reads the saved-by table of "saved-by-table.doc", compares its entries
        /// with the expected list, then writes the document to memory, reads it
        /// back and compares again.
        /// </summary>
        public void TestReadWrite()
        {
            // This document is widely available on the internet as "blair.doc".
            // Stripping the content and re-saving was not an option because
            // Word (from Office XP) strips this table out.
            HWPFDocument original = HWPFTestDataSamples.OpenSampleFile("saved-by-table.doc");

            // Verify the entries as read from the sample.
            for (int index = 0; index < expected.Count; index++)
            {
                var actualEntry = original.GetSavedByTable().GetEntries()[index];

                Assert.AreEqual(expected[index], actualEntry, "List of saved-by entries was not as expected");
            }

            // Round-trip the whole document through an in-memory stream.
            MemoryStream written = new MemoryStream();
            original.Write(written);
            byte[]       serialized = written.ToArray();
            Stream       reread     = new MemoryStream(serialized);
            HWPFDocument roundTrip  = new HWPFDocument(reread);

            // Verify the entries again on the re-read copy.
            for (int index = 0; index < expected.Count; index++)
            {
                var actualEntry = roundTrip.GetSavedByTable().GetEntries()[index];

                Assert.AreEqual(expected[index], actualEntry, "List of saved-by entries was incorrect after writing");
            }
        }
Example #19
0
        /// <summary>
        /// Checks the multi-level list part of the "Lists.doc" sample: the text of
        /// paragraphs 12 through 34, that each of them reports 9 from GetLvl(), and
        /// the GetIlvl() value expected for each paragraph.
        /// </summary>
        public void TestMultiLevelLists()
        {
            HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("Lists.doc");
            Range        r   = doc.GetRange();

            Assert.AreEqual(40, r.NumParagraphs);

            Assert.AreEqual("Multi-level un-ordered list:\r", r.GetParagraph(12).Text);
            Assert.AreEqual("ML 1:1\r", r.GetParagraph(13).Text);
            Assert.AreEqual("ML 1:2\r", r.GetParagraph(14).Text);
            Assert.AreEqual("ML 2:1\r", r.GetParagraph(15).Text);
            Assert.AreEqual("ML 2:2\r", r.GetParagraph(16).Text);
            Assert.AreEqual("ML 2:3\r", r.GetParagraph(17).Text);
            Assert.AreEqual("ML 3:1\r", r.GetParagraph(18).Text);
            Assert.AreEqual("ML 4:1\r", r.GetParagraph(19).Text);
            Assert.AreEqual("ML 5:1\r", r.GetParagraph(20).Text);
            Assert.AreEqual("ML 5:2\r", r.GetParagraph(21).Text);
            Assert.AreEqual("ML 2:4\r", r.GetParagraph(22).Text);
            Assert.AreEqual("ML 1:3\r", r.GetParagraph(23).Text);
            Assert.AreEqual("Multi-level ordered list:\r", r.GetParagraph(24).Text);
            Assert.AreEqual("OL 1\r", r.GetParagraph(25).Text);
            Assert.AreEqual("OL 2\r", r.GetParagraph(26).Text);
            Assert.AreEqual("OL 2.1\r", r.GetParagraph(27).Text);
            Assert.AreEqual("OL 2.2\r", r.GetParagraph(28).Text);
            Assert.AreEqual("OL 2.2.1\r", r.GetParagraph(29).Text);
            Assert.AreEqual("OL 2.2.2\r", r.GetParagraph(30).Text);
            Assert.AreEqual("OL 2.2.2.1\r", r.GetParagraph(31).Text);
            Assert.AreEqual("OL 2.2.3\r", r.GetParagraph(32).Text);
            Assert.AreEqual("OL 3\r", r.GetParagraph(33).Text);
            Assert.AreEqual("Finally we want some indents, to tell the difference\r", r.GetParagraph(34).Text);

            // Check every paragraph in the span. The loop previously re-tested
            // paragraph 12 on each iteration instead of using the loop index.
            for (int i = 12; i <= 34; i++)
            {
                Assert.AreEqual(9, r.GetParagraph(i).GetLvl());
            }

            // Expected GetIlvl() value per paragraph.
            Assert.AreEqual(0, r.GetParagraph(12).GetIlvl());
            Assert.AreEqual(0, r.GetParagraph(13).GetIlvl());
            Assert.AreEqual(0, r.GetParagraph(14).GetIlvl());
            Assert.AreEqual(1, r.GetParagraph(15).GetIlvl());
            Assert.AreEqual(1, r.GetParagraph(16).GetIlvl());
            Assert.AreEqual(1, r.GetParagraph(17).GetIlvl());
            Assert.AreEqual(2, r.GetParagraph(18).GetIlvl());
            Assert.AreEqual(3, r.GetParagraph(19).GetIlvl());
            Assert.AreEqual(4, r.GetParagraph(20).GetIlvl());
            Assert.AreEqual(4, r.GetParagraph(21).GetIlvl());
            Assert.AreEqual(1, r.GetParagraph(22).GetIlvl());
            Assert.AreEqual(0, r.GetParagraph(23).GetIlvl());
            Assert.AreEqual(0, r.GetParagraph(24).GetIlvl());
            Assert.AreEqual(0, r.GetParagraph(25).GetIlvl());
            Assert.AreEqual(0, r.GetParagraph(26).GetIlvl());
            Assert.AreEqual(1, r.GetParagraph(27).GetIlvl());
            Assert.AreEqual(1, r.GetParagraph(28).GetIlvl());
            Assert.AreEqual(2, r.GetParagraph(29).GetIlvl());
            Assert.AreEqual(2, r.GetParagraph(30).GetIlvl());
            Assert.AreEqual(3, r.GetParagraph(31).GetIlvl());
            Assert.AreEqual(2, r.GetParagraph(32).GetIlvl());
            Assert.AreEqual(0, r.GetParagraph(33).GetIlvl());
            Assert.AreEqual(0, r.GetParagraph(34).GetIlvl());
        }
Example #20
0
        /// <summary>
        /// Appends an external-graphic element for <paramref name="path"/> to
        /// <paramref name="block"/>. The document, character run and drawing
        /// arguments are not used by this override.
        /// </summary>
        protected override void ProcessDrawnObject(HWPFDocument doc,
                                                   CharacterRun characterRun, OfficeDrawing officeDrawing,
                                                   String path, XmlElement block)
        {
            block.AppendChild(foDocumentFacade.CreateExternalGraphic(path));
        }
Example #21
0
        /// <summary>
        /// Checks that two pictures are found in the "header_image.doc" sample.
        /// </summary>
        public void TestPicturesInHeader()
        {
            List <Picture> pictures = HWPFTestDataSamples.OpenSampleFile("header_image.doc")
                                      .GetPicturesTable()
                                      .GetAllPictures();

            Assert.AreEqual(2, pictures.Count);
        }
Example #22
0
        /** @link dependency
         * @stereotype instantiate*/
        /*# Picture lnkPicture; */

        /**
         * Creates a pictures table that stores the supplied references in the
         * corresponding fields; no other work is done here.
         *
         * @param _document   owning document
         * @param _dataStream bytes kept in the _dataStream field
         * @param _mainStream bytes kept in the _mainStream field
         * @param fspa        table kept in the _fspa field
         * @param dgg         record holder kept in the _dgg field
         */
        public PicturesTable(HWPFDocument _document, byte[] _dataStream, byte[] _mainStream, FSPATable fspa, EscherRecordHolder dgg)
        {
            // Drawing-related structures.
            this._dgg  = dgg;
            this._fspa = fspa;

            // Raw streams.
            this._mainStream = _mainStream;
            this._dataStream = _dataStream;

            this._document = _document;
        }
        /// <summary>
        /// Checks that the "empty.doc" sample has no revision-mark author table.
        /// </summary>
        public void TestEmptyDocument()
        {
            HWPFDocument            emptyDocument = HWPFTestDataSamples.OpenSampleFile("empty.doc");
            RevisionMarkAuthorTable authorTable   = emptyDocument.GetRevisionMarkAuthorTable();

            Assert.IsNull(authorTable);
        }
        /// <summary>
        /// Checks that the text extracted from "Bug47731.doc" by
        /// WordToTextConverter contains a known sentence.
        /// </summary>
        public void TestBug47731()
        {
            String extracted = WordToTextConverter.GetText(
                HWPFTestDataSamples.OpenSampleFile("Bug47731.doc"));
            bool containsSentence = extracted.Contains("Soak the rice in water for three to four hours");

            Assert.IsTrue(containsSentence);
        }
Example #25
0
        /// <summary>
        /// Checks that two pictures are found in the "Bug44603.doc" sample.
        /// </summary>
        public void TestPicturesWithTable()
        {
            List <Picture> pictures = HWPFTestDataSamples.OpenSampleFile("Bug44603.doc")
                                      .GetPicturesTable()
                                      .GetAllPictures();

            Assert.AreEqual(2, pictures.Count);
        }
Example #26
0
        /** @link dependency
         * @stereotype instantiate*/
        /*# Picture lnkPicture; */

        /**
         * Creates a pictures table that stores the supplied references in the
         * corresponding fields; no other work is done here.
         *
         * @param _document   owning document
         * @param _dataStream bytes kept in the _dataStream field
         * @param _mainStream bytes kept in the _mainStream field
         * @param fspa        table kept in the _fspa field
         * @param dgg         record holder kept in the _dgg field
         */
        public PicturesTable(HWPFDocument _document, byte[] _dataStream, byte[] _mainStream, FSPATable fspa, EscherRecordHolder dgg)
        {
            // Drawing-related structures.
            this._dgg  = dgg;
            this._fspa = fspa;

            // Raw streams.
            this._mainStream = _mainStream;
            this._dataStream = _dataStream;

            this._document = _document;
        }
Example #27
0
        /// <summary>
        /// Checks that the "Bug45877.doc" sample has 17 paragraphs and that
        /// paragraphs 0 and 13 carry the expected text.
        /// </summary>
        public void TestParagraphPAPXNoParent45877()
        {
            HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("Bug45877.doc");

            int paragraphCount = document.GetRange().NumParagraphs;
            Assert.AreEqual(17, paragraphCount);

            Paragraph firstParagraph = document.GetRange().GetParagraph(0);
            Assert.AreEqual("First paragraph\r", firstParagraph.Text);

            Paragraph laterParagraph = document.GetRange().GetParagraph(13);
            Assert.AreEqual("After Crashing Part\r", laterParagraph.Text);
        }
Example #28
0
        /**
         * Writes a document to a <tt>MemoryStream</tt> and reads it back from a
         * second <tt>MemoryStream</tt> built over the written bytes.<p/>
         * Useful for verifying the serialisation round trip.
         */
        public static HWPFDocument WriteOutAndReadBack(HWPFDocument original)
        {
            // Start with a 4096-byte buffer, as before.
            MemoryStream sink = new MemoryStream(4096);
            original.Write(sink);

            byte[]       serialized = sink.ToArray();
            MemoryStream source     = new MemoryStream(serialized);

            return new HWPFDocument(source);
        }
Example #29
0
        /// <summary>
        /// Converts a sample Word document to XSL-FO with WordToFoConverter and
        /// returns the generated markup.
        /// </summary>
        /// <param name="sampleFileName">Name of the sample resource to open.</param>
        /// <returns>The InnerXml of the converter's output document.</returns>
        private static String getFoText(String sampleFileName)
        {
            HWPFDocument hwpfDocument;

            // Release the resource stream as soon as the document has been built from it;
            // previously the stream was never closed.
            using (var sampleStream = POIDataSamples.GetDocumentInstance().OpenResourceAsStream(sampleFileName))
            {
                hwpfDocument = new HWPFDocument(sampleStream);
            }

            WordToFoConverter wordToFoConverter = new WordToFoConverter(new XmlDocument());

            wordToFoConverter.ProcessDocument(hwpfDocument);

            return(wordToFoConverter.Document.InnerXml);
        }
Example #30
0
        /// <summary>
        /// Writes the document to an in-memory buffer and returns a new
        /// HWPFDocument read from the written bytes.
        /// </summary>
        /// <param name="doc">Document to serialize.</param>
        /// <returns>A new document built from the serialized bytes.</returns>
        protected HWPFDocument SaveAndReload(HWPFDocument doc)
        {
            MemoryStream written = new MemoryStream();
            doc.Write(written);

            byte[]       serialized = written.ToArray();
            MemoryStream source     = new MemoryStream(serialized);

            return new HWPFDocument(source);
        }
        /// <summary>
        /// Parses the Word document at <c>Context.Path</c> into a ToxyDocument.
        /// Each paragraph of the document range becomes a ToxyParagraph carrying
        /// its text and style index. The header and footer are filled in only when
        /// the "ExtractHeader" / "ExtractFooter" properties are present and true.
        /// </summary>
        /// <returns>The populated document.</returns>
        /// <exception cref="FileNotFoundException">
        /// Thrown when the file at <c>Context.Path</c> does not exist.
        /// </exception>
        public ToxyDocument Parse()
        {
            if (!File.Exists(Context.Path))
            {
                throw new FileNotFoundException("File " + Context.Path + " is not found");
            }

            bool extractHeader = false;

            if (Context.Properties.ContainsKey("ExtractHeader"))
            {
                extractHeader = Utility.IsTrue(Context.Properties["ExtractHeader"]);
            }
            bool extractFooter = false;

            if (Context.Properties.ContainsKey("ExtractFooter"))
            {
                extractFooter = Utility.IsTrue(Context.Properties["ExtractFooter"]);
            }

            ToxyDocument rdoc = new ToxyDocument();

            using (FileStream stream = File.OpenRead(Context.Path))
            {
                HWPFDocument worddoc = new HWPFDocument(stream);

                if (extractHeader)
                {
                    // Fetch the range once instead of once for the null check and once for the text.
                    var headerRange = worddoc.GetHeaderStoryRange();
                    if (headerRange != null)
                    {
                        rdoc.Header = headerRange.Text;
                    }
                }
                if (extractFooter)
                {
                    // NOTE(review): this reads the footnote range, which may not be the
                    // page footer; confirm that this is the intended source for Footer.
                    var footnoteRange = worddoc.GetFootnoteRange();
                    if (footnoteRange != null)
                    {
                        rdoc.Footer = footnoteRange.Text;
                    }
                }

                // Obtain the document range once; it was previously re-requested
                // twice per loop iteration.
                var bodyRange      = worddoc.GetRange();
                int paragraphCount = bodyRange.NumParagraphs;
                for (int i = 0; i < paragraphCount; i++)
                {
                    Paragraph     para = bodyRange.GetParagraph(i);
                    ToxyParagraph p    = new ToxyParagraph();
                    p.Text    = para.Text;
                    p.StyleID = para.GetStyleIndex().ToString();

                    rdoc.Paragraphs.Add(p);
                }
            }
            return(rdoc);
        }
Example #32
0
        /// <summary>
        /// Loads "innertable.doc" and runs the table-level checks on it, both as loaded
        /// and again after the document has been passed through <c>Reload</c>.
        /// </summary>
        public void TestInnerTable()
        {
            HWPFDocument hwpfDocument;

            // 'using' guarantees the resource stream is released even if the
            // HWPFDocument constructor throws.
            using (Stream resourceAsStream = POIDataSamples.GetDocumentInstance()
                    .OpenResourceAsStream("innertable.doc"))
            {
                hwpfDocument = new HWPFDocument(resourceAsStream);
            }

            TestInnerTable(hwpfDocument);
            hwpfDocument = Reload(hwpfDocument);
            TestInnerTable(hwpfDocument);
        }
Example #33
0
        /// <summary>
        /// Checks that the author stored in the summary information of "SampleDoc.doc"
        /// is still the same after the document goes through <c>WriteOutAndRead</c>.
        /// </summary>
        public void TestWriteProperties()
        {
            const string expectedAuthor = "Nick Burch";

            HWPFDocument original = HWPFTestDataSamples.OpenSampleFile("SampleDoc.doc");
            Assert.AreEqual(expectedAuthor, original.SummaryInformation.Author);

            // Round-trip the document and verify the property again
            HWPFDocument roundTripped = WriteOutAndRead(original);
            Assert.AreEqual(expectedAuthor, roundTripped.SummaryInformation.Author);
        }
Example #34
0
        /// <summary>
        /// Verifies that paragraph style 8 of "Bug49820.doc" reports a justification of 1,
        /// both as loaded and after the document has been passed through <c>Reload</c>.
        /// </summary>
        public void TestSprmPJc()
        {
            HWPFDocument hwpfDocument;

            // 'using' guarantees the resource stream is released even if the
            // HWPFDocument constructor throws.
            using (Stream resourceAsStream = POIDataSamples.GetDocumentInstance()
                    .OpenResourceAsStream("Bug49820.doc"))
            {
                hwpfDocument = new HWPFDocument(resourceAsStream);
            }

            Assert.AreEqual(1, hwpfDocument.GetStyleSheet().GetParagraphStyle(8)
                    .GetJustification());

            hwpfDocument = Reload(hwpfDocument);

            Assert.AreEqual(1, hwpfDocument.GetStyleSheet().GetParagraphStyle(8)
                    .GetJustification());
        }
Example #35
0
        /// <summary>
        /// Walks every paragraph of the document's range and, based on the first character
        /// of the paragraph text, asserts the table nesting level: paragraphs starting with
        /// '1'..'3' must be in a table at level 2, paragraphs starting with a letter in
        /// 'a'..'y' (case-insensitive) must be in a table at level 1.
        /// </summary>
        /// <param name="hwpfDocument">The document whose paragraphs are checked.</param>
        private void TestInnerTable(HWPFDocument hwpfDocument)
        {
            Range range = hwpfDocument.GetRange();
            for (int p = 0; p < range.NumParagraphs; p++)
            {
                Paragraph paragraph = range.GetParagraph(p);

                // Lower-case with the invariant culture so the range checks below do not
                // depend on the thread culture (e.g. Turkish maps 'I' to a dotless 'i').
                // Only the first character matters, so the rest of the text is not converted.
                char first = char.ToLowerInvariant(paragraph.Text[0]);

                if ('1' <= first && first < '4')
                {
                    Assert.IsTrue(paragraph.IsInTable());
                    Assert.AreEqual(2, paragraph.GetTableLevel());
                }

                // The upper bound is exclusive: paragraphs starting with 'z' are not checked.
                if ('a' <= first && first < 'z')
                {
                    Assert.IsTrue(paragraph.IsInTable());
                    Assert.AreEqual(1, paragraph.GetTableLevel());
                }
            }
        }
Example #36
0
        /// <summary>
        /// Captures the header story range of <paramref name="doc"/> and, when the file
        /// information block reports both a non-zero CcpHdd and a non-zero PlcfHdd size,
        /// builds the PlcfHdd plex from the table stream.
        /// </summary>
        /// <param name="doc">The document to read the header stories from.</param>
        public HeaderStories(HWPFDocument doc)
        {
            this.headerStories = doc.GetHeaderStoryRange();
            FileInformationBlock fileInfo = doc.GetFileInformationBlock();

            // Without any header text or without a PlcfHdd there is nothing more to set up
            bool hasPlcfHdd = fileInfo.GetCcpHdd() != 0 && fileInfo.GetPlcfHddSize() != 0;
            if (hasPlcfHdd)
            {
                // Handle the PlcfHdd
                plcfHdd = new PlexOfCps(
                        doc.GetTableStream(),
                        fileInfo.GetPlcfHddOffset(),
                        fileInfo.GetPlcfHddSize(),
                        0);
            }
        }
Example #37
0
        /// <summary>
        /// Writes <paramref name="doc"/> to an in-memory buffer and constructs a new
        /// <see cref="HWPFDocument"/> from the written bytes.
        /// </summary>
        /// <param name="doc">The document to write out.</param>
        /// <returns>A new document read back from the written bytes.</returns>
        protected HWPFDocument SaveAndReload(HWPFDocument doc)
        {
            MemoryStream written = new MemoryStream();
            doc.Write(written);

            byte[] serialized = written.ToArray();
            MemoryStream input = new MemoryStream(serialized);
            return new HWPFDocument(input);
        }
Example #38
0
 /// <summary>
 /// Appends an image element created from <paramref name="path"/> to <paramref name="block"/>.
 /// The <paramref name="doc"/>, <paramref name="characterRun"/> and
 /// <paramref name="officeDrawing"/> arguments are not used by this implementation.
 /// </summary>
 protected override void ProcessDrawnObject(HWPFDocument doc, CharacterRun characterRun, OfficeDrawing officeDrawing, string path, XmlElement block)
 {
     block.AppendChild(htmlDocumentFacade.CreateImage(path));
 }
Example #39
0
 /// <summary>
 /// Handles an auto-numbered endnote by forwarding to
 /// <c>ProcessNoteAutonumbered</c> with the note type "end".
 /// </summary>
 protected override void ProcessEndnoteAutonumbered(HWPFDocument wordDocument, int noteIndex, XmlElement block, Range endnoteTextRange)
 {
     const string noteType = "end";
     ProcessNoteAutonumbered(wordDocument, noteType, noteIndex, block, endnoteTextRange);
 }
Example #40
0
        /// <summary>
        /// Emits the markup for one auto-numbered note: a superscript anchor appended to
        /// <paramref name="block"/> that links to the note, and a note entry (back-link
        /// bookmark plus text span) appended to the shared <c>notes</c> block, which is
        /// created on first use.
        /// </summary>
        /// <param name="wordDocument">The document passed on to <c>ProcessCharacters</c>.</param>
        /// <param name="type">Prefix used for link names and CSS classes (callers in this file pass "end" or "foot").</param>
        /// <param name="noteIndex">Zero-based note index; the displayed number is this value plus one.</param>
        /// <param name="block">The element that receives the anchor.</param>
        /// <param name="noteTextRange">The range holding the note text.</param>
        private void ProcessNoteAutonumbered(HWPFDocument wordDocument, string type, int noteIndex, XmlElement block, Range noteTextRange)
        {
            string number = (noteIndex + 1).ToString();
            string superscriptClass = htmlDocumentFacade.GetOrCreateCssClass("a", "a", "vertical-align:super;font-size:smaller;");
            string noteTarget = type + "note_" + number;
            string backTarget = type + "note_back_" + number;

            // In-text reference: links forward to the note and is itself the back-link target.
            XmlElement reference = htmlDocumentFacade.CreateHyperlink("#" + noteTarget);
            reference.SetAttribute("name", backTarget);
            reference.SetAttribute("class", superscriptClass + " " + type + "noteanchor");
            reference.InnerText = number;
            block.AppendChild(reference);

            // The container for all notes is created lazily on the first note.
            if (notes == null)
            {
                notes = htmlDocumentFacade.CreateBlock();
                notes.SetAttribute("class", "notes");
            }

            XmlElement noteEntry = htmlDocumentFacade.CreateBlock();
            noteEntry.SetAttribute("class", type + "note");
            notes.AppendChild(noteEntry);

            // Note index: bookmark for the forward link that also links back to the reference.
            XmlElement backLink = htmlDocumentFacade.CreateBookmark(noteTarget);
            backLink.SetAttribute("href", "#" + backTarget);
            backLink.InnerText = number;
            backLink.SetAttribute("class", superscriptClass + " " + type + "noteindex");
            noteEntry.AppendChild(backLink);
            noteEntry.AppendChild(htmlDocumentFacade.CreateText(" "));

            XmlElement textSpan = htmlDocumentFacade.Document.CreateElement("span");
            textSpan.SetAttribute("class", type + "notetext");
            noteEntry.AppendChild(textSpan);

            // Render the note text under a fresh block-properties entry, and always
            // remove that entry again, even if processing throws.
            this.blocksProperies.Push(new BlockProperies("", -1));
            try
            {
                ProcessCharacters(wordDocument, int.MinValue, noteTextRange, textSpan);
            }
            finally
            {
                this.blocksProperies.Pop();
            }
        }
Example #41
0
        /// <summary>
        /// Create a new Word Extractor.
        /// </summary>
        /// <param name="doc">The HWPFDocument to extract from.</param>
        public WordExtractor(HWPFDocument doc) : base(doc)
        {
            // Keep our own reference in addition to the one handed to the base constructor.
            this.doc = doc;
        }
Example #42
0
 /// <summary>
 /// Handles an auto-numbered footnote by forwarding to
 /// <c>ProcessNoteAutonumbered</c> with the note type "foot".
 /// </summary>
 protected override void ProcessFootnoteAutonumbered(HWPFDocument wordDocument, int noteIndex, XmlElement block, Range footnoteTextRange)
 {
     const string noteType = "foot";
     ProcessNoteAutonumbered(wordDocument, noteType, noteIndex, block, footnoteTextRange);
 }
Example #43
0
 /// <summary>
 /// Writes <paramref name="hwpfDocument"/> to an in-memory buffer and constructs a new
 /// <see cref="HWPFDocument"/> from the written bytes.
 /// </summary>
 /// <param name="hwpfDocument">The document to write out.</param>
 /// <returns>A new document read back from the written bytes.</returns>
 private static HWPFDocument Reload(HWPFDocument hwpfDocument)
 {
     MemoryStream written = new MemoryStream();
     hwpfDocument.Write(written);

     byte[] serialized = written.ToArray();
     MemoryStream input = new MemoryStream(serialized);
     return new HWPFDocument(input);
 }
Example #44
0
 /// <summary>
 /// Creates a position in <paramref name="doc"/> by passing <paramref name="pos"/> to the
 /// base constructor as both of its first two arguments (presumably start and end - confirm
 /// against the base class).
 /// </summary>
 /// <param name="doc">The document the position belongs to.</param>
 /// <param name="pos">The offset used for both base-constructor position arguments.</param>
 public DocumentPosition(HWPFDocument doc, int pos)
     : base(pos, pos, doc)
 {
 }