Exemple #1
0
 /// <summary>
 /// Writes a Word document into an in-memory buffer and then builds a new
 /// <see cref="HWPFDocument"/> from the bytes that were written.
 /// Useful for verifying that a document survives a serialisation round trip.
 /// </summary>
 /// <param name="original">The document to write out.</param>
 /// <returns>A new document constructed from the written bytes.</returns>
 public static HWPFDocument WriteOutAndReadBack(HWPFDocument original)
 {
     MemoryStream written = new MemoryStream(4096);
     original.Write(written);

     // Re-read from a snapshot of the written bytes rather than the same stream.
     byte[] serialised = written.ToArray();
     return new HWPFDocument(new MemoryStream(serialised));
 }
        /// <summary>
        /// Loads a sample Word document, runs it through
        /// <see cref="WordToHtmlConverter"/> and returns the resulting markup.
        /// </summary>
        /// <param name="sampleFileName">Name of the sample resource to open.</param>
        /// <param name="emulatePictureStorage">
        /// Currently ignored: the pictures-manager hook that this flag was meant
        /// to enable has not been ported, so both values behave the same.
        /// </param>
        /// <returns>The <c>InnerXml</c> of the converter's output document.</returns>
        private static String GetHtmlText(String sampleFileName,
                bool emulatePictureStorage)
        {
            HWPFDocument hwpfDocument = new HWPFDocument(POIDataSamples
                    .GetDocumentInstance().OpenResourceAsStream(sampleFileName));
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    new XmlDocument());

            // TODO: when emulatePictureStorage is true, install a pictures manager
            // whose SavePicture simply returns the suggested name.

            wordToHtmlConverter.ProcessDocument(hwpfDocument);
            return wordToHtmlConverter.Document.InnerXml;
        }
        /// <summary>
        /// Loads a sample Word document, converts it with
        /// <see cref="WordToFoConverter"/> and returns the produced markup.
        /// </summary>
        /// <param name="sampleFileName">Name of the sample resource to open.</param>
        /// <returns>The <c>InnerXml</c> of the converter's output document.</returns>
        private static String getFoText(String sampleFileName)
        {
            HWPFDocument source = new HWPFDocument(
                POIDataSamples.GetDocumentInstance().OpenResourceAsStream(sampleFileName));

            XmlDocument target = new XmlDocument();
            WordToFoConverter converter = new WordToFoConverter(target);
            converter.ProcessDocument(source);

            String foMarkup = converter.Document.InnerXml;
            return foMarkup;
        }
Exemple #4
0
        /// <summary>
        /// Writes <paramref name="doc"/> to an in-memory buffer and constructs a
        /// new document from the written bytes.
        /// </summary>
        /// <param name="doc">The document to serialise.</param>
        /// <returns>The document read back from the written bytes.</returns>
        public HWPFDocument WriteOutAndRead(HWPFDocument doc)
        {
            MemoryStream sink = new MemoryStream();
            doc.Write(sink);

            byte[] bytes = sink.ToArray();
            return new HWPFDocument(new MemoryStream(bytes));
        }
Exemple #5
0
 /// <summary>
 /// Opens the Word file at <c>Context.Path</c> and returns the text of its
 /// overall range.
 /// </summary>
 /// <returns>The text of the document's range.</returns>
 /// <exception cref="FileNotFoundException">
 /// Thrown when no file exists at <c>Context.Path</c>.
 /// </exception>
 public string Parse()
 {
     if (!File.Exists(Context.Path))
     {
         throw new FileNotFoundException("File " + Context.Path + " is not found");
     }

     // The text is read while the file stream is still open.
     using (FileStream stream = File.OpenRead(Context.Path))
     {
         HWPFDocument worddoc = new HWPFDocument(stream);
         return worddoc.GetRange().Text;
     }
 }
Exemple #6
0
 // Opens the header/footer sample documents through
 // HWPFTestDataSamples.OpenSampleFile and stores each one in the member
 // named after the scenario it covers.
 public void SetUp()
 {
     none = HWPFTestDataSamples.OpenSampleFile("NoHeadFoot.doc");
     header = HWPFTestDataSamples.OpenSampleFile("ThreeColHead.doc");
     footer = HWPFTestDataSamples.OpenSampleFile("ThreeColFoot.doc");
     headerFooter = HWPFTestDataSamples.OpenSampleFile("SimpleHeadThreeColFoot.doc");
     oddEven = HWPFTestDataSamples.OpenSampleFile("PageSpecificHeadFoot.doc");
     diffFirst = HWPFTestDataSamples.OpenSampleFile("DiffFirstPageHeadFoot.doc");
     unicode = HWPFTestDataSamples.OpenSampleFile("HeaderFooterUnicode.doc");
     withFields = HWPFTestDataSamples.OpenSampleFile("HeaderWithMacros.doc");
 }
        /// <summary>
        /// Opens the Word file at <c>Context.Path</c> and converts it into a
        /// <see cref="ToxyDocument"/>: one <see cref="ToxyParagraph"/> per
        /// paragraph of the document range, plus optional header and footer text.
        /// </summary>
        /// <remarks>
        /// Header and footer extraction are off by default and are switched on
        /// through the "ExtractHeader" and "ExtractFooter" entries of
        /// <c>Context.Properties</c>.
        /// </remarks>
        /// <returns>The populated document model.</returns>
        /// <exception cref="FileNotFoundException">
        /// Thrown when no file exists at <c>Context.Path</c>.
        /// </exception>
        public ToxyDocument Parse()
        {
            if (!File.Exists(Context.Path))
            {
                throw new FileNotFoundException("File " + Context.Path + " is not found");
            }

            bool extractHeader = false;
            if (Context.Properties.ContainsKey("ExtractHeader"))
            {
                extractHeader = Utility.IsTrue(Context.Properties["ExtractHeader"]);
            }
            bool extractFooter = false;
            if (Context.Properties.ContainsKey("ExtractFooter"))
            {
                extractFooter = Utility.IsTrue(Context.Properties["ExtractFooter"]);
            }

            ToxyDocument rdoc = new ToxyDocument();

            using (FileStream stream = File.OpenRead(Context.Path))
            {
                HWPFDocument worddoc = new HWPFDocument(stream);

                if (extractHeader)
                {
                    // Fetch the range once instead of once for the null check
                    // and again for the text.
                    var headerRange = worddoc.GetHeaderStoryRange();
                    if (headerRange != null)
                    {
                        rdoc.Header = headerRange.Text;
                    }
                }

                if (extractFooter)
                {
                    // NOTE(review): this reads the *footnote* range, not a page
                    // footer range - confirm this is what Footer is meant to hold.
                    var footnoteRange = worddoc.GetFootnoteRange();
                    if (footnoteRange != null)
                    {
                        rdoc.Footer = footnoteRange.Text;
                    }
                }

                // Resolve the body range and its paragraph count once rather
                // than on every loop iteration.
                var body = worddoc.GetRange();
                int paragraphCount = body.NumParagraphs;
                for (int i = 0; i < paragraphCount; i++)
                {
                    Paragraph para = body.GetParagraph(i);

                    ToxyParagraph p = new ToxyParagraph();
                    p.Text = para.Text;
                    p.StyleID = para.GetStyleIndex().ToString();

                    rdoc.Paragraphs.Add(p);
                }
            }
            return rdoc;
        }
Exemple #8
0
        /// <summary>
        /// Checks that the table starting at paragraph 6 of "innertable.doc"
        /// is reported with two rows and that its first row has two cells.
        /// </summary>
        public void TestInnerTableCellsDetection()
        {
            HWPFDocument doc = new HWPFDocument(
                POIDataSamples.GetDocumentInstance().OpenResourceAsStream("innertable.doc"));

            // The range is requested once with the result discarded, then again for use.
            doc.GetRange();
            Range wholeDocument = doc.GetRange();

            Table nestedTable = wholeDocument.GetTable(wholeDocument.GetParagraph(6));
            Assert.AreEqual(2, nestedTable.NumRows);

            TableRow firstRow = nestedTable.GetRow(0);
            Assert.AreEqual(2, firstRow.NumCells());
        }
Exemple #9
0
        /// <summary>
        /// Checks the two shapes of "WithArtShapes.doc" (count, size, and the
        /// within-document flag), then writes the document to memory, reads it
        /// back and checks the same values again.
        /// </summary>
        public void TestShapes1()
        {
            HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("WithArtShapes.doc");

            // Pass 0 inspects the document as loaded; pass 1 inspects it after
            // it has been re-serialised and read back.
            for (int pass = 0; pass < 2; pass++)
            {
                if (pass == 1)
                {
                    MemoryStream written = new MemoryStream();
                    doc.Write(written);
                    MemoryStream reread = new MemoryStream(written.ToArray());
                    doc = new HWPFDocument(reread);
                }

                IList allShapes = doc.GetShapesTable().GetAllShapes();
                IList visibleShapes = doc.GetShapesTable().GetVisibleShapes();

                Assert.AreEqual(2, allShapes.Count);
                Assert.AreEqual(2, visibleShapes.Count);

                Shape first = (Shape)allShapes[0];
                Shape second = (Shape)allShapes[1];

                Assert.AreEqual(3616, first.Width);
                Assert.AreEqual(1738, first.Height);
                Assert.AreEqual(true, first.IsWithinDocument);

                Assert.AreEqual(4817, second.Width);
                Assert.AreEqual(2164, second.Height);
                Assert.AreEqual(true, second.IsWithinDocument);
            }
        }
Exemple #10
0
        /// <summary>
        /// Builds a small Word document on top of the template "empty.doc":
        /// three text runs with different formatting plus one custom document
        /// property, written to "new-hwpf-file.doc".
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // POI apparently can't create a document from scratch,
            // so we need an existing empty dummy document.
            // The template stream is released as soon as the file system is built.
            POIFSFileSystem fs;
            using (FileStream template = File.OpenRead("empty.doc"))
            {
                fs = new POIFSFileSystem(template);
            }
            HWPFDocument doc = new HWPFDocument(fs);

            // first run, large font size
            Range range = doc.GetRange();
            CharacterRun run1 = range.InsertAfter("one");
            run1.SetFontSize(2 * 18);
            // font size: twice the point size

            // paragraph with bold typeface
            Paragraph par2 = run1.InsertAfter(new ParagraphProperties(), 0);
            par2.SetSpacingAfter(200);
            CharacterRun run2 = par2.InsertAfter("two two two two two two two two two two two two two");
            run2.SetBold(true);

            // paragraph with italic typeface and a line indent in the first line
            Paragraph par3 = run2.InsertAfter(new ParagraphProperties(), 0);
            par3.SetFirstLineIndent(200);
            par3.SetSpacingAfter(200);
            CharacterRun run3 = par3.InsertAfter("three three three three three three three three three "
                + "three three three three three three three three three three three three three three "
                + "three three three three three three three three three three three three three three");
            run3.SetItalic(true);

            // add a custom document property (needs POI 3.5; POI 3.2 doesn't save custom properties)
            DocumentSummaryInformation dsi = doc.DocumentSummaryInformation;
            CustomProperties cp = dsi.CustomProperties;
            if (cp == null)
            {
                cp = new CustomProperties();
            }
            cp.Put("myProperty", "foo bar baz");
            // Hand the collection back to the summary information; without this
            // a newly created CustomProperties object is never attached to dsi.
            dsi.CustomProperties = cp;

            // File.Create truncates any existing file (File.OpenWrite does not),
            // and the using block flushes and closes the output.
            using (FileStream output = File.Create("new-hwpf-file.doc"))
            {
                doc.Write(output);
            }
        }
        /// <summary>
        /// Opens the container file named by <c>filename3</c>, checks that its two
        /// embedded directories each hold "1Table" and "WordDocument" entries, and
        /// then verifies the text, title and subject extracted from each of them.
        /// </summary>
        public void TestExtractFromEmbeded()
        {
            POIFSFileSystem container = new POIFSFileSystem(
                POIDataSamples.GetSpreadSheetInstance().OpenResourceAsStream(filename3));

            DirectoryNode firstEmbedded = (DirectoryNode)container.Root.GetEntry("MBD0000A3B7");
            DirectoryNode secondEmbedded = (DirectoryNode)container.Root.GetEntry("MBD0000A3B2");

            // Both directories should have WordDocument and 1Table
            Assert.IsNotNull(firstEmbedded.GetEntry("1Table"));
            Assert.IsNotNull(firstEmbedded.GetEntry("WordDocument"));

            Assert.IsNotNull(secondEmbedded.GetEntry("1Table"));
            Assert.IsNotNull(secondEmbedded.GetEntry("WordDocument"));

            // First embedded document
            HWPFDocument embeddedDoc = new HWPFDocument(firstEmbedded, container);
            WordExtractor extractor = new WordExtractor(embeddedDoc);

            Assert.IsNotNull(extractor.Text);
            Assert.IsTrue(extractor.Text.Length > 20);
            Assert.AreEqual(
                "I am a sample document\r\nNot much on me\r\nI am document 1\r\n",
                extractor.Text);
            Assert.AreEqual("Sample Doc 1", extractor.SummaryInformation.Title);
            Assert.AreEqual("Sample Test", extractor.SummaryInformation.Subject);

            // Second embedded document
            embeddedDoc = new HWPFDocument(secondEmbedded, container);
            extractor = new WordExtractor(embeddedDoc);

            Assert.IsNotNull(extractor.Text);
            Assert.IsTrue(extractor.Text.Length > 20);
            Assert.AreEqual(
                "I am another sample document\r\nNot much on me\r\nI am document 2\r\n",
                extractor.Text);
            Assert.AreEqual("Sample Doc 2", extractor.SummaryInformation.Title);
            Assert.AreEqual("Another Sample Test", extractor.SummaryInformation.Subject);
        }
        /// <summary>
        /// Emits a note: appends a "[n]" marker wrapped in zero-width spaces to
        /// <paramref name="block"/>, and appends a new block to the shared
        /// <c>notes</c> element containing "^n", a tab and a space, the note
        /// text, and a trailing newline.
        /// </summary>
        /// <param name="wordDocument">Document the note belongs to.</param>
        /// <param name="block">Element that receives the inline marker.</param>
        /// <param name="noteTextRange">Range holding the note's text.</param>
        protected void ProcessNote(HWPFDocument wordDocument, XmlElement block,
                Range noteTextRange)
        {
            // Take the current counter value and advance it under the lock.
            int currentNote;
            lock (objCounters)
            {
                currentNote = noteCounters;
                noteCounters++;
            }

            String inlineMarker = UNICODECHAR_ZERO_WIDTH_SPACE + "[" + currentNote
                    + "]" + UNICODECHAR_ZERO_WIDTH_SPACE;
            block.AppendChild(textDocumentFacade.CreateText(inlineMarker));

            // The container for all notes is created on first use.
            if (notes == null)
            {
                notes = textDocumentFacade.CreateBlock();
            }

            XmlElement noteBlock = textDocumentFacade.CreateBlock();
            notes.AppendChild(noteBlock);

            String notePrefix = "^" + currentNote + "\t ";
            noteBlock.AppendChild(textDocumentFacade.CreateText(notePrefix));
            ProcessCharacters(wordDocument, int.MinValue, noteTextRange, noteBlock);
            noteBlock.AppendChild(textDocumentFacade.CreateText("\n"));
        }
Exemple #13
0
        /// <summary>
        /// Appends a footnote element to <paramref name="block"/>. The footnote
        /// holds an inline link pointing at the footnote body and a body block
        /// that starts with a link pointing back, followed by the footnote text.
        /// Each link carries the other link's target name as its id.
        /// </summary>
        /// <param name="wordDocument">Document the footnote belongs to.</param>
        /// <param name="noteIndex">Not used; numbering comes from the internal link counter.</param>
        /// <param name="block">Element that receives the footnote.</param>
        /// <param name="footnoteTextRange">Range holding the footnote's text.</param>
        protected override void ProcessFootnoteAutonumbered(HWPFDocument wordDocument,
                int noteIndex, XmlElement block, Range footnoteTextRange)
        {
            // Advance the shared counter and capture its new value under the lock.
            String textIndex;
            lock (objLinkCounter)
            {
                textIndex = (++internalLinkCounter).ToString();
            }

            String toBodyName = "footnote_" + textIndex;
            String toTextName = "footnote_back_" + textIndex;

            XmlElement footnoteElement = foDocumentFacade.CreateFootnote();
            block.AppendChild(footnoteElement);

            // Inline part: link from the text to the footnote body.
            XmlElement inlineHolder = foDocumentFacade.CreateInline();
            XmlElement toBodyLink = foDocumentFacade.CreateBasicLinkInternal(toBodyName);
            toBodyLink.AppendChild(CreateNoteInline(textIndex));
            SetId(toBodyLink, toTextName);
            inlineHolder.AppendChild(toBodyLink);
            footnoteElement.AppendChild(inlineHolder);

            // Body part: link back to the text, then the footnote characters.
            XmlElement bodyElement = foDocumentFacade.CreateFootnoteBody();
            XmlElement bodyBlock = foDocumentFacade.CreateBlock();
            XmlElement toTextLink = foDocumentFacade.CreateBasicLinkInternal(toTextName);
            toTextLink.AppendChild(CreateNoteInline(textIndex + " "));
            SetId(toTextLink, toBodyName);
            bodyBlock.AppendChild(toTextLink);
            bodyElement.AppendChild(bodyBlock);
            footnoteElement.AppendChild(bodyElement);

            ProcessCharacters(wordDocument, int.MinValue, footnoteTextRange, bodyBlock);

            WordToFoUtils.CompactInlines(bodyBlock);
        }
Exemple #14
0
 /// <summary>
 /// Appends an external-graphic element created for <paramref name="path"/>
 /// to <paramref name="block"/>. The other arguments are not used here.
 /// </summary>
 protected override void ProcessDrawnObject(HWPFDocument doc,
         CharacterRun characterRun, OfficeDrawing officeDrawing,
         String path, XmlElement block)
 {
     block.AppendChild(foDocumentFacade.CreateExternalGraphic(path));
 }
Exemple #15
0
 // Opens the "test2.doc" sample through HWPFTestDataSamples and stores it in
 // the doc member.
 public void SetUp()
 {
     doc = HWPFTestDataSamples.OpenSampleFile("test2.doc");
 }
Exemple #16
0
 // Opens the two header/footer samples and stores them in the docUnicode and
 // docAscii members respectively.
 public void SetUp()
 {
     docUnicode = HWPFTestDataSamples.OpenSampleFile("HeaderFooterUnicode.doc");
     docAscii = HWPFTestDataSamples.OpenSampleFile("ThreeColHeadFoot.doc");
 }
 // Handles an auto-numbered footnote by delegating to ProcessNote.
 // The noteIndex argument is not forwarded.
 protected override void ProcessFootnoteAutonumbered(HWPFDocument wordDocument,
         int noteIndex, XmlElement block, Range footnoteTextRange)
 {
     ProcessNote(wordDocument, block, footnoteTextRange);
 }
        /// <summary>
        /// Handles an embedded OLE2 entry. When the entry is a directory that
        /// contains a "WordDocument" entry, its text is extracted with
        /// <see cref="WordToTextConverter"/> and appended to
        /// <paramref name="block"/>, wrapped in zero-width spaces.
        /// </summary>
        /// <param name="wordDocument">The containing document; not used here.</param>
        /// <param name="block">Element that receives the extracted text.</param>
        /// <param name="entry">The OLE2 entry to inspect.</param>
        /// <returns>
        /// <c>true</c> when text was appended; <c>false</c> when the entry is not
        /// a directory or does not contain a "WordDocument" entry.
        /// </returns>
        protected override bool ProcessOle2(HWPFDocument wordDocument, XmlElement block, Entry entry)
        {
            // A single 'as' replaces the type check plus the two separate casts.
            DirectoryNode directoryNode = entry as DirectoryNode;
            if (directoryNode == null)
            {
                return false;
            }

            if (!directoryNode.HasEntry("WordDocument"))
            {
                // TODO: Not completed - other embedded object types were meant to
                // go through a generic extractor factory, which has not been ported.
                return false;
            }

            // Embedded Word documents are supported without any extractor factory.
            String text = WordToTextConverter.GetText(directoryNode);
            block.AppendChild(textDocumentFacade
                    .CreateText(UNICODECHAR_ZERO_WIDTH_SPACE + text
                            + UNICODECHAR_ZERO_WIDTH_SPACE));
            return true;
        }
Exemple #19
0
 // Opens two sample documents and stores them in the members u
 // ("HeaderFooterUnicode.doc") and a ("SampleDoc.doc").
 public void SetUp()
 {
     u = HWPFTestDataSamples.OpenSampleFile("HeaderFooterUnicode.doc");
     a = HWPFTestDataSamples.OpenSampleFile("SampleDoc.doc");
 }
 // Override that deliberately does nothing: drawn objects produce no output
 // in this converter, and none of the arguments are read.
 protected override void ProcessDrawnObject(HWPFDocument doc,
         CharacterRun characterRun, OfficeDrawing officeDrawing,
         String path, XmlElement block)
 {
     // ignore
 }