예제 #1
0
        /// <summary>
        /// Checks that both sample documents expose a pictures table and that
        /// the tables report the expected number of pictures (7 and 2).
        /// </summary>
        public void TestImageCount()
        {
            HWPFDocument firstDoc  = HWPFTestDataSamples.OpenSampleFile(docAFile);
            HWPFDocument secondDoc = HWPFTestDataSamples.OpenSampleFile(docBFile);

            // A pictures table must be available before we look inside it.
            Assert.IsNotNull(firstDoc.GetPicturesTable());
            Assert.IsNotNull(secondDoc.GetPicturesTable());

            PicturesTable firstTable  = firstDoc.GetPicturesTable();
            PicturesTable secondTable = secondDoc.GetPicturesTable();

            int firstCount  = firstTable.GetAllPictures().Count;
            int secondCount = secondTable.GetAllPictures().Count;

            Assert.AreEqual(7, firstCount);
            Assert.AreEqual(2, secondCount);
        }
예제 #2
0
        /// <summary>
        /// Verifies that "equation.doc" holds exactly one picture and that it
        /// is reported as an EMF: type, file extension, MIME type and
        /// suggested file name.
        /// </summary>
        public void TestEquation()
        {
            HWPFDocument equationDoc = HWPFTestDataSamples.OpenSampleFile("equation.doc");
            List<Picture> found = equationDoc.GetPicturesTable().GetAllPictures();

            Assert.AreEqual(1, found.Count);

            Picture equation = found[0];
            Assert.IsNotNull(equation);

            // Every piece of suggested metadata has to agree on EMF.
            Assert.AreEqual(PictureType.EMF, equation.SuggestPictureType());
            Assert.AreEqual(PictureType.EMF.Extension, equation.SuggestFileExtension());
            Assert.AreEqual(PictureType.EMF.Mime, equation.MimeType);
            Assert.AreEqual("0.emf", equation.SuggestFullFileName());
        }
예제 #3
0
        /// <summary>
        /// Verifies that the single picture in the docC sample yields content
        /// that matches the reference image file byte for byte.
        /// </summary>
        public void TestCompressedImageData()
        {
            HWPFDocument compressedDoc = HWPFTestDataSamples.OpenSampleFile(docCFile);
            List<Picture> extracted = compressedDoc.GetPicturesTable().GetAllPictures();
            Assert.AreEqual(1, extracted.Count);

            Picture onlyPicture = extracted[0];
            Assert.IsNotNull(onlyPicture);

            // Compare against the reference image: length first, then contents.
            byte[] expected = ReadFile(imgCFile);
            Assert.AreEqual(expected.Length, onlyPicture.GetContent().Length);
            assertBytesSame(expected, onlyPicture.GetContent());
        }
예제 #4
0
        /// <summary>
        /// Walks every character run of "FloatingPictures.doc" and counts how
        /// many runs carry a regular picture, how many carry an Escher
        /// picture, and how many merely start with the U+0008 placeholder
        /// character without being recognised as either.
        /// </summary>
        public void TestFloatingPictures()
        {
            HWPFDocument  doc      = HWPFTestDataSamples.OpenSampleFile("FloatingPictures.doc");
            PicturesTable pictures = doc.GetPicturesTable();

            // There are 19 images in the document, but some are
            //  duplicate floating ones
            Assert.AreEqual(17, pictures.GetAllPictures().Count);

            int plain8s  = 0;
            int escher8s = 0;
            int image1s  = 0;

            Range r = doc.GetRange();

            for (int np = 0; np < r.NumParagraphs; np++)
            {
                Paragraph p = r.GetParagraph(np);
                for (int nc = 0; nc < p.NumCharacterRuns; nc++)
                {
                    CharacterRun cr = p.GetCharacterRun(nc);
                    if (pictures.HasPicture(cr))
                    {
                        image1s++;
                    }
                    else if (pictures.HasEscherPicture(cr))
                    {
                        escher8s++;
                    }
                    // Ordinal comparison is required here: the culture-sensitive
                    // overload can treat control characters such as U+0008 as
                    // ignorable, which would make the prefix test succeed for
                    // runs that do not actually start with that character.
                    else if (cr.Text.StartsWith("\u0008", System.StringComparison.Ordinal))
                    {
                        plain8s++;
                    }
                }
            }
            // Total is 20, as the 4 escher 8s all reference
            //  the same regular image
            Assert.AreEqual(16, image1s);
            Assert.AreEqual(4, escher8s);
            Assert.AreEqual(0, plain8s);
        }
예제 #5
0
        /// <summary>
        /// Verifies that the two pictures in the docB sample match their
        /// reference image files, first by length and then byte for byte.
        /// </summary>
        public void TestImageData()
        {
            HWPFDocument sample = HWPFTestDataSamples.OpenSampleFile(docBFile);
            List<Picture> extracted = sample.GetPicturesTable().GetAllPictures();

            Assert.AreEqual(2, extracted.Count);

            Picture[] actual = { extracted[0], extracted[1] };
            foreach (Picture candidate in actual)
            {
                Assert.IsNotNull(candidate);
            }

            // Reference images, in the same order as the extracted pictures.
            byte[][] expected = { ReadFile(imgAFile), ReadFile(imgBFile) };

            for (int i = 0; i < actual.Length; i++)
            {
                Assert.AreEqual(expected[i].Length, actual[i].GetContent().Length);
            }

            for (int i = 0; i < actual.Length; i++)
            {
                assertBytesSame(expected[i], actual[i].GetContent());
            }
        }
예제 #6
0
        /// <summary>
        /// Initializes a new instance of the <see cref="HWPFDocument"/> class
        /// from an already opened directory node. After the base constructor
        /// has run, this reads the table stream and the optional "Data"
        /// stream and then builds the document structures from them: document
        /// properties, text pieces, character/paragraph bin tables, shapes,
        /// pictures, sections, styles, fonts, lists, saved-by and revision
        /// author tables, bookmarks, notes and fields.
        /// </summary>
        /// <param name="directory">The directory node the "0Table"/"1Table" and "Data" entries are read from.</param>
        /// <exception cref="OldWordFileFormatException">The FIB reports an nFib below 106.</exception>
        /// <exception cref="InvalidOperationException">The table stream named by the FIB is not present in <paramref name="directory"/>.</exception>
        public HWPFDocument(DirectoryNode directory)
            : base(directory)
        {
            // NOTE(review): _endnotesTables/_footnotesTables are not assigned
            // earlier in this constructor; both wrappers are created again at
            // the end, once the tables have been read from the table stream.
            _endnotes  = new NotesImpl(_endnotesTables);
            _footnotes = new NotesImpl(_footnotesTables);

            // Load the main stream and FIB
            // Also handles HPSF bits
            // (nothing is loaded here; presumably done by base(directory) - TODO confirm)

            // Do the CP Split
            _cpSplit = new CPSplitCalculator(_fib);

            // Is this document too old for us?
            if (_fib.GetNFib() < 106)
            {
                throw new OldWordFileFormatException("The document is too old - Word 95 or older. Try HWPFOldDocument instead?");
            }

            // use the fib to determine the name of the table stream.
            String name = "0Table";

            if (_fib.IsFWhichTblStm())
            {
                name = "1Table";
            }

            // Grab the table stream.
            DocumentEntry tableProps;

            try
            {
                tableProps =
                    (DocumentEntry)directory.GetEntry(name);
            }
            catch (FileNotFoundException)
            {
                // A missing entry is reported as an invalid document; the
                // original FileNotFoundException is not passed along.
                throw new InvalidOperationException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
            }

            // read in the table stream.
            // NOTE(review): the value returned by Read is ignored - confirm a
            // single call always fills the whole buffer.
            _tableStream = new byte[tableProps.Size];
            directory.CreatePOIFSDocumentReader(name).Read(_tableStream);

            _fib.FillVariableFields(_mainStream, _tableStream);

            // read in the data stream.
            // The "Data" entry is optional: when it is missing an empty
            // buffer is used instead.
            try
            {
                DocumentEntry dataProps =
                    (DocumentEntry)directory.GetEntry("Data");
                _dataStream = new byte[dataProps.Size];
                directory.CreatePOIFSDocumentReader("Data").Read(_dataStream);
            }
            catch (FileNotFoundException)
            {
                _dataStream = new byte[0];
            }

            // Get the cp of the start of text in the main stream
            // The latest spec doc says this is always zero!
            int fcMin = 0;

            //fcMin = _fib.GetFcMin()

            // Start to load up our standard structures.
            _dop = new DocumentProperties(_tableStream, _fib.GetFcDop());
            _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.GetFcClx(), fcMin);
            // Note: despite the underscore prefix, _tpt is a local variable.
            TextPieceTable _tpt = _cft.GetTextPieceTable();


            // Now load the rest of the properties, which need to be adjusted
            //  for where the text really begins
            _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.GetFcPlcfbteChpx(), _fib.GetLcbPlcfbteChpx(), _tpt);
            _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.GetFcPlcfbtePapx(), _fib.GetLcbPlcfbtePapx(), _tpt);

            _text = _tpt.Text;

            /*
             * In this mode we preserve the PAPX/CHPX structure from the file,
             * so text may be missing from the output and the text order may
             * be corrupted
             */
            bool preserveBinTables = false;

            try
            {
                preserveBinTables = Boolean.Parse(
                    ConfigurationManager.AppSettings[PROPERTY_PRESERVE_BIN_TABLES]);
            }
            catch (Exception)
            {
                // ignore; any failure to read or parse the setting keeps the
                // default (false)
            }

            if (!preserveBinTables)
            {
                _cbt.Rebuild(_cft);
                _pbt.Rebuild(_text, _cft);
            }

            /*
             * Property to disable text rebuilding. In this mode changing the text
             * will lead to unpredictable behavior
             */
            bool preserveTextTable = false;

            try
            {
                preserveTextTable = Boolean.Parse(
                    ConfigurationManager.AppSettings[PROPERTY_PRESERVE_TEXT_TABLE]);
            }
            catch (Exception)
            {
                // ignore; any failure to read or parse the setting keeps the
                // default (false)
            }
            if (!preserveTextTable)
            {
                // Replace the file's text piece table with a fresh one that
                // holds a single piece covering the whole text.
                _cft = new ComplexFileTable();
                _tpt = _cft.GetTextPieceTable();
                TextPiece textPiece = new SinglentonTextPiece(_text);
                _tpt.Add(textPiece);
                _text = textPiece.GetStringBuilder();
            }

            // Read FSPA and Escher information
            // _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(),
            // _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
            _fspaHeaders = new FSPATable(_tableStream, _fib,
                                         FSPADocumentPart.HEADER);
            _fspaMain = new FSPATable(_tableStream, _fib, FSPADocumentPart.MAIN);

            // An empty holder is used when the FIB reports no drawing group
            // info (offset 0).
            if (_fib.GetFcDggInfo() != 0)
            {
                _dgg = new EscherRecordHolder(_tableStream, _fib.GetFcDggInfo(), _fib.GetLcbDggInfo());
            }
            else
            {
                _dgg = new EscherRecordHolder();
            }

            // read in the pictures stream
            // NOTE(review): _fspa is not assigned anywhere in this constructor
            // (its only assignment above is commented out) - confirm whether
            // _fspaMain was intended here.
            _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspa, _dgg);
            // And the art shapes stream
            _officeArts = new ShapesTable(_tableStream, _fib);

            // And escher pictures
            _officeDrawingsHeaders = new OfficeDrawingsImpl(_fspaHeaders, _dgg, _mainStream);
            _officeDrawingsMain    = new OfficeDrawingsImpl(_fspaMain, _dgg, _mainStream);

            _st = new SectionTable(_mainStream, _tableStream, _fib.GetFcPlcfsed(), _fib.GetLcbPlcfsed(), fcMin, _tpt, _cpSplit);
            _ss = new StyleSheet(_tableStream, _fib.GetFcStshf());
            _ft = new FontTable(_tableStream, _fib.GetFcSttbfffn(), _fib.GetLcbSttbfffn());

            // NOTE(review): lfoOffset is never used; the ListTables call below
            // fetches both offsets from the FIB again.
            int listOffset = _fib.GetFcPlcfLst();
            int lfoOffset  = _fib.GetFcPlfLfo();

            // The list tables are only built when both the offset and the
            // length of the list data are non-zero; otherwise _lt is not
            // assigned here.
            if (listOffset != 0 && _fib.GetLcbPlcfLst() != 0)
            {
                _lt = new ListTables(_tableStream, _fib.GetFcPlcfLst(), _fib.GetFcPlfLfo());
            }

            int sbtOffset = _fib.GetFcSttbSavedBy();
            int sbtLength = _fib.GetLcbSttbSavedBy();

            // Same rule for the saved-by table: built only for a non-zero
            // offset and length.
            if (sbtOffset != 0 && sbtLength != 0)
            {
                _sbt = new SavedByTable(_tableStream, sbtOffset, sbtLength);
            }

            int rmarkOffset = _fib.GetFcSttbfRMark();
            int rmarkLength = _fib.GetLcbSttbfRMark();

            // And for the revision mark author table.
            if (rmarkOffset != 0 && rmarkLength != 0)
            {
                _rmat = new RevisionMarkAuthorTable(_tableStream, rmarkOffset, rmarkLength);
            }


            _bookmarksTables = new BookmarksTables(_tableStream, _fib);
            _bookmarks       = new BookmarksImpl(_bookmarksTables);

            // These replace the wrappers created at the top of the constructor.
            _endnotesTables  = new NotesTables(NoteType.ENDNOTE, _tableStream, _fib);
            _endnotes        = new NotesImpl(_endnotesTables);
            _footnotesTables = new NotesTables(NoteType.FOOTNOTE, _tableStream, _fib);
            _footnotes       = new NotesImpl(_footnotesTables);

            _fieldsTables = new FieldsTables(_tableStream, _fib);
            _fields       = new FieldsImpl(_fieldsTables);
        }
예제 #7
0
        /// <summary>
        /// Checks the paragraphs, character runs and placeholder pictures of
        /// "word_with_embeded.doc": the text of each paragraph, which runs
        /// carry a picture, and the metadata suggested for the four pictures
        /// in the pictures table.
        /// </summary>
        public void TestEmbededDocumentIcon()
        {
            // The sample holds two embedded Excel files, an embedded
            //   PowerPoint file and an embedded Word file, in that order
            HWPFDocument embeddedDoc = HWPFTestDataSamples.OpenSampleFile("word_with_embeded.doc");

            // Loading the pictures must not break anything
            embeddedDoc.GetPicturesTable().GetAllPictures();
            PicturesTable table = embeddedDoc.GetPicturesTable();

            Range body = embeddedDoc.GetRange();

            Assert.AreEqual(1, body.NumSections);
            Assert.AreEqual(5, body.NumParagraphs);

            // Expected run count and text per paragraph, in document order.
            int[] expectedRuns =
            {
                2,
                5,
                6,
                6,
                6
            };
            string[] expectedText =
            {
                "I have lots of embedded files in me\r",
                "\u0013 EMBED Excel.Sheet.8  \u0014\u0001\u0015\r",
                "\u0013 EMBED Excel.Sheet.8  \u0014\u0001\u0015\r",
                "\u0013 EMBED PowerPoint.Show.8  \u0014\u0001\u0015\r",
                "\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r"
            };

            for (int index = 0; index < expectedText.Length; index++)
            {
                Paragraph para = body.GetParagraph(index);

                Assert.AreEqual(expectedRuns[index], para.NumCharacterRuns);
                Assert.AreEqual(expectedText[index], para.Text);

                // Only the embed paragraphs (every one after the first) carry
                // a picture, and always in their fourth run.
                for (int run = 0; run < expectedRuns[index]; run++)
                {
                    bool expectPicture = index > 0 && run == 3;
                    Assert.AreEqual(expectPicture, table.HasPicture(para.GetCharacterRun(run)));
                }
            }

            // Look at the pictures table
            List<Picture> icons = table.GetAllPictures();

            Assert.AreEqual(4, icons.Count);

            // None of the pictures has a known type, so only the suggested
            // name differs between them.
            string[] expectedNames = { "0", "469", "8c7", "10a8" };

            for (int index = 0; index < expectedNames.Length; index++)
            {
                Picture icon = icons[index];

                Assert.AreEqual("", icon.SuggestFileExtension());
                Assert.AreEqual(expectedNames[index], icon.SuggestFullFileName());
                Assert.AreEqual("image/unknown", icon.MimeType);
            }
        }