/// <summary>
/// Checks that both sample documents expose a pictures table and that each
/// table reports the expected number of pictures.
/// </summary>
public void TestImageCount()
{
    HWPFDocument firstDocument = HWPFTestDataSamples.OpenSampleFile(docAFile);
    HWPFDocument secondDocument = HWPFTestDataSamples.OpenSampleFile(docBFile);

    // Both documents must provide a pictures table before we look inside it.
    Assert.IsNotNull(firstDocument.GetPicturesTable());
    Assert.IsNotNull(secondDocument.GetPicturesTable());

    List<Picture> firstPictures = firstDocument.GetPicturesTable().GetAllPictures();
    List<Picture> secondPictures = secondDocument.GetPicturesTable().GetAllPictures();

    Assert.AreEqual(7, firstPictures.Count);
    Assert.AreEqual(2, secondPictures.Count);
}
/// <summary>
/// Checks that the single picture in "equation.doc" is reported as an EMF,
/// with the matching extension, MIME type and suggested file name.
/// </summary>
public void TestEquation()
{
    HWPFDocument equationDocument = HWPFTestDataSamples.OpenSampleFile("equation.doc");
    List<Picture> found = equationDocument.GetPicturesTable().GetAllPictures();
    Assert.AreEqual(1, found.Count);

    Picture equation = found[0];
    Assert.IsNotNull(equation);

    // Everything the picture suggests about itself should agree with EMF.
    Assert.AreEqual(PictureType.EMF, equation.SuggestPictureType());
    Assert.AreEqual(PictureType.EMF.Extension, equation.SuggestFileExtension());
    Assert.AreEqual(PictureType.EMF.Mime, equation.MimeType);
    Assert.AreEqual("0.emf", equation.SuggestFullFileName());
}
/// <summary>
/// Checks that the single picture stored in sample document C yields the
/// same bytes as the reference image file.
/// </summary>
public void TestCompressedImageData()
{
    HWPFDocument compressedDocument = HWPFTestDataSamples.OpenSampleFile(docCFile);
    List<Picture> found = compressedDocument.GetPicturesTable().GetAllPictures();
    Assert.AreEqual(1, found.Count);

    Picture embedded = found[0];
    Assert.IsNotNull(embedded);

    // Compare against the reference image: length first, then content.
    byte[] expectedBytes = ReadFile(imgCFile);
    Assert.AreEqual(expectedBytes.Length, embedded.GetContent().Length);
    assertBytesSame(expectedBytes, embedded.GetContent());
}
/// <summary>
/// Walks every character run of "FloatingPictures.doc" and counts how many
/// runs carry a regular picture, an Escher picture, or a bare U+0008
/// placeholder, then checks those counts against the expected values.
/// </summary>
public void TestFloatingPictures()
{
    HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("FloatingPictures.doc");
    PicturesTable pictures = doc.GetPicturesTable();

    // There are 19 images in the document, but some are
    //  duplicate floating ones
    Assert.AreEqual(17, pictures.GetAllPictures().Count);

    int plain8s = 0;
    int escher8s = 0;
    int image1s = 0;

    Range r = doc.GetRange();
    for (int np = 0; np < r.NumParagraphs; np++)
    {
        Paragraph p = r.GetParagraph(np);
        for (int nc = 0; nc < p.NumCharacterRuns; nc++)
        {
            CharacterRun cr = p.GetCharacterRun(nc);
            if (pictures.HasPicture(cr))
            {
                image1s++;
            }
            else if (pictures.HasEscherPicture(cr))
            {
                escher8s++;
            }
            // Ordinal comparison is required here: a culture-sensitive
            // StartsWith may treat control characters such as U+0008 as
            // ignorable and therefore report a match for every string.
            else if (cr.Text.StartsWith("\u0008", System.StringComparison.Ordinal))
            {
                plain8s++;
            }
        }
    }

    // Total is 20, as the 4 escher 8s all reference
    //  the same regular image
    Assert.AreEqual(16, image1s);
    Assert.AreEqual(4, escher8s);
    Assert.AreEqual(0, plain8s);
}
/// <summary>
/// Checks that the two pictures stored in sample document B yield the same
/// bytes as their reference image files.
/// </summary>
public void TestImageData()
{
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile(docBFile);
    List<Picture> found = document.GetPicturesTable().GetAllPictures();
    Assert.AreEqual(2, found.Count);

    Picture first = found[0];
    Picture second = found[1];
    Assert.IsNotNull(first);
    Assert.IsNotNull(second);

    // Compare against the reference images: lengths first, then content.
    byte[] firstExpected = ReadFile(imgAFile);
    byte[] secondExpected = ReadFile(imgBFile);

    Assert.AreEqual(firstExpected.Length, first.GetContent().Length);
    Assert.AreEqual(secondExpected.Length, second.GetContent().Length);

    assertBytesSame(firstExpected, first.GetContent());
    assertBytesSame(secondExpected, second.GetContent());
}
/// <summary>
/// Initializes a new instance of the <see cref="HWPFDocument"/> class by reading
/// the table stream, the optional "Data" stream, and the document structures
/// built from them out of the given directory.
/// </summary>
/// <param name="directory">The directory node holding the document's streams.</param>
/// <exception cref="OldWordFileFormatException">
/// The FIB reports an nFib value below 106.
/// </exception>
/// <exception cref="InvalidOperationException">
/// The table stream named by the FIB ("0Table" or "1Table") is not present in
/// <paramref name="directory"/>.
/// </exception>
public HWPFDocument(DirectoryNode directory)
    : base(directory)
{
    // Wrap the current notes tables right away; both wrappers are re-created
    // at the end of this constructor once the tables have been read.
    _endnotes = new NotesImpl(_endnotesTables);
    _footnotes = new NotesImpl(_footnotesTables);

    // Load the main stream and FIB
    // Also handles HPSF bits

    // Do the CP Split
    _cpSplit = new CPSplitCalculator(_fib);

    // Is this document too old for us?
    if (_fib.GetNFib() < 106)
    {
        throw new OldWordFileFormatException("The document is too old - Word 95 or older. Try HWPFOldDocument instead?");
    }

    // Use the FIB to determine the name of the table stream.
    String name = "0Table";
    if (_fib.IsFWhichTblStm())
    {
        name = "1Table";
    }

    // Grab the table stream.
    DocumentEntry tableProps;
    try
    {
        tableProps = (DocumentEntry)directory.GetEntry(name);
    }
    catch (FileNotFoundException)
    {
        throw new InvalidOperationException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
    }

    // Read in the table stream.
    _tableStream = new byte[tableProps.Size];
    directory.CreatePOIFSDocumentReader(name).Read(_tableStream);

    _fib.FillVariableFields(_mainStream, _tableStream);

    // Read in the data stream; a document without one gets an empty buffer.
    try
    {
        DocumentEntry dataProps = (DocumentEntry)directory.GetEntry("Data");
        _dataStream = new byte[dataProps.Size];
        directory.CreatePOIFSDocumentReader("Data").Read(_dataStream);
    }
    catch (FileNotFoundException)
    {
        _dataStream = new byte[0];
    }

    // Get the cp of the start of text in the main stream
    // The latest spec doc says this is always zero!
    int fcMin = 0;
    //fcMin = _fib.GetFcMin()

    // Start to load up our standard structures.
    _dop = new DocumentProperties(_tableStream, _fib.GetFcDop());
    _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.GetFcClx(), fcMin);
    TextPieceTable textPieceTable = _cft.GetTextPieceTable();

    // Now load the rest of the properties, which need to be adjusted
    // for where text really begin
    _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.GetFcPlcfbteChpx(), _fib.GetLcbPlcfbteChpx(), textPieceTable);
    _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.GetFcPlcfbtePapx(), _fib.GetLcbPlcfbtePapx(), textPieceTable);

    _text = textPieceTable.Text;

    /*
     * in this mode we preserving PAPX/CHPX structure from file, so text may
     * miss from output, and text order may be corrupted
     */
    bool preserveBinTables = ReadBooleanAppSetting(PROPERTY_PRESERVE_BIN_TABLES);
    if (!preserveBinTables)
    {
        _cbt.Rebuild(_cft);
        _pbt.Rebuild(_text, _cft);
    }

    /*
     * Property to disable text rebuilding. In this mode changing the text
     * will lead to unpredictable behavior
     */
    bool preserveTextTable = ReadBooleanAppSetting(PROPERTY_PRESERVE_TEXT_TABLE);
    if (!preserveTextTable)
    {
        // Replace the loaded text piece table with a single piece over _text.
        _cft = new ComplexFileTable();
        textPieceTable = _cft.GetTextPieceTable();
        TextPiece textPiece = new SinglentonTextPiece(_text);
        textPieceTable.Add(textPiece);
        _text = textPiece.GetStringBuilder();
    }

    // Read FSPA and Escher information
    _fspaHeaders = new FSPATable(_tableStream, _fib, FSPADocumentPart.HEADER);
    _fspaMain = new FSPATable(_tableStream, _fib, FSPADocumentPart.MAIN);

    if (_fib.GetFcDggInfo() != 0)
    {
        _dgg = new EscherRecordHolder(_tableStream, _fib.GetFcDggInfo(), _fib.GetLcbDggInfo());
    }
    else
    {
        _dgg = new EscherRecordHolder();
    }

    // Read in the pictures stream.
    // NOTE(review): _fspa is not assigned anywhere in this constructor (only
    // _fspaHeaders and _fspaMain are) - confirm this is the intended table.
    _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspa, _dgg);

    // And the art shapes stream
    _officeArts = new ShapesTable(_tableStream, _fib);

    // And escher pictures
    _officeDrawingsHeaders = new OfficeDrawingsImpl(_fspaHeaders, _dgg, _mainStream);
    _officeDrawingsMain = new OfficeDrawingsImpl(_fspaMain, _dgg, _mainStream);

    _st = new SectionTable(_mainStream, _tableStream, _fib.GetFcPlcfsed(), _fib.GetLcbPlcfsed(), fcMin, textPieceTable, _cpSplit);
    _ss = new StyleSheet(_tableStream, _fib.GetFcStshf());
    _ft = new FontTable(_tableStream, _fib.GetFcSttbfffn(), _fib.GetLcbSttbfffn());

    // List tables are only built when the FIB reports a non-empty list table.
    int listOffset = _fib.GetFcPlcfLst();
    int lfoOffset = _fib.GetFcPlfLfo();
    if (listOffset != 0 && _fib.GetLcbPlcfLst() != 0)
    {
        _lt = new ListTables(_tableStream, listOffset, lfoOffset);
    }

    int sbtOffset = _fib.GetFcSttbSavedBy();
    int sbtLength = _fib.GetLcbSttbSavedBy();
    if (sbtOffset != 0 && sbtLength != 0)
    {
        _sbt = new SavedByTable(_tableStream, sbtOffset, sbtLength);
    }

    int rmarkOffset = _fib.GetFcSttbfRMark();
    int rmarkLength = _fib.GetLcbSttbfRMark();
    if (rmarkOffset != 0 && rmarkLength != 0)
    {
        _rmat = new RevisionMarkAuthorTable(_tableStream, rmarkOffset, rmarkLength);
    }

    _bookmarksTables = new BookmarksTables(_tableStream, _fib);
    _bookmarks = new BookmarksImpl(_bookmarksTables);

    _endnotesTables = new NotesTables(NoteType.ENDNOTE, _tableStream, _fib);
    _endnotes = new NotesImpl(_endnotesTables);
    _footnotesTables = new NotesTables(NoteType.FOOTNOTE, _tableStream, _fib);
    _footnotes = new NotesImpl(_footnotesTables);

    _fieldsTables = new FieldsTables(_tableStream, _fib);
    _fields = new FieldsImpl(_fieldsTables);
}

/// <summary>
/// Reads a boolean value from the application settings without throwing.
/// </summary>
/// <param name="key">The application setting key to look up.</param>
/// <returns>
/// <c>true</c> only when the setting exists and parses as <c>true</c>;
/// <c>false</c> when it is missing, unparsable, or cannot be read.
/// </returns>
private static bool ReadBooleanAppSetting(String key)
{
    try
    {
        // TryParse returns false for a missing (null) or malformed value, so
        // the common "setting not configured" case no longer throws.
        bool parsed;
        return Boolean.TryParse(ConfigurationManager.AppSettings[key], out parsed) && parsed;
    }
    catch (Exception)
    {
        // Best effort: a failure while reading the configuration must not
        // prevent the document from loading, so fall back to the default.
        return false;
    }
}
/// <summary>
/// Checks the paragraphs, character runs and pictures of
/// "word_with_embeded.doc": which runs carry a picture, and what each entry
/// of the pictures table suggests for its extension, file name and MIME type.
/// </summary>
public void TestEmbededDocumentIcon()
{
    // This file has two embedded excel files, an embedded powerpoint
    //  file and an embedded word file, in that order
    HWPFDocument document = HWPFTestDataSamples.OpenSampleFile("word_with_embeded.doc");

    // Check we don't break loading the pictures
    document.GetPicturesTable().GetAllPictures();
    PicturesTable pictureTable = document.GetPicturesTable();

    // Check the text, and its embedded images
    Range range = document.GetRange();
    Assert.AreEqual(1, range.NumSections);
    Assert.AreEqual(5, range.NumParagraphs);

    // Expected text of each paragraph, in document order.
    string[] expectedTexts = new string[]
    {
        "I have lots of embedded files in me\r",
        "\u0013 EMBED Excel.Sheet.8  \u0014\u0001\u0015\r",
        "\u0013 EMBED Excel.Sheet.8  \u0014\u0001\u0015\r",
        "\u0013 EMBED PowerPoint.Show.8  \u0014\u0001\u0015\r",
        "\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r"
    };

    // One flag per character run of each paragraph: does the run hold a
    // picture? The row length is also the expected number of runs.
    bool[][] expectedPictureFlags = new bool[][]
    {
        new bool[] { false, false },
        new bool[] { false, false, false, true, false },
        new bool[] { false, false, false, true, false, false },
        new bool[] { false, false, false, true, false, false },
        new bool[] { false, false, false, true, false, false }
    };

    for (int paragraphIndex = 0; paragraphIndex < expectedTexts.Length; paragraphIndex++)
    {
        Paragraph paragraph = range.GetParagraph(paragraphIndex);
        bool[] flags = expectedPictureFlags[paragraphIndex];

        Assert.AreEqual(flags.Length, paragraph.NumCharacterRuns);
        Assert.AreEqual(expectedTexts[paragraphIndex], paragraph.Text);

        for (int runIndex = 0; runIndex < flags.Length; runIndex++)
        {
            Assert.AreEqual(flags[runIndex], pictureTable.HasPicture(paragraph.GetCharacterRun(runIndex)));
        }
    }

    // Look at the pictures table
    List<Picture> allPictures = pictureTable.GetAllPictures();
    Assert.AreEqual(4, allPictures.Count);

    // Suggested full file name of each picture, in table order.
    string[] expectedFileNames = new string[] { "0", "469", "8c7", "10a8" };
    for (int pictureIndex = 0; pictureIndex < expectedFileNames.Length; pictureIndex++)
    {
        Picture current = allPictures[pictureIndex];
        Assert.AreEqual("", current.SuggestFileExtension());
        Assert.AreEqual(expectedFileNames[pictureIndex], current.SuggestFullFileName());
        Assert.AreEqual("image/unknown", current.MimeType);
    }
}