コード例 #1
0
ファイル: OldPAPBinTable.cs プロジェクト: ctddjyds/npoi
        public OldPAPBinTable(byte[] documentStream, int OffSet,
                           int size, int fcMin, TextPieceTable tpt)
        {
            PlexOfCps binTable = new PlexOfCps(documentStream, OffSet, size, 2);

            int length = binTable.Length;
            for (int x = 0; x < length; x++)
            {
                GenericPropertyNode node = binTable.GetProperty(x);

                int pageNum = LittleEndian.GetShort(node.Bytes);
                int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;

                PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
                  documentStream, pageOffset, tpt);

                int fkpSize = pfkp.Size();

                for (int y = 0; y < fkpSize; y++)
                {
                    PAPX papx = pfkp.GetPAPX(y);
                    _paragraphs.Add(papx);
                }
            }
             _paragraphs.Sort((IComparer<PAPX>)PropertyNode.PAPXComparator.instance);
        }
コード例 #2
0
        public ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset, int fcMin)
        {
            //skips through the prms before we reach the piece table. These contain data
            //for actual fast saved files
            List<SprmBuffer> sprmBuffers = new List<SprmBuffer>();
            //skips through the prms before we reach the piece table. These contain data
            //for actual fast saved files
            while (tableStream[offset] == GRPPRL_TYPE)
            {
                offset++;
                int size = LittleEndian.GetShort(tableStream, offset);
                offset += LittleEndianConsts.SHORT_SIZE;
                byte[] bs = LittleEndian.GetByteArray(tableStream, offset, size);
                offset += size;

                SprmBuffer sprmBuffer = new SprmBuffer(bs, false, 0);
                sprmBuffers.Add(sprmBuffer);
            }
            this._grpprls = sprmBuffers.ToArray();

            if (tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
            {
                throw new IOException("The text piece table is corrupted");
            }
            int pieceTableSize = LittleEndian.GetInt(tableStream, ++offset);
            offset += LittleEndianConsts.INT_SIZE;
            _tpt = new TextPieceTable(documentStream, tableStream, offset, pieceTableSize, fcMin);
        }
コード例 #3
0
ファイル: TestPAPBinTable.cs プロジェクト: ctddjyds/npoi
        public void TestReadWrite()
        {
            TextPieceTable fakeTPT = new TextPieceTable();

            FileInformationBlock fib = _hWPFDocFixture._fib;
            byte[] mainStream = _hWPFDocFixture._mainStream;
            byte[] tableStream = _hWPFDocFixture._tableStream;

            _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.GetFcPlcfbtePapx(), fib.GetLcbPlcfbtePapx(), fakeTPT);

            HWPFFileSystem fileSys = new HWPFFileSystem();

            _pAPBinTable.WriteTo(fileSys, fakeTPT);
            MemoryStream tableOut = fileSys.GetStream("1Table");
            MemoryStream mainOut = fileSys.GetStream("WordDocument");

            byte[] newTableStream = tableOut.ToArray();
            byte[] newMainStream = mainOut.ToArray();

            PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null, 0, newTableStream.Length, fakeTPT);

            List<PAPX> oldTextRuns = _pAPBinTable.GetParagraphs();
            List<PAPX> newTextRuns = newBinTable.GetParagraphs();

            Assert.AreEqual(oldTextRuns.Count, newTextRuns.Count);

            int size = oldTextRuns.Count;
            for (int x = 0; x < size; x++)
            {
                PropertyNode oldNode = (PropertyNode)oldTextRuns[x];
                PropertyNode newNode = (PropertyNode)newTextRuns[x];

                Assert.IsTrue(oldNode.Equals(newNode));
            }
        }
コード例 #4
0
        public void TestReadWrite()
        {
            FileInformationBlock fib = _hWPFDocFixture._fib;

            byte[] mainStream  = _hWPFDocFixture._mainStream;
            byte[] tableStream = _hWPFDocFixture._tableStream;
            int    fcMin       = fib.GetFcMin();

            CPSplitCalculator cps = new CPSplitCalculator(fib);

            ComplexFileTable cft = new ComplexFileTable(mainStream, tableStream, fib.GetFcClx(), fcMin);
            TextPieceTable   tpt = cft.GetTextPieceTable();

            SectionTable sectionTable = new SectionTable(mainStream, tableStream,
                                                         fib.GetFcPlcfsed(),
                                                         fib.GetLcbPlcfsed(),
                                                         fcMin, tpt, cps);
            HWPFFileSystem fileSys = new HWPFFileSystem();

            sectionTable.WriteTo(fileSys, 0);
            MemoryStream tableOut = fileSys.GetStream("1Table");
            MemoryStream mainOut  = fileSys.GetStream("WordDocument");

            byte[] newTableStream = tableOut.ToArray();
            byte[] newMainStream  = mainOut.ToArray();

            SectionTable newSectionTable = new SectionTable(
                newMainStream, newTableStream, 0,
                newTableStream.Length, 0, tpt, cps);

            List <SEPX> oldSections = sectionTable.GetSections();
            List <SEPX> newSections = newSectionTable.GetSections();

            Assert.AreEqual(oldSections.Count, newSections.Count);

            //test for proper char offset conversions
            PlexOfCps oldSedPlex = new PlexOfCps(tableStream, fib.GetFcPlcfsed(),
                                                 fib.GetLcbPlcfsed(), 12);
            PlexOfCps newSedPlex = new PlexOfCps(newTableStream, 0,
                                                 newTableStream.Length, 12);

            Assert.AreEqual(oldSedPlex.Length, newSedPlex.Length);

            for (int x = 0; x < oldSedPlex.Length; x++)
            {
                Assert.AreEqual(oldSedPlex.GetProperty(x).Start, newSedPlex.GetProperty(x).Start);
                Assert.AreEqual(oldSedPlex.GetProperty(x).End, newSedPlex.GetProperty(x).End);
            }

            int size = oldSections.Count;

            for (int x = 0; x < size; x++)
            {
                PropertyNode oldNode = (PropertyNode)oldSections[x];
                PropertyNode newNode = (PropertyNode)newSections[x];
                Assert.AreEqual(oldNode, newNode);
            }
        }
コード例 #5
0
ファイル: OldCHPBinTable.cs プロジェクト: 89sos98/npoi
        /**
         * Constructor used to read an old-style binTable
         *  in from a Word document.
         *
         * @param documentStream
         * @param offset
         * @param size
         * @param fcMin
         */
        public OldCHPBinTable(byte[] documentStream, int OffSet,
                           int size, int fcMin, TextPieceTable tpt)
        {
            PlexOfCps binTable = new PlexOfCps(documentStream, OffSet, size, 2);

            int length = binTable.Length;
            for (int x = 0; x < length; x++)
            {
                GenericPropertyNode node = binTable.GetProperty(x);

                int pageNum = LittleEndian.GetShort(node.Bytes);
                int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;

                CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
                  pageOffset, fcMin, tpt);

                int fkpSize = cfkp.Size();

                for (int y = 0; y < fkpSize; y++)
                {
                    _textRuns.Add(cfkp.GetCHPX(y));
                }
            }
        }
コード例 #6
0
ファイル: HWPFOldDocument.cs プロジェクト: ctddjyds/npoi
        public HWPFOldDocument(DirectoryNode directory)
            : base(directory)
        {


            // Where are things?
            int sedTableOffset = LittleEndian.GetInt(_mainStream, 0x88);
            int sedTableSize = LittleEndian.GetInt(_mainStream, 0x8c);
            int chpTableOffset = LittleEndian.GetInt(_mainStream, 0xb8);
            int chpTableSize = LittleEndian.GetInt(_mainStream, 0xbc);
            int papTableOffset = LittleEndian.GetInt(_mainStream, 0xc0);
            int papTableSize = LittleEndian.GetInt(_mainStream, 0xc4);
            //int shfTableOffset = LittleEndian.GetInt(_mainStream, 0x60);
            //int shfTableSize   = LittleEndian.GetInt(_mainStream, 0x64);
            int complexTableOffset = LittleEndian.GetInt(_mainStream, 0x160);

            // We need to get hold of the text that Makes up the
            //  document, which might be regular or fast-saved
            StringBuilder text = new StringBuilder();
            if (_fib.IsFComplex())
            {
                ComplexFileTable cft = new ComplexFileTable(
                        _mainStream, _mainStream,
                        complexTableOffset, _fib.GetFcMin()
                );
                tpt = cft.GetTextPieceTable();

                foreach (TextPiece tp in tpt.TextPieces)
                {
                    text.Append(tp.GetStringBuilder());
                }
            }
            else
            {
                // TODO Discover if these older documents can ever hold Unicode Strings?
                //  (We think not, because they seem to lack a Piece table)
                // TODO Build the Piece Descriptor properly
                //  (We have to fake it, as they don't seem to have a proper Piece table)
                PieceDescriptor pd = new PieceDescriptor(new byte[] { 0, 0, 0, 0, 0, 127, 0, 0 }, 0);
                pd.FilePosition = _fib.GetFcMin();

                // Generate a single Text Piece Table, with a single Text Piece
                //  which covers all the (8 bit only) text in the file
                tpt = new TextPieceTable();
                byte[] textData = new byte[_fib.GetFcMac() - _fib.GetFcMin()];
                Array.Copy(_mainStream, _fib.GetFcMin(), textData, 0, textData.Length);
                TextPiece tp = new TextPiece(
                        0, textData.Length, textData, pd
                );
                tpt.Add(tp);

                text.Append(tp.GetStringBuilder());
            }

            _text = tpt.Text;

            // Now we can fetch the character and paragraph properties
            _cbt = new OldCHPBinTable(
                    _mainStream, chpTableOffset, chpTableSize,
                    _fib.GetFcMin(), tpt
            );
            _pbt = new OldPAPBinTable(
                    _mainStream, chpTableOffset, papTableSize,
                    _fib.GetFcMin(), tpt
            );
            _st = new OldSectionTable(
                    _mainStream, chpTableOffset, sedTableSize,
                    _fib.GetFcMin(), tpt
            );
        }
コード例 #7
0
ファイル: HWPFOldDocument.cs プロジェクト: datadiode/npoi
        public HWPFOldDocument(DirectoryNode directory)
            : base(directory)
        {
            // Where are things?
            int sedTableOffset = LittleEndian.GetInt(_mainStream, 0x88);
            int sedTableSize   = LittleEndian.GetInt(_mainStream, 0x8c);
            int chpTableOffset = LittleEndian.GetInt(_mainStream, 0xb8);
            int chpTableSize   = LittleEndian.GetInt(_mainStream, 0xbc);
            int papTableOffset = LittleEndian.GetInt(_mainStream, 0xc0);
            int papTableSize   = LittleEndian.GetInt(_mainStream, 0xc4);
            //int shfTableOffset = LittleEndian.GetInt(_mainStream, 0x60);
            //int shfTableSize   = LittleEndian.GetInt(_mainStream, 0x64);
            int complexTableOffset = LittleEndian.GetInt(_mainStream, 0x160);

            // We need to get hold of the text that Makes up the
            //  document, which might be regular or fast-saved
            StringBuilder text = new StringBuilder();

            if (_fib.IsFComplex())
            {
                ComplexFileTable cft = new ComplexFileTable(
                    _mainStream, _mainStream,
                    complexTableOffset, _fib.GetFcMin()
                    );
                tpt = cft.GetTextPieceTable();

                foreach (TextPiece tp in tpt.TextPieces)
                {
                    text.Append(tp.GetStringBuilder());
                }
            }
            else
            {
                // TODO Discover if these older documents can ever hold Unicode Strings?
                //  (We think not, because they seem to lack a Piece table)
                // TODO Build the Piece Descriptor properly
                //  (We have to fake it, as they don't seem to have a proper Piece table)
                PieceDescriptor pd = new PieceDescriptor(new byte[] { 0, 0, 0, 0, 0, 127, 0, 0 }, 0);
                pd.FilePosition = _fib.GetFcMin();

                // Generate a single Text Piece Table, with a single Text Piece
                //  which covers all the (8 bit only) text in the file
                tpt = new TextPieceTable();
                byte[] textData = new byte[_fib.GetFcMac() - _fib.GetFcMin()];
                Array.Copy(_mainStream, _fib.GetFcMin(), textData, 0, textData.Length);
                TextPiece tp = new TextPiece(
                    0, textData.Length, textData, pd
                    );
                tpt.Add(tp);

                text.Append(tp.GetStringBuilder());
            }

            _text = tpt.Text;

            // Now we can fetch the character and paragraph properties
            _cbt = new OldCHPBinTable(
                _mainStream, chpTableOffset, chpTableSize,
                _fib.GetFcMin(), tpt
                );
            _pbt = new OldPAPBinTable(
                _mainStream, chpTableOffset, papTableSize,
                _fib.GetFcMin(), tpt
                );
            _st = new OldSectionTable(
                _mainStream, chpTableOffset, sedTableSize,
                _fib.GetFcMin(), tpt
                );
        }
コード例 #8
0
ファイル: SectionTable.cs プロジェクト: xoposhiy/npoi
        public SectionTable(byte[] documentStream, byte[] tableStream, int OffSet,
                            int size, int fcMin,
                            TextPieceTable tpt, CPSplitCalculator cps)
        {
            PlexOfCps sedPlex = new PlexOfCps(tableStream, OffSet, size, SED_SIZE);
            this.tpt = tpt;
            this._text = tpt.TextPieces;

            int length = sedPlex.Length;

            for (int x = 0; x < length; x++)
            {
                GenericPropertyNode node = sedPlex.GetProperty(x);
                SectionDescriptor sed = new SectionDescriptor(node.Bytes, 0);

                int fileOffset = sed.GetFc();
                int startAt = CPtoFC(node.Start);
                int endAt = CPtoFC(node.End);

                // check for the optimization
                if (fileOffset == unchecked((int)0xffffffff))
                {
                    _sections.Add(new SEPX(sed, startAt, endAt, new byte[0]));
                }
                else
                {
                    // The first short at the offset is the size of the grpprl.
                    int sepxSize = LittleEndian.GetShort(documentStream, fileOffset);
                    byte[] buf = new byte[sepxSize];
                    fileOffset += LittleEndianConsts.SHORT_SIZE;
                    Array.Copy(documentStream, fileOffset, buf, 0, buf.Length);
                    _sections.Add(new SEPX(sed, startAt, endAt, buf));
                }
            }

            // Some files seem to lie about their unicode status, which
            //  is very very pesky. Try to work around these, but this
            //  is Getting on for black magic...
            int mainEndsAt = cps.GetMainDocumentEnd();
            bool matchAt = false;
            bool matchHalf = false;
            for (int i = 0; i < _sections.Count; i++)
            {
                SEPX s = _sections[i];
                if (s.End == mainEndsAt)
                {
                    matchAt = true;
                }
                else if (s.EndBytes == mainEndsAt || s.EndBytes == mainEndsAt - 1)
                {
                    matchHalf = true;
                }
            }
            if (!matchAt && matchHalf)
            {
                //System.err.println("Your document seemed to be mostly unicode, but the section defInition was in bytes! Trying anyway, but things may well go wrong!");
                for (int i = 0; i < _sections.Count; i++)
                {
                    SEPX s = _sections[i];
                    GenericPropertyNode node = sedPlex.GetProperty(i);

                    int startAt = node.Start;
                    int endAt = node.End;
                    s.Start = (startAt);
                    s.End = (endAt);
                }
            }
        }
コード例 #9
0
 public ComplexFileTable()
 {
     _tpt = new TextPieceTable();
 }
コード例 #10
0
ファイル: HWPFDocument.cs プロジェクト: yesonsik/npoi
        /// <summary>
        /// Initializes a new instance of the <see cref="HWPFDocument"/> class.
        /// </summary>
        /// <param name="directory">The directory.</param>
        public HWPFDocument(DirectoryNode directory)
            : base(directory)
        {
            _endnotes  = new NotesImpl(_endnotesTables);
            _footnotes = new NotesImpl(_footnotesTables);

            // Load the main stream and FIB
            // Also handles HPSF bits

            // Do the CP Split
            _cpSplit = new CPSplitCalculator(_fib);

            // Is this document too old for us?
            if (_fib.GetNFib() < 106)
            {
                throw new OldWordFileFormatException("The document is too old - Word 95 or older. Try HWPFOldDocument instead?");
            }

            // use the fib to determine the name of the table stream.
            String name = "0Table";

            if (_fib.IsFWhichTblStm())
            {
                name = "1Table";
            }

            // Grab the table stream.
            DocumentEntry tableProps;

            try
            {
                tableProps =
                    (DocumentEntry)directory.GetEntry(name);
            }
            catch (FileNotFoundException)
            {
                throw new InvalidOperationException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
            }

            // read in the table stream.
            _tableStream = new byte[tableProps.Size];
            directory.CreatePOIFSDocumentReader(name).Read(_tableStream);

            _fib.FillVariableFields(_mainStream, _tableStream);

            // read in the data stream.
            try
            {
                DocumentEntry dataProps =
                    (DocumentEntry)directory.GetEntry("Data");
                _dataStream = new byte[dataProps.Size];
                directory.CreatePOIFSDocumentReader("Data").Read(_dataStream);
            }
            catch (FileNotFoundException)
            {
                _dataStream = new byte[0];
            }

            // Get the cp of the start of text in the main stream
            // The latest spec doc says this is always zero!
            int fcMin = 0;

            //fcMin = _fib.GetFcMin()

            // Start to load up our standard structures.
            _dop = new DocumentProperties(_tableStream, _fib.GetFcDop());
            _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.GetFcClx(), fcMin);
            TextPieceTable _tpt = _cft.GetTextPieceTable();


            // Now load the rest of the properties, which need to be adjusted
            //  for where text really begin
            _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.GetFcPlcfbteChpx(), _fib.GetLcbPlcfbteChpx(), _tpt);
            _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.GetFcPlcfbtePapx(), _fib.GetLcbPlcfbtePapx(), _tpt);

            _text = _tpt.Text;

            /*
             * in this mode we preserving PAPX/CHPX structure from file, so text may
             * miss from output, and text order may be corrupted
             */
            bool preserveBinTables = false;

            try
            {
                preserveBinTables = Boolean.Parse(
                    ConfigurationManager.AppSettings[PROPERTY_PRESERVE_BIN_TABLES]);
            }
            catch (Exception)
            {
                // ignore;
            }

            if (!preserveBinTables)
            {
                _cbt.Rebuild(_cft);
                _pbt.Rebuild(_text, _cft);
            }

            /*
             * Property to disable text rebuilding. In this mode changing the text
             * will lead to unpredictable behavior
             */
            bool preserveTextTable = false;

            try
            {
                preserveTextTable = Boolean.Parse(
                    ConfigurationManager.AppSettings[PROPERTY_PRESERVE_TEXT_TABLE]);
            }
            catch (Exception)
            {
                // ignore;
            }
            if (!preserveTextTable)
            {
                _cft = new ComplexFileTable();
                _tpt = _cft.GetTextPieceTable();
                TextPiece textPiece = new SinglentonTextPiece(_text);
                _tpt.Add(textPiece);
                _text = textPiece.GetStringBuilder();
            }

            // Read FSPA and Escher information
            // _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(),
            // _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());
            _fspaHeaders = new FSPATable(_tableStream, _fib,
                                         FSPADocumentPart.HEADER);
            _fspaMain = new FSPATable(_tableStream, _fib, FSPADocumentPart.MAIN);

            if (_fib.GetFcDggInfo() != 0)
            {
                _dgg = new EscherRecordHolder(_tableStream, _fib.GetFcDggInfo(), _fib.GetLcbDggInfo());
            }
            else
            {
                _dgg = new EscherRecordHolder();
            }

            // read in the pictures stream
            _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspa, _dgg);
            // And the art shapes stream
            _officeArts = new ShapesTable(_tableStream, _fib);

            // And escher pictures
            _officeDrawingsHeaders = new OfficeDrawingsImpl(_fspaHeaders, _dgg, _mainStream);
            _officeDrawingsMain    = new OfficeDrawingsImpl(_fspaMain, _dgg, _mainStream);

            _st = new SectionTable(_mainStream, _tableStream, _fib.GetFcPlcfsed(), _fib.GetLcbPlcfsed(), fcMin, _tpt, _cpSplit);
            _ss = new StyleSheet(_tableStream, _fib.GetFcStshf());
            _ft = new FontTable(_tableStream, _fib.GetFcSttbfffn(), _fib.GetLcbSttbfffn());

            int listOffset = _fib.GetFcPlcfLst();
            int lfoOffset  = _fib.GetFcPlfLfo();

            if (listOffset != 0 && _fib.GetLcbPlcfLst() != 0)
            {
                _lt = new ListTables(_tableStream, _fib.GetFcPlcfLst(), _fib.GetFcPlfLfo());
            }

            int sbtOffset = _fib.GetFcSttbSavedBy();
            int sbtLength = _fib.GetLcbSttbSavedBy();

            if (sbtOffset != 0 && sbtLength != 0)
            {
                _sbt = new SavedByTable(_tableStream, sbtOffset, sbtLength);
            }

            int rmarkOffset = _fib.GetFcSttbfRMark();
            int rmarkLength = _fib.GetLcbSttbfRMark();

            if (rmarkOffset != 0 && rmarkLength != 0)
            {
                _rmat = new RevisionMarkAuthorTable(_tableStream, rmarkOffset, rmarkLength);
            }


            _bookmarksTables = new BookmarksTables(_tableStream, _fib);
            _bookmarks       = new BookmarksImpl(_bookmarksTables);

            _endnotesTables  = new NotesTables(NoteType.ENDNOTE, _tableStream, _fib);
            _endnotes        = new NotesImpl(_endnotesTables);
            _footnotesTables = new NotesTables(NoteType.FOOTNOTE, _tableStream, _fib);
            _footnotes       = new NotesImpl(_footnotesTables);

            _fieldsTables = new FieldsTables(_tableStream, _fib);
            _fields       = new FieldsImpl(_fieldsTables);
        }
コード例 #11
0
ファイル: OldSectionTable.cs プロジェクト: xoposhiy/npoi
        public OldSectionTable(byte[] documentStream, int offset,
                            int size, int fcMin,
                            TextPieceTable tpt):this(documentStream, offset, size)
        {

        }
コード例 #12
0
ファイル: CHPFormattedDiskPage.cs プロジェクト: 89sos98/npoi
 /**
  * This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array
  * read from a Word file).
  */
 public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin, TextPieceTable tpt)
     : this( documentStream, offset, tpt )
 {
 }