예제 #1
0
        /**
         * Loads "HeaderFooterUnicode.doc", checks the layout of its three
         * unicode text pieces, then saves and re-loads the document and
         * checks that the very same layout is still reported.
         */
        public void TestUnicodeParts()
        {
            HWPFDocument original = HWPFTestDataSamples.OpenSampleFile("HeaderFooterUnicode.doc");
            TextPieceTable originalTable = original.TextTable;

            // Three pieces are expected, split every 512 bytes
            Assert.AreEqual(3, originalTable.TextPieces.Count);
            TextPiece first  = (TextPiece)originalTable.TextPieces[0];
            TextPiece second = (TextPiece)originalTable.TextPieces[1];
            TextPiece third  = (TextPiece)originalTable.TextPieces[2];

            // Every piece is flagged as unicode
            Assert.IsTrue(first.IsUnicode);
            Assert.IsTrue(second.IsUnicode);
            Assert.IsTrue(third.IsUnicode);

            // Lengths in characters ...
            Assert.AreEqual(256, first.CharacterLength);
            Assert.AreEqual(256, second.CharacterLength);
            Assert.AreEqual(19, third.CharacterLength);

            // ... and in bytes (twice the character count for each piece)
            Assert.AreEqual(512, first.BytesLength);
            Assert.AreEqual(512, second.BytesLength);
            Assert.AreEqual(38, third.BytesLength);

            // The pieces follow one another without gaps
            Assert.AreEqual(0, first.Start);
            Assert.AreEqual(256, first.End);
            Assert.AreEqual(256, second.Start);
            Assert.AreEqual(512, second.End);
            Assert.AreEqual(512, third.Start);
            Assert.AreEqual(531, third.End);


            // Round-trip the document and repeat every check on the copy
            HWPFDocument reloaded = SaveAndReload(original);
            TextPieceTable reloadedTable = reloaded.TextTable;

            Assert.AreEqual(3, reloadedTable.TextPieces.Count);
            TextPiece reloadedFirst  = (TextPiece)reloadedTable.TextPieces[0];
            TextPiece reloadedSecond = (TextPiece)reloadedTable.TextPieces[1];
            TextPiece reloadedThird  = (TextPiece)reloadedTable.TextPieces[2];

            Assert.IsTrue(reloadedFirst.IsUnicode);
            Assert.IsTrue(reloadedSecond.IsUnicode);
            Assert.IsTrue(reloadedThird.IsUnicode);

            Assert.AreEqual(256, reloadedFirst.CharacterLength);
            Assert.AreEqual(256, reloadedSecond.CharacterLength);
            Assert.AreEqual(19, reloadedThird.CharacterLength);

            Assert.AreEqual(512, reloadedFirst.BytesLength);
            Assert.AreEqual(512, reloadedSecond.BytesLength);
            Assert.AreEqual(38, reloadedThird.BytesLength);

            Assert.AreEqual(0, reloadedFirst.Start);
            Assert.AreEqual(256, reloadedFirst.End);
            Assert.AreEqual(256, reloadedSecond.Start);
            Assert.AreEqual(512, reloadedSecond.End);
            Assert.AreEqual(512, reloadedThird.Start);
            Assert.AreEqual(531, reloadedThird.End);
        }
예제 #2
0
        /**
         * Round-trips a ComplexFileTable: parses it from the fixture's
         * streams, writes it into a fresh HWPFFileSystem, parses the written
         * streams again and checks that both text piece tables yield the
         * same text.
         */
        public void TestReadWrite()
        {
            FileInformationBlock fileInfo = _hWPFDocFixture._fib;

            byte[] sourceMain  = _hWPFDocFixture._mainStream;
            byte[] sourceTable = _hWPFDocFixture._tableStream;
            int    minFc       = fileInfo.GetFcMin();

            // Parse the table as stored in the fixture document
            ComplexFileTable original = new ComplexFileTable(sourceMain, sourceTable, fileInfo.GetFcClx(), minFc);

            // Serialise it into an empty in-memory file system
            HWPFFileSystem sink = new HWPFFileSystem();
            original.WriteTo(sink);

            MemoryStream writtenTableStream = sink.GetStream("1Table");
            MemoryStream writtenMainStream  = sink.GetStream("WordDocument");
            byte[] writtenTable = writtenTableStream.ToArray();
            byte[] writtenMain  = writtenMainStream.ToArray();

            // The freshly written table is parsed from offset 0 with fcMin 0
            ComplexFileTable reparsed = new ComplexFileTable(writtenMain, writtenTable, 0, 0);

            TextPieceTable expectedPieces = original.GetTextPieceTable();
            TextPieceTable actualPieces   = reparsed.GetTextPieceTable();

            Assert.AreEqual(expectedPieces.Text.ToString(), actualPieces.Text.ToString());
        }
예제 #3
0
        /**
         * Loads "ThreeColHeadFoot.doc", checks that its text is held in a
         * single text piece with the expected bounds and leading text, then
         * saves and re-loads the document and repeats the same checks.
         */
        public void TestAsciiParts()
        {
            HWPFDocument original = HWPFTestDataSamples.OpenSampleFile("ThreeColHeadFoot.doc");
            TextPieceTable originalTable = original.TextTable;

            // Pure ascii content, so everything sits in one big piece
            Assert.AreEqual(1, originalTable.TextPieces.Count);
            TextPiece onlyPiece = (TextPiece)originalTable.TextPieces[0];

            Assert.AreEqual(0, onlyPiece.Start);
            Assert.AreEqual(339, onlyPiece.End);
            Assert.AreEqual(339, onlyPiece.CharacterLength);
            Assert.AreEqual(339, onlyPiece.BytesLength);
            Assert.IsTrue(onlyPiece.GetStringBuilder().ToString().StartsWith("This is a sample word document"));


            // Round-trip the document and repeat every check on the copy
            HWPFDocument reloaded = SaveAndReload(original);
            TextPieceTable reloadedTable = reloaded.TextTable;

            Assert.AreEqual(1, reloadedTable.TextPieces.Count);
            TextPiece reloadedPiece = (TextPiece)reloadedTable.TextPieces[0];

            Assert.AreEqual(0, reloadedPiece.Start);
            Assert.AreEqual(339, reloadedPiece.End);
            Assert.AreEqual(339, reloadedPiece.CharacterLength);
            Assert.AreEqual(339, reloadedPiece.BytesLength);
            Assert.IsTrue(reloadedPiece.GetStringBuilder().ToString().StartsWith("This is a sample word document"));
        }
예제 #4
0
        /**
         * Parses a complex file table out of the table stream.
         *
         * Starting at the given offset, every entry tagged GRPPRL_TYPE is
         * read into a SprmBuffer; the entry that follows must be tagged
         * TEXT_PIECE_TABLE_TYPE and is handed to a new TextPieceTable.
         *
         * @param documentStream passed through to the TextPieceTable
         * @param tableStream the bytes the entries are read from
         * @param offset index into tableStream of the first entry
         * @param fcMin passed through to the TextPieceTable
         * @throws IOException if the entry after the grpprl run is not
         *         tagged TEXT_PIECE_TABLE_TYPE
         */
        public ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset, int fcMin)
        {
            List <SprmBuffer> collected = new List <SprmBuffer>();
            int cursor = offset;

            // Walk over the prms in front of the piece table. According to the
            // original author's note these hold data for fast saved files.
            while (tableStream[cursor] == GRPPRL_TYPE)
            {
                // Layout of one entry: type byte, short size, then `size` bytes
                int sizeAt     = cursor + 1;
                int grpprlSize = LittleEndian.GetShort(tableStream, sizeAt);
                int dataAt     = sizeAt + LittleEndianConsts.SHORT_SIZE;

                byte[] grpprl = LittleEndian.GetByteArray(tableStream, dataAt, grpprlSize);
                collected.Add(new SprmBuffer(grpprl, false, 0));

                cursor = dataAt + grpprlSize;
            }
            this._grpprls = collected.ToArray();

            if (tableStream[cursor] != TEXT_PIECE_TABLE_TYPE)
            {
                throw new IOException("The text piece table is corrupted");
            }

            // The int right after the type byte is the piece table's size;
            // the piece table data itself starts right after that int.
            int lengthAt       = cursor + 1;
            int pieceTableSize = LittleEndian.GetInt(tableStream, lengthAt);
            int pieceTableAt   = lengthAt + LittleEndianConsts.INT_SIZE;

            _tpt = new TextPieceTable(documentStream, tableStream, pieceTableAt, pieceTableSize, fcMin);
        }
예제 #5
0
        /**
         * Reads an old-style character bin table from a Word document and
         * collects the CHPX of every page it references into _textRuns.
         *
         * @param documentStream bytes holding both the bin table and the
         *        pages it points to
         * @param OffSet position of the bin table inside documentStream
         * @param size size handed to the PlexOfCps that reads the bin table
         * @param fcMin passed through to each CHPFormattedDiskPage
         * @param tpt passed through to each CHPFormattedDiskPage
         */
        public OldCHPBinTable(byte[] documentStream, int OffSet,
                              int size, int fcMin, TextPieceTable tpt)
        {
            // Each plex entry carries 2 bytes of data: a page number
            PlexOfCps pageIndex = new PlexOfCps(documentStream, OffSet, size, 2);
            int entryCount = pageIndex.Length;

            for (int entry = 0; entry < entryCount; entry++)
            {
                GenericPropertyNode pageRef = pageIndex.GetProperty(entry);

                // Turn the stored page number into a byte offset
                int pageNumber = LittleEndian.GetShort(pageRef.Bytes);
                int pageStart  = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNumber;

                CHPFormattedDiskPage page =
                    new CHPFormattedDiskPage(documentStream, pageStart, fcMin, tpt);

                int runCount = page.Size();
                for (int run = 0; run < runCount; run++)
                {
                    _textRuns.Add(page.GetCHPX(run));
                }
            }
        }
예제 #6
0
        /**
         * Reads an old-style paragraph bin table from a Word document,
         * collects the PAPX of every page it references into _paragraphs
         * and finally sorts them with PropertyNode.PAPXComparator.
         *
         * @param documentStream bytes holding both the bin table and the
         *        pages it points to
         * @param OffSet position of the bin table inside documentStream
         * @param size size handed to the PlexOfCps that reads the bin table
         * @param fcMin not used by this constructor
         * @param tpt passed through to each PAPFormattedDiskPage
         */
        public OldPAPBinTable(byte[] documentStream, int OffSet,
                              int size, int fcMin, TextPieceTable tpt)
        {
            // Each plex entry carries 2 bytes of data: a page number
            PlexOfCps pageIndex = new PlexOfCps(documentStream, OffSet, size, 2);
            int entryCount = pageIndex.Length;

            for (int entry = 0; entry < entryCount; entry++)
            {
                GenericPropertyNode pageRef = pageIndex.GetProperty(entry);

                // Turn the stored page number into a byte offset
                int pageNumber = LittleEndian.GetShort(pageRef.Bytes);
                int pageStart  = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNumber;

                // documentStream is supplied for both stream arguments
                PAPFormattedDiskPage page =
                    new PAPFormattedDiskPage(documentStream, documentStream, pageStart, tpt);

                int paragraphCount = page.Size();
                for (int index = 0; index < paragraphCount; index++)
                {
                    _paragraphs.Add(page.GetPAPX(index));
                }
            }

            _paragraphs.Sort((IComparer <PAPX>)PropertyNode.PAPXComparator.instance);
        }
예제 #7
0
        /**
         * Compares this table with another object.
         *
         * Two tables are equal when they hold the same number of text
         * pieces and every piece equals the piece at the same index of the
         * other table.
         *
         * @param o the object to compare with; may be null or of any type
         * @return true if o is a TextPieceTable with pairwise equal text
         *         pieces, false otherwise (including for null and for
         *         objects of another type)
         */
        public override bool Equals(Object o)
        {
            // Equals must not throw: a null reference or an object of a
            // different type is simply "not equal". The previous hard cast
            // raised InvalidCastException / NullReferenceException instead.
            TextPieceTable tpt = o as TextPieceTable;
            if (object.ReferenceEquals(tpt, null))
            {
                return false;
            }

            int size = tpt._textPieces.Count;
            if (size != _textPieces.Count)
            {
                return false;
            }

            for (int x = 0; x < size; x++)
            {
                if (!tpt._textPieces[x].Equals(_textPieces[x]))
                {
                    return false;
                }
            }
            return true;
        }
예제 #8
0
        /**
         * Round-trips a PAPBinTable: parses it from the fixture's streams,
         * writes it into a fresh HWPFFileSystem, parses the written streams
         * again and checks that both tables hold equal paragraphs in the
         * same order.
         */
        public void TestReadWrite()
        {
            // An empty piece table is enough for both reading and writing here
            TextPieceTable emptyPieces = new TextPieceTable();

            FileInformationBlock fileInfo = _hWPFDocFixture._fib;
            byte[] sourceMain  = _hWPFDocFixture._mainStream;
            byte[] sourceTable = _hWPFDocFixture._tableStream;

            _pAPBinTable = new PAPBinTable(sourceMain, sourceTable, null, fileInfo.GetFcPlcfbtePapx(), fileInfo.GetLcbPlcfbtePapx(), emptyPieces);

            // Serialise the table into an empty in-memory file system
            HWPFFileSystem sink = new HWPFFileSystem();
            _pAPBinTable.WriteTo(sink, emptyPieces);

            MemoryStream writtenTableStream = sink.GetStream("1Table");
            MemoryStream writtenMainStream  = sink.GetStream("WordDocument");
            byte[] writtenTable = writtenTableStream.ToArray();
            byte[] writtenMain  = writtenMainStream.ToArray();

            // The written bin table spans the whole new table stream
            PAPBinTable reparsed = new PAPBinTable(writtenMain, writtenTable, null, 0, writtenTable.Length, emptyPieces);

            List <PAPX> expected = _pAPBinTable.GetParagraphs();
            List <PAPX> actual   = reparsed.GetParagraphs();

            Assert.AreEqual(expected.Count, actual.Count);

            int paragraphCount = expected.Count;
            for (int index = 0; index < paragraphCount; index++)
            {
                PropertyNode expectedNode = (PropertyNode)expected[index];
                PropertyNode actualNode   = (PropertyNode)actual[index];

                Assert.IsTrue(expectedNode.Equals(actualNode));
            }
        }
예제 #9
0
 /**
  * This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array
  * read from a Word file).
  *
  * All work is delegated to the (documentStream, offset, tpt) overload;
  * this overload only adds the fcMin parameter, which it does not use.
  *
  * @param documentStream forwarded unchanged to the three-argument overload
  * @param offset forwarded unchanged to the three-argument overload
  * @param fcMin accepted but ignored by this constructor
  * @param tpt forwarded unchanged to the three-argument overload
  */
 public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin, TextPieceTable tpt)
     : this(documentStream, offset, tpt)
 {
 }
예제 #10
0
 /**
  * Overload that additionally accepts fcMin.
  *
  * All work is delegated to the
  * (documentStream, tableStream, offset, size, tpt) overload; fcMin is
  * not forwarded and not used here.
  *
  * @param documentStream forwarded unchanged to the five-argument overload
  * @param tableStream forwarded unchanged to the five-argument overload
  * @param offset forwarded unchanged to the five-argument overload
  * @param size forwarded unchanged to the five-argument overload
  * @param fcMin accepted but ignored by this constructor
  * @param tpt forwarded unchanged to the five-argument overload
  */
 public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset,
                    int size, int fcMin, TextPieceTable tpt) : this(documentStream, tableStream, offset, size, tpt)
 {
 }
예제 #11
0
파일: SectionTable.cs 프로젝트: zzy092/npoi
        /**
         * Reads the section table: one SEPX is added to _sections for each
         * section descriptor found in the plex stored in the table stream.
         *
         * @param documentStream bytes the section grpprls are copied from
         * @param tableStream bytes the plex of section descriptors is read from
         * @param OffSet position of the plex inside tableStream
         * @param size size handed to the PlexOfCps that reads the plex
         * @param fcMin not used by this constructor
         * @param tpt stored in this.tpt; its TextPieces are stored in _text
         * @param mainLength value the section ends are compared against
         *        when looking for the mismatch described below
         */
        public SectionTable(byte[] documentStream, byte[] tableStream, int OffSet,
                            int size, int fcMin,
                            TextPieceTable tpt, int mainLength)
        {
            PlexOfCps descriptorPlex = new PlexOfCps(tableStream, OffSet, size, SED_SIZE);

            this.tpt   = tpt;
            this._text = tpt.TextPieces;

            int descriptorCount = descriptorPlex.Length;

            for (int index = 0; index < descriptorCount; index++)
            {
                GenericPropertyNode entry      = descriptorPlex.GetProperty(index);
                SectionDescriptor   descriptor = new SectionDescriptor(entry.Bytes, 0);

                int sepxOffset = descriptor.GetFc();

                // Bounds are taken from the plex entry as they are, without
                // any conversion
                int sectionStart = entry.Start;
                int sectionEnd   = entry.End;

                byte[] grpprl;
                if (sepxOffset == unchecked ((int)0xffffffff))
                {
                    // The optimization case: an offset of -1 means there is
                    // no grpprl to read, so the section gets an empty one
                    grpprl = new byte[0];
                }
                else
                {
                    // A short holding the grpprl size sits at the offset,
                    // followed directly by the grpprl bytes
                    int grpprlSize = LittleEndian.GetShort(documentStream, sepxOffset);
                    grpprl = new byte[grpprlSize];
                    Array.Copy(documentStream, sepxOffset + LittleEndianConsts.SHORT_SIZE, grpprl, 0, grpprl.Length);
                }
                _sections.Add(new SEPX(descriptor, sectionStart, sectionEnd, grpprl));
            }

            // Original author's note: some files seem to lie about their
            // unicode status, which is very pesky; what follows tries to
            // work around that and is getting on for black magic.
            bool endsExactlyAtMain  = false;
            bool endsOneBeforeMain  = false;

            for (int index = 0; index < _sections.Count; index++)
            {
                int end = _sections[index].End;
                if (end == mainLength)
                {
                    endsExactlyAtMain = true;
                }
                else if (end == mainLength - 1)
                {
                    endsOneBeforeMain = true;
                }
            }

            if (endsOneBeforeMain && !endsExactlyAtMain)
            {
                // No section ends exactly at mainLength but at least one ends
                // a single position short of it: re-apply the bounds stored
                // in the plex to every section.
                // NOTE(review): these are the same values the sections were
                // created with above - confirm whether a conversion is missing.
                for (int index = 0; index < _sections.Count; index++)
                {
                    SEPX section = _sections[index];
                    GenericPropertyNode entry = descriptorPlex.GetProperty(index);

                    int plexStart = entry.Start;
                    int plexEnd   = entry.End;
                    section.Start = plexStart;
                    section.End   = plexEnd;
                }
            }
        }
예제 #12
0
 /**
  * Overload that additionally accepts fcMin and a TextPieceTable.
  *
  * All work is delegated to the (documentStream, offset, size) overload;
  * neither fcMin nor tpt is forwarded or used here.
  *
  * @param documentStream forwarded unchanged to the three-argument overload
  * @param offset forwarded unchanged to the three-argument overload
  * @param size forwarded unchanged to the three-argument overload
  * @param fcMin accepted but ignored by this constructor
  * @param tpt accepted but ignored by this constructor
  */
 public OldSectionTable(byte[] documentStream, int offset,
                        int size, int fcMin,
                        TextPieceTable tpt) : this(documentStream, offset, size)
 {
 }
예제 #13
0
 /**
  * Creates a ComplexFileTable without reading any stream: its text piece
  * table (_tpt) is a TextPieceTable built with the parameterless
  * constructor.
  */
 public ComplexFileTable()
 {
     // Start out with a freshly constructed piece table
     _tpt = new TextPieceTable();
 }