public void TestUnicodeParts()
{
    HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("HeaderFooterUnicode.doc");
    TextPieceTable tbl = doc.TextTable;

    // In three bits, split every 512 bytes
    Assert.AreEqual(3, tbl.TextPieces.Count);

    TextPiece tpA = (TextPiece)tbl.TextPieces[0];
    TextPiece tpB = (TextPiece)tbl.TextPieces[1];
    TextPiece tpC = (TextPiece)tbl.TextPieces[2];

    Assert.IsTrue(tpA.IsUnicode);
    Assert.IsTrue(tpB.IsUnicode);
    Assert.IsTrue(tpC.IsUnicode);

    Assert.AreEqual(256, tpA.CharacterLength);
    Assert.AreEqual(256, tpB.CharacterLength);
    Assert.AreEqual(19, tpC.CharacterLength);

    Assert.AreEqual(512, tpA.BytesLength);
    Assert.AreEqual(512, tpB.BytesLength);
    Assert.AreEqual(38, tpC.BytesLength);

    Assert.AreEqual(0, tpA.Start);
    Assert.AreEqual(256, tpA.End);
    Assert.AreEqual(256, tpB.Start);
    Assert.AreEqual(512, tpB.End);
    Assert.AreEqual(512, tpC.Start);
    Assert.AreEqual(531, tpC.End);

    // Save and re-load
    HWPFDocument docB = SaveAndReload(doc);
    tbl = docB.TextTable;

    Assert.AreEqual(3, tbl.TextPieces.Count);
    tpA = (TextPiece)tbl.TextPieces[0];
    tpB = (TextPiece)tbl.TextPieces[1];
    tpC = (TextPiece)tbl.TextPieces[2];

    Assert.IsTrue(tpA.IsUnicode);
    Assert.IsTrue(tpB.IsUnicode);
    Assert.IsTrue(tpC.IsUnicode);

    Assert.AreEqual(256, tpA.CharacterLength);
    Assert.AreEqual(256, tpB.CharacterLength);
    Assert.AreEqual(19, tpC.CharacterLength);

    Assert.AreEqual(512, tpA.BytesLength);
    Assert.AreEqual(512, tpB.BytesLength);
    Assert.AreEqual(38, tpC.BytesLength);

    Assert.AreEqual(0, tpA.Start);
    Assert.AreEqual(256, tpA.End);
    Assert.AreEqual(256, tpB.Start);
    Assert.AreEqual(512, tpB.End);
    Assert.AreEqual(512, tpC.Start);
    Assert.AreEqual(531, tpC.End);
}
public void TestReadWrite()
{
    FileInformationBlock fib = _hWPFDocFixture._fib;
    byte[] mainStream = _hWPFDocFixture._mainStream;
    byte[] tableStream = _hWPFDocFixture._tableStream;
    int fcMin = fib.GetFcMin();

    ComplexFileTable cft = new ComplexFileTable(mainStream, tableStream, fib.GetFcClx(), fcMin);

    HWPFFileSystem fileSys = new HWPFFileSystem();
    cft.WriteTo(fileSys);

    MemoryStream tableOut = fileSys.GetStream("1Table");
    MemoryStream mainOut = fileSys.GetStream("WordDocument");

    byte[] newTableStream = tableOut.ToArray();
    byte[] newMainStream = mainOut.ToArray();

    ComplexFileTable newCft = new ComplexFileTable(newMainStream, newTableStream, 0, 0);

    TextPieceTable oldTextPieceTable = cft.GetTextPieceTable();
    TextPieceTable newTextPieceTable = newCft.GetTextPieceTable();

    Assert.AreEqual(oldTextPieceTable.Text.ToString(), newTextPieceTable.Text.ToString());
}
public void TestAsciiParts()
{
    HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("ThreeColHeadFoot.doc");
    TextPieceTable tbl = doc.TextTable;

    // All ascii, so stored in one big lump
    Assert.AreEqual(1, tbl.TextPieces.Count);
    TextPiece tp = (TextPiece)tbl.TextPieces[0];

    Assert.AreEqual(0, tp.Start);
    Assert.AreEqual(339, tp.End);
    Assert.AreEqual(339, tp.CharacterLength);
    Assert.AreEqual(339, tp.BytesLength);
    Assert.IsTrue(tp.GetStringBuilder().ToString().StartsWith("This is a sample word document"));

    // Save and re-load
    HWPFDocument docB = SaveAndReload(doc);
    tbl = docB.TextTable;

    Assert.AreEqual(1, tbl.TextPieces.Count);
    tp = (TextPiece)tbl.TextPieces[0];

    Assert.AreEqual(0, tp.Start);
    Assert.AreEqual(339, tp.End);
    Assert.AreEqual(339, tp.CharacterLength);
    Assert.AreEqual(339, tp.BytesLength);
    Assert.IsTrue(tp.GetStringBuilder().ToString().StartsWith("This is a sample word document"));
}
public ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset, int fcMin)
{
    List<SprmBuffer> sprmBuffers = new List<SprmBuffer>();

    // Skip through the grpprls that come before the piece table. These
    // contain data for actual fast-saved files.
    while (tableStream[offset] == GRPPRL_TYPE)
    {
        offset++;
        int size = LittleEndian.GetShort(tableStream, offset);
        offset += LittleEndianConsts.SHORT_SIZE;
        byte[] bs = LittleEndian.GetByteArray(tableStream, offset, size);
        offset += size;

        SprmBuffer sprmBuffer = new SprmBuffer(bs, false, 0);
        sprmBuffers.Add(sprmBuffer);
    }
    this._grpprls = sprmBuffers.ToArray();

    if (tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
    {
        throw new IOException("The text piece table is corrupted");
    }

    int pieceTableSize = LittleEndian.GetInt(tableStream, ++offset);
    offset += LittleEndianConsts.INT_SIZE;
    _tpt = new TextPieceTable(documentStream, tableStream, offset, pieceTableSize, fcMin);
}
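For reference, this constructor is normally fed the offsets recorded in the FIB, exactly as the ComplexFileTable round-trip test above does. A minimal sketch follows; the helper name ReadMainTextSketch is hypothetical, everything else uses members already shown in this section.

// Sketch only: parse the CLX (grpprls followed by the piece table) out of the
// 1Table stream and return the reconstructed document text.
private static string ReadMainTextSketch(byte[] mainStream, byte[] tableStream, FileInformationBlock fib)
{
    // fib.GetFcClx() is the CLX offset within the table stream; fib.GetFcMin()
    // is where the main document text begins in the WordDocument stream.
    ComplexFileTable cft = new ComplexFileTable(mainStream, tableStream, fib.GetFcClx(), fib.GetFcMin());
    TextPieceTable tpt = cft.GetTextPieceTable();
    return tpt.Text.ToString();
}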
/**
 * Constructor used to read an old-style binTable
 * in from a Word document.
 *
 * @param documentStream
 * @param offset
 * @param size
 * @param fcMin
 * @param tpt
 */
public OldCHPBinTable(byte[] documentStream, int offset, int size, int fcMin, TextPieceTable tpt)
{
    PlexOfCps binTable = new PlexOfCps(documentStream, offset, size, 2);

    int length = binTable.Length;
    for (int x = 0; x < length; x++)
    {
        GenericPropertyNode node = binTable.GetProperty(x);

        int pageNum = LittleEndian.GetShort(node.Bytes);
        int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;

        CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream, pageOffset, fcMin, tpt);

        int fkpSize = cfkp.Size();
        for (int y = 0; y < fkpSize; y++)
        {
            _textRuns.Add(cfkp.GetCHPX(y));
        }
    }
}
public OldPAPBinTable(byte[] documentStream, int offset, int size, int fcMin, TextPieceTable tpt)
{
    PlexOfCps binTable = new PlexOfCps(documentStream, offset, size, 2);

    int length = binTable.Length;
    for (int x = 0; x < length; x++)
    {
        GenericPropertyNode node = binTable.GetProperty(x);

        int pageNum = LittleEndian.GetShort(node.Bytes);
        int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;

        PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, documentStream, pageOffset, tpt);

        int fkpSize = pfkp.Size();
        for (int y = 0; y < fkpSize; y++)
        {
            PAPX papx = pfkp.GetPAPX(y);
            _paragraphs.Add(papx);
        }
    }
    _paragraphs.Sort((IComparer<PAPX>)PropertyNode.PAPXComparator.instance);
}
public override bool Equals(Object o)
{
    // Equals must not throw for nulls or unrelated types
    TextPieceTable tpt = o as TextPieceTable;
    if (tpt == null)
    {
        return false;
    }

    int size = tpt._textPieces.Count;
    if (size == _textPieces.Count)
    {
        for (int x = 0; x < size; x++)
        {
            if (!tpt._textPieces[x].Equals(_textPieces[x]))
            {
                return false;
            }
        }
        return true;
    }
    return false;
}
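As a usage note, this override lets the ComplexFileTable round-trip test compare the piece tables directly rather than only the concatenated text. A minimal sketch, reusing cft and newCft from that test; the per-piece semantics depend on TextPiece.Equals, which is not shown in this section.

// Sketch only: compare the original and re-read piece tables piece by piece
// via the Equals override above, instead of via Text.ToString().
TextPieceTable oldTpt = cft.GetTextPieceTable();
TextPieceTable newTpt = newCft.GetTextPieceTable();
Assert.IsTrue(oldTpt.Equals(newTpt));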
public void TestReadWrite()
{
    TextPieceTable fakeTPT = new TextPieceTable();

    FileInformationBlock fib = _hWPFDocFixture._fib;
    byte[] mainStream = _hWPFDocFixture._mainStream;
    byte[] tableStream = _hWPFDocFixture._tableStream;

    _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.GetFcPlcfbtePapx(), fib.GetLcbPlcfbtePapx(), fakeTPT);

    HWPFFileSystem fileSys = new HWPFFileSystem();
    _pAPBinTable.WriteTo(fileSys, fakeTPT);

    MemoryStream tableOut = fileSys.GetStream("1Table");
    MemoryStream mainOut = fileSys.GetStream("WordDocument");

    byte[] newTableStream = tableOut.ToArray();
    byte[] newMainStream = mainOut.ToArray();

    PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null, 0, newTableStream.Length, fakeTPT);

    List<PAPX> oldTextRuns = _pAPBinTable.GetParagraphs();
    List<PAPX> newTextRuns = newBinTable.GetParagraphs();

    Assert.AreEqual(oldTextRuns.Count, newTextRuns.Count);

    int size = oldTextRuns.Count;
    for (int x = 0; x < size; x++)
    {
        PropertyNode oldNode = (PropertyNode)oldTextRuns[x];
        PropertyNode newNode = (PropertyNode)newTextRuns[x];
        Assert.IsTrue(oldNode.Equals(newNode));
    }
}
/**
 * This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array
 * read from a Word file).
 */
public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin, TextPieceTable tpt)
    : this(documentStream, offset, tpt)
{
}
public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset, int size, int fcMin, TextPieceTable tpt)
    : this(documentStream, tableStream, offset, size, tpt)
{
}
public SectionTable(byte[] documentStream, byte[] tableStream, int offset, int size, int fcMin, TextPieceTable tpt, int mainLength)
{
    PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE);
    this.tpt = tpt;
    this._text = tpt.TextPieces;

    int length = sedPlex.Length;
    for (int x = 0; x < length; x++)
    {
        GenericPropertyNode node = sedPlex.GetProperty(x);
        SectionDescriptor sed = new SectionDescriptor(node.Bytes, 0);

        int fileOffset = sed.GetFc();
        //int startAt = CPtoFC(node.Start);
        //int endAt = CPtoFC(node.End);
        int startAt = node.Start;
        int endAt = node.End;

        // check for the optimization
        if (fileOffset == unchecked((int)0xffffffff))
        {
            _sections.Add(new SEPX(sed, startAt, endAt, new byte[0]));
        }
        else
        {
            // The first short at the offset is the size of the grpprl.
            int sepxSize = LittleEndian.GetShort(documentStream, fileOffset);
            byte[] buf = new byte[sepxSize];
            fileOffset += LittleEndianConsts.SHORT_SIZE;
            Array.Copy(documentStream, fileOffset, buf, 0, buf.Length);
            _sections.Add(new SEPX(sed, startAt, endAt, buf));
        }
    }

    // Some files seem to lie about their unicode status, which
    // is very very pesky. Try to work around these, but this
    // is getting on for black magic...
    int mainEndsAt = mainLength;
    bool matchAt = false;
    bool matchHalf = false;
    for (int i = 0; i < _sections.Count; i++)
    {
        SEPX s = _sections[i];
        if (s.End == mainEndsAt)
        {
            matchAt = true;
        }
        else if (s.End == mainEndsAt / 2 || s.End == mainEndsAt / 2 - 1)
        {
            // A section ending at roughly half the main length suggests the
            // section definition was written in bytes while the text is unicode.
            matchHalf = true;
        }
    }
    if (!matchAt && matchHalf)
    {
        //System.err.println("Your document seemed to be mostly unicode, but the section definition was in bytes! Trying anyway, but things may well go wrong!");
        for (int i = 0; i < _sections.Count; i++)
        {
            SEPX s = _sections[i];
            GenericPropertyNode node = sedPlex.GetProperty(i);

            int startAt = node.Start;
            int endAt = node.End;
            s.Start = startAt;
            s.End = endAt;
        }
    }
}
public OldSectionTable(byte[] documentStream, int offset, int size, int fcMin, TextPieceTable tpt)
    : this(documentStream, offset, size)
{
}
public ComplexFileTable()
{
    _tpt = new TextPieceTable();
}