public void Add(TextPiece piece)
{
    // Track each piece in both document (CP) order and file (FC) order
    _textPieces.Add(piece);
    _textPiecesFCOrder.Add(piece);
    _textPieces.Sort();
    _textPiecesFCOrder.Sort(new FCComparator());
}
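// A minimal sketch of a file-position comparator like the FCComparator used above,
// assuming TextPiece exposes its PieceDescriptor (as CPtoFC and WriteTo below do);
// the real NPOI FCComparator may differ in detail. Requires System.Collections.Generic.
internal class FCOrderComparator : IComparer<TextPiece>
{
    public int Compare(TextPiece a, TextPiece b)
    {
        // Order pieces by where their bytes sit in the main stream (FC order),
        // not by their character (CP) range
        return a.PieceDescriptor.FilePosition.CompareTo(b.PieceDescriptor.FilePosition);
    }
}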
public void TestUnicodeParts()
{
    HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("HeaderFooterUnicode.doc");
    TextPieceTable tbl = doc.TextTable;

    // In three bits, split every 512 bytes
    Assert.AreEqual(3, tbl.TextPieces.Count);

    TextPiece tpA = (TextPiece)tbl.TextPieces[0];
    TextPiece tpB = (TextPiece)tbl.TextPieces[1];
    TextPiece tpC = (TextPiece)tbl.TextPieces[2];

    Assert.IsTrue(tpA.IsUnicode);
    Assert.IsTrue(tpB.IsUnicode);
    Assert.IsTrue(tpC.IsUnicode);

    Assert.AreEqual(256, tpA.CharacterLength);
    Assert.AreEqual(256, tpB.CharacterLength);
    Assert.AreEqual(19, tpC.CharacterLength);

    Assert.AreEqual(512, tpA.BytesLength);
    Assert.AreEqual(512, tpB.BytesLength);
    Assert.AreEqual(38, tpC.BytesLength);

    Assert.AreEqual(0, tpA.Start);
    Assert.AreEqual(256, tpA.End);
    Assert.AreEqual(256, tpB.Start);
    Assert.AreEqual(512, tpB.End);
    Assert.AreEqual(512, tpC.Start);
    Assert.AreEqual(531, tpC.End);

    // Save and re-load
    HWPFDocument docB = SaveAndReload(doc);
    tbl = docB.TextTable;

    Assert.AreEqual(3, tbl.TextPieces.Count);

    tpA = (TextPiece)tbl.TextPieces[0];
    tpB = (TextPiece)tbl.TextPieces[1];
    tpC = (TextPiece)tbl.TextPieces[2];

    Assert.IsTrue(tpA.IsUnicode);
    Assert.IsTrue(tpB.IsUnicode);
    Assert.IsTrue(tpC.IsUnicode);

    Assert.AreEqual(256, tpA.CharacterLength);
    Assert.AreEqual(256, tpB.CharacterLength);
    Assert.AreEqual(19, tpC.CharacterLength);

    Assert.AreEqual(512, tpA.BytesLength);
    Assert.AreEqual(512, tpB.BytesLength);
    Assert.AreEqual(38, tpC.BytesLength);

    Assert.AreEqual(0, tpA.Start);
    Assert.AreEqual(256, tpA.End);
    Assert.AreEqual(256, tpB.Start);
    Assert.AreEqual(512, tpB.End);
    Assert.AreEqual(512, tpC.Start);
    Assert.AreEqual(531, tpC.End);
}
public void TestAsciiParts()
{
    HWPFDocument doc = HWPFTestDataSamples.OpenSampleFile("ThreeColHeadFoot.doc");
    TextPieceTable tbl = doc.TextTable;

    // All ascii, so stored in one big lump
    Assert.AreEqual(1, tbl.TextPieces.Count);

    TextPiece tp = (TextPiece)tbl.TextPieces[0];
    Assert.AreEqual(0, tp.Start);
    Assert.AreEqual(339, tp.End);
    Assert.AreEqual(339, tp.CharacterLength);
    Assert.AreEqual(339, tp.BytesLength);
    Assert.IsTrue(tp.GetStringBuilder().ToString().StartsWith("This is a sample word document"));

    // Save and re-load
    HWPFDocument docB = SaveAndReload(doc);
    tbl = docB.TextTable;

    Assert.AreEqual(1, tbl.TextPieces.Count);

    tp = (TextPiece)tbl.TextPieces[0];
    Assert.AreEqual(0, tp.Start);
    Assert.AreEqual(339, tp.End);
    Assert.AreEqual(339, tp.CharacterLength);
    Assert.AreEqual(339, tp.BytesLength);
    Assert.IsTrue(tp.GetStringBuilder().ToString().StartsWith("This is a sample word document"));
}
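// A minimal sketch of the SaveAndReload helper both tests rely on, assuming it
// simply round-trips the document through an in-memory stream (requires System.IO);
// the real test fixture's helper may differ.
private HWPFDocument SaveAndReload(HWPFDocument doc)
{
    MemoryStream baos = new MemoryStream();
    doc.Write(baos);
    return new HWPFDocument(new MemoryStream(baos.ToArray()));
}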
public override bool Equals(Object o)
{
    if (LimitsAreEqual(o))
    {
        TextPiece tp = (TextPiece)o;
        return GetStringBuilder().ToString().Equals(tp.GetStringBuilder().ToString())
            && tp._usesUnicode == _usesUnicode
            && _pd.Equals(tp._pd);
    }
    return false;
}
/**
 * Adjusts all the text pieces after inserting some text into one of them.
 *
 * @param listIndex
 *            The index of the TextPiece that had characters inserted into it
 * @param length
 *            The number of characters inserted
 */
public int AdjustForInsert(int listIndex, int length)
{
    int size = _textPieces.Count;

    TextPiece tp = _textPieces[listIndex];

    // Update with the new end
    tp.End = tp.End + length;

    // Now change all subsequent ones
    for (int x = listIndex + 1; x < size; x++)
    {
        tp = (TextPiece)_textPieces[x];
        tp.Start = tp.Start + length;
        tp.End = tp.End + length;
    }

    // All done
    return length;
}
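// Hedged usage sketch (the document, piece index, and insert size are hypothetical):
// after five characters are inserted into the first piece, that piece's End grows
// by five and every later piece's Start/End slides right by five CPs.
TextPieceTable pieces = doc.TextTable;   // assumes an HWPFDocument "doc" is in scope
pieces.AdjustForInsert(0, 5);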
public byte[] WriteTo(HWPFStream docStream)
{
    PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.SizeInBytes);
    // int fcMin = docStream.Getoffset();

    int size = _textPieces.Count;
    for (int x = 0; x < size; x++)
    {
        TextPiece next = _textPieces[x];
        PieceDescriptor pd = next.PieceDescriptor;

        int offset = docStream.Offset;
        int mod = (offset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE);
        if (mod != 0)
        {
            // Pad out to the next big-block boundary
            mod = POIFSConstants.SMALLER_BIG_BLOCK_SIZE - mod;
            byte[] buf = new byte[mod];
            docStream.Write(buf);
        }

        // Set the text piece position to the current docStream offset
        pd.FilePosition = docStream.Offset;

        // Write the text to the docStream and save the piece descriptor to the
        // plex, which will be written later to the tableStream
        docStream.Write(next.RawBytes);

        // The TextPiece is already in characters, which
        // makes our life much easier
        int nodeStart = next.Start;
        int nodeEnd = next.End;
        textPlex.AddProperty(new GenericPropertyNode(nodeStart, nodeEnd, pd.ToByteArray()));
    }

    return textPlex.ToByteArray();
}
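// A standalone sketch of the block-alignment rule used in WriteTo above, assuming
// POIFSConstants.SMALLER_BIG_BLOCK_SIZE is the usual 512-byte OLE2 block size;
// the helper name is illustrative only, not part of the library.
static int PaddingToNextBlock(int offset, int blockSize)
{
    int mod = offset % blockSize;
    return mod == 0 ? 0 : blockSize - mod;   // e.g. offset 700, block 512 -> 324 padding bytes
}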
// goss version of CPtoFC - this takes into account non-contiguous textpieces
// that we have come across in real world documents. Tests against the example
// code in HWPFDocument show no variation to Ryan's version of the code in
// normal use, but this version works with our non-contiguous test case.
// So far unable to get this test case to be written out as well due to
// other issues. - piers
private int CPtoFC(int CP)
{
    TextPiece TP = null;

    // Walk the pieces backwards until we find the one containing this CP
    for (int i = _text.Count - 1; i > -1; i--)
    {
        TP = _text[i];
        if (CP >= TP.GetCP())
        {
            break;
        }
    }

    // Start from the piece's file position, then step forward by the offset
    // within the piece - doubled for Unicode, which stores two bytes per character
    int FC = TP.PieceDescriptor.FilePosition;
    int offset = CP - TP.GetCP();
    if (TP.IsUnicode)
    {
        offset = offset * 2;
    }
    FC = FC + offset;
    return FC;
}
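// Worked example of the mapping above (all numbers hypothetical): for a Unicode
// piece whose GetCP() is 256 and whose PieceDescriptor.FilePosition is 0x800,
// CP 300 maps to FC = 0x800 + (300 - 256) * 2 = 0x858, because Unicode pieces
// store two bytes per character; an ASCII piece would add the offset unscaled.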
public HWPFOldDocument(DirectoryNode directory)
    : base(directory)
{
    // Where are things?
    int sedTableOffset = LittleEndian.GetInt(_mainStream, 0x88);
    int sedTableSize = LittleEndian.GetInt(_mainStream, 0x8c);
    int chpTableOffset = LittleEndian.GetInt(_mainStream, 0xb8);
    int chpTableSize = LittleEndian.GetInt(_mainStream, 0xbc);
    int papTableOffset = LittleEndian.GetInt(_mainStream, 0xc0);
    int papTableSize = LittleEndian.GetInt(_mainStream, 0xc4);
    //int shfTableOffset = LittleEndian.GetInt(_mainStream, 0x60);
    //int shfTableSize = LittleEndian.GetInt(_mainStream, 0x64);
    int complexTableOffset = LittleEndian.GetInt(_mainStream, 0x160);

    // We need to get hold of the text that makes up the
    // document, which might be regular or fast-saved
    StringBuilder text = new StringBuilder();
    if (_fib.IsFComplex())
    {
        ComplexFileTable cft = new ComplexFileTable(
                _mainStream, _mainStream,
                complexTableOffset, _fib.GetFcMin()
        );
        tpt = cft.GetTextPieceTable();

        foreach (TextPiece tp in tpt.TextPieces)
        {
            text.Append(tp.GetStringBuilder());
        }
    }
    else
    {
        // TODO Discover if these older documents can ever hold Unicode Strings?
        //  (We think not, because they seem to lack a Piece table)
        // TODO Build the Piece Descriptor properly
        //  (We have to fake it, as they don't seem to have a proper Piece table)
        PieceDescriptor pd = new PieceDescriptor(new byte[] { 0, 0, 0, 0, 0, 127, 0, 0 }, 0);
        pd.FilePosition = _fib.GetFcMin();

        // Generate a single Text Piece Table, with a single Text Piece
        // which covers all the (8 bit only) text in the file
        tpt = new TextPieceTable();
        byte[] textData = new byte[_fib.GetFcMac() - _fib.GetFcMin()];
        Array.Copy(_mainStream, _fib.GetFcMin(), textData, 0, textData.Length);
        TextPiece tp = new TextPiece(
                0, textData.Length, textData, pd
        );
        tpt.Add(tp);

        text.Append(tp.GetStringBuilder());
    }
    _text = tpt.Text;

    // Now we can fetch the character and paragraph properties
    _cbt = new OldCHPBinTable(
            _mainStream, chpTableOffset, chpTableSize,
            _fib.GetFcMin(), tpt
    );
    _pbt = new OldPAPBinTable(
            _mainStream, papTableOffset, papTableSize,
            _fib.GetFcMin(), tpt
    );
    _st = new OldSectionTable(
            _mainStream, sedTableOffset, sedTableSize,
            _fib.GetFcMin(), tpt
    );
}
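// Hedged usage sketch (the file name is hypothetical and NPOI's POIFSFileSystem
// surface is assumed): open an old-format (Word 6/95) file and let the constructor
// above pick the fast-saved or plain-text path via _fib.IsFComplex().
using (FileStream input = File.OpenRead("word95-sample.doc"))
{
    POIFSFileSystem poifs = new POIFSFileSystem(input);
    HWPFOldDocument oldDoc = new HWPFOldDocument(poifs.Root);
    // At this point the text piece table (tpt) and the old-style CHP/PAP/SEP
    // bin tables have all been built by the constructor.
}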