/**
 * Round-trip test for SectionTable: reads the table from the fixture
 * document, serializes it into a fresh HWPFFileSystem, re-reads the
 * serialized copy, and asserts that sections and their character
 * offsets survived the write/read cycle unchanged.
 */
public void TestReadWrite()
{
    // Pull the raw document pieces out of the shared test fixture.
    FileInformationBlock fib = _hWPFDocFixture._fib;
    byte[] mainStream = _hWPFDocFixture._mainStream;
    byte[] tableStream = _hWPFDocFixture._tableStream;
    int fcMin = fib.GetFcMin();
    CPSplitCalculator cps = new CPSplitCalculator(fib);

    // Build the text piece table first; the section table needs it to
    // translate character positions.
    ComplexFileTable cft = new ComplexFileTable(mainStream, tableStream, fib.GetFcClx(), fcMin);
    TextPieceTable tpt = cft.GetTextPieceTable();
    SectionTable sectionTable = new SectionTable(mainStream, tableStream, fib.GetFcPlcfsed(), fib.GetLcbPlcfsed(), fcMin, tpt, cps);

    // Serialize the section table into an in-memory file system.
    HWPFFileSystem fileSys = new HWPFFileSystem();
    sectionTable.WriteTo(fileSys, 0);
    MemoryStream tableOut = fileSys.GetStream("1Table");
    MemoryStream mainOut = fileSys.GetStream("WordDocument");
    byte[] newTableStream = tableOut.ToArray();
    byte[] newMainStream = mainOut.ToArray();

    // Re-parse what was just written; the new table starts at offset 0.
    SectionTable newSectionTable = new SectionTable(
        newMainStream, newTableStream, 0, newTableStream.Length, 0, tpt, cps);

    List<SEPX> oldSections = sectionTable.GetSections();
    List<SEPX> newSections = newSectionTable.GetSections();
    Assert.AreEqual(oldSections.Count, newSections.Count);

    //test for proper char offset conversions
    // Compare the raw PlcfSed entries (12-byte section descriptors):
    // every start/end character position must match between the
    // original and the re-serialized table.
    PlexOfCps oldSedPlex = new PlexOfCps(tableStream, fib.GetFcPlcfsed(), fib.GetLcbPlcfsed(), 12);
    PlexOfCps newSedPlex = new PlexOfCps(newTableStream, 0, newTableStream.Length, 12);
    Assert.AreEqual(oldSedPlex.Length, newSedPlex.Length);
    for (int x = 0; x < oldSedPlex.Length; x++)
    {
        Assert.AreEqual(oldSedPlex.GetProperty(x).Start, newSedPlex.GetProperty(x).Start);
        Assert.AreEqual(oldSedPlex.GetProperty(x).End, newSedPlex.GetProperty(x).End);
    }

    // Finally compare the parsed section property nodes themselves.
    int size = oldSections.Count;
    for (int x = 0; x < size; x++)
    {
        PropertyNode oldNode = (PropertyNode)oldSections[x];
        PropertyNode newNode = (PropertyNode)newSections[x];
        Assert.AreEqual(oldNode, newNode);
    }
}
/**
 * Round-trips a PlexOfCps: fills it with 110 contiguous properties of
 * random span, serializes it, re-parses the bytes, and verifies every
 * node's start/end and stored span value survived intact.
 */
public void TestWriteRead()
{
    _plexOfCps = new PlexOfCps(4);
    // FIX: use a single Random instance. The original constructed
    // `new Random((int)DateTime.Now.Ticks)` inside the loop, so many
    // consecutive iterations got the same clock-derived seed and thus
    // the same "random" span.
    Random rand = new Random();
    int last = 0;
    for (int x = 0; x < 110; x++)
    {
        byte[] intHolder = new byte[4];
        // Span in [0, 110): scale a percentage draw up to ~110.
        int span = (int)(110.0f * (rand.Next(0, 100) / 100.0));
        LittleEndian.PutInt(intHolder, span);
        _plexOfCps.AddProperty(new GenericPropertyNode(last, last + span, intHolder));
        last += span;
    }

    // Serialize and re-parse.
    byte[] output = _plexOfCps.ToByteArray();
    _plexOfCps = new PlexOfCps(output, 0, output.Length, 4);
    int len = _plexOfCps.Length;
    Assert.AreEqual(len, 110);

    // Each node must start where the previous one ended, and its
    // payload must record its own span.
    last = 0;
    for (int x = 0; x < len; x++)
    {
        GenericPropertyNode node = _plexOfCps.GetProperty(x);
        Assert.AreEqual(node.Start, last);
        last = node.End;
        int span = LittleEndian.GetInt(node.Bytes);
        Assert.AreEqual(node.End - node.Start, span);
    }
}
/**
 * Reads the paragraph (PAPX) bin table from a Word document.
 * The table stream holds a PLC whose 4-byte payloads are FKP page
 * numbers; each referenced formatted disk page contributes its PAPX
 * runs to _paragraphs.
 */
public PAPBinTable(byte[] documentStream, byte[] tableStream, byte[] dataStream, int offset, int size, CharIndexTranslator charIndexTranslator)
{
    PlexOfCps binPlex = new PlexOfCps(tableStream, offset, size, 4);
    for (int entryIdx = 0; entryIdx < binPlex.Length; entryIdx++)
    {
        GenericPropertyNode entry = binPlex.GetProperty(entryIdx);
        // Payload is the 512-byte page number of the PAP FKP.
        int fkpPageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * LittleEndian.GetInt(entry.Bytes);
        PAPFormattedDiskPage fkp = new PAPFormattedDiskPage(documentStream,
            dataStream, fkpPageOffset, charIndexTranslator);
        int runCount = fkp.Size();
        for (int runIdx = 0; runIdx < runCount; runIdx++)
        {
            _paragraphs.Add(fkp.GetPAPX(runIdx));
        }
    }
    _dataStream = dataStream;
}
/**
 * Reads an old-style (Word 6/95) paragraph bin table. Unlike the new
 * format, the PLC payloads are 2-byte page numbers and the FKPs live
 * in the document stream itself (no separate data stream).
 */
public OldPAPBinTable(byte[] documentStream, int OffSet, int size, int fcMin, TextPieceTable tpt)
{
    PlexOfCps binTable = new PlexOfCps(documentStream, OffSet, size, 2);
    for (int i = 0; i < binTable.Length; i++)
    {
        GenericPropertyNode node = binTable.GetProperty(i);
        // Old format stores the FKP page number as a 16-bit value.
        short pageNum = LittleEndian.GetShort(node.Bytes);
        PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
            documentStream, POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum, tpt);
        int count = pfkp.Size();
        for (int j = 0; j < count; j++)
        {
            _paragraphs.Add(pfkp.GetPAPX(j));
        }
    }
    // Old-style pages are not guaranteed to come out in document order.
    _paragraphs.Sort((IComparer<PAPX>)PropertyNode.PAPXComparator.instance);
}
/**
 * Round-trips a PlexOfCps: fills it with 110 contiguous properties of
 * random span, serializes it, re-parses the bytes, and verifies every
 * node's start/end and stored span value survived intact.
 */
public void TestWriteRead()
{
    _plexOfCps = new PlexOfCps(4);
    // FIX: use a single Random instance. The original constructed
    // `new Random((int)DateTime.Now.Ticks)` inside the loop, so many
    // consecutive iterations got the same clock-derived seed and thus
    // the same "random" span.
    Random rand = new Random();
    int last = 0;
    for (int x = 0; x < 110; x++)
    {
        byte[] intHolder = new byte[4];
        // Span in [0, 110): scale a percentage draw up to ~110.
        int span = (int)(110.0f * (rand.Next(0, 100) / 100.0));
        LittleEndian.PutInt(intHolder, span);
        _plexOfCps.AddProperty(new GenericPropertyNode(last, last + span, intHolder));
        last += span;
    }

    // Serialize and re-parse.
    byte[] output = _plexOfCps.ToByteArray();
    _plexOfCps = new PlexOfCps(output, 0, output.Length, 4);
    int len = _plexOfCps.Length;
    Assert.AreEqual(len, 110);

    // Each node must start where the previous one ended, and its
    // payload must record its own span.
    last = 0;
    for (int x = 0; x < len; x++)
    {
        GenericPropertyNode node = _plexOfCps.GetProperty(x);
        Assert.AreEqual(node.Start, last);
        last = node.End;
        int span = LittleEndian.GetInt(node.Bytes);
        Assert.AreEqual(node.End - node.Start, span);
    }
}
/**
 * Constructor used to read a binTable in from a Word document.
 * The PLC in the table stream maps character ranges to 4-byte CHP FKP
 * page numbers; every non-null CHPX found on those pages is collected
 * into _textRuns.
 */
public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset, int size, CharIndexTranslator translator)
{
    PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
    for (int entryIdx = 0; entryIdx < binTable.Length; entryIdx++)
    {
        GenericPropertyNode node = binTable.GetProperty(entryIdx);
        // Payload is the 512-byte page number of the CHP FKP.
        int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * LittleEndian.GetInt(node.Bytes);
        CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
            pageOffset, translator);
        int runCount = cfkp.Size();
        for (int runIdx = 0; runIdx < runCount; runIdx++)
        {
            CHPX chpx = cfkp.GetCHPX(runIdx);
            // Pages may contain slots that decode to nothing; skip them.
            if (chpx == null)
                continue;
            _textRuns.Add(chpx);
        }
    }
}
/**
 * Constructor used to read an old-style binTable
 * in from a Word document.
 *
 * @param documentStream  raw document bytes (old format keeps the FKPs here)
 * @param OffSet          start of the PLC within documentStream
 * @param size            byte length of the PLC
 * @param fcMin           file offset of the first character of text
 * @param tpt             text piece table used for index translation
 */
public OldCHPBinTable(byte[] documentStream, int OffSet, int size, int fcMin, TextPieceTable tpt)
{
    PlexOfCps binTable = new PlexOfCps(documentStream, OffSet, size, 2);
    int length = binTable.Length;
    for (int x = 0; x < length; x++)
    {
        GenericPropertyNode node = binTable.GetProperty(x);
        // Old format stores the FKP page number as a 16-bit value.
        int pageNum = LittleEndian.GetShort(node.Bytes);
        int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
        CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
            pageOffset, fcMin, tpt);
        int fkpSize = cfkp.Size();
        for (int y = 0; y < fkpSize; y++)
        {
            // FIX: guard against null CHPX entries, matching the
            // new-style CHPBinTable constructor. The original added the
            // result unconditionally, letting nulls into _textRuns.
            CHPX chpx = cfkp.GetCHPX(y);
            if (chpx != null)
            {
                _textRuns.Add(chpx);
            }
        }
    }
}
/**
 * Reads an old-style (Word 6/95) paragraph bin table. The PLC payloads
 * are 2-byte FKP page numbers, and the pages live inside the document
 * stream itself rather than a separate data stream.
 */
public OldPAPBinTable(byte[] documentStream, int OffSet, int size, int fcMin, TextPieceTable tpt)
{
    PlexOfCps plex = new PlexOfCps(documentStream, OffSet, size, 2);
    int entries = plex.Length;
    for (int idx = 0; idx < entries; idx++)
    {
        GenericPropertyNode prop = plex.GetProperty(idx);
        // 16-bit FKP page number -> byte offset in the document stream.
        int fkpOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * LittleEndian.GetShort(prop.Bytes);
        PAPFormattedDiskPage page = new PAPFormattedDiskPage(documentStream,
            documentStream, fkpOffset, tpt);
        for (int run = 0, total = page.Size(); run < total; run++)
        {
            _paragraphs.Add(page.GetPAPX(run));
        }
    }
    // Old-style pages are not guaranteed to come out in document order.
    _paragraphs.Sort((IComparer<PAPX>)PropertyNode.PAPXComparator.instance);
}
/**
 * Reads the File Shape Address (FSPA) table from the table stream and
 * indexes each entry by its start character position.
 */
public FSPATable(byte[] tableStream, int fcPlcspa, int lcbPlcspa, List<TextPiece> tpt)
{
    // An offset of 0 means the document contains no drawing objects.
    if (fcPlcspa == 0)
    {
        return;
    }

    PlexOfCps plex = new PlexOfCps(tableStream, fcPlcspa, lcbPlcspa, FSPA.FSPA_SIZE);
    int count = plex.Length;
    for (int idx = 0; idx < count; idx++)
    {
        GenericPropertyNode fspaNode = plex.GetProperty(idx);
        _byStart.Add(fspaNode.Start, fspaNode);
    }
}
/**
 * Reads the File Shape Address (FSPA) table for the given document
 * part, locating the PLC via the FIB, and indexes each entry by its
 * start character position.
 */
public FSPATable(byte[] tableStream, FileInformationBlock fib, FSPADocumentPart part)
{
    PlexOfCps plex = new PlexOfCps(tableStream,
        fib.GetFSPAPlcfOffset(part),
        fib.GetFSPAPlcfLength(part),
        FSPA.FSPA_SIZE);
    int count = plex.Length;
    for (int idx = 0; idx < count; idx++)
    {
        GenericPropertyNode fspaNode = plex.GetProperty(idx);
        _byStart.Add(fspaNode.Start, fspaNode);
    }
}
/**
 * Wraps every property node of the given PLC in a PlexOfField.
 * Returns an empty list when the PLC is absent (null).
 */
private static List<PlexOfField> ToArrayList(PlexOfCps plexOfCps)
{
    if (plexOfCps == null)
    {
        return new List<PlexOfField>();
    }

    int count = plexOfCps.Length;
    List<PlexOfField> fields = new List<PlexOfField>(count);
    for (int idx = 0; idx < count; idx++)
    {
        fields.Add(new PlexOfField(plexOfCps.GetProperty(idx)));
    }
    return fields;
}
/**
 * Reads the File Shape Address (FSPA) table from the table stream and
 * indexes each entry by its start character position.
 */
public FSPATable(byte[] tableStream, int fcPlcspa, int lcbPlcspa, List<TextPiece> tpt)
{
    // An offset of 0 means the document contains no drawing objects.
    if (fcPlcspa == 0)
    {
        return;
    }

    PlexOfCps plex = new PlexOfCps(tableStream, fcPlcspa, lcbPlcspa, FSPA.FSPA_SIZE);
    for (int idx = 0, count = plex.Length; idx < count; idx++)
    {
        GenericPropertyNode fspaNode = plex.GetProperty(idx);
        _byStart.Add(fspaNode.Start, fspaNode);
    }
}
private ArrayList _shapesVisibili; //holds visible shapes

/**
 * Reads the shapes table (PlcspaMom, 26-byte entries) from the table
 * stream. All shapes go into _shapes; those positioned within the
 * document also go into _shapesVisibili.
 */
public ShapesTable(byte[] tblStream, FileInformationBlock fib)
{
    PlexOfCps binTable = new PlexOfCps(tblStream,
        fib.GetFcPlcspaMom(), fib.GetLcbPlcspaMom(), 26);

    _shapes = new ArrayList();
    _shapesVisibili = new ArrayList();

    int count = binTable.Length;
    for (int idx = 0; idx < count; idx++)
    {
        Shape shape = new Shape(binTable.GetProperty(idx));
        _shapes.Add(shape);
        if (shape.IsWithinDocument)
        {
            _shapesVisibili.Add(shape);
        }
    }
}
/**
 * Wraps every property node of the given PLC in a PlexOfField.
 * Returns an empty list when the PLC is absent (null).
 */
private static List<PlexOfField> ToArrayList(PlexOfCps plexOfCps)
{
    if (plexOfCps == null)
    {
        return new List<PlexOfField>();
    }

    int count = plexOfCps.Length;
    List<PlexOfField> fields = new List<PlexOfField>(count);
    for (int idx = 0; idx < count; idx++)
    {
        fields.Add(new PlexOfField(plexOfCps.GetProperty(idx)));
    }
    return fields;
}
private ArrayList _shapesVisibili; //holds visible shapes

/**
 * Reads the shapes table (PlcspaMom, 26-byte entries) from the table
 * stream. All shapes go into _shapes; those positioned within the
 * document also go into _shapesVisibili.
 */
public ShapesTable(byte[] tblStream, FileInformationBlock fib)
{
    PlexOfCps binTable = new PlexOfCps(tblStream,
        fib.GetFcPlcspaMom(), fib.GetLcbPlcspaMom(), 26);

    _shapes = new ArrayList();
    _shapesVisibili = new ArrayList();

    for (int idx = 0, count = binTable.Length; idx < count; idx++)
    {
        Shape shape = new Shape(binTable.GetProperty(idx));
        _shapes.Add(shape);
        if (shape.IsWithinDocument)
        {
            _shapesVisibili.Add(shape);
        }
    }
}
/**
 * Reads an old-style (Word 6/95) section table from the document
 * stream. Each 12-byte PLC payload is a SectionDescriptor pointing at
 * the section's grpprl in the document stream.
 *
 * @param documentStream  raw document bytes
 * @param offset          start of the section PLC
 * @param size            byte length of the section PLC
 */
public OldSectionTable(byte[] documentStream, int offset, int size)
{
    PlexOfCps sedPlex = new PlexOfCps(documentStream, offset, size, 12);

    int length = sedPlex.Length;
    for (int x = 0; x < length; x++)
    {
        GenericPropertyNode node = sedPlex.GetProperty(x);
        SectionDescriptor sed = new SectionDescriptor(node.Bytes, 0);

        int fileOffset = sed.GetFc();
        int startAt = node.Start;
        int endAt = node.End;

        SEPX sepx;
        // A file offset of 0xffffffff means "no properties stored".
        if (fileOffset == unchecked((int)0xffffffff))
        {
            sepx = new SEPX(sed, startAt, endAt, new byte[0]);
        }
        else
        {
            // The first short at the offset is the size of the grpprl.
            int sepxSize = LittleEndian.GetShort(documentStream, fileOffset);
            // Because we don't properly know about all the details of the old
            // section properties, and we're trying to decode them as if they
            // were the new ones, we sometimes "need" more data than we have.
            // As a workaround, have a few extra 0 bytes on the end!
            byte[] buf = new byte[sepxSize + 2];
            fileOffset += LittleEndianConsts.SHORT_SIZE;
            // FIX: express the copy length as an explicit clamped minimum
            // instead of the original hand-rolled ternary, and skip the
            // copy entirely when the offset runs past the stream (the
            // original would have thrown from Array.Copy).
            int available = documentStream.Length - fileOffset;
            int copyLen = Math.Min(buf.Length, Math.Max(0, available));
            if (copyLen > 0)
            {
                Array.Copy(documentStream, fileOffset, buf, 0, copyLen);
            }
            sepx = new SEPX(sed, startAt, endAt, buf);
        }

        _sections.Add(sepx);
    }

    _sections.Sort(PropertyNode.SEPXComparator.instance);
}
/**
 * Reads an old-style (Word 6/95) section table from the document
 * stream. Each 12-byte PLC payload is a SectionDescriptor pointing at
 * the section's grpprl in the document stream.
 *
 * @param documentStream  raw document bytes
 * @param offset          start of the section PLC
 * @param size            byte length of the section PLC
 */
public OldSectionTable(byte[] documentStream, int offset, int size)
{
    PlexOfCps sedPlex = new PlexOfCps(documentStream, offset, size, 12);

    int length = sedPlex.Length;
    for (int x = 0; x < length; x++)
    {
        GenericPropertyNode node = sedPlex.GetProperty(x);
        SectionDescriptor sed = new SectionDescriptor(node.Bytes, 0);

        int fileOffset = sed.GetFc();
        int startAt = node.Start;
        int endAt = node.End;

        SEPX sepx;
        // A file offset of 0xffffffff means "no properties stored".
        if (fileOffset == unchecked((int)0xffffffff))
        {
            sepx = new SEPX(sed, startAt, endAt, new byte[0]);
        }
        else
        {
            // The first short at the offset is the size of the grpprl.
            int sepxSize = LittleEndian.GetShort(documentStream, fileOffset);
            // Because we don't properly know about all the details of the old
            // section properties, and we're trying to decode them as if they
            // were the new ones, we sometimes "need" more data than we have.
            // As a workaround, have a few extra 0 bytes on the end!
            byte[] buf = new byte[sepxSize + 2];
            fileOffset += LittleEndianConsts.SHORT_SIZE;
            // FIX: express the copy length as an explicit clamped minimum
            // instead of the original hand-rolled ternary, and skip the
            // copy entirely when the offset runs past the stream (the
            // original would have thrown from Array.Copy).
            int available = documentStream.Length - fileOffset;
            int copyLen = Math.Min(buf.Length, Math.Max(0, available));
            if (copyLen > 0)
            {
                Array.Copy(documentStream, fileOffset, buf, 0, copyLen);
            }
            sepx = new SEPX(sed, startAt, endAt, buf);
        }

        _sections.Add(sepx);
    }

    _sections.Sort(PropertyNode.SEPXComparator.instance);
}
/// <summary>
/// Returns the text-position property node at the given index.
/// </summary>
public GenericPropertyNode GetTextPosition(int index)
{
    return textPositions.GetProperty(index);
}
/// <summary>
/// Returns the descriptor property node at the given index.
/// </summary>
public GenericPropertyNode GetDescriptor(int index)
{
    return descriptors.GetProperty(index);
}
/**
 * Reads the section table from the table stream. Each PLC payload is a
 * SectionDescriptor whose file offset points at the section's grpprl
 * in the document stream; character positions are converted to file
 * offsets via CPtoFC. A heuristic at the end works around documents
 * that record section boundaries in bytes despite being unicode.
 */
public SectionTable(byte[] documentStream, byte[] tableStream, int OffSet, int size, int fcMin, TextPieceTable tpt, CPSplitCalculator cps)
{
    PlexOfCps sedPlex = new PlexOfCps(tableStream, OffSet, size, SED_SIZE);
    this.tpt = tpt;
    this._text = tpt.TextPieces;

    int length = sedPlex.Length;
    for (int x = 0; x < length; x++)
    {
        GenericPropertyNode node = sedPlex.GetProperty(x);
        SectionDescriptor sed = new SectionDescriptor(node.Bytes, 0);

        int fileOffset = sed.GetFc();
        // Section boundaries are stored as character positions; convert
        // them to file character offsets before building the SEPX.
        int startAt = CPtoFC(node.Start);
        int endAt = CPtoFC(node.End);

        // check for the optimization
        // (0xffffffff means "no section properties stored")
        if (fileOffset == unchecked((int)0xffffffff))
        {
            _sections.Add(new SEPX(sed, startAt, endAt, new byte[0]));
        }
        else
        {
            // The first short at the offset is the size of the grpprl.
            int sepxSize = LittleEndian.GetShort(documentStream, fileOffset);
            byte[] buf = new byte[sepxSize];
            fileOffset += LittleEndianConsts.SHORT_SIZE;
            Array.Copy(documentStream, fileOffset, buf, 0, buf.Length);
            _sections.Add(new SEPX(sed, startAt, endAt, buf));
        }
    }

    // Some files seem to lie about their unicode status, which
    // is very very pesky. Try to work around these, but this
    // is Getting on for black magic...
    int mainEndsAt = cps.GetMainDocumentEnd();
    bool matchAt = false;
    bool matchHalf = false;
    for (int i = 0; i < _sections.Count; i++)
    {
        SEPX s = _sections[i];
        if (s.End == mainEndsAt)
        {
            // Character-based end lines up with the main document end.
            matchAt = true;
        }
        else if (s.EndBytes == mainEndsAt || s.EndBytes == mainEndsAt - 1)
        {
            // Only the byte-based end lines up: boundaries were likely
            // recorded in bytes rather than characters.
            matchHalf = true;
        }
    }
    if (!matchAt && matchHalf)
    {
        //System.err.println("Your document seemed to be mostly unicode, but the section defInition was in bytes! Trying anyway, but things may well go wrong!");
        // Re-read the boundaries as raw (unconverted) offsets.
        for (int i = 0; i < _sections.Count; i++)
        {
            SEPX s = _sections[i];
            GenericPropertyNode node = sedPlex.GetProperty(i);
            int startAt = node.Start;
            int endAt = node.End;
            s.Start = (startAt);
            s.End = (endAt);
        }
    }
}
/**
 * Builds the text piece table from the piece-descriptor PLC in the
 * table stream, copying each piece's raw bytes out of the document
 * stream. Pieces are finally sorted into character order, and a
 * second list kept in file-position order.
 */
public TextPieceTable(byte[] documentStream, byte[] tableStream, int offset, int size, int fcMin)
{
    // get our plex of PieceDescriptors
    PlexOfCps pieceTable = new PlexOfCps(tableStream, offset, size, PieceDescriptor.SizeInBytes);

    int length = pieceTable.Length;
    PieceDescriptor[] pieces = new PieceDescriptor[length];

    // iterate through piece descriptors raw bytes and create
    // PieceDescriptor objects
    for (int x = 0; x < length; x++)
    {
        GenericPropertyNode node = pieceTable.GetProperty(x);
        pieces[x] = new PieceDescriptor(node.Bytes, 0);
    }

    // Figure out the cp of the earliest text piece
    // Note that text pieces don't have to be stored in order!
    _cpMin = pieces[0].FilePosition - fcMin;
    for (int x = 0; x < pieces.Length; x++)
    {
        int start = pieces[x].FilePosition - fcMin;
        if (start < _cpMin)
        {
            _cpMin = start;
        }
    }

    // using the PieceDescriptors, build our list of TextPieces.
    for (int x = 0; x < pieces.Length; x++)
    {
        int start = pieces[x].FilePosition;
        PropertyNode node = pieceTable.GetProperty(x);

        // Grab the start and end, which are in characters
        int nodeStartChars = node.Start;
        int nodeEndChars = node.End;

        // What's the relationship between bytes and characters?
        // Unicode pieces use 2 bytes per character; others use 1.
        bool unicode = pieces[x].IsUnicode;
        int multiple = 1;
        if (unicode)
        {
            multiple = 2;
        }

        // Figure out the Length, in bytes and chars
        int textSizeChars = (nodeEndChars - nodeStartChars);
        int textSizeBytes = textSizeChars * multiple;

        // Grab the data that Makes up the piece
        byte[] buf = new byte[textSizeBytes];
        Array.Copy(documentStream, start, buf, 0, textSizeBytes);

        // And now build the piece
        _textPieces.Add(new TextPiece(nodeStartChars, nodeEndChars, buf, pieces[x], node.Start));
    }

    // In the interest of our sanity, now sort the text pieces
    // into order, if they're not already
    _textPieces.Sort();
    // Keep a second copy ordered by file position (FC) for lookups
    // that work in byte offsets rather than character positions.
    _textPiecesFCOrder = new List<TextPiece>(_textPieces);
    _textPiecesFCOrder.Sort(new FCComparator());
}
/**
 * Reads the section table from the table stream. Each PLC payload is a
 * SectionDescriptor whose file offset points at the section's grpprl
 * in the document stream. A heuristic at the end works around
 * documents that record section boundaries in bytes despite being
 * unicode.
 */
public SectionTable(byte[] documentStream, byte[] tableStream, int OffSet, int size, int fcMin, TextPieceTable tpt, int mainLength)
{
    PlexOfCps sedPlex = new PlexOfCps(tableStream, OffSet, size, SED_SIZE);
    this.tpt = tpt;
    this._text = tpt.TextPieces;

    int length = sedPlex.Length;
    for (int x = 0; x < length; x++)
    {
        GenericPropertyNode node = sedPlex.GetProperty(x);
        SectionDescriptor sed = new SectionDescriptor(node.Bytes, 0);

        int fileOffset = sed.GetFc();
        //int startAt = CPtoFC(node.Start);
        //int endAt = CPtoFC(node.End);
        int startAt = node.Start;
        int endAt = node.End;

        // check for the optimization
        // (0xffffffff means "no section properties stored")
        if (fileOffset == unchecked((int)0xffffffff))
        {
            _sections.Add(new SEPX(sed, startAt, endAt, new byte[0]));
        }
        else
        {
            // The first short at the offset is the size of the grpprl.
            int sepxSize = LittleEndian.GetShort(documentStream, fileOffset);
            byte[] buf = new byte[sepxSize];
            fileOffset += LittleEndianConsts.SHORT_SIZE;
            Array.Copy(documentStream, fileOffset, buf, 0, buf.Length);
            _sections.Add(new SEPX(sed, startAt, endAt, buf));
        }
    }

    // Some files seem to lie about their unicode status, which
    // is very very pesky. Try to work around these, but this
    // is Getting on for black magic...
    int mainEndsAt = mainLength;
    bool matchAt = false;
    bool matchHalf = false;
    for (int i = 0; i < _sections.Count; i++)
    {
        SEPX s = _sections[i];
        if (s.End == mainEndsAt)
        {
            matchAt = true;
        }
        // FIX: compare the byte-based end here, as the sibling
        // CPSplitCalculator constructor does. The original re-tested
        // `s.End == mainEndsAt` (unreachable in this else branch) and
        // `s.End == mainEndsAt - 1`, so the byte-offset fallback below
        // could almost never trigger.
        else if (s.EndBytes == mainEndsAt || s.EndBytes == mainEndsAt - 1)
        {
            matchHalf = true;
        }
    }
    if (!matchAt && matchHalf)
    {
        //System.err.println("Your document seemed to be mostly unicode, but the section defInition was in bytes! Trying anyway, but things may well go wrong!");
        // Re-read the boundaries as raw (unconverted) offsets.
        for (int i = 0; i < _sections.Count; i++)
        {
            SEPX s = _sections[i];
            GenericPropertyNode node = sedPlex.GetProperty(i);
            int startAt = node.Start;
            int endAt = node.End;
            s.Start = (startAt);
            s.End = (endAt);
        }
    }
}