/**
 * Builds a text piece from its raw bytes, decoding them according to
 * the unicode flag of the piece descriptor.
 *
 * @param start Beginning offset in main document stream, in characters.
 * @param end Ending offset in main document stream, in characters.
 * @param text The raw bytes of our text
 * @param pd The piece descriptor; its IsUnicode flag is remembered and
 *           is also what buildInitSB uses to pick the decoding.
 * @throws InvalidOperationException if end is before start, or if the
 *         decoded text does not contain exactly (end - start) characters.
 */
public TextPiece(int start, int end, byte[] text, PieceDescriptor pd)
    : base(start, end, buildInitSB(text, pd))
{
    _usesUnicode = pd.IsUnicode;
    _pd = pd;

    // Validate.
    // The negative-size test has to run first: a negative (end - start)
    // can never equal a character count, so testing the length first
    // would always win and hide the more precise message.
    if (end < start)
    {
        throw new InvalidOperationException("Told we're of negative size! start=" + start + " end=" + end);
    }

    int textLength = ((StringBuilder)_buf).Length;
    if (end - start != textLength)
    {
        throw new InvalidOperationException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
    }
}
/**
 * Create the StringBuilder from the text and unicode flag.
 *
 * @param text The raw bytes of the text.
 * @param pd The piece descriptor; when its IsUnicode flag is set the
 *           bytes are decoded as UTF-16LE, otherwise as Windows-1252.
 * @return A StringBuilder holding the decoded characters.
 * @throws InvalidOperationException if the runtime cannot supply the
 *         Windows-1252 encoding.
 */
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd)
{
    Encoding encoding;
    if (pd.IsUnicode)
    {
        // Encoding.Unicode is UTF-16 little endian and is built in,
        // so no lookup (and no lookup failure) is involved.
        encoding = Encoding.Unicode;
    }
    else
    {
        // Only the lookup is guarded, so that a problem with the
        // text argument itself is not misreported as a missing encoding.
        try
        {
            encoding = Encoding.GetEncoding("Windows-1252");
        }
        catch (ArgumentException e)
        {
            throw new InvalidOperationException("The runtime doesn't know about the required Windows-1252 character encoding!", e);
        }
        catch (NotSupportedException e)
        {
            throw new InvalidOperationException("The runtime doesn't know about the required Windows-1252 character encoding!", e);
        }
    }

    return new StringBuilder(encoding.GetString(text));
}
/**
 * Create the StringBuilder from the text and unicode flag.
 *
 * @param text The raw bytes of the text.
 * @param pd The piece descriptor; when its IsUnicode flag is set the
 *           bytes are decoded as UTF-16LE, otherwise as Windows-1252.
 * @return A StringBuilder holding the decoded characters.
 * @throws InvalidOperationException if the runtime cannot supply the
 *         Windows-1252 encoding.
 */
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd)
{
    Encoding encoding;
    if (pd.IsUnicode)
    {
        // Encoding.Unicode is UTF-16 little endian and is built in,
        // so no lookup (and no lookup failure) is involved.
        encoding = Encoding.Unicode;
    }
    else
    {
        // Only the lookup is guarded, so that a problem with the
        // text argument itself is not misreported as a missing encoding.
        try
        {
            encoding = Encoding.GetEncoding("Windows-1252");
        }
        catch (ArgumentException e)
        {
            throw new InvalidOperationException("The runtime doesn't know about the required Windows-1252 character encoding!", e);
        }
        catch (NotSupportedException e)
        {
            throw new InvalidOperationException("The runtime doesn't know about the required Windows-1252 character encoding!", e);
        }
    }

    return new StringBuilder(encoding.GetString(text));
}
/**
 * Writes the raw bytes of every text piece to the given stream and
 * returns the serialized plex of piece descriptors describing them.
 *
 * Before each piece is written, the stream is padded with zero bytes
 * up to the next multiple of POIFSConstants.SMALLER_BIG_BLOCK_SIZE,
 * and the piece's descriptor is updated to point at the resulting
 * stream offset.
 *
 * @param docStream The stream that receives the text bytes.
 * @return The bytes of the plex holding one descriptor per text piece.
 */
public byte[] WriteTo(HWPFStream docStream)
{
    PlexOfCps plex = new PlexOfCps(PieceDescriptor.SizeInBytes);

    foreach (TextPiece piece in _textPieces)
    {
        PieceDescriptor descriptor = piece.PieceDescriptor;

        // Pad with zeros so the piece starts on a block boundary.
        int overhang = docStream.Offset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE;
        if (overhang != 0)
        {
            docStream.Write(new byte[POIFSConstants.SMALLER_BIG_BLOCK_SIZE - overhang]);
        }

        // The descriptor must record where the text actually lands.
        descriptor.FilePosition = docStream.Offset;
        docStream.Write(piece.RawBytes);

        // Piece boundaries are already expressed in characters, so they
        // go into the plex unchanged alongside the updated descriptor.
        plex.AddProperty(new GenericPropertyNode(piece.Start, piece.End, descriptor.ToByteArray()));
    }

    return plex.ToByteArray();
}
/**
 * Loads an old-format document from the given directory: locates the
 * text (via the complex file table when the FIB says the file is
 * complex, otherwise as one contiguous 8 bit run between fcMin and
 * fcMac) and then reads the character, paragraph and section tables.
 *
 * @param directory The directory node handed on to the base constructor.
 */
public HWPFOldDocument(DirectoryNode directory)
    : base(directory)
{
    // Where are things?
    int sedTableOffset = LittleEndian.GetInt(_mainStream, 0x88);
    int sedTableSize = LittleEndian.GetInt(_mainStream, 0x8c);
    int chpTableOffset = LittleEndian.GetInt(_mainStream, 0xb8);
    int chpTableSize = LittleEndian.GetInt(_mainStream, 0xbc);
    int papTableOffset = LittleEndian.GetInt(_mainStream, 0xc0);
    int papTableSize = LittleEndian.GetInt(_mainStream, 0xc4);
    //int shfTableOffset = LittleEndian.GetInt(_mainStream, 0x60);
    //int shfTableSize = LittleEndian.GetInt(_mainStream, 0x64);
    int complexTableOffset = LittleEndian.GetInt(_mainStream, 0x160);

    int fcMin = _fib.GetFcMin();

    // We need to get hold of the text that makes up the
    // document, which might be regular or fast-saved
    if (_fib.IsFComplex())
    {
        ComplexFileTable cft = new ComplexFileTable(
            _mainStream, _mainStream,
            complexTableOffset, fcMin
        );
        tpt = cft.GetTextPieceTable();
    }
    else
    {
        // TODO Discover if these older documents can ever hold Unicode Strings?
        //  (We think not, because they seem to lack a Piece table)
        // TODO Build the Piece Descriptor properly
        //  (We have to fake it, as they don't seem to have a proper Piece table)
        PieceDescriptor pd = new PieceDescriptor(new byte[] { 0, 0, 0, 0, 0, 127, 0, 0 }, 0);
        pd.FilePosition = fcMin;

        // Generate a single Text Piece Table, with a single Text Piece
        //  which covers all the (8 bit only) text in the file
        tpt = new TextPieceTable();
        byte[] textData = new byte[_fib.GetFcMac() - fcMin];
        Array.Copy(_mainStream, fcMin, textData, 0, textData.Length);
        TextPiece tp = new TextPiece(
            0, textData.Length, textData, pd
        );
        tpt.Add(tp);
    }
    _text = tpt.Text;

    // Now we can fetch the character, paragraph and section properties.
    // Each table must be read from its own offset/size pair.
    _cbt = new OldCHPBinTable(
        _mainStream, chpTableOffset, chpTableSize,
        fcMin, tpt
    );
    _pbt = new OldPAPBinTable(
        _mainStream, papTableOffset, papTableSize,
        fcMin, tpt
    );
    _st = new OldSectionTable(
        _mainStream, sedTableOffset, sedTableSize,
        fcMin, tpt
    );
}
/**
 * Reads the piece table out of the table stream and builds one
 * TextPiece per piece descriptor, copying each piece's bytes from the
 * document stream.
 *
 * @param documentStream The bytes the piece text is copied from.
 * @param tableStream The bytes holding the plex of piece descriptors.
 * @param offset Where the plex starts within tableStream.
 * @param size The size of the plex, as passed on to PlexOfCps.
 * @param fcMin Subtracted from each piece's file position when working
 *              out the earliest piece (_cpMin).
 */
public TextPieceTable(byte[] documentStream, byte[] tableStream, int offset, int size, int fcMin)
{
    // Get our plex of PieceDescriptors
    PlexOfCps plex = new PlexOfCps(tableStream, offset, size, PieceDescriptor.SizeInBytes);
    int count = plex.Length;

    // Turn the raw bytes of every plex entry into a PieceDescriptor
    PieceDescriptor[] descriptors = new PieceDescriptor[count];
    for (int i = 0; i < count; i++)
    {
        descriptors[i] = new PieceDescriptor(plex.GetProperty(i).Bytes, 0);
    }

    // Find the earliest text piece. Every descriptor has to be looked
    // at, since text pieces don't have to be stored in order!
    int earliest = descriptors[0].FilePosition - fcMin;
    foreach (PieceDescriptor candidate in descriptors)
    {
        earliest = Math.Min(earliest, candidate.FilePosition - fcMin);
    }
    _cpMin = earliest;

    // Using the PieceDescriptors, build our list of TextPieces.
    for (int i = 0; i < descriptors.Length; i++)
    {
        PieceDescriptor descriptor = descriptors[i];
        int byteOffset = descriptor.FilePosition;
        PropertyNode node = plex.GetProperty(i);

        // The node boundaries are in characters
        int firstChar = node.Start;
        int lastChar = node.End;

        // Unicode pieces take two bytes per character, others one
        int bytesPerChar = descriptor.IsUnicode ? 2 : 1;
        int byteCount = (lastChar - firstChar) * bytesPerChar;

        // Grab the data that makes up the piece
        byte[] raw = new byte[byteCount];
        Array.Copy(documentStream, byteOffset, raw, 0, byteCount);

        // And now build the piece
        _textPieces.Add(new TextPiece(firstChar, lastChar, raw, descriptor, node.Start));
    }

    // In the interest of our sanity, now sort the text pieces
    // into order, if they're not already
    _textPieces.Sort();

    // Keep a second copy of the list, ordered by the FCComparator
    _textPiecesFCOrder = new List<TextPiece>(_textPieces);
    _textPiecesFCOrder.Sort(new FCComparator());
}
/**
 * Convenience overload that simply forwards to the four argument
 * constructor.
 *
 * @param start Beginning offset in main document stream, in characters.
 * @param end Ending offset in main document stream, in characters.
 * @param text The raw bytes of our text
 * @param pd The piece descriptor for this text.
 * @param cpStart Not used; it is accepted but not passed on.
 */
public TextPiece(int start, int end, byte[] text, PieceDescriptor pd, int cpStart)
    : this(start, end, text, pd)
{
}
/**
 * Compares this piece descriptor with another object.
 *
 * @param o The object to compare against; may be null or of any type.
 * @return true only when o is a PieceDescriptor whose descriptor, prm
 *         and unicode values all equal ours; false otherwise,
 *         including for null and for objects of another type.
 */
public override bool Equals(Object o)
{
    // Equals must answer false, not throw, for null or foreign types,
    // so use a soft cast rather than a hard one.
    PieceDescriptor pd = o as PieceDescriptor;
    if (object.ReferenceEquals(pd, null))
    {
        return false;
    }

    return descriptor == pd.descriptor
        && prm == pd.prm
        && unicode == pd.unicode;
}