Beispiel #1
0
        /// <summary>
        /// Creates a text piece for the character range <paramref name="start"/> to
        /// <paramref name="end"/>, decoding <paramref name="text"/> through
        /// <c>buildInitSB</c> according to the unicode flag of <paramref name="pd"/>.
        /// </summary>
        /// <param name="start">Start of the range, in characters.</param>
        /// <param name="end">End of the range, in characters.</param>
        /// <param name="text">The raw bytes of the text.</param>
        /// <param name="pd">Piece descriptor; its <c>IsUnicode</c> flag selects the decoding.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <paramref name="end"/> is smaller than <paramref name="start"/>, or when the
        /// decoded text does not contain exactly <c>end - start</c> characters.
        /// </exception>
        public TextPiece(int start, int end, byte[] text, PieceDescriptor pd)
            : base(start, end, buildInitSB(text, pd))
        {
            _usesUnicode = pd.IsUnicode;
            _pd = pd;

            // Validate. The negative-size check has to run first: a text length is never
            // negative, so a negative range would always trip the length comparison below
            // and the more specific message would never be reported.
            if (end < start)
            {
                throw new InvalidOperationException("Told we're of negative size! start=" + start + " end=" + end);
            }

            int textLength = ((StringBuilder)_buf).Length;
            if (end - start != textLength)
            {
                throw new InvalidOperationException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
            }
        }
Beispiel #2
0
        /// <summary>
        /// Creates a text piece for the character range <paramref name="start"/> to
        /// <paramref name="end"/>, decoding <paramref name="text"/> through
        /// <c>buildInitSB</c> according to the unicode flag of <paramref name="pd"/>.
        /// </summary>
        /// <param name="start">Start of the range, in characters.</param>
        /// <param name="end">End of the range, in characters.</param>
        /// <param name="text">The raw bytes of the text.</param>
        /// <param name="pd">Piece descriptor; its <c>IsUnicode</c> flag selects the decoding.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <paramref name="end"/> is smaller than <paramref name="start"/>, or when the
        /// decoded text does not contain exactly <c>end - start</c> characters.
        /// </exception>
        public TextPiece(int start, int end, byte[] text, PieceDescriptor pd)
            : base(start, end, buildInitSB(text, pd))
        {
            _usesUnicode = pd.IsUnicode;
            _pd          = pd;

            // Validate: reject a negative range before comparing lengths. In the opposite
            // order this branch is unreachable, because a negative (end - start) can never
            // equal the non-negative text length and the length check fires first.
            if (end < start)
            {
                throw new InvalidOperationException("Told we're of negative size! start=" + start + " end=" + end);
            }

            int textLength = ((StringBuilder)_buf).Length;

            if (end - start != textLength)
            {
                throw new InvalidOperationException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
            }
        }
Beispiel #3
0
 /**
  * Create the StringBuilder from the text and unicode flag.
  *
  * @param text The raw bytes to decode.
  * @param pd Piece descriptor; when its IsUnicode flag is set the bytes are
  *           decoded as UTF-16LE, otherwise as Windows-1252.
  * @return A StringBuilder holding the decoded characters.
  * @throws InvalidOperationException if the Windows-1252 encoding cannot be
  *         obtained from the runtime.
  */
 private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd)
 {
     Encoding encoding;
     if (pd.IsUnicode)
     {
         // Encoding.Unicode is UTF-16 little endian and is always available,
         // so no lookup by name (which could fail) is needed.
         encoding = Encoding.Unicode;
     }
     else
     {
         // Only the lookup is guarded: Encoding.GetEncoding(string) reports an
         // unknown or unsupported code page with ArgumentException. Decoding
         // itself never raises EncoderFallbackException.
         try
         {
             encoding = Encoding.GetEncoding("Windows-1252");
         }
         catch (ArgumentException e)
         {
             throw new InvalidOperationException("The required character encoding Windows-1252 is not available on this platform!", e);
         }
         catch (NotSupportedException e)
         {
             throw new InvalidOperationException("The required character encoding Windows-1252 is not available on this platform!", e);
         }
     }
     return new StringBuilder(encoding.GetString(text));
 }
Beispiel #4
0
        /**
         * Create the StringBuilder from the text and unicode flag.
         *
         * @param text The raw bytes to decode.
         * @param pd Piece descriptor; when its IsUnicode flag is set the bytes are
         *           decoded as UTF-16LE, otherwise as Windows-1252.
         * @return A StringBuilder holding the decoded characters.
         * @throws InvalidOperationException if the Windows-1252 encoding cannot be
         *         obtained from the runtime.
         */
        private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd)
        {
            if (pd.IsUnicode)
            {
                // UTF-16 little endian is built in; no by-name lookup that could fail.
                return new StringBuilder(Encoding.Unicode.GetString(text));
            }

            Encoding singleByte;
            try
            {
                // Only the lookup is guarded. Encoding.GetEncoding(string) signals an
                // unknown or unsupported code page with ArgumentException; the previous
                // EncoderFallbackException handler could never match.
                singleByte = Encoding.GetEncoding("Windows-1252");
            }
            catch (ArgumentException e)
            {
                throw new InvalidOperationException("The required character encoding Windows-1252 is not available on this platform!", e);
            }
            catch (NotSupportedException e)
            {
                throw new InvalidOperationException("The required character encoding Windows-1252 is not available on this platform!", e);
            }

            return new StringBuilder(singleByte.GetString(text));
        }
Beispiel #5
0
        /// <summary>
        /// Writes the raw bytes of every text piece to <paramref name="docStream"/> and
        /// returns the serialized plex describing where each piece was written.
        /// </summary>
        /// <param name="docStream">Stream that receives the text bytes.</param>
        /// <returns>The byte form of the plex of piece descriptors.</returns>
        public byte[] WriteTo(HWPFStream docStream)
        {
            PlexOfCps plex = new PlexOfCps(PieceDescriptor.SizeInBytes);

            foreach (TextPiece piece in _textPieces)
            {
                PieceDescriptor descriptor = piece.PieceDescriptor;

                // Pad with zero bytes so the piece starts on a multiple of
                // SMALLER_BIG_BLOCK_SIZE.
                int overhang = docStream.Offset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE;
                if (overhang != 0)
                {
                    docStream.Write(new byte[POIFSConstants.SMALLER_BIG_BLOCK_SIZE - overhang]);
                }

                // Record where the piece lands before its bytes are written.
                descriptor.FilePosition = docStream.Offset;
                docStream.Write(piece.RawBytes);

                // Piece boundaries are already expressed in characters, so they go
                // into the plex unchanged together with the updated descriptor.
                plex.AddProperty(new GenericPropertyNode(piece.Start, piece.End, descriptor.ToByteArray()));
            }

            return plex.ToByteArray();
        }
Beispiel #6
0
        /// <summary>
        /// Loads an old-format document from <paramref name="directory"/>: reads the table
        /// locations from the main stream, builds the text piece table (from the complex
        /// file table when the FIB reports a complex file, otherwise as one synthetic
        /// 8 bit piece) and then creates the character, paragraph and section tables.
        /// </summary>
        /// <param name="directory">Directory node handed to the base constructor.</param>
        public HWPFOldDocument(DirectoryNode directory)
            : base(directory)
        {
            // Where are things?
            int sedTableOffset = LittleEndian.GetInt(_mainStream, 0x88);
            int sedTableSize = LittleEndian.GetInt(_mainStream, 0x8c);
            int chpTableOffset = LittleEndian.GetInt(_mainStream, 0xb8);
            int chpTableSize = LittleEndian.GetInt(_mainStream, 0xbc);
            int papTableOffset = LittleEndian.GetInt(_mainStream, 0xc0);
            int papTableSize = LittleEndian.GetInt(_mainStream, 0xc4);
            //int shfTableOffset = LittleEndian.GetInt(_mainStream, 0x60);
            //int shfTableSize   = LittleEndian.GetInt(_mainStream, 0x64);
            int complexTableOffset = LittleEndian.GetInt(_mainStream, 0x160);

            // We need to get hold of the text that makes up the
            //  document, which might be regular or fast-saved
            if (_fib.IsFComplex())
            {
                ComplexFileTable cft = new ComplexFileTable(
                        _mainStream, _mainStream,
                        complexTableOffset, _fib.GetFcMin()
                );
                tpt = cft.GetTextPieceTable();
            }
            else
            {
                // TODO Discover if these older documents can ever hold Unicode Strings?
                //  (We think not, because they seem to lack a Piece table)
                // TODO Build the Piece Descriptor properly
                //  (We have to fake it, as they don't seem to have a proper Piece table)
                PieceDescriptor pd = new PieceDescriptor(new byte[] { 0, 0, 0, 0, 0, 127, 0, 0 }, 0);
                pd.FilePosition = _fib.GetFcMin();

                // Generate a single Text Piece Table, with a single Text Piece
                //  which covers all the (8 bit only) text in the file
                tpt = new TextPieceTable();
                byte[] textData = new byte[_fib.GetFcMac() - _fib.GetFcMin()];
                Array.Copy(_mainStream, _fib.GetFcMin(), textData, 0, textData.Length);
                TextPiece tp = new TextPiece(
                        0, textData.Length, textData, pd
                );
                tpt.Add(tp);
            }

            _text = tpt.Text;

            // Now we can fetch the character, paragraph and section properties.
            // Each table must be read from its OWN offset; previously the paragraph
            // and section tables were both read from the character table offset.
            _cbt = new OldCHPBinTable(
                    _mainStream, chpTableOffset, chpTableSize,
                    _fib.GetFcMin(), tpt
            );
            _pbt = new OldPAPBinTable(
                    _mainStream, papTableOffset, papTableSize,
                    _fib.GetFcMin(), tpt
            );
            _st = new OldSectionTable(
                    _mainStream, sedTableOffset, sedTableSize,
                    _fib.GetFcMin(), tpt
            );
        }
Beispiel #7
0
        /// <summary>
        /// Builds the table from the plex of piece descriptors found in
        /// <paramref name="tableStream"/>, copying the bytes of each piece out of
        /// <paramref name="documentStream"/>.
        /// </summary>
        /// <param name="documentStream">Bytes from which the text of each piece is copied.</param>
        /// <param name="tableStream">Bytes holding the plex of piece descriptors.</param>
        /// <param name="offset">Offset of the plex, passed on to <c>PlexOfCps</c>.</param>
        /// <param name="size">Size of the plex, passed on to <c>PlexOfCps</c>.</param>
        /// <param name="fcMin">Value subtracted from each file position when computing <c>_cpMin</c>.</param>
        public TextPieceTable(byte[] documentStream, byte[] tableStream, int offset, int size, int fcMin)
        {
            // Read the plex and turn every raw entry into a PieceDescriptor.
            PlexOfCps plex = new PlexOfCps(tableStream, offset, size, PieceDescriptor.SizeInBytes);
            int count = plex.Length;

            PieceDescriptor[] descriptors = new PieceDescriptor[count];
            for (int i = 0; i < count; i++)
            {
                descriptors[i] = new PieceDescriptor(plex.GetProperty(i).Bytes, 0);
            }

            // Text pieces don't have to be stored in order, so scan all of them
            // for the smallest file position relative to fcMin.
            int earliest = descriptors[0].FilePosition - fcMin;
            foreach (PieceDescriptor candidate in descriptors)
            {
                earliest = Math.Min(earliest, candidate.FilePosition - fcMin);
            }
            _cpMin = earliest;

            // Turn each descriptor into a TextPiece.
            for (int i = 0; i < descriptors.Length; i++)
            {
                PieceDescriptor descriptor = descriptors[i];
                int byteOffset = descriptor.FilePosition;
                PropertyNode property = plex.GetProperty(i);

                // The plex boundaries are in characters...
                int firstChar = property.Start;
                int lastChar = property.End;

                // ...so convert to bytes: two per character for unicode pieces, else one.
                int bytesPerChar = descriptor.IsUnicode ? 2 : 1;
                int byteCount = (lastChar - firstChar) * bytesPerChar;

                // Copy out the bytes that make up this piece.
                byte[] raw = new byte[byteCount];
                Array.Copy(documentStream, byteOffset, raw, 0, byteCount);

                _textPieces.Add(new TextPiece(firstChar, lastChar, raw, descriptor, property.Start));
            }

            // Keep one list in the pieces' natural sort order and a second copy
            // ordered by FCComparator.
            _textPieces.Sort();
            _textPiecesFCOrder = new List<TextPiece>(_textPieces);
            _textPiecesFCOrder.Sort(new FCComparator());
        }
Beispiel #8
0
 /**
  * Creates a text piece by delegating to the four argument constructor.
  *
  * @param start Beginning offset in main document stream, in characters.
  * @param end Ending offset in main document stream, in characters.
  * @param text The raw bytes of our text
  * @param pd The piece descriptor, passed through to the four argument constructor.
  * @param cpStart Not used: it is not forwarded and the constructor body is empty.
  */
 public TextPiece(int start, int end, byte[] text, PieceDescriptor pd, int cpStart)
     : this(start, end, text, pd)
 {
     
 }
Beispiel #9
0
        /// <summary>
        /// Compares this descriptor with <paramref name="o"/> on the <c>descriptor</c>,
        /// <c>prm</c> and <c>unicode</c> fields.
        /// </summary>
        /// <param name="o">Object to compare with; may be null or of another type.</param>
        /// <returns>
        /// <c>true</c> when <paramref name="o"/> is a <c>PieceDescriptor</c> whose three
        /// compared fields match; <c>false</c> otherwise, including for null and foreign types.
        /// </returns>
        public override bool Equals(Object o)
        {
            // Equals must answer false, not throw, for null or unrelated objects.
            // ReferenceEquals avoids going through any user-defined == operator.
            PieceDescriptor pd = o as PieceDescriptor;
            if (ReferenceEquals(pd, null))
            {
                return false;
            }

            // NOTE(review): confirm GetHashCode is overridden consistently with these fields.
            return descriptor == pd.descriptor && prm == pd.prm && unicode == pd.unicode;
        }
Beispiel #10
0
 /**
  * Creates a text piece by delegating to the four argument constructor.
  *
  * @param start Beginning offset in main document stream, in characters.
  * @param end Ending offset in main document stream, in characters.
  * @param text The raw bytes of our text
  * @param pd The piece descriptor, passed through to the four argument constructor.
  * @param cpStart Not used: it is not forwarded and the constructor body is empty.
  */
 public TextPiece(int start, int end, byte[] text, PieceDescriptor pd, int cpStart)
     : this(start, end, text, pd)
 {
 }