Пример #1
0
        /// <summary>
        /// Attempt to parse a PDF hexadecimal string (<c>&lt;...&gt;</c>) from the given data stream, returning an indicator of parse progress
        /// </summary>
        /// <param name="StartingToken">The token found at StartingIndex; it must begin with '&lt;' and must not be the "&lt;&lt;" token for this parser to accept it</param>
        /// <param name="Data">Raw byte stream to parse</param>
        /// <param name="StartingIndex">0-based starting index into Data where StartingToken appears</param>
        /// <param name="EndingIndex">Index into data stream where parsing ended: one past the closing '&gt;' on success, Data.Length if the data ran out before a closing '&gt;' was seen, or StartingIndex if StartingToken is not a hex string opener</param>
        /// <returns>Object parsed from data stream, or NULL if unable to parse. If NULL and EndingIndex is equal to Data.Length, parsing may be successful with more data</returns>
        public static IPDFObject TryParse(string StartingToken, byte[] Data, int StartingIndex, out int EndingIndex)
        {
            bool IsHexStringToken = !String.IsNullOrEmpty(StartingToken) &&
                                    (StartingToken[0] == '<') &&
                                    !StartingToken.Equals("<<");
            if (!IsHexStringToken)
            {
                // Not ours to parse: report that nothing was consumed
                EndingIndex = StartingIndex;
                return(null);
            }

            List <byte> StringBytes = new List <byte>(StartingToken.Length);
            bool        HaveString  = false;

            // Step over the opening '<' and collect everything up to the closing '>'
            EndingIndex = StartingIndex + 1;
            while (!HaveString && (EndingIndex < Data.Length))
            {
                byte NextByte = Data[EndingIndex++];
                if (NextByte == (byte)'>')
                {
                    HaveString = true;
                }
                else if (!PDF.IsWhitespace(NextByte))
                {
                    // Bytes reported as whitespace by PDF.IsWhitespace are dropped from the string
                    StringBytes.Add(NextByte);
                }
            }

            if (!HaveString)
            {
                // Data ran out before the closing '>' was found. Returning a string built from
                // the partial contents would silently truncate it; signal "need more data" instead
                // (EndingIndex has reached Data.Length at this point).
                return(null);
            }

            return(new PDFHexString(Encoding.UTF8.GetString(StringBytes.ToArray())));
        }
Пример #2
0
        /// <summary>
        /// Attempt to parse a PDF array (<c>[ ... ]</c>) from the given data stream, returning an indicator of parse progress
        /// </summary>
        /// <param name="StartingToken">The token found at StartingIndex; it must begin with '[' for this parser to accept it</param>
        /// <param name="Data">Raw byte stream to parse</param>
        /// <param name="StartingIndex">0-based starting index into Data where StartingToken appears</param>
        /// <param name="EndingIndex">Index into data stream where parsing ended: one past the closing ']' on success, Data.Length if the data ran out before a closing ']' was seen, StartingIndex if StartingToken is not an array opener, or the position at which element parsing stalled</param>
        /// <returns>Object parsed from data stream, or NULL if unable to parse. If NULL and EndingIndex is equal to Data.Length, parsing may be successful with more data</returns>
        public static IPDFObject TryParse(string StartingToken, byte[] Data, int StartingIndex, out int EndingIndex)
        {
            EndingIndex = StartingIndex;

            if (String.IsNullOrEmpty(StartingToken) || (StartingToken[0] != '['))
            {
                return(null);
            }

            PDFArray ObjectArray = new PDFArray();

            // Step over the opening '[' and read elements until the closing ']'
            EndingIndex = StartingIndex + 1;
            while (EndingIndex < Data.Length)
            {
                int ElementStart = EndingIndex;

                IPDFObject nextObject = PDFObjectParser.Parse(Data, out EndingIndex, ElementStart);
                if (nextObject != null)
                {
                    ObjectArray.Add(nextObject);
                }

                // Skip whitespace after the element without running off the end of the buffer
                while ((EndingIndex < Data.Length) && PDF.IsWhitespace(Data[EndingIndex]))
                {
                    EndingIndex++;
                }

                if (EndingIndex >= Data.Length)
                {
                    // Out of data before the closing ']': more data may complete the array
                    EndingIndex = Data.Length;
                    return(null);
                }

                if (Data[EndingIndex] == (byte)']')
                {
                    // Consume the closing bracket
                    EndingIndex++;
                    return(ObjectArray);
                }

                if (EndingIndex <= ElementStart)
                {
                    // Nothing was consumed this iteration (no element, no whitespace, no ']').
                    // Retrying from the same position would loop forever, so report failure
                    // with EndingIndex left at the offending byte.
                    return(null);
                }
            }

            return(null);
        }
Пример #3
0
        /// <summary>
        /// Read a number of tokens (separated by whitespace) from a PDF data stream as UTF8 characters.
        /// Comments (from the comment delimiter up to and including the end-of-line byte that ends them)
        /// are skipped and act as a token separator, so tokens on either side of a comment are not merged.
        /// </summary>
        /// <param name="Data">Data stream to read</param>
        /// <param name="StartingIndex">Starting offset</param>
        /// <param name="TokenStartIndex">Offset in the data array where the first token byte appears (0 if no token byte was found)</param>
        /// <param name="EndingIndex">Offset in the data array where reading stopped (one past the separator that ended the last requested token, or Data.Length if the data ran out)</param>
        /// <param name="WordCount">Number of tokens separated by whitespace to read. If multiple words are read, all whitespace will be replaced by a single space character (0x20)</param>
        /// <returns>Read tokens as a UTF8 string, or NULL if unable to read the specified number of words before running out of data. A token only counts once the separator following it has been seen, so a token that runs to the very end of Data yields NULL</returns>
        internal static string GetTokenString(byte[] Data, int StartingIndex, out int TokenStartIndex, out int EndingIndex, int WordCount = 1)
        {
            List <Byte> TokenString  = new List <Byte>(20);
            bool        InComment    = false;
            bool        InWhitespace = false;

            TokenStartIndex = 0;
            EndingIndex     = StartingIndex;
            while ((WordCount > 0) && (EndingIndex < Data.Length))
            {
                byte DataByte     = Data[EndingIndex];
                bool CommentEnded = false;

                InComment = InComment || (DataByte == PDF.CommentDelimiter);

                if (InComment)
                {
                    // A comment runs until the first end-of-line byte
                    CommentEnded = PDF.IsEOL(DataByte);
                    InComment    = !CommentEnded;
                }

                if (!InComment)
                {
                    // The end-of-line byte that closes a comment counts as a separator, so that
                    // "abc%note\ndef" is read as two tokens rather than being glued into "abcdef"
                    bool IsSeparator = CommentEnded || PDF.IsWhitespace(DataByte);

                    if (!IsSeparator)
                    {
                        InWhitespace = false;
                        if (TokenString.Count == 0)
                        {
                            TokenStartIndex = EndingIndex;
                        }
                        TokenString.Add(DataByte);
                    }
                    else if (!InWhitespace)
                    {
                        // First separator byte of a run; the rest of the run is ignored
                        InWhitespace = true;
                        if (TokenString.Count > 0)
                        {
                            // Don't add leading/trailing whitespace
                            WordCount--;
                            if (WordCount > 0)
                            {
                                TokenString.Add(0x20);
                            }
                        }
                    }
                }

                EndingIndex++;
            }

            if (WordCount > 0)
            {
                return(null);
            }

            return(Encoding.UTF8.GetString(TokenString.ToArray()));
        }