Пример #1
0
 /// <summary>
 /// Locate the PDF comment in a data line and extract it, reporting the comment bytes together with
 /// the position of the comment inside the line. Nothing is returned directly; all results are
 /// delivered through the out parameters.
 /// </summary>
 /// <param name="OriginalLine">Data line as read from PDF file</param>
 /// <param name="Comment">Comment bytes as produced by PDF.ExtractPDFLine; expected to be null if the line contains no comment</param>
 /// <param name="CommentStart">Index into OriginalLine of the comment character as reported by PDF.CommentStart; expected to be -1 if the line contains no comment</param>
 /// <param name="CommentEnd">Index into OriginalLine of the last character in the comment (after EOL) as reported by PDF.ExtractPDFLine; expected to be -1 if the line contains no comment</param>
 public static void ExtractPDFComment(byte[] OriginalLine, out byte[] Comment, out int CommentStart, out int CommentEnd)
 {
     // Find where the comment begins first; the extraction step needs that offset.
     int commentOffset = PDF.CommentStart(OriginalLine);

     CommentStart = commentOffset;
     Comment      = PDF.ExtractPDFLine(OriginalLine, commentOffset, out CommentEnd);
 }
Пример #2
0
        /// <summary>
        /// Read a number of tokens (separated by whitespace) from a PDF data stream as UTF8 characters.
        /// Comments (everything from the comment delimiter up to and including the next end-of-line byte)
        /// are skipped and act as a token separator, so a comment that directly follows a token ends that token
        /// instead of gluing the text after the comment onto it.
        /// </summary>
        /// <param name="Data">Data stream to read</param>
        /// <param name="StartingIndex">Starting offset</param>
        /// <param name="TokenStartIndex">Offset in the data array of the first byte of the first token, 0 if no token byte was found</param>
        /// <param name="EndingIndex">
        /// Offset in the data array where reading stopped. When the last requested token is ended by a whitespace byte
        /// this is the offset just past that byte; when it is ended by a comment this is the offset of the comment
        /// delimiter itself, so that a subsequent read starting here skips the whole comment.
        /// </param>
        /// <param name="WordCount">Number of tokens separated by whitespace to read. If multiple words are read, all whitespace will be replaced by a single space character (0x20)</param>
        /// <returns>
        /// Read tokens as a UTF8 string, or NULL if unable to read the specified number of words before running out of data.
        /// A token only counts as read once its terminating whitespace or comment has been seen, so a final token that
        /// runs up to the very end of the data still yields NULL. A WordCount of zero or less yields an empty string.
        /// </returns>
        internal static string GetTokenString(byte[] Data, int StartingIndex, out int TokenStartIndex, out int EndingIndex, int WordCount = 1)
        {
            List<Byte> TokenString  = new List<Byte>(20);
            bool       InComment    = false;
            bool       InWhitespace = false;

            TokenStartIndex = 0;
            EndingIndex     = StartingIndex;
            while ((WordCount > 0) && (EndingIndex < Data.Length))
            {
                byte DataByte = Data[EndingIndex];

                if (!InComment && (DataByte == PDF.CommentDelimiter))
                {
                    InComment = true;

                    if (!InWhitespace && (TokenString.Count > 0))
                    {
                        // The comment starts right after a token byte: it separates tokens
                        // exactly like whitespace would, so the current word ends here.
                        InWhitespace = true;
                        WordCount--;
                        if (WordCount == 0)
                        {
                            // Stop ON the delimiter (do not consume it) so the caller's next
                            // read starts at the comment and skips it as a whole.
                            break;
                        }

                        TokenString.Add(0x20);
                    }
                }

                if (InComment)
                {
                    // The end-of-line byte that closes the comment is consumed as part of it
                    InComment = !PDF.IsEOL(DataByte);
                }
                else if (PDF.IsWhitespace(DataByte))
                {
                    if (!InWhitespace)
                    {
                        // First whitespace byte of a run; the rest of the run is ignored
                        InWhitespace = true;
                        if (TokenString.Count > 0)
                        {
                            // Don't add leading/trailing whitespace
                            WordCount--;
                            if (WordCount > 0)
                            {
                                TokenString.Add(0x20);
                            }
                        }
                    }
                }
                else
                {
                    InWhitespace = false;
                    if (TokenString.Count == 0)
                    {
                        TokenStartIndex = EndingIndex;
                    }
                    TokenString.Add(DataByte);
                }

                EndingIndex++;
            }

            if (WordCount > 0)
            {
                // Ran out of data before the requested number of words was completed
                return null;
            }

            return Encoding.UTF8.GetString(TokenString.ToArray());
        }