/// <summary>
/// Locate the PDF comment (if any) inside a line of data and extract its text.
/// The comment position is found with <c>PDF.CommentStart</c> and the comment bytes
/// are pulled out with <c>PDF.ExtractPDFLine</c>; the input line itself is not modified.
/// </summary>
/// <param name="OriginalLine">Data line as read from PDF file</param>
/// <param name="Comment">Complete text of the comment in the line, null if line contains no comment</param>
/// <param name="CommentStart">Index into OriginalLine of comment character, -1 if line contains no comment</param>
/// <param name="CommentEnd">Index into OriginalLine of last character in comment (after EOL). Will be -1 if line contains no comment</param>
public static void ExtractPDFComment(byte[] OriginalLine, out byte[] Comment, out int CommentStart, out int CommentEnd)
{
    // Find where the comment begins first; the extraction below starts from that offset.
    int CommentOffset = PDF.CommentStart(OriginalLine);
    CommentStart = CommentOffset;

    // The extraction helper reports the end offset of what it consumed through CommentEnd.
    Comment = PDF.ExtractPDFLine(OriginalLine, CommentOffset, out CommentEnd);
}
/// <summary>
/// Read a number of tokens (separated by whitespace or comments) from a PDF data stream as UTF8 characters
/// </summary>
/// <remarks>
/// A PDF comment is treated as a single whitespace character, so it terminates the token that
/// precedes it (e.g. "abc%note" followed by an EOL and "def" is read as the two tokens "abc" and "def").
/// A token is only counted once the separator following it has been seen; a token that runs up to
/// the very end of <paramref name="Data"/> without a trailing separator is therefore not counted.
/// </remarks>
/// <param name="Data">Data stream to read</param>
/// <param name="StartingIndex">Starting offset</param>
/// <param name="TokenStartIndex">Offset in the data array where the first token appears (0 if no token byte was read)</param>
/// <param name="EndingIndex">Offset in the data array where reading stopped. This is normally the offset just past the
/// whitespace byte that terminated the last token; when the last token was terminated by a comment it is the offset of
/// the comment delimiter itself, so that a subsequent read skips the comment instead of tokenizing its text</param>
/// <param name="WordCount">Number of tokens separated by whitespace to read. If multiple words are read, all whitespace will be replaced by a single space character (0x20)</param>
/// <returns>Read tokens as a UTF8 string, or NULL if unable to read the specified number of words before running out of data</returns>
internal static string GetTokenString(byte[] Data, int StartingIndex, out int TokenStartIndex, out int EndingIndex, int WordCount = 1)
{
    List<byte> TokenString = new List<byte>(20);
    bool InComment = false;
    bool InSeparator = false;   // true while inside a run of whitespace and/or comments

    TokenStartIndex = 0;
    EndingIndex = StartingIndex;

    while ((WordCount > 0) && (EndingIndex < Data.Length))
    {
        byte DataByte = Data[EndingIndex];

        if (InComment)
        {
            // Comment text is ignored; the first EOL byte ends the comment (and is consumed with it)
            InComment = !PDF.IsEOL(DataByte);
        }
        else
        {
            bool StartsComment = (DataByte == PDF.CommentDelimiter);

            if (StartsComment || PDF.IsWhitespace(DataByte))
            {
                if (!InSeparator)
                {
                    InSeparator = true;

                    if (TokenString.Count > 0)
                    {
                        // First separator byte after a token: the word is complete.
                        // (Leading separators, before any token byte, are simply skipped.)
                        WordCount--;

                        if (WordCount > 0)
                        {
                            // More words to come: collapse the whole separator run to one space
                            TokenString.Add(0x20);
                        }
                        else if (StartsComment)
                        {
                            // Final word ended by a comment: stop ON the delimiter rather than
                            // past it, otherwise the next read would start inside the comment text.
                            break;
                        }
                    }
                }

                if (StartsComment)
                {
                    InComment = true;
                }
            }
            else
            {
                InSeparator = false;

                if (TokenString.Count == 0)
                {
                    // Remember where the very first token byte was found
                    TokenStartIndex = EndingIndex;
                }

                TokenString.Add(DataByte);
            }
        }

        EndingIndex++;
    }

    if (WordCount > 0)
    {
        // Ran out of data before the requested number of words was completed
        return null;
    }

    return Encoding.UTF8.GetString(TokenString.ToArray());
}