/// <summary>
/// Reads an array that starts with '[' at the reader's current position and
/// collects every object returned by <c>ReadObject</c> until the closing ']'.
/// </summary>
/// <param name="streamReader">Reader positioned on the opening '[' of the array.</param>
/// <param name="pdf">Document context that is handed through to <c>ReadObject</c>.</param>
/// <returns>The collected items, converted from the array produced by <c>List.ToArray()</c>.</returns>
/// <exception cref="InvalidDataException">
/// The first byte is not '[', or a position inside the array holds something that is
/// neither a readable object, whitespace, nor the closing ']'.
/// </exception>
internal PdfArray ReadArray(IPdfStreamReader streamReader, IPdf pdf)
{
    var opener = streamReader.Read();
    if (opener != '[')
    {
        throw new InvalidDataException("Unexpected array initializer, expected [");
    }

    var elements = new List<PdfObject>();
    streamReader.SeekToNextToken();

    while (true)
    {
        var element = this.ReadObject(streamReader, pdf);
        if (element != null)
        {
            elements.Add(element);
        }
        else if (streamReader.PeekIsNextByteWhitespace())
        {
            // Nothing readable here, but it is only whitespace: step over one byte and retry.
            streamReader.Seek(1, SeekOrigin.Current);
        }
        else if (streamReader.Peek() != ']')
        {
            // Not an object, not whitespace and not the terminator.
            throw new InvalidDataException("Unable to parse array, invalid object.");
        }

        if (streamReader.Peek() != ']')
        {
            continue;
        }

        // Consume the closing bracket and leave the reader on the next token.
        streamReader.Seek(1, SeekOrigin.Current);
        streamReader.SeekToNextToken();
        return elements.ToArray();
    }
}
/// <summary>
/// Reads a hexadecimal string of the form <c>&lt;...&gt;</c> starting at the reader's
/// current position and converts the collected hex text with <c>GetBytesFromHexString</c>.
/// </summary>
/// <param name="streamReader">Reader positioned on the opening '&lt;'.</param>
/// <param name="options">Not used by this method at present; kept for interface compatibility.</param>
/// <returns>The value produced by <c>GetBytesFromHexString</c> for the text between '&lt;' and '&gt;'.</returns>
/// <exception cref="InvalidDataException">
/// The first byte is not '&lt;', or the stream ends before a closing '&gt;' is found.
/// </exception>
internal PdfBinaryString ReadBinaryString(IPdfStreamReader streamReader, PdfOptions options)
{
    var start = streamReader.Read();
    if (start != '<')
    {
        throw new InvalidDataException("Invalid starting character, should be <");
    }

    var data = new StringBuilder();
    var buffer = new byte[512];

    while (true)
    {
        // Peek a whole chunk at once for speed; the position only moves on the Seek calls below.
        var read = streamReader.Peek(buffer, 0, buffer.Length);
        if (read <= 0)
        {
            // Without this guard an unterminated string would loop forever on empty peeks.
            throw new InvalidDataException("Unexpected end of stream while reading binary string, expected >");
        }

        var bufferText = Encoding.ASCII.GetString(buffer, 0, read);
        var endIndex = bufferText.IndexOf('>');
        if (endIndex >= 0)
        {
            // Keep everything before the terminator, then step past the data and the '>' itself.
            data.Append(bufferText, 0, endIndex);
            streamReader.Seek(endIndex + 1, SeekOrigin.Current);
            break;
        }

        // No terminator in this chunk: keep all of it and advance by exactly the number of
        // bytes consumed. (Advancing by read + 1 would drop one hex digit per chunk.)
        data.Append(bufferText);
        streamReader.Seek(read, SeekOrigin.Current);
    }

    streamReader.SeekToNextToken();
    var result = data.ToString().GetBytesFromHexString();
    return result;
}
/// <summary>
/// Reads the body of a stream object: the <c>stream</c> keyword, the end-of-line marker
/// after it, as many data bytes as the dictionary's <c>Length</c> entry specifies, and the
/// trailing <c>endstream</c> token.
/// </summary>
/// <param name="streamReader">Reader positioned on the <c>stream</c> keyword.</param>
/// <param name="pdf">Document context; its cross reference table is used when <c>Length</c> is a reference.</param>
/// <param name="streamDictionary">Dictionary that precedes the stream and carries the <c>Length</c> entry.</param>
/// <returns>A <c>PdfStream</c> built from <paramref name="streamDictionary"/> and the bytes that were read.</returns>
/// <exception cref="InvalidDataException">
/// The <c>Length</c> entry is missing or does not resolve to a number, the <c>stream</c> keyword
/// or its end-of-line marker is malformed, fewer data bytes than expected are available, or the
/// data is not followed by <c>endstream</c>.
/// </exception>
internal PdfStream ReadStream(IPdfStreamReader streamReader, IPdf pdf, PdfDictionary streamDictionary)
{
    var lengthEntry = streamDictionary["Length"];
    if (lengthEntry == null)
    {
        throw new InvalidDataException("Unable to get required stream length. Entry not found.");
    }

    PdfNumber length = null;
    if (lengthEntry is PdfNumber directLength)
    {
        length = directLength;
    }
    else if (lengthEntry is PdfReference reference)
    {
        // The length lives in another object: locate it through the cross reference table
        // and read it via PeekReadObject.
        var referenceEntry = pdf.CrossReferenceTable.Find(reference.ObjectNumber, reference.Generation);
        lengthEntry = this.PeekReadObject(streamReader, pdf, referenceEntry);
        length = lengthEntry as PdfNumber;
    }

    if (length == null)
    {
        throw new InvalidDataException("Unable to get required stream length. Value not found.");
    }

    var buffer = new byte[length];

    // Consume the keyword and make sure it really is "stream" rather than trusting the caller.
    var keyword = new byte[6];
    var keywordRead = streamReader.Read(keyword, 0, keyword.Length);
    if (keywordRead != keyword.Length || Encoding.ASCII.GetString(keyword, 0, keyword.Length) != "stream")
    {
        throw new InvalidDataException("Unexpected stream token, expected stream");
    }

    // The keyword must be followed by CR+LF or a lone LF. A dedicated buffer and the peek
    // count are used so that a short peek can never be judged on leftover bytes.
    var endOfLine = new byte[2];
    var endOfLineRead = streamReader.Peek(endOfLine, 0, endOfLine.Length);
    int seekLength;
    if (endOfLineRead >= 2 && endOfLine[0] == 0xD && endOfLine[1] == 0xA)
    {
        seekLength = 2;
    }
    else if (endOfLineRead >= 1 && endOfLine[0] == 0xA)
    {
        seekLength = 1;
    }
    else
    {
        throw new InvalidDataException("Unexpected end of line characters, expected CR+LF or just LF");
    }

    streamReader.Seek(seekLength, SeekOrigin.Current);

    var bytesRead = streamReader.Read(buffer, 0, buffer.Length);
    if (bytesRead != buffer.Length)
    {
        throw new InvalidDataException("Unexpected end of file while reading the data stream");
    }

    // An end of line between the data and the endstream keyword is acceptable.
    streamReader.SeekToNextToken(true);
    var endOfStreamToken = streamReader.ReadNextToken();
    if (endOfStreamToken != "endstream")
    {
        throw new InvalidDataException("Unexpected end of stream token, expected endstream");
    }

    var result = new PdfStream(streamDictionary, buffer);
    return result;
}
/// <summary>
/// Reads the object that starts at the reader's current position, choosing the concrete
/// reader method from the first byte that is peeked.
/// </summary>
/// <param name="streamReader">Reader positioned on the first byte of the object.</param>
/// <param name="pdf">Document context passed on to the nested readers that need it.</param>
/// <returns>
/// The object that was read, or <c>null</c> when the first byte does not start any
/// of the recognised object kinds.
/// </returns>
/// <exception cref="InvalidDataException">Nothing is left to peek (the peeked value is negative).</exception>
public PdfObject ReadObject(IPdfStreamReader streamReader, IPdf pdf)
{
    var firstByte = streamReader.Peek();
    if (firstByte < 0)
    {
        throw new InvalidDataException("Unexpected end of stream");
    }

    if (firstByte == '<')
    {
        // "<<" opens a dictionary; a single '<' opens a binary string.
        var lookahead = new byte[2];
        var peeked = streamReader.Peek(lookahead, 0, 2);
        var opensDictionary = peeked == 2 && lookahead[0] == '<' && lookahead[1] == '<';
        if (opensDictionary)
        {
            return this.ReadDictionary(streamReader, pdf);
        }

        return this.ReadBinaryString(streamReader, pdf.Options);
    }

    if (firstByte == 't' || firstByte == 'f')
    {
        // Candidate for a boolean; ReadBoolean deals with anything else.
        return this.ReadBoolean(streamReader);
    }

    if (firstByte == '(')
    {
        return this.ReadString(streamReader);
    }

    if (firstByte == '/')
    {
        return this.ReadName(streamReader);
    }

    var isDigit = firstByte >= '0' && firstByte <= '9';
    if (isDigit || firstByte == '+' || firstByte == '-' || firstByte == '.')
    {
        var number = this.ReadNumber(streamReader);

        // A number may be the first part of "N G R" (reference) or "N G obj" (indirect object).
        if (!this.TryPeekReadNumber(streamReader, out var generation))
        {
            return number;
        }

        var rewindPosition = streamReader.Position;
        this.ReadNumber(streamReader);
        var keyword = streamReader.ReadNextToken();

        if (keyword == "R")
        {
            // NOTE(review): the second number is passed first here - confirm this matches
            // the parameter order of the PdfReference constructor.
            return new PdfReference((int)generation, (int)number);
        }

        if (keyword == "obj")
        {
            // TODO: handle object types
            return this.ReadIndirectObject(streamReader, pdf, (int)generation, number);
        }

        // Neither keyword followed: undo the look-ahead and hand back the plain number.
        streamReader.Seek(rewindPosition, SeekOrigin.Begin);
        return number;
    }

    if (firstByte == '[')
    {
        return this.ReadArray(streamReader, pdf);
    }

    if (firstByte == '%')
    {
        return this.ReadComment(streamReader);
    }

    if (firstByte == 'n')
    {
        var token = streamReader.PeekNextToken(true);
        if (token == "null")
        {
            streamReader.ReadNextToken();
            return new PdfNull();
        }
    }

    // TODO: unrecognised data should be reported rather than silently returned as null.
    return null;
}