/// <summary>
/// Reads an array starting at the reader's current position, which must be the opening '['.
/// Elements are parsed with <c>ReadObject</c> until the closing ']' is reached; the reader is
/// then moved past the ']' and on to the next token.
/// </summary>
/// <param name="streamReader">Reader positioned on the '[' character.</param>
/// <param name="pdf">Document context that is handed on to <c>ReadObject</c>.</param>
/// <returns>The parsed elements, in the order they were read.</returns>
/// <exception cref="InvalidDataException">
/// The first byte is not '[', or an element could not be parsed.
/// </exception>
internal PdfArray ReadArray(IPdfStreamReader streamReader, IPdf pdf)
{
    if (streamReader.Read() != '[')
    {
        throw new InvalidDataException("Unexpected array initializer, expected [");
    }

    var elements = new List<PdfObject>();
    streamReader.SeekToNextToken();

    for (;;)
    {
        var element = this.ReadObject(streamReader, pdf);
        if (element != null)
        {
            elements.Add(element);
        }
        else if (streamReader.PeekIsNextByteWhitespace())
        {
            // Nothing was parsed because we are sitting on whitespace: step over one byte.
            streamReader.Seek(1, SeekOrigin.Current);
        }
        else if (streamReader.Peek() != ']')
        {
            // Not whitespace and not the array terminator, so the data is not parseable.
            throw new InvalidDataException("Unable to parse array, invalid object.");
        }

        if (streamReader.Peek() != ']')
        {
            continue;
        }

        // Consume the terminator and leave the reader at the following token.
        streamReader.Seek(1, SeekOrigin.Current);
        streamReader.SeekToNextToken();
        return(elements.ToArray());
    }
}
/// <summary>
/// Reads the indirect object described by <paramref name="entry"/> and then puts the reader
/// back at the position it had when this method was called.
/// </summary>
/// <param name="streamReader">Reader whose position is saved and restored.</param>
/// <param name="pdf">Document context that is handed on to <c>ReadObject</c>.</param>
/// <param name="entry">Cross-reference entry identifying where the object is located.</param>
/// <returns>The indirect object that was read.</returns>
/// <remarks>
/// The position is restored in a <c>finally</c> block, so the reader is also put back when
/// reading the object throws.
/// </remarks>
public PdfIndirectObject PeekReadObject(IPdfStreamReader streamReader, IPdf pdf, CrossReferencePartSubSectionEntry entry)
{
    var originalOffset = streamReader.Position;
    try
    {
        return(this.ReadObject(streamReader, pdf, entry));
    }
    finally
    {
        // A peek must never leave the reader somewhere else, even on failure.
        streamReader.Seek(originalOffset, SeekOrigin.Begin);
    }
}
/// <summary>
/// Reads a hexadecimal string: a '&lt;' followed by hex text and terminated by '&gt;'.
/// The text between the delimiters is converted with <c>GetBytesFromHexString</c>, and the
/// reader is left at the token following the closing '&gt;'.
/// </summary>
/// <param name="streamReader">Reader positioned on the opening '&lt;'.</param>
/// <param name="options">Not used by this method.</param>
/// <returns>The decoded binary string.</returns>
/// <exception cref="InvalidDataException">
/// The first byte is not '&lt;', or the stream ends before a closing '&gt;' is found.
/// </exception>
internal PdfBinaryString ReadBinaryString(IPdfStreamReader streamReader, PdfOptions options)
{
    var start = streamReader.Read();
    if (start != '<')
    {
        throw new InvalidDataException("Invalid starting character, should be <");
    }

    var data = new StringBuilder();
    var buffer = new byte[512];

    while (true)
    {
        // Peek a whole buffer at a time to make it fast; the position only moves via Seek below.
        var read = streamReader.Peek(buffer, 0, buffer.Length);
        if (read <= 0)
        {
            // Without this check a truncated file would loop forever.
            throw new InvalidDataException("Unexpected end of stream");
        }

        // ASCII decoding maps one byte to one char, so a char index is also a byte offset.
        var bufferText = Encoding.ASCII.GetString(buffer, 0, read);
        var endIndex = bufferText.IndexOf('>');
        if (endIndex >= 0)
        {
            data.Append(bufferText, 0, endIndex);

            // Skip the hex text and the closing '>'.
            streamReader.Seek(endIndex + 1, SeekOrigin.Current);
            break;
        }

        // No terminator in this chunk: consume exactly the bytes that were peeked.
        data.Append(bufferText);
        streamReader.Seek(read, SeekOrigin.Current);
    }

    streamReader.SeekToNextToken();
    var result = data.ToString().GetBytesFromHexString();
    return(result);
}
/// <summary>
/// Moves the reader to the offset stored in <paramref name="entry"/> and reads the object
/// found there, which has to be an indirect object.
/// </summary>
/// <param name="streamReader">Reader to seek and read from; its position is not restored.</param>
/// <param name="pdf">Document context that is handed on to the object parser.</param>
/// <param name="entry">Cross-reference entry supplying the absolute offset.</param>
/// <returns>The indirect object located at the entry's offset.</returns>
/// <exception cref="InvalidDataException">
/// The object at the offset is not an indirect object.
/// </exception>
public PdfIndirectObject ReadObject(IPdfStreamReader streamReader, IPdf pdf, CrossReferencePartSubSectionEntry entry)
{
    streamReader.Seek(entry.Offset, SeekOrigin.Begin);

    if (!(this.ReadObject(streamReader, pdf) is PdfIndirectObject indirectObject))
    {
        throw new InvalidDataException("Expected object is not an indirect object.");
    }

    return(indirectObject);
}
/// <summary>
/// Reads a dictionary delimited by "&lt;&lt;" and "&gt;&gt;". The contents are parsed as
/// alternating objects: a name becomes the pending key and the object that follows it becomes
/// that key's value. After the closing "&gt;&gt;" the reader is moved to the next token.
/// </summary>
/// <param name="streamReader">Reader whose next token must be "&lt;&lt;".</param>
/// <param name="pdf">Document context that is handed on to <c>ReadObject</c>.</param>
/// <returns>The dictionary with every key/value pair that was read.</returns>
/// <exception cref="InvalidDataException">
/// The dictionary does not start with "&lt;&lt;", or it contains something that cannot be
/// parsed as an object.
/// </exception>
internal PdfDictionary ReadDictionary(IPdfStreamReader streamReader, IPdf pdf)
{
    var result = new PdfDictionary();

    var token = streamReader.ReadNextToken(false);
    if (token != "<<")
    {
        throw new InvalidDataException("Unexpected dictionary start string");
    }

    string currentKey = null;
    while (true)
    {
        token = streamReader.PeekNextToken();
        if (token == ">>")
        {
            // Step over the two '>' characters and leave the reader at the following token.
            streamReader.Seek(2, SeekOrigin.Current);
            streamReader.SeekToNextToken();
            return(result);
        }

        var positionBeforeObject = streamReader.Position;
        var value = this.ReadObject(streamReader, pdf);
        if (value == null && streamReader.Position == positionBeforeObject)
        {
            // Nothing was recognised and nothing was consumed, so every further iteration
            // would see exactly the same input: fail instead of looping forever.
            throw new InvalidDataException("Unable to parse dictionary, invalid object.");
        }

        if (currentKey != null)
        {
            result.Entries[currentKey] = value;
            currentKey = null;
        }
        else if (value is PdfName pdfName)
        {
            currentKey = pdfName.Value;
        }

        // An object that is not a name and arrives while no key is pending is dropped.
    }
}
/// <summary>
/// Reads a literal string enclosed in parentheses, starting at the reader's current position.
/// Balanced, unescaped parentheses inside the string are kept as part of the text. The reader
/// is left at the token following the closing parenthesis.
/// </summary>
/// <remarks>
/// Supported escapes after a backslash:
/// <list type="bullet">
/// <item><description>n, r, t, b, f produce the matching control character.</description></item>
/// <item><description>'(' , ')' and '\' produce the character itself and do not affect nesting.</description></item>
/// <item><description>One to three octal digits produce the character with that code; only the low eight bits are kept.</description></item>
/// <item><description>An end of line (CR, LF or CR+LF) is a line continuation and produces nothing.</description></item>
/// </list>
/// Each byte of the input is turned into the char with the same numeric value.
/// </remarks>
/// <param name="streamReader">Reader positioned on the opening parenthesis.</param>
/// <returns>The decoded string.</returns>
/// <exception cref="InvalidDataException">
/// The stream ends before the string is closed, or a backslash is followed by a character
/// that is not a supported escape.
/// </exception>
internal PdfString ReadString(IPdfStreamReader streamReader)
{
    var startPosition = streamReader.Position;
    var buffer = new byte[512];
    var text = new StringBuilder();
    var parenthesisCount = 0;
    var totalStringBytes = 0;
    var foundEndOfString = false;

    // Parser state that has to survive across buffer refills.
    var escaped = false;        // the previous character was an unconsumed backslash
    var skipLineFeed = false;   // a "\<CR>" continuation was seen; a directly following LF belongs to it
    string octalDigits = null;  // digits collected so far for a "\ddd" escape, or null

    char DecodeOctal(string digits) => (char)(Convert.ToInt32(digits, 8) & 0xFF);

    while (!foundEndOfString)
    {
        var bytesRead = streamReader.Read(buffer, 0, buffer.Length);
        if (bytesRead <= 0)
        {
            throw new InvalidDataException("Unexpected end of stream");
        }

        for (var index = 0; index < bytesRead && !foundEndOfString; index++)
        {
            totalStringBytes++;
            var c = (char)buffer[index];

            if (skipLineFeed)
            {
                skipLineFeed = false;
                if (c == '\n')
                {
                    continue;
                }
            }

            if (octalDigits != null)
            {
                if (c >= '0' && c <= '7')
                {
                    octalDigits += c;
                    if (octalDigits.Length == 3)
                    {
                        text.Append(DecodeOctal(octalDigits));
                        octalDigits = null;
                    }

                    continue;
                }

                // A code of one or two digits ends at the first non-octal character;
                // that character is an ordinary one and is handled below.
                text.Append(DecodeOctal(octalDigits));
                octalDigits = null;
            }

            if (escaped)
            {
                escaped = false;
                switch (c)
                {
                    case '\r':
                        skipLineFeed = true;
                        break;
                    case '\n':
                        break;
                    case 'n':
                        text.Append('\n');
                        break;
                    case 'r':
                        text.Append('\r');
                        break;
                    case 't':
                        text.Append('\t');
                        break;
                    case 'b':
                        text.Append('\b');
                        break;
                    case 'f':
                        text.Append('\f');
                        break;
                    case '(':
                    case ')':
                    case '\\':
                        text.Append(c);
                        break;
                    default:
                        if (c < '0' || c > '7')
                        {
                            throw new InvalidDataException("Invalid escaped character: " + c);
                        }

                        octalDigits = c.ToString();
                        break;
                }

                continue;
            }

            if (c == '\\')
            {
                escaped = true;
            }
            else if (c == '(')
            {
                // The outermost opening parenthesis is a delimiter, nested ones are text.
                if (parenthesisCount > 0)
                {
                    text.Append(c);
                }

                parenthesisCount++;
            }
            else if (c == ')')
            {
                parenthesisCount--;
                if (parenthesisCount > 0)
                {
                    text.Append(c);
                }
                else
                {
                    foundEndOfString = true;
                }
            }
            else
            {
                text.Append(c);
            }
        }
    }

    // The last Read may have gone past the end of the string, so position the reader
    // right after the closing parenthesis before looking for the next token.
    streamReader.Seek(startPosition + totalStringBytes, SeekOrigin.Begin);
    streamReader.SeekToNextToken();
    return(text.ToString());
}
/// <summary>
/// Reads the body of a stream object. The number of data bytes comes from the "Length" entry
/// of <paramref name="streamDictionary"/>, either directly or through a reference that is
/// looked up in the cross-reference table. The method skips six bytes (expected to be the
/// word "stream") and the end-of-line marker after them, reads the data, and then requires
/// the next token to be "endstream".
/// </summary>
/// <param name="streamReader">Reader positioned at the start of the stream keyword.</param>
/// <param name="pdf">Document used to resolve a referenced length.</param>
/// <param name="streamDictionary">Dictionary that belongs to the stream.</param>
/// <returns>A stream made of the dictionary and the bytes that were read.</returns>
/// <exception cref="InvalidDataException">
/// The length is missing or is not a number, the keyword is not followed by CR+LF or LF,
/// the data is shorter than the length, or the "endstream" token is missing.
/// </exception>
internal PdfStream ReadStream(IPdfStreamReader streamReader, IPdf pdf, PdfDictionary streamDictionary)
{
    var lengthEntry = streamDictionary["Length"];
    if (lengthEntry == null)
    {
        throw new InvalidDataException("Unable to get required stream length. Entry not found.");
    }

    PdfNumber length = null;
    if (lengthEntry is PdfNumber directLength)
    {
        length = directLength;
    }
    else if (lengthEntry is PdfReference lengthReference)
    {
        // The length lives in another object: read it without disturbing the current position.
        var tableEntry = pdf.CrossReferenceTable.Find(lengthReference.ObjectNumber, lengthReference.Generation);
        lengthEntry = this.PeekReadObject(streamReader, pdf, tableEntry);

        // NOTE(review): PeekReadObject returns a PdfIndirectObject; this cast can only
        // succeed if that object is itself a PdfNumber - confirm this is intended.
        length = lengthEntry as PdfNumber;
    }

    if (length == null)
    {
        throw new InvalidDataException("Unable to get required stream length. Value not found.");
    }

    var data = new byte[length];

    // Consume the six bytes of the keyword, then look at (without consuming) the two
    // bytes after it to find out which end-of-line marker is used.
    var scratch = new byte[6];
    streamReader.Read(scratch, 0, 6);
    streamReader.Peek(scratch, 0, 2);

    var isCarriageReturnLineFeed = scratch[0] == 0xD && scratch[1] == 0xA;
    if (!isCarriageReturnLineFeed && scratch[0] != 0xA)
    {
        throw new InvalidDataException("Unexpected end of line characters, expected CR+LF or just LF");
    }

    streamReader.Seek(isCarriageReturnLineFeed ? 2 : 1, SeekOrigin.Current);

    if (streamReader.Read(data, 0, data.Length) != data.Length)
    {
        throw new InvalidDataException("Unexpected end of file while reading the data stream");
    }

    // An end of line between the data and the closing keyword is allowed.
    streamReader.SeekToNextToken(true);
    if (streamReader.ReadNextToken() != "endstream")
    {
        throw new InvalidDataException("Unexpected end of stream token, expected endstream");
    }

    return(new PdfStream(streamDictionary, data));
}
/// <summary>
/// Reads one object at the reader's current position. The first byte is peeked to decide
/// which specialised reader handles the object.
/// </summary>
/// <remarks>
/// Dispatch by first byte:
/// <list type="bullet">
/// <item><description>'&lt;': a dictionary when a second '&lt;' follows, otherwise a binary string.</description></item>
/// <item><description>'t' or 'f': a boolean.</description></item>
/// <item><description>'(': a string.</description></item>
/// <item><description>'/': a name.</description></item>
/// <item><description>digit, '+', '-' or '.': a number; when it is followed by a second number and the token "R" or "obj", a reference or an indirect object.</description></item>
/// <item><description>'[': an array.</description></item>
/// <item><description>'%': a comment.</description></item>
/// <item><description>'n': a null object when the token is "null".</description></item>
/// </list>
/// </remarks>
/// <param name="streamReader">Reader positioned on the first byte of the object.</param>
/// <param name="pdf">Document context handed on to the readers that need it.</param>
/// <returns>
/// The object that was read, or <c>null</c> when the first byte does not start any
/// recognised object.
/// </returns>
/// <exception cref="InvalidDataException">The reader is at the end of the stream.</exception>
public PdfObject ReadObject(IPdfStreamReader streamReader, IPdf pdf)
{
    var firstByte = streamReader.Peek();
    if (firstByte < 0)
    {
        throw new InvalidDataException("Unexpected end of stream");
    }

    if (firstByte == '<')
    {
        // Two angle brackets open a dictionary, a single one opens a binary string.
        var lookahead = new byte[2];
        var peeked = streamReader.Peek(lookahead, 0, 2);
        var opensDictionary = peeked == 2 && lookahead[0] == '<' && lookahead[1] == '<';
        if (opensDictionary)
        {
            return(this.ReadDictionary(streamReader, pdf));
        }

        return(this.ReadBinaryString(streamReader, pdf.Options));
    }

    if (firstByte == 't' || firstByte == 'f')
    {
        // Possibly a boolean; the boolean reader decides what to do with anything else.
        return(this.ReadBoolean(streamReader));
    }

    if (firstByte == '(')
    {
        return(this.ReadString(streamReader));
    }

    if (firstByte == '/')
    {
        return(this.ReadName(streamReader));
    }

    var startsNumber = (firstByte >= '0' && firstByte <= '9')
        || firstByte == '+'
        || firstByte == '-'
        || firstByte == '.';
    if (startsNumber)
    {
        var number = this.ReadNumber(streamReader);

        // Two numbers in a row may be the start of a reference or an indirect object.
        if (this.TryPeekReadNumber(streamReader, out var generation))
        {
            var rewindPosition = streamReader.Position;
            this.ReadNumber(streamReader);
            var keyword = streamReader.ReadNextToken();

            // NOTE(review): both calls below pass the second number first and the first
            // number second - confirm this matches the parameter order they expect.
            if (keyword == "R")
            {
                return(new PdfReference((int)generation, (int)number));
            }

            if (keyword == "obj")
            {
                //TODO: handle object types
                return(this.ReadIndirectObject(streamReader, pdf, (int)generation, number));
            }

            // Just two plain numbers: undo the lookahead so the second one is read later.
            streamReader.Seek(rewindPosition, SeekOrigin.Begin);
        }

        return(number);
    }

    if (firstByte == '[')
    {
        return(this.ReadArray(streamReader, pdf));
    }

    if (firstByte == '%')
    {
        return(this.ReadComment(streamReader));
    }

    if (firstByte == 'n' && streamReader.PeekNextToken(true) == "null")
    {
        streamReader.ReadNextToken();
        return(new PdfNull());
    }

    //TODO: invalid data, puke all over
    return(null);
}