public bool MoveNext() { // Read until we find object-number generation obj, e.g. "69 420 obj". int tokensRead = 0; while (coreTokenScanner.MoveNext() && !Equals(coreTokenScanner.CurrentToken, OperatorToken.StartObject)) { if (coreTokenScanner.CurrentToken is CommentToken) { continue; } tokensRead++; previousTokens[0] = previousTokens[1]; previousTokenPositions[0] = previousTokenPositions[1]; previousTokens[1] = coreTokenScanner.CurrentToken; previousTokenPositions[1] = coreTokenScanner.CurrentTokenStart; } // We only read partial tokens. if (tokensRead < 2) { return(false); } var startPosition = previousTokenPositions[0]; var objectNumber = previousTokens[0] as NumericToken; var generation = previousTokens[1] as NumericToken; if (objectNumber == null || generation == null) { throw new PdfDocumentFormatException("The obj operator (start object) was not preceded by a 2 numbers." + $"Instead got: {previousTokens[0]} {previousTokens[1]} obj"); } // Read all tokens between obj and endobj. while (coreTokenScanner.MoveNext() && !Equals(coreTokenScanner.CurrentToken, OperatorToken.EndObject)) { if (coreTokenScanner.CurrentToken is CommentToken) { continue; } if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartObject)) { // This should never happen. Debug.Assert(false, "Encountered a start object 'obj' operator before the end of the previous object."); return(false); } if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartStream)) { // Read stream: special case. if (TryReadStream(coreTokenScanner.CurrentTokenStart, out var stream)) { readTokens.Clear(); readTokens.Add(stream); } } else { readTokens.Add(coreTokenScanner.CurrentToken); } previousTokens[0] = previousTokens[1]; previousTokenPositions[0] = previousTokenPositions[1]; previousTokens[1] = coreTokenScanner.CurrentToken; previousTokenPositions[1] = coreTokenScanner.CurrentPosition; } if (!ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.EndObject)) { readTokens.Clear(); return(false); } var reference = new IndirectReference(objectNumber.Long, generation.Int); IToken token; if (readTokens.Count == 3 && readTokens[0] is NumericToken objNum && readTokens[1] is NumericToken genNum && ReferenceEquals(readTokens[2], OperatorToken.R)) { // I have no idea if this can ever happen. token = new IndirectReferenceToken(new IndirectReference(objNum.Long, genNum.Int)); }
public bool MoveNext() { if (isDisposed) { throw new ObjectDisposedException(nameof(PdfTokenScanner)); } // Read until we find object-number generation obj, e.g. "69 420 obj". int tokensRead = 0; while (coreTokenScanner.MoveNext() && !Equals(coreTokenScanner.CurrentToken, OperatorToken.StartObject)) { if (coreTokenScanner.CurrentToken is CommentToken) { continue; } tokensRead++; previousTokens[0] = previousTokens[1]; previousTokenPositions[0] = previousTokenPositions[1]; previousTokens[1] = coreTokenScanner.CurrentToken; previousTokenPositions[1] = coreTokenScanner.CurrentTokenStart; } // We only read partial tokens. if (tokensRead < 2) { return(false); } var startPosition = previousTokenPositions[0]; var objectNumber = previousTokens[0] as NumericToken; var generation = previousTokens[1] as NumericToken; if (objectNumber == null || generation == null) { // Handle case where the scanner correctly reads most of an object token but includes too much of the first token // specifically %%EOF1 0 obj where scanning starts from 'F'. if (generation != null && previousTokens[0] is OperatorToken op) { var match = EndsWithNumberRegex.Match(op.Data); if (match.Success && int.TryParse(match.Value, NumberStyles.Any, CultureInfo.InvariantCulture, out var number)) { startPosition = previousTokenPositions[0] + match.Index; objectNumber = new NumericToken(number); } else { return(false); } } else { return(false); } } // Read all tokens between obj and endobj. while (coreTokenScanner.MoveNext() && !Equals(coreTokenScanner.CurrentToken, OperatorToken.EndObject)) { if (coreTokenScanner.CurrentToken is CommentToken) { continue; } if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartObject)) { // This should never happen. Debug.Assert(false, "Encountered a start object 'obj' operator before the end of the previous object."); return(false); } if (ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.StartStream)) { var streamIdentifier = new IndirectReference(objectNumber.Long, generation.Int); // Prevent an infinite loop where a stream's length references the stream or the stream's offset. var getLengthFromFile = !(callingObject.HasValue && callingObject.Value.Equals(streamIdentifier)); var outerCallingObject = callingObject; try { callingObject = streamIdentifier; // Read stream: special case. if (TryReadStream(coreTokenScanner.CurrentTokenStart, getLengthFromFile, out var stream)) { readTokens.Clear(); readTokens.Add(stream); } } finally { callingObject = outerCallingObject; } } else { readTokens.Add(coreTokenScanner.CurrentToken); } previousTokens[0] = previousTokens[1]; previousTokenPositions[0] = previousTokenPositions[1]; previousTokens[1] = coreTokenScanner.CurrentToken; previousTokenPositions[1] = coreTokenScanner.CurrentPosition; } if (!ReferenceEquals(coreTokenScanner.CurrentToken, OperatorToken.EndObject)) { readTokens.Clear(); return(false); } var reference = new IndirectReference(objectNumber.Long, generation.Int); IToken token; if (readTokens.Count == 3 && readTokens[0] is NumericToken objNum && readTokens[1] is NumericToken genNum && ReferenceEquals(readTokens[2], OperatorToken.R)) { // I have no idea if this can ever happen. token = new IndirectReferenceToken(new IndirectReference(objNum.Long, genNum.Int)); }