/// <summary>
/// The values passed to the constructor are exposed through
/// <c>FirstNumber</c> and <c>Count</c>.
/// </summary>
public void SetsPropertiesCorrectly()
{
    const int firstNumber = 5;
    const int count = 12;

    var subsection = new TableSubsectionDefinition(firstNumber, count);

    Assert.Equal(firstNumber, subsection.FirstNumber);
    Assert.Equal(count, subsection.Count);
}
/// <summary>
/// Reading fails when the second part of the definition ("9t") is not a number.
/// </summary>
public void SecondPartInvalidFormatFalse()
{
    // Arrange
    var converted = StringBytesTestConverter.Convert("85 9t", false);

    // Act
    var wasRead = TableSubsectionDefinition.TryRead(log, converted.Bytes, out _);

    // Assert
    Assert.False(wasRead);
}
/// <summary>
/// Reading fails when the line holds three numeric parts rather than two.
/// </summary>
public void TryReadIncorrectFormatMultiplePartsFalse()
{
    // Arrange
    var converted = StringBytesTestConverter.Convert("76362 100 1000", false);

    // Act
    var wasRead = TableSubsectionDefinition.TryRead(log, converted.Bytes, out _);

    // Assert
    Assert.False(wasRead);
}
/// <summary>
/// Reading from a buffer fails when the second part of the definition ("9t") is not a number.
/// </summary>
public void SecondPartInvalidFormatFalse()
{
    // Arrange
    var buffer = new RandomAccessBuffer(OtherEncodings.StringAsLatin1Bytes("85 9t"));

    // Act
    var wasRead = TableSubsectionDefinition.TryRead(log, buffer, out _);

    // Assert
    Assert.False(wasRead);
}
/// <summary>
/// Reading from a buffer fails when the line holds three numeric parts rather than two.
/// </summary>
public void TryReadIncorrectFormatMultiplePartsFalse()
{
    // Arrange
    var buffer = new RandomAccessBuffer(OtherEncodings.StringAsLatin1Bytes(@"76362 100 1000"));

    // Act
    var wasRead = TableSubsectionDefinition.TryRead(log, buffer, out _);

    // Assert
    Assert.False(wasRead);
}
/// <summary>
/// A first object number larger than <see cref="int.MaxValue"/> is read successfully
/// together with its count.
/// </summary>
public void ValidWithLongTrue()
{
    const long expectedFirstNumber = 214748364700L;
    const int expectedCount = 6;

    var converted = StringBytesTestConverter.Convert("214748364700 6", false);

    var success = TableSubsectionDefinition.TryRead(log, converted.Bytes, out var subsection);

    Assert.True(success);
    Assert.Equal(expectedFirstNumber, subsection.FirstNumber);
    Assert.Equal(expectedCount, subsection.Count);
}
/// <summary>
/// A well-formed "first count" pair is read and both values are exposed on the result.
/// </summary>
public void ValidTrue()
{
    const int expectedFirstNumber = 12;
    const int expectedCount = 32;

    var converted = StringBytesTestConverter.Convert("12 32", false);

    var success = TableSubsectionDefinition.TryRead(log, converted.Bytes, out var subsection);

    Assert.True(success);
    Assert.Equal(expectedFirstNumber, subsection.FirstNumber);
    Assert.Equal(expectedCount, subsection.Count);
}
/// <summary>
/// A well-formed "first count" pair is read from a buffer and both values are exposed on the result.
/// </summary>
public void ValidTrue()
{
    const int expectedFirstNumber = 12;
    const int expectedCount = 32;

    var buffer = new RandomAccessBuffer(OtherEncodings.StringAsLatin1Bytes("12 32"));

    var success = TableSubsectionDefinition.TryRead(log, buffer, out var subsection);

    Assert.True(success);
    Assert.Equal(expectedFirstNumber, subsection.FirstNumber);
    Assert.Equal(expectedCount, subsection.Count);
}
/// <summary>
/// A first object number larger than <see cref="int.MaxValue"/> is read successfully
/// from a buffer together with its count.
/// </summary>
public void ValidWithLongTrue()
{
    const long expectedFirstNumber = 214748364700L;
    const int expectedCount = 6;

    var buffer = new RandomAccessBuffer(OtherEncodings.StringAsLatin1Bytes("214748364700 6"));

    var success = TableSubsectionDefinition.TryRead(log, buffer, out var subsection);

    Assert.True(success);
    Assert.Equal(expectedFirstNumber, subsection.FirstNumber);
    Assert.Equal(expectedCount, subsection.Count);
}
/// <summary>
/// Parses a cross-reference table starting at <paramref name="offset"/>, followed by its trailer dictionary.
/// </summary>
/// <param name="scanner">The token scanner to read from. It is moved to <paramref name="offset"/> when not already positioned there.</param>
/// <param name="offset">The position at which the table is expected to begin; also recorded on the resulting part.</param>
/// <param name="isLenientParsing">Forwarded to the line processing and the trailer parsing.</param>
/// <returns>The table part built from the entries that were read and the trailer dictionary.</returns>
/// <exception cref="PdfDocumentFormatException">
/// The operator at the start position is not <c>xref</c>, or the first object number is not followed by
/// a numeric token. Line processing may also raise this exception for malformed lines.
/// </exception>
public CrossReferenceTablePart Parse(ISeekableTokenScanner scanner, long offset, bool isLenientParsing)
{
    var builder = new CrossReferenceTablePartBuilder
    {
        Offset = offset,
        XRefType = CrossReferenceType.Table
    };

    if (scanner.CurrentPosition != offset)
    {
        scanner.Seek(offset);
    }

    scanner.MoveNext();

    if (scanner.CurrentToken is OperatorToken operatorToken)
    {
        if (operatorToken.Data == "xref")
        {
            scanner.MoveNext();
        }
        else
        {
            throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
        }
    }

    if (scanner.CurrentToken is NumericToken firstObjectNumber)
    {
        if (!scanner.TryReadToken(out NumericToken objectCount))
        {
            throw new PdfDocumentFormatException($"Unexpected token following xref and {firstObjectNumber}. We found: {scanner.CurrentToken}.");
        }

        var definition = new TableSubsectionDefinition(firstObjectNumber.Long, objectCount.Int);

        // Line breaks are significant while reading table entries, so surface them as tokens.
        var tokenizer = new EndOfLineTokenizer();

        scanner.RegisterCustomTokenizer((byte)'\r', tokenizer);
        scanner.RegisterCustomTokenizer((byte)'\n', tokenizer);

        try
        {
            var readingLine = false;
            var tokens = new List<IToken>();
            var count = 0;

            while (scanner.MoveNext())
            {
                if (scanner.CurrentToken is EndOfLineToken)
                {
                    // Skip blank lines and consecutive end-of-line markers.
                    if (!readingLine)
                    {
                        continue;
                    }

                    readingLine = false;

                    count = ProcessTokens(tokens, scanner, builder, isLenientParsing, count, ref definition);

                    tokens.Clear();

                    continue;
                }

                if (scanner.CurrentToken is CommentToken)
                {
                    continue;
                }

                var isLineOperator = scanner.CurrentToken is OperatorToken op && (op.Data == FreeEntry || op.Data == InUseEntry);

                // Anything other than a number or an entry-type operator ends the table.
                if (!(scanner.CurrentToken is NumericToken) && !isLineOperator)
                {
                    break;
                }

                readingLine = true;
                tokens.Add(scanner.CurrentToken);
            }

            // The last line may not have been terminated by an end-of-line token.
            if (tokens.Count > 0)
            {
                ProcessTokens(tokens, scanner, builder, isLenientParsing, count, ref definition);
            }
        }
        finally
        {
            // Always restore the scanner, even when a malformed line throws, so that callers
            // catching the exception do not keep receiving end-of-line tokens.
            scanner.DeregisterCustomTokenizer(tokenizer);
        }
    }

    builder.Dictionary = ParseTrailer(scanner, isLenientParsing);

    return builder.Build();
}
/// <summary>
/// Interprets the tokens of a single line of the cross-reference table.
/// </summary>
/// <param name="tokens">The tokens read from the current line.</param>
/// <param name="scanner">The scanner; its current position is used to detect offsets pointing into the table itself.</param>
/// <param name="builder">Receives the in-use entries; its <c>Offset</c> marks the start of the table.</param>
/// <param name="isLenientParsing">When <see langword="true"/>, malformed entry lines are skipped instead of throwing.</param>
/// <param name="objectCount">The number of entries already read for the current subsection.</param>
/// <param name="definition">The current subsection definition; replaced when the line starts a new subsection.</param>
/// <returns>
/// The updated entry count for the current subsection: <c>0</c> after a new subsection header,
/// <paramref name="objectCount"/> + 1 after a free or in-use entry, otherwise <paramref name="objectCount"/> unchanged.
/// </returns>
/// <exception cref="PdfDocumentFormatException">
/// The line is malformed (always when a subsection header was expected, otherwise only in non-lenient mode),
/// or an in-use entry's offset lies inside the table being read.
/// </exception>
private static int ProcessTokens(List<IToken> tokens, ISeekableTokenScanner scanner, CrossReferenceTablePartBuilder builder, bool isLenientParsing, int objectCount, ref TableSubsectionDefinition definition)
{
    string GetErrorMessage()
    {
        var representation = "Invalid line format in xref table: [" + string.Join(", ", tokens.Select(x => x.ToString())) + "]";

        return representation;
    }

    // The current subsection is complete, so this line must be the header of the next one.
    if (objectCount == definition.Count)
    {
        if (tokens.Count == 2
            && tokens[0] is NumericToken newFirstObjectToken
            && tokens[1] is NumericToken newObjectCountToken)
        {
            definition = new TableSubsectionDefinition(newFirstObjectToken.Long, newObjectCountToken.Int);

            return 0;
        }

        // With fewer than two tokens there is no tokens[1] to report; indexing it would raise
        // ArgumentOutOfRangeException instead of the expected format exception.
        if (tokens.Count < 2)
        {
            throw new PdfDocumentFormatException(GetErrorMessage());
        }

        throw new PdfDocumentFormatException($"Found a line with 2 unexpected entries in the cross reference table: {tokens[0]}, {tokens[1]}.");
    }

    // An entry line needs at least an offset, a generation number and an entry-type operator.
    if (tokens.Count <= 2)
    {
        if (!isLenientParsing)
        {
            throw new PdfDocumentFormatException(GetErrorMessage());
        }

        return objectCount;
    }

    var lastToken = tokens[tokens.Count - 1];

    if (lastToken is OperatorToken operatorToken)
    {
        // Free entries are counted but not recorded.
        if (operatorToken.Data == FreeEntry)
        {
            return objectCount + 1;
        }

        if (operatorToken.Data != InUseEntry)
        {
            if (!isLenientParsing)
            {
                throw new PdfDocumentFormatException(GetErrorMessage());
            }

            return objectCount;
        }

        if (tokens[0] is NumericToken offset && tokens[1] is NumericToken generationNumber)
        {
            // An object cannot start inside the table that describes it.
            if (offset.Long >= builder.Offset && offset.Long <= scanner.CurrentPosition)
            {
                throw new PdfDocumentFormatException($"Object offset {offset} is within its own cross-reference table for object {definition.FirstNumber + objectCount}");
            }

            builder.Add(definition.FirstNumber + objectCount, generationNumber.Int, offset.Long);

            return objectCount + 1;
        }
    }
    else
    {
        if (!isLenientParsing)
        {
            throw new PdfDocumentFormatException(GetErrorMessage());
        }
    }

    return objectCount;
}
/// <summary>
/// Attempts to read a cross-reference table, including its trailer, from the current position of <paramref name="source"/>.
/// </summary>
/// <param name="source">The input to read from; the table is expected to start at its current position.</param>
/// <param name="offset">The offset recorded on the builder and used in log and error messages.</param>
/// <param name="isLenientParsing">
/// When <see langword="true"/>, an unreadable subsection definition stops the table reading and parsing
/// continues with the trailer instead of returning <see langword="false"/>.
/// </param>
/// <param name="pool">Forwarded to the trailer parsing.</param>
/// <param name="builder">
/// The builder holding the in-use entries and the trailer. It is <see langword="null"/> when the input
/// does not start with a non-empty <c>xref</c> table.
/// </param>
/// <returns>
/// <see langword="true"/> when the table and trailer were read; <see langword="false"/> when the input does not
/// start with <c>xref</c>, the table is empty, or a subsection definition could not be read in non-lenient mode.
/// </returns>
/// <exception cref="InvalidOperationException">
/// An entry's offset points inside the table itself, an entry's numbers cannot be parsed, an entry's type
/// indicator is neither in-use nor free, or the trailer could not be parsed.
/// </exception>
public bool TryParse(IRandomAccessRead source, long offset, bool isLenientParsing, CosObjectPool pool, out CrossReferenceTablePartBuilder builder)
{
    builder = null;

    var tableStartOffset = source.GetPosition();

    if (source.Peek() != 'x')
    {
        return false;
    }

    var xref = ReadHelper.ReadString(source);
    if (!xref.Trim().Equals("xref"))
    {
        return false;
    }

    // check for trailer after xref
    var str = ReadHelper.ReadString(source);
    byte[] b = OtherEncodings.StringAsLatin1Bytes(str);
    source.Rewind(b.Length);

    // Keyword comparison on file content must not depend on the current culture.
    if (str.StartsWith("trailer", StringComparison.Ordinal))
    {
        log.Warn("skipping empty xref table");
        return false;
    }

    builder = new CrossReferenceTablePartBuilder
    {
        Offset = offset,
        XRefType = CrossReferenceType.Table
    };

    // Tables can have multiple sections. Each starts with a starting object id and a count.
    while (true)
    {
        if (!TableSubsectionDefinition.TryRead(log, source, out var subsectionDefinition))
        {
            log.Warn($"Unexpected subsection definition in the cross-reference table at offset {offset}");

            if (isLenientParsing)
            {
                break;
            }

            return false;
        }

        var currentObjectId = subsectionDefinition.FirstNumber;

        ReadHelper.SkipSpaces(source);
        for (var i = 0; i < subsectionDefinition.Count; i++)
        {
            if (source.IsEof() || ReadHelper.IsEndOfName((char)source.Peek()))
            {
                break;
            }

            // A 't' here is taken as the start of the trailer, so the subsection ended early.
            if (source.Peek() == 't')
            {
                break;
            }

            //Ignore table contents
            var currentLine = ReadHelper.ReadLine(source);
            var splitString = currentLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (splitString.Length < 3)
            {
                log.Warn("invalid xref line: " + currentLine);
                break;
            }

            // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
            if (splitString[splitString.Length - 1].Equals(InUseEntry))
            {
                try
                {
                    // PDF numbers are machine-readable data: parse them independently of the current culture.
                    var objectOffset = long.Parse(splitString[0], System.Globalization.CultureInfo.InvariantCulture);

                    if (objectOffset >= tableStartOffset && objectOffset <= source.GetPosition())
                    {
                        // PDFBOX-3923: offset points inside this table - that can't be good
                        throw new InvalidOperationException(
                            $"Object offset {objectOffset} is within its own cross-reference table for object {currentObjectId}");
                    }

                    var generation = int.Parse(splitString[1], System.Globalization.CultureInfo.InvariantCulture);
                    builder.Add(currentObjectId, generation, objectOffset);
                }
                catch (FormatException e)
                {
                    throw new InvalidOperationException("Bad", e);
                }
            }
            // NOTE(review): the in-use check above looks at the last part, whereas this check looks at
            // the third part - confirm whether a free entry with extra parts is meant to be rejected.
            else if (!splitString[2].Equals(FreeEntry))
            {
                throw new InvalidOperationException(
                    $"Corrupt cross-reference table entry for object {currentObjectId}. The indicator was not 'n' or 'f' but {splitString[2]}.");
            }

            currentObjectId++;

            ReadHelper.SkipSpaces(source);
        }

        ReadHelper.SkipSpaces(source);

        // Another subsection starts with a digit; anything else ends the table.
        if (!ReadHelper.IsDigit(source))
        {
            break;
        }
    }

    if (!TryParseTrailer(source, isLenientParsing, pool, out var trailer))
    {
        throw new InvalidOperationException($"Something went wrong trying to read the XREF table at {offset}.");
    }

    builder.Dictionary = trailer;
    builder.Previous = trailer.GetLongOrDefault(CosName.PREV);

    return true;
}
/// <summary>
/// <c>ToString</c> yields the first number and the count separated by a single space.
/// </summary>
public void ToStringRepresentsPdfForm()
{
    var subsection = new TableSubsectionDefinition(420, 69);

    var text = subsection.ToString();

    Assert.Equal("420 69", text);
}