/// <summary>
/// Attempts to read a cross-reference table (the "xref" keyword, its subsections and the trailer)
/// starting at the current position of <paramref name="source"/>.
/// </summary>
/// <param name="source">The input to read from; its current position is taken as the start of the table.</param>
/// <param name="offset">Stored on the resulting builder and used in log and error messages.</param>
/// <param name="isLenientParsing">
/// When <c>true</c>, an unreadable subsection header stops subsection reading and parsing continues with the
/// trailer; when <c>false</c> it makes this method return <c>false</c>. The flag is also passed to the trailer parser.
/// </param>
/// <param name="pool">Passed through to the trailer parser.</param>
/// <param name="builder">
/// The populated builder on success. It is <c>null</c> when the method returns <c>false</c> before any subsection is
/// read; if a subsection header fails in non-lenient mode the partially filled builder is left assigned.
/// </param>
/// <returns>
/// <c>true</c> if the table and its trailer were read; <c>false</c> if the input does not start with "xref",
/// the table is empty ("xref" directly followed by "trailer"), or a subsection header is unreadable in non-lenient mode.
/// </returns>
/// <exception cref="InvalidOperationException">
/// An in-use entry has a non-numeric or out-of-range offset/generation, an entry's offset points inside this table,
/// an entry's indicator is neither in-use nor free, or the trailer could not be parsed.
/// </exception>
public bool TryParse(IRandomAccessRead source, long offset, bool isLenientParsing, CosObjectPool pool, out CrossReferenceTablePartBuilder builder)
{
    builder = null;

    var tableStartOffset = source.GetPosition();

    if (source.Peek() != 'x')
    {
        return false;
    }

    var xref = ReadHelper.ReadString(source);
    if (!xref.Trim().Equals("xref"))
    {
        return false;
    }

    // Check for a trailer directly after "xref": read the next string, then rewind so it can be re-read below.
    var str = ReadHelper.ReadString(source);
    byte[] b = OtherEncodings.StringAsLatin1Bytes(str);
    source.Rewind(b.Length);

    // "trailer" is a file-format keyword, so compare ordinally rather than with the current culture.
    if (str.StartsWith("trailer", StringComparison.Ordinal))
    {
        log.Warn("skipping empty xref table");
        return false;
    }

    builder = new CrossReferenceTablePartBuilder
    {
        Offset = offset,
        XRefType = CrossReferenceType.Table
    };

    // Tables can have multiple sections. Each starts with a starting object id and a count.
    while (true)
    {
        if (!TableSubsectionDefinition.TryRead(log, source, out var subsectionDefinition))
        {
            log.Warn($"Unexpected subsection definition in the cross-reference table at offset {offset}");

            if (isLenientParsing)
            {
                break;
            }

            return false;
        }

        var currentObjectId = subsectionDefinition.FirstNumber;

        ReadHelper.SkipSpaces(source);
        for (var i = 0; i < subsectionDefinition.Count; i++)
        {
            if (source.IsEof() || ReadHelper.IsEndOfName((char)source.Peek()))
            {
                break;
            }

            // A 't' here is taken as the start of the trailer: the subsection is shorter than declared.
            if (source.Peek() == 't')
            {
                break;
            }

            var currentLine = ReadHelper.ReadLine(source);
            var splitString = currentLine.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (splitString.Length < 3)
            {
                log.Warn("invalid xref line: " + currentLine);
                break;
            }

            // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
            if (splitString[splitString.Length - 1].Equals(InUseEntry))
            {
                try
                {
                    // File data is culture-independent, so parse with the invariant culture.
                    var objectOffset = long.Parse(splitString[0], System.Globalization.CultureInfo.InvariantCulture);

                    if (objectOffset >= tableStartOffset && objectOffset <= source.GetPosition())
                    {
                        // PDFBOX-3923: offset points inside this table - that can't be good
                        throw new InvalidOperationException(
                            $"Object offset {objectOffset} is within its own cross-reference table for object {currentObjectId}");
                    }

                    var generation = int.Parse(splitString[1], System.Globalization.CultureInfo.InvariantCulture);
                    builder.Add(currentObjectId, generation, objectOffset);
                }
                catch (Exception e) when (e is FormatException || e is OverflowException)
                {
                    // Oversized digit runs raise OverflowException rather than FormatException; treat both
                    // as the same kind of corrupt entry so callers only ever see InvalidOperationException.
                    throw new InvalidOperationException("Bad", e);
                }
            }
            else if (!splitString[2].Equals(FreeEntry))
            {
                throw new InvalidOperationException(
                    $"Corrupt cross-reference table entry for object {currentObjectId}. The indicator was not 'n' or 'f' but {splitString[2]}.");
            }

            currentObjectId++;

            ReadHelper.SkipSpaces(source);
        }

        ReadHelper.SkipSpaces(source);

        // Another subsection header starts with a digit; anything else ends the subsection list.
        if (!ReadHelper.IsDigit(source))
        {
            break;
        }
    }

    if (!TryParseTrailer(source, isLenientParsing, pool, out var trailer))
    {
        throw new InvalidOperationException($"Something went wrong trying to read the XREF table at {offset}.");
    }

    builder.Dictionary = trailer;
    builder.Previous = trailer.GetLongOrDefault(CosName.PREV);

    return true;
}
/// <summary>
/// Reads a name token. <paramref name="currentByte"/> must be the leading '/'; the bytes of the name are then
/// consumed from <paramref name="inputBytes"/> until an end-of-name byte or the end of the input is reached.
/// </summary>
/// <remarks>
/// A '#' followed by two hex digits is decoded to the single byte it encodes. A '#' that is not followed by two
/// hex digits is kept literally, together with any hex digit already read for it. The collected bytes are decoded
/// as UTF-8 when they form valid UTF-8 and as windows-1252 otherwise.
/// </remarks>
/// <param name="currentByte">The byte the tokenizer is currently positioned on.</param>
/// <param name="inputBytes">The input to read the rest of the name from. The byte that ends the name is consumed.</param>
/// <param name="token">The name token on success, otherwise <c>null</c>.</param>
/// <returns><c>true</c> if <paramref name="currentByte"/> was '/' and a token was created; otherwise <c>false</c>.</returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
    token = null;

    if (currentByte != '/')
    {
        return false;
    }

    var bytes = new List<byte>();

    bool escapeActive = false;
    int postEscapeRead = 0;
    var escapedChars = new char[2];

    while (inputBytes.MoveNext())
    {
        var b = inputBytes.CurrentByte;

        if (b == '#')
        {
            if (escapeActive)
            {
                // Something like ##AE or #4#AE: the earlier escape never completed, so keep what was read
                // for it literally and start over, instead of losing it or reusing the stale hex digit.
                AppendUnfinishedEscape(bytes, escapedChars, postEscapeRead);
                postEscapeRead = 0;
            }

            escapeActive = true;
        }
        else if (escapeActive)
        {
            if (ReadHelper.IsHex((char)b))
            {
                escapedChars[postEscapeRead] = (char)b;
                postEscapeRead++;

                if (postEscapeRead == 2)
                {
                    var hex = new string(escapedChars);

                    var characterToWrite = (byte)Convert.ToInt32(hex, 16);
                    bytes.Add(characterToWrite);

                    escapeActive = false;
                    postEscapeRead = 0;
                }
            }
            else
            {
                // Not a hex digit, so this was not an escape sequence: emit the '#' and any digit literally.
                AppendUnfinishedEscape(bytes, escapedChars, postEscapeRead);
                escapeActive = false;
                postEscapeRead = 0;

                if (ReadHelper.IsEndOfName(b))
                {
                    break;
                }

                bytes.Add(b);
            }
        }
        else if (ReadHelper.IsEndOfName(b))
        {
            break;
        }
        else
        {
            bytes.Add(b);
        }
    }

    if (escapeActive)
    {
        // The input ended in the middle of an escape; treat it the same way as a delimiter ending the name.
        AppendUnfinishedEscape(bytes, escapedChars, postEscapeRead);
    }

    byte[] byteArray = bytes.ToArray();

    var str = ReadHelper.IsValidUtf8(byteArray)
        ? Encoding.UTF8.GetString(byteArray)
        : Encoding.GetEncoding("windows-1252").GetString(byteArray);

    token = NameToken.Create(str);

    return true;
}

// Writes a '#' whose escape sequence was never completed, plus the single hex digit read for it (if any), as literal bytes.
private static void AppendUnfinishedEscape(List<byte> bytes, char[] escapedChars, int postEscapeRead)
{
    bytes.Add((byte)'#');

    if (postEscapeRead == 1)
    {
        bytes.Add((byte)escapedChars[0]);
    }
}