/// <summary>
/// Returns the next token, taking it from the pushed-back tokens when any exist and
/// otherwise reading a fresh one via GetAnyToken.
/// </summary>
/// <returns>
/// The next token. When IgnoreComments is true, comment tokens are discarded and the
/// first non-comment token is returned instead.
/// </returns>
public TokenObject GetToken()
{
    TokenObject token = (_stack.Count > 0) ? _stack.Pop() : GetAnyToken();

    // Comments are passed through untouched unless the caller asked to skip them
    if (!IgnoreComments)
    {
        return token;
    }

    // Discard comments, always preferring pushed-back tokens over new input
    while (token is TokenComment)
    {
        token = (_stack.Count > 0) ? _stack.Pop() : GetAnyToken();
    }

    return token;
}
/// <summary>
/// Throws when the supplied token is an error token; does nothing for any other token.
/// </summary>
/// <param name="t">Token to inspect.</param>
/// <exception cref="ApplicationException">
/// The token is a TokenError; the message is the token's string representation.
/// </exception>
private void ThrowOnError(TokenObject t)
{
    bool isError = t is TokenError;
    if (!isError)
    {
        return;
    }

    throw new ApplicationException(t.ToString());
}
/// <summary>
/// Throws when the supplied token is an error token or signals that no more input exists.
/// </summary>
/// <param name="t">Token to inspect.</param>
/// <exception cref="ApplicationException">
/// The token is a TokenError (message is the token's string representation) or a
/// TokenEmpty (message reports an unexpected end of document).
/// </exception>
private void ThrowOnEmptyOrError(TokenObject t)
{
    // Error is tested first so it wins if a token were ever both kinds
    if (t is TokenError)
    {
        throw new ApplicationException(t.ToString());
    }

    if (t is TokenEmpty)
    {
        throw new ApplicationException("Unexpected end of PDF document.");
    }
}
/// <summary>
/// Checks that the supplied token is of the expected token type and returns it as that type.
/// </summary>
/// <typeparam name="T">Token type the caller requires.</typeparam>
/// <param name="t">Token to inspect.</param>
/// <returns>The same token, cast to <typeparamref name="T"/>.</returns>
/// <exception cref="ApplicationException">
/// The token is a TokenError, a TokenEmpty, or any type other than <typeparamref name="T"/>.
/// </exception>
private T ThrowIfNot<T>(TokenObject t) where T : TokenObject
{
    if (t is TokenError)
    {
        throw new ApplicationException(t.ToString());
    }

    if (t is TokenEmpty)
    {
        throw new ApplicationException("Unexpected end of PDF document.");
    }

    bool isExpectedType = t is T;
    if (!isExpectedType)
    {
        // Report both the actual and the required token type names
        throw new ApplicationException($"Found {t.GetType().Name} instead of {typeof(T).Name}.");
    }

    return (T)t;
}
/// <summary>
/// Produces the cross-reference entries, either by parsing the table that starts at the
/// current tokenizer position or, when no 'xref' keyword is found there, by scanning
/// the whole source for indirect objects.
/// </summary>
/// <returns>The list of cross-reference entries.</returns>
public List<TokenXRefEntry> ParseXRef()
{
    Tokenizer.IgnoreComments = true;

    TokenKeyword keyword = Tokenizer.GetToken() as TokenKeyword;
    bool startsWithXRef = (keyword != null) && (keyword.Value == ParseKeyword.XRef);

    if (startsWithXRef)
    {
        List<TokenXRefEntry> entries = new List<TokenXRefEntry>();
        ParseXRefSections(entries);
        return entries;
    }

    // No table here, so scan the entire source creating an entry for each indirect object
    return IndirectObjectsToXRef();
}
/// <summary>
/// Builds cross-reference entries by scanning the source from position 0, one line at a
/// time, for the sequence 'integer integer obj'.
/// </summary>
/// <remarks>
/// The position of the last line that starts with the 'trailer' keyword is remembered and,
/// if one was seen, the tokenizer is left positioned there so the caller can parse it.
/// </remarks>
/// <returns>One entry per indirect object header found.</returns>
public List<TokenXRefEntry> IndirectObjectsToXRef()
{
    List<TokenXRefEntry> found = new List<TokenXRefEntry>();

    // Start scanning from the beginning of the source, with comments returned as tokens
    Tokenizer.IgnoreComments = false;
    Tokenizer.Position = 0;

    long trailerPosition = -1;

    do
    {
        long lineStart = Tokenizer.Position;
        TokenObject first = Tokenizer.GetToken();

        TokenInteger id = first as TokenInteger;
        if (id != null)
        {
            TokenInteger gen = Tokenizer.GetToken() as TokenInteger;
            if (gen == null)
            {
                // Not an object header; move on to the next line
                continue;
            }

            TokenKeyword marker = Tokenizer.GetToken() as TokenKeyword;
            if ((marker != null) && (marker.Value == ParseKeyword.Obj))
            {
                // NOTE(review): the final 'true' presumably marks the entry as in use - confirm against TokenXRefEntry
                found.Add(new TokenXRefEntry(id.Value, gen.Value, lineStart, true));
            }
        }
        else
        {
            TokenKeyword marker = first as TokenKeyword;
            if ((marker != null) && (marker.Value == ParseKeyword.Trailer))
            {
                trailerPosition = lineStart;
            }
        }
    }
    while (Tokenizer.GotoNextLine());

    // Leave with the position on the last 'trailer' as the caller will then parse it
    if (trailerPosition >= 0)
    {
        Tokenizer.Position = trailerPosition;
    }

    return found;
}
/// <summary>
/// Reads consecutive cross-reference sections, appending every entry to
/// <paramref name="entries"/>, until the 'trailer' keyword is reached.
/// </summary>
/// <remarks>
/// Each section is a starting object number, an entry count, and then that many entries.
/// The 'trailer' token is pushed back so the caller can read it again.
/// </remarks>
/// <param name="entries">List that receives the parsed entries.</param>
/// <exception cref="ApplicationException">
/// A token is an error token, or a section's start or length is not an integer.
/// </exception>
public void ParseXRefSections(List<TokenXRefEntry> entries)
{
    for (;;)
    {
        TokenObject t = Tokenizer.GetToken();
        ThrowOnError(t);

        // The table ends when a 'trailer' keyword appears where another section would start
        TokenKeyword keyword = t as TokenKeyword;
        if ((keyword != null) && (keyword.Value == ParseKeyword.Trailer))
        {
            Tokenizer.PushToken(t);
            return;
        }

        // First number of a section is the object number of its first entry
        TokenInteger start = t as TokenInteger;
        if (start == null)
        {
            throw new ApplicationException("Cross-reference section number must be an integer.");
        }

        t = Tokenizer.GetToken();
        ThrowOnError(t);

        // Second number of a section is how many entries follow
        TokenInteger length = t as TokenInteger;
        if (length == null)
        {
            throw new ApplicationException("Cross-reference section length must be an integer.");
        }

        // Load each entry, numbering them consecutively from the section start
        int id = start.Value;
        for (int i = 0; i < length.Value; i++)
        {
            TokenObject entry = Tokenizer.GetXRefEntry(id);
            ThrowOnError(entry);
            entries.Add((TokenXRefEntry)entry);
            id++;
        }
    }
}
/// <summary>
/// Parses a trailer: the 'trailer' keyword followed by a dictionary.
/// </summary>
/// <returns>The trailer dictionary.</returns>
/// <exception cref="ApplicationException">
/// The next token is an error token, is not the 'trailer' keyword, or the object that
/// follows the keyword is not a dictionary.
/// </exception>
public ParseDictionary ParseTrailer()
{
    Tokenizer.IgnoreComments = true;

    TokenObject t = Tokenizer.GetToken();
    ThrowOnError(t);

    // The section must open with the 'trailer' keyword
    TokenKeyword keyword = t as TokenKeyword;
    bool isTrailer = (keyword != null) && (keyword.Value == ParseKeyword.Trailer);
    if (!isTrailer)
    {
        throw new ApplicationException("Trailer section must start with the 'trailer' keyword.");
    }

    // A missing object and a non-dictionary object are reported the same way
    ParseDictionary dictionary = ParseObject() as ParseDictionary;
    if (dictionary == null)
    {
        throw new ApplicationException("Trailer section must contain a dictionary.");
    }

    return dictionary;
}
/// <summary>
/// Reads the header comment from the tokenizer and extracts the version numbers from it.
/// </summary>
/// <remarks>
/// The header is expected to be a comment that starts with '%PDF', followed by one
/// separator character and then a 'major.minor' version. Comments are returned as tokens
/// while the header is read and are ignored again once it has been parsed successfully.
/// </remarks>
/// <param name="major">Receives the integer before the '.' of the version.</param>
/// <param name="minor">Receives the integer after the '.' of the version.</param>
/// <exception cref="ApplicationException">
/// The first token is not a comment, does not start with '%PDF', or does not carry a
/// parsable 'major.minor' version.
/// </exception>
public void ParseHeader(out int major, out int minor)
{
    // The header is a comment token, so comments must not be skipped while reading it
    Tokenizer.IgnoreComments = false;

    TokenComment c = Tokenizer.GetToken() as TokenComment;
    if (c == null)
    {
        throw new ApplicationException("Missing PDF header.");
    }

    const string prefix = "%PDF";
    string header = c.Value;

    // Ordinal comparison: the marker is a fixed byte sequence, not culture-dependent text
    if (!header.StartsWith(prefix, StringComparison.Ordinal))
    {
        throw new ApplicationException("PDF Header must start with '%PDF'.");
    }

    // The version begins after the prefix and the single separator character that follows it.
    // A header that ends at (or right after) the prefix has no version text; treat it as empty
    // so it is reported as a malformed version instead of failing inside Substring.
    int versionStart = prefix.Length + 1;
    string version = (header.Length > versionStart) ? header.Substring(versionStart) : string.Empty;

    string[] splits = version.Split('.');
    if (splits.Length != 2)
    {
        throw new ApplicationException("PDF Header must have a <major>.<minor> version number.");
    }

    if (!int.TryParse(splits[0].Trim(), out major))
    {
        throw new ApplicationException("Could not parse the header major version number.");
    }

    if (!int.TryParse(splits[1].Trim(), out minor))
    {
        throw new ApplicationException("Could not parse the header minor version number.");
    }

    Tokenizer.IgnoreComments = true;
}
/// <summary>
/// Parses a single object starting at the next token: a name, integer, object reference,
/// real, string, array, dictionary, one of the true/false/null keywords, or an identifier.
/// </summary>
/// <param name="allowEmpty">
/// When true, running out of input returns null instead of throwing.
/// </param>
/// <returns>
/// The parsed object, or null when the next token does not start an object (that token is
/// pushed back for the caller) or when input is exhausted and <paramref name="allowEmpty"/>
/// is true.
/// </returns>
/// <exception cref="ApplicationException">
/// An error token is read, input ends unexpectedly, an array or dictionary is not closed
/// by the matching token, a dictionary key is not a name, or a dictionary key has no value.
/// </exception>
public ParseObjectBase ParseObject(bool allowEmpty = false)
{
    Tokenizer.IgnoreComments = true;
    TokenObject t = Tokenizer.GetToken();

    if (allowEmpty && (t is TokenEmpty))
    {
        return null;
    }

    ThrowOnEmptyOrError(t);

    if (t is TokenName)
    {
        // Store one instance of each unique name to minimize memory footprint
        TokenName tokenName = (TokenName)t;
        return ParseName.GetParse(tokenName.Value);
    }
    else if (t is TokenInteger)
    {
        TokenObject t2 = Tokenizer.GetToken();
        ThrowOnError(t2);

        // An object reference has a second integer, the generation number
        if (t2 is TokenInteger)
        {
            TokenObject t3 = Tokenizer.GetToken();
            ThrowOnError(t3);

            // An object reference has a third value which is the 'R' keyword
            if ((t3 is TokenKeyword) && (((TokenKeyword)t3).Value == ParseKeyword.R))
            {
                return new ParseObjectReference(t as TokenInteger, t2 as TokenInteger);
            }

            // Not a reference: look-ahead tokens go back in reverse so they are re-read in order
            Tokenizer.PushToken(t3);
        }

        Tokenizer.PushToken(t2);
        return new ParseInteger(t as TokenInteger);
    }
    else if (t is TokenReal)
    {
        return new ParseReal(t as TokenReal);
    }
    else if (t is TokenStringHex)
    {
        return new ParseString(t as TokenStringHex);
    }
    else if (t is TokenStringLiteral)
    {
        return new ParseString(t as TokenStringLiteral);
    }
    else if (t is TokenArrayOpen)
    {
        List<ParseObjectBase> objects = new List<ParseObjectBase>();

        // Collect elements until a token that does not start an object is reached
        while (true)
        {
            ParseObjectBase entry = ParseObject();
            if (entry == null)
            {
                break;
            }

            objects.Add(entry);
        }

        // The token that stopped the loop was pushed back and must be the array close
        ThrowIfNot<TokenArrayClose>(Tokenizer.GetToken());
        return new ParseArray(objects);
    }
    else if (t is TokenDictionaryOpen)
    {
        List<string> names = new List<string>();
        List<ParseObjectBase> entries = new List<ParseObjectBase>();

        // Collect key/value pairs until a token that does not start an object is reached
        while (true)
        {
            ParseObjectBase value1 = ParseObject();
            if (value1 == null)
            {
                break;
            }

            // Key value must be a Name; report the type that was actually found
            ParseName name = value1 as ParseName;
            if (name == null)
            {
                throw new ApplicationException($"Dictionary key must be a name instead of {value1.GetType().Name}.");
            }

            ParseObjectBase value2 = ParseObject();
            if (value2 == null)
            {
                throw new ApplicationException($"Dictionary value missing for key {name.Value}.");
            }

            names.Add(name.Value);
            entries.Add(value2);
        }

        // The token that stopped the loop was pushed back and must be the dictionary close
        ThrowIfNot<TokenDictionaryClose>(Tokenizer.GetToken());
        return new ParseDictionary(names, entries);
    }
    else if (t is TokenKeyword)
    {
        // Only these three keywords are objects; any other keyword falls through to the push-back below
        switch ((t as TokenKeyword).Value)
        {
            case ParseKeyword.True:
                return ParseObjectBase.True;
            case ParseKeyword.False:
                return ParseObjectBase.False;
            case ParseKeyword.Null:
                return ParseObjectBase.Null;
        }
    }
    else if (t is TokenIdentifier)
    {
        // Store one instance of each unique identifier to minimize memory footprint
        TokenIdentifier tokenIdentifier = (TokenIdentifier)t;
        return ParseIdentifier.GetParse(tokenIdentifier.Value);
    }

    // Token is not one that starts an object, so put the token back
    Tokenizer.PushToken(t);
    return null;
}
/// <summary>
/// Parses an indirect object: 'id gen obj', the content object, then either 'endobj' or a
/// stream ('stream', the raw bytes, 'endstream', 'endobj').
/// </summary>
/// <returns>
/// The parsed indirect object, or null when the next tokens are not an 'id gen obj'
/// header, in which case every token that was read is pushed back.
/// </returns>
/// <exception cref="ApplicationException">
/// An error token is read, input ends unexpectedly, the content is missing, an unexpected
/// keyword follows the content, or the stream's dictionary, 'Length' entry, bytes or
/// closing keywords are missing or invalid.
/// </exception>
public ParseIndirectObject ParseIndirectObject()
{
    Tokenizer.IgnoreComments = true;

    // Indirect object starts with an integer, the object identifier
    TokenObject t = Tokenizer.GetToken();
    ThrowOnEmptyOrError(t);
    if (!(t is TokenInteger))
    {
        Tokenizer.PushToken(t);
        return null;
    }

    // Second is another integer, the generation number
    TokenObject u = Tokenizer.GetToken();
    ThrowOnEmptyOrError(u);
    if (!(u is TokenInteger))
    {
        // Pushed-back tokens are returned last-in first-out, so push in reverse read order
        // (as ParseObject does) to have them re-read in their original sequence
        Tokenizer.PushToken(u);
        Tokenizer.PushToken(t);
        return null;
    }

    // This is the keyword 'obj'
    TokenObject v = Tokenizer.GetToken();
    ThrowOnEmptyOrError(v);
    if (!(v is TokenKeyword) || ((v as TokenKeyword).Value != ParseKeyword.Obj))
    {
        // Reverse read order again, for the same reason as above
        Tokenizer.PushToken(v);
        Tokenizer.PushToken(u);
        Tokenizer.PushToken(t);
        return null;
    }

    // Get actual object that is the content
    ParseObjectBase obj = ParseObject();
    if (obj == null)
    {
        throw new ApplicationException("Indirect object has missing content.");
    }

    // Must be followed by either 'endobj' or 'stream'
    v = Tokenizer.GetToken();
    ThrowOnEmptyOrError(v);

    TokenKeyword keyword = v as TokenKeyword;
    if (keyword == null)
    {
        // PH: FIXME: deliberate best-effort kept from the original. Instead of failing with
        // "Indirect object has missing 'endobj or 'stream'.", the parsed content is dropped
        // and an integer 0 is substituted; the non-keyword token is not pushed back.
        return new ParseIndirectObject(t as TokenInteger, u as TokenInteger, new ParseInteger(0));
    }

    if (keyword.Value == ParseKeyword.EndObj)
    {
        return new ParseIndirectObject(t as TokenInteger, u as TokenInteger, obj);
    }
    else if (keyword.Value == ParseKeyword.Stream)
    {
        ParseDictionary dictionary = obj as ParseDictionary;
        if (dictionary == null)
        {
            throw new ApplicationException("Stream must be preceded by a dictionary.");
        }

        if (!dictionary.ContainsName("Length"))
        {
            throw new ApplicationException("Stream dictionary must contain a 'Length' entry.");
        }

        ParseObjectBase lengthObj = dictionary["Length"];

        // The length may be stored indirectly; resolve any object reference first
        ParseObjectReference reference = lengthObj as ParseObjectReference;
        if (reference != null)
        {
            lengthObj = OnResolveReference(reference);
        }

        ParseInteger length = lengthObj as ParseInteger;
        if (length == null)
        {
            throw new ApplicationException("Stream dictionary has a 'Length' entry that is not an integer entry.");
        }

        if (length.Value < 0)
        {
            throw new ApplicationException("Stream dictionary has a 'Length' less than 0.");
        }

        byte[] bytes = Tokenizer.GetBytes(length.Value);
        if (bytes == null)
        {
            throw new ApplicationException($"Cannot read in expected {length.Value} bytes from stream.");
        }

        // Stream contents must be followed by 'endstream'
        v = Tokenizer.GetToken();
        ThrowOnEmptyOrError(v);

        keyword = v as TokenKeyword;
        if (keyword == null)
        {
            throw new ApplicationException("Stream has missing 'endstream' after content.");
        }

        if (keyword.Value != ParseKeyword.EndStream)
        {
            throw new ApplicationException($"Stream has unexpected keyword {keyword.Value} instead of 'endstream'.");
        }

        // 'endstream' must be followed by 'endobj'
        v = Tokenizer.GetToken();
        ThrowOnEmptyOrError(v);

        keyword = v as TokenKeyword;
        if (keyword == null)
        {
            throw new ApplicationException("Indirect object has missing 'endobj'.");
        }

        if (keyword.Value != ParseKeyword.EndObj)
        {
            throw new ApplicationException($"Indirect object has unexpected keyword {keyword.Value} instead of 'endobj'.");
        }

        return new ParseIndirectObject(t as TokenInteger, u as TokenInteger, new ParseStream(dictionary, bytes));
    }
    else
    {
        throw new ApplicationException($"Indirect object has unexpected keyword {keyword.Value}.");
    }
}
/// <summary>
/// Puts a token back so that it is handed out again before any new input is read.
/// </summary>
/// <remarks>
/// Tokens are stored on a stack, so when several are pushed the most recently pushed one
/// is returned first.
/// </remarks>
/// <param name="token">Token to put back.</param>
public void PushToken(TokenObject token) => _stack.Push(token);