Example #1
0
        /// <summary>
        /// Returns the next token, taking a pushed-back token from the stack when one is
        /// available and otherwise reading a new token via <c>GetAnyToken</c>.
        /// </summary>
        /// <remarks>
        /// When <c>IgnoreComments</c> is true, <c>TokenComment</c> instances are discarded
        /// and the first non-comment token is returned.
        /// </remarks>
        /// <returns>The next token.</returns>
        public TokenObject GetToken()
        {
            TokenObject next;

            do
            {
                // Pushed-back tokens take priority over reading a fresh one
                if (_stack.Count > 0)
                {
                    next = _stack.Pop();
                }
                else
                {
                    next = GetAnyToken();
                }

                // Keep fetching only while comments are being skipped and we just got one
            } while (IgnoreComments && (next is TokenComment));

            return next;
        }
Example #2
0
 /// <summary>
 /// Throws when the supplied token is a <c>TokenError</c>; does nothing for any other token.
 /// </summary>
 /// <param name="t">Token to inspect.</param>
 /// <exception cref="ApplicationException">
 /// The token is a <c>TokenError</c>; the message is the token's string representation.
 /// </exception>
 private void ThrowOnError(TokenObject t)
 {
     if (!(t is TokenError))
     {
         return;
     }

     throw new ApplicationException(t.ToString());
 }
Example #3
0
 /// <summary>
 /// Throws when the supplied token is a <c>TokenError</c> or a <c>TokenEmpty</c>.
 /// </summary>
 /// <param name="t">Token to inspect.</param>
 /// <exception cref="ApplicationException">
 /// The token is a <c>TokenError</c> (message is the token's string representation)
 /// or a <c>TokenEmpty</c> (message reports an unexpected end of document).
 /// </exception>
 private void ThrowOnEmptyOrError(TokenObject t)
 {
     // Error tokens are checked first so their own description is reported
     if (t is TokenError)
     {
         throw new ApplicationException(t.ToString());
     }

     if (t is TokenEmpty)
     {
         throw new ApplicationException("Unexpected end of PDF document.");
     }
 }
Example #4
0
        /// <summary>
        /// Verifies that the supplied token is of type <typeparamref name="T"/> and returns it
        /// as that type.
        /// </summary>
        /// <typeparam name="T">The token type the caller requires.</typeparam>
        /// <param name="t">Token to inspect.</param>
        /// <returns>The same token, typed as <typeparamref name="T"/>.</returns>
        /// <exception cref="ApplicationException">
        /// The token is a <c>TokenError</c>, a <c>TokenEmpty</c>, or any type other than
        /// <typeparamref name="T"/>.
        /// </exception>
        private T ThrowIfNot <T>(TokenObject t) where T : TokenObject
        {
            if (t is TokenError)
            {
                throw new ApplicationException(t.ToString());
            }

            if (t is TokenEmpty)
            {
                throw new ApplicationException("Unexpected end of PDF document.");
            }

            T expected = t as T;
            if (expected == null)
            {
                throw new ApplicationException($"Found {t.GetType().Name} instead of {typeof(T).Name}.");
            }

            return expected;
        }
Example #5
0
        /// <summary>
        /// Builds the list of cross-reference entries starting at the current tokenizer position.
        /// </summary>
        /// <remarks>
        /// If the next token is the 'xref' keyword the cross-reference sections that follow are
        /// parsed. Otherwise the entries are produced by <see cref="IndirectObjectsToXRef"/>.
        /// </remarks>
        /// <returns>The cross-reference entries.</returns>
        public List <TokenXRefEntry> ParseXRef()
        {
            Tokenizer.IgnoreComments = true;

            TokenKeyword first = Tokenizer.GetToken() as TokenKeyword;
            bool startsWithXRef = (first != null) && (first.Value == ParseKeyword.XRef);

            if (startsWithXRef)
            {
                List <TokenXRefEntry> table = new List <TokenXRefEntry>();
                ParseXRefSections(table);
                return table;
            }

            // No 'xref' keyword: scan entire source creating XRef entries for each indirect object
            return IndirectObjectsToXRef();
        }
Example #6
0
        /// <summary>
        /// Scans the source from position 0, line by line, and creates a cross-reference entry
        /// for every line that begins with "integer integer obj".
        /// </summary>
        /// <remarks>
        /// On return the tokenizer is positioned at the last line that began with the
        /// 'trailer' keyword, if any such line was seen.
        /// </remarks>
        /// <returns>The entries discovered during the scan.</returns>
        public List <TokenXRefEntry> IndirectObjectsToXRef()
        {
            List <TokenXRefEntry> found = new List <TokenXRefEntry>();
            long trailerPosition = -1;

            // Start scanning from beginning of the source
            Tokenizer.IgnoreComments = false;
            Tokenizer.Position       = 0;

            do
            {
                // Remember where this line's first token starts before consuming it
                long        lineStart = Tokenizer.Position;
                TokenObject first     = Tokenizer.GetToken();

                TokenInteger objectId = first as TokenInteger;
                if (objectId != null)
                {
                    TokenInteger generation = Tokenizer.GetToken() as TokenInteger;
                    if (generation == null)
                    {
                        continue;
                    }

                    TokenKeyword marker = Tokenizer.GetToken() as TokenKeyword;
                    if ((marker != null) && (marker.Value == ParseKeyword.Obj))
                    {
                        found.Add(new TokenXRefEntry(objectId.Value, generation.Value, lineStart, true));
                    }

                    continue;
                }

                TokenKeyword keyword = first as TokenKeyword;
                if ((keyword != null) && (keyword.Value == ParseKeyword.Trailer))
                {
                    trailerPosition = lineStart;
                }
            } while (Tokenizer.GotoNextLine());

            // Leave with the position on the last 'trailer' as caller will then parse it
            if (trailerPosition >= 0)
            {
                Tokenizer.Position = trailerPosition;
            }

            return found;
        }
Example #7
0
        /// <summary>
        /// Reads consecutive cross-reference sections and appends their entries to
        /// <paramref name="entries"/> until the 'trailer' keyword is reached.
        /// </summary>
        /// <remarks>
        /// Each section is an integer start number, an integer length, and then that many
        /// entries. The 'trailer' token is pushed back before returning.
        /// </remarks>
        /// <param name="entries">List that receives the entries read.</param>
        /// <exception cref="ApplicationException">
        /// An error token was read, or a section start/length was not an integer.
        /// </exception>
        public void ParseXRefSections(List <TokenXRefEntry> entries)
        {
            for (;;)
            {
                TokenObject token = Tokenizer.GetToken();
                ThrowOnError(token);

                // Cross-reference table ends when we find a 'trailer' keyword instead of another section
                TokenKeyword keyword = token as TokenKeyword;
                if ((keyword != null) && (keyword.Value == ParseKeyword.Trailer))
                {
                    Tokenizer.PushToken(token);
                    return;
                }

                // Section starts with an integer object number
                TokenInteger start = token as TokenInteger;
                if (start == null)
                {
                    throw new ApplicationException($"Cross-reference section number must be an integer.");
                }

                // Section then has an integer length number
                token = Tokenizer.GetToken();
                ThrowOnError(token);

                TokenInteger length = token as TokenInteger;
                if (length == null)
                {
                    throw new ApplicationException($"Cross-reference section length must be an integer.");
                }

                // Load each line in the section, numbering entries upwards from the start value
                int id    = start.Value;
                int index = 0;
                while (index < length.Value)
                {
                    TokenObject entry = Tokenizer.GetXRefEntry(id);
                    ThrowOnError(entry);
                    entries.Add((TokenXRefEntry)entry);

                    index++;
                    id++;
                }
            }
        }
Example #8
0
        /// <summary>
        /// Parses a trailer: the 'trailer' keyword followed by a dictionary.
        /// </summary>
        /// <returns>The dictionary that follows the 'trailer' keyword.</returns>
        /// <exception cref="ApplicationException">
        /// An error token was read, the first token is not the 'trailer' keyword, or the
        /// object that follows is not a dictionary.
        /// </exception>
        public ParseDictionary ParseTrailer()
        {
            Tokenizer.IgnoreComments = true;

            TokenObject first = Tokenizer.GetToken();
            ThrowOnError(first);

            // The section is only valid when it opens with the 'trailer' keyword
            TokenKeyword keyword = first as TokenKeyword;
            if ((keyword == null) || (keyword.Value != ParseKeyword.Trailer))
            {
                throw new ApplicationException($"Trailer section must start with the 'trailer' keyword.");
            }

            // A missing object and a non-dictionary object are both rejected
            ParseDictionary dictionary = ParseObject() as ParseDictionary;
            if (dictionary == null)
            {
                throw new ApplicationException($"Trailer section must contain a dictionary.");
            }

            return dictionary;
        }
Example #9
0
        /// <summary>
        /// Parses the PDF header comment and extracts the version number from it.
        /// </summary>
        /// <remarks>
        /// The header is read as a comment token, so comment skipping is disabled while it is
        /// read and re-enabled once the header has been parsed successfully.
        /// </remarks>
        /// <param name="major">Receives the major version number.</param>
        /// <param name="minor">Receives the minor version number.</param>
        /// <exception cref="ApplicationException">
        /// The first token is not a comment, the comment does not start with '%PDF', or the
        /// text after the prefix is not a parsable &lt;major&gt;.&lt;minor&gt; version.
        /// </exception>
        public void ParseHeader(out int major, out int minor)
        {
            // Number of leading characters skipped to reach the version: "%PDF" plus one separator
            const int versionOffset = 5;

            // The header is a comment token
            Tokenizer.IgnoreComments = false;
            TokenComment c = Tokenizer.GetToken() as TokenComment;

            if (c == null)
            {
                throw new ApplicationException("Missing PDF header.");
            }

            // Ordinal comparison: the prefix is a fixed marker, not culture-sensitive text
            if (!c.Value.StartsWith("%PDF", StringComparison.Ordinal))
            {
                throw new ApplicationException("PDF Header must start with '%PDF'.");
            }

            // A header that is only "%PDF" has nothing after the prefix; report that as a
            // missing version rather than letting Substring throw ArgumentOutOfRangeException
            if (c.Value.Length < versionOffset)
            {
                throw new ApplicationException("PDF Header must have a <major>.<minor> version number.");
            }

            string[] splits = c.Value.Substring(versionOffset).Split('.');
            if (splits.Length != 2)
            {
                throw new ApplicationException("PDF Header must have a <major>.<minor> version number.");
            }

            if (!int.TryParse(splits[0].Trim(), out major))
            {
                throw new ApplicationException("Could not parse the header major version number.");
            }

            if (!int.TryParse(splits[1].Trim(), out minor))
            {
                throw new ApplicationException("Could not parse the header minor version number.");
            }

            Tokenizer.IgnoreComments = true;
        }
Example #10
0
        /// <summary>
        /// Parses the next object from the token stream: a name, integer, object reference,
        /// real, string, array, dictionary, boolean/null keyword or identifier.
        /// </summary>
        /// <param name="allowEmpty">
        /// When true, a <c>TokenEmpty</c> as the first token returns null instead of throwing.
        /// </param>
        /// <returns>
        /// The parsed object; or null when the first token does not start an object (that token
        /// is pushed back), or when the first token is empty and <paramref name="allowEmpty"/>
        /// is true.
        /// </returns>
        /// <exception cref="ApplicationException">
        /// An error token was read, the input ended unexpectedly, an array or dictionary is not
        /// closed by the matching token, a dictionary key is not a name, or a dictionary key has
        /// no value.
        /// </exception>
        public ParseObjectBase ParseObject(bool allowEmpty = false)
        {
            Tokenizer.IgnoreComments = true;
            TokenObject t = Tokenizer.GetToken();

            if (allowEmpty && (t is TokenEmpty))
            {
                return null;
            }

            ThrowOnEmptyOrError(t);

            if (t is TokenName)
            {
                // Store one instance of each unique name to minimize memory footprint
                TokenName tokenName = (TokenName)t;
                return ParseName.GetParse(tokenName.Value);
            }
            else if (t is TokenInteger)
            {
                TokenObject t2 = Tokenizer.GetToken();
                ThrowOnError(t2);

                // An object reference has a second integer, the generation number
                if (t2 is TokenInteger)
                {
                    TokenObject t3 = Tokenizer.GetToken();
                    ThrowOnError(t3);

                    // An object reference has a third value which is the 'R' keyword
                    if ((t3 is TokenKeyword) && (((TokenKeyword)t3).Value == ParseKeyword.R))
                    {
                        return new ParseObjectReference(t as TokenInteger, t2 as TokenInteger);
                    }

                    // Not a reference: look-ahead tokens go back in reverse read order so
                    // that they are returned again in the order they were read
                    Tokenizer.PushToken(t3);
                }

                Tokenizer.PushToken(t2);
                return new ParseInteger(t as TokenInteger);
            }
            else if (t is TokenReal)
            {
                return new ParseReal(t as TokenReal);
            }
            else if (t is TokenStringHex)
            {
                return new ParseString(t as TokenStringHex);
            }
            else if (t is TokenStringLiteral)
            {
                return new ParseString(t as TokenStringLiteral);
            }
            else if (t is TokenArrayOpen)
            {
                List <ParseObjectBase> objects = new List <ParseObjectBase>();

                // Collect elements until the next token does not start an object
                while (true)
                {
                    ParseObjectBase entry = ParseObject();
                    if (entry == null)
                    {
                        break;
                    }

                    objects.Add(entry);
                }

                // The token that stopped the loop was pushed back and must close the array
                ThrowIfNot <TokenArrayClose>(Tokenizer.GetToken());
                return new ParseArray(objects);
            }
            else if (t is TokenDictionaryOpen)
            {
                List <string>          names   = new List <string>();
                List <ParseObjectBase> entries = new List <ParseObjectBase>();

                // Collect key/value pairs until the next token does not start an object
                while (true)
                {
                    ParseObjectBase key = ParseObject();
                    if (key == null)
                    {
                        break;
                    }

                    // Key value must be a Name
                    ParseName name = key as ParseName;
                    if (name == null)
                    {
                        // Report the type actually found; 'name' is null on this path
                        throw new ApplicationException($"Dictionary key must be a name instead of {key.GetType().Name}.");
                    }

                    ParseObjectBase entryValue = ParseObject();
                    if (entryValue == null)
                    {
                        throw new ApplicationException($"Dictionary value missing for key {name.Value}.");
                    }

                    names.Add(name.Value);
                    entries.Add(entryValue);
                }

                // The token that stopped the loop was pushed back and must close the dictionary
                ThrowIfNot <TokenDictionaryClose>(Tokenizer.GetToken());
                return new ParseDictionary(names, entries);
            }
            else if (t is TokenKeyword)
            {
                // Only these keywords are objects; any other keyword falls through and is pushed back
                switch ((t as TokenKeyword).Value)
                {
                case ParseKeyword.True:
                    return ParseObjectBase.True;

                case ParseKeyword.False:
                    return ParseObjectBase.False;

                case ParseKeyword.Null:
                    return ParseObjectBase.Null;
                }
            }
            else if (t is TokenIdentifier)
            {
                // Store one instance of each unique identifier to minimize memory footprint
                TokenIdentifier tokenIdentifier = (TokenIdentifier)t;
                return ParseIdentifier.GetParse(tokenIdentifier.Value);
            }

            // Token is not one that starts an object, so put the token back
            Tokenizer.PushToken(t);
            return null;
        }
Example #11
0
        /// <summary>
        /// Parses an indirect object: "integer integer obj", a content object, then either
        /// 'endobj' or a stream ('stream', the stream bytes, 'endstream', 'endobj').
        /// </summary>
        /// <returns>
        /// The parsed indirect object, or null when the upcoming tokens are not
        /// "integer integer obj" (the tokens read are pushed back).
        /// </returns>
        /// <exception cref="ApplicationException">
        /// An error token was read, the input ended unexpectedly, the content is missing, an
        /// unexpected keyword follows the content, or the stream dictionary, length, bytes or
        /// closing keywords are invalid.
        /// </exception>
        public ParseIndirectObject ParseIndirectObject()
        {
            Tokenizer.IgnoreComments = true;
            TokenObject t = Tokenizer.GetToken();

            ThrowOnEmptyOrError(t);

            // Indirect object starts with an integer, the object identifier
            if (!(t is TokenInteger))
            {
                Tokenizer.PushToken(t);
                return null;
            }

            // Second is another integer, the generation number
            TokenObject u = Tokenizer.GetToken();

            ThrowOnEmptyOrError(u);

            if (!(u is TokenInteger))
            {
                // Pushed-back tokens are returned last-in first-out, so push in reverse
                // read order to have them returned again in the order they were read
                Tokenizer.PushToken(u);
                Tokenizer.PushToken(t);
                return null;
            }

            // This is the keyword 'obj'
            TokenObject v = Tokenizer.GetToken();

            ThrowOnEmptyOrError(v);
            if (!(v is TokenKeyword) || ((v as TokenKeyword).Value != ParseKeyword.Obj))
            {
                // Reverse read order, for the same reason as above
                Tokenizer.PushToken(v);
                Tokenizer.PushToken(u);
                Tokenizer.PushToken(t);
                return null;
            }

            // Get actual object that is the content
            ParseObjectBase obj = ParseObject();

            if (obj == null)
            {
                throw new ApplicationException($"Indirect object has missing content.");
            }

            // Must be followed by either 'endobj' or 'stream'
            v = Tokenizer.GetToken();
            ThrowOnEmptyOrError(v);

            TokenKeyword keyword = v as TokenKeyword;

            if (keyword == null)
            {
                // PH: FIXME:
                // A missing 'endobj'/'stream' is tolerated here: the parsed content is
                // replaced by an integer 0 and the token just read is not pushed back.
                return new ParseIndirectObject(t as TokenInteger, u as TokenInteger, new ParseInteger(0));
            }

            if (keyword.Value == ParseKeyword.EndObj)
            {
                return new ParseIndirectObject(t as TokenInteger, u as TokenInteger, obj);
            }
            else if (keyword.Value == ParseKeyword.Stream)
            {
                ParseDictionary dictionary = obj as ParseDictionary;
                if (dictionary == null)
                {
                    throw new ApplicationException($"Stream must be preceded by a dictionary.");
                }

                if (!dictionary.ContainsName("Length"))
                {
                    throw new ApplicationException($"Stream dictionary must contain a 'Length' entry.");
                }

                ParseObjectBase lengthObj = dictionary["Length"];

                // Resolve any object reference
                ParseObjectReference reference = lengthObj as ParseObjectReference;
                if (reference != null)
                {
                    lengthObj = OnResolveReference(reference);
                }

                ParseInteger length = lengthObj as ParseInteger;
                if (length == null)
                {
                    throw new ApplicationException($"Stream dictionary has a 'Length' entry that is not an integer entry.");
                }

                if (length.Value < 0)
                {
                    throw new ApplicationException($"Stream dictionary has a 'Length' less than 0.");
                }

                byte[] bytes = Tokenizer.GetBytes(length.Value);
                if (bytes == null)
                {
                    throw new ApplicationException($"Cannot read in expected {length.Value} bytes from stream.");
                }

                // Stream contents must be followed by 'endstream'
                v = Tokenizer.GetToken();
                ThrowOnEmptyOrError(v);

                keyword = v as TokenKeyword;
                if (keyword == null)
                {
                    throw new ApplicationException($"Stream has missing 'endstream' after content.");
                }

                if (keyword.Value != ParseKeyword.EndStream)
                {
                    throw new ApplicationException($"Stream has unexpected keyword {keyword.Value} instead of 'endstream'.");
                }

                // 'endstream' must be followed by 'endobj'
                v = Tokenizer.GetToken();
                ThrowOnEmptyOrError(v);

                keyword = v as TokenKeyword;
                if (keyword == null)
                {
                    throw new ApplicationException($"Indirect object has missing 'endobj'.");
                }

                if (keyword.Value != ParseKeyword.EndObj)
                {
                    throw new ApplicationException($"Indirect object has unexpected keyword {keyword.Value} instead of 'endobj'.");
                }

                return new ParseIndirectObject(t as TokenInteger, u as TokenInteger, new ParseStream(dictionary, bytes));
            }
            else
            {
                throw new ApplicationException($"Indirect object has unexpected keyword {keyword.Value}.");
            }
        }
Example #12
0
 /// <summary>
 /// Pushes a token onto the internal stack.
 /// </summary>
 /// <param name="token">Token to push.</param>
 public void PushToken(TokenObject token) => _stack.Push(token);