private void ParseFieldValue_AlphaNumeric(BibTexLexerCallback callback)
        {
            // Reads a bare (unbraced, unquoted) field value starting at the current position
            // and reports it through callback.RaiseFieldValue.
            int value_begin = c;

            // Letters, digits, '.' and '-' belong to the value; any other character ends it.
            // Nothing in this loop checks for the end of the input.
            for (;;)
            {
                char current = bibtex[c];
                bool belongs_to_value = Char.IsLetterOrDigit(current) || '.' == current || '-' == current;

                if (!belongs_to_value)
                {
                    break;
                }

                ++c;
            }

            // `c` now sits on the first character that is not part of the value.
            callback.RaiseFieldValue(bibtex.Substring(value_begin, c - value_begin));
        }
        /// <summary>
        /// Parses `name = value` pairs, each optionally followed by a comma, until the current
        /// character equals <paramref name="delim_close"/>.
        /// </summary>
        /// <param name="callback">Receives field names, field values and error reports.</param>
        /// <param name="delim_close">Character that ends the list of fields.</param>
        private void ParseFields(BibTexLexerCallback callback, char delim_close)
        {
            while (bibtex[c] != delim_close)
            {
                ParseWhiteSpace();

                if (!ParseFieldName(callback))
                {
                    // No usable field name here: report it, skip ahead and try again.
                    Exception(callback, "Expecting a field name but nothing was acceptable, so moving onto next whitespace");
                    HuntForWhitespace();
                    ParseWhiteSpace();
                    continue;
                }

                ParseWhiteSpace();

                // The name must be followed by '='.
                if (bibtex[c] != '=')
                {
                    Exception(callback, "Expecting an = between the field name and the field value");
                    continue;
                }
                ++c;

                ParseWhiteSpace();
                ParseFieldValue(callback);
                ParseWhiteSpace();

                // The comma after a field is optional.
                if (bibtex[c] == ',')
                {
                    ++c;
                }

                ParseWhiteSpace();
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Parses the delimited body of an entry: opening delimiter, key, comma, fields and
        /// closing delimiter. Each structural mismatch is reported through the Exception helper
        /// and ends the method early.
        /// </summary>
        /// <param name="callback">Receives the parse events and error reports.</param>
        /// <param name="delim_open">Character expected at the current position.</param>
        /// <param name="delim_close">Character expected after the fields.</param>
        private void ParseEntry_Delim(BibTexLexerCallback callback, char delim_open, char delim_close)
        {
            // Opening delimiter
            if (bibtex[c] != delim_open)
            {
                Exception(callback, "Expecting a {0} to start a BibTeX reference", delim_open);
                return;
            }
            ++c;

            // Key, with optional whitespace on either side
            ParseWhiteSpace();
            ParseKey(callback);
            ParseWhiteSpace();

            // Comma separating the key from the fields
            if (bibtex[c] != ',')
            {
                Exception(callback, "Expecting a , after the BibTeX key");
                return;
            }
            ++c;

            // The fields themselves
            ParseWhiteSpace();
            ParseFields(callback, delim_close);

            // Closing delimiter
            if (bibtex[c] == delim_close)
            {
                ++c;
            }
            else
            {
                Exception(callback, "Expecting a {0} to end this BibTeX reference", delim_close);
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Advances the current position up to (not past) the next occurrence of
        /// <paramref name="delim"/>, or to MAX_C when there is none, and returns the text that
        /// was skipped over.
        /// </summary>
        /// <param name="callback">Receives an error report when the delimiter is not found.</param>
        /// <param name="delim">Character to stop at.</param>
        /// <returns>The text between the starting position and the stopping position.</returns>
        private string ParseUntilDelim(BibTexLexerCallback callback, char delim)
        {
            int begin = c;

            for (; c < MAX_C; ++c)
            {
                if (bibtex[c] == delim)
                {
                    break;
                }
            }

            bool delim_missing = (MAX_C <= c);

            if (delim_missing)
            {
                Exception(callback, "Could not find delim: {0}", delim);
            }

            int length = c - begin;

            return bibtex.Substring(begin, length);
        }
        /// <summary>
        /// Walks the input up to MAX_C: every '@' is handed to ParseEntry, every other character
        /// is skipped. Signals callback.RaiseFinished once the walk is done.
        /// </summary>
        void ParseTopLevel(BibTexLexerCallback callback)
        {
            while (c < MAX_C)
            {
                if ('@' == bibtex[c])
                {
                    // An entry starts here; parse it, then skip the whitespace that follows it.
                    ParseEntry(callback);
                    ParseWhiteSpace();
                    continue;
                }

                // Anything outside an entry is ignored.
                ++c;
            }

            callback.RaiseFinished();
        }
Esempio n. 6
0
        /// <summary>
        /// Reads the run of characters accepted by IsKeyChar at the current position and reports
        /// it through callback.RaiseKey. An empty key additionally triggers a warning, but is
        /// still reported.
        /// </summary>
        private void ParseKey(BibTexLexerCallback callback)
        {
            int begin = c;

            // No `c < MAX_C` test here; the original leaves it out as an optimization.
            for (;;)
            {
                if (!IsKeyChar(bibtex[c]))
                {
                    break;
                }
                ++c;
            }

            int length = c - begin;

            if (0 == length)
            {
                callback.RaiseWarning("There is no key in this BibTeX reference");
            }

            callback.RaiseKey(bibtex.Substring(begin, length));
        }
Esempio n. 7
0
        // ------------------------------------------------------------------------------------

        /// <summary>
        /// Builds an error report and passes it to callback.RaiseException. The report has three
        /// lines: the formatted message, up to 40 characters of input on either side of the
        /// current position (whitespace shown as blanks), and a '^' under the current position.
        /// The finished text is trimmed before it is raised.
        /// </summary>
        /// <param name="callback">Receives the finished report.</param>
        /// <param name="p">Composite format string for the first line.</param>
        /// <param name="args">Arguments for <paramref name="p"/>.</param>
        private void Exception(BibTexLexerCallback callback, string p, params object[] args)
        {
            StringBuilder report = new StringBuilder();

            // Line 1: the message itself
            report.AppendFormat(p, args);
            report.AppendLine();

            // The slice of input that is shown as context
            int window_begin = Math.Max(0, c - 40);
            int window_end   = Math.Min(MAX_C, c + 40);

            // Line 2: the context, flattened so it stays on one line
            for (int pos = window_begin; pos < window_end; ++pos)
            {
                char ch = bibtex[pos];
                report.Append(Char.IsWhiteSpace(ch) ? ' ' : ch);
            }
            report.AppendLine();

            // Line 3: a caret in the column of the current position
            for (int pos = window_begin; pos < window_end; ++pos)
            {
                report.Append(pos == c ? '^' : ' ');
            }
            report.AppendLine();

            callback.RaiseException(report.ToString().Trim());
        }
Esempio n. 8
0
        /// <summary>
        /// Consumes the run of characters accepted by IsNameChar at the current position and,
        /// when that run is not empty, reports it through callback.RaiseFieldName.
        /// </summary>
        /// <param name="callback">Receives the field name when one was found.</param>
        /// <returns>true when at least one name character was consumed, false otherwise.</returns>
        private bool ParseFieldName(BibTexLexerCallback callback)
        {
            int name_begin = c;

            for (;;)
            {
                if (!IsNameChar(bibtex[c]))
                {
                    break;
                }
                ++c;
            }

            int name_length = c - name_begin;

            // Nothing consumed means there is no field name here.
            if (name_length <= 0)
            {
                return false;
            }

            callback.RaiseFieldName(bibtex.Substring(name_begin, name_length));
            return true;
        }
Esempio n. 9
0
        /// <summary>
        /// Reads a bare field value made of letters, digits, '.' and '-' and appends it to
        /// field_values. A value that runs all the way to MAX_C is reported as an error and is
        /// not stored.
        /// </summary>
        /// <param name="callback">Receives the error report when the value hits the end of input.</param>
        /// <returns>true when the value was stored, false when the end of input was reached.</returns>
        private bool ParseFieldValue_AlphaNumeric(BibTexLexerCallback callback)
        {
            int value_begin = c;

            while (c < MAX_C)
            {
                char current = bibtex[c];
                bool belongs_to_value = Char.IsLetterOrDigit(current) || '.' == current || '-' == current;

                if (!belongs_to_value)
                {
                    break;
                }
                ++c;
            }

            string value_text = bibtex.Substring(value_begin, c - value_begin);

            if (c < MAX_C)
            {
                field_values.Add(value_text);
                return true;
            }

            // A field value always sits inside a BibTeX record, so at least one closing
            // delimiter must follow it; reaching the end of input here means damaged data.
            Exception(callback, "Alphanumeric field value should not continue until EOF. Corrupted or truncated BibTeX?");
            return false;
        }
Esempio n. 10
0
        /// <summary>
        /// Inspects the current character to decide which kind of field value follows and
        /// delegates to the matching parser: braces for '{', quotes for '"', alphanumeric for a
        /// letter or digit.
        /// </summary>
        /// <param name="callback">Passed on to the delegated parser; receives the error report otherwise.</param>
        /// <returns>
        /// The result of the delegated parser, or false when the current character cannot start
        /// a field value.
        /// </returns>
        private bool ParseFieldValue(BibTexLexerCallback callback)
        {
            // Decide on the value type and parse it
            char ch = bibtex[c];

            if ('{' == ch)
            {
                return ParseFieldValue_Braces(callback);
            }

            if ('"' == ch)
            {
                return ParseFieldValue_Quotes(callback);
            }

            if (Char.IsLetterOrDigit(ch))
            {
                return ParseFieldValue_AlphaNumeric(callback);
            }

            // Name the delimiters this method really accepts. Format arguments are inserted
            // verbatim (brace escaping only applies to the format string), so they are passed
            // as the plain characters.
            Exception(callback, "A field value should start with '{0}' or '{1}' or digit", "{", "\"");
            return false;
        }
Esempio n. 11
0
        /// <summary>
        /// Scans the input up to MAX_C. Text found before an entry is trimmed and, when not
        /// empty, reported through callback.RaiseComment; each entry start is handed to
        /// ParseEntry. Signals callback.RaiseFinished at the end.
        /// </summary>
        /// <param name="callback">Receives comments, the events raised by ParseEntry, and the finished signal.</param>
        private void ParseTopLevel(BibTexLexerCallback callback)
        {
            int prev_c = c;

            while (c < MAX_C)
            {
                // The `@keyword` must be preceded by at least one whitespace character so as
                // to differentiate it from email addresses, etc.: `joe@article` vs. ` @article`.
                // An `@` at index 0 has no predecessor; it is treated as an entry start
                // instead of reading bibtex[-1].
                bool at_entry_start = ('@' == bibtex[c]) && (0 == c || Char.IsWhiteSpace(bibtex[c - 1]));

                if (!at_entry_start)
                {
                    ++c;
                    continue;
                }

                // Everything that we looped through thus far is either whitespace or comment or both:
                string comment = bibtex.Substring(prev_c, c - prev_c).Trim();

                if (!String.IsNullOrEmpty(comment))
                {
                    callback.RaiseComment(comment);
                }

                // Get this @entry
                ParseEntry(callback);

                // Skip spaces between entries
                ParseWhiteSpace();

                prev_c = c;
            }

            callback.RaiseFinished();
        }
 /// <summary>
 /// Inspects the current character to decide which kind of field value follows and
 /// delegates to the matching parser: braces for '{', quotes for '"', alphanumeric for a
 /// letter or digit. Any other character is reported through the Exception helper.
 /// </summary>
 /// <param name="callback">Passed on to the delegated parser; receives the error report otherwise.</param>
 private void ParseFieldValue(BibTexLexerCallback callback)
 {
     // Decide on the value type and parse it
     if ('{' == bibtex[c])
     {
         ParseFieldValue_Braces(callback);
     }
     else if ('"' == bibtex[c])
     {
         ParseFieldValue_Quotes(callback);
     }
     else if (Char.IsLetterOrDigit(bibtex[c]))
     {
         ParseFieldValue_AlphaNumeric(callback);
     }
     else
     {
         // Name the delimiters this method really accepts. Format arguments are inserted
         // verbatim (brace escaping only applies to the format string), so they are passed
         // as the plain characters.
         Exception(callback, "A field value should start with '{0}' or '{1}' or digit", "{", "\"");
     }
 }
Esempio n. 13
0
        /// <summary>
        /// Parses one `@...` construct. The current position is expected to be on the `@`,
        /// which is skipped without being checked. An entry named "comment" (in any letter
        /// case) is reported through callback.RaiseComment; any other entry has its name
        /// raised and its delimited body handed to ParseEntry_Delim.
        /// </summary>
        /// <param name="callback">Receives the parse events and error reports.</param>
        private void ParseEntry(BibTexLexerCallback callback)
        {
            // skip initial `@`
            ++c;

            // Collect the entry name
            int name_begin = c;

            for (;;)
            {
                if (!IsNameChar(bibtex[c]))
                {
                    break;
                }
                ++c;
            }
            int name_end = c;

            string entry_name = bibtex.Substring(name_begin, name_end - name_begin);

            bool is_comment = (0 == "comment".CompareTo(entry_name.ToLower()));

            if (is_comment)
            {
                ParseWhiteSpace();

                // Choose where the comment text stops, based on how the comment opens.
                char opener = bibtex[c];
                char stop_at;

                if ('(' == opener)
                {
                    stop_at = ')';
                }
                else if ('{' == opener)
                {
                    stop_at = '}';
                }
                else
                {
                    // b0rked `@comment`: parse until EOL
                    stop_at = '\n';
                }

                callback.RaiseComment(ParseUntilDelim(callback, stop_at));
                return;
            }

            callback.RaiseEntryName(entry_name);

            // An empty name is reported, but parsing carries on.
            if (name_begin == name_end)
            {
                Exception(callback, "The BibTeX type is missing");
            }

            // Skip spaces between name and open brackets
            ParseWhiteSpace();

            // Now an entry is either { xxx } or ( xxx ); no `c < MAX_C` test is made here.
            char body_opener = bibtex[c];

            if ('(' == body_opener)
            {
                ParseEntry_Delim(callback, '(', ')');
            }
            else if ('{' == body_opener)
            {
                ParseEntry_Delim(callback, '{', '}');
            }
            else
            {
                Exception(callback, "Expecting a {0} or {1} to start a BibTeX reference", "(", "{{");
            }
        }
        /// <summary>
        /// Parses a `{ ... }` field value. Nested braces are tracked, and a brace directly
        /// preceded by a backslash is not counted. The text between the outer braces is reported
        /// through callback.RaiseFieldValue.
        /// </summary>
        /// <param name="callback">Receives the field value and error reports.</param>
        private void ParseFieldValue_Braces(BibTexLexerCallback callback)
        {
            // The value has to open with '{'
            if (bibtex[c] != '{')
            {
                Exception(callback, "Braces field value should start with {0}", "{{");
                return;
            }
            ++c;

            int nesting     = 0;
            int value_begin = c;

            // Nothing in this loop checks for the end of the input.
            for (;;)
            {
                char current = bibtex[c];
                bool is_brace = ('{' == current) || ('}' == current);
                bool is_unescaped_brace = is_brace && ('\\' != bibtex[c - 1]);

                if (is_unescaped_brace && '}' == current)
                {
                    --nesting;

                    // A negative depth means this is the brace that closes the value itself.
                    if (nesting < 0)
                    {
                        break;
                    }
                }
                else if (is_unescaped_brace)
                {
                    ++nesting;
                }

                ++c;
            }

            callback.RaiseFieldValue(bibtex.Substring(value_begin, c - value_begin));

            // Check we have our final }
            if (bibtex[c] != '}')
            {
                Exception(callback, "Braces field value should end with {0}", "}}");
                return;
            }
            ++c;
        }
        /// <summary>
        /// Parses one `@...` construct starting at the current position. An entry named
        /// "comment" (in any letter case) is reported through callback.RaiseComment; any other
        /// entry has its name raised and its delimited body handed to ParseEntry_Delim. An
        /// IndexOutOfRangeException raised while parsing is turned into an error report.
        /// </summary>
        /// <param name="callback">Receives the parse events and error reports.</param>
        private void ParseEntry(BibTexLexerCallback callback)
        {
            try
            {
                // The entry has to start with '@'
                if (bibtex[c] != '@')
                {
                    Exception(callback, "Entry should start with @");
                    return;
                }
                ++c;

                // Collect the entry name
                int name_begin = c;

                for (;;)
                {
                    if (!IsNameChar(bibtex[c]))
                    {
                        break;
                    }
                    ++c;
                }
                int name_end = c;

                string entry_name = bibtex.Substring(name_begin, name_end - name_begin);

                bool is_comment = (0 == "comment".CompareTo(entry_name.ToLower()));

                if (is_comment)
                {
                    // Move up to the opening '{', step over it, then take everything up to '}'.
                    ParseUntilDelim(callback, '{');
                    ++c;
                    callback.RaiseComment(ParseUntilDelim(callback, '}'));
                    return;
                }

                callback.RaiseEntryName(entry_name);

                // An empty name is reported, but parsing carries on.
                if (name_begin == name_end)
                {
                    Exception(callback, "The BibTeX type is missing");
                }

                // Skip spaces between name and open brackets
                ParseWhiteSpace();

                // Now an entry is either { xxx } or ( xxx )
                switch (bibtex[c])
                {
                    case '(':
                        ParseEntry_Delim(callback, '(', ')');
                        break;

                    case '{':
                        ParseEntry_Delim(callback, '{', '}');
                        break;

                    default:
                        Exception(callback, "Expecting a {0} or {1} to start a BibTeX reference", "(", "{{");
                        break;
                }
            }
            catch (IndexOutOfRangeException)
            {
                Exception(callback, "Ran out of characters while reading the entry");
            }
        }
Esempio n. 16
0
        /// <summary>
        /// Parses a `{ ... }` field value and appends its content to field_values. The current
        /// position is expected to be on the opening brace, which is skipped without being
        /// checked. Nested braces are tracked and backslash-escaped braces are not counted.
        /// When no closing brace is found, an error is reported and a recovery is attempted:
        /// the value is cut at the first comma that is followed by text looking like
        /// `name = value`.
        /// </summary>
        /// <param name="callback">Receives the error report for an unterminated value.</param>
        /// <returns>true when the value was properly terminated, false otherwise.</returns>
        private bool ParseFieldValue_Braces(BibTexLexerCallback callback)
        {
            // Skip our initial {
            ++c;

            bool escape            = false;
            int  brace_depth       = 0;
            int  field_value_start = c;

            while (c < MAX_C)
            {
                if ('\\' == bibtex[c] && '\\' == bibtex[c - 1] && escape)
                {
                    // Second backslash of an escaped backslash: the escape is used up.
                    ++c;
                    escape = false;
                }
                else if ('\\' == bibtex[c])
                {
                    ++c;
                    escape = true;
                }
                else if ('{' == bibtex[c] && ('\\' != bibtex[c - 1] || !escape))
                {
                    ++brace_depth;
                    ++c;
                }
                else if ('}' == bibtex[c] && ('\\' != bibtex[c - 1] || !escape))
                {
                    --brace_depth;

                    // Are we out of our delimiters yet?
                    if (0 <= brace_depth)
                    {
                        ++c;
                    }
                    else
                    {
                        break;
                    }
                }
                else
                {
                    ++c;
                    escape = false;
                }
            }

            int    field_value_end = c;
            string field_value     = bibtex.Substring(field_value_start, field_value_end - field_value_start);

            // Check we have our final }. The position is bounds-checked first because the
            // loop above may have stopped at MAX_C.
            if (c < MAX_C && '}' == bibtex[c])
            {
                ++c;
                field_values.Add(field_value);
                return true;
            }

            // The argument is inserted verbatim into the message, so pass a single brace.
            Exception(callback, "Braces field value should end with {0}. Corrupted or truncated BibTeX?", "}");

            // see if we can backpedal to a first comma and take it from there, thus 'recovering' the damaged field value
            c = field_value_start;

            // Assuming we will parse a field name and an assignment next...
            // (built once here rather than once per candidate comma)
            Regex re = new Regex(@"^\s*\w+\s*=\s*\S+", RegexOptions.Compiled);

            while (true)
            {
                while (c < MAX_C && ',' != bibtex[c])
                {
                    ++c;
                }

                if (c >= MAX_C)
                {
                    break;
                }

                if (re.IsMatch(bibtex.Substring(c + 1)))
                {
                    // ... we now assume this comma ends this field
                    break;
                }
                ++c;
            }

            if (c < MAX_C)
            {
                field_value_end = c;
                field_value     = bibtex.Substring(field_value_start, field_value_end - field_value_start);

                field_values.Add(field_value);
            }

            return false;
        }
Esempio n. 17
0
        /// <summary>
        /// Parses a quoted field value and appends its content to field_values. The character
        /// at the current position is taken as the quote character; the value runs up to the
        /// next occurrence of it that is not backslash-escaped. When no closing quote is found,
        /// an error is reported and a recovery is attempted: the value is cut at the first comma
        /// that is followed by text looking like `name = value`.
        /// </summary>
        /// <param name="callback">Receives the error report for an unterminated value.</param>
        /// <returns>true when the value was properly terminated, false otherwise.</returns>
        private bool ParseFieldValue_Quotes(BibTexLexerCallback callback)
        {
            // Skip our initial quote
            char quote = bibtex[c];

            ++c;

            int  field_value_start = c;
            bool escape            = false;

            while (c < MAX_C)
            {
                if ('\\' == bibtex[c] && '\\' == bibtex[c - 1] && escape)
                {
                    // Second backslash of an escaped backslash: the escape is used up.
                    ++c;
                    escape = false;
                }
                else if ('\\' == bibtex[c])
                {
                    ++c;
                    escape = true;
                }
                else if (quote == bibtex[c] && ('\\' != bibtex[c - 1] || !escape))
                {
                    break;
                }
                else
                {
                    ++c;
                    escape = false;
                }
            }

            int    field_value_end = c;
            string field_value     = bibtex.Substring(field_value_start, field_value_end - field_value_start);

            // Check we have our final quote. The position is bounds-checked first because the
            // loop above may have stopped at MAX_C.
            if (c < MAX_C && quote == bibtex[c])
            {
                ++c;
                field_values.Add(field_value);
                return true;
            }

            Exception(callback, "Quotes field value should end with \\{0} instead of being terminated at EOF. Corrupted or truncated BibTeX?", quote);

            // see if we can backpedal to a first comma and take it from there, thus 'recovering' the damaged field value
            c = field_value_start;

            // Assuming we will parse a field name and an assignment next...
            // (built once here rather than once per candidate comma)
            Regex re = new Regex(@"^\s*\w+\s*=\s*\S+", RegexOptions.Compiled);

            while (true)
            {
                while (c < MAX_C && ',' != bibtex[c])
                {
                    ++c;
                }

                if (c >= MAX_C)
                {
                    break;
                }

                if (re.IsMatch(bibtex.Substring(c + 1)))
                {
                    // ... we now assume this comma ends this field
                    break;
                }
                ++c;
            }

            if (c < MAX_C)
            {
                field_value_end = c;
                field_value     = bibtex.Substring(field_value_start, field_value_end - field_value_start);

                field_values.Add(field_value);
            }

            return false;
        }
Esempio n. 18
0
 /// <summary>
 /// Public wrapper that hands <paramref name="callback"/> to ParseTopLevel, which does the
 /// actual work.
 /// </summary>
 /// <param name="callback">Callback object passed through unchanged to ParseTopLevel.</param>
 public void Parse(BibTexLexerCallback callback)
 {
     ParseTopLevel(callback);
 }
Esempio n. 19
0
        /// <summary>
        /// Parses `name = value` pairs until the current character equals
        /// <paramref name="delim_close"/>. A value may consist of several parts joined by `#`;
        /// the parts are collected in field_values and reported together through
        /// callback.RaiseFieldValue. Parsing stops early when the end of input is hit where
        /// a field name or an `=` was expected.
        /// </summary>
        /// <param name="callback">Receives field names, field values and error reports.</param>
        /// <param name="delim_close">Character that ends the list of fields.</param>
        private void ParseFields(BibTexLexerCallback callback, char delim_close)
        {
            while (delim_close != bibtex[c])
            {
                // Parse whitespace
                ParseWhiteSpace();

                // Get the name
                if (ParseFieldName(callback))
                {
                    // Parse whitespace
                    ParseWhiteSpace();

                    // Get the equals
                    if ('=' != bibtex[c])
                    {
                        Exception(callback, "Expecting an = between the field name and the field value");
                        if (c >= MAX_C)
                        {
                            break;              // abort as we've hit EOF prematurely
                        }
                        continue;
                    }
                    ++c;

                    // Collect the value parts: part ( '#' part )* with an optional trailing comma
                    field_values.Clear();
                    while (c < MAX_C)
                    {
                        ParseWhiteSpace();

                        if (!ParseFieldValue(callback))
                        {
                            break;
                        }

                        ParseWhiteSpace();

                        if ('#' == bibtex[c])
                        {
                            // concatenation: keep adding to the field value!
                            ++c;
                            continue;
                        }

                        // The comma after a field is optional.
                        if (',' == bibtex[c])
                        {
                            ++c;
                        }
                        break;
                    }

                    // Reported even when a part failed to parse; the list then holds whatever
                    // was collected up to that point.
                    callback.RaiseFieldValue(field_values);
                    field_values.Clear();
                }
                else
                {
                    if (c >= MAX_C)
                    {
                        Exception(callback, "Expecting a field name but nothing was acceptable until EOF: broken or truncated BibTeX?");
                        break;  // abort as we've hit EOF prematurely
                    }
                    else
                    {
                        Exception(callback, "Expecting a field name but nothing was acceptable, so moving onto next whitespace");
                        HuntForWhitespace();
                    }
                }

                // Parse whitespace
                ParseWhiteSpace();
            }
        }