Exemple #1
0
        /// <summary>
        /// Parses a regular expression from <paramref name="pc"/> and builds the state machine for it.
        /// </summary>
        /// <remarks>
        /// Parsing stops at end of input or at a ')' character. The ')' is left unconsumed so
        /// that the recursive caller handling the matching '(' can verify and skip it.
        /// </remarks>
        /// <param name="pc">The input cursor to read the expression from</param>
        /// <param name="accept">The accept symbol handed to every FA construction call</param>
        /// <returns>The machine built for the expression, or null if the input ended or a ')' was reached before any element was parsed</returns>
        /// <exception cref="ExpectingException">A high surrogate is the last character of the input, or a \p{..} / \P{..} escape names an unrecognized Unicode category</exception>
        internal static FA Parse(LexContext pc, int accept = -1)
        {
            FA  result = null, next = null;
            int ich;

            pc.EnsureStarted();
            while (true)
            {
                switch (pc.Current)
                {
                case -1:
                case ')':
                    // end of input or end of the current group: hand back what we have
#if MINIMIZE
                    // result is still null when nothing was parsed (empty input or "()")
                    if (null != result)
                    {
                        result = result.ToDfa();
                        result.TrimDuplicates();
                    }
#endif
                    return result;

                case '.':
                    // any code point
                    next = FA.Set(new int[] { 0, 0x10ffff }, accept);
                    pc.Advance();
                    // a trailing modifier binds to the dot only, not to everything parsed so far
                    next   = _ParseModifier(next, pc, accept);
                    result = _ConcatOrStart(result, next, accept);
                    break;

                case '\\':
                    pc.Advance();
                    pc.Expecting();
                    var isNot = false;
                    switch (pc.Current)
                    {
                    case 'P':
                        isNot = true;
                        goto case 'p';

                    case 'p':
                        pc.Advance();
                        pc.Expecting('{');
                        var  uc  = new StringBuilder();
                        // remember where the category name starts for error reporting
                        int  uli = pc.Line;
                        int  uco = pc.Column;
                        long upo = pc.Position;
                        // the first Advance() steps over the '{'
                        while (-1 != pc.Advance() && '}' != pc.Current)
                        {
                            uc.Append((char)pc.Current);
                        }
                        pc.Expecting('}');
                        pc.Advance();
                        int uci = _GetUnicodeCategoryIndex(uc.ToString());
                        if (0 > uci)
                        {
                            throw new ExpectingException("Unrecognized Unicode category \"" + uc.ToString() + "\"", uli, uco, upo, pc.FileOrUrl, "unicode-category");
                        }
                        // \P{..} is the negated form, so it takes the complement table.
                        // NOTE(review): assumes NotUnicodeCategories[i] is the complement of
                        // UnicodeCategories[i] - confirm against CharacterClasses.
                        if (isNot)
                        {
                            next = FA.Set(CharacterClasses.NotUnicodeCategories[uci], accept);
                        }
                        else
                        {
                            next = FA.Set(CharacterClasses.UnicodeCategories[uci], accept);
                        }
                        break;

                    case 'd':
                        next = FA.Set(CharacterClasses.digit, accept);
                        pc.Advance();
                        break;

                    case 'D':
                        next = FA.Set(RangeUtility.NotRanges(CharacterClasses.digit), accept);
                        pc.Advance();
                        break;

                    case 's':
                        next = FA.Set(CharacterClasses.space, accept);
                        pc.Advance();
                        break;

                    case 'S':
                        next = FA.Set(RangeUtility.NotRanges(CharacterClasses.space), accept);
                        pc.Advance();
                        break;

                    case 'w':
                        next = FA.Set(CharacterClasses.word, accept);
                        pc.Advance();
                        break;

                    case 'W':
                        next = FA.Set(RangeUtility.NotRanges(CharacterClasses.word), accept);
                        pc.Advance();
                        break;

                    default:
                        // any other escape is a single literal code point
                        if (-1 != (ich = _ParseEscapePart(pc)))
                        {
                            next = FA.Literal(new int[] { ich }, accept);
                        }
                        else
                        {
                            pc.Expecting();                                          // throw an error
                            return null;                                             // doesn't execute
                        }
                        break;
                    }
                    next   = _ParseModifier(next, pc, accept);
                    result = _ConcatOrStart(result, next, accept);
                    break;

                case '(':
                    pc.Advance();
                    pc.Expecting();
                    // the nested call returns with the cursor on the closing ')'
                    next = Parse(pc, accept);
                    pc.Expecting(')');
                    pc.Advance();
                    next   = _ParseModifier(next, pc, accept);
                    result = _ConcatOrStart(result, next, accept);
                    break;

                case '|':
                    if (-1 != pc.Advance())
                    {
                        // everything after the '|' is parsed as the right-hand alternative
                        next   = Parse(pc, accept);
                        result = FA.Or(new FA[] { result, next }, accept);
                    }
                    else
                    {
                        // a '|' with nothing after it makes the expression so far optional
                        result = FA.Optional(result, accept);
                    }
                    break;

                case '[':
                    var seti = _ParseSet(pc);
                    var set  = seti.Value;
                    // Key is the flag selecting the complemented set
                    if (seti.Key)
                    {
                        set = RangeUtility.NotRanges(set);
                    }
                    next   = FA.Set(set, accept);
                    next   = _ParseModifier(next, pc, accept);
                    result = _ConcatOrStart(result, next, accept);
                    break;

                default:
                    ich = pc.Current;
                    if (char.IsHighSurrogate((char)ich))
                    {
                        // combine the surrogate pair into a single UTF-32 code point
                        if (-1 == pc.Advance())
                        {
                            throw new ExpectingException("Expecting low surrogate in Unicode stream", pc.Line, pc.Column, pc.Position, pc.FileOrUrl, "low-surrogate");
                        }
                        ich = char.ConvertToUtf32((char)ich, (char)pc.Current);
                    }
                    next = FA.Literal(new int[] { ich }, accept);
                    pc.Advance();
                    next   = _ParseModifier(next, pc, accept);
                    result = _ConcatOrStart(result, next, accept);
                    break;
                }
            }
        }

        /// <summary>
        /// Appends <paramref name="next"/> to the expression parsed so far.
        /// </summary>
        /// <param name="result">The machine built so far, or null if nothing has been parsed yet</param>
        /// <param name="next">The machine for the element that was just parsed</param>
        /// <param name="accept">The accept symbol handed to the concatenation</param>
        /// <returns><paramref name="next"/> if <paramref name="result"/> is null, otherwise the concatenation of both</returns>
        private static FA _ConcatOrStart(FA result, FA next, int accept)
        {
            if (null == result)
            {
                return next;
            }
            return FA.Concat(new FA[] { result, next }, accept);
        }

        /// <summary>
        /// Maps a two-letter Unicode general category abbreviation to its table index.
        /// </summary>
        /// <remarks>
        /// The indices equal the numeric values of System.Globalization.UnicodeCategory.
        /// NOTE(review): presumably the CharacterClasses category tables are laid out in
        /// that order - confirm.
        /// </remarks>
        /// <param name="name">The abbreviation, for example "Lu" or "Nd"</param>
        /// <returns>The index for the category, or -1 if the abbreviation is not recognized</returns>
        private static int _GetUnicodeCategoryIndex(string name)
        {
            switch (name)
            {
            case "Lu": return 0;
            case "Ll": return 1;
            case "Lt": return 2;
            case "Lm": return 3;
            case "Lo": return 4;
            case "Mn": return 5;
            case "Mc": return 6;
            case "Me": return 7;
            case "Nd": return 8;
            case "Nl": return 9;
            case "No": return 10;
            case "Zs": return 11;
            case "Zl": return 12;
            case "Zp": return 13;
            case "Cc": return 14;
            case "Cf": return 15;
            case "Cs": return 16;
            case "Co": return 17;
            case "Pc": return 18;
            case "Pd": return 19;
            case "Ps": return 20;
            case "Pe": return 21;
            case "Pi": return 22;
            case "Pf": return 23;
            case "Po": return 24;
            case "Sm": return 25;
            case "Sc": return 26;
            case "Sk": return 27;
            case "So": return 28;
            case "Cn": return 29;
            default:   return -1;
            }
        }
Exemple #2
0
        /// <summary>
        /// Returns a DFA table that can be used to lex or match
        /// </summary>
        /// <param name="symbolTable">The symbol table to use, or null to just implicitly tag symbols with integer ids</param>
        /// <param name="progress">An optional progress callback, passed through to the DFA conversion and duplicate trimming steps</param>
        /// <returns>A DFA table that can be used to efficiently match or lex input</returns>
        /// <exception cref="InvalidOperationException">No symbol table was given and an accepting state has a negative accept symbol</exception>
        public DfaEntry[] ToDfaStateTable(IList <int> symbolTable = null, IProgress <FAProgress> progress = null)
        {
            // only convert to a DFA if we haven't already
            // ToDfa() already checks but it always copies
            // the state information so this performs better
            FA dfa = null;

            if (!IsDfa)
            {
                dfa = ToDfa(progress);
                dfa.TrimDuplicates(progress);
            }
            else
            {
                dfa = this;
            }
            var closure = new List <FA>();

            dfa.FillClosure(closure);

            // map every state to its first index in the closure once, so resolving
            // a transition target below does not rescan the whole list each time
            var stateIndices = new Dictionary <FA, int>(closure.Count);
            for (var k = 0; k < closure.Count; k++)
            {
                if (!stateIndices.ContainsKey(closure[k]))
                {
                    stateIndices.Add(closure[k], k);
                }
            }

            var symbolLookup = new Dictionary <int, int>();

            // if we don't have a symbol table, build
            // the symbol lookup from the states.
            if (null == symbolTable)
            {
                // go through each state, looking for accept symbols
                // and then add them to the new symbol table if we
                // haven't already
                var i = 0;
                for (int jc = closure.Count, j = 0; j < jc; ++j)
                {
                    var fa = closure[j];
                    if (fa.IsAccepting && !symbolLookup.ContainsKey(fa.AcceptSymbol))
                    {
                        if (0 > fa.AcceptSymbol)
                        {
                            // report the index of the offending state, not the state count
                            throw new InvalidOperationException("An accept symbol was never specified for state q" + j.ToString());
                        }
                        symbolLookup.Add(fa.AcceptSymbol, i);
                        ++i;
                    }
                }
            }
            else             // build the symbol lookup from the symbol table
            {
                for (int ic = symbolTable.Count, i = 0; i < ic; ++i)
                {
                    symbolLookup.Add(symbolTable[i], i);
                }
            }

            // build the root array
            var result = new DfaEntry[closure.Count];

            for (var i = 0; i < result.Length; i++)
            {
                var fa = closure[i];
#if DEBUG
                if (fa.IsAccepting)
                {
                    System.Diagnostics.Debug.Assert(-1 < fa.AcceptSymbol, "Illegal accept symbol " + fa.AcceptSymbol.ToString() + " was found on state state q" + i.ToString());
                }
#endif
                // get all the transition ranges for each destination state
                var trgs = fa.FillInputTransitionRangesGroupedByState();
                // make a new transition entry array for our DFA state table
                var trns = new DfaTransitionEntry[trgs.Count];
                var j    = 0;
                // for each transition range
                foreach (var trg in trgs)
                {
                    // resolve the destination state to its index in the closure;
                    // -1 if it is not part of the closure (what IndexOf() would return)
                    FA  target = trg.Key;
                    int targetIndex;
                    if (null == target || !stateIndices.TryGetValue(target, out targetIndex))
                    {
                        targetIndex = -1;
                    }
                    // add the transition entry using
                    // the packed ranges from CharRange
                    trns[j] = new DfaTransitionEntry(
                        trg.Value,
                        targetIndex);

                    ++j;
                }

                // now add the state entry for the state above
#if DEBUG
                if (fa.IsAccepting && !symbolLookup.ContainsKey(fa.AcceptSymbol))
                {
                    try
                    {
                        dfa.RenderToFile(@"dfastatetable_crashdump_dfa.jpg");
                    }
                    catch
                    {
                        // best effort only: a failed diagnostic dump must not hide the assert below
                    }
                    System.Diagnostics.Debug.Assert(false, "The symbol table did not contain an entry for state q" + i.ToString());
                }
#endif
                // non-accepting states are tagged with -1
                result[i] = new DfaEntry(
                    fa.IsAccepting ? symbolLookup[fa.AcceptSymbol] : -1,
                    trns);
            }
            return result;
        }