Ejemplo n.º 1
0
        /// <summary>
        /// Builds a finite state machine that matches this character set expression
        /// </summary>
        /// <typeparam name="TAccept">The type of the accept symbol</typeparam>
        /// <param name="accept">The accept symbol handed to the resulting machine</param>
        /// <returns>A new <see cref="CharFA{TAccept}"/> built from the ranges gathered from the entries, or from the result of <c>CharRange.NotRanges</c> over them when <c>HasNegatedRanges</c> is set</returns>
        public override CharFA<TAccept> ToFA<TAccept>(TAccept accept)
        {
            var collected = new List<CharRange>();

            for (var index = 0; index < Entries.Count; index++)
            {
                var current = Entries[index];

                // each kind of entry is tested independently of the others

                // a single character becomes a one-character range
                if (current is RegexCharsetCharEntry single)
                {
                    collected.Add(new CharRange(single.Value, single.Value));
                }

                // an explicit first-last range is taken as is
                if (current is RegexCharsetRangeEntry span)
                {
                    collected.Add(new CharRange(span.First, span.Last));
                }

                // a named class contributes every range registered under its name
                if (current is RegexCharsetClassEntry named)
                {
                    collected.AddRange(CharFA<TAccept>.CharacterClasses[named.Name]);
                }
            }

            if (!HasNegatedRanges)
            {
                return CharFA<TAccept>.Set(collected, accept);
            }
            return CharFA<TAccept>.Set(CharRange.NotRanges(collected), accept);
        }
 /// <summary>
 /// Records a transition on a range of characters to the given state
 /// </summary>
 /// <param name="inputRange">The range of input characters</param>
 /// <param name="fa">The state associated with the range</param>
 public void Add(CharRange inputRange, CharFA<TAccept> fa)
 {
     _rangeTransitions.Add(new RangeWithFa(inputRange, fa));

     // reuse the per-state bucket when the state has been seen before
     if (_charactersByState.TryGetValue(fa, out var bucket))
     {
         bucket.ranges.Add(inputRange);
         return;
     }

     // first time this state is seen: start it off with empty lists
     bucket = new CharactersAndRanges(new List<char>(), new List<CharRange>());
     _charactersByState[fa] = bucket;
     bucket.ranges.Add(inputRange);
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates an FA that matches any single character out of a set of characters
        /// </summary>
        /// <param name="ranges">The character ranges that make up the set</param>
        /// <param name="accept">The symbol to accept</param>
        /// <returns>The start state of an FA with one input transition per expanded character, all leading to the same accepting state</returns>
        public static CharFA<TAccept> Set(IEnumerable<CharRange> ranges, TAccept accept = default(TAccept))
        {
            var start = new CharFA<TAccept>();
            var acceptState = new CharFA<TAccept>(true, accept);

            // every character yielded by the expansion shares the one accepting state
            foreach (var input in CharRange.ExpandRanges(ranges))
            {
                start.InputTransitions[input] = acceptState;
            }

            return start;
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Parses <paramref name="charRanges"/> with the single-argument overload and optionally normalizes the outcome
 /// </summary>
 /// <param name="charRanges">The characters to parse into ranges</param>
 /// <param name="normalize">True to copy the parsed ranges into a list and pass it through <c>CharRange.NormalizeRangeList</c>; false to return the parse result untouched</param>
 /// <returns>The parsed ranges</returns>
 static IEnumerable<CharRange> _ParseRanges(IEnumerable<char> charRanges, bool normalize)
 {
     var parsed = _ParseRanges(charRanges);
     if (!normalize)
     {
         return parsed;
     }

     // normalization works on a list, so materialize the ranges first
     var normalized = new List<CharRange>(parsed);
     CharRange.NormalizeRangeList(normalized);
     return normalized;
 }
Ejemplo n.º 5
0
 /// <summary>
 /// Collects this state's outgoing input transitions, grouped by destination state and expressed as ranges
 /// </summary>
 /// <param name="result">The dictionary to fill, or null to have a new one created. Entries are inserted with <c>Add</c>, so a supplied dictionary that already holds one of the destination states will typically reject the duplicate key.</param>
 /// <returns>The filled dictionary, keyed by destination state</returns>
 public IDictionary<CharFA<TAccept>, IList<CharRange>> FillInputTransitionRangesGroupedByState(IDictionary<CharFA<TAccept>, IList<CharRange>> result = null)
 {
     result = result ?? new Dictionary<CharFA<TAccept>, IList<CharRange>>();

     // viewed through this interface the transitions enumerate as destination state -> characters
     var byState = (IDictionary<CharFA<TAccept>, ICollection<char>>)InputTransitions;
     foreach (var pair in byState)
     {
         // sort the characters before they are turned into ranges
         var sortedChars = new List<char>(pair.Value);
         sortedChars.Sort();

         var stateRanges = new List<CharRange>(CharRange.GetRanges(sortedChars));
         result.Add(pair.Key, stateRanges);
     }

     return result;
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Appends the textual form of a range to a string builder
 /// </summary>
 /// <param name="builder">The builder to append to</param>
 /// <param name="range">The range to write</param>
 static void _AppendRangeTo(StringBuilder builder, CharRange range)
 {
     _AppendRangeCharTo(builder, range.First);

     // a single character range is fully written at this point
     if (range.Last.CompareTo(range.First) == 0)
     {
         return;
     }

     // two adjacent characters are written back to back;
     // anything wider gets a '-' between its end points
     if (range.Last != range.First + 1)
     {
         builder.Append('-');
     }
     _AppendRangeCharTo(builder, range.Last);
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Returns a DFA table that can be used to lex or match
        /// </summary>
        /// <param name="symbolTable">The symbol table to use, or null to just implicitly tag symbols with integer ids</param>
        /// <param name="progress">The progress object used to report the progress of the task</param>
        /// <returns>A DFA table with one entry per state in the closure. Each entry carries the accept id of the state (-1 when the state is not accepting) and its transitions as packed ranges plus the index of the destination state.</returns>
        public CharDfaEntry[] ToDfaStateTable(IList<TAccept> symbolTable = null, IProgress<CharFAProgress> progress = null)
        {
            // only convert to a DFA if we haven't already
            // ToDfa() already checks but it always copies
            // the state information so this performs better
            var dfa = IsDfa ? this : ToDfa(progress);
            var closure = dfa.FillClosure();
            var symbolLookup = new ListDictionary<TAccept, int>();

            // if we don't have a symbol table, build
            // the symbol lookup from the states.
            if (null == symbolTable)
            {
                // go through each state, looking for accept symbols
                // and then add them to the new symbol table if we
                // haven't already
                var nextId = 0;
                for (int jc = closure.Count, j = 0; j < jc; ++j)
                {
                    var fa = closure[j];
                    if (fa.IsAccepting && !symbolLookup.ContainsKey(fa.AcceptSymbol))
                    {
                        symbolLookup.Add(fa.AcceptSymbol, nextId);
                        ++nextId;
                    }
                }
            }
            else // build the symbol lookup from the symbol table
            {
                for (int ic = symbolTable.Count, i = 0; i < ic; ++i)
                {
                    if (null != symbolTable[i])
                    {
                        symbolLookup.Add(symbolTable[i], i);
                    }
                }
            }

            // index the closure once up front so that resolving a destination
            // state does not need a linear search for every single transition.
            // Only the first position of a state is kept, which is the same
            // position a front-to-back search would report.
            var stateIndices = new Dictionary<CharFA<TAccept>, int>();
            for (int sc = closure.Count, s = 0; s < sc; ++s)
            {
                var state = closure[s];
                if (!stateIndices.ContainsKey(state))
                {
                    stateIndices.Add(state, s);
                }
            }

            // build the root array
            var result = new CharDfaEntry[closure.Count];

            for (var i = 0; i < result.Length; i++)
            {
                var fa = closure[i];
                // get all the transition ranges for each destination state
                var trgs = fa.FillInputTransitionRangesGroupedByState();
                // make a new transition entry array for our DFA state table
                var trns = new CharDfaTransitionEntry[trgs.Count];
                var j = 0;
                // for each transition range
                foreach (var trg in trgs)
                {
                    // a destination that is not part of the closure is
                    // reported as -1, the same as a failed search
                    if (!stateIndices.TryGetValue(trg.Key, out var destination))
                    {
                        destination = -1;
                    }
                    // add the transition entry using
                    // the packed ranges from CharRange
                    trns[j] = new CharDfaTransitionEntry(
                        CharRange.ToPackedChars(trg.Value),
                        destination);

                    ++j;
                }
                // now add the state entry for the state above
                result[i] = new CharDfaEntry(
                    fa.IsAccepting ? symbolLookup[fa.AcceptSymbol] : -1,
                    trns);
            }
            return result;
        }
        // Builds the table of named character classes. The names that start
        // with '^' are computed with CharRange.NotRanges from the class of the
        // same name, so they have to be added after the class they refer to.
        static IDictionary<string, IList<CharRange>> _GetCharacterClasses()
        {
            var classes = new Dictionary<string, IList<CharRange>>();

            classes.Add("alnum", new List<CharRange>
            {
                new CharRange('A', 'Z'),
                new CharRange('a', 'z'),
                new CharRange('0', '9')
            });
            classes.Add("alpha", new List<CharRange>
            {
                new CharRange('A', 'Z'),
                new CharRange('a', 'z')
            });
            classes.Add("ascii", new List<CharRange>
            {
                new CharRange('\0', '\x7F')
            });
            classes.Add("blank", new List<CharRange>
            {
                new CharRange(' ', ' '),
                new CharRange('\t', '\t')
            });
            classes.Add("cntrl", new List<CharRange>
            {
                new CharRange('\0', '\x1F'),
                new CharRange('\x7F', '\x7F')
            });
            classes.Add("digit", new List<CharRange>
            {
                new CharRange('0', '9')
            });
            classes.Add("^digit", new List<CharRange>(CharRange.NotRanges(classes["digit"])));
            classes.Add("graph", new List<CharRange>
            {
                new CharRange('\x21', '\x7E')
            });
            classes.Add("lower", new List<CharRange>
            {
                new CharRange('a', 'z')
            });
            classes.Add("print", new List<CharRange>
            {
                new CharRange('\x20', '\x7E')
            });

            // [!"\#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]
            var punctuation = CharRange.GetRanges("!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~");
            classes.Add("punct", new List<CharRange>(punctuation));

            // [ \t\r\n\v\f]
            // NOTE(review): these characters are not listed in ascending order;
            // confirm that CharRange.GetRanges does not rely on sorted input.
            var whitespace = CharRange.GetRanges(" \t\r\n\v\f");
            classes.Add("space", new List<CharRange>(whitespace));
            classes.Add("^space", new List<CharRange>(CharRange.NotRanges(classes["space"])));

            classes.Add("upper", new List<CharRange>
            {
                new CharRange('A', 'Z')
            });
            classes.Add("word", new List<CharRange>
            {
                new CharRange('0', '9'),
                new CharRange('A', 'Z'),
                new CharRange('_', '_'),
                new CharRange('a', 'z')
            });
            classes.Add("^word", new List<CharRange>(CharRange.NotRanges(classes["word"])));
            classes.Add("xdigit", new List<CharRange>
            {
                new CharRange('0', '9'),
                new CharRange('A', 'F'),
                new CharRange('a', 'f')
            });

            return classes;
        }
 /// <summary>
 /// Stores the given character range together with the given state
 /// </summary>
 /// <param name="range">The character range to store</param>
 /// <param name="fa">The state to store alongside the range</param>
 public RangeWithFa(CharRange range, CharFA<TAccept> fa)
 {
     this.fa = fa;
     this.range = range;
 }