/// <summary>
/// Creates a state machine representing this expression
/// </summary>
/// <remarks>
/// Every entry contributes its character ranges to one combined list. When
/// <c>HasNegatedRanges</c> is set, the machine is built from the result of
/// <c>CharRange.NotRanges</c> applied to that list instead of the list itself.
/// </remarks>
/// <typeparam name="TAccept">The type of accept symbol to use for this expression</typeparam>
/// <param name="accept">The accept symbol to use for this expression</param>
/// <returns>A new <see cref="CharFA{TAccept}"/> finite state machine representing this expression</returns>
public override CharFA<TAccept> ToFA<TAccept>(TAccept accept)
{
    var collected = new List<CharRange>();
    for (var index = 0; index < Entries.Count; index++)
    {
        var current = Entries[index];

        // a single character becomes a one-character range
        if (current is RegexCharsetCharEntry single)
        {
            collected.Add(new CharRange(single.Value, single.Value));
        }

        // an explicit first-last range is taken as-is
        if (current is RegexCharsetRangeEntry span)
        {
            collected.Add(new CharRange(span.First, span.Last));
        }

        // a named class is looked up in the character class table
        if (current is RegexCharsetClassEntry named)
        {
            collected.AddRange(CharFA<TAccept>.CharacterClasses[named.Name]);
        }
    }

    if (!HasNegatedRanges)
    {
        return CharFA<TAccept>.Set(collected, accept);
    }
    return CharFA<TAccept>.Set(CharRange.NotRanges(collected), accept);
}
/// <summary>
/// Records a transition on a range of input characters to the specified state
/// </summary>
/// <param name="inputRange">The range of characters that triggers the transition</param>
/// <param name="fa">The destination state</param>
public void Add(CharRange inputRange, CharFA<TAccept> fa)
{
    // the flat list of range transitions always receives the new pairing first
    var pairing = new RangeWithFa(inputRange, fa);
    _rangeTransitions.Add(pairing);

    // the per-state index gets an entry on first use of this destination state
    if (!_charactersByState.TryGetValue(fa, out var perState))
    {
        var noChars = new List<char>();
        var noRanges = new List<CharRange>();
        perState = new CharactersAndRanges(noChars, noRanges);
        _charactersByState[fa] = perState;
    }

    perState.ranges.Add(inputRange);
}
/// <summary>
/// Creates an FA that will match any one character out of a set of characters
/// </summary>
/// <remarks>
/// The machine has two states: a start state and a single accepting state. Each
/// character produced by <c>CharRange.ExpandRanges</c> gets its own transition
/// from the start state to the accepting state.
/// </remarks>
/// <param name="ranges">The ranges of characters that will be matched</param>
/// <param name="accept">The symbol to accept</param>
/// <returns>An FA that will match the specified set</returns>
public static CharFA<TAccept> Set(IEnumerable<CharRange> ranges, TAccept accept = default(TAccept))
{
    var start = new CharFA<TAccept>();
    var accepting = new CharFA<TAccept>(true, accept);

    var characters = CharRange.ExpandRanges(ranges);
    foreach (var character in characters)
    {
        start.InputTransitions[character] = accepting;
    }

    return start;
}
// Parses a sequence of characters into character ranges by delegating to the
// single-argument _ParseRanges overload. When normalize is true the parsed
// ranges are copied into a list and passed through CharRange.NormalizeRangeList
// before being returned; otherwise the parse result is returned untouched.
static IEnumerable<CharRange> _ParseRanges(IEnumerable<char> charRanges, bool normalize)
{
    var parsed = _ParseRanges(charRanges);
    if (!normalize)
    {
        return parsed;
    }

    var normalized = new List<CharRange>(parsed);
    CharRange.NormalizeRangeList(normalized);
    return normalized;
}
/// <summary>
/// Returns a dictionary keyed by state, that contains all of the outgoing local input transitions, expressed as a series of ranges
/// </summary>
/// <remarks>
/// Entries are inserted with <c>Add</c>, so a supplied dictionary that already holds
/// one of this state's destination states will cause that call to throw.
/// </remarks>
/// <param name="result">The dictionary to fill, or null to create one.</param>
/// <returns>A dictionary containing the result of the query</returns>
public IDictionary<CharFA<TAccept>, IList<CharRange>> FillInputTransitionRangesGroupedByState(IDictionary<CharFA<TAccept>, IList<CharRange>> result = null)
{
    if (result == null)
    {
        result = new Dictionary<CharFA<TAccept>, IList<CharRange>>();
    }

    // the transitions collection can also be viewed grouped by destination state,
    // which leaves only sorting and range-building to do here
    var byState = (IDictionary<CharFA<TAccept>, ICollection<char>>)InputTransitions;
    foreach (var group in byState)
    {
        var sortedChars = new List<char>(group.Value);
        sortedChars.Sort();

        var ranges = new List<CharRange>(CharRange.GetRanges(sortedChars));
        result.Add(group.Key, ranges);
    }

    return result;
}
// Appends the textual form of a character range to the builder:
//   - First == Last      : the single character
//   - Last == First + 1  : both characters with no separator
//   - anything else      : First, a '-' separator, then Last
static void _AppendRangeTo(StringBuilder builder, CharRange range)
{
    // the first endpoint is written in every case
    _AppendRangeCharTo(builder, range.First);

    var isSingleCharacter = range.Last.CompareTo(range.First) == 0;
    if (isSingleCharacter)
    {
        return;
    }

    // spit out 1 length ranges as two chars, so only longer ranges get the dash
    var isAdjacentPair = range.First + 1 == range.Last;
    if (!isAdjacentPair)
    {
        builder.Append('-');
    }

    _AppendRangeCharTo(builder, range.Last);
}
/// <summary>
/// Returns a DFA table that can be used to lex or match
/// </summary>
/// <remarks>
/// Each entry in the returned array corresponds to the state at the same index in the
/// closure of the DFA. Non-accepting states carry an accept id of -1. Accepting states
/// carry the integer id that their accept symbol maps to, which is either the symbol's
/// index in <paramref name="symbolTable"/> or, when no table is given, the order in which
/// the symbol was first encountered while walking the closure.
/// </remarks>
/// <param name="symbolTable">The symbol table to use, or null to just implicitly tag symbols with integer ids</param>
/// <param name="progress">The progress object used to report the progress of the task</param>
/// <returns>A DFA table that can be used to efficiently match or lex input</returns>
public CharDfaEntry[] ToDfaStateTable(IList<TAccept> symbolTable = null, IProgress<CharFAProgress> progress = null)
{
    // only convert to a DFA if we haven't already
    // ToDfa() already checks but it always copies
    // the state information so this performs better
    var dfa = IsDfa ? this : ToDfa(progress);
    var closure = dfa.FillClosure();
    var symbolLookup = new ListDictionary<TAccept, int>();
    // if we don't have a symbol table, build
    // the symbol lookup from the states.
    if (null == symbolTable)
    {
        // go through each state, looking for accept symbols
        // and then add them to the new symbol table if we
        // haven't already
        var nextSymbolId = 0;
        for (int jc = closure.Count, j = 0; j < jc; ++j)
        {
            var fa = closure[j];
            if (fa.IsAccepting && !symbolLookup.ContainsKey(fa.AcceptSymbol))
            {
                symbolLookup.Add(fa.AcceptSymbol, nextSymbolId);
                ++nextSymbolId;
            }
        }
    }
    else // build the symbol lookup from the symbol table
    {
        for (int ic = symbolTable.Count, i = 0; i < ic; ++i)
        {
            if (null != symbolTable[i])
            {
                symbolLookup.Add(symbolTable[i], i);
            }
        }
    }

    // map each state to its position in the closure once, up front, so that
    // resolving a transition target below is a hash lookup instead of a linear
    // IndexOf scan per transition. The first occurrence wins, matching IndexOf.
    var stateIndices = new Dictionary<CharFA<TAccept>, int>(closure.Count);
    for (int kc = closure.Count, k = 0; k < kc; ++k)
    {
        if (!stateIndices.ContainsKey(closure[k]))
        {
            stateIndices.Add(closure[k], k);
        }
    }

    // build the root array
    var result = new CharDfaEntry[closure.Count];
    for (var i = 0; i < result.Length; i++)
    {
        var fa = closure[i];
        // get all the transition ranges for each destination state
        var trgs = fa.FillInputTransitionRangesGroupedByState();
        // make a new transition entry array for our DFA state table
        var trns = new CharDfaTransitionEntry[trgs.Count];
        var j = 0;
        // for each transition range
        foreach (var trg in trgs)
        {
            // a target that is not part of the closure resolves to -1,
            // which is what IndexOf would have reported
            if (!stateIndices.TryGetValue(trg.Key, out var targetIndex))
            {
                targetIndex = -1;
            }
            // add the transition entry using
            // the packed ranges from CharRange
            trns[j] = new CharDfaTransitionEntry(
                CharRange.ToPackedChars(trg.Value),
                targetIndex);
            ++j;
        }
        // now add the state entry for the state above
        result[i] = new CharDfaEntry(
            fa.IsAccepting ? symbolLookup[fa.AcceptSymbol] : -1,
            trns);
    }
    return result;
}
// Builds the table of named character classes. Each name maps to the list of
// character ranges that make up the class. The "^"-prefixed entries are derived
// by applying CharRange.NotRanges to the matching class already in the table.
static IDictionary<string, IList<CharRange>> _GetCharacterClasses()
{
    var classes = new Dictionary<string, IList<CharRange>>();

    classes.Add("alnum", new List<CharRange>
    {
        new CharRange('A', 'Z'),
        new CharRange('a', 'z'),
        new CharRange('0', '9')
    });
    classes.Add("alpha", new List<CharRange>
    {
        new CharRange('A', 'Z'),
        new CharRange('a', 'z')
    });
    classes.Add("ascii", new List<CharRange>
    {
        new CharRange('\0', '\x7F')
    });
    classes.Add("blank", new List<CharRange>
    {
        new CharRange(' ', ' '),
        new CharRange('\t', '\t')
    });
    classes.Add("cntrl", new List<CharRange>
    {
        new CharRange('\0', '\x1F'),
        new CharRange('\x7F', '\x7F')
    });
    classes.Add("digit", new List<CharRange>
    {
        new CharRange('0', '9')
    });
    classes.Add("^digit", new List<CharRange>(CharRange.NotRanges(classes["digit"])));
    classes.Add("graph", new List<CharRange>
    {
        new CharRange('\x21', '\x7E')
    });
    classes.Add("lower", new List<CharRange>
    {
        new CharRange('a', 'z')
    });
    classes.Add("print", new List<CharRange>
    {
        new CharRange('\x20', '\x7E')
    });

    // [!"\#$%&'()*+,\-./:;<=>?@\[\\\]^_`{|}~]
    var punctuation = CharRange.GetRanges("!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~");
    classes.Add("punct", new List<CharRange>(punctuation));

    //[ \t\r\n\v\f]
    var whitespace = CharRange.GetRanges(" \t\r\n\v\f");
    classes.Add("space", new List<CharRange>(whitespace));
    classes.Add("^space", new List<CharRange>(CharRange.NotRanges(classes["space"])));

    classes.Add("upper", new List<CharRange>
    {
        new CharRange('A', 'Z')
    });
    classes.Add("word", new List<CharRange>
    {
        new CharRange('0', '9'),
        new CharRange('A', 'Z'),
        new CharRange('_', '_'),
        new CharRange('a', 'z')
    });
    classes.Add("^word", new List<CharRange>(CharRange.NotRanges(classes["word"])));
    classes.Add("xdigit", new List<CharRange>
    {
        new CharRange('0', '9'),
        new CharRange('A', 'F'),
        new CharRange('a', 'f')
    });

    return classes;
}
/// <summary>
/// Initializes a pairing of an input character range with a state
/// </summary>
/// <param name="range">The character range to store</param>
/// <param name="fa">The state to store alongside the range</param>
public RangeWithFa(CharRange range, CharFA<TAccept> fa)
{
    this.fa = fa;
    this.range = range;
}