/// <summary>
/// Visits <paramref name="expression"/> to collect its charsets, splits the letters they use into
/// ranges, groups those ranges into alphabet letters, and stores both the resulting alphabet
/// (<c>AlphabetById</c>) and the rebuilt expression (<c>Expression</c>).
/// </summary>
/// <param name="expression">The regular expression tree to process.</param>
/// <param name="eof">
/// Optional letter treated as EOF. When supplied it is subtracted from every charset and from the
/// valid ranges, and it is registered on its own under <c>LetterId.Eof</c>; when omitted, an empty
/// range set is registered under <c>LetterId.Eof</c> instead.
/// </param>
/// <param name="validRanges">
/// The letters considered valid; <c>RangeSet&lt;TLetter&gt;.All</c> is used when this is <c>null</c>.
/// </param>
public AlphabetBuilder(RxNode<TLetter> expression, TLetter? eof = null, RangeSet<TLetter>? validRanges = default)
{
    var eofRange = eof.HasValue ? new RangeSet<TLetter>(eof.Value) : RangeSet<TLetter>.Empty;

    // Step 1: collect every charset-producing part of the expression (EOF is removed from each set).
    var visitor = new AlphabetBuilderVisitor<TLetter>();
    expression.Visit(visitor, (letters, negate) => letters - eofRange);
    var charsets = visitor.Charsets;

    // Step 2: compute the ranges of all used charsets together with their "users".
    var allowedLetters = (validRanges ?? RangeSet<TLetter>.All) - eofRange;
    var usedRanges = MakeRanges(charsets, allowedLetters);

    // Step 3: ranges sharing the same users key form one alphabet letter; ids are handed out
    // in group order, starting at 1.
    var alphabetByKey = new Dictionary<string, AlphabetLetter<TLetter>>();
    var groupIndex = 0;
    foreach (var rangeGroup in usedRanges.GroupBy<UsedLetterRange<TLetter>, string, Range<TLetter>>(r => r.GetUsersKey(), r => r.Range))
    {
        var letter = new AlphabetLetter<TLetter>(new LetterId(groupIndex + 1), new RangeSet<TLetter>(rangeGroup));
        alphabetByKey.Add(rangeGroup.Key, letter);
        groupIndex++;
    }

    // Step 4: record the alphabet letter id on every charset that uses the range.
    foreach (var usedRange in usedRanges)
    {
        var entry = alphabetByKey[usedRange.GetUsersKey()];
        var userNode = usedRange.Users;
        while (userNode != null)
        {
            charsets[userNode.User].Value.Add(entry.Id);
            userNode = userNode.Next;
        }
    }

    // Step 5: publish the alphabet, including the dedicated EOF entry.
    this.AlphabetById = alphabetByKey.Values.ToDictionary(e => e.Id, e => e.Ranges);
    this.AlphabetById.Add(LetterId.Eof, eofRange); // EOF

    // Step 6: rebuild the expression, looking up each node's collected letter ids.
    this.Expression = expression.Visit(new AlphabetLetterVisitor<TLetter>(), rxNode => charsets[visitor.GetId(rxNode)].Value);
}
/// <summary>
/// Builds an <see cref="Nfa{TLetter}"/> by visiting <paramref name="node"/> with a new
/// <see cref="NfaBuilder{TLetter}"/>, beginning at the builder's start state.
/// </summary>
/// <param name="node">The regular expression node to visit.</param>
/// <returns>
/// An NFA created from the builder's states, the builder's start state, and the state
/// returned by the visit.
/// </returns>
public static Nfa<TLetter> Build(RxNode<TLetter> node)
{
    var nfaBuilder = new NfaBuilder<TLetter>();
    var finalState = node.Visit(nfaBuilder, nfaBuilder.startState);
    return new Nfa<TLetter>(nfaBuilder.states, nfaBuilder.startState, finalState);
}
/// <summary>
/// Visits <paramref name="rx"/> with a new <see cref="RxOfSymbol{TLetter}"/> visitor, passing
/// <paramref name="symbolId"/> as the visit context, and returns the visit's result.
/// </summary>
/// <param name="rx">The regular expression node to visit.</param>
/// <param name="symbolId">The symbol id handed to the visitor.</param>
/// <returns>The node produced by the visit.</returns>
public static RxNode<TLetter> Extract(RxNode<TLetter> rx, SymbolId symbolId)
{
    var symbolVisitor = new RxOfSymbol<TLetter>();
    return rx.Visit(symbolVisitor, symbolId);
}
/// <summary>
/// Adds <paramref name="regex"/> to the <c>tokens</c> collection under <paramref name="symbol"/>.
/// </summary>
/// <param name="symbol">The symbol id used as the key.</param>
/// <param name="regex">The regular expression stored for the symbol.</param>
public void Add(SymbolId symbol, RxNode<TLetter> regex) => this.tokens.Add(symbol, regex);