/// <summary>
/// Parses a sequence of consecutive items (words, wildcards, negated word
/// lists and parenthesised subexpressions) and chains them together so
/// that each item leads into the next one.
/// </summary>
/// <param name="lexer">Token source; consumed up to the first token that cannot start an item.</param>
/// <param name="endState">State that every parsed item initially targets as its end state.</param>
/// <returns>
/// The start state of the first item in the sequence, or
/// <paramref name="endState"/> itself when the input is at EOF and no item
/// was parsed (empty expression).
/// </returns>
/// <exception cref="PatternParseException">
/// Thrown when a negated word list is not closed by a right square bracket,
/// or when no item could be parsed and the current token is not EOF.
/// </exception>
private static RegularState pSeq(Lexer lexer, RegularState endState)
{
    RegularState result = null;
    RegularState lhs = null;

    while (lexer.token().type == Token.WORD
           || lexer.token().type == Token.NOT
           || lexer.token().type == Token.PLEFT
           || lexer.token().type == Token.DOT)
    {
        RegularState rhs;

        if (lexer.token().type == Token.WORD)
        {
            rhs = pWord(lexer, endState);
        }
        else if (lexer.token().type == Token.DOT)
        {
            // Wildcard: a fresh state with a single wildcard-labelled
            // transition to the end state.
            lexer.nextToken();
            rhs = new RegularState();
            RegularTransition transition = new RegularTransition(rhs, endState);
            transition.addLabel(RegularTransition.WILDCARD);
            // NOTE(review): any value returned by multTransform is not used
            // here - confirm that it updates rhs in place.
            multTransform(rhs, lexer, endState);
        }
        else if (lexer.token().type == Token.NOT)
        {
            // ![word1,word2,word3,word4,...]
            rhs = new RegularState();
            RegularTransition transition = new RegularTransition(rhs, endState);
            transition.setNegation(true);
            lexer.nextToken();

            // Collect the words; a comma following a word is skipped.
            while (lexer.token().type == Token.WORD)
            {
                transition.addLabel(lexer.token().ToString());
                lexer.nextToken();
                if (lexer.token().type == Token.COMMA)
                {
                    lexer.nextToken();
                }
            }

            if (lexer.token().type != Token.SRIGHT)
            {
                // .NET composite formatting uses {0}-style placeholders;
                // printf-style "%d" would be emitted literally.
                throw new PatternParseException(
                    String.Format("Missing right square bracket at #{0}", lexer.charPos()));
            }

            lexer.nextToken();
            multTransform(rhs, lexer, endState);
        }
        else
        {
            // Only Token.PLEFT can reach this branch given the loop condition.
            rhs = pSubexp(lexer, endState);
        }

        if (result == null)
        {
            // The first item in the sequence is the return value.
            result = rhs;
        }

        if (lhs != null)
        {
            // Link the end state of the previous item to the current item.
            replaceStates(lhs, endState, rhs);
        }

        lhs = rhs;
    }

    if (result == null)
    {
        if (lexer.token().type == Token.EOF)
        {
            // Empty regex.
            return endState;
        }

        throw new PatternParseException(
            String.Format("Unexpected token '{0}' at #{1}", lexer.token().text, lexer.charPos()));
    }

    return result;
}
/// <summary>
/// Parses one or more sequences separated by OR tokens. A single sequence
/// is returned unchanged. For several alternatives a new start state is
/// created: alternatives that contain a self reference are attached to it
/// through an empty (lambda) transition, while the outgoing transitions of
/// the remaining alternatives are merged per destination state, keeping
/// lambda, negated and regular transitions apart.
/// </summary>
/// <param name="lexer">Token source positioned at the first alternative.</param>
/// <param name="endState">End state handed to every parsed sequence.</param>
/// <returns>
/// The start state of the only alternative, or the newly created start
/// state that combines all alternatives.
/// </returns>
private static RegularState pAlt(Lexer lexer, RegularState endState)
{
    List<RegularState> branches = new List<RegularState>();
    for (;;)
    {
        branches.Add(pSeq(lexer, endState));
        if (lexer.token().type != Token.OR)
        {
            break;
        }
        lexer.nextToken();
    }

    if (branches.Count == 1)
    {
        return branches[0];
    }

    // Combine transitions with identical destinations and negation.
    RegularState start = new RegularState();
    Dictionary<RegularState, List<RegularTransition>> byDestination =
        new Dictionary<RegularState, List<RegularTransition>>();

    foreach (RegularState branch in branches)
    {
        if (hasSelfReference(branch))
        {
            // Self-referencing states are reached through a lambda
            // transition instead of being merged.
            new RegularTransition(start, branch);
        }
        else
        {
            foreach (RegularTransition outgoing in branch.getOutTransitions())
            {
                RegularState target = outgoing.getEndState();
                List<RegularTransition> bucket;
                if (!byDestination.TryGetValue(target, out bucket))
                {
                    bucket = new List<RegularTransition>();
                    byDestination.Add(target, bucket);
                }
                bucket.Add(outgoing);
            }
        }
    }

    foreach (KeyValuePair<RegularState, List<RegularTransition>> entry in byDestination)
    {
        RegularState target = entry.Key;
        bool lambdaAdded = false;
        RegularTransition negated = null;
        RegularTransition plain = null;

        foreach (RegularTransition candidate in entry.Value)
        {
            if (candidate.isEmpty())
            {
                // Lambda transitions are never combined with others; at
                // most one is created per destination.
                if (!lambdaAdded)
                {
                    new RegularTransition(start, target);
                    lambdaAdded = true;
                }
                continue;
            }

            if (candidate.isNegation())
            {
                if (negated == null)
                {
                    negated = new RegularTransition(start, target);
                    negated.setNegation(true);
                }
                negated.addLabels(candidate.getLabels());
                continue;
            }

            if (plain == null)
            {
                plain = new RegularTransition(start, target);
            }
            plain.addLabels(candidate.getLabels());
        }
    }

    return start;
}