/**
 * Inserts the NFA fragment for one rule of the specification and wires
 * it into the automaton.
 *
 * The fragment for the regular expression is built with insertNFA and
 * made reachable by epsilon transitions from the entry states of every
 * lexical state the rule is attached to. If the rule lists no lexical
 * states, the inclusive states of the scanner are used instead.
 *
 * For each lexical state s, state 2*s+1 is always linked to the
 * fragment; state 2*s is linked only when the rule is not flagged by
 * isBOL. (Presumably 2*s is the ordinary entry and 2*s+1 the
 * beginning-of-line entry -- confirm against the state numbering used
 * elsewhere in this class.)
 *
 * Without a look-ahead expression the end of the fragment becomes final
 * and carries the rule's action. With a look-ahead expression a second
 * fragment is appended; its end becomes final and carries the action,
 * while the end of the first fragment is recorded in isPushback.
 *
 * @param regExpNum  index of the rule in regExps
 */
public void addRegExp(int regExpNum) {
  if (Options.DEBUG) {
    Out.debug("Adding nfa for regexp " + regExpNum + " :" + Out.NL + regExps.getRegExp(regExpNum));
  }

  IntPair body = insertNFA(regExps.getRegExp(regExpNum));

  // Probe the rule's own state list; an empty list means "use the
  // inclusive states". Either way the enumerator is rewound before the
  // real traversal starts.
  IEnumerator stateIter = regExps.getStates(regExpNum).GetEnumerator();
  bool hasOwnStates = stateIter.MoveNext();
  if (!hasOwnStates) {
    stateIter = scanner.states.getInclusiveStates();
  }
  stateIter.Reset();

  while (stateIter.MoveNext()) {
    int lexState = (int)stateIter.Current;
    int evenEntry = 2 * lexState;
    int oddEntry = evenEntry + 1;

    if (!regExps.isBOL(regExpNum)) {
      addEpsilonTransition(evenEntry, body.start);
    }
    addEpsilonTransition(oddEntry, body.start);
  }

  // Simple case: no trailing context, the fragment's end accepts.
  if (regExps.getLookAhead(regExpNum) == null) {
    action[body.end] = regExps.getAction(regExpNum);
    isFinal[body.end] = true;
    return;
  }

  // Trailing context: chain the look-ahead fragment behind the body.
  // The arrays are only written after both structural calls, exactly as
  // before, since those calls may change the automaton's storage.
  IntPair trailing = insertNFA(regExps.getLookAhead(regExpNum));
  addEpsilonTransition(body.end, trailing.start);

  Action ruleAction = regExps.getAction(regExpNum);
  ruleAction.setLookAction(true);

  isPushback[body.end] = true;
  action[trailing.end] = ruleAction;
  isFinal[trailing.end] = true;
}
/**
 * Builds an NFA that accepts the complement of the language accepted
 * by the given NFA.
 *
 * The given NFA is first turned into a DFA over sets of NFA states;
 * that DFA is then negated. Exponential state blowup is possible and
 * common.
 *
 * All states created here are numbered from nfa.end + 1 upwards: first
 * the DFA states, then three extra states (start, error, end).
 *
 * @param nfa  the NFA to construct the complement for.
 *
 * @return a pair of integers denoting the index of start
 *         and end state of the complement NFA.
 */
private IntPair complement(IntPair nfa) {
  if (Options.DEBUG) {
    Out.debug("complement for " + nfa);
    Out.debug("NFA is :" + Out.NL + this);
  }

  // Index of the first DFA state inside this automaton.
  int dfaStart = nfa.end + 1;

  // fixme: only need epsilon closure of states reachable from nfa.start
  epsilonFill();

  // dfaStates maps a state set to its DFA number; dfaVector is the
  // inverse (DFA number -> state set) and doubles as the work list.
  Hashtable dfaStates = new PrettyHashtable(numStates);
  ArrayList dfaVector = new PrettyArrayList(numStates);

  // Highest DFA number handed out so far (the count is numDFAStates + 1).
  int numDFAStates = 0;

  StateSet initialSet = epsilon[nfa.start];
  dfaStates[initialSet] = new Integer(numDFAStates);
  dfaVector.Add(initialSet);

  if (Options.DEBUG) {
    Out.debug("pos DFA start state is :" + Out.NL + dfaStates + Out.NL + Out.NL + "ordered :" + Out.NL + dfaVector);
  }

  // Work through the DFA states in order of creation. numDFAStates grows
  // while new sets are discovered, so the bound is re-read every pass.
  for (int pending = 0; pending <= numDFAStates; pending++) {
    StateSet sourceSet = (StateSet)dfaVector[pending];

    for (char input = (char)0; input < numInput; input++) {
      StateSet targetSet = DFAEdge(sourceSet, input);

      // An empty target means "no transition"; those gaps are filled
      // with the error state further down.
      if (!targetSet.containsElements()) {
        continue;
      }

      Integer knownNumber = (Integer)dfaStates[targetSet];

      if (knownNumber == null) {
        // First time this set shows up: register it, then link to it.
        if (Options.dump) {
          Out.print("+");
        }

        numDFAStates++;
        dfaStates[targetSet] = new Integer(numDFAStates);
        dfaVector.Add(targetSet);

        addTransition(dfaStart + pending, input, dfaStart + numDFAStates);
      } else {
        addTransition(dfaStart + pending, input, dfaStart + knownNumber.intValue());
      }
    }
  }

  // At this point the DFA for the positive expression is complete.
  // What follows turns it into its complement.
  if (Options.DEBUG) {
    Out.debug("dfa finished, nfa is now :" + Out.NL + this);
  }

  // Three more states directly behind the last DFA state.
  int lastDfaIndex = dfaStart + numDFAStates;
  int start = lastDfaIndex + 1;
  int error = lastDfaIndex + 2;
  int end = lastDfaIndex + 3;

  addEpsilonTransition(start, dfaStart);

  // The error state loops on every input and leads on to end.
  for (int i = 0; i < numInput; i++) {
    addTransition(error, i, error);
  }
  addEpsilonTransition(error, end);

  for (int s = 0; s <= numDFAStates; s++) {
    StateSet members = (StateSet)dfaVector[s];
    int stateIndex = dfaStart + s;

    // A DFA state that did not contain the old end state was not
    // final, so it is final in the complement.
    bool wasFinal = members.isElement(nfa.end);
    if (!wasFinal) {
      addEpsilonTransition(stateIndex, end);
    }

    // Inputs without a transition (formerly an implicit error) now go
    // to the explicit error state.
    for (int i = 0; i < numInput; i++) {
      if (table[stateIndex][i] == null) {
        addTransition(stateIndex, i, error);
      }
    }
  }

  // Eliminate transitions leading to dead states. The scratch arrays
  // are (re)allocated only when missing or shorter than numStates.
  bool scratchTooSmall = (live == null) || (live.Length < numStates);
  if (scratchTooSmall) {
    live = new bool[2 * numStates];
    visited = new bool[2 * numStates];
  }

  // removeDead takes its context from these fields.
  _end = end;
  _dfaStates = dfaVector;
  _dfaStart = dfaStart;
  removeDead(dfaStart);

  if (Options.DEBUG) {
    Out.debug("complement finished, nfa (" + start + "," + end + ") is now :" + this);
  }

  return new IntPair(start, end);
}