/**
 * Computes the last set of an expression
 * (the last-character-projection of the language).
 *
 * Only the expression types listed in the switch below are handled;
 * every other type raises an Exception.
 *
 * @param re the expression to analyse
 *
 * @return the last set of <code>re</code>
 */
private static IntCharSet last(RegExp re)
{
    switch (re.type)
    {
        case sym.BAR:
        {
            // either alternative may supply the last character
            RegExp2 alt = (RegExp2)re;
            return last(alt.r1).add(last(alt.r2));
        }

        case sym.CONCAT:
        {
            RegExp2 seq = (RegExp2)re;
            if (!containsEpsilon(seq.r2))
            {
                return last(seq.r2);
            }
            // the second part may match the empty word, so the first
            // part can supply the last character as well
            return last(seq.r1).add(last(seq.r2));
        }

        case sym.STAR:
        case sym.PLUS:
        case sym.QUESTION:
            return last((RegExp)((RegExp1)re).content);

        case sym.CCLASS:
            return new IntCharSet((ArrayList)((RegExp1)re).content);

        case sym.CCLASSNOT:
        {
            // start from the full range [0, maxChar] and remove the class
            IntCharSet result = new IntCharSet(new Interval((char)0, maxChar));
            IntCharSet excluded = new IntCharSet((ArrayList)((RegExp1)re).content);
            result.sub(excluded);
            return result;
        }

        case sym.CHAR:
            return new IntCharSet((char)((RegExp1)re).content);

        case sym.STRING:
        {
            String text = (String)((RegExp1)re).content;
            if (text.Length > 0)
            {
                return new IntCharSet(text[text.Length - 1]);
            }
            // the empty string has no last character
            return new IntCharSet();
        }

        case sym.MACROUSE:
            return last(macros.getDefinition((String)((RegExp1)re).content));

        default:
            throw new Exception("Unkown expression type " + re.type + " in " + re); //$NON-NLS-1$ //$NON-NLS-2$
    }
}
/**
 * Stores a macro and its definition, replacing any definition that
 * was previously stored under the same name. The macro is flagged as
 * not used.
 *
 * @param name        the name of the macro
 * @param definition  the definition of the macro
 *
 * @return <code>true</code>, iff the macro name has not been
 *         stored before.
 */
public bool insert(String name, RegExp definition)
{
#if DEBUG_TRACE
    log.WriteLine("insert(String name = \"{0}\", RegExp definition = {1})", name, definition);
#endif // DEBUG_TRACE

    if (Options.DEBUG)
    {
        Out.debug("inserting macro " + name + " with definition :" + Out.NL + definition); //$NON-NLS-1$ //$NON-NLS-2$
    }

    // must be checked before the definition is written below
    bool isNew = !macros.ContainsKey(name);

    used[name] = false;
    macros[name] = definition;

    return isNew;
}
/**
 * Appends a new rule: its state list, regular expression, action,
 * beginning-of-line flag, lookahead expression and line number are
 * each added to the corresponding list.
 *
 * @param line       the line number stored for the rule
 * @param stateList  the state list stored for the rule
 * @param regExp     the regular expression of the rule
 * @param action     the action of the rule
 * @param isBOL      the beginning-of-line flag of the rule
 * @param lookAhead  the lookahead expression of the rule
 *
 * @return the index of the newly added rule
 */
public int insert(int line, ArrayList stateList, RegExp regExp, Action action, Boolean isBOL, RegExp lookAhead)
{
    if (Options.DEBUG)
    {
        Out.debug("Inserting regular expression with statelist :" + Out.NL + stateList); //$NON-NLS-1$
        Out.debug("and action code :" + Out.NL + action.content + Out.NL); //$NON-NLS-1$
        Out.debug("expression :" + Out.NL + regExp); //$NON-NLS-1$
    }

    states.Add(stateList);
    regExps.Add(regExp);
    actions.Add(action);
    BOL.Add(isBOL);
    look.Add(lookAhead);
    lines.Add(line);

    // the rule just added is the last entry
    int index = states.Count - 1;
    return index;
}
/**
 * Returns true iff the matched language contains epsilon
 * (i.e. the expression can match the empty word).
 *
 * In addition to the basic operators this also handles the
 * case-insensitive variants (CHAR_I, STRING_I) and the negation
 * operators (BANG, TILDE) that the NFA construction accepts, so that
 * such expressions no longer end in the "unknown type" exception.
 *
 * @param re the expression to analyse
 *
 * @return true iff <code>re</code> matches the empty word
 */
private static bool containsEpsilon(RegExp re)
{
    switch (re.type)
    {
        case sym.BAR:
        {
            RegExp2 alt = (RegExp2)re;
            return containsEpsilon(alt.r1) || containsEpsilon(alt.r2);
        }

        case sym.CONCAT:
        {
            // a concatenation is empty only if both parts can be empty
            RegExp2 seq = (RegExp2)re;
            return containsEpsilon(seq.r1) && containsEpsilon(seq.r2);
        }

        case sym.STAR:
        case sym.QUESTION:
            return true;

        case sym.PLUS:
            return containsEpsilon((RegExp)((RegExp1)re).content);

        case sym.BANG:
            // the complement contains epsilon exactly when the operand does not
            return !containsEpsilon((RegExp)((RegExp1)re).content);

        case sym.TILDE:
            // "up to" always ends with a match of its operand: if the
            // operand is not nullable neither is the result, and if it is
            // nullable the complemented prefix language is empty
            return false;

        case sym.CCLASS:
        case sym.CCLASSNOT:
        case sym.CHAR:
        case sym.CHAR_I:
            return false;

        case sym.STRING:
        case sym.STRING_I:
            return ((String)((RegExp1)re).content).Length <= 0;

        case sym.MACROUSE:
            return containsEpsilon(macros.getDefinition((String)((RegExp1)re).content));
    }

    throw new Exception("Unkown expression type " + re.type + " in " + re); //$NON-NLS-1$ //$NON-NLS-2$
}
/**
 * Inserts the transitions for a char class regexp between two
 * already existing states, such that the resulting NFA has
 *
 *   exactly one start state,
 *   exactly one end state,
 *   no transitions leading out of the end state
 *   no transitions leading into the start state
 *
 * Assumes that regExp.isCharClass(macros) == true
 *
 * @param regExp  the regular expression to construct the NFA for
 * @param start   the index of the start state
 * @param end     the index of the end state
 */
private void insertNFA(RegExp regExp, int start, int end)
{
    switch (regExp.type)
    {
        case sym.BAR:
        {
            // both alternatives share the same pair of states
            RegExp2 alt = (RegExp2)regExp;
            insertNFA(alt.r1, start, end);
            insertNFA(alt.r2, start, end);
            break;
        }

        case sym.CCLASS:
            insertClassNFA((ArrayList)((RegExp1)regExp).content, start, end);
            break;

        case sym.CCLASSNOT:
            insertNotClassNFA((ArrayList)((RegExp1)regExp).content, start, end);
            break;

        case sym.CHAR:
            insertLetterNFA(false, (char)((RegExp1)regExp).content, start, end);
            break;

        case sym.CHAR_I:
            insertLetterNFA(true, (char)((RegExp1)regExp).content, start, end);
            break;

        case sym.MACROUSE:
            insertNFA(macros.getDefinition((String)((RegExp1)regExp).content), start, end);
            break;

        default:
            throw new Exception("Unknown expression type " + regExp.type + " in NFA construction");
    }
}
/**
 * Sums up <code>size(macros)</code> over all stored regular
 * expressions and all stored lookahead expressions. Entries that
 * are <code>null</code> are skipped.
 *
 * @param macros  the macro table passed on to <code>RegExp.size</code>
 *
 * @return the accumulated size
 */
public int NFASize(Macros macros)
{
    int total = 0;

    foreach (RegExp expression in regExps)
    {
        if (expression != null)
        {
            total += expression.size(macros);
        }
    }

    foreach (RegExp lookAhead in look)
    {
        if (lookAhead != null)
        {
            total += lookAhead.size(macros);
        }
    }

    return total;
}
/**
 * Creates a regular expression node with two operands.
 *
 * @param type  the expression type, passed on to the base constructor
 * @param r1    the first operand
 * @param r2    the second operand
 */
public RegExp2(int type, RegExp r1, RegExp r2)
    : base(type)
{
    this.r2 = r2;
    this.r1 = r1;
}
/**
 * Constructs an NFA for regExp such that the NFA has
 *
 *   exactly one start state,
 *   exactly one end state,
 *   no transitions leading out of the end state
 *   no transitions leading into the start state
 *
 * Char class expressions are delegated to the three-argument
 * overload; all other expression types are built recursively from
 * the NFAs of their operands.
 *
 * @param regExp  the regular expression to construct the NFA for
 *
 * @return a pair of integers denoting the index of start
 *         and end state of the NFA.
 */
public IntPair insertNFA(RegExp regExp)
{
    if (Options.DEBUG)
    {
        Out.debug("Inserting RegExp : " + regExp);
    }

    if (regExp.isCharClass(macros))
    {
        // char classes need exactly two fresh states
        int classStart = numStates;
        int classEnd = numStates + 1;

        ensureCapacity(classEnd + 1);
        if (classEnd + 1 > numStates)
        {
            numStates = classEnd + 1;
        }

        insertNFA(regExp, classStart, classEnd);

        return new IntPair(classStart, classEnd);
    }

    switch (regExp.type)
    {
        case sym.BAR:
        {
            RegExp2 alt = (RegExp2)regExp;

            IntPair left = insertNFA(alt.r1);
            IntPair right = insertNFA(alt.r2);

            int entry = right.end + 1;
            int exit = right.end + 2;

            addEpsilonTransition(entry, left.start);
            addEpsilonTransition(entry, right.start);
            addEpsilonTransition(left.end, exit);
            addEpsilonTransition(right.end, exit);

            return new IntPair(entry, exit);
        }

        case sym.CONCAT:
        {
            RegExp2 seq = (RegExp2)regExp;

            IntPair head = insertNFA(seq.r1);
            IntPair tail = insertNFA(seq.r2);

            addEpsilonTransition(head.end, tail.start);

            return new IntPair(head.start, tail.end);
        }

        case sym.STAR:
        {
            IntPair inner = insertNFA((RegExp)((RegExp1)regExp).content);

            int entry = inner.end + 1;
            int exit = inner.end + 2;

            addEpsilonTransition(inner.end, exit);
            addEpsilonTransition(entry, inner.start);
            addEpsilonTransition(entry, exit);          // zero repetitions
            addEpsilonTransition(inner.end, inner.start); // repeat

            return new IntPair(entry, exit);
        }

        case sym.PLUS:
        {
            IntPair inner = insertNFA((RegExp)((RegExp1)regExp).content);

            int entry = inner.end + 1;
            int exit = inner.end + 2;

            addEpsilonTransition(inner.end, exit);
            addEpsilonTransition(entry, inner.start);
            addEpsilonTransition(inner.end, inner.start); // repeat

            return new IntPair(entry, exit);
        }

        case sym.QUESTION:
        {
            IntPair inner = insertNFA((RegExp)((RegExp1)regExp).content);

            // allow skipping the operand entirely
            addEpsilonTransition(inner.start, inner.end);

            return new IntPair(inner.start, inner.end);
        }

        case sym.BANG:
            return complement(insertNFA((RegExp)((RegExp1)regExp).content));

        case sym.TILDE:
        {
            IntPair inner = insertNFA((RegExp)((RegExp1)regExp).content);

            int entry = inner.end + 1;
            int loopBefore = entry + 1;
            int loopAfter = loopBefore + 1;
            int exit = loopAfter + 1;

            // both loop states consume any input symbol
            for (int input = 0; input < numInput; input++)
            {
                addTransition(loopBefore, input, loopBefore);
                addTransition(loopAfter, input, loopAfter);
            }

            addEpsilonTransition(entry, loopBefore);
            addEpsilonTransition(loopBefore, inner.start);
            addEpsilonTransition(inner.end, loopAfter);
            addEpsilonTransition(loopAfter, exit);

            // complement of "anything containing the operand",
            // followed by a second copy of the operand itself
            IntPair prefix = complement(new IntPair(entry, exit));
            IntPair suffix = insertNFA((RegExp)((RegExp1)regExp).content);

            addEpsilonTransition(prefix.end, suffix.start);

            return new IntPair(prefix.start, suffix.end);
        }

        case sym.STRING:
            return insertStringNFA(false, (String)((RegExp1)regExp).content);

        case sym.STRING_I:
            return insertStringNFA(true, (String)((RegExp1)regExp).content);

        case sym.MACROUSE:
            return insertNFA(macros.getDefinition((String)((RegExp1)regExp).content));

        default:
            throw new Exception("Unknown expression type " + regExp.type + " in NFA construction");
    }
}
/**
 * Checks for illegal lookahead expressions.
 *
 * Lookahead in C# Flex only works when the first expression has fixed
 * length or when the intersection of the last set of the first expression
 * and the first set of the second expression is empty.
 *
 * @param r1 first regexp
 * @param r2 second regexp (the lookahead), may be null
 *
 * @return true iff C# Flex can generate code for the lookahead expression
 */
private static bool checkLookAhead(RegExp r1, RegExp r2)
{
    // no lookahead at all: nothing to check
    if (r2 == null)
    {
        return true;
    }

    // first expression has a fixed, positive length
    if (length(r1) > 0)
    {
        return true;
    }

    // otherwise the last set of r1 and the first set of r2 must be disjoint
    return !last(r1).and(first(r2)).containsElements();
}
/**
 * Returns length if expression has fixed length, -1 otherwise.
 *
 * In addition to the basic operators this also handles the
 * case-insensitive variants (CHAR_I, STRING_I), which have the same
 * length as their case-sensitive counterparts, and the negation
 * operators (BANG, TILDE), which are conservatively reported as not
 * having a fixed length. Previously these types ended in the
 * "unknown type" exception although the NFA construction accepts them.
 *
 * @param re the expression to analyse
 *
 * @return the common length of all words matched by <code>re</code>,
 *         or -1 if there is no such fixed length
 */
private static int length(RegExp re)
{
    switch (re.type)
    {
        case sym.BAR:
        {
            // fixed length only if both alternatives agree on it
            RegExp2 alt = (RegExp2)re;
            int l1 = length(alt.r1);
            if (l1 < 0)
            {
                return -1;
            }
            int l2 = length(alt.r2);
            return (l1 == l2) ? l1 : -1;
        }

        case sym.CONCAT:
        {
            // fixed length only if both parts have one
            RegExp2 seq = (RegExp2)re;
            int l1 = length(seq.r1);
            if (l1 < 0)
            {
                return -1;
            }
            int l2 = length(seq.r2);
            if (l2 < 0)
            {
                return -1;
            }
            return l1 + l2;
        }

        case sym.STAR:
        case sym.PLUS:
        case sym.QUESTION:
            return -1;

        case sym.BANG:
        case sym.TILDE:
            // not computed at this level; -1 is the safe approximation
            return -1;

        case sym.CCLASS:
        case sym.CCLASSNOT:
        case sym.CHAR:
        case sym.CHAR_I:
            return 1;

        case sym.STRING:
        case sym.STRING_I:
        {
            String content = (String)((RegExp1)re).content;
            return content.Length;
        }

        case sym.MACROUSE:
            return length(macros.getDefinition((String)((RegExp1)re).content));
    }

    throw new Exception("Unkown expression type " + re.type + " in " + re); //$NON-NLS-1$ //$NON-NLS-2$
}