/** Version string of the generator; passed to Out.println by the --version option. */
public const String version = "1.4"; //$NON-NLS-1$

/**
 * Generates a scanner for the specified input file.
 *
 * The specification is read and parsed into an NFA, the NFA is converted
 * into a DFA, the DFA is minimized and finally handed to the Emitter.
 * Timing information for the individual phases is reported through Out.
 *
 * Every failure is reported through Out.error and then signalled to the
 * caller as a GeneratorException.
 *
 * @param inputFile a file containing a lexical specification
 *                  to generate a scanner for.
 */
public static void generate(File inputFile)
{
    Out.resetCounters();

    Timer totalTime = new Timer();
    Timer time = new Timer();

    LexScan scanner = null;
    LexParse parser = null;
    TextReader inputReader = null;

    totalTime.start();

    try
    {
        try
        {
            Out.println(ErrorMessages.READING, inputFile.ToString());
            inputReader = new StreamReader(inputFile);
            scanner = new LexScan(inputReader);
            scanner.setFile(inputFile);
            parser = new LexParse(scanner);
        }
        catch (FileNotFoundException)
        {
            Out.error(ErrorMessages.CANNOT_OPEN, inputFile.ToString());
            throw new GeneratorException();
        }

        try
        {
            NFA nfa = (NFA)parser.parse().value;

            Out.checkErrors();

            if (Options.dump)
            {
                Out.dump(ErrorMessages.get(ErrorMessages.NFA_IS) + Out.NL + nfa + Out.NL);
            }

            if (Options.dot)
            {
                nfa.writeDot(Emitter.normalize("nfa.dot", null)); //$NON-NLS-1$
            }

            Out.println(ErrorMessages.NFA_STATES, nfa.numStates);

            time.start();
            DFA dfa = nfa.getDFA();
            time.stop();
            Out.time(ErrorMessages.DFA_TOOK, time);

            dfa.checkActions(scanner, parser);

            // drop the only reference to the NFA; it is not used after this point
            nfa = null;

            if (Options.dump)
            {
                Out.dump(ErrorMessages.get(ErrorMessages.DFA_IS) + Out.NL + dfa + Out.NL);
            }

            if (Options.dot)
            {
                dfa.writeDot(Emitter.normalize("dfa-big.dot", null)); //$NON-NLS-1$
            }

            time.start();
            dfa.minimize();
            time.stop();

            Out.time(ErrorMessages.MIN_TOOK, time);

            if (Options.dump)
            {
                Out.dump(ErrorMessages.get(ErrorMessages.MIN_DFA_IS) + Out.NL + dfa);
            }

            if (Options.dot)
            {
                dfa.writeDot(Emitter.normalize("dfa-min.dot", null)); //$NON-NLS-1$
            }

            time.start();

            Emitter emitter = new Emitter(inputFile, parser, dfa);
            emitter.emit();

            time.stop();

            Out.time(ErrorMessages.WRITE_TOOK, time);

            totalTime.stop();

            Out.time(ErrorMessages.TOTAL_TIME, totalTime);
        }
        catch (ScannerException e)
        {
            Out.error(e.file, e.message, e.line, e.column);
            throw new GeneratorException();
        }
        catch (MacroException e)
        {
            Out.error(e.Message);
            throw new GeneratorException();
        }
        catch (IOException e)
        {
            Out.error(ErrorMessages.IO_ERROR, e.ToString());
            throw new GeneratorException();
        }
        catch (OutOfMemoryException)
        {
            Out.error(ErrorMessages.OUT_OF_MEMORY);
            throw new GeneratorException();
        }
        catch (GeneratorException)
        {
            // already reported where it was raised; rethrow the same
            // instance so its stack trace is preserved
            throw;
        }
        catch (Exception e)
        {
            Out.error(e.ToString());
            throw new GeneratorException();
        }
    }
    finally
    {
        // release the handle on the specification file on every exit path
        if (inputReader != null)
        {
            inputReader.Close();
        }
    }
}
/**
 * Interprets the command line.
 *
 * Recognized switches update the static fields of Options (or print
 * information and terminate via SilentExit). Every argument that is not a
 * switch is treated as the name of an input file and collected.
 *
 * @param argv the command line arguments
 *
 * @return the list of readable input files (as File objects) named on
 *         the command line, in the order they were given
 */
public static ArrayList parseOptions(String[] argv)
{
    ArrayList inputFiles = new PrettyArrayList();

    for (int i = 0; i < argv.Length; i++)
    {
        String arg = argv[i];

        switch (arg)
        {
            case "-d": //$NON-NLS-1$
            case "--outdir": //$NON-NLS-1$
                // the directory name is the next argument
                if (++i >= argv.Length)
                {
                    Out.error(ErrorMessages.NO_DIRECTORY);
                    throw new GeneratorException();
                }
                Options.setDir(argv[i]);
                continue;

            case "--skel": //$NON-NLS-1$
            case "-skel": //$NON-NLS-1$
                // the skeleton file name is the next argument
                if (++i >= argv.Length)
                {
                    Out.error(ErrorMessages.NO_SKEL_FILE);
                    throw new GeneratorException();
                }
                Options.setSkeleton(new File(argv[i]));
                continue;

            case "--nested-default-skeleton":
            case "-nested":
                Options.setSkeleton(new File("<nested>"));
                continue;

            case "-jlex": //$NON-NLS-1$
            case "--jlex": //$NON-NLS-1$
                Options.jlex = true;
                continue;

            case "-v": //$NON-NLS-1$
            case "--verbose": //$NON-NLS-1$
            case "-verbose": //$NON-NLS-1$
                Options.verbose = true;
                Options.progress = true;
                continue;

            case "-q": //$NON-NLS-1$
            case "--quiet": //$NON-NLS-1$
            case "-quiet": //$NON-NLS-1$
                Options.verbose = false;
                Options.progress = false;
                continue;

            case "--dump": //$NON-NLS-1$
            case "-dump": //$NON-NLS-1$
                Options.dump = true;
                continue;

            case "--time": //$NON-NLS-1$
            case "-time": //$NON-NLS-1$
                Options.time = true;
                continue;

            case "--version": //$NON-NLS-1$
            case "-version": //$NON-NLS-1$
                Out.println(ErrorMessages.THIS_IS_CSFLEX, version);
                throw new SilentExit();

            case "--dot": //$NON-NLS-1$
            case "-dot": //$NON-NLS-1$
                Options.dot = true;
                continue;

            case "--help": //$NON-NLS-1$
            case "-h": //$NON-NLS-1$
            case "/h": //$NON-NLS-1$
                printUsage();
                throw new SilentExit();

            case "--info": //$NON-NLS-1$
            case "-info": //$NON-NLS-1$
                Out.printSystemInfo();
                throw new SilentExit();

            case "--nomin": //$NON-NLS-1$
            case "-nomin": //$NON-NLS-1$
                Options.no_minimize = true;
                continue;

            case "--pack": //$NON-NLS-1$
            case "-pack": //$NON-NLS-1$
                Options.gen_method = Options.PACK;
                continue;

            case "--table": //$NON-NLS-1$
            case "-table": //$NON-NLS-1$
                Options.gen_method = Options.TABLE;
                continue;

            case "--switch": //$NON-NLS-1$
            case "-switch": //$NON-NLS-1$
                Options.gen_method = Options.SWITCH;
                continue;

            case "--nobak": //$NON-NLS-1$
            case "-nobak": //$NON-NLS-1$
                Options.no_backup = true;
                continue;

            case "--csharp":
            case "-cs":
                Options.emit_csharp = true;
                continue;
        }

        // not one of the known switches: anything else starting with a dash is an error
        if (arg.StartsWith("-")) //$NON-NLS-1$
        {
            Out.error(ErrorMessages.UNKNOWN_COMMANDLINE, arg);
            printUsage();
            throw new SilentExit();
        }

        // otherwise the argument has to name a readable input file
        File f = new File(arg);
        if (!f.isFile() || !f.canRead())
        {
            Out.error("Sorry, couldn't open \"" + f + "\""); //$NON-NLS-2$
            throw new GeneratorException();
        }

        inputFiles.Add(f);
    }

    return inputFiles;
}
/**
 * Builds, inside this NFA, an automaton accepting the complement of the
 * language of the given sub-NFA.
 *
 * The sub-NFA is first turned into a DFA (subset construction over the
 * epsilon closures), whose states are placed directly behind nfa.end.
 * That DFA is then negated: former non-final states get an epsilon edge
 * to the new end state, and every missing transition is routed to an
 * explicit error state that loops on all inputs and is itself accepting.
 * Exponential state blowup is possible and common.
 *
 * @param nfa start and end state of the NFA to construct the complement for.
 *
 * @return a pair of integers denoting the index of start
 *         and end state of the complement NFA.
 */
private IntPair complement(IntPair nfa)
{
    if (Options.DEBUG)
    {
        Out.debug("complement for " + nfa);
        Out.debug("NFA is :" + Out.NL + this);
    }

    // the states of the intermediate DFA are numbered from here on
    int dfaBase = nfa.end + 1;

    // fixme: only need epsilon closure of states reachable from nfa.start
    epsilonFill();

    Hashtable indexOfSet = new PrettyHashtable(numStates);
    ArrayList orderedSets = new PrettyArrayList(numStates);

    // index of the most recently created DFA state
    int lastIndex = 0;

    StateSet startSet = epsilon[nfa.start];
    indexOfSet[startSet] = new Integer(lastIndex);
    orderedSets.Add(startSet);

    if (Options.DEBUG)
    {
        Out.debug("pos DFA start state is :" + Out.NL + indexOfSet + Out.NL + Out.NL + "ordered :" + Out.NL + orderedSets);
    }

    // subset construction; lastIndex grows while new state sets are discovered
    for (int current = 0; current <= lastIndex; current++)
    {
        StateSet sourceSet = (StateSet)orderedSets[current];

        for (char input = (char)0; input < numInput; input++)
        {
            StateSet targetSet = DFAEdge(sourceSet, input);

            if (!targetSet.containsElements())
            {
                continue;
            }

            Integer known = (Integer)indexOfSet[targetSet];

            if (known != null)
            {
                // target set was seen before, just link to it
                addTransition(dfaBase + current, input, dfaBase + known.intValue());
                continue;
            }

            if (Options.dump)
            {
                Out.print("+");
            }

            // first time this set shows up: register it as a new DFA state
            lastIndex++;

            indexOfSet[targetSet] = new Integer(lastIndex);
            orderedSets.Add(targetSet);

            addTransition(dfaBase + current, input, dfaBase + lastIndex);
        }
    }

    // At this point there is a dfa accepting the positive regexp.

    // Now the complement:
    if (Options.DEBUG)
    {
        Out.debug("dfa finished, nfa is now :" + Out.NL + this);
    }

    int complementStart = dfaBase + lastIndex + 1;
    int errorState = dfaBase + lastIndex + 2;
    int complementEnd = dfaBase + lastIndex + 3;

    addEpsilonTransition(complementStart, dfaBase);

    // the error state consumes any input and is accepting
    for (int c = 0; c < numInput; c++)
    {
        addTransition(errorState, c, errorState);
    }

    addEpsilonTransition(errorState, complementEnd);

    for (int s = 0; s <= lastIndex; s++)
    {
        StateSet members = (StateSet)orderedSets[s];
        int row = dfaBase + s;

        // if it was not a final state, it is now in the complement
        if (!members.isElement(nfa.end))
        {
            addEpsilonTransition(row, complementEnd);
        }

        // all inputs not present (formerly leading to an implicit error)
        // now lead to an explicit (final) state accepting everything.
        for (int c = 0; c < numInput; c++)
        {
            if (table[row][c] == null)
            {
                addTransition(row, c, errorState);
            }
        }
    }

    // eliminate transitions leading to dead states
    if (live == null || live.Length < numStates)
    {
        live = new bool [2 * numStates];
        visited = new bool [2 * numStates];
    }

    // fields read by removeDead
    _end = complementEnd;
    _dfaStates = orderedSets;
    _dfaStart = dfaBase;
    removeDead(dfaBase);

    if (Options.DEBUG)
    {
        Out.debug("complement finished, nfa (" + complementStart + "," + complementEnd + ") is now :" + this);
    }

    return new IntPair(complementStart, complementEnd);
}
/**
 * Sends the textual form of this NFA (the result of ToString())
 * to Out.dump.
 */
public void dumpTable()
{
    String text = ToString();
    Out.dump(text);
}
/**
 * Returns a DFA that accepts the same language as this NFA.
 * This DFA is usually not minimal.
 *
 * The DFA is built by subset construction: the epsilon closures of the
 * first 2 * numLexStates NFA states become the initial DFA states (one
 * per lexical state entry), and further DFA states are created for every
 * new set of NFA states reached on some input.
 *
 * @return the newly constructed DFA
 */
public DFA getDFA()
{
    Hashtable indexOfSet = new PrettyHashtable(numStates);
    ArrayList orderedSets = new PrettyArrayList(numStates);

    DFA dfa = new DFA(2 * numLexStates, numInput);

    Out.println("Converting NFA to DFA : ");

    epsilonFill();

    // one DFA start state per lexical state entry
    int created = 0;
    for (int lex = 0; lex < 2 * numLexStates; lex++, created++)
    {
        StateSet closure = epsilon[lex];

        indexOfSet[closure] = new Integer(created);
        orderedSets.Add(closure);

        dfa.setLexState(lex, created);

        dfa.setFinal(created, containsFinal(closure));
        dfa.setPushback(created, containsPushback(closure));
        dfa.setAction(created, getAction(closure));
    }

    // from here on this is the index of the highest DFA state, not a count
    int lastIndex = created - 1;

    if (Options.DEBUG)
    {
        Out.debug("DFA start states are :" + Out.NL + indexOfSet + Out.NL + Out.NL + "ordered :" + Out.NL + orderedSets);
    }

    StateSet scratch = NFA.tempStateSet;
    StateSetEnumerator walker = NFA.states;

    // working set for the edge computation, overwritten for every (state, input) pair
    StateSet reached = new StateSet(numStates);

    // lastIndex grows inside the loop whenever a new state set is discovered
    for (int current = 0; current <= lastIndex; current++)
    {
        StateSet sourceSet = (StateSet)orderedSets[current];

        for (char input = (char)0; input < numInput; input++)
        {
            // same computation as DFAEdge(sourceSet, input), inlined for performance

            // step 1: collect the direct successors on this input
            scratch.clear();
            walker.reset(sourceSet);
            while (walker.hasMoreElements())
            {
                scratch.add(table[walker.nextElement()][input]);
            }

            // step 2: add the epsilon closure of each successor
            reached.copy(scratch);

            walker.reset(scratch);
            while (walker.hasMoreElements())
            {
                reached.add(epsilon[walker.nextElement()]);
            }

            if (!reached.containsElements())
            {
                continue;
            }

            Integer known = (Integer)indexOfSet[reached];

            if (known != null)
            {
                dfa.addTransition(current, input, known.intValue());
                continue;
            }

            if (Options.progress)
            {
                Out.print(".");
            }

            lastIndex++;

            // the working set is overwritten on the next round, so a private copy is stored
            StateSet stored = new StateSet(reached);

            indexOfSet[stored] = new Integer(lastIndex);
            orderedSets.Add(stored);

            dfa.addTransition(current, input, lastIndex);
            dfa.setFinal(lastIndex, containsFinal(stored));
            dfa.setPushback(lastIndex, containsPushback(stored));
            dfa.setAction(lastIndex, getAction(stored));
        }
    }

    if (Options.verbose)
    {
        Out.println("");
    }

    return dfa;
}
/**
 * Inserts an automaton for regExp into this NFA. The inserted
 * automaton has
 *
 *   exactly one start state,
 *   exactly one end state,
 *   no transitions leading out of the end state,
 *   no transitions leading into the start state.
 *
 * Character classes are handled directly; all other expression types
 * are built recursively from the automata of their operands.
 *
 * @param regExp the regular expression to construct the
 *               NFA for
 *
 * @return a pair of integers denoting the index of start
 *         and end state of the NFA.
 */
public IntPair insertNFA(RegExp regExp)
{
    if (Options.DEBUG)
    {
        Out.debug("Inserting RegExp : " + regExp);
    }

    if (regExp.isCharClass(macros))
    {
        // two fresh states, connected by the three-argument insertNFA overload
        int first = numStates;
        int last = first + 1;

        ensureCapacity(last + 1);
        if (numStates < last + 1)
        {
            numStates = last + 1;
        }

        insertNFA(regExp, first, last);

        return new IntPair(first, last);
    }

    switch (regExp.type)
    {
        case sym.BAR:
        {
            RegExp2 binary = (RegExp2)regExp;

            IntPair left = insertNFA(binary.r1);
            IntPair right = insertNFA(binary.r2);

            int entry = right.end + 1;
            int exit = right.end + 2;

            addEpsilonTransition(entry, left.start);
            addEpsilonTransition(entry, right.start);
            addEpsilonTransition(left.end, exit);
            addEpsilonTransition(right.end, exit);

            return new IntPair(entry, exit);
        }

        case sym.CONCAT:
        {
            RegExp2 binary = (RegExp2)regExp;

            IntPair left = insertNFA(binary.r1);
            IntPair right = insertNFA(binary.r2);

            addEpsilonTransition(left.end, right.start);

            return new IntPair(left.start, right.end);
        }

        case sym.STAR:
        {
            IntPair inner = insertNFA((RegExp)((RegExp1)regExp).content);

            int entry = inner.end + 1;
            int exit = inner.end + 2;

            addEpsilonTransition(inner.end, exit);
            addEpsilonTransition(entry, inner.start);

            // zero repetitions: bypass the inner automaton
            addEpsilonTransition(entry, exit);
            // further repetitions: loop back
            addEpsilonTransition(inner.end, inner.start);

            return new IntPair(entry, exit);
        }

        case sym.PLUS:
        {
            IntPair inner = insertNFA((RegExp)((RegExp1)regExp).content);

            int entry = inner.end + 1;
            int exit = inner.end + 2;

            addEpsilonTransition(inner.end, exit);
            addEpsilonTransition(entry, inner.start);

            // further repetitions: loop back (no bypass, so at least one pass)
            addEpsilonTransition(inner.end, inner.start);

            return new IntPair(entry, exit);
        }

        case sym.QUESTION:
        {
            IntPair inner = insertNFA((RegExp)((RegExp1)regExp).content);

            addEpsilonTransition(inner.start, inner.end);

            return new IntPair(inner.start, inner.end);
        }

        case sym.BANG:
        {
            IntPair inner = insertNFA((RegExp)((RegExp1)regExp).content);
            return complement(inner);
        }

        case sym.TILDE:
        {
            RegExp1 unary = (RegExp1)regExp;

            IntPair inner = insertNFA((RegExp)unary.content);

            int entry = inner.end + 1;
            int anyBefore = entry + 1;
            int anyAfter = anyBefore + 1;
            int exit = anyAfter + 1;

            // both helper states loop on every input
            for (int c = 0; c < numInput; c++)
            {
                addTransition(anyBefore, c, anyBefore);
                addTransition(anyAfter, c, anyAfter);
            }

            addEpsilonTransition(entry, anyBefore);
            addEpsilonTransition(anyBefore, inner.start);
            addEpsilonTransition(inner.end, anyAfter);
            addEpsilonTransition(anyAfter, exit);

            // complement of "any input, content, any input", followed by a
            // second, freshly inserted copy of the content
            IntPair prefix = complement(new IntPair(entry, exit));
            IntPair tail = insertNFA((RegExp)unary.content);

            addEpsilonTransition(prefix.end, tail.start);

            return new IntPair(prefix.start, tail.end);
        }

        case sym.STRING:
        {
            String text = (String)((RegExp1)regExp).content;
            return insertStringNFA(false, text);
        }

        case sym.STRING_I:
        {
            String text = (String)((RegExp1)regExp).content;
            return insertStringNFA(true, text);
        }

        case sym.MACROUSE:
        {
            String macroName = (String)((RegExp1)regExp).content;
            return insertNFA(macros.getDefinition(macroName));
        }
    }

    throw new Exception("Unknown expression type " + regExp.type + " in NFA construction");
}