/// <summary>
/// Builds the NFSA from the abstract syntax tree.
/// One NfsaInstance is created for every start state that is
/// not flagged IsAll. Each ordinary rule gets a fresh pair of
/// nfsa states; the first of the pair is the target of a new
/// epsilon transition from either the instance's anchor state
/// (left-anchored patterns) or the real start state, nInst.Entry.
/// </summary>
/// <param name="ast">The abstract syntax tree supplying the start states and their rules.</param>
public void Build(AAST ast)
{
    DateTime buildStart = DateTime.Now;
    int slot = 0;
    nfas = new NfsaInstance[ast.StartStateCount];

    foreach (KeyValuePair<string, StartState> pair in ast.startStates)
    {
        StartState startState = pair.Value;
        string stateName = pair.Key;
        if (startState.IsAll)
            continue;

        NfsaInstance nInst = new NfsaInstance(startState, this);
        nfas[slot++] = nInst;
        nInst.key = stateName;

        // Add a path to this instance for each pattern in turn.
        for (int ruleIx = 0; ruleIx < startState.rules.Count; ruleIx++)
        {
            RuleDesc rule = startState.rules[ruleIx];
            RegExTree tree = rule.Tree;

            if (rule.isPredDummyRule)
            {
                // Dummy rules loop from the entry state back to itself.
                NState entryState = nInst.Entry;
                nInst.MakePath(tree, entryState, entryState);
                continue;
            }

            NState first = nInst.MkState();
            NState last = nInst.MkState();

            if (tree.op == RegOp.leftAnchor)
            {
                // Left anchored pattern: hang it off the anchor state
                // and continue with the anchored sub-tree.
                nInst.AnchorState.AddEpsTrns(first);
                tree = ((Unary)tree).kid;
            }
            else
            {
                // Not left anchored: hang it off the real start state.
                nInst.Entry.AddEpsTrns(first);
            }

            //
            //  Now deal with right anchors, adding states as necessary.
            //
            if (tree.op == RegOp.rightAnchor)
            {
                tree = ((Unary)tree).kid;
                nInst.MakePath(tree, first, last);
                AddAnchorContext(nInst, last, rule);
            }
            else
            {
                if (tree.op == RegOp.eof)
                {
                    //
                    //  <<EOF>> rules are always emitted outside
                    //  of the usual subset construction framework.
                    //  Setting the use count here ensures that we
                    //  do not get spurious warnings.
                    //
                    rule.useCount = 1;
                    nInst.eofAction = rule.aSpan;
                }
                nInst.MakePath(tree, first, last);
                nInst.MarkAccept(last, rule);
            }
        }
    }

    if (task.Verbose)
    {
        Console.Write("GPLEX: NFSA built");
        string errorText = task.Errors ? ", errors detected" : " without error";
        Console.Write(errorText);
        string warningText = task.Warnings ? "; warnings issued. " : ". ";
        Console.Write(warningText);
        Console.WriteLine(TaskState.ElapsedTime(buildStart));
    }

    if (task.Summary)
    {
        WriteSummary(buildStart);
    }
}
/// <summary>
/// Runs the complete pipeline for one input specification:
/// resolves the file names, opens the source, parses it, and then
/// (provided no errors have been reported and the relevant options
/// allow it) builds the NFSA, converts it to a DFSA, optionally
/// minimizes it, and emits the scanner to the output file.
/// </summary>
/// <param name="fileArg">The file argument passed on to GetNames.</param>
/// <remarks>
/// Any exception raised while processing is recorded with the error
/// handler (when one is available) and then rethrown to the caller.
/// </remarks>
internal void Process(string fileArg)
{
    GetNames(fileArg);
    // check for file exists
    OpenSource();
    // parse source file
    if (inputFile == null)
        return;

    DateTime start = DateTime.Now;
    try
    {
        handler = new ErrorHandler();
        scanner = new QUT.Gplex.Lexer.Scanner(inputFile);
        parser = new QUT.Gplex.Parser.Parser(scanner);
        scanner.yyhdlr = handler;
        parser.Initialize(this, scanner, handler, new OptionParser2(ParseOption));
        aast = parser.Aast;
        parser.Parse();
        // aast.DiagnosticDump();
        if (verbose)
            Status(start);
        CheckOptions();
        if (Errors || ParseOnly)
            return;

        // build NFSA
        if (ChrClasses)
        {
            DateTime t0 = DateTime.Now;
            partition = new Partition(TargetSymCardinality, this);
            partition.FindClasses(aast);
            partition.FixMap();
            if (verbose)
                ClassStatus(t0, partition.Length);
        }
        else
        {
            CharRange.Init(TargetSymCardinality);
        }
        nfsa = new NFSA(this);
        nfsa.Build(aast);
        if (Errors)
            return;

        // convert to DFSA
        dfsa = new DFSA(this);
        dfsa.Convert(nfsa);
        if (Errors)
            return;

        // minimize automaton
        if (minimize)
            dfsa.Minimize();
        if (Errors || checkOnly)
            return;

        // emit the scanner to output file
        TextReader frameRdr = null;
        TextWriter outputWrtr = null;
        try
        {
            frameRdr = FrameReader();
            outputWrtr = OutputWriter();
            dfsa.EmitScanner(frameRdr, outputWrtr);
            if (!embedBuffers)
                CopyBufferCode();
        }
        finally
        {
            // Clean up! The reader and writer are closed even when
            // emission throws, so that file handles are not leaked.
            try
            {
                if (frameRdr != null)
                    frameRdr.Close();
            }
            finally
            {
                if (outputWrtr != null)
                    outputWrtr.Close();
            }
        }
    }
    catch (Exception ex)
    {
        // The failure may have happened before the handler or the AAST
        // was created; guard so that a NullReferenceException raised
        // here cannot replace the exception that is being reported.
        if (handler != null && aast != null)
            handler.AddError(ex.Message, aast.AtStart);
        throw;
    }
}
/// <summary>
/// Constructs the RangeLiteral for this object, holding every
/// value in the target alphabet for which the Test delegate
/// returns true. Contiguous runs of accepted values are stored
/// as single CharRange entries.
/// </summary>
/// <param name="name">Name used in the verbose progress message.</param>
/// <param name="aast">Supplies the target symbol cardinality, the codepage,
/// the error handler and the verbose flag.</param>
internal void Populate(string name, AAST aast)
{
    DateTime startTime = DateTime.Now;
    int max = aast.Task.TargetSymCardinality;
    this.rangeLit = new RangeLiteral(false);
    //
    // The delegate is run over all the values from
    // '\0' to (max-1). Each contiguous run of true
    // values becomes one entry in the CharRange list.
    //
    int pos = 0;
    int codepage = aast.CodePage;
    bool guessing = (codepage == Automaton.TaskState.guessCP);

    if (max > 256 || codepage == Automaton.TaskState.rawCP || guessing)
    {
        if (max <= 256 && guessing)
        {
            aast.hdlr.ListError(aast.AtStart, 93);
        }
        //
        // We are generating a set of numeric code points with
        // the named property. No interpretation is needed, either
        // (1) because this is for a unicode scanner that has
        //     already decoded its input element stream, OR
        // (2) the user has commanded /codepoint:raw to indicate
        //     that no interpretation is to be used.
        //
        for (;;)
        {
            // Skip forward to the next accepted value, if any.
            while (pos < max && !Test(pos))
                pos++;
            if (pos >= max)
                break;
            int runStart = pos;
            // Extend the run for as long as values are accepted.
            while (pos < max && Test(pos))
                pos++;
            this.rangeLit.list.Add(new CharRange(runStart, pos - 1));
        }
    }
    else
    {
        // We are generating a set of byte values from the
        // 0x00 to 0xFF "alphabet" that correspond to unicode
        // characters with the named property. The meaning of
        // "corresponds" is defined by the nominated codepage.
        //
        // Check codepage for single byte property.
        //
        Encoding encoding = Encoding.GetEncoding(codepage);
        Decoder byteDecoder = encoding.GetDecoder();
        if (!encoding.IsSingleByte)
        {
            aast.hdlr.ListError(aast.AtStart, 92);
        }
        //
        // Construct character map for bytes.
        //
        int bytesUsed, charsUsed;
        bool completed;
        char[] charMap = new char[256];
        byte[] byteValues = new byte[256];
        for (int ix = 0; ix < 256; ix++)
        {
            byteValues[ix] = (byte)ix;
            charMap[ix] = '?';
        }
        byteDecoder.Convert(byteValues, 0, 256, charMap, 0, 256, true, out bytesUsed, out charsUsed, out completed);
        //
        // Now construct the CharRange literal, testing the
        // character that each byte value decodes to.
        //
        for (;;)
        {
            while (pos < max && !Test(charMap[pos]))
                pos++;
            if (pos >= max)
                break;
            int runStart = pos;
            while (pos < max && Test(charMap[pos]))
                pos++;
            this.rangeLit.list.Add(new CharRange(runStart, pos - 1));
        }
    }

    if (aast.IsVerbose)
    {
        Console.WriteLine("GPLEX: Generating [:{0}:], {1}", name, Gplex.Automaton.TaskState.ElapsedTime(startTime));
    }
}
/// <summary>
/// Creates a parser for the regular expression text <paramref name="str"/>.
/// </summary>
/// <param name="str">The pattern text to be parsed.</param>
/// <param name="spn">The span stored alongside the pattern.</param>
/// <param name="parent">The owning AAST; its task supplies the unicode
/// flag and the host and target symbol cardinalities.</param>
internal ReParser(string str, LexSpan spn, AAST parent)
{
    if (parent.task.Unicode)
    {
        CharacterUtilities.SetUnicode();
    }

    this.pat = str;
    this.span = spn;
    this.symCard = parent.task.HostSymCardinality;
    InitReParser();
    this.parent = parent;

    //
    //  This is ugly, but RangeLists cannot be manipulated
    //  unless the alphabet upper bound is known to the
    //  code of class Partition.
    //
    int alphabetBound = parent.task.TargetSymCardinality;
    CharRange.Init(alphabetBound);
}
/// <summary>
/// Parses the text held in <c>verb</c> with a new AAST.ReParser
/// and stores the parse result in <c>regX</c>.
/// </summary>
/// <param name="aast">The AAST passed to the parser as its parent.</param>
internal void ParseRE(AAST aast)
{
    AAST.ReParser reParser = new AAST.ReParser(verb, vrbSpan, aast);
    regX = reParser.Parse();
}
/// <summary>
/// Distributes the recorded code spans. A span starting before
/// line FLine is added to the scanner prolog, and a span starting
/// after line LLine is added to the scanner epilog. Any other span
/// is code *between* rules, which has no unambiguous meaning: it is
/// not added anywhere, and message 110 is listed against it instead.
/// </summary>
/// <param name="aast">Receives the code spans and provides the error handler.</param>
internal void FinalizeCode(AAST aast)
{
    for (int ix = 0; ix < locs.Count; ix++)
    {
        LexSpan codeSpan = locs[ix];

        if (codeSpan.startLine < FLine)
        {
            aast.AddCodeSpan(AAST.Destination.scanProlog, codeSpan);
            continue;
        }

        if (codeSpan.startLine > LLine)
        {
            aast.AddCodeSpan(AAST.Destination.scanEpilog, codeSpan);
            continue;
        }

        // The code lies between rules.
        aast.hdlr.ListError(codeSpan, 110);
    }
}
/// <summary>
/// Recursively checks a regular expression tree.
/// Message 69 is listed for any left or right anchor met during
/// the traversal, and message 75 for a context node where both
/// operands report a context length of zero.
/// </summary>
/// <param name="aast">Provides the error handler.</param>
/// <param name="tree">The (sub)tree to check; null is accepted and ignored.</param>
void Check(AAST aast, RegExTree tree)
{
    if (tree == null)
    {
        return;
    }

    switch (tree.op)
    {
        case RegOp.leftAnchor:
        case RegOp.rightAnchor:
            // An anchor has been reached by the traversal.
            aast.hdlr.ListError(pSpan, 69);
            break;

        case RegOp.closure:
        case RegOp.finiteRep:
            {
                Unary single = (Unary)tree;
                Check(aast, single.kid);
                break;
            }

        case RegOp.context:
        case RegOp.concat:
        case RegOp.alt:
            {
                Binary pair = (Binary)tree;
                Check(aast, pair.lKid);
                Check(aast, pair.rKid);
                if (tree.op == RegOp.context)
                {
                    if (pair.lKid.contextLength() == 0 && pair.rKid.contextLength() == 0)
                    {
                        aast.hdlr.ListError(pSpan, 75);
                    }
                }
                break;
            }

        case RegOp.charClass:
        case RegOp.primitive:
        case RegOp.litStr:
        case RegOp.eof:
            // Leaf nodes: nothing to check.
            break;
    }
}
/// <summary>
/// This is the place to perform any semantic checks on the
/// trees corresponding to a rule of the LEX grammar,
/// during a recursive traversal of the tree. It is hard
/// to do these on the fly during AST construction, because
/// of the tree-grafting that happens for lexical categories.
///
/// First check is that '^' and '$' can only appear
/// (logically) at the ends of the pattern: an outermost left
/// anchor and an outermost right anchor are stripped here, and
/// Check is then run over what remains.
/// Message 100 is listed when the operand of the outermost
/// right anchor is a context node.
/// Later need to check ban on multiple right contexts ...
///
/// Finally, minPatternLength is set from the stripped tree
/// whenever that tree is not null.
/// </summary>
/// <param name="aast">Provides the error handler.</param>
void SemanticCheck(AAST aast)
{
    RegExTree tree = reAST;

    if (tree != null && tree.op == RegOp.leftAnchor)
    {
        tree = ((Unary)tree).kid;
    }

    if (tree != null && tree.op == RegOp.rightAnchor)
    {
        tree = ((Unary)tree).kid;
        // The operand is null-checked like every other tree access in
        // this method, so that an anchor with no operand cannot raise
        // a NullReferenceException here.
        if (tree != null && tree.op == RegOp.context)
        {
            aast.hdlr.ListError(pSpan, 100);
        }
    }

    Check(aast, tree);

    if (tree != null)
    {
        minPatternLength = tree.minimumLength();
    }
}
/// <summary>
/// Parses <c>pattern</c> with a new AAST.ReParser, stores the
/// resulting tree in <c>reAST</c>, and then runs SemanticCheck.
/// </summary>
/// <param name="aast">The AAST passed to the parser as its parent and to the semantic check.</param>
internal void ParseRE(AAST aast)
{
    AAST.ReParser reParser = new AAST.ReParser(pattern, pSpan, aast);
    reAST = reParser.Parse();
    SemanticCheck(aast);
}
/// <summary>
/// Creates the dummy rule descriptor for a lexical category.
/// The rule's pattern is the category name wrapped in braces,
/// it has no pattern span, and its single start state is the
/// one that the AAST returns for the category's PredDummyName.
/// </summary>
/// <param name="cat">The lexical category supplying the name and the dummy start state name.</param>
/// <param name="aast">Supplies the action span, parses the pattern and resolves the start state.</param>
/// <returns>The newly built, already parsed, dummy rule descriptor.</returns>
internal static RuleDesc MkDummyRuleDesc(LexCategory cat, AAST aast)
{
    RuleDesc dummy = new RuleDesc();

    dummy.isPredDummyRule = true;
    dummy.isBarAction = false;
    dummy.pSpan = null;
    dummy.aSpan = aast.AtStart;
    dummy.pattern = String.Format(CultureInfo.InvariantCulture, "{{{0}}}", cat.Name);
    dummy.list = new List<StartState>();

    // The pattern is parsed before the start state is looked up.
    dummy.ParseRE(aast);

    StartState dummyState = aast.StartStateValue(cat.PredDummyName);
    dummy.list.Add(dummyState);
    return dummy;
}
/// <summary>
/// Creates a parser for the regular expression text <paramref name="str"/>.
/// </summary>
/// <param name="str">The pattern text to be parsed.</param>
/// <param name="spn">The span stored alongside the pattern.</param>
/// <param name="parent">The owning AAST; its task supplies the unicode
/// flag and the host symbol cardinality.</param>
internal ReParser(string str, LexSpan spn, AAST parent)
{
    if (parent.task.Unicode)
    {
        CharacterUtilities.SetUnicode();
    }

    this.pat = str;
    this.span = spn;
    this.symCard = parent.task.HostSymCardinality;

    // The parent reference is stored only after initialization.
    InitReParser();
    this.parent = parent;
}