This class represents the Attributed Abstract Syntax Tree corresponding to an input LEX file.
Пример #1
0
        /// <summary>
        /// Constructs the NFSA from the abstract syntax tree.
        /// One NfsaInstance is created for every start state
        /// that is not flagged IsAll.  Apart from predicate
        /// dummy rules, each rule gets a fresh pair of states;
        /// the first of the pair is reached by an epsilon
        /// transition from nInst.Entry or, for a left-anchored
        /// pattern, from nInst.AnchorState.
        /// </summary>
        /// <param name="ast">The syntax tree supplying the start states and their rules.</param>
        public void Build(AAST ast)
        {
            DateTime time0 = DateTime.Now;
            int index = 0;
            nfas = new NfsaInstance[ast.StartStateCount];
            foreach (KeyValuePair<string, StartState> p in ast.startStates)
            {
                StartState s = p.Value;
                if (s.IsAll)
                    continue;

                NfsaInstance nInst = new NfsaInstance(s, this);
                nfas[index++] = nInst;
                nInst.key = p.Key;

                // Visit every rule that belongs to this start state.
                for (int i = 0; i < s.rules.Count; i++)
                {
                    RuleDesc rule = s.rules[i];
                    RegExTree tree = rule.Tree;

                    if (rule.isPredDummyRule)
                    {
                        // Dummy rules loop from the entry state back to itself.
                        NState entry = nInst.Entry;
                        nInst.MakePath(tree, entry, entry);
                        continue;
                    }

                    NState start = nInst.MkState();
                    NState endSt = nInst.MkState();

                    // Hook the new start state to the anchor state or the entry state.
                    if (tree.op == RegOp.leftAnchor)
                    {
                        nInst.AnchorState.AddEpsTrns(start);
                        tree = ((Unary)tree).kid;
                    }
                    else
                    {
                        nInst.Entry.AddEpsTrns(start);
                    }

                    //
                    // Deal with the right hand end of the pattern.  All
                    // three cases build the same path; they differ in the
                    // preparation beforehand and in how endSt is finished.
                    //
                    bool rightAnchored = false;
                    if (tree.op == RegOp.eof)
                    {
                        //
                        // <<EOF>> rules are always emitted outside
                        // of the usual subset construction framework.
                        // Setting the use count here ensures that we
                        // do not get spurious warnings.
                        //
                        rule.useCount = 1;
                        nInst.eofAction = rule.aSpan;
                    }
                    else if (tree.op == RegOp.rightAnchor)
                    {
                        rightAnchored = true;
                        tree = ((Unary)tree).kid;
                    }

                    nInst.MakePath(tree, start, endSt);

                    if (rightAnchored)
                        AddAnchorContext(nInst, endSt, rule);
                    else
                        nInst.MarkAccept(endSt, rule);
                }
            }

            if (task.Verbose)
            {
                string errorNote = task.Errors ? ", errors detected" : " without error";
                string warningNote = task.Warnings ? "; warnings issued. " : ". ";
                Console.Write("GPLEX: NFSA built");
                Console.Write(errorNote);
                Console.Write(warningNote);
                Console.WriteLine(TaskState.ElapsedTime(time0));
            }
            if (task.Summary)
            {
                WriteSummary(time0);
            }
        }
Пример #2
0
        /// <summary>
        /// Runs the whole pipeline for one input: resolves the file names,
        /// opens the source, parses it, and - while no errors have been
        /// recorded and the relevant options allow it - builds the NFSA,
        /// converts it to a DFSA, optionally minimizes it, and emits the
        /// scanner.  Nothing is done after OpenSource if inputFile is null.
        /// </summary>
        /// <param name="fileArg">The file argument handed to GetNames.</param>
        /// <remarks>
        /// Any exception is recorded with the error handler (when both the
        /// handler and the syntax tree already exist) and then rethrown.
        /// </remarks>
        internal void Process(string fileArg)
        {
            GetNames(fileArg);
            // check for file exists
            OpenSource();
            // parse source file
            if (inputFile != null)
            {
                DateTime start = DateTime.Now;
                try
                {
                    handler = new ErrorHandler();
                    scanner = new QUT.Gplex.Lexer.Scanner(inputFile);
                    parser = new QUT.Gplex.Parser.Parser(scanner);
                    scanner.yyhdlr = handler;
                    parser.Initialize(this, scanner, handler, new OptionParser2(ParseOption));
                    aast = parser.Aast;
                    parser.Parse();
                    // aast.DiagnosticDump();
                    if (verbose)
                        Status(start);
                    CheckOptions();
                    if (!Errors && !ParseOnly)
                    {
                        // build NFSA
                        if (ChrClasses)
                        {
                            DateTime t0 = DateTime.Now;
                            partition = new Partition(TargetSymCardinality, this);
                            partition.FindClasses(aast);
                            partition.FixMap();
                            if (verbose)
                                ClassStatus(t0, partition.Length);
                        }
                        else
                            CharRange.Init(TargetSymCardinality);
                        nfsa = new NFSA(this);
                        nfsa.Build(aast);
                        if (!Errors)
                        {
                            // convert to DFSA
                            dfsa = new DFSA(this);
                            dfsa.Convert(nfsa);
                            if (!Errors)
                            {
                                // minimize automaton
                                if (minimize)
                                    dfsa.Minimize();
                                if (!Errors && !checkOnly)
                                {
                                    // emit the scanner to output file
                                    TextReader frameRdr = null;
                                    TextWriter outputWrtr = null;
                                    try
                                    {
                                        frameRdr = FrameReader();
                                        outputWrtr = OutputWriter();
                                        dfsa.EmitScanner(frameRdr, outputWrtr);

                                        if (!embedBuffers)
                                            CopyBufferCode();
                                    }
                                    finally
                                    {
                                        // Clean up, even when emission fails part way
                                        // through, so that neither stream is leaked.
                                        if (frameRdr != null)
                                            frameRdr.Close();
                                        if (outputWrtr != null)
                                            outputWrtr.Close();
                                    }
                                }
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    // The failure may have happened before the handler or the
                    // syntax tree was created; dereferencing a null here would
                    // replace the real exception with a NullReferenceException.
                    if (handler != null && aast != null)
                        handler.AddError(ex.Message, aast.AtStart);
                    throw;
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Builds the RangeLiteral for this object by running the Test
        /// delegate over every value from 0 up to, but not including,
        /// the target symbol cardinality of the task.  Each maximal run
        /// of consecutive values that pass the test becomes one CharRange.
        /// </summary>
        /// <param name="name">Name shown in the verbose progress message.</param>
        /// <param name="aast">Supplies the target cardinality, the codepage,
        /// the error handler and the verbosity setting.</param>
        internal void Populate( string name, AAST aast ) {
            DateTime begin = DateTime.Now;
            int max = aast.Task.TargetSymCardinality;
            this.rangeLit = new RangeLiteral( false );

            int j = 0;
            int codepage = aast.CodePage;
            bool guessing = (codepage == Automaton.TaskState.guessCP);
            bool testValuesDirectly =
                max > 256 || codepage == Automaton.TaskState.rawCP || guessing;

            if (testValuesDirectly) {
                if (guessing && max <= 256)
                    aast.hdlr.ListError( aast.AtStart, 93 );
                //
                // The values tested are plain numeric code points, and
                // need no interpretation.  Either
                // (1) this is a unicode scanner, which has already
                //     decoded its input element stream, OR
                // (2) the user commanded /codepoint:raw, asking
                //     for no interpretation to be applied.
                //
                while (j < max) {
                    // Step over the values that fail the test ...
                    for (; j < max && !Test( j ); j++) { }
                    if (j == max)
                        break;
                    // ... then consume the run of values that pass it.
                    int first = j;
                    for (; j < max && Test( j ); j++) { }
                    this.rangeLit.list.Add( new CharRange( first, (j - 1) ) );
                }
            }
            else {
                //
                // The values tested are bytes from the 0x00 to 0xFF
                // "alphabet".  A byte belongs to the set when the
                // character it decodes to under the nominated codepage
                // passes the test.
                //
                // The codepage is checked for the single byte property.
                //
                Encoding enc = Encoding.GetEncoding( codepage );
                Decoder decoder = enc.GetDecoder();
                if (!enc.IsSingleByte)
                    aast.hdlr.ListError( aast.AtStart, 92 );
                //
                // Decode all 256 byte values in a single call.  The
                // character map is pre-filled with '?'.
                //
                byte[] bArray = new byte[256];
                char[] cArray = new char[256];
                for (int b = 0; b < 256; b++) {
                    bArray[b] = (byte)b;
                    cArray[b] = '?';
                }
                int bNum, cNum;
                bool done;
                decoder.Convert( bArray, 0, 256, cArray, 0, 256, true, out bNum, out cNum, out done );
                //
                // Collect the runs, testing the decoded character
                // rather than the byte value itself.
                //
                while (j < max) {
                    for (; j < max && !Test( cArray[j] ); j++) { }
                    if (j == max)
                        break;
                    int first = j;
                    for (; j < max && Test( cArray[j] ); j++) { }
                    this.rangeLit.list.Add( new CharRange( first, (j - 1) ) );
                }
            }

            if (aast.IsVerbose) {
                Console.WriteLine( "GPLEX: Generating [:{0}:], {1}", name, Gplex.Automaton.TaskState.ElapsedTime( begin ) );
            }
        }
Пример #4
0
 // Creates a parser for the pattern text str, which is located at spn.
 // The host and target symbol cardinalities are read from parent.task.
 internal ReParser( string str, LexSpan spn, AAST parent ) {
     // SetUnicode is called only when the task is a unicode task.
     if (parent.task.Unicode) {
         CharacterUtilities.SetUnicode();
     }
     span = spn;
     pat = str;
     symCard = parent.task.HostSymCardinality;
     InitReParser();
     this.parent = parent;
     //
     //  Not pretty, but RangeLists cannot be
     //  manipulated until the code of class Partition
     //  knows the upper bound of the alphabet.
     //
     CharRange.Init( parent.task.TargetSymCardinality );
 }
Пример #5
0
 // Parses the text held in verb (located at vrbSpan) and stores the result in regX.
 internal void ParseRE( AAST aast ) {
     AAST.ReParser reParser = new AAST.ReParser( verb, vrbSpan, aast );
     regX = reParser.Parse();
 }
Пример #6
0
        /// <summary>
        /// Distributes the recorded code spans.  A span starting before
        /// FLine goes to the scanner prolog and one starting after LLine
        /// goes to the scanner epilog.  Any other span lies *between*
        /// rules, where code has no unambiguous meaning; it is not added
        /// anywhere and is reported as message number 110.
        /// </summary>
        /// <param name="aast">Receives the code spans and supplies the error handler.</param>
        internal void FinalizeCode( AAST aast ) {
            foreach (LexSpan loc in locs) {
                if (loc.startLine >= FLine && loc.startLine <= LLine) {
                    // code is between rules
                    aast.hdlr.ListError( loc, 110 );
                    continue;
                }
                AAST.Destination target = (loc.startLine < FLine)
                    ? AAST.Destination.scanProlog
                    : AAST.Destination.scanEpilog;
                aast.AddCodeSpan( target, loc );
            }
        }
Пример #7
0
        // Recursive walk over a regular expression tree.  An anchor operator
        // met during the walk is reported as error 69.  A context node whose
        // two operands both have a contextLength() of zero is reported as
        // error 75.  Both reports are attached to pSpan.
        void Check( AAST aast, RegExTree tree ) {
            if (tree == null)
                return;

            switch (tree.op) {
                case RegOp.leftAnchor:
                case RegOp.rightAnchor:
                    aast.hdlr.ListError( pSpan, 69 );
                    break;

                case RegOp.closure:
                case RegOp.finiteRep:
                    Check( aast, ((Unary)tree).kid );
                    break;

                case RegOp.context: {
                        Binary ctx = (Binary)tree;
                        Check( aast, ctx.lKid );
                        Check( aast, ctx.rKid );
                        if (ctx.lKid.contextLength() == 0 &&
                            ctx.rKid.contextLength() == 0)
                            aast.hdlr.ListError( pSpan, 75 );
                        break;
                    }

                case RegOp.concat:
                case RegOp.alt: {
                        Binary pair = (Binary)tree;
                        Check( aast, pair.lKid );
                        Check( aast, pair.rKid );
                        break;
                    }

                // Leaf operators: there is nothing below them to inspect.
                case RegOp.charClass:
                case RegOp.primitive:
                case RegOp.litStr:
                case RegOp.eof:
                    break;
            }
        }
Пример #8
0
 /// <summary>
 /// This is the place to perform any semantic checks on the 
 /// tree corresponding to a rule of the LEX grammar,
 /// during a recursive traversal of the tree.  It is hard
 /// to do these on the fly during AST construction, because
 /// of the tree-grafting that happens for lexical categories.
 /// 
 /// An outermost left anchor, and then an outermost right
 /// anchor, are stripped before the remaining tree is handed
 /// to Check, so that only anchors at the (logical) ends of
 /// the pattern are accepted.  Error 100 is reported when the
 /// operand of the right anchor is a context node.  Finally
 /// minPatternLength is set from the stripped tree, provided
 /// that the tree is not null.
 /// </summary>
 /// <param name="aast">Supplies the error handler used for reporting.</param>
 void SemanticCheck( AAST aast ) {
     RegExTree tree = reAST;
     if (tree != null && tree.op == RegOp.leftAnchor) tree = ((Unary)tree).kid;
     if (tree != null && tree.op == RegOp.rightAnchor) {
         tree = ((Unary)tree).kid;
         // The operand gets the same null guard as every other access
         // in this method: a missing operand must not throw here.
         if (tree != null && tree.op == RegOp.context)
             aast.hdlr.ListError( pSpan, 100 );
     }
     Check( aast, tree );
     if (tree != null)
         minPatternLength = tree.minimumLength();
 }
Пример #9
0
        // internal void Dump() { Console.WriteLine(pattern); }

        // Parses the pattern text (located at pSpan) into reAST, and then
        // runs the semantic checks.
        internal void ParseRE( AAST aast ) {
            AAST.ReParser reParser = new AAST.ReParser( pattern, pSpan, aast );
            reAST = reParser.Parse();
            SemanticCheck( aast );
        }
Пример #10
0
 // Builds the predicate dummy rule for the lexical category cat.  The
 // pattern is the category name wrapped in braces, the action span is
 // aast.AtStart, and there is no pattern span.  Once the pattern has been
 // parsed, the start state named by cat.PredDummyName is added to the
 // list of the rule.
 internal static RuleDesc MkDummyRuleDesc( LexCategory cat, AAST aast ) {
     RuleDesc dummy = new RuleDesc();
     dummy.isPredDummyRule = true;
     dummy.isBarAction = false;
     dummy.aSpan = aast.AtStart;
     dummy.pSpan = null;
     dummy.list = new List<StartState>();
     dummy.pattern = String.Format( CultureInfo.InvariantCulture, "{{{0}}}", cat.Name );
     // Every field must be set before parsing, and the list is filled only afterwards.
     dummy.ParseRE( aast );
     StartState predState = aast.StartStateValue( cat.PredDummyName );
     dummy.list.Add( predState );
     return dummy;
 }
Пример #11
0
 // Creates a parser for the pattern text str, which is located at spn.
 // The host symbol cardinality is read from parent.task.
 internal ReParser(string str, LexSpan spn, AAST parent)
 {
     // SetUnicode is called only when the task is a unicode task.
     if (parent.task.Unicode)
     {
         CharacterUtilities.SetUnicode();
     }
     span = spn;
     pat = str;
     symCard = parent.task.HostSymCardinality;
     InitReParser();
     this.parent = parent;
 }