コード例 #1
0
		/// <summary>
		/// Builds the NFA fragment for this regular expression: two new states
		/// joined by one transition labelled with the character class.
		/// </summary>
		/// <param name="nfa">The NFA being built; its head and tail are set to the two new states.</param>
		internal override void BuildNfa(Nfa nfa)
		{
			// The entry state is allocated (and published as the head) before the exit state.
			var entry = nfa.NewState();
			nfa.HeadState = entry;
			var exit = nfa.NewState();
			nfa.TailState = exit;
			// Single character-class transition from entry to exit.
			entry.Add(exit, charClass);
		}
コード例 #2
0
ファイル: SRegex.cs プロジェクト: bkushnir/IronTextLibrary
        /// <summary>
        /// Parses <paramref name="pattern"/> and installs the matcher that
        /// corresponds to the requested compilation strategy.
        /// </summary>
        /// <param name="pattern">The pattern text handed to the parser.</param>
        /// <param name="options">
        /// Selects the backend. Only the four values tested below assign a matcher;
        /// any other value leaves the matcher untouched by this constructor.
        /// </param>
        public SRegex(string pattern, SRegexOptions options = SRegexOptions.Default)
        {
            // Every backend starts from the same parsed syntax tree.
            var ast = Language.Parse(new SreSyntax(), pattern).Result.Node;

            if (options == SRegexOptions.ByteCodeCompilation)
            {
                // Bytecode backend: a fresh Pike VM is created for each match call.
                var backend = new NfaVMBytecodeBackend(ast);
                matcher = (int[] input) =>
                    {
                        var machine = new PikeNfaVM(backend.Code.ToArray());
                        machine.Feed(input.Select(ch => (int)ch)).Done();
                        return machine.HasMatch;
                    };
            }
            else if (options == SRegexOptions.ILCompilation)
            {
                // IL backend: the method name is made unique with the running pattern counter.
                string generatedName = "MatchSrePattern" + PatternID++;

                var cached = new CachedMethod<MatchDelegate>(
                    generatedName,
                    (emit, args) => EmitAst(emit, ast, args[0]));

                matcher = cached.Delegate;
            }
            else if (options == SRegexOptions.NfaCompilation)
            {
                // NFA backend: the automaton's own Match method is the matcher.
                var automaton = new Nfa(ast);
                matcher = automaton.Match;
            }
            else if (options == SRegexOptions.DfaCompilation)
            {
                // DFA backend: build the DFA data once, then simulate it per input.
                var algorithm = new RegularToDfaAlgorithm(new RegularTree(ast));
                var runner = new DfaSimulation(algorithm.Data);
                matcher = input => runner.Match(input);
            }
        }
コード例 #3
0
ファイル: LiteralExp.cs プロジェクト: sumpacle/Cyjb.Compilers
		/// <summary>
		/// Builds the NFA fragment for this literal: a chain of states with one
		/// transition per character of the literal.
		/// </summary>
		/// <param name="nfa">The NFA being built; its head is the first state of the chain and its tail the last.</param>
		internal override void BuildNfa(Nfa nfa)
		{
			// When case is ignored, the transitions are derived from the upper-cased literal.
			string text = ignoreCase ? literal.ToUpper(culture) : literal;
			// The chain starts with a single state that is both head and tail.
			var current = nfa.NewState();
			nfa.HeadState = current;
			nfa.TailState = current;
			int count = literal.Length;
			for (int pos = 0; pos < count; pos++)
			{
				var next = nfa.NewState();
				char ch = text[pos];
				if (culture != null)
				{
					// Case-insensitive: accept the character and its lower-case form.
					var pair = new RegexCharClass();
					pair.AddChar(ch);
					pair.AddChar(char.ToLower(ch, culture));
					current.Add(next, pair.ToStringClass());
				}
				else
				{
					// Case-sensitive: accept exactly this character.
					current.Add(next, ch);
				}
				// Advance the chain; the newest state becomes the tail.
				nfa.TailState = next;
				current = next;
			}
		}
コード例 #4
0
ファイル: NfaState.cs プロジェクト: sumpacle/Cyjb.Compilers
		/// <summary>
		/// Initializes a new instance of the <see cref="NfaState"/> class.
		/// The state starts with <c>SymbolIndex</c> set to <c>Constants.None</c>
		/// and <c>StateType</c> set to <c>NfaStateType.Normal</c>.
		/// </summary>
		/// <param name="nfa">The NFA that contains this state.</param>
		/// <param name="index">The index of this state.</param>
		public NfaState(Nfa nfa, int index)
		{
			// Identity of the state: owning automaton and position.
			this.Nfa = nfa;
			this.Index = index;
			// Initial values for the remaining properties.
			this.SymbolIndex = Constants.None;
			this.StateType = NfaStateType.Normal;
		}
コード例 #5
0
		/// <summary>
		/// Builds the NFA fragment for this expression by building the left
		/// operand, then the right operand, and linking the left tail to the
		/// right head.
		/// </summary>
		/// <param name="nfa">The NFA being built; on return its head is the left operand's head and its tail is whatever the right operand left there.</param>
		internal override void BuildNfa(Nfa nfa)
		{
			// Build the left operand and capture both of its ends.
			left.BuildNfa(nfa);
			var leftEntry = nfa.HeadState;
			var leftExit = nfa.TailState;
			// Build the right operand; its head is read back from the NFA afterwards.
			right.BuildNfa(nfa);
			var rightEntry = nfa.HeadState;
			// Link the two fragments with a transition that carries no symbol argument.
			leftExit.Add(rightEntry);
			// The combined fragment is entered through the left operand.
			nfa.HeadState = leftEntry;
		}
コード例 #6
0
		/// <summary>
		/// Builds the NFA fragment for this expression by placing the left and
		/// right operands side by side between a shared new head and a shared
		/// new tail.
		/// </summary>
		/// <param name="nfa">The NFA being built; its head and tail are set to the two shared states.</param>
		internal override void BuildNfa(Nfa nfa)
		{
			// Shared entry and exit states, allocated before either operand.
			var sharedEntry = nfa.NewState();
			var sharedExit = nfa.NewState();

			// First branch: shared entry -> left fragment -> shared exit.
			left.BuildNfa(nfa);
			sharedEntry.Add(nfa.HeadState);
			nfa.TailState.Add(sharedExit);

			// Second branch: shared entry -> right fragment -> shared exit.
			right.BuildNfa(nfa);
			sharedEntry.Add(nfa.HeadState);
			nfa.TailState.Add(sharedExit);

			// Publish the shared states as the ends of the combined fragment.
			nfa.HeadState = sharedEntry;
			nfa.TailState = sharedExit;
		}
コード例 #7
0
		/// <summary>
		/// Builds the NFA fragment for the inner expression and, when a trailing
		/// expression is present, appends it and marks the trailing states.
		/// </summary>
		/// <param name="nfa">The NFA being built.</param>
		internal override void BuildNfa(Nfa nfa)
		{
			innerExp.BuildNfa(nfa);
			if (TrailingExpression == null)
			{
				// No trailing part: the inner fragment is the whole result.
				return;
			}
			// Remember where the inner fragment starts; its tail is where the trailing part begins.
			var innerEntry = nfa.HeadState;
			TrailingHeadState = nfa.TailState;
			// Build the trailing fragment and attach it after the inner one.
			TrailingExpression.BuildNfa(nfa);
			TrailingHeadState.Add(nfa.HeadState);
			nfa.HeadState = innerEntry;
			// Tag the junction state and the final state with their trailing roles.
			TrailingHeadState.StateType = NfaStateType.TrailingHead;
			nfa.TailState.StateType = NfaStateType.Trailing;
		}
コード例 #8
0
ファイル: Regex.cs プロジェクト: sumpacle/Cyjb.Compilers
		/// <summary>
		/// Builds the NFA for the current regular expression.
		/// </summary>
		/// <param name="nfa">The NFA to build.</param>
		internal abstract void BuildNfa(Nfa nfa);
コード例 #9
0
ファイル: RepeatExp.cs プロジェクト: sumpacle/Cyjb.Compilers
		/// <summary>
		/// Builds the NFA fragment for this repetition by chaining copies of the
		/// inner expression between a new head and a new tail.
		/// </summary>
		/// <param name="nfa">The NFA being built; its head and tail are set to the two new states.</param>
		internal override void BuildNfa(Nfa nfa)
		{
			var entry = nfa.NewState();
			var exit = nfa.NewState();
			// An upper bound of int.MaxValue means "no upper bound" and is handled specially.
			bool unbounded = maxTimes == int.MaxValue;
			// Unbounded: unroll only the required copies. Bounded: unroll one copy per allowed repetition.
			int copies = unbounded ? minTimes : maxTimes;
			if (copies == 0)
			{
				// The inner expression is always built at least once.
				copies = 1;
			}
			var previous = entry;
			int built = 0;
			while (built < copies)
			{
				innerExp.BuildNfa(nfa);
				previous.Add(nfa.HeadState);
				if (built >= minTimes)
				{
					// Enough copies have been matched: allow leaving to the final tail from here.
					previous.Add(exit);
				}
				previous = nfa.TailState;
				built++;
			}
			// The end of the last copy always leads to the final tail.
			previous.Add(exit);
			if (unbounded)
			{
				// Loop the last copy back onto itself for unlimited repetitions.
				nfa.TailState.Add(nfa.HeadState);
			}
			nfa.HeadState = entry;
			nfa.TailState = exit;
		}
コード例 #10
0
		/// <summary>
		/// 根据当前的正则表达式构造 NFA。
		/// </summary>
		/// <param name="nfa">要构造的 NFA。</param>
		internal override void BuildNfa(Nfa nfa)
		{ }
コード例 #11
0
ファイル: lg.cs プロジェクト: vkarthim/liblcm
    /// <summary>
    /// Reads the lexer script from <c>m_inFile</c> line by line, processes
    /// <c>%</c> directives and rule lines, then builds one DFA per start state
    /// and emits the generated lexer.
    /// </summary>
    /// <remarks>
    /// Each non-directive line is treated as a rule: an optional
    /// <c>&lt;startstate&gt;</c> prefix, a regular expression, and an action.
    /// An action beginning with <c>%Name</c> declares a token class on first
    /// use; <c>%Name:Base</c> selects <c>Base</c> instead of <c>TOKEN</c> as its
    /// base class. Errors are reported through the error handler rather than
    /// thrown by this method directly.
    /// </remarks>
    void _Create()
    {
        m_outFile.WriteLine("using System;using Tools;");
        m_tokens = new Tokens(erh);
        string buf  = "";
        string str  = "";
        string name = "";
        string startsym;
        Nfa    nfa;
        int    p, q, max;

        Console.WriteLine("Reading Input File");
        while (!m_inFile.Eof())
        {
            buf      = m_inFile.ReadLine();
            startsym = "YYINITIAL";   // rules belong to YYINITIAL unless a <state> prefix says otherwise
            max      = buf.Length;
            p        = 0;
            // presumably White skips blanks and returns false when nothing is left on the line
            if (!White(buf, ref p, max))
            {
                continue;
            }
            if (buf[p] == '%')
            {             // directive
                // %lexer
                if (buf.Length >= p + 6 && "%lexer".Equals(buf.Substring(p, 6)))
                {
                    m_lexerseen = true;
                    p          += 6;
                    if (!White(buf, ref p, max))
                    {
                        continue;
                    }
                    q = p;
                    NonWhite(buf, ref p, max);
                    if (q != p)
                    {
                        // optional word after %lexer is stored as the output name
                        m_outname = buf.Substring(q, p - q);
                    }
                    continue;
                }
                // %encoding
                if (buf.Length >= p + 9 && "%encoding".Equals(buf.Substring(p, 9)))
                {
                    p += 9; White(buf, ref p, max);
                    q  = p;
                    NonWhite(buf, ref p, max);
                    m_tokens.InputEncoding = buf.Substring(q, p - q);
                    continue;
                }
                // %namespace
                if (buf.Length >= p + 10 && "%namespace".Equals(buf.Substring(p, 10)))
                {
                    p += 10; White(buf, ref p, max);
                    q  = p;
                    NonWhite(buf, ref p, max);
                    m_outFile.WriteLine("namespace " + buf.Substring(q, p - q) + " {");
                    m_namespace = true;
                    continue;
                }
                // %define
                if (buf.Length >= p + 7 && "%define".Equals(buf.Substring(p, 7)))
                {
                    p += 7; White(buf, ref p, max);
                    q  = p;
                    if (!NonWhite(buf, ref p, max))
                    {
                        erh.Error(new CSToolsException(44, "Bad define"));
                        continue;
                    }
                    name = buf.Substring(q, p - q);
                    p++;
                    if (White(buf, ref p, max))
                    {
                        // the rest of the line is the definition body
                        defines[name] = buf.Substring(p, max - p);
                    }
                }
                else
                // % token/node
                if (buf.Length >= p + 6 && "%token".Equals(buf.Substring(p, 6)))
                {
                    EmitClassDefin(buf, ref p, max, m_inFile, "TOKEN", out str, out name, true);
                }
                else if (buf.Length >= p + 5 && "%node".Equals(buf.Substring(p, 5)))
                {
                    EmitClassDefin(buf, ref p, max, m_inFile, "NODE", out str, out name, true);
                }
                else if (buf.Length >= p + 2 && "%{".Equals(buf.Substring(p, 2)))
                {
                    CopyCode();
                }
                else if (buf.Length >= p + 9 && "%declare{".Equals(buf.Substring(p, 9)))
                {
                    // advance by 8, not 9, so that p is left on the opening brace
                    p        += 8;
                    m_actvars = ToBraceIfFound(ref buf, ref p, ref max, m_inFile);
                    // drop the first and last character of the returned text (presumably the enclosing braces)
                    m_actvars = m_actvars.Substring(1, m_actvars.Length - 2);
                }
                else
                {
                    m_tokens.erh.Error(new CSToolsException(8, "Unknown directive " + buf.Substring(p, max - p)));
                }
                continue;
            }
            else if (buf[p] == '<')
            {              // startstate
                q = p++;
                while (p < max && buf[p] != '>')
                {
                    p++;
                }
                // p == max here means the closing '>' was never found
                if (p++ == max)
                {
                    m_tokens.erh.Error(new CSToolsException(25, "Bad startsymbol"));
                    continue;
                }
                startsym = buf.Substring(q + 1, p - q - 2);
                White(buf, ref p, max);
            }
            q = p;           // can't simply look for nonwhite space here because embedded spaces
            GetRegex(buf, ref p, max);
            string trgx = buf.Substring(q, p - q);
            if (m_tokens.toupper)
            {
                trgx = trgx.ToUpper();
            }
            Regex rgx  = new Regex(this, q, trgx);
            Nfa   nfa1 = new Nfa(this, rgx);
            // the first rule seen for a start state creates that state's NFA
            if (!m_startstates.Contains(startsym))
            {
                m_startstates[startsym] = new Nfa(this);
            }
            nfa = (Nfa)m_startstates[startsym];
            nfa.AddEps(nfa1);
            White(buf, ref p, max);
            m_actions[nfa1.m_end.m_state] = nfa1.m_end;
            // handle multiline actions enclosed in {}
            nfa1.m_end.m_sTerminal = ToBraceIfFound(ref buf, ref p, ref max, m_inFile);
            // examine action string
            if (nfa1.m_end.m_sTerminal.Length > 0 && nfa1.m_end.m_sTerminal[0] == '%')
            {
                string tokClass, b = nfa1.m_end.m_sTerminal;
                q   = 1;
                max = b.Length;
                int n;
                for (n = 0; q < max && b[q] != ' ' && b[q] != '\t' && b[q] != '\n' && b[q] != '{' && b[q] != ':'; q++, n++) // extract the class name
                {
                    ;
                }
                tokClass = b.Substring(1, n);                // new-style auto token construction
                object      ob    = m_tokens.tokens[tokClass];
                TokClassDef t     = (TokClassDef)ob;
                bool        isNew = (t == null);
                // check for initialisation action following %name
                string init     = b.Substring(n + 1, b.Length - n - 1);
                string bas1     = "TOKEN";
                bool   haveInit = false;
                for (int j = 0; j < init.Length; j++)
                {
                    if (init[j] == '{')
                    {
                        haveInit = true;
                        break;
                    }
                    else if (init[j] == ':')
                    {
                        // Explicit base class. Step over the ':' itself and any blanks
                        // that follow it; otherwise the ':' would be copied into the
                        // base-class name. Both scans are bounded by init.Length so an
                        // action that ends right after the name cannot index past the end.
                        bas1 = "";
                        j++;
                        while (j < init.Length && (init[j] == ' ' || init[j] == '\t' || init[j] == '\r'))
                        {
                            j++;
                        }
                        while (j < init.Length && init[j] != ' ' && init[j] != '\t' && init[j] != '\r' && init[j] != '{' && init[j] != '\n')
                        {
                            bas1 += init[j];
                            j++;
                        }
                        break;
                    }
                }
                if (isNew && tokClass != "TOKEN")
                {                                              // this token class has not been declared. Do so now
                    t = new TokClassDef(this, tokClass, bas1); // updates TOKEN.tokens
                    m_outFile.WriteLine("//%{0}", tokClass);
                    m_outFile.WriteLine(@"/// <summary/>");
                    m_outFile.WriteLine(String.Format("public class {0} : {1} ", tokClass, bas1) + "{");
                    m_outFile.WriteLine(@"/// <summary/>");
                    m_outFile.WriteLine(" public override string yyname() { return \"" + tokClass + "\";}");
                    m_outFile.WriteLine(@"/// <summary/>");
                    m_outFile.WriteLine(@"/// <param name='yyl'></param>");
                    m_outFile.WriteLine(" public " + tokClass + "(Lexer yyl):base(yyl) {}}");
                }
                if (haveInit)
                {
                    init = init.Replace("yylval", "yymval");                    // yuk: avoid the next line munging yylval
                    init = FixActions(init);
                    init = init.Replace("yymval", "yylval");
                    nfa1.m_end.m_sTerminal = "%" + NewConstructor(t, init);
                }
            }
        }
        if (!m_lexerseen)
        {
            m_tokens.erh.Error(new CSToolsException(26, "No %lexer directive detected: possibly incorrect text encoding?"));
        }
        Console.WriteLine("Constructing DFAs");
        foreach (string s in m_startstates.Keys)
        {
            Dfa d = new Dfa((Nfa)m_startstates[s]);
            m_tokens.starts[s] = d;
            if (d.m_actions != null)
            {
                Console.WriteLine("Warning: This lexer script generates an infinite token stream on bad input");
            }
        }
        Console.WriteLine("Output phase");
        Emit(m_actions, m_actvars, m_namespace, m_showDfa);
        Console.WriteLine("End of Create");
        // repeat the above warning for the initial start state. A script that never
        // added a rule to YYINITIAL has no DFA stored under that key, so check for
        // null before dereferencing.
        // NOTE(review): assumes the starts table returns null for a missing key — confirm.
        Dfa initialDfa = m_tokens.starts["YYINITIAL"] as Dfa;
        if (initialDfa != null && initialDfa.m_actions != null)
        {
            Console.WriteLine("Warning: This lexer script generates an infinite token stream on bad input");
        }
    }