Пример #1
0
        /*
            Construct a Regex from a given string

        1.  First examine the given string.
            If it is empty, there is nothing to do, so return (having cleared m_sub as a precaution).
        2.  Look to see if the string begins with a bracket ( . If so, find the matching ) .
            This is not as simple as it might be because )s inside quotes or [] or escaped will not count.
            Recursively call the constructor for the regular expression between the () s.
            Mark everything up to the ) as used, and go to step 9.
        3.  Look to see if the string begins with a bracket [ . If so, find the matching ] , watching for escapes.
            Construct a ReRange for everything between the []s.
            Mark everything up to the ] as used, and go to step 9.
        4.  Look to see if the string begins with a ' or " . If so, build the contents interpreting
            escaped special characters correctly, until the matching quote is reached.
            Construct a ReStr for the contents, mark everything up to the final quote as used, and go to step 9.
        4a.  Look to see if the string begins with a U' or U" . If so, build the contents interpreting
            escaped special characters correctly, until the matching quote is reached.
            Construct a ReUStr for the contents, mark everything up to the final quote as used, and go to step 9.
        5.  Look to see if the string begins with a \ .
            If so, build a ReStr for the next character (special action for ntr),
            mark it as used, and go to step 9.
        6.  Look to see if the string begins with a { .
            If so, find the matching }, lookup the symbolic name in the definitions table,
            recursively call this constructor on the contents,
            mark everything up to the } as used, and go to step 9.
        7.  Look to see if the string begins with a dot.
            If so, construct a ReRange("[^\n]"), mark the . as used, and go to step 9.
        8.  At this point we conclude that there is a simple character at the start of the regular expression.
            Construct a ReStr for it, mark it as used, and go to step 9.
        9.  If the string is exhausted, return.
            We have a simple Regex whose m_sub contains what we have constructed.
        10.  If the next character is a ? , *, or +, construct a ReOpt, ReStar, or RePlus respectively
            out of m_sub, and make m_sub point to this new class instead. Mark the character as used.
        11.  If the string is exhausted, return.
        12.  If the next character is a | , build a ReAlt using the m_sub we have and the rest of the string.
        13.  Otherwise build a ReCat using the m_sub we have and the rest of the string.
        */
        /// <exclude/>
        /// <summary>
        /// Parses the leading element of <paramref name="str"/> into m_sub, applies an optional
        /// ?, * or + suffix to it, and hands any remaining text to a ReAlt (after |) or a ReCat.
        /// Malformed input is reported through tks.erh.Error rather than by an index exception.
        /// </summary>
        /// <param name="tks">Generator context; its defines table and erh error handler are used here.</param>
        /// <param name="p">Position passed to tks.sourceLineInfo when an error is reported; nested
        /// expressions are constructed with p advanced by their offset inside str.</param>
        /// <param name="str">Text of the regular expression to parse.</param>
        public Regex(TokensGen tks,int p,string str)
        {
            int n = str.Length;
            int nlp = 0;      // count of nested ( still open while looking for the matching )
            int lbrack = 0;   // non-zero while the scan is inside [...]
            int quote = 0;    // the quote character currently open, or 0 when outside quotes
            int j;            // number of characters of str consumed by the leading element
            char ch;

            //1.  First examine the given string.
            //	If it is empty, there is nothing to do, so return (having cleared m_sub as a precaution).
            m_sub = null;
            if (n==0)
                return;

            //2.  Look to see if the string begins with a bracket ( . If so, find the matching ) .
            //	)s inside quotes or [] or escaped do not count.
            //	Recursively call the constructor for the regular expression between the () s.
            if (str[0]=='(')
            { // identify a bracketed expression
                for (j=1;j<n;j++)
                {
                    if (str[j]=='\\')
                        j++;
                    else if (str[j]=='[' && quote==0 && lbrack==0)
                        lbrack++;
                    else if (str[j]==']' && lbrack>0)
                        lbrack = 0;
                    else if ((str[j]=='"' || str[j]=='\'') && lbrack==0)
                    {
                        // Quotes are ordinary characters inside [...] (step 3 treats them that way too),
                        // so they only open or close a quoted section outside a range.
                        if (quote==str[j])
                            quote = 0;
                        else if (quote==0)
                            quote = str[j];
                    }
                    else if (str[j]=='(' && quote==0 && lbrack==0)
                        nlp++;
                    else if (str[j]==')' && quote==0 && lbrack==0 && nlp--==0)
                        break;
                }
                // j can be n+1 when the string ends in a backslash, hence >= rather than ==.
                if (j>=n)
                    goto bad;
                m_sub = new Regex (tks,p+1,str.Substring(1,j-1));
                j++;
            }
            //3.  Look to see if the string begins with a bracket [ . If so, find the matching ] , watching for escapes.
            //	Construct a ReRange for everything between the []s (brackets included).
            else if (str[0]=='[')
            {	   	// range of characters
                for (j=1;j<n && str[j]!=']';j++)
                {
                    if (str[j]=='\\')
                        j++;
                }
                // As above, a trailing backslash can push j past n.
                if (j>=n)
                    goto bad;
                m_sub = new ReRange(tks,str.Substring(0,j+1));
                j++;
            }
            //4.  Look to see if the string begins with a ' or " . If so, build the contents interpreting
            //	escaped special characters, until the matching quote is reached.
            //	Construct a ReStr for the contents.
            else if (str[0]=='\'' || str[0]=='"')
            {  // quoted string needs special treatment
                StringBuilder qs = new StringBuilder();
                j = ScanQuoted(str,1,str[0],qs);
                if (j<0)
                    goto bad;
                j++;
                m_sub = new ReStr(tks,qs.ToString());
            }
            //4a.  Look to see if the string begins with a U' or U" . Same as step 4, but
            //	construct a ReUStr for the contents.
            else if (n>1 && str[0]=='U' && (str[1]=='"' || str[1]=='\''))
            {  // quoted string needs special treatment
                StringBuilder qs = new StringBuilder();
                j = ScanQuoted(str,2,str[1],qs);
                if (j<0)
                    goto bad;
                j++;
                m_sub = new ReUStr(tks,qs.ToString());
            }
            //5.  Look to see if the string begins with a \ .
            //	If so, build a ReStr for the next character (special action for n, t, r).
            else if (str[0]=='\\')
            {
                // A lone backslash has nothing to escape.
                if (n<2)
                    goto bad;
                switch (ch = str[1])
                {
                    case 'n': ch = '\n'; break;
                    case 't': ch = '\t'; break;
                    case 'r': ch = '\r'; break;
                }
                m_sub = new ReStr(tks,ch);
                j = 2;
            }
            //6.  Look to see if the string begins with a { .
            //	If so, find the matching }, look up the symbolic name in the definitions table,
            //	and recursively call this constructor on the definition; a name with no
            //	definition becomes a ReCategory.
            else if (str[0]=='{')
            {
                for (j=1;j<n && str[j]!='}';j++)
                    ;
                if (j==n)
                    goto bad;
                string ds = str.Substring(1,j-1);
                string s = (string)tks.defines[ds];
                if (s==null)
                    m_sub = new ReCategory(tks,ds);
                else
                    m_sub = new Regex(tks,p+1,s);
                j++;
            }
            else
            {	  // simple character at start of regular expression
                //7.  A dot becomes the range "anything but newline".
                if (str[0]=='.')
                    m_sub = new ReRange(tks,"[^\n]");
                //8.  Anything else is a literal character: construct a ReStr for it.
                else
                    m_sub = new ReStr(tks,str[0]);
                j = 1;
            }
            //9.  If the string is exhausted, return.
            //	We have a simple Regex whose m_sub contains what we have constructed.
            if (j>=n)
                return;
            //10.  If the next character is a ? , *, or +, construct a ReOpt, ReStar, or RePlus respectively
            //	out of m_sub, and make m_sub point to this new object instead. Mark the character as used.
            if (str[j]=='?')
            {
                m_sub = new ReOpt(m_sub);
                j++;
            }
            else if (str[j]=='*')
            {
                m_sub = new ReStar(m_sub);
                j++;
            }
            else if (str[j]=='+')
            {
                m_sub = new RePlus(m_sub);
                j++;
            }
            // 11.  If the string is exhausted, return.
            if (j>=n)
                return;
            // 12.  If the next character is a | , build a ReAlt using the m_sub we have and the rest of the string.
            if (str[j]=='|')
                m_sub = new ReAlt(tks,m_sub,p+j+1,str.Substring(j+1,n-j-1));
            // 13.  Otherwise build a ReCat using the m_sub we have and the rest of the string.
            else
                m_sub = new ReCat(tks,m_sub,p+j,str.Substring(j,n-j));
            return;
            bad:
                tks.erh.Error(new CSToolsFatalException(1,tks.sourceLineInfo(p),str,"ill-formed regular expression "+str));
        }

        // Copies the body of a quoted string into qs, translating backslash escapes.
        // Scanning starts at index start and stops at the first unescaped close character.
        // Returns the index of that closing quote, or -1 when str ends first
        // (which includes str ending immediately after a backslash).
        private static int ScanQuoted(string str,int start,char close,StringBuilder qs)
        {
            int n = str.Length;
            int j;
            for (j=start;j<n && str[j]!=close;j++)
            {
                if (str[j]!='\\')
                {
                    qs.Append(str[j]);
                    continue;
                }
                // Step onto the escaped character; there must be one.
                if (++j>=n)
                    return -1;
                switch (str[j])
                {
                    case 'n':	qs.Append('\n'); break;
                    case 'r':	qs.Append('\r'); break;
                    case 't':	qs.Append('\t'); break;
                    case '\\':	qs.Append('\\'); break;
                    case '\'':	qs.Append('\''); break;
                    case '"':	qs.Append('"'); break;
                    case '\n':	break; // backslash followed by a newline contributes nothing
                    default:	qs.Append(str[j]); break;
                }
            }
            return (j<n) ? j : -1;
        }
Пример #2
0
 /// <summary>
 /// Builds an NFA for a given regular expression: a new end node is created
 /// and stored in m_end, then the expression is asked to build itself into this object.
 /// </summary>
 /// <exclude/>
 public Nfa(TokensGen tks,Regex re)
     : base(tks)
 {
     // m_end must be in place before Build runs, since Build receives this object.
     this.m_end = new NfaNode(tks);
     re.Build(this);
 }
Пример #3
0
 /// <exclude/>
 /// <summary>
 /// Appends to m_arcs a new ArcEx pairing the expression <paramref name="re"/>
 /// with the node <paramref name="next"/>.
 /// </summary>
 public void AddArcEx(Regex re,NfaNode next)
 {
     ArcEx arc = new ArcEx(re, next);
     m_arcs.Add(arc);
 }
Пример #4
0
 /// <summary>
 /// Wraps an existing expression, which is kept in m_sub.
 /// NOTE(review): presumably represents the "+" (one or more) operator - confirm against the class definition.
 /// </summary>
 /// <param name="sub">The expression being wrapped.</param>
 public RePlus(Regex sub)
 {
     this.m_sub = sub;
 }
Пример #5
0
 /// <summary>
 /// Wraps an existing expression, which is kept in m_sub.
 /// NOTE(review): presumably represents the "*" (zero or more) operator - confirm against the class definition.
 /// </summary>
 /// <param name="sub">The expression being wrapped.</param>
 public ReStar(Regex sub)
 {
     this.m_sub = sub;
 }
Пример #6
0
 /// <summary>
 /// Wraps an existing expression, which is kept in m_sub.
 /// NOTE(review): presumably represents the "?" (optional) operator - confirm against the class definition.
 /// </summary>
 /// <param name="sub">The expression being wrapped.</param>
 public ReOpt(Regex sub)
 {
     this.m_sub = sub;
 }
Пример #7
0
 /// <exclude/>
 /// <summary>
 /// Pairs an already-built expression with a second one parsed from text:
 /// <paramref name="sub"/> is kept in m_sub and a new Regex built from
 /// <paramref name="str"/> is kept in m_next.
 /// </summary>
 /// <param name="tks">Passed through to the Regex constructor.</param>
 /// <param name="sub">The expression that was already parsed.</param>
 /// <param name="p">Position passed through to the Regex constructor for <paramref name="str"/>.</param>
 /// <param name="str">Text from which the following expression is parsed.</param>
 public ReCat(TokensGen tks,Regex sub, int p, string str)
 {
     Regex following = new Regex(tks, p, str);
     this.m_sub = sub;
     this.m_next = following;
 }
Пример #8
0
 /// <summary>
 /// Pairs an already-built expression with an alternative parsed from text:
 /// <paramref name="sub"/> is kept in m_sub and a new Regex built from
 /// <paramref name="str"/> is kept in m_alt.
 /// </summary>
 /// <param name="tks">Passed through to the Regex constructor.</param>
 /// <param name="sub">The expression that was already parsed.</param>
 /// <param name="p">Position passed through to the Regex constructor for <paramref name="str"/>.</param>
 /// <param name="str">Text from which the alternative expression is parsed.</param>
 public ReAlt(TokensGen tks,Regex sub,int p,string str)
 {
     Regex alternative = new Regex(tks, p, str);
     this.m_sub = sub;
     this.m_alt = alternative;
 }
Пример #9
0
 /// <summary>
 /// Creates an arc that records an expression and the node it leads to.
 /// </summary>
 /// <param name="re">Expression stored in m_ref.</param>
 /// <param name="next">Node stored in m_next.</param>
 public ArcEx(Regex re,NfaNode next)
 {
     this.m_ref = re;
     this.m_next = next;
 }
Пример #10
0
 /// <summary>
 /// Checks whether a string is a time of day in the form H:M or H:M:S
 /// (hours 0-23, minutes and seconds 0-59, each written with one or two digits).
 /// </summary>
 /// <param name="timeval">The text to check; null is treated as not a time.</param>
 /// <returns>true if the whole string is a time in that form; otherwise false.</returns>
 public static bool IsTime(string timeval)
 {
     // Regex.IsMatch throws on null input; a validator should simply answer "no".
     if (timeval == null)
     {
         return false;
     }

     // \z anchors at the true end of input. The previous $ also matched just before
     // a trailing newline, so "12:30\n" was wrongly accepted.
     return Regex.IsMatch(timeval, @"^((([0-1]?[0-9])|(2[0-3])):([0-5]?[0-9])(:[0-5]?[0-9])?)\z");
 }