Inheritance: Regex
Example #1
0
		/* 
			Construct a Regex from a given string

		1.  First examine the given string. 
			If it is empty, there is nothing to do, so return (having cleared m_sub as a precaution).
		2.  Look to see if the string begins with a bracket ( . If so, find the matching ) . 
			This is not as simple as it might be because )s inside quotes or [] or escaped will not count.
			Recursively call the constructor for the regular expression between the () s. 
			Mark everything up to the ) as used, and go to step 9.
		3.  Look to see if the string begins with a bracket [ . If so, find the matching ] , watching for escapes.
			Construct a ReRange for everything between the []s. 
			Mark everything up to the ] as used, and go to step 9.
		4.  Look to see if the string begins with a ' or " . If so, build the contents interpreting 
			escaped special characters correctly, until the matching quote is reached. 
			Construct a ReStr for the contents, mark everything up to the final quote as used, and go to step 9.
		4a.  Look to see if the string begins with a U' or U" . If so, build the contents interpreting 
			escaped special characters correctly, until the matching quote is reached. 
			Construct a ReUStr for the contents, mark everything up to the final quote as used, and go to step 9.
		5.  Look to see if the string begins with a \ . 
			If so, build a ReStr for the next character (special action for n, t, r and v), 
			mark it as used, and go to step 9.
		6.  Look to see if the string begins with a { . 
			If so, find the matching }, lookup the symbolic name in the definitions table, 
			recursively call this constructor on the contents, 
			mark everything up to the } as used, and go to step 9.
		7.  Look to see if the string begins with a dot. 
			If so, construct a ReRange("[^\n]"), mark the . as used, and go to step 9.
		8.  At this point we conclude that there is a simple character at the start of the regular expression. 
			Construct a ReStr for it, mark it as used, and go to step 9.
		9.  If the string is exhausted, return. 
			We have a simple Regex whose m_sub contains what we have constructed.
		10.  If the next character is a ? , *, or +, construct a ReOpt, ReStar, or RePlus respectively 
			out of m_sub, and make m_sub point to this new class instead. Mark the character as used.
		11.  If the string is exhausted, return.
		12.  If the next character is a | , build a ReAlt using the m_sub we have and the rest of the string.
		13.  Otherwise build a ReCat using the m_sub we have and the rest of the string.
		*/
		/// <summary>
		/// Builds a Regex node from the regular expression text <paramref name="str"/>.
		/// The leading atom of the string is parsed into m_sub, an optional ?, * or + suffix
		/// is wrapped around it, and any remaining text is handed to ReAlt (after a |) or ReCat.
		/// Ill-formed input (unterminated group, range, quote or definition name, or a
		/// backslash with nothing after it) is reported through tks.erh.Error.
		/// </summary>
		/// <param name="tks">Generator context; its defines table and error handler (erh) are used here.</param>
		/// <param name="p">Position of <paramref name="str"/>, passed to tks.sourceLineInfo when reporting an error.</param>
		/// <param name="str">The regular expression text to parse.</param>
		public Regex(TokensGen tks,int p,string str) 
		{
			int n = str.Length;
			int nlp = 0;		// depth of nested ( seen while looking for the matching )
			int lbrack = 0;		// non-zero while scanning inside [ ]
			int quote = 0;		// the open quote character while inside a quoted string, else 0
			int j;				// index of the first character of str not yet consumed
			char ch;

			// 1. Empty string: nothing to build, m_sub stays null.
			m_sub = null;
			if (n==0)
				return;
			// 2. Bracketed expression: find the matching ) and parse the contents recursively.
			else if (str[0]=='(') 
			{
				for (j = 1; j < n; j++)
				{
					if (str[j] == '\\')
						j++;					// an escaped character never counts as a delimiter
					else if (str[j] == ']' && lbrack > 0)
						lbrack = 0;
					else if (lbrack > 0)
						continue;				// everything inside [ ] is ignored
					else if (str[j] == '"' || str[j] == '\'')
					{
						if (quote == str[j])
							quote = 0;
						else if (quote == 0)
							quote = str[j];
					}
					else if (quote > 0)
						continue;				// everything inside quotes is ignored
					else if (str[j] == '[')
						lbrack++;
					else if (str[j] == '(')
						nlp++;
					else if (str[j] == ')' && nlp-- == 0)
						break;
				}
				// j can overshoot n by one when the text ends in a backslash, so test >= rather than ==.
				if (j>=n)
					goto bad;
				m_sub = new Regex (tks,p+1,str.Substring(1,j-1));
				j++;
			} 
			// 3. Character range: find the closing ] (skipping escapes) and build a ReRange from [ ... ].
			else if (str[0]=='[') 
			{
				for (j=1;j<n && str[j]!=']';j++)
					if (str[j]=='\\')
						j++;
				// As above, a trailing backslash leaves j at n+1.
				if (j>=n)
					goto bad;
				m_sub = new ReRange(tks,str.Substring(0,j+1));
				j++;
			} 
			// 4. Quoted string: collect the contents up to the matching quote, translating escapes.
			else if (str[0] == '\'' || str[0] == '"') 
			{
				StringBuilder qs = new StringBuilder();
				for (j=1;j<n && str[j]!=str[0];j++)
					if (str[j]=='\\')
					{
						// A backslash must be followed by a character; a trailing one is ill-formed.
						if (++j>=n)
							goto bad;
						switch (str[j]) 
						{
							case 'n':	qs.Append('\n'); break;
							case 'r':	qs.Append('\r'); break;
							case 't':	qs.Append('\t'); break;
							case 'v':	qs.Append('\v'); break;
							case '\\':	qs.Append('\\'); break;
							case '\'':	qs.Append('\''); break;
							case '0':	qs.Append((char) 0); break;  // 4.7f
							case '"':	qs.Append('"'); break;
							case '\n':	break;		// backslash followed by a newline contributes nothing
							default:	qs.Append(str[j]); break;
						}
					}
					else
						qs.Append(str[j]);
				if (j>=n)
					goto bad;
				j++;
				m_sub = new ReStr(tks,qs.ToString());
			} 
			// 4a. U-prefixed quoted string: same as step 4 but the result is a ReUStr.
			// NOTE(review): unlike step 4 there is no '0' case here, so \0 yields the character '0' - confirm this is intended.
			else if (str.StartsWith("U\"")||str.StartsWith("U'"))
			{
				StringBuilder qs = new StringBuilder();
				for (j=2;j<n && str[j]!=str[1];j++)
					if (str[j]=='\\')
					{
						// A backslash must be followed by a character; a trailing one is ill-formed.
						if (++j>=n)
							goto bad;
						switch (str[j]) 
						{
							case 'n':	qs.Append('\n'); break;
							case 'r':	qs.Append('\r'); break;
							case 't':	qs.Append('\t'); break;
							case 'v':	qs.Append('\v'); break;
							case '\\':	qs.Append('\\'); break;
							case '\'':	qs.Append('\''); break;
							case '"':	qs.Append('"'); break;
							case '\n':	break;		// backslash followed by a newline contributes nothing
							default:	qs.Append(str[j]); break;
						}
					}
					else
						qs.Append(str[j]);
				if (j>=n)
					goto bad;
				j++;
				m_sub = new ReUStr(tks,qs.ToString());
			} 
			// 5. Escaped character: build a ReStr for the character after the backslash (n, t, r, v are translated).
			else if (str[0]=='\\') 
			{
				// A lone backslash has no character to escape.
				if (n<2)
					goto bad;
				switch (ch = str[1]) 
				{
					case 'n': ch = '\n'; break;
					case 't': ch = '\t'; break;
					case 'r': ch = '\r'; break;
					case 'v': ch = '\v'; break;
				}
				m_sub = new ReStr(tks,ch);
				j = 2;
			} 
			// 6. {name}: look the name up in tks.defines; a known name is parsed recursively,
			//    an unknown one is passed to ReCategory.
			else if (str[0]=='{') 
			{
				for (j=1;j<n && str[j]!='}';j++)
					;
				if (j>=n)
					goto bad;
				string ds = str.Substring(1,j-1);
				string s = (string)tks.defines[ds];
				if (s==null) 
					m_sub = new ReCategory(tks,ds);
				else
					m_sub = new Regex(tks,p+1,s);
				j++;
			} 
			else 
			{
				// 7. A dot becomes the range [^\n].
				if (str[0]=='.')
					m_sub = new ReRange(tks,"[^\n]");
				// 8. Anything else is a simple literal character.
				else
					m_sub = new ReStr(tks,str[0]);
				j = 1;
			}
			// 9. Nothing left: m_sub holds the whole expression.
			if (j>=n)
				return;
			// 10. A ?, * or + suffix wraps m_sub in ReOpt, ReStar or RePlus.
			if (str[j]=='?') 
			{
				m_sub = new ReOpt(m_sub);
				j++;
			} 
			else if (str[j]=='*') 
			{
				m_sub = new ReStar(m_sub);
				j++;
			} 
			else if (str[j]=='+') 
			{
				m_sub = new RePlus(m_sub);
				j++;
			}
			// 11. Nothing left after the suffix.
			if (j>=n)
				return;
			// 12. A | makes an alternation between m_sub and the rest of the string.
			if (str[j]=='|')
				m_sub = new ReAlt(tks,m_sub,p+j+1,str.Substring(j+1,n-j-1));
			// 13. Otherwise the rest of the string is concatenated after m_sub.
			else
				m_sub = new ReCat(tks,m_sub,p+j,str.Substring(j,n-j));
			return;
			bad:
				tks.erh.Error(new CSToolsFatalException(1,tks.sourceLineInfo(p),str,"ill-formed regular expression "+str));
		}	
Example #2
0
        /*
         *      Construct a Regex from a given string
         *
         * 1.  First examine the given string.
         *      If it is empty, there is nothing to do, so return (having cleared m_sub as a precaution).
         * 2.  Look to see if the string begins with a bracket ( . If so, find the matching ) .
         *      This is not as simple as it might be because )s inside quotes or [] or escaped will not count.
         *      Recursively call the constructor for the regular expression between the () s.
         *      Mark everything up to the ) as used, and go to step 9.
         * 3.  Look to see if the string begins with a bracket [ . If so, find the matching ] , watching for escapes.
         *      Construct a ReRange for everything between the []s.
         *      Mark everything up to the ] as used, and go to step 9.
         * 4.  Look to see if the string begins with a ' or " . If so, build the contents interpreting
         *      escaped special characters correctly, until the matching quote is reached.
         *      Construct a ReStr for the contents, mark everything up to the final quote as used, and go to step 9.
         * 4a.  Look to see if the string begins with a U' or U" . If so, build the contents interpreting
         *      escaped special characters correctly, until the matching quote is reached.
         *      Construct a ReUStr for the contents, mark everything up to the final quote as used, and go to step 9.
         * 5.  Look to see if the string begins with a \ .
         *      If so, build a ReStr for the next character (special action for n, t, r and v),
         *      mark it as used, and go to step 9.
         * 6.  Look to see if the string begins with a { .
         *      If so, find the matching }, lookup the symbolic name in the definitions table,
         *      recursively call this constructor on the contents,
         *      mark everything up to the } as used, and go to step 9.
         * 7.  Look to see if the string begins with a dot.
         *      If so, construct a ReRange("[^\n]"), mark the . as used, and go to step 9.
         * 8.  At this point we conclude that there is a simple character at the start of the regular expression.
         *      Construct a ReStr for it, mark it as used, and go to step 9.
         * 9.  If the string is exhausted, return.
         *      We have a simple Regex whose m_sub contains what we have constructed.
         * 10.  If the next character is a ? , *, or +, construct a ReOpt, ReStar, or RePlus respectively
         *      out of m_sub, and make m_sub point to this new class instead. Mark the character as used.
         * 11.  If the string is exhausted, return.
         * 12.  If the next character is a | , build a ReAlt using the m_sub we have and the rest of the string.
         * 13.  Otherwise build a ReCat using the m_sub we have and the rest of the string.
         */
        /// <summary>
        /// Builds a Regex node from the regular expression text <paramref name="str"/>.
        /// The leading atom of the string is parsed into m_sub, an optional ?, * or + suffix
        /// is wrapped around it, and any remaining text is handed to ReAlt (after a |) or ReCat.
        /// Ill-formed input (unterminated group, range, quote or definition name, or a
        /// backslash with nothing after it) is reported through tks.erh.Error.
        /// </summary>
        /// <param name="tks">Generator context; its defines table and error handler (erh) are used here.</param>
        /// <param name="p">Position of <paramref name="str"/>, passed to tks.sourceLineInfo when reporting an error.</param>
        /// <param name="str">The regular expression text to parse.</param>
        public Regex(TokensGen tks, int p, string str)
        {
            int  n      = str.Length;
            int  nlp    = 0;    // depth of nested ( seen while looking for the matching )
            int  lbrack = 0;    // non-zero while scanning inside [ ]
            int  quote  = 0;    // the open quote character while inside a quoted string, else 0
            int  j;             // index of the first character of str not yet consumed
            char ch;

            // 1. Empty string: nothing to build, m_sub stays null.
            m_sub = null;
            if (n == 0)
            {
                return;
            }
            // 2. Bracketed expression: find the matching ) and parse the contents recursively.
            else if (str[0] == '(')
            {
                for (j = 1; j < n; j++)
                {
                    if (str[j] == '\\')
                    {
                        j++;            // an escaped character never counts as a delimiter
                    }
                    else if (str[j] == ']' && lbrack > 0)
                    {
                        lbrack = 0;
                    }
                    else if (lbrack > 0)
                    {
                        continue;       // everything inside [ ] is ignored
                    }
                    else if (str[j] == '"' || str[j] == '\'')
                    {
                        if (quote == str[j])
                        {
                            quote = 0;
                        }
                        else if (quote == 0)
                        {
                            quote = str[j];
                        }
                    }
                    else if (quote > 0)
                    {
                        continue;       // everything inside quotes is ignored
                    }
                    else if (str[j] == '[')
                    {
                        lbrack++;
                    }
                    else if (str[j] == '(')
                    {
                        nlp++;
                    }
                    else if (str[j] == ')' && nlp-- == 0)
                    {
                        break;
                    }
                }
                // j can overshoot n by one when the text ends in a backslash, so test >= rather than ==.
                if (j >= n)
                {
                    goto bad;
                }
                m_sub = new Regex(tks, p + 1, str.Substring(1, j - 1));
                j++;
            }
            // 3. Character range: find the closing ] (skipping escapes) and build a ReRange from [ ... ].
            else if (str[0] == '[')
            {
                for (j = 1; j < n && str[j] != ']'; j++)
                {
                    if (str[j] == '\\')
                    {
                        j++;
                    }
                }
                // As above, a trailing backslash leaves j at n+1.
                if (j >= n)
                {
                    goto bad;
                }
                m_sub = new ReRange(tks, str.Substring(0, j + 1));
                j++;
            }
            // 4. Quoted string: collect the contents up to the matching quote, translating escapes.
            else if (str[0] == '\'' || str[0] == '"')
            {
                StringBuilder qs = new StringBuilder();
                for (j = 1; j < n && str[j] != str[0]; j++)
                {
                    if (str[j] == '\\')
                    {
                        // A backslash must be followed by a character; a trailing one is ill-formed.
                        if (++j >= n)
                        {
                            goto bad;
                        }
                        switch (str[j])
                        {
                        case 'n':       qs.Append('\n'); break;

                        case 'r':       qs.Append('\r'); break;

                        case 't':       qs.Append('\t'); break;

                        case 'v':       qs.Append('\v'); break;

                        case '\\':      qs.Append('\\'); break;

                        case '\'':      qs.Append('\''); break;

                        case '0':       qs.Append((char)0); break;   // 4.7f

                        case '"':       qs.Append('"'); break;

                        case '\n':      break;      // backslash followed by a newline contributes nothing

                        default:        qs.Append(str[j]); break;
                        }
                    }
                    else
                    {
                        qs.Append(str[j]);
                    }
                }
                if (j >= n)
                {
                    goto bad;
                }
                j++;
                m_sub = new ReStr(tks, qs.ToString());
            }
            // 4a. U-prefixed quoted string: same as step 4 but the result is a ReUStr.
            // NOTE(review): unlike step 4 there is no '0' case here, so \0 yields the character '0' - confirm this is intended.
            else if (str.StartsWith("U\"") || str.StartsWith("U'"))
            {
                StringBuilder qs = new StringBuilder();
                for (j = 2; j < n && str[j] != str[1]; j++)
                {
                    if (str[j] == '\\')
                    {
                        // A backslash must be followed by a character; a trailing one is ill-formed.
                        if (++j >= n)
                        {
                            goto bad;
                        }
                        switch (str[j])
                        {
                        case 'n':       qs.Append('\n'); break;

                        case 'r':       qs.Append('\r'); break;

                        case 't':       qs.Append('\t'); break;

                        case 'v':       qs.Append('\v'); break;

                        case '\\':      qs.Append('\\'); break;

                        case '\'':      qs.Append('\''); break;

                        case '"':       qs.Append('"'); break;

                        case '\n':      break;      // backslash followed by a newline contributes nothing

                        default:        qs.Append(str[j]); break;
                        }
                    }
                    else
                    {
                        qs.Append(str[j]);
                    }
                }
                if (j >= n)
                {
                    goto bad;
                }
                j++;
                m_sub = new ReUStr(tks, qs.ToString());
            }
            // 5. Escaped character: build a ReStr for the character after the backslash (n, t, r, v are translated).
            else if (str[0] == '\\')
            {
                // A lone backslash has no character to escape.
                if (n < 2)
                {
                    goto bad;
                }
                switch (ch = str[1])
                {
                case 'n': ch = '\n'; break;

                case 't': ch = '\t'; break;

                case 'r': ch = '\r'; break;

                case 'v': ch = '\v'; break;
                }
                m_sub = new ReStr(tks, ch);
                j     = 2;
            }
            // 6. {name}: look the name up in tks.defines; a known name is parsed recursively,
            //    an unknown one is passed to ReCategory.
            else if (str[0] == '{')
            {
                for (j = 1; j < n && str[j] != '}'; j++)
                {
                    ;
                }
                if (j >= n)
                {
                    goto bad;
                }
                string ds = str.Substring(1, j - 1);
                string s  = (string)tks.defines[ds];
                if (s == null)
                {
                    m_sub = new ReCategory(tks, ds);
                }
                else
                {
                    m_sub = new Regex(tks, p + 1, s);
                }
                j++;
            }
            else
            {
                // 7. A dot becomes the range [^\n].
                if (str[0] == '.')
                {
                    m_sub = new ReRange(tks, "[^\n]");
                }
                // 8. Anything else is a simple literal character.
                else
                {
                    m_sub = new ReStr(tks, str[0]);
                }
                j = 1;
            }
            // 9. Nothing left: m_sub holds the whole expression.
            if (j >= n)
            {
                return;
            }
            // 10. A ?, * or + suffix wraps m_sub in ReOpt, ReStar or RePlus.
            if (str[j] == '?')
            {
                m_sub = new ReOpt(m_sub);
                j++;
            }
            else if (str[j] == '*')
            {
                m_sub = new ReStar(m_sub);
                j++;
            }
            else if (str[j] == '+')
            {
                m_sub = new RePlus(m_sub);
                j++;
            }
            // 11. Nothing left after the suffix.
            if (j >= n)
            {
                return;
            }
            // 12. A | makes an alternation between m_sub and the rest of the string.
            if (str[j] == '|')
            {
                m_sub = new ReAlt(tks, m_sub, p + j + 1, str.Substring(j + 1, n - j - 1));
            }
            // 13. Otherwise the rest of the string is concatenated after m_sub.
            else
            {
                m_sub = new ReCat(tks, m_sub, p + j, str.Substring(j, n - j));
            }
            return;

bad:
            tks.erh.Error(new CSToolsFatalException(1, tks.sourceLineInfo(p), str, "ill-formed regular expression " + str));
        }