/*
  Construct a Regex from a given string.

   1. First examine the given string. If it is empty, there is nothing to do,
      so return (having cleared m_sub as a precaution).
   2. If the string begins with a bracket ( , find the matching ) . This is not
      as simple as it might be, because )s inside quotes or [] or escaped do not
      count. Recursively call the constructor for the regular expression between
      the ()s. Mark everything up to the ) as used, and go to step 9.
   3. If the string begins with a bracket [ , find the matching ] , watching for
      escapes. Construct a ReRange for everything between the []s (brackets
      included). Mark everything up to the ] as used, and go to step 9.
   4. If the string begins with a ' or " , build the contents, interpreting
      escaped special characters correctly, until the matching quote is reached.
      Construct a ReStr for the contents, mark everything up to the final quote
      as used, and go to step 9.
  4a. If the string begins with U' or U" , do the same as step 4 but construct
      a ReUStr for the contents, and go to step 9.
   5. If the string begins with a \ , build a ReStr for the next character
      (special action for n, t, r), mark both as used, and go to step 9.
   6. If the string begins with a { , find the matching } and look up the
      symbolic name in the definitions table. If it is defined, recursively call
      this constructor on the definition; otherwise construct a ReCategory for
      the name. Mark everything up to the } as used, and go to step 9.
   7. If the string begins with a dot, construct a ReRange for "[^\n]", mark the
      . as used, and go to step 9.
   8. Otherwise there is a simple character at the start of the regular
      expression. Construct a ReStr for it, mark it as used, and go to step 9.
   9. If the string is exhausted, return. We have a simple Regex whose m_sub
      contains what we have constructed.
  10. If the next character is a ? , * or + , construct a ReOpt, ReStar or
      RePlus respectively out of m_sub, and make m_sub point to this new object
      instead. Mark the character as used.
  11. If the string is exhausted, return.
  12. If the next character is a | , build a ReAlt using the m_sub we have and
      the rest of the string.
  13. Otherwise build a ReCat using the m_sub we have and the rest of the string.

  Any ill-formed input (unmatched bracket or quote, dangling backslash) is
  reported through tks.erh.Error as a CSToolsFatalException.
*/
/// <exclude/>
/// <summary>Parses <paramref name="str"/> as a regular expression and stores the result in m_sub.</summary>
/// <param name="tks">Generator context; supplies the definitions table and the error handler.</param>
/// <param name="p">Position passed to tks.sourceLineInfo when an error is reported, and advanced for nested sub-expressions.</param>
/// <param name="str">Text of the regular expression to parse.</param>
public Regex(TokensGen tks,int p,string str)
{
    int n = str.Length;
    int nlp = 0;     // depth of nested ( seen outside quotes and []
    int lbrack = 0;  // non-zero while inside an unquoted [ ... ]
    int quote = 0;   // the open quote character, or 0 when not inside quotes
    int j;           // index of the first character not yet consumed
    char ch;

    // 1. Empty string: nothing to build.
    m_sub = null;
    if (n==0)
        return;

    if (str[0]=='(')
    {
        // 2. Bracketed sub-expression: locate the matching ')'.
        for (j=1;j<n;j++)
        {
            if (str[j]=='\\')
                j++; // skip the escaped character
            else if (str[j]=='[' && quote==0 && lbrack==0)
                lbrack++;
            else if (str[j]==']' && lbrack>0)
                lbrack = 0;
            else if (str[j]=='"' || str[j]=='\'')
            {
                if (quote==str[j])
                    quote = 0;
                else if (quote==0)
                    quote = str[j];
            }
            else if (str[j]=='(' && quote==0 && lbrack==0)
                nlp++;
            else if (str[j]==')' && quote==0 && lbrack==0 && nlp--==0)
                break;
        }
        // j can be n+1 when the last character is a backslash, hence >= rather than ==.
        if (j>=n)
            goto bad;
        m_sub = new Regex(tks,p+1,str.Substring(1,j-1));
        j++;
    }
    else if (str[0]=='[')
    {
        // 3. Character range: locate the closing ']', honouring escapes.
        for (j=1;j<n && str[j]!=']';j++)
            if (str[j]=='\\')
                j++;
        // j can be n+1 when the last character is a backslash, hence >= rather than ==.
        if (j>=n)
            goto bad;
        m_sub = new ReRange(tks,str.Substring(0,j+1));
        j++;
    }
    else if (str[0]=='\'' || str[0]=='"')
    {
        // 4. Quoted string.
        string qs = ScanQuoted(str,1,str[0],out j);
        if (qs==null)
            goto bad;
        j++;
        m_sub = new ReStr(tks,qs);
    }
    else if (str.StartsWith("U\"")||str.StartsWith("U'"))
    {
        // 4a. U-prefixed quoted string.
        string qs = ScanQuoted(str,2,str[1],out j);
        if (qs==null)
            goto bad;
        j++;
        m_sub = new ReUStr(tks,qs);
    }
    else if (str[0]=='\\')
    {
        // 5. Escaped single character. A lone backslash has nothing to escape.
        if (n<2)
            goto bad;
        switch (ch = str[1])
        {
            case 'n': ch = '\n'; break;
            case 't': ch = '\t'; break;
            case 'r': ch = '\r'; break;
        }
        m_sub = new ReStr(tks,ch);
        j = 2;
    }
    else if (str[0]=='{')
    {
        // 6. Symbolic name: use its definition if there is one, else treat it as a category.
        for (j=1;j<n && str[j]!='}';j++)
            ;
        if (j==n)
            goto bad;
        string ds = str.Substring(1,j-1);
        string s = (string)tks.defines[ds];
        if (s==null)
            m_sub = new ReCategory(tks,ds);
        else
            m_sub = new Regex(tks,p+1,s);
        j++;
    }
    else
    {
        // 7. A dot matches anything except newline.
        if (str[0]=='.')
            m_sub = new ReRange(tks,"[^\n]");
        // 8. Any other character stands for itself.
        else
            m_sub = new ReStr(tks,str[0]);
        j = 1;
    }

    // 9. If the string is exhausted, m_sub is the whole expression.
    if (j>=n)
        return;

    // 10. Postfix operator applied to what has been built so far.
    if (str[j]=='?')
    {
        m_sub = new ReOpt(m_sub);
        j++;
    }
    else if (str[j]=='*')
    {
        m_sub = new ReStar(m_sub);
        j++;
    }
    else if (str[j]=='+')
    {
        m_sub = new RePlus(m_sub);
        j++;
    }

    // 11. If the string is exhausted, return.
    if (j>=n)
        return;

    // 12. Alternation with the rest of the string (the '|' itself is skipped).
    if (str[j]=='|')
        m_sub = new ReAlt(tks,m_sub,p+j+1,str.Substring(j+1,n-j-1));
    // 13. Otherwise concatenation with the rest of the string.
    else
        m_sub = new ReCat(tks,m_sub,p+j,str.Substring(j,n-j));
    return;

bad:
    tks.erh.Error(new CSToolsFatalException(1,tks.sourceLineInfo(p),str,"ill-formed regular expression "+str));
}

// Decodes the quoted text that starts at index start and runs up to the first
// unescaped occurrence of close. On success returns the decoded contents and sets
// end to the index of the closing quote; returns null when the closing quote is
// missing or the string ends in a dangling backslash.
private static string ScanQuoted(string str,int start,char close,out int end)
{
    int n = str.Length;
    StringBuilder qs = new StringBuilder();
    int j;
    for (j=start;j<n && str[j]!=close;j++)
    {
        if (str[j]!='\\')
        {
            qs.Append(str[j]);
            continue;
        }
        if (++j>=n)
            break; // backslash was the last character: nothing to escape
        switch (str[j])
        {
            case 'n': qs.Append('\n'); break;
            case 'r': qs.Append('\r'); break;
            case 't': qs.Append('\t'); break;
            case '\\': qs.Append('\\'); break;
            case '\'': qs.Append('\''); break;
            case '"': qs.Append('"'); break;
            case '\n': break; // backslash-newline contributes nothing
            default: qs.Append(str[j]); break;
        }
    }
    end = j;
    if (j>=n)
        return null;
    return qs.ToString();
}
/// <exclude/>
/// <summary>
/// Builds an NFA for a given regular expression: a fresh NfaNode is stored in
/// m_end, then the expression is asked to build itself into this Nfa.
/// </summary>
/// <param name="tks">Generator context, passed to the base constructor and to the new end node.</param>
/// <param name="re">Regular expression whose Build method is invoked with this Nfa.</param>
public Nfa(TokensGen tks,Regex re)
    : base(tks)
{
    // m_end is assigned before Build runs.
    m_end = new NfaNode(tks);
    re.Build(this);
}
/// <exclude/>
/// <summary>Appends a new ArcEx built from <paramref name="re"/> and <paramref name="next"/> to m_arcs.</summary>
/// <param name="re">Regular expression handed to the ArcEx constructor.</param>
/// <param name="next">Node handed to the ArcEx constructor as the arc's next node.</param>
public void AddArcEx(Regex re,NfaNode next)
{
    ArcEx arc = new ArcEx(re,next);
    m_arcs.Add(arc);
}
/// <summary>
/// Wraps a sub-expression; the Regex constructor creates one of these when a
/// '+' follows the sub-expression.
/// </summary>
/// <param name="sub">The sub-expression, stored in m_sub.</param>
public RePlus(Regex sub)
{
    m_sub = sub;
}
/// <summary>
/// Wraps a sub-expression; the Regex constructor creates one of these when a
/// '*' follows the sub-expression.
/// </summary>
/// <param name="sub">The sub-expression, stored in m_sub.</param>
public ReStar(Regex sub)
{
    m_sub = sub;
}
/// <summary>
/// Wraps a sub-expression; the Regex constructor creates one of these when a
/// '?' follows the sub-expression.
/// </summary>
/// <param name="sub">The sub-expression, stored in m_sub.</param>
public ReOpt(Regex sub)
{
    m_sub = sub;
}
/// <exclude/>
/// <summary>
/// Concatenation node: keeps the already-built expression in m_sub and parses
/// the remaining text into m_next.
/// </summary>
/// <param name="tks">Generator context passed on to the Regex constructor.</param>
/// <param name="sub">Expression already built for the leading part.</param>
/// <param name="p">Position passed on to the Regex constructor for the remaining text.</param>
/// <param name="str">Remaining text, parsed as a regular expression.</param>
public ReCat(TokensGen tks,Regex sub,int p,string str)
{
    m_sub = sub;
    m_next = new Regex(tks,p,str);
}
/// <summary>
/// Alternation node: keeps the already-built expression in m_sub and parses
/// the remaining text into m_alt.
/// </summary>
/// <param name="tks">Generator context passed on to the Regex constructor.</param>
/// <param name="sub">Expression already built for the first alternative.</param>
/// <param name="p">Position passed on to the Regex constructor for the remaining text.</param>
/// <param name="str">Remaining text, parsed as a regular expression.</param>
public ReAlt(TokensGen tks,Regex sub,int p,string str)
{
    m_sub = sub;
    m_alt = new Regex(tks,p,str);
}
/// <summary>Creates an arc that records a regular expression and the node it leads to.</summary>
/// <param name="re">Regular expression, stored in m_ref.</param>
/// <param name="next">Target node, stored in m_next.</param>
public ArcEx(Regex re,NfaNode next)
{
    m_ref = re;
    m_next = next;
}
/// <summary>
/// Determines whether a string is a 24-hour time of the form H:m or H:m:s,
/// where the hour is 0-23 and the minute and optional second are 0-59, each
/// written with one or two digits.
/// </summary>
/// <param name="timeval">The text to check; null is treated as not a time.</param>
/// <returns>true if the whole string is a time; otherwise false.</returns>
public static bool IsTime(string timeval)
{
    // Regex.IsMatch throws on null input; a predicate should simply answer false.
    if (timeval == null)
    {
        return false;
    }

    // \z anchors at the very end of the string. The more common $ would also
    // match just before a trailing newline and so accept "12:30\n".
    return Regex.IsMatch(timeval, @"^((([0-1]?[0-9])|(2[0-3])):([0-5]?[0-9])(:[0-5]?[0-9])?)\z");
}