/// <summary>
/// Builds a literal-string regex element from <paramref name="str"/> and
/// reports each of its characters to the token table through UsingChar.
/// </summary>
/// <param name="tks">Generator whose m_tokens receives the UsingChar calls.</param>
/// <param name="str">Literal text; stored in m_str.</param>
public ReStr(TokensGen tks, string str)
{
    m_str = str;
    foreach (char c in str)
    {
        tks.m_tokens.UsingChar(c);
    }
}
/// <summary>
/// Creates a regex element matching the literal text <paramref name="str"/>.
/// Every character of the text is passed to tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="str">Literal text kept in m_str.</param>
public ReStr(TokensGen tks, string str)
{
    this.m_str = str;
    foreach (char symbol in str)
    {
        tks.m_tokens.UsingChar(symbol);
    }
}
/// <summary>
/// Builds a regex element for <paramref name="str"/> that registers both the
/// lower-case and the upper-case form of every character with the token table.
/// </summary>
/// <param name="tks">Generator whose m_tokens receives the UsingChar calls.</param>
/// <param name="str">Text stored unchanged in m_str.</param>
public ReUStr(TokensGen tks, string str)
{
    m_str = str;
    foreach (char c in str)
    {
        // Lower-case form first, then upper-case form, for each character.
        tks.m_tokens.UsingChar(char.ToLower(c));
        tks.m_tokens.UsingChar(char.ToUpper(c));
    }
}
/// <summary>
/// Creates a regex element for the text <paramref name="str"/>; for each
/// character both Char.ToLower and Char.ToUpper variants are reported to
/// tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="str">Text kept as given in m_str.</param>
public ReUStr(TokensGen tks, string str)
{
    this.m_str = str;
    foreach (char symbol in str)
    {
        char lower = Char.ToLower(symbol);
        tks.m_tokens.UsingChar(lower);
        char upper = Char.ToUpper(symbol);
        tks.m_tokens.UsingChar(upper);
    }
}
/// <summary>
/// Defines a token class called <paramref name="name"/> and registers it in the
/// token table of <paramref name="gbs"/>. Nothing is initialised when
/// <paramref name="gbs"/> is not a TokensGen.
/// </summary>
/// <param name="gbs">Generator; only a TokensGen is acted upon.</param>
/// <param name="name">Name stored in m_name and used as the table key.</param>
/// <param name="bas">Value stored in m_refToken.</param>
public TokClassDef(GenBase gbs, string name, string bas)
{
    TokensGen tks = gbs as TokensGen;
    if (tks == null)
    {
        return;
    }

    m_name = name;
    tks.m_tokens.tokens[name] = this;
    m_refToken = bas;
}
/// <summary>
/// Parses a reserved-words element of the form <c>{w1 [alias1], w2 [alias2], ...}</c>,
/// optionally prefixed by <c>U</c> (which sets m_upper).
/// </summary>
/// <remarks>
/// Each comma-separated entry is split at its first space: the text before it is
/// the word, the trimmed remainder is the token-class name (the word itself when
/// there is no space). The pair is stored in m_wds. When no entry for the class
/// name exists in tks.m_tokens.tokens, a TokClassDef is created and the source of
/// a matching TOKEN subclass is written to tks.m_outFile.
/// </remarks>
/// <param name="tks">Generator supplying the token table, output file and error handler.</param>
/// <param name="str">Text of the element.</param>
/// <returns>The populated ResWds, or null after reporting error 47 for malformed input.</returns>
public static ResWds New(TokensGen tks, string str)
{
    ResWds resWds = new ResWds();
    str = str.Trim();

    // Length checks keep empty input (or a bare "U") on the error path
    // instead of throwing IndexOutOfRangeException.
    if (str.Length > 0 && str[0] == 'U')
    {
        resWds.m_upper = true;
        str = str.Substring(1).Trim();
    }

    if (str.Length < 2 || str[0] != '{' || str[str.Length - 1] != '}')
    {
        tks.m_tokens.erh.Error(new CSToolsException(47, "bad ResWds element"));
        return null;
    }

    str = str.Substring(1, str.Length - 2).Trim();
    foreach (string entry in str.Split(','))
    {
        string word = entry.Trim();
        string name = word;
        int space = word.IndexOf(' ');
        if (space > 0)
        {
            name = word.Substring(space).Trim();
            word = word.Substring(0, space);
        }

        resWds.m_wds[word] = name;
        if (tks.m_tokens.tokens[name] != null)
        {
            continue; // token class already known
        }

        TokClassDef tokClassDef = new TokClassDef(tks, name, "TOKEN");
        tks.m_outFile.WriteLine("//%{0}+{1}", name, tokClassDef.m_yynum);
        tks.m_outFile.Write("public class {0} : TOKEN", name);
        tks.m_outFile.WriteLine("{ public override string yyname { get { return \"" + name + "\";}}");
        tks.m_outFile.WriteLine("public override int yynum { get { return " + tokClassDef.m_yynum + "; }}");
        tks.m_outFile.WriteLine(" public " + name + "(Lexer yyl):base(yyl) {}}");
    }

    return resWds;
}
/// <summary>
/// Builds a ResWds from text of the form <c>{w1 [alias1], w2 [alias2], ...}</c>;
/// a leading <c>U</c> sets m_upper.
/// </summary>
/// <remarks>
/// For every comma-separated entry the part before the first space is the word and
/// the trimmed rest is the token-class name (defaulting to the word). The mapping
/// goes into m_wds; if tks.m_tokens.tokens has no entry for the class name, a
/// TokClassDef is created and a TOKEN subclass declaration is written to tks.m_outFile.
/// </remarks>
/// <param name="tks">Generator supplying the token table, output file and error handler.</param>
/// <param name="str">Text of the element.</param>
/// <returns>The new ResWds, or null after error 47 was reported for malformed input.</returns>
public static ResWds New(TokensGen tks, string str)
{
    ResWds r = new ResWds();
    str = str.Trim();

    // Guard the indexers: empty text, or "U" with nothing after it, must be
    // reported as a bad element rather than crash on str[0].
    if (str.Length > 0 && str[0] == 'U')
    {
        r.m_upper = true;
        str = str.Substring(1).Trim();
    }
    if (str.Length < 2 || str[0] != '{' || str[str.Length - 1] != '}')
    {
        goto bad;
    }

    str = str.Substring(1, str.Length - 2).Trim();
    string[] wds = str.Split(',');
    for (int j = 0; j < wds.Length; j++)
    {
        string w = wds[j].Trim();
        string a = w;
        int i = w.IndexOf(' ');
        if (i > 0)
        {
            a = w.Substring(i).Trim();
            w = w.Substring(0, i);
        }
        r.m_wds[w] = a;
        if (tks.m_tokens.tokens[a] == null)
        {
            // First use of this class name: define it and emit its source.
            TokClassDef t = new TokClassDef(tks, a, "TOKEN");
            tks.m_outFile.WriteLine("//%{0}+{1}", a, t.m_yynum);
            tks.m_outFile.Write("public class {0} : TOKEN", a);
            tks.m_outFile.WriteLine("{ public override string yyname { get { return \"" + a + "\";}}");
            tks.m_outFile.WriteLine("public override int yynum { get { return " + t.m_yynum + "; }}");
            tks.m_outFile.WriteLine(" public " + a + "(Lexer yyl):base(yyl) {}}");
        }
    }
    return r;

bad:
    tks.m_tokens.erh.Error(new CSToolsException(47, "bad ResWds element"));
    return null;
}
/// <summary>
/// Parses <paramref name="str"/> into a regex tree rooted at m_sub.
/// </summary>
/// <remarks>
/// The first element of the string is recognised as one of: a parenthesised group,
/// a [...] range, a quoted string, a U-prefixed quoted string, a backslash escape,
/// a {name} reference (a definition from tks.defines, otherwise a ReCategory),
/// a dot (ReRange "[^\n]"), or a single literal character. An optional ?, * or +
/// wraps it in ReOpt, ReStar or RePlus. Whatever remains becomes the right-hand
/// side of a ReAlt (after '|') or of a ReCat. Unterminated groups, ranges, strings
/// and names, and a dangling backslash, are reported through tks.erh.Error as
/// "ill-formed regular expression".
/// </remarks>
/// <param name="tks">Generator supplying defines, the token table and the error handler.</param>
/// <param name="p">Position passed to tks.sourceLineInfo for error reporting.</param>
/// <param name="str">Regular-expression text.</param>
public Regex(TokensGen tks, int p, string str)
{
    int length = str.Length;
    int next; // index of the first character after the leading element

    this.m_sub = null;
    if (length == 0)
    {
        return;
    }

    if (str[0] == '(')
    {
        // Find the matching ')'; escaped characters and anything inside
        // [...] or quotes do not count.
        int depth = 0;
        int inBracket = 0;
        int quote = 0;
        for (next = 1; next < length; ++next)
        {
            char c = str[next];
            if (c == '\\')
            {
                ++next;
            }
            else if (c == ']' && inBracket > 0)
            {
                inBracket = 0;
            }
            else if (inBracket > 0)
            {
                continue;
            }
            else if (c == '"' || c == '\'')
            {
                if (quote == c)
                {
                    quote = 0;
                }
                else if (quote == 0)
                {
                    quote = c;
                }
            }
            else if (quote > 0)
            {
                continue;
            }
            else if (c == '[')
            {
                ++inBracket;
            }
            else if (c == '(')
            {
                ++depth;
            }
            else if (c == ')' && depth-- == 0)
            {
                break;
            }
        }
        // ">=" because a trailing backslash can leave next == length + 1.
        if (next >= length)
        {
            goto bad;
        }
        this.m_sub = new Regex(tks, p + 1, str.Substring(1, next - 1));
        ++next;
    }
    else if (str[0] == '[')
    {
        for (next = 1; next < length && str[next] != ']'; ++next)
        {
            if (str[next] == '\\')
            {
                ++next;
            }
        }
        if (next >= length)
        {
            goto bad;
        }
        this.m_sub = new ReRange(tks, str.Substring(0, next + 1));
        ++next;
    }
    else if (str[0] == '\'' || str[0] == '"')
    {
        StringBuilder text = new StringBuilder();
        for (next = 1; next < length && str[next] != str[0]; ++next)
        {
            if (str[next] != '\\')
            {
                text.Append(str[next]);
                continue;
            }
            if (++next >= length)
            {
                goto bad; // backslash with nothing after it
            }
            switch (str[next])
            {
                case 'n': text.Append('\n'); break;
                case 'r': text.Append('\r'); break;
                case 't': text.Append('\t'); break;
                case 'v': text.Append('\v'); break;
                case '\\': text.Append('\\'); break;
                case '\'': text.Append('\''); break;
                case '0': text.Append(char.MinValue); break;
                case '"': text.Append('"'); break;
                case '\n': break; // escaped line break contributes nothing
                default: text.Append(str[next]); break;
            }
        }
        if (next >= length)
        {
            goto bad;
        }
        ++next;
        this.m_sub = new ReStr(tks, text.ToString());
    }
    else if (str.StartsWith("U\"") || str.StartsWith("U'"))
    {
        StringBuilder text = new StringBuilder();
        for (next = 2; next < length && str[next] != str[1]; ++next)
        {
            if (str[next] != '\\')
            {
                text.Append(str[next]);
                continue;
            }
            if (++next >= length)
            {
                goto bad; // backslash with nothing after it
            }
            switch (str[next])
            {
                case 'n': text.Append('\n'); break;
                case 'r': text.Append('\r'); break;
                case 't': text.Append('\t'); break;
                case 'v': text.Append('\v'); break;
                case '\\': text.Append('\\'); break;
                case '\'': text.Append('\''); break;
                case '"': text.Append('"'); break;
                case '\n': break;
                default: text.Append(str[next]); break;
            }
        }
        if (next >= length)
        {
            goto bad;
        }
        ++next;
        this.m_sub = new ReUStr(tks, text.ToString());
    }
    else if (str[0] == '\\')
    {
        if (length < 2)
        {
            goto bad; // lone backslash
        }
        char ch = str[1];
        switch (ch)
        {
            case 'n': ch = '\n'; break;
            case 't': ch = '\t'; break;
            case 'r': ch = '\r'; break;
            case 'v': ch = '\v'; break;
        }
        this.m_sub = new ReStr(tks, ch);
        next = 2;
    }
    else if (str[0] == '{')
    {
        next = str.IndexOf('}', 1);
        if (next < 0)
        {
            goto bad;
        }
        string name = str.Substring(1, next - 1);
        string define = (string)tks.defines[name];
        if (define == null)
        {
            this.m_sub = new ReCategory(tks, name);
        }
        else
        {
            this.m_sub = new Regex(tks, p + 1, define);
        }
        ++next;
    }
    else
    {
        if (str[0] == '.')
        {
            this.m_sub = new ReRange(tks, "[^\n]");
        }
        else
        {
            this.m_sub = new ReStr(tks, str[0]);
        }
        next = 1;
    }

    if (next >= length)
    {
        return;
    }

    // Optional postfix operator.
    if (str[next] == '?')
    {
        this.m_sub = new ReOpt(this.m_sub);
        ++next;
    }
    else if (str[next] == '*')
    {
        this.m_sub = new ReStar(this.m_sub);
        ++next;
    }
    else if (str[next] == '+')
    {
        this.m_sub = new RePlus(this.m_sub);
        ++next;
    }

    if (next >= length)
    {
        return;
    }

    // The rest of the string is an alternative or a continuation.
    if (str[next] == '|')
    {
        this.m_sub = new ReAlt(tks, this.m_sub, p + next + 1, str.Substring(next + 1));
    }
    else
    {
        this.m_sub = new ReCat(tks, this.m_sub, p + next, str.Substring(next));
    }
    return;

bad:
    tks.erh.Error(new CSToolsFatalException(1, tks.sourceLineInfo(p), str, "ill-formed regular expression " + str));
}
/// <summary>
/// Builds a character-range element from bracketed text such as <c>[a-z0-9]</c>.
/// </summary>
/// <remarks>
/// The first and last characters of <paramref name="str"/> (the brackets) are
/// skipped. Backslash escapes are decoded first: octal digit runs become the
/// character with that code, and \n \t \r \v become control characters. A leading
/// '^' sets m_invert. Each <c>x-y</c> triple registers every character from x to y
/// through Set; every other character is registered on its own.
/// </remarks>
/// <param name="tks">Generator passed on to Set.</param>
/// <param name="str">Range text including the enclosing brackets.</param>
public ReRange(TokensGen tks, string str)
{
    StringBuilder decoded = new StringBuilder();
    int end = str.Length - 1; // index of the closing bracket
    int pos = 1;
    while (pos < end)
    {
        char c = str[pos];
        if (c != '\\')
        {
            decoded.Append(c);
            ++pos;
            continue;
        }

        if (pos + 1 < end)
        {
            ++pos;
        }
        c = str[pos];
        if (c >= '0' && c <= '7')
        {
            int code = 0;
            while (pos < end && str[pos] >= '0' && str[pos] <= '7')
            {
                code = code * 8 + (str[pos] - '0');
                ++pos;
            }
            decoded.Append((char)code);
            // pos already sits on the first unread character; stepping again
            // here would drop the character that follows the octal escape.
            continue;
        }

        switch (c)
        {
            case 'n': decoded.Append('\n'); break;
            case 't': decoded.Append('\t'); break;
            case 'r': decoded.Append('\r'); break;
            case 'v': decoded.Append('\v'); break;
            default: decoded.Append(c); break;
        }
        ++pos;
    }

    int length = decoded.Length;
    if (length > 0 && decoded[0] == '^')
    {
        this.m_invert = true;
        decoded.Remove(0, 1).Append(char.MinValue).Append(char.MaxValue);
    }

    for (int i = 0; i < length; ++i)
    {
        // A '-' only forms a range when an upper bound follows it;
        // a trailing '-' is an ordinary character.
        if (i + 2 < length && decoded[i + 1] == '-')
        {
            for (int v = decoded[i]; v <= decoded[i + 2]; ++v)
            {
                this.Set(tks, (char)v);
            }
            i += 2;
        }
        else
        {
            this.Set(tks, decoded[i]);
        }
    }
}
/// <summary>
/// Builds a literal regex element for the single character <paramref name="ch"/>
/// and reports it to tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="ch">Character stored, as a one-character string, in m_str.</param>
public ReStr(TokensGen tks, char ch)
{
    this.m_str = ch.ToString();
    tks.m_tokens.UsingChar(ch);
}
/// <summary>
/// Builds a regex element for the named category <paramref name="str"/>. The name
/// is kept in m_str and the test obtained from tks.m_tokens.GetTest in m_test.
/// </summary>
/// <param name="tks">Generator whose token table supplies the test.</param>
/// <param name="str">Category name.</param>
public ReCategory(TokensGen tks, string str)
{
    this.m_str = str;
    this.m_test = tks.m_tokens.GetTest(str);
}
public string m_sTerminal = ""; // or something for the Lexer
#endregion Fields

#region Constructors
/// <exclude/>
/// <summary>Creates an NFA node; all initialisation is done by the base constructor.</summary>
/// <param name="tks">Generator forwarded to the base constructor.</param>
public NfaNode(TokensGen tks)
    : base(tks)
{
}
/// <summary>
/// Builds an alternation: <paramref name="sub"/> is the already-parsed left side,
/// and the right side is parsed from <paramref name="str"/>.
/// </summary>
/// <param name="tks">Generator passed to the Regex parser.</param>
/// <param name="sub">Left alternative, stored in m_sub.</param>
/// <param name="p">Position handed to the Regex constructor for the right side.</param>
/// <param name="str">Text of the right alternative.</param>
public ReAlt(TokensGen tks, Regex sub, int p, string str)
{
    this.m_sub = sub;
    this.m_alt = new Regex(tks, p, str);
}
/// <summary>
/// Creates a category element: stores the category name and looks up its test
/// with tks.m_tokens.GetTest.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="str">Category name, kept in m_str.</param>
public ReCategory(TokensGen tks, string str)
{
    this.m_str = str;
    this.m_test = tks.m_tokens.GetTest(this.m_str);
}
/// <summary>
/// Marks <paramref name="ch"/> as present in m_map and reports it to
/// tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="ch">Character to add.</param>
void Set(TokensGen tks, char ch)
{
    this.m_map[ch] = true;
    tks.m_tokens.UsingChar(ch);
}
/// <exclude/>
/// <summary>
/// Creates a literal element for one character and registers that character
/// through tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="ch">Character stored in m_str as a one-character string.</param>
public ReStr(TokensGen tks, char ch)
{
    this.m_str = ch.ToString();
    tks.m_tokens.UsingChar(ch);
}
/// <summary>
/// Creates an NFA and gives it a fresh end node built from m_tks.
/// </summary>
/// <param name="tks">Generator forwarded to the base constructor.</param>
public Nfa(TokensGen tks)
    : base(tks)
{
    this.m_end = new NfaNode(this.m_tks);
}
/// <summary>
/// Builds an NFA for a given regular expression: creates the end node, then
/// asks <paramref name="re"/> to build itself into this NFA.
/// </summary>
/// <param name="tks">Generator forwarded to the base constructor and the end node.</param>
/// <param name="re">Regular expression whose Build method is invoked.</param>
public Nfa(TokensGen tks, Regex re)
    : base(tks)
{
    this.m_end = new NfaNode(tks);
    re.Build(this);
}
public ObjectList m_eps = new ObjectList(); // of NfaNode for unlabelled arcs

/// <summary>Creates an NFA node; the base constructor does all initialisation.</summary>
/// <param name="tks">Generator forwarded to the base constructor.</param>
public NfaNode(TokensGen tks)
    : base(tks)
{
}
/// <summary>
/// Adds <paramref name="ch"/> to this range (m_map entry set to true) and
/// registers it with tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="ch">Character to add.</param>
void Set(TokensGen tks, char ch)
{
    this.m_map[ch] = true;
    tks.m_tokens.UsingChar(ch);
}
/// <summary>
/// Builds a character range from bracketed text such as <c>[a-z0-9]</c>.
/// </summary>
/// <remarks>
/// The enclosing brackets are skipped. Escapes are resolved first (octal digit
/// runs, \n, \t, \r, \v; any other escaped character stands for itself). A leading
/// '^' sets m_invert. Then each <c>x-y</c> triple adds all characters from x to y
/// through Set, and each remaining character is added individually.
/// </remarks>
/// <param name="tks">Generator passed on to Set.</param>
/// <param name="str">Range text including the enclosing brackets.</param>
public ReRange(TokensGen tks, string str)
{
    StringBuilder ns = new StringBuilder();
    int n = str.Length - 1; // index of the closing ']'
    int p = 1;
    int v;

    // fix \ escapes
    while (p < n)
    {
        if (str[p] != '\\')
        {
            ns.Append(str[p++]);
            continue;
        }
        if (p + 1 < n)
        {
            p++;
        }
        if (str[p] >= '0' && str[p] <= '7')
        {
            for (v = 0; p < n && str[p] >= '0' && str[p] <= '7'; p++)
            {
                v = v * 8 + (str[p] - '0');
            }
            ns.Append((char)v);
            // p is already on the next unread character; advancing again
            // would swallow whatever follows the octal escape.
            continue;
        }
        switch (str[p])
        {
            case 'n': ns.Append('\n'); break;
            case 't': ns.Append('\t'); break;
            case 'r': ns.Append('\r'); break;
            case 'v': ns.Append('\v'); break;
            default: ns.Append(str[p]); break;
        }
        p++;
    }

    n = ns.Length;
    if (n > 0 && ns[0] == '^')
    {
        // invert range
        m_invert = true;
        ns.Remove(0, 1).Append((char)0).Append((char)0xFFFF);
    }

    for (p = 0; p < n; p++)
    {
        // Only treat '-' as a range operator when an upper bound follows;
        // a trailing '-' is a plain character.
        if (p + 2 < n && ns[p + 1] == '-')
        {
            for (v = ns[p]; v <= ns[p + 2]; v++)
            {
                Set(tks, (char)v);
            }
            p += 2;
        }
        else
        {
            Set(tks, ns[p]);
        }
    }
}
/// <summary>
/// Builds a character range from bracketed text such as <c>[a-z0-9]</c>.
/// </summary>
/// <remarks>
/// The enclosing brackets are skipped. Escapes are resolved first (octal digit
/// runs, \n, \t, \r; any other escaped character stands for itself). A leading
/// '^' sets m_invert. Then each <c>x-y</c> triple adds all characters from x to y
/// through Set, and each remaining character is added individually. An empty
/// range adds nothing.
/// </remarks>
/// <param name="tks">Generator passed on to Set.</param>
/// <param name="str">Range text including the enclosing brackets.</param>
public ReRange(TokensGen tks, string str)
{
    string ns = "";
    int n = str.Length - 1; // index of the closing ']'
    int p = 1;
    int v;

    // fix \ escapes
    while (p < n)
    {
        if (str[p] != '\\')
        {
            ns += str[p++];
            continue;
        }
        if (p + 1 < n)
        {
            p++;
        }
        if (str[p] >= '0' && str[p] <= '7')
        {
            for (v = 0; p < n && str[p] >= '0' && str[p] <= '7'; p++)
            {
                v = v * 8 + (str[p] - '0');
            }
            ns += (char)v;
            // p is already on the next unread character; advancing again
            // would swallow whatever follows the octal escape.
            continue;
        }
        switch (str[p])
        {
            case 'n': ns += '\n'; break;
            case 't': ns += '\t'; break;
            case 'r': ns += '\r'; break;
            default: ns += str[p]; break;
        }
        p++;
    }

    n = ns.Length;
    // n > 0: an empty range "[]" must not index ns[0].
    if (n > 0 && ns[0] == '^')
    {
        // invert range
        m_invert = true;
        ns = ns.Substring(1) + (char)0 + (char)0xFFFF;
    }

    for (p = 0; p < n; p++)
    {
        // Only treat '-' as a range operator when an upper bound follows;
        // a trailing '-' is a plain character.
        if (p + 2 < n && ns[p + 1] == '-')
        {
            for (v = ns[p]; v <= ns[p + 2]; v++)
            {
                Set(tks, (char)v);
            }
            p += 2;
        }
        else
        {
            Set(tks, ns[p]);
        }
    }
}
/// <exclude/>
/// <summary>
/// Creates an element for the single character <paramref name="ch"/>, registering
/// both its lower-case and upper-case forms with tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="ch">Character stored in m_str as a one-character string.</param>
public ReUStr(TokensGen tks, char ch)
{
    this.m_str = ch.ToString();
    char lower = Char.ToLower(ch);
    tks.m_tokens.UsingChar(lower);
    char upper = Char.ToUpper(ch);
    tks.m_tokens.UsingChar(upper);
}
/// <summary>
/// Builds an element for one character; the lower-case form and then the
/// upper-case form of <paramref name="ch"/> are reported to tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="ch">Character kept, as a one-character string, in m_str.</param>
public ReUStr(TokensGen tks, char ch)
{
    m_str = ch.ToString();
    tks.m_tokens.UsingChar(char.ToLower(ch));
    tks.m_tokens.UsingChar(char.ToUpper(ch));
}
/// <exclude/>
/// <summary>Creates a DFA node that shares the generator's token table.</summary>
/// <param name="tks">Generator forwarded to the base constructor; its m_tokens is kept in m_tokens.</param>
public Dfa(TokensGen tks)
    : base(tks)
{
    this.m_tokens = tks.m_tokens;
}
/// <exclude/>
/// <summary>
/// Builds a concatenation: <paramref name="sub"/> is the already-parsed first
/// part, and the following part is parsed from <paramref name="str"/>.
/// </summary>
/// <param name="tks">Generator passed to the Regex parser.</param>
/// <param name="sub">First part, stored in m_sub.</param>
/// <param name="p">Position handed to the Regex constructor for the following part.</param>
/// <param name="str">Text of the following part.</param>
public ReCat(TokensGen tks, Regex sub, int p, string str)
{
    this.m_sub = sub;
    this.m_next = new Regex(tks, p, str);
}
/// <exclude/>
/// <summary>Creates an NFA whose end node is a new NfaNode built from m_tks.</summary>
/// <param name="tks">Generator forwarded to the base constructor.</param>
public Nfa(TokensGen tks)
    : base(tks)
{
    this.m_end = new NfaNode(this.m_tks);
}
public Hashtable m_map = new Hashtable(); // char->bool
#endregion Fields

#region Constructors
/// <summary>
/// Builds a character range from bracketed text such as <c>[a-z0-9]</c>.
/// </summary>
/// <remarks>
/// The enclosing brackets are skipped. Escapes are resolved first (octal digit
/// runs, \n, \t, \r; any other escaped character stands for itself). A leading
/// '^' sets m_invert. Then each <c>x-y</c> triple adds all characters from x to y
/// through Set, and each remaining character is added individually. An empty
/// range adds nothing.
/// </remarks>
/// <param name="tks">Generator passed on to Set.</param>
/// <param name="str">Range text including the enclosing brackets.</param>
public ReRange(TokensGen tks, string str)
{
    StringBuilder ns = new StringBuilder();
    int n = str.Length - 1; // index of the closing ']'
    int p = 1;
    int v;

    // fix \ escapes
    while (p < n)
    {
        if (str[p] != '\\')
        {
            ns.Append(str[p++]);
            continue;
        }
        if (p + 1 < n)
        {
            p++;
        }
        if (str[p] >= '0' && str[p] <= '7')
        {
            for (v = 0; p < n && str[p] >= '0' && str[p] <= '7'; p++)
            {
                v = v * 8 + (str[p] - '0');
            }
            ns.Append((char)v);
            // p is already on the next unread character; advancing again
            // would swallow whatever follows the octal escape.
            continue;
        }
        switch (str[p])
        {
            case 'n': ns.Append('\n'); break;
            case 't': ns.Append('\t'); break;
            case 'r': ns.Append('\r'); break;
            default: ns.Append(str[p]); break;
        }
        p++;
    }

    n = ns.Length;
    // n > 0: an empty range "[]" must not index ns[0].
    if (n > 0 && ns[0] == '^')
    {
        // invert range
        m_invert = true;
        ns.Remove(0, 1).Append((char)0).Append((char)0xFFFF);
    }

    for (p = 0; p < n; p++)
    {
        // Only treat '-' as a range operator when an upper bound follows;
        // a trailing '-' is a plain character.
        if (p + 2 < n && ns[p + 1] == '-')
        {
            for (v = ns[p]; v <= ns[p + 2]; v++)
            {
                Set(tks, (char)v);
            }
            p += 2;
        }
        else
        {
            Set(tks, ns[p]);
        }
    }
}
/// <summary>
/// Builds a character range from bracketed text such as <c>[a-z0-9]</c>.
/// </summary>
/// <remarks>
/// The brackets at both ends of <paramref name="str"/> are skipped. Escapes are
/// decoded first: octal digit runs yield the character with that code, \n \t \r
/// yield control characters, anything else stands for itself. A leading '^' sets
/// m_invert. Each <c>x-y</c> triple then adds every character from x to y through
/// Set; other characters are added one by one. An empty range adds nothing.
/// </remarks>
/// <param name="tks">Generator passed on to Set.</param>
/// <param name="str">Range text including the enclosing brackets.</param>
public ReRange(TokensGen tks, string str)
{
    string ns = "";
    int last = str.Length - 1; // index of the closing ']'
    int p = 1;

    // fix \ escapes
    while (p < last)
    {
        char c = str[p];
        if (c != '\\')
        {
            ns += c;
            p++;
            continue;
        }

        if (p + 1 < last)
        {
            p++;
        }
        c = str[p];
        if (c >= '0' && c <= '7')
        {
            int code = 0;
            while (p < last && str[p] >= '0' && str[p] <= '7')
            {
                code = code * 8 + (str[p] - '0');
                p++;
            }
            ns += (char)code;
            // Do not advance again: p already points at the character after
            // the octal digits, which must not be skipped.
            continue;
        }

        switch (c)
        {
            case 'n': ns += '\n'; break;
            case 't': ns += '\t'; break;
            case 'r': ns += '\r'; break;
            default: ns += c; break;
        }
        p++;
    }

    int n = ns.Length;
    // Length check keeps an empty range "[]" from indexing ns[0].
    if (n > 0 && ns[0] == '^')
    {
        // invert range
        m_invert = true;
        ns = ns.Substring(1) + (char)0 + (char)0xFFFF;
    }

    for (p = 0; p < n; p++)
    {
        // '-' forms a range only when an upper bound follows it;
        // a trailing '-' is an ordinary member.
        if (p + 2 < n && ns[p + 1] == '-')
        {
            for (int v = ns[p]; v <= ns[p + 2]; v++)
            {
                Set(tks, (char)v);
            }
            p += 2;
        }
        else
        {
            Set(tks, ns[p]);
        }
    }
}
/// <exclude/>
/// <summary>
/// Creates a literal-string element; the text is kept in m_str and each of its
/// characters is reported to tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="str">Literal text.</param>
public ReStr(TokensGen tks, string str)
{
    this.m_str = str;
    foreach (char c in str)
    {
        tks.m_tokens.UsingChar(c);
    }
}
/*
 * Construct a Regex from a given string
 *
 *  1. If the string is empty there is nothing to do: return (m_sub has been cleared as a precaution).
 *  2. If the string begins with ( , find the matching ) . A ) that is escaped, or inside quotes
 *     or [], does not count. Recursively construct a Regex for the text between the ()s,
 *     mark everything up to the ) as used, and go to step 9.
 *  3. If the string begins with [ , find the matching ] , watching for escapes.
 *     Construct a ReRange for the bracketed text, mark it as used, and go to step 9.
 *  4. If the string begins with ' or " , build the contents, interpreting escaped special
 *     characters, until the matching quote. Construct a ReStr, mark everything up to the
 *     final quote as used, and go to step 9.
 * 4a. If the string begins with U' or U" , do the same but construct a ReUStr.
 *  5. If the string begins with \ , build a ReStr for the next character (special action
 *     for n, t, r, v), mark both as used, and go to step 9.
 *  6. If the string begins with { , find the matching } and look the name up in the
 *     definitions table. If found, recursively construct a Regex from the definition;
 *     otherwise construct a ReCategory for the name. Mark everything up to the } as used.
 *  7. If the string begins with a dot, construct a ReRange for "[^\n]" and mark the dot as used.
 *  8. Otherwise there is a simple character at the start: construct a ReStr for it.
 *  9. If the string is exhausted, return: m_sub holds what has been constructed.
 * 10. If the next character is ? , * or + , wrap m_sub in a ReOpt, ReStar or RePlus
 *     respectively and mark the character as used.
 * 11. If the string is exhausted, return.
 * 12. If the next character is | , build a ReAlt from m_sub and the rest of the string.
 * 13. Otherwise build a ReCat from m_sub and the rest of the string.
 *
 * An unterminated group, range, string or name, or a backslash with nothing after it,
 * is reported through tks.erh.Error as an ill-formed regular expression.
 */
public Regex(TokensGen tks, int p, string str)
{
    int n = str.Length;
    int j; // number of leading characters of str used so far

    // 1. Empty string: nothing to do.
    m_sub = null;
    if (n == 0)
    {
        return;
    }

    if (str[0] == '(')
    {
        // 2. Bracketed expression: locate the matching ')'.
        int nlp = 0;
        int lbrack = 0;
        int quote = 0;
        for (j = 1; j < n; j++)
        {
            char c = str[j];
            if (c == '\\')
            {
                j++;
            }
            else if (c == ']' && lbrack > 0)
            {
                lbrack = 0;
            }
            else if (lbrack > 0)
            {
                continue;
            }
            else if (c == '"' || c == '\'')
            {
                if (quote == c)
                {
                    quote = 0;
                }
                else if (quote == 0)
                {
                    quote = c;
                }
            }
            else if (quote > 0)
            {
                continue;
            }
            else if (c == '[')
            {
                lbrack++;
            }
            else if (c == '(')
            {
                nlp++;
            }
            else if (c == ')' && nlp-- == 0)
            {
                break;
            }
        }
        // ">=" rather than "==": a trailing backslash leaves j at n + 1.
        if (j >= n)
        {
            goto bad;
        }
        m_sub = new Regex(tks, p + 1, str.Substring(1, j - 1));
        j++;
    }
    else if (str[0] == '[')
    {
        // 3. Range of characters.
        for (j = 1; j < n && str[j] != ']'; j++)
        {
            if (str[j] == '\\')
            {
                j++;
            }
        }
        if (j >= n)
        {
            goto bad;
        }
        m_sub = new ReRange(tks, str.Substring(0, j + 1));
        j++;
    }
    else if (str[0] == '\'' || str[0] == '"')
    {
        // 4. Quoted string.
        StringBuilder qs = new StringBuilder();
        for (j = 1; j < n && str[j] != str[0]; j++)
        {
            if (str[j] != '\\')
            {
                qs.Append(str[j]);
                continue;
            }
            if (++j >= n)
            {
                goto bad; // backslash with nothing after it
            }
            switch (str[j])
            {
                case 'n': qs.Append('\n'); break;
                case 'r': qs.Append('\r'); break;
                case 't': qs.Append('\t'); break;
                case 'v': qs.Append('\v'); break;
                case '\\': qs.Append('\\'); break;
                case '\'': qs.Append('\''); break;
                case '0': qs.Append((char)0); break; // 4.7f
                case '"': qs.Append('"'); break;
                case '\n': break; // escaped line break contributes nothing
                default: qs.Append(str[j]); break;
            }
        }
        if (j >= n)
        {
            goto bad;
        }
        j++;
        m_sub = new ReStr(tks, qs.ToString());
    }
    else if (str.StartsWith("U\"") || str.StartsWith("U'"))
    {
        // 4a. U-prefixed quoted string.
        StringBuilder qs = new StringBuilder();
        for (j = 2; j < n && str[j] != str[1]; j++)
        {
            if (str[j] != '\\')
            {
                qs.Append(str[j]);
                continue;
            }
            if (++j >= n)
            {
                goto bad; // backslash with nothing after it
            }
            switch (str[j])
            {
                case 'n': qs.Append('\n'); break;
                case 'r': qs.Append('\r'); break;
                case 't': qs.Append('\t'); break;
                case 'v': qs.Append('\v'); break;
                case '\\': qs.Append('\\'); break;
                case '\'': qs.Append('\''); break;
                case '"': qs.Append('"'); break;
                case '\n': break;
                default: qs.Append(str[j]); break;
            }
        }
        if (j >= n)
        {
            goto bad;
        }
        j++;
        m_sub = new ReUStr(tks, qs.ToString());
    }
    else if (str[0] == '\\')
    {
        // 5. Escaped single character.
        if (n < 2)
        {
            goto bad; // lone backslash
        }
        char ch = str[1];
        switch (ch)
        {
            case 'n': ch = '\n'; break;
            case 't': ch = '\t'; break;
            case 'r': ch = '\r'; break;
            case 'v': ch = '\v'; break;
        }
        m_sub = new ReStr(tks, ch);
        j = 2;
    }
    else if (str[0] == '{')
    {
        // 6. Symbolic name.
        j = str.IndexOf('}', 1);
        if (j < 0)
        {
            goto bad;
        }
        string ds = str.Substring(1, j - 1);
        string s = (string)tks.defines[ds];
        if (s == null)
        {
            m_sub = new ReCategory(tks, ds);
        }
        else
        {
            m_sub = new Regex(tks, p + 1, s);
        }
        j++;
    }
    else
    {
        if (str[0] == '.')
        {
            // 7. Dot: anything except a newline.
            m_sub = new ReRange(tks, "[^\n]");
        }
        else
        {
            // 8. Simple character.
            m_sub = new ReStr(tks, str[0]);
        }
        j = 1;
    }

    // 9. String exhausted: m_sub is complete.
    if (j >= n)
    {
        return;
    }

    // 10. Optional postfix operator.
    if (str[j] == '?')
    {
        m_sub = new ReOpt(m_sub);
        j++;
    }
    else if (str[j] == '*')
    {
        m_sub = new ReStar(m_sub);
        j++;
    }
    else if (str[j] == '+')
    {
        m_sub = new RePlus(m_sub);
        j++;
    }

    // 11. String exhausted.
    if (j >= n)
    {
        return;
    }

    if (str[j] == '|')
    {
        // 12. Alternation with the rest of the string.
        m_sub = new ReAlt(tks, m_sub, p + j + 1, str.Substring(j + 1, n - j - 1));
    }
    else
    {
        // 13. Concatenation with the rest of the string.
        m_sub = new ReCat(tks, m_sub, p + j, str.Substring(j, n - j));
    }
    return;

bad:
    tks.erh.Error(new CSToolsFatalException(1, tks.sourceLineInfo(p), str, "ill-formed regular expression " + str));
}
/// <exclude/>
/// <summary>
/// Creates an element for the text <paramref name="str"/>; for every character the
/// lower-case and then the upper-case form is reported to tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="str">Text stored unchanged in m_str.</param>
public ReUStr(TokensGen tks, string str)
{
    this.m_str = str;
    foreach (char c in str)
    {
        tks.m_tokens.UsingChar(Char.ToLower(c));
        tks.m_tokens.UsingChar(Char.ToUpper(c));
    }
}
/// <summary>Creates a DFA node and keeps a reference to the generator's token table.</summary>
/// <param name="tks">Generator forwarded to the base constructor; its m_tokens is stored in m_tokens.</param>
public Dfa(TokensGen tks)
    : base(tks)
{
    this.m_tokens = tks.m_tokens;
}
/// <exclude/>
/// <summary>
/// Creates a node owned by <paramref name="tks"/> and gives it the state value
/// returned by tks.NewState().
/// </summary>
/// <param name="tks">Generator stored in m_tks.</param>
public LNode(TokensGen tks)
{
    this.m_tks = tks;
    this.m_state = tks.NewState();
}
/// <summary>
/// Creates an alternation between the already-parsed <paramref name="sub"/> and
/// the expression parsed from <paramref name="str"/>.
/// </summary>
/// <param name="tks">Generator passed to the Regex parser.</param>
/// <param name="sub">Left alternative, stored in m_sub.</param>
/// <param name="p">Position handed to the Regex constructor for the right side.</param>
/// <param name="str">Text of the right alternative, parsed into m_alt.</param>
public ReAlt(TokensGen tks, Regex sub, int p, string str)
{
    this.m_sub = sub;
    this.m_alt = new Regex(tks, p, str);
}
/// <exclude/>
/// <summary>
/// Parses a reserved-words element <c>{w1 [alias1], w2 [alias2], ...}</c>, with an
/// optional leading <c>U</c> that sets m_upper.
/// </summary>
/// <remarks>
/// Entries are separated by commas. Within an entry the text before the first
/// space is the word; the trimmed rest (or the word itself) names its token class.
/// The pair is recorded in m_wds, and for a class name not yet present in
/// tks.m_tokens.tokens a TokClassDef is created and a TOKEN subclass is written
/// to tks.m_outFile.
/// </remarks>
/// <param name="tks">Generator supplying the token table, output file and error handler.</param>
/// <param name="str">Text of the element.</param>
/// <returns>The new ResWds, or null after error 47 has been reported.</returns>
public static ResWds New(TokensGen tks, string str)
{
    ResWds result = new ResWds();
    string text = str.Trim();

    // The length tests route empty input, and "U" with nothing after it,
    // to the error report instead of an IndexOutOfRangeException.
    if (text.Length > 0 && text[0] == 'U')
    {
        result.m_upper = true;
        text = text.Substring(1).Trim();
    }

    bool wellFormed = text.Length >= 2 && text[0] == '{' && text[text.Length - 1] == '}';
    if (!wellFormed)
    {
        tks.m_tokens.erh.Error(new CSToolsException(47, "bad ResWds element"));
        return null;
    }

    text = text.Substring(1, text.Length - 2).Trim();
    foreach (string item in text.Split(','))
    {
        string word = item.Trim();
        string alias = word;
        int gap = word.IndexOf(' ');
        if (gap > 0)
        {
            alias = word.Substring(gap).Trim();
            word = word.Substring(0, gap);
        }
        result.m_wds[word] = alias;

        if (tks.m_tokens.tokens[alias] == null)
        {
            // Unknown class name: define it and emit its declaration.
            TokClassDef def = new TokClassDef(tks, alias, "TOKEN");
            tks.m_outFile.WriteLine("//%{0}+{1}", alias, def.m_yynum);
            tks.m_outFile.Write("public class {0} : TOKEN", alias);
            tks.m_outFile.WriteLine("{ public override string yyname { get { return \"" + alias + "\";}}");
            tks.m_outFile.WriteLine("public override int yynum { get { return " + def.m_yynum + "; }}");
            tks.m_outFile.WriteLine(" public " + alias + "(Lexer yyl):base(yyl) {}}");
        }
    }
    return result;
}
/// <summary>
/// Creates a concatenation of the already-parsed <paramref name="sub"/> with the
/// expression parsed from <paramref name="str"/>.
/// </summary>
/// <param name="tks">Generator passed to the Regex parser.</param>
/// <param name="sub">First part, stored in m_sub.</param>
/// <param name="p">Position handed to the Regex constructor for the following part.</param>
/// <param name="str">Text of the following part, parsed into m_next.</param>
public ReCat(TokensGen tks, Regex sub, int p, string str)
{
    this.m_sub = sub;
    this.m_next = new Regex(tks, p, str);
}
// build an NFA for a given regular expression
/// <exclude/>
/// <summary>
/// Creates the end node and then lets <paramref name="re"/> build itself into
/// this NFA via re.Build(this).
/// </summary>
/// <param name="tks">Generator forwarded to the base constructor and the end node.</param>
/// <param name="re">Regular expression to build from.</param>
public Nfa(TokensGen tks, Regex re)
    : base(tks)
{
    this.m_end = new NfaNode(tks);
    re.Build(this);
}
/// <summary>
/// Creates a node for the generator <paramref name="tks"/>; its state is the
/// value returned by tks.NewState().
/// </summary>
/// <param name="tks">Generator stored in m_tks.</param>
public LNode(TokensGen tks)
{
    this.m_tks = tks;
    this.m_state = tks.NewState();
}
/*
    Construct a Regex from a given string

     1. If the string is empty there is nothing to do: return (m_sub has been cleared as a precaution).
     2. If the string begins with ( , find the matching ) . A ) that is escaped, or inside quotes
        or [], does not count. Recursively construct a Regex for the text between the ()s,
        mark everything up to the ) as used, and go to step 9.
     3. If the string begins with [ , find the matching ] , watching for escapes.
        Construct a ReRange for the bracketed text, mark it as used, and go to step 9.
     4. If the string begins with ' or " , build the contents, interpreting escaped special
        characters, until the matching quote. Construct a ReStr, mark everything up to the
        final quote as used, and go to step 9.
    4a. If the string begins with U' or U" , do the same but construct a ReUStr.
     5. If the string begins with \ , build a ReStr for the next character (special action
        for n, t, r), mark both as used, and go to step 9.
     6. If the string begins with { , find the matching } and look the name up in the
        definitions table. If found, recursively construct a Regex from the definition;
        otherwise construct a ReCategory for the name. Mark everything up to the } as used.
     7. If the string begins with a dot, construct a ReRange for "[^\n]" and mark the dot as used.
     8. Otherwise there is a simple character at the start: construct a ReStr for it.
     9. If the string is exhausted, return: m_sub holds what has been constructed.
    10. If the next character is ? , * or + , wrap m_sub in a ReOpt, ReStar or RePlus
        respectively and mark the character as used.
    11. If the string is exhausted, return.
    12. If the next character is | , build a ReAlt from m_sub and the rest of the string.
    13. Otherwise build a ReCat from m_sub and the rest of the string.

    An unterminated group, range, string or name, or a backslash with nothing after it,
    is reported through tks.erh.Error as an ill-formed regular expression.
*/
/// <exclude/>
public Regex(TokensGen tks, int p, string str)
{
    int n = str.Length;
    int j; // number of leading characters of str used so far

    // 1. Empty string: nothing to do.
    m_sub = null;
    if (n == 0)
    {
        return;
    }

    if (str[0] == '(')
    {
        // 2. Bracketed expression: locate the matching ')'.
        int nlp = 0;
        int lbrack = 0;
        int quote = 0;
        for (j = 1; j < n; j++)
        {
            char c = str[j];
            if (c == '\\')
            {
                j++;
            }
            else if (c == ']' && lbrack > 0)
            {
                lbrack = 0;
            }
            else if (lbrack > 0)
            {
                // Inside [...]: quote characters here are range members and
                // must not switch quote tracking on, otherwise an expression
                // such as (["']) never finds its closing ')'.
                continue;
            }
            else if (c == '"' || c == '\'')
            {
                if (quote == c)
                {
                    quote = 0;
                }
                else if (quote == 0)
                {
                    quote = c;
                }
            }
            else if (quote > 0)
            {
                continue;
            }
            else if (c == '[')
            {
                lbrack++;
            }
            else if (c == '(')
            {
                nlp++;
            }
            else if (c == ')' && nlp-- == 0)
            {
                break;
            }
        }
        // ">=" rather than "==": a trailing backslash leaves j at n + 1.
        if (j >= n)
        {
            goto bad;
        }
        m_sub = new Regex(tks, p + 1, str.Substring(1, j - 1));
        j++;
    }
    else if (str[0] == '[')
    {
        // 3. Range of characters.
        for (j = 1; j < n && str[j] != ']'; j++)
        {
            if (str[j] == '\\')
            {
                j++;
            }
        }
        if (j >= n)
        {
            goto bad;
        }
        m_sub = new ReRange(tks, str.Substring(0, j + 1));
        j++;
    }
    else if (str[0] == '\'' || str[0] == '"')
    {
        // 4. Quoted string.
        StringBuilder qs = new StringBuilder();
        for (j = 1; j < n && str[j] != str[0]; j++)
        {
            if (str[j] != '\\')
            {
                qs.Append(str[j]);
                continue;
            }
            if (++j >= n)
            {
                goto bad; // backslash with nothing after it
            }
            switch (str[j])
            {
                case 'n': qs.Append('\n'); break;
                case 'r': qs.Append('\r'); break;
                case 't': qs.Append('\t'); break;
                case '\\': qs.Append('\\'); break;
                case '\'': qs.Append('\''); break;
                case '"': qs.Append('"'); break;
                case '\n': break; // escaped line break contributes nothing
                default: qs.Append(str[j]); break;
            }
        }
        if (j >= n)
        {
            goto bad;
        }
        j++;
        m_sub = new ReStr(tks, qs.ToString());
    }
    else if (str.StartsWith("U\"") || str.StartsWith("U'"))
    {
        // 4a. U-prefixed quoted string.
        StringBuilder qs = new StringBuilder();
        for (j = 2; j < n && str[j] != str[1]; j++)
        {
            if (str[j] != '\\')
            {
                qs.Append(str[j]);
                continue;
            }
            if (++j >= n)
            {
                goto bad; // backslash with nothing after it
            }
            switch (str[j])
            {
                case 'n': qs.Append('\n'); break;
                case 'r': qs.Append('\r'); break;
                case 't': qs.Append('\t'); break;
                case '\\': qs.Append('\\'); break;
                case '\'': qs.Append('\''); break;
                case '"': qs.Append('"'); break;
                case '\n': break;
                default: qs.Append(str[j]); break;
            }
        }
        if (j >= n)
        {
            goto bad;
        }
        j++;
        m_sub = new ReUStr(tks, qs.ToString());
    }
    else if (str[0] == '\\')
    {
        // 5. Escaped single character.
        if (n < 2)
        {
            goto bad; // lone backslash
        }
        char ch = str[1];
        switch (ch)
        {
            case 'n': ch = '\n'; break;
            case 't': ch = '\t'; break;
            case 'r': ch = '\r'; break;
        }
        m_sub = new ReStr(tks, ch);
        j = 2;
    }
    else if (str[0] == '{')
    {
        // 6. Symbolic name.
        j = str.IndexOf('}', 1);
        if (j < 0)
        {
            goto bad;
        }
        string ds = str.Substring(1, j - 1);
        string s = (string)tks.defines[ds];
        if (s == null)
        {
            m_sub = new ReCategory(tks, ds);
        }
        else
        {
            m_sub = new Regex(tks, p + 1, s);
        }
        j++;
    }
    else
    {
        if (str[0] == '.')
        {
            // 7. Dot: anything except a newline.
            m_sub = new ReRange(tks, "[^\n]");
        }
        else
        {
            // 8. Simple character.
            m_sub = new ReStr(tks, str[0]);
        }
        j = 1;
    }

    // 9. String exhausted: m_sub is complete.
    if (j >= n)
    {
        return;
    }

    // 10. Optional postfix operator.
    if (str[j] == '?')
    {
        m_sub = new ReOpt(m_sub);
        j++;
    }
    else if (str[j] == '*')
    {
        m_sub = new ReStar(m_sub);
        j++;
    }
    else if (str[j] == '+')
    {
        m_sub = new RePlus(m_sub);
        j++;
    }

    // 11. String exhausted.
    if (j >= n)
    {
        return;
    }

    if (str[j] == '|')
    {
        // 12. Alternation with the rest of the string.
        m_sub = new ReAlt(tks, m_sub, p + j + 1, str.Substring(j + 1, n - j - 1));
    }
    else
    {
        // 13. Concatenation with the rest of the string.
        m_sub = new ReCat(tks, m_sub, p + j, str.Substring(j, n - j));
    }
    return;

bad:
    tks.erh.Error(new CSToolsFatalException(1, tks.sourceLineInfo(p), str, "ill-formed regular expression " + str));
}
/// <summary>
/// Records <paramref name="ch"/> in m_map (value true) and reports it to
/// tks.m_tokens.UsingChar.
/// </summary>
/// <param name="tks">Generator that owns the token table.</param>
/// <param name="ch">Character to add.</param>
private void Set(TokensGen tks, char ch)
{
    m_map[ch] = true;
    tks.m_tokens.UsingChar(ch);
}