// Compiles the character that follows a backslash (\d \D \s \S \w \W etc.) into a node.
//
// Three groups of escapes are recognised:
//   - escaped metacharacters ([ ] { } ( ) * - + . ? \ | ^ $): a text node holding that character;
//   - character classes \d \D \s \S \w \W: a set node (the upper-case form is the negated set);
//   - control characters \f \n \r \t \v: a text node holding the control character.
// \A, \Z and \z compile to an empty text node.
// Anything else is reported through AssertParse with the message "Invalid escape.".
//
// c: the character immediately after the backslash.
// Returns: the compiled node. Null is returned only if AssertParse returns normally for an
//          invalid escape (AssertParse is presumably expected to abort the parse - confirm).
internal RENode CompileSimpleMacro(char c)
{
    // '^' and '$' are included because CompileAtom treats the bare characters as
    // metacharacters; without them here there is no way to write either one as a literal.
    const string EscapableMetaChars = @"[]{}()*-+.?\|^$";
    const string WhitespaceChars = " \r\n\f\v\t";

    if (EscapableMetaChars.IndexOf(c) >= 0)
    {
        return new RETextNode(c.ToString());
    }

    RESetNode set;

    switch (c)
    {
        case 'd': // [0-9]
        case 'D': // [^0-9]
            // The lower-case form builds the positive set, the upper-case form the negated one.
            set = new RESetNode(c == 'd');
            set.AddRange('0', '9');
            return set;

        case 's': // whitespace
        case 'S': // anything except whitespace
            set = new RESetNode(c == 's');
            set.AddChars(WhitespaceChars);
            return set;

        case 'w': // [a-zA-Z0-9_]
        case 'W': // [^a-zA-Z0-9_]
            set = new RESetNode(c == 'w');
            set.AddRange('a', 'z');
            set.AddRange('A', 'Z');
            set.AddRange('0', '9');
            set.AddChars("_");
            return set;

        case 'f':
            return new RETextNode("\f");

        case 'n':
            return new RETextNode("\n");

        case 'r':
            return new RETextNode("\r");

        case 't':
            return new RETextNode("\t");

        case 'v':
            return new RETextNode("\v");

        case 'A':
        case 'Z':
        case 'z':
            // These escapes contribute no characters: they compile to an empty text node.
            return new RETextNode(String.Empty);

        default:
            AssertParse(false, "Invalid escape.");
            return null;
    }
}
// Compiles the token at the current parse position into a node and advances past it.
//
// Dispatch on the current character:
//   '.'       positive set covering characters 0..127;
//   '['       character set, delegated to CompileSet;
//   '('       sub-expression compiled by CompileExpr. A "?:" prefix means the group is not
//             recorded; any other "?" prefix is a named group (name located by
//             ExtractBackrefName, node added to mNamedBackRefs); with no prefix the node
//             is added to mBackRefs;
//   '^', '$'  empty text node;
//   '\'       escape: x/X/u/U/0 -> text node built from EscapeValue(); another digit ->
//             back reference by index; 'k' -> back reference by name; anything else is
//             handed to CompileSimpleMacro;
//   other     run of literal characters up to the next special character or the end of
//             the pattern.
//
// Fails through AssertParse when parsing is already finished or when the current
// character is one of | ) ? + * { }.
// Returns: the compiled node.
internal RENode CompileAtom()
{
    AssertParse(!mParseDone, "Reached end of string. No element found.");
    AssertParse("|)?+*{}".IndexOf(mCurrent) < 0, "No element found.");

    RENode result = null;

    // Inclusive bounds of a back-reference name inside the pattern text,
    // filled in by ExtractBackrefName.
    int nameFirst = 0;
    int nameLast = 0;

    switch (mCurrent)
    {
        case '.':
        {
            // Any single character in the range 0..127.
            RESetNode anyChar = new RESetNode(true);
            anyChar.AddRange(Convert.ToChar(0), Convert.ToChar(127));
            result = anyChar;
            NextChar();
            break;
        }

        case '[':
        {
            // Positive or negative set.
            NextChar();
            result = CompileSet();
            break;
        }

        case '(':
        {
            // Sub-expressions are recorded as indexed back references unless a '?' prefix
            // says otherwise.
            bool isNamed = false;
            bool isIndexed = true;

            NextChar();

            if (mCurrent == '?')
            {
                NextChar();
                isIndexed = false;

                if (mCurrent == ':')
                {
                    // "?:" - do not store a reference to this group.
                    NextChar();
                }
                else
                {
                    // Named back reference: locate the name inside the pattern.
                    ExtractBackrefName(ref nameFirst, ref nameLast);
                    isNamed = true;
                }
            }

            RESubExprNode group = new RESubExprNode(CompileExpr());
            result = group;

            AssertParse(mCurrent == ')', "Expected ')'");
            NextChar();

            if (isNamed)
            {
                group.Name = mRegex.ToString().Substring(nameFirst, nameLast - nameFirst + 1);
                mNamedBackRefs.Add(result);
            }
            else if (isIndexed)
            {
                mBackRefs.Add(result);
            }

            break;
        }

        case '^':
        case '$':
        {
            // Anchors produce no characters.
            result = new RETextNode(String.Empty);
            NextChar();
            break;
        }

        case '\\':
        {
            NextChar();

            char escapeKind = Char.ToLower(mCurrent, CultureInfo.InvariantCulture);

            if (escapeKind == 'x' || escapeKind == 'u' || mCurrent == '0')
            {
                // Character given by value.
                result = new RETextNode(EscapeValue().ToString());
            }
            else if (Char.IsDigit(mCurrent))
            {
                // Back reference by index.
                RENode referenced = GetBackRef((int)EscapeValue());
                AssertParse(referenced != null, "Couldn't find back reference");
                result = new RESubExprNode(referenced);
            }
            else if (mCurrent == 'k')
            {
                // Back reference by name.
                NextChar();
                ExtractBackrefName(ref nameFirst, ref nameLast);

                string referencedName = mRegex.ToString().Substring(nameFirst, nameLast - nameFirst + 1);
                RENode referenced = GetBackRef(referencedName);
                AssertParse(referenced != null, "Couldn't find back reference");

                // Wrap the referenced node in a new sub-expression node.
                result = new RESubExprNode(referenced);
            }
            else
            {
                result = CompileSimpleMacro(mCurrent);
                NextChar();
            }

            break;
        }

        default:
        {
            // Literal text: everything from the current index up to (not including) the
            // next special character, or up to the end of the pattern when there is none.
            // NOTE(review): the entire run becomes one text node; confirm the caller applies
            // a trailing quantifier to the intended portion.
            string pattern = mRegex.ToString();
            int runEnd = pattern.IndexOfAny("-*+?(){}\\[]^$.|".ToCharArray(), mIndex + 1);
            int runLength;

            if (runEnd == -1)
            {
                // No further special character: consume the rest of the pattern.
                mParseDone = true;
                runEnd = mRegex.Length - 1;
                runLength = runEnd - mIndex + 1;
            }
            else
            {
                runLength = runEnd - mIndex;
            }

            result = new RETextNode(pattern.Substring(mIndex, runLength));

            mIndex = runEnd;
            mCurrent = mRegex[mIndex];
            break;
        }
    }

    return result;
}