//Compile a single-character escape: \d \D \s \S \w \W, the control escapes
//\f \n \r \t \v, the anchors \A \Z \z, and escaped metacharacters.
//  c - the character that follows the backslash.
//Returns a set node for the character-class shorthands and a text node for
//everything else (empty text for anchors, which contribute no characters).
//An unknown escape is reported through AssertParse; if AssertParse returns
//instead of raising, the result is null.
internal RENode CompileSimpleMacro(char c)
{
    RENode node = null;
    RESetNode set = null;

    //An escaped metacharacter stands for itself. '^' and '$' are part of the
    //list because CompileAtom treats them as metacharacters, so escaping them
    //is the only way to request them as literal text.
    if (@"[]{}()*-+.?\|^$".IndexOf(c) >= 0)
    {
        return new RETextNode(c.ToString());
    }

    switch (c)
    {
        case 'd': // [0-9]
        case 'D': // [^0-9]
            //Lower-case shorthand -> positive set, upper-case -> negated set.
            node = set = new RESetNode(c == 'd');
            set.AddRange('0', '9');
            break;
        case 's': // whitespace
        case 'S': // anything but whitespace
            node = set = new RESetNode(c == 's');
            set.AddChars(" \r\n\f\v\t");
            break;
        case 'w': // [a-zA-Z0-9_]
        case 'W': // [^a-zA-Z0-9_]
            node = set = new RESetNode(c == 'w');
            set.AddRange('a', 'z');
            set.AddRange('A', 'Z');
            set.AddRange('0', '9');
            set.AddChars("_");
            break;
        case 'f':
            node = new RETextNode("\f");
            break;
        case 'n':
            node = new RETextNode("\n");
            break;
        case 'r':
            node = new RETextNode("\r");
            break;
        case 't':
            node = new RETextNode("\t");
            break;
        case 'v':
            node = new RETextNode("\v");
            break;
        case 'A':
        case 'Z':
        case 'z':
            //Anchors match a position, not a character: emit nothing.
            node = new RETextNode(String.Empty);
            break;
        default:
            AssertParse(false, "Invalid escape.");
            break;
    }

    return node;
}
//Compile one atom (token) at the current parse position and advance past it.
//Handles: '.' (any char in 0..127), '[...]' sets, '(...)' sub expressions
//(indexed, named or non-capturing), the '^' / '$' anchors, backslash escapes
//(character codes, numbered and named back references, simple macros) and
//runs of literal text.
//Returns the node built for the atom. Parse errors are reported through
//AssertParse.
internal RENode CompileAtom()
{
    RENode atom = null;
    RESetNode set = null;
    int start = 0;
    int end = 0;

    AssertParse(!mParseDone, "Reached end of string. No element found.");
    AssertParse(!("|)?+*{}".Contains(mCurrent.ToString())), "No element found.");

    switch (mCurrent)
    {
        case '.': //Any single char
            atom = set = new RESetNode(true);
            set.AddRange(Convert.ToChar(0), Convert.ToChar(127));
            NextChar();
            break;

        case '[': //Positive or negative set
            NextChar();
            atom = CompileSet();
            break;

        case '(': //Sub expression
            int refIndex = 0; //-2 -> don't capture, -1 -> named capture, 0 -> indexed capture
            NextChar();

            //By default, subexpressions must be captured for future reference
            if (mCurrent == '?')
            {
                NextChar();
                if (mCurrent == ':') //"?:" means don't store a reference
                {
                    NextChar();
                    refIndex = -2;
                }
                else //Named backreference, extract backreference name
                {
                    ExtractBackrefName(ref start, ref end);
                    refIndex = -1;
                }
            }

            //Keep a typed reference so the name can be set without a cast.
            RESubExprNode subExpr = new RESubExprNode(CompileExpr());
            atom = subExpr;
            AssertParse(mCurrent == ')', "Expected ')'");
            NextChar();

            if (refIndex == -1) //Named backreference
            {
                subExpr.Name = mRegex.ToString().Substring(start, end - start + 1);
                mNamedBackRefs.Add(atom);
            }
            else if (refIndex == 0) //Indexed backreference
            {
                mBackRefs.Add(atom);
            }
            break;

        case '^':
        case '$':
            //Anchors contribute no characters.
            atom = new RETextNode(String.Empty);
            NextChar();
            break;

        case '\\':
            NextChar();
            if (Char.ToLower(mCurrent, CultureInfo.InvariantCulture) == 'x'
                || Char.ToLower(mCurrent, CultureInfo.InvariantCulture) == 'u'
                || mCurrent == '0')
            {
                //Character given by its code
                atom = new RETextNode(EscapeValue().ToString());
            }
            else if (Char.IsDigit(mCurrent)) //Backreference by index
            {
                atom = GetBackRef((int)EscapeValue());
                AssertParse(atom != null, "Couldn't find back reference");
                atom = new RESubExprNode(atom);
            }
            else if (mCurrent == 'k') //Backreference by name
            {
                NextChar();
                ExtractBackrefName(ref start, ref end);
                atom = GetBackRef(mRegex.ToString().Substring(start, end - start + 1));
                AssertParse(atom != null, "Couldn't find back reference");
                atom = new RESubExprNode(atom); //Create a copy of the referenced node
            }
            else
            {
                atom = CompileSimpleMacro(mCurrent);
                NextChar();
            }
            break;

        default: //Literal text up to the next metacharacter
            string regexText = mRegex.ToString();
            int closeIndex = regexText.IndexOfAny("-*+?(){}\\[]^$.|".ToCharArray(), mIndex + 1);

            if (closeIndex == -1)
            {
                //The literal runs to the end of the pattern.
                mParseDone = true;
                closeIndex = regexText.Length - 1;
                atom = new RETextNode(regexText.Substring(mIndex, closeIndex - mIndex + 1));
            }
            else
            {
                //A quantifier binds only to the character right before it
                //("abc*" is "ab" followed by "c*"). When the run is longer than
                //one character, leave its last character for the next call so
                //the quantifier is applied to that character alone.
                if (closeIndex - mIndex > 1 && "*+?{".IndexOf(regexText[closeIndex]) >= 0)
                {
                    closeIndex--;
                }

                atom = new RETextNode(regexText.Substring(mIndex, closeIndex - mIndex));
            }

            mIndex = closeIndex;
            mCurrent = mRegex[mIndex];
            break;
    }

    return atom;
}
//Compile a character set (i.e. expressions like [abc], [A-Z], [^0-9]) or a
//named class written as [:name:]. Expects the current character to be the
//one right after the opening '['.
//Returns the node for the set, or whatever CompileMacro returns for a named
//class. Parse errors are reported through AssertParse.
internal RENode CompileSet()
{
    RENode atom = null;
    char cStart, cEnd;
    RESetNode set;

    if (mCurrent == ':') //Named class: [:name:]
    {
        NextChar();

        //Look for the terminator from the current position onwards. Searching
        //from the start of the pattern would find the terminator of an earlier
        //[:name:] and yield a negative length.
        int closeIndex = mRegex.ToString().IndexOf(":]", mIndex, StringComparison.Ordinal);
        AssertParse(closeIndex != -1, "Expected ':]'.");

        //NOTE(review): arguments look like (start index, length) of the name - confirm against CompileMacro.
        atom = CompileMacro(mIndex, closeIndex - mIndex);

        //Jump to the ':' of the terminator, then step over ':' and ']'.
        mIndex = closeIndex;
        NextChar();
        NextChar();
        return atom;
    }

    if (mCurrent == '^') //Negated set
    {
        atom = set = new RESetNode(false);
        NextChar();
    }
    else
    {
        atom = set = new RESetNode(true);
    }

    //If - or ] are specified as the first char, escape is not required
    if (mCurrent == '-' || mCurrent == ']')
    {
        set.AddChars(mCurrent.ToString());
        NextChar();
    }

    while ((!mParseDone) && (mCurrent != ']'))
    {
        cStart = CompileSetChar();

        if (mCurrent == '-') //Range such as a-z
        {
            NextChar();
            AssertParse(!mParseDone && mCurrent != ']', "End of range is not specified.");
            cEnd = CompileSetChar();
            set.AddRange(cStart, cEnd);
        }
        else //Single character
        {
            set.AddChars(cStart.ToString());
        }
    }

    AssertParse(mCurrent == ']', "Expected ']'.");
    NextChar();
    return atom;
}
//Produce the text for this node.
//  random - source of randomness used when the node has to be corrupted.
//Returns the node text unchanged, unless this node is RECompiler.InvalidNode,
//in which case one randomly chosen character is replaced by a different one.
internal override string Generate(Random random)
{
    //Ordinary nodes emit their text as is.
    if (this != RECompiler.InvalidNode)
    {
        return mNodeText.ToString();
    }

    //Pick the character to corrupt.
    int index = random.Next(mNodeText.Length);

    //A negative set holding only that character generates any other character.
    RESetNode anythingElse = new RESetNode(false);
    anythingElse.AddChars(mNodeText[index].ToString());

    //Swap the replacement in and take a snapshot of the corrupted text.
    char original = mNodeText[index];
    mNodeText[index] = anythingElse.Generate(random)[0];
    string corrupted = mNodeText.ToString();

    //Put the original character back: the node may be generated again
    //(e.g. when it is repeated) and must start from clean text.
    mNodeText[index] = original;

    return corrupted;
}