/// <summary>
/// The child node that must be chosen.
/// If this is not null then the node must repeat at least once.
/// </summary>
private RegexNode mReservedPath;

/// <summary>
/// Builds a repeat node around <paramref name="refNode"/> and makes this node its parent.
/// </summary>
/// <param name="refNode">The node being repeated.</param>
/// <param name="minRepeat">Lower repetition bound.</param>
/// <param name="maxRepeat">Upper repetition bound; -1 is treated as "no upper bound" by the check below.</param>
/// <param name="sameValue">Stored as-is in mSameValue.</param>
public RegexRepeatNode(RegexNode refNode, int minRepeat, int maxRepeat, bool sameValue)
{
    // Only a node that does NOT span zero-to-infinity repetitions can be invalidated,
    // and only while the compiler is inside an invalidating section.
    bool coversZeroToInfinity = (minRepeat <= 0) && (maxRepeat == -1);
    if (RegexCompiler.IsInvalidSection && !coversZeroToInfinity)
    {
        RegexCompiler.InvalidableNodes.Add(this);
    }

    // Repetition settings
    mMinRepeat = minRepeat;
    mMaxRepeat = maxRepeat;
    mSameValue = sameValue;

    // No path is reserved until ReservePath is called
    mReservedPath = null;

    // Link the repeated node into the tree
    mRefNode = refNode;
    mRefNode.Parent = this;
}
/// <summary>
/// Returns a random string, conforming to the provided regular expression pattern.
/// </summary>
/// <param name="regex">The regular expression, which the generated string should conform to.</param>
/// <param name="seed">The random number generator seed.</param>
/// <returns>A string, conforming to the provided regular expression pattern.</returns>
/// <exception cref="ArgumentNullException"><paramref name="regex"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The pattern contains the "\i" invalidation marker but no node can be invalidated,
/// or the string generated for an invalidated node still matches the pattern.
/// </exception>
/// <remarks>
/// This method resets and then uses static state on RegexCompiler
/// (IsInvalidSection, InvalidNode, InvalidableNodes) for the duration of the call.
/// </remarks>
///
/// <example>
/// The following example demonstrates how to generate a random string from a Regex pattern:
/// <code lang="C#" >
/// // Using a custom regex pattern...
/// Regex emailAddress1 = new Regex(@"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@([0-9a-zA-Z][-\w]*[0-9a-zA-Z]\.)+[a-zA-Z]{2,9})$");
/// string s1 = StringFactory.GenerateRandomString(emailAddress1, 1234);
///
/// // Using a standard regex pattern...
/// Regex emailAddress2 = CommonRegexPatterns.EmailAddress;
/// string s2 = StringFactory.GenerateRandomString(emailAddress2, 1234);
/// </code>
/// </example>
public static string GenerateRandomString(Regex regex, int seed)
{
    if (regex == null)
    {
        throw new ArgumentNullException("regex");
    }

    string pattern = regex.ToString();
    Random random = new Random(seed);

    //reset the static variables
    RegexCompiler.IsInvalidSection = false;
    RegexCompiler.InvalidNode = null;
    RegexCompiler.InvalidableNodes.Clear();

    //construct the RegEx tree
    RegexCompiler compiler = new RegexCompiler();
    RegexNode node = compiler.Compile(pattern);

    //search for a signal to invalidate a node
    if (pattern.IndexOf("\\i", StringComparison.Ordinal) != -1)
    {
        //something should have been invalidated
        //select a node to invalidate
        if (RegexCompiler.InvalidableNodes.Count == 0)
        {
            throw new ArgumentException("Asked to generate invalid: Impossible to invalidate");
        }

        RegexCompiler.InvalidNode = RegexCompiler.InvalidableNodes[random.Next(RegexCompiler.InvalidableNodes.Count)];

        //Mark REOrNodes and RERepeatNodes to ensure that the invalid node will be part of the string
        RegexCompiler.InvalidNode.ReservePath(null);
    }

    //generate the string
    string result = node.Generate(random);

    if (RegexCompiler.InvalidNode != null)
    {
        //confirm that the generated string is invalid (e.g. [a-z]|[^a-z] will always fail)
        //The "\i" markers are stripped from the pattern TEXT (not via Regex.Replace, which would
        //treat "\i" as the input string), and the pattern is grouped so that the anchors apply
        //to the whole expression rather than to the first/last alternative only.
        Regex compare = new Regex("^(?:" + pattern.Replace("\\i", "") + ")$");
        if (compare.IsMatch(result))
        {
            throw new ArgumentException(regex + ": Did not generate invalid string: " + result);
        }
    }

    return result;
}
/// <summary>
/// Produces a single character for this character set.
/// When this node is the one selected for invalidation (RegexCompiler.InvalidNode), a character
/// outside the set is picked; otherwise a character inside the set is picked.
/// </summary>
/// <param name="random">Random number source used to pick the character.</param>
/// <returns>A one-character string.</returns>
public override string Generate(Random random)
{
    RegexNode.AssertParse(mNumChoices > 0, "No valid range specified in char set");

    bool pickInvalid = (this == RegexCompiler.InvalidNode);

    //number of map entries we are allowed to choose from
    int candidateCount = pickInvalid ? (mMapSize - mNumChoices) : mNumChoices;

    //Without this guard an all-inclusive set chosen for invalidation would make the seek loop
    //below run past the end of the map, because no entry ever qualifies.
    //(presumably AssertParse raises when the condition is false - confirm against RegexNode)
    RegexNode.AssertParse(candidateCount > 0, "No invalid characters available in char set");

    int remaining = random.Next(candidateCount);
    int index = -1;

    while (remaining >= 0) //seek to the selected qualifying element
    {
        index++;

        bool inSet = (mPositiveSet && mMap[index] == 1) || (!mPositiveSet && mMap[index] == 0);
        //positive and negative sets are inverted when generating an invalid character
        bool outOfSet = (mPositiveSet && mMap[index] == 0) || (!mPositiveSet && mMap[index] == 1);

        if (pickInvalid ? outOfSet : inSet)
        {
            remaining--;
        }
    }

    return Convert.ToChar(index).ToString();
}
//Compile node starting with |
/// <summary>
/// Compiles consecutive pieces until the parse is done or a '|' or ')' is reached.
/// </summary>
/// <returns>
/// The single piece when only one was found; otherwise a RegexAndNode whose children are
/// the pieces in the order they were compiled.
/// </returns>
public RegexNode CompileBranch()
{
    RegexNode firstPiece = CompilePiece();

    bool branchEnded = mParseDone || mCurrent == '|' || mCurrent == ')';
    if (branchEnded)
    {
        //a lone piece needs no wrapper node
        return firstPiece;
    }

    RegexAndNode sequence = new RegexAndNode();
    sequence.Children.Add(firstPiece);
    firstPiece.Parent = sequence;

    //at least one more piece is known to follow at this point
    do
    {
        RegexNode followingPiece = CompilePiece();
        sequence.Children.Add(followingPiece);
        followingPiece.Parent = sequence;
    }
    while (!mParseDone && mCurrent != '|' && mCurrent != ')');

    return sequence;
}
//Compile the expression i.e. main body or expr in parenthesis
/// <summary>
/// Compiles one branch, plus any further branches separated by '|'.
/// </summary>
/// <returns>
/// The single branch when no '|' follows it; otherwise a RegexOrNode whose children are
/// the branches in the order they were compiled.
/// </returns>
public RegexNode CompileExpr()
{
    RegexNode firstBranch = CompileBranch();

    if (mCurrent == '|')
    {
        RegexOrNode alternatives = new RegexOrNode();
        alternatives.Children.Add(firstBranch);
        firstBranch.Parent = alternatives;

        do
        {
            NextChar(); //step over the '|'

            RegexNode alternative = CompileBranch();
            alternatives.Children.Add(alternative);
            alternative.Parent = alternatives;
        }
        while (mCurrent == '|');

        return alternatives;
    }

    return firstBranch;
}
//Compile \d \D \s \S etc.
/// <summary>
/// Builds the node for a single-character escape, given the character that followed the backslash.
/// </summary>
/// <param name="c">The character following the backslash.</param>
/// <returns>
/// A text node for escaped metacharacters and control-character escapes, a set node for the
/// character-class escapes (d, D, s, S, w, W), or an empty text node for the A, Z and z escapes.
/// Null is returned only if AssertParse returns for an unrecognized escape.
/// </returns>
public RegexNode CompileSimpleMacro(char c)
{
    //Escaped metacharacters stand for themselves. '^' and '$' are included so that "\^" and "\$"
    //produce the literal characters instead of being rejected as invalid escapes.
    if (@"[]{}()*-+.?\|^$".IndexOf(c) >= 0)
    {
        return new RegexTextNode(c.ToString());
    }

    RegexSetNode set;

    switch (c)
    {
        case 'd': // [0-9]
        case 'D': // [^0-9]
            set = new RegexSetNode(c == 'd');
            set.AddRange('0', '9');
            return set;

        case 's':
        case 'S':
            set = new RegexSetNode(c == 's');
            set.AddChars(" \r\n\f\v\t");
            return set;

        case 'w': // [a-zA-Z0-9_]
        case 'W': // [^a-zA-Z0-9_]
            set = new RegexSetNode(c == 'w');
            set.AddRange('a', 'z');
            set.AddRange('A', 'Z');
            set.AddRange('0', '9');
            set.AddChars("_");
            return set;

        case 'f':
            return new RegexTextNode("\f");

        case 'n':
            return new RegexTextNode("\n");

        case 'r':
            return new RegexTextNode("\r");

        case 't':
            return new RegexTextNode("\t");

        case 'v':
            return new RegexTextNode("\v");

        case 'A':
        case 'Z':
        case 'z':
            //these escapes contribute no characters to the generated string
            return new RegexTextNode(String.Empty);

        default:
            AssertParse(false, "Invalid escape.");
            return null;
    }
}
//Compile token
/// <summary>
/// Compiles a single atom at the current position: any-char ('.'), a set ('['), a sub expression
/// ('('), an anchor ('^' or '$', which produce empty text), an escape ('\'), or a run of literal text.
/// </summary>
/// <returns>The node built for the atom.</returns>
/// <remarks>
/// A literal run is cut one character short when it is directly followed by a quantifier
/// ('*', '+', '?', '{', optionally preceded by the "\i" marker), so that the quantifier applies
/// to the last literal character only (e.g. "abc*" is "ab" followed by repeated "c").
/// </remarks>
public RegexNode CompileAtom()
{
    RegexNode atom = null;
    RegexSetNode set = null;
    int start = 0;
    int end = 0;

    AssertParse(!mParseDone, "Reached end of string. No element found.");
    AssertParse(!("|)?+*{}".Contains(mCurrent.ToString())), "No element found.");

    switch (mCurrent)
    {
        case '.': //Any single char
            atom = set = new RegexSetNode(true);
            set.AddRange(Convert.ToChar(0), Convert.ToChar(127));
            NextChar();
            break;

        case '[': //Positive or negative set
            NextChar();
            atom = CompileSet();
            break;

        case '(': //Sub expression
            int refIndex = 0; //-2 -> don't capture, -1 -> named capture, 0-> indexed capture
            NextChar();

            //By default, subexpressions must be captured for future reference,
            if (mCurrent == '?')
            {
                NextChar();
                if (mCurrent == ':') //If sub expression begins with ?: it means don't store reference
                {
                    NextChar();
                    refIndex = -2;
                }
                else //Named backreference, extract backreference name
                {
                    ExtractBackrefName(ref start, ref end);
                    refIndex = -1;
                }
            } //else use indexed backreference

            //keep a typed reference so that no cast is needed to set the name below
            RegexSubExpressionNode subExpression = new RegexSubExpressionNode(CompileExpr());
            atom = subExpression;
            AssertParse(mCurrent == ')', "Expected ')'");
            NextChar();

            if (refIndex == -1) //Named backreference
            {
                subExpression.Name = mRegex.ToString().Substring(start, end - start + 1);
                mNamedBackRefs.Add(atom);
            }
            else if (refIndex == 0) //Indexed backreference
            {
                mBackRefs.Add(atom);
            }
            break;

        case '^':
        case '$':
            //anchors contribute no characters to the generated string
            atom = new RegexTextNode(String.Empty);
            NextChar();
            break;

        case '\\':
            NextChar();
            if (Char.ToLower(mCurrent) == 'x' || Char.ToLower(mCurrent) == 'u' || mCurrent == '0')
            {
                atom = new RegexTextNode(EscapeValue().ToString());
            }
            else if (Char.IsDigit(mCurrent))
            {
                atom = GetBackRef((int)EscapeValue());
                AssertParse(atom != null, "Couldn't find back reference");
                atom = new RegexSubExpressionNode(atom);
            }
            else if (mCurrent == 'k') //referencing a backreference by name
            {
                NextChar();
                ExtractBackrefName(ref start, ref end);
                atom = GetBackRef(mRegex.ToString().Substring(start, end - start + 1));
                AssertParse(atom != null, "Couldn't find back reference");
                atom = new RegexSubExpressionNode(atom); //Create a copy of the referenced node
            }
            else
            {
                atom = CompileSimpleMacro(mCurrent);
                NextChar();
            }
            break;

        default: //Run of literal text
            string regexText = mRegex.ToString();
            int closeIndex = regexText.IndexOfAny("-*+?(){}\\[]^$.|".ToCharArray(), mIndex + 1);

            if (closeIndex == -1)
            {
                //the literal run extends to the end of the pattern
                mParseDone = true;
                closeIndex = mRegex.Length - 1;
                atom = new RegexTextNode(regexText.Substring(mIndex, closeIndex - mIndex + 1));
            }
            else
            {
                const string quantifierStarts = "*+?{";
                char stopChar = regexText[closeIndex];

                //does a quantifier (possibly prefixed by the "\i" marker) follow the run?
                bool quantifierFollows = quantifierStarts.IndexOf(stopChar) >= 0;
                if (!quantifierFollows
                    && stopChar == '\\'
                    && closeIndex + 2 < regexText.Length
                    && regexText[closeIndex + 1] == 'i')
                {
                    quantifierFollows = quantifierStarts.IndexOf(regexText[closeIndex + 2]) >= 0;
                }

                //A quantifier binds to the last literal character only, so leave that character
                //out of this run; it is compiled as its own atom on the next call.
                if (quantifierFollows && closeIndex - mIndex > 1)
                {
                    closeIndex--;
                }

                atom = new RegexTextNode(regexText.Substring(mIndex, closeIndex - mIndex));
            }

            mIndex = closeIndex;
            mCurrent = mRegex[mIndex];
            break;
    }

    return atom;
}
//Compile token followed by *+?{}
/// <summary>
/// Compiles one atom together with an optional quantifier ('*', '+', '?', or '{...}').
/// The "\i" marker before the atom, or between the atom and its quantifier, switches
/// RegexCompiler.IsInvalidSection on while that part is compiled; the previous value is
/// restored before returning.
/// </summary>
/// <returns>A RegexRepeatNode wrapping the atom when a quantifier follows; otherwise the atom itself.</returns>
public RegexNode CompilePiece()
{
    RegexNode node = null;

    //store the old invalidating state for restoring after this node
    bool oldInvalidState = RegexCompiler.IsInvalidSection;

    //check if we want to invalidate the 'atom' node and subnodes
    //(the length check keeps a pattern ending in '\' from indexing past the end)
    if (mCurrent == '\\'
        && mIndex + 1 < mRegex.Length
        && mRegex[mIndex + 1] == 'i') //entering invalidating nodes section
    {
        NextChar();
        NextChar();

        //invalidate the following node and subnodes
        RegexCompiler.IsInvalidSection = true;
    }

    RegexNode atom = CompileAtom();

    //revert the invalidating state
    RegexCompiler.IsInvalidSection = oldInvalidState;

    //check special case of invalidating a repeating node
    //have to confirm with "*+?{" to verify that it's not another type of node (that parses elsewhere)
    //(the length check keeps a pattern ending in "\i" from indexing past the end)
    if (mCurrent == '\\'
        && mIndex + 2 < mRegex.Length
        && mRegex[mIndex + 1] == 'i'
        && "*+?{".Contains(mRegex[mIndex + 2].ToString()))
    {
        NextChar();
        NextChar();

        //invalidate the repeating node
        RegexCompiler.IsInvalidSection = true;
    }

    const int MAXREPEAT = -1; //value representing infinity

    switch (mCurrent)
    {
        case '*': //zero or more repetition
            node = new RegexRepeatNode(atom, 0, MAXREPEAT, false);
            NextChar();
            break;

        case '+': //one or more repetition
            node = new RegexRepeatNode(atom, 1, MAXREPEAT, false);
            NextChar();
            break;

        case '?': //zero or one repetition
            node = new RegexRepeatNode(atom, 0, 1, false);
            NextChar();
            break;

        case '{': //Min and max repetition limits defined
            int nMin = 0;
            int nMax = 0;
            bool sameChar = false;
            NextChar();

            //a leading '=' is passed to the repeat node as its sameValue flag
            if (mCurrent == '=')
            {
                sameChar = true;
                NextChar();
            }

            int closeIndex = mRegex.ToString().IndexOf('}', mIndex);
            AssertParse(closeIndex != -1, "Expected '}'");

            string[] repeatTokens = mRegex.ToString().Substring(mIndex, closeIndex - mIndex).
                                    Split(new char[] { ',' });

            if (repeatTokens.Length == 1)
            {
                //{n}: exact count
                nMin = nMax = int.Parse(repeatTokens[0]);
            }
            else if (repeatTokens.Length == 2)
            {
                nMin = int.Parse(repeatTokens[0]);

                //check for {n,} case
                if (repeatTokens[1].Length > 0)
                {
                    nMax = int.Parse(repeatTokens[1]);
                }
                else
                {
                    nMax = MAXREPEAT; //only lower bound specified
                }
            }
            else
            {
                AssertParse(false, "Repeat values cannot be parsed");
            }

            //the second operand is only evaluated when nMin > nMax, which requires two tokens
            AssertParse(nMin <= nMax || repeatTokens[1].Length == 0, "Max repeat is less than min repeat");

            mIndex = closeIndex;
            NextChar();
            node = new RegexRepeatNode(atom, nMin, nMax, sameChar);
            break;

        default: //no quantifier
            node = atom;
            break;
    }

    //revert invalidation after generating the repeating node
    RegexCompiler.IsInvalidSection = oldInvalidState;

    return node;
}
/// <summary>
/// The child node that this Or node must choose.
/// The chosen node is random if this is null.
/// </summary>
private RegexNode mReservedPath = null;

/// <summary>
/// Records <paramref name="child"/> as the reserved child and then forwards the same
/// argument to the base implementation.
/// </summary>
/// <param name="child">The child (in Children) that must be called when generating the string.</param>
public override void ReservePath(RegexNode child)
{
    this.mReservedPath = child;

    base.ReservePath(child);
}
/// <summary>
/// Records <paramref name="child"/> as the reserved path and then forwards the same
/// argument to the base implementation.
/// </summary>
/// <param name="child">
/// The child (mRefNode) that must be called when generating the string, i.e. this node
/// cannot repeat zero times.
/// </param>
public override void ReservePath(RegexNode child)
{
    this.mReservedPath = child;

    base.ReservePath(child);
}
/// <summary>
/// Identifies the subexpression by name; used for named backreferences.
/// </summary>
public string Name;

/// <summary>
/// Wraps <paramref name="subExpr"/> and makes this node its parent.
/// </summary>
/// <param name="subExpr">The node this subexpression refers to.</param>
public RegexSubExpressionNode(RegexNode subExpr)
{
    this.mRefNode = subExpr;
    this.mRefNode.Parent = this;
}