//Add the chars in alphabet from start to end (both inclusive) to the set
//start - first character of the range, must be strictly less than end
//end   - last character of the range
//Fails through RENode.AssertParse when start is not less than end
internal void AddRange(char start, char end)
{
    //a char can never exceed char.MaxValue, so only the ordering has to be checked
    RENode.AssertParse(start < end, "Invalid range specified in char set");

    //mMap is indexed by char code, so 'end' itself has to be a valid index.
    //NOTE(review): assumes mMap holds exactly mMapSize entries, which makes
    //index mMapSize already out of range - hence '>=' rather than '>'
    if (end >= mMapSize)
    {
        ExpandToUnicodeRange();
    }

    //mark the added characters and update the number of available choices
    //(an int counter cannot wrap around when end is char.MaxValue)
    for (int code = start; code <= end; code++)
    {
        if (mMap[code] == 0)
        {
            mMap[code] = 1;
            //a positive set gains a choice, a negative set loses one
            mNumChoices += mPositiveSet ? 1 : -1;
        }
    }

    //check if this set still has invalid characters available
    bool matchesEverything = mPositiveSet && mNumChoices == mMapSize;
    bool excludesEverything = !mPositiveSet && mNumChoices == 0;
    if (matchesEverything || excludesEverything)
    {
        //can never be invalid
        RECompiler.InvalidableNodes.Remove(this);
    }
}
//Required for GenerateInvalid.
//Walks up the RegEx tree so that every ancestor is told which of its children
//has to be part of the generated string.
//child - The child node that must be part of the generated string
//        (not used by this base implementation)
internal virtual void ReservePath(RENode child)
{
    //the root has no parent, so the walk ends here
    if (Parent == null)
    {
        return;
    }

    //from the parent's point of view, this node is the child to reserve
    Parent.ReservePath(this);
}
//Compile a character set (i.e expressions like [abc], [A-Z], [^0-9] or [:macro:])
//Expects mCurrent to be the first character after the opening '['
//Returns the node for the set, or the node produced by CompileMacro for [:name:]
//Malformed input is reported through AssertParse
internal RENode CompileSet()
{
    if (mCurrent == ':')
    {
        NextChar();

        //search from the current position: an earlier ":]" in the expression
        //belongs to a different set and must not be picked up
        int closeIndex = mRegex.ToString().IndexOf(":]", mIndex, StringComparison.Ordinal);
        AssertParse(closeIndex != -1, "Expected ':]'.");

        RENode macro = CompileMacro(mIndex, closeIndex - mIndex);

        //step over the closing ":]"
        mIndex = closeIndex;
        NextChar();
        NextChar();
        return macro;
    }

    //a leading '^' makes this a negative set
    bool negated = mCurrent == '^';
    RESetNode set = new RESetNode(!negated);
    if (negated)
    {
        NextChar();
    }

    //if - or ] are specified as the first char, escape is not required
    if (mCurrent == '-' || mCurrent == ']')
    {
        set.AddChars(mCurrent.ToString());
        NextChar();
    }

    while (!mParseDone && mCurrent != ']')
    {
        char rangeStart = CompileSetChar();

        if (mCurrent != '-')
        {
            //single character
            set.AddChars(rangeStart.ToString());
            continue;
        }

        //character range such as a-z
        NextChar();
        AssertParse(!mParseDone && mCurrent != ']', "End of range is not specified.");
        char rangeEnd = CompileSetChar();
        set.AddRange(rangeStart, rangeEnd);
    }

    //NOTE(review): mParseDone is checked as well because mCurrent may still hold
    //the last character read once the input is exhausted - confirm against NextChar
    AssertParse(!mParseDone && mCurrent == ']', "Expected ']'.");
    NextChar();
    return set;
}
//The child node that must be chosen.
//If this is not null then the node must repeat at least once
private RENode mReservedPath;

//Creates a node that repeats refNode
//refNode   - the node to repeat; this node becomes its parent
//minRepeat - minimum number of repetitions
//maxRepeat - maximum number of repetitions, -1 meaning no upper limit
//sameValue - stored in mSameValue
internal RERepeatNode(RENode refNode, int minRepeat, int maxRepeat, bool sameValue)
{
    //a node covering zero to infinity repetitions can never be invalidated
    bool coversZeroToInfinity = minRepeat <= 0 && maxRepeat == -1;
    if (RECompiler.IsInvalidSection && !coversZeroToInfinity)
    {
        RECompiler.InvalidableNodes.Add(this);
    }

    mMinRepeat = minRepeat;
    mMaxRepeat = maxRepeat;
    mSameValue = sameValue;

    mRefNode = refNode;
    refNode.Parent = this;
}
//Generates a single character from this set.
//When this node is the one selected for invalidation (RECompiler.InvalidNode),
//the character is picked from the elements that are NOT available instead.
//random - source of randomness used to pick the character
//Returns the chosen character as a string
internal override string Generate(Random random)
{
    bool invalidate = this == RECompiler.InvalidNode;

    RENode.AssertParse(mNumChoices > 0, "No valid range specified in char set");

    //number of elements to pick from: the unavailable ones when invalidating
    int candidateCount = invalidate ? mMapSize - mNumChoices : mNumChoices;

    //map value that marks a candidate: a positive set offers the marked entries,
    //a negative set the unmarked ones, and invalidating inverts that
    int wantedMark = (mPositiveSet != invalidate) ? 1 : 0;

    int remaining = random.Next(candidateCount);
    int position = -1;

    //seek to the chosen candidate
    while (remaining >= 0)
    {
        position++;
        if (mMap[position] == wantedMark)
        {
            remaining--;
        }
    }

    return Convert.ToChar(position).ToString();
}
/// <summary>
/// Generates a string based on the given regular expression.
/// If any nodes are prepended with \i, then one of these nodes will be chosen
/// at random to be invalidated.
/// </summary>
/// <param name="random">Random object to use for generation</param>
/// <param name="regex">Regular expression used to generate the string</param>
/// <returns>generated string</returns>
/// <exception cref="ArgumentNullException"><paramref name="regex"/> is null</exception>
/// <exception cref="ArgumentException">
/// An invalid string was requested but no node can be invalidated, or the
/// generated string still matches the expression
/// </exception>
public static string NextString(Random random, string regex)
{
    if (regex == null)
    {
        throw new ArgumentNullException("regex");
    }

    //reset the static variables
    RECompiler.IsInvalidSection = false;
    RECompiler.InvalidNode = null;
    RECompiler.InvalidableNodes.Clear();

    //construct the RegEx tree
    RECompiler compiler = new RECompiler();
    RENode node = compiler.Compile(regex);

    //search for a signal to invalidate a node
    if (regex.IndexOf("\\i", StringComparison.Ordinal) != -1)
    {
        //something should have been invalidated
        //select a node to invalidate
        if (RECompiler.InvalidableNodes.Count == 0)
        {
            throw new ArgumentException("Asked to generate invalid: Impossible to invalidate");
        }

        int chosen = random.Next(RECompiler.InvalidableNodes.Count);
        RECompiler.InvalidNode = RECompiler.InvalidableNodes[chosen];

        //Mark REOrNodes and RERepeatNodes to ensure that the invalid node will be part of the string
        RECompiler.InvalidNode.ReservePath(null);
    }

    //generate the string
    string result = node.Generate(random);

    if (RECompiler.InvalidNode != null)
    {
        //confirm that the generated string is invalid (e.g. [a-z]|[^a-z] will always fail).
        //The expression is wrapped in a non-capturing group so that both anchors apply
        //to the whole expression: without it "a|b" would become "^a|b$", which anchors
        //each alternative on one side only and reports matches for invalid strings.
        //A non-capturing group leaves back reference numbering untouched.
        Regex compare = new Regex("^(?:" + regex.Replace("\\i", "") + ")$");
        if (compare.IsMatch(result))
        {
            throw new ArgumentException(regex + ": Did not generate invalid string: " + result);
        }
    }

    return result;
}
//Compile a branch: the run of pieces up to the next '|', ')' or the end of the expression
//Returns the piece itself when the branch holds a single piece,
//otherwise an REAndNode holding all pieces in order
internal RENode CompileBranch()
{
    RENode current = CompilePiece();

    //a single piece needs no wrapping node
    if (mParseDone || mCurrent == '|' || mCurrent == ')')
    {
        return current;
    }

    REAndNode sequence = new REAndNode();
    while (true)
    {
        sequence.Children.Add(current);
        current.Parent = sequence;

        //stop at the end of the branch
        if (mParseDone || mCurrent == '|' || mCurrent == ')')
        {
            break;
        }

        current = CompilePiece();
    }

    return sequence;
}
//Compile the expression i.e. main body or expr in parenthesis
//Returns the branch itself when there is no alternation,
//otherwise an REOrNode holding every alternative in order
internal RENode CompileExpr()
{
    RENode branch = CompileBranch();

    //mParseDone is checked alongside mCurrent, the same way CompileBranch does it.
    //NOTE(review): this assumes mCurrent may still hold the last character read once
    //the input is exhausted (e.g. an escaped '|' at the very end) - confirm against NextChar
    if (mParseDone || mCurrent != '|')
    {
        return branch;
    }

    REOrNode expr = new REOrNode();
    expr.Children.Add(branch);
    branch.Parent = expr;

    while (!mParseDone && mCurrent == '|')
    {
        //skip the '|' and compile the next alternative
        NextChar();
        RENode nextBranch = CompileBranch();
        expr.Children.Add(nextBranch);
        nextBranch.Parent = expr;
    }

    return expr;
}
//Compile \d \D \s \S etc.
//c - the character that follows the backslash
//Returns a text node for escaped metacharacters, control characters and anchors,
//or a set node for the character classes. Unknown escapes are reported
//through AssertParse.
internal RENode CompileSimpleMacro(char c)
{
    //escaped metacharacters stand for themselves. '^' and '$' are included because
    //CompileAtom treats both as special, so escaping is the way to ask for the literal
    if (@"[]{}()*-+.?\|^$".IndexOf(c) >= 0)
    {
        return new RETextNode(c.ToString());
    }

    RESetNode set;
    switch (c)
    {
        case 'd': // [0-9]
        case 'D': // [^0-9]
            set = new RESetNode(c == 'd');
            set.AddRange('0', '9');
            return set;

        case 's': // white space
        case 'S': // anything but white space
            set = new RESetNode(c == 's');
            set.AddChars(" \r\n\f\v\t");
            return set;

        case 'w': // [a-zA-Z0-9_]
        case 'W': // [^a-zA-Z0-9_]
            set = new RESetNode(c == 'w');
            set.AddRange('a', 'z');
            set.AddRange('A', 'Z');
            set.AddRange('0', '9');
            set.AddChars("_");
            return set;

        case 'f':
            return new RETextNode("\f");
        case 'n':
            return new RETextNode("\n");
        case 'r':
            return new RETextNode("\r");
        case 't':
            return new RETextNode("\t");
        case 'v':
            return new RETextNode("\v");

        //anchors generate nothing
        case 'A':
        case 'Z':
        case 'z':
            return new RETextNode(String.Empty);

        default:
            AssertParse(false, "Invalid escape.");
            return null;
    }
}
//Compile a single token (atom) of the expression:
//  .       any single char (char codes 0 to 127)
//  [...]   positive or negative set
//  (...)   sub expression: indexed capture, named capture or (?:...) without capture
//  ^ $     anchors, which generate nothing
//  \...    escape: char code, back reference or simple macro
//  other   a run of literal text up to the next special character
//Returns the node for the token; malformed input is reported through AssertParse
internal RENode CompileAtom()
{
    //bounds of a back reference name, filled in by ExtractBackrefName
    int nameStart = 0;
    int nameEnd = 0;

    AssertParse(!mParseDone, "Reached end of string. No element found.");
    AssertParse(!("|)?+*{}".Contains(mCurrent.ToString())), "No element found.");

    //Any single char
    if (mCurrent == '.')
    {
        RESetNode anyChar = new RESetNode(true);
        anyChar.AddRange(Convert.ToChar(0), Convert.ToChar(127));
        NextChar();
        return anyChar;
    }

    //Positive or negative set
    if (mCurrent == '[')
    {
        NextChar();
        return CompileSet();
    }

    //Sub expression
    if (mCurrent == '(')
    {
        //By default, subexpressions must be captured for future reference
        bool captured = true;
        bool named = false;

        NextChar();
        if (mCurrent == '?')
        {
            NextChar();
            if (mCurrent == ':')
            {
                //If sub expression begins with ?: it means don't store reference
                NextChar();
                captured = false;
            }
            else
            {
                //Named backreference, extract backreference name
                ExtractBackrefName(ref nameStart, ref nameEnd);
                named = true;
            }
        }

        RESubExprNode subExpr = new RESubExprNode(CompileExpr());
        AssertParse(mCurrent == ')', "Expected ')'");
        NextChar();

        if (named)
        {
            subExpr.Name = mRegex.ToString().Substring(nameStart, nameEnd - nameStart + 1);
            mNamedBackRefs.Add(subExpr);
        }
        else if (captured)
        {
            //Indexed backreference
            mBackRefs.Add(subExpr);
        }

        return subExpr;
    }

    //Anchors
    if (mCurrent == '^' || mCurrent == '$')
    {
        RENode anchor = new RETextNode(String.Empty);
        NextChar();
        return anchor;
    }

    //Escape sequence
    if (mCurrent == '\\')
    {
        NextChar();
        char lowered = Char.ToLower(mCurrent, CultureInfo.InvariantCulture);

        //char given by its code
        if (lowered == 'x' || lowered == 'u' || mCurrent == '0')
        {
            return new RETextNode(EscapeValue().ToString());
        }

        //referencing a backreference by index
        if (Char.IsDigit(mCurrent))
        {
            RENode indexedTarget = GetBackRef((int)EscapeValue());
            AssertParse(indexedTarget != null, "Couldn't find back reference");
            return new RESubExprNode(indexedTarget);
        }

        //referencing a backreference by name
        if (mCurrent == 'k')
        {
            NextChar();
            ExtractBackrefName(ref nameStart, ref nameEnd);
            RENode namedTarget = GetBackRef(mRegex.ToString().Substring(nameStart, nameEnd - nameStart + 1));
            AssertParse(namedTarget != null, "Couldn't find back reference");
            //Create a copy of the referenced node
            return new RESubExprNode(namedTarget);
        }

        RENode macro = CompileSimpleMacro(mCurrent);
        NextChar();
        return macro;
    }

    //Literal text: everything up to the next special character
    string pattern = mRegex.ToString();
    int stopIndex = pattern.IndexOfAny("-*+?(){}\\[]^$.|".ToCharArray(), mIndex + 1);
    RENode text;
    if (stopIndex == -1)
    {
        //no special character left: the literal runs to the end of the expression
        mParseDone = true;
        stopIndex = mRegex.Length - 1;
        text = new RETextNode(pattern.Substring(mIndex, stopIndex - mIndex + 1));
    }
    else
    {
        text = new RETextNode(pattern.Substring(mIndex, stopIndex - mIndex));
    }

    mIndex = stopIndex;
    mCurrent = mRegex[mIndex];
    return text;
}
//Compile token followed by *+?{}
//A leading \i marks the token (and its subnodes) as candidates for invalidation;
//a \i placed between the token and its quantifier marks the repeating node instead.
//Returns the token itself, or an RERepeatNode wrapping it when a quantifier follows
//Malformed input is reported through AssertParse
internal RENode CompilePiece()
{
    RENode node = null;

    //store the old invalidating state for restoring after this node
    bool oldInvalidState = RECompiler.IsInvalidSection;

    //check if we want to invalidate the 'atom' node and subnodes.
    //The look-ahead is bounds checked so a backslash at the very end of the
    //expression cannot index past the end of mRegex
    if (mCurrent == '\\' && mIndex + 1 < mRegex.Length && mRegex[mIndex + 1] == 'i')
    {
        //entering invalidating nodes section
        NextChar();
        NextChar();
        //invalidate the following node and subnodes
        RECompiler.IsInvalidSection = true;
    }

    RENode atom = CompileAtom();

    //revert the invalidating state
    RECompiler.IsInvalidSection = oldInvalidState;

    //check special case of invalidating a repeating node
    //have to confirm with "*+?{" to verify that it's not another type of node (that parses elsewhere)
    if (!mParseDone
        && mCurrent == '\\'
        && mIndex + 2 < mRegex.Length
        && mRegex[mIndex + 1] == 'i'
        && "*+?{".IndexOf(mRegex[mIndex + 2]) >= 0)
    {
        NextChar();
        NextChar();
        //invalidate the repeating node
        RECompiler.IsInvalidSection = true;
    }

    const int MAXREPEAT = -1; //value representing infinity

    if (mParseDone)
    {
        //nothing is left to read, so no quantifier can follow.
        //NOTE(review): assumes mCurrent may still hold the last character read once the
        //input is exhausted (e.g. an escaped '*' at the very end) - confirm against NextChar
        node = atom;
    }
    else
    {
        switch (mCurrent)
        {
            case '*': //zero or more repetition
                node = new RERepeatNode(atom, 0, MAXREPEAT, false);
                NextChar();
                break;

            case '+': //one or more repetition
                node = new RERepeatNode(atom, 1, MAXREPEAT, false);
                NextChar();
                break;

            case '?': //zero or one repetition
                node = new RERepeatNode(atom, 0, 1, false);
                NextChar();
                break;

            case '{': //Min and max repetition limits defined
                int nMin = 0;
                int nMax = 0;
                bool sameChar = false;
                bool openEnded = false; //true for the {n,} form

                NextChar();
                if (mCurrent == '=')
                {
                    //{=n} form: passed on to the repeat node as its sameValue argument
                    sameChar = true;
                    NextChar();
                }

                int closeIndex = mRegex.ToString().IndexOf('}', mIndex);
                AssertParse(closeIndex != -1, "Expected '}'");

                string[] repeatTokens = mRegex.ToString()
                    .Substring(mIndex, closeIndex - mIndex)
                    .Split(new char[] { ',' });

                if (repeatTokens.Length == 1)
                {
                    //{n}: exact count
                    nMin = nMax = int.Parse(repeatTokens[0], CultureInfo.InvariantCulture);
                }
                else if (repeatTokens.Length == 2)
                {
                    nMin = int.Parse(repeatTokens[0], CultureInfo.InvariantCulture);

                    //check for {n,} case
                    if (repeatTokens[1].Length > 0)
                    {
                        nMax = int.Parse(repeatTokens[1], CultureInfo.InvariantCulture);
                    }
                    else
                    {
                        nMax = MAXREPEAT; //only lower bound specified
                        openEnded = true;
                    }
                }
                else
                {
                    AssertParse(false, "Repeat values cannot be parsed");
                }

                AssertParse(openEnded || nMin <= nMax, "Max repeat is less than min repeat");

                //continue after the closing '}'
                mIndex = closeIndex;
                NextChar();
                node = new RERepeatNode(atom, nMin, nMax, sameChar);
                break;

            default:
                node = atom;
                break;
        }
    }

    //revert invalidation after generating the repeating node
    RECompiler.IsInvalidSection = oldInvalidState;

    return node;
}
//Identifies subexpression by name, used for named backreferences
internal string Name;

//Wraps subExpr in a sub expression node
//subExpr - the wrapped node; this node becomes its parent
internal RESubExprNode(RENode subExpr)
{
    mRefNode = subExpr;
    subExpr.Parent = this;
}
//The child node that this Or Node must choose
//Chosen node is random if this is null
private RENode mReservedPath;

//Records the child that has to be used and passes the reservation on
//through the base implementation
//child - the node in Children that must be called when generating the string
internal override void ReservePath(RENode child)
{
    mReservedPath = child;

    base.ReservePath(child);
}
//Records the child that has to be used and passes the reservation on
//through the base implementation
//child - the node (mRefNode) that must be called when generating the string,
//        which means this node cannot repeat zero times
internal override void ReservePath(RENode child)
{
    mReservedPath = child;

    base.ReservePath(child);
}