public void MinMaxLengthIsCorrect_HugeDepth()
{
    // Parses "a" wrapped in Depth nested groups and anchored with '$', then checks that
    // the reported min/max lengths are either the analyzed values or the fallback values.
    const int Depth = 10_000;

    // This nesting is too deep for analysis on some platform default stack sizes.
    string openParens = new string('(', Depth);
    string closeParens = new string(')', Depth);
    RegexTree tree = RegexParser.Parse(openParens + "a" + closeParens + "$", RegexOptions.None, CultureInfo.InvariantCulture);

    // 1 => successfully analyzed; 0 => ran out of stack space to complete analysis.
    int minLength = tree.FindOptimizations.MinRequiredLength;
    bool minIsAcceptable = minLength == 1 || minLength == 0;
    Assert.True(minIsAcceptable, $"Expected 1 or 0, got {minLength}");

    // 1 => successfully analyzed; null => ran out of stack space to complete analysis.
    int? maxLength = tree.FindOptimizations.MaxPossibleLength;
    bool maxIsAcceptable = maxLength == 1 || maxLength is null;
    Assert.True(maxIsAcceptable, $"Expected 1 or null, got {maxLength}");
}
/// <summary>
/// Converts a regex pattern into an equivalent symbolic finite automaton.
/// </summary>
/// <param name="regex">the .NET regex pattern to convert</param>
/// <param name="options">regular expression options for the pattern</param>
/// <param name="keepBoundaryStates">used for testing purposes; when true, boundary states are not eliminated</param>
/// <returns>the automaton produced from the parsed pattern</returns>
public Automaton<S> Convert(string regex, RegexOptions options, bool keepBoundaryStates)
{
    automBuilder.Reset();

    // RightToLeft only turns the parse tree around and has no semantic meaning
    // for the regex itself, so it is stripped before parsing.
    RegexOptions effectiveOptions = options & ~RegexOptions.RightToLeft;
    RegexTree parsed = RegexParser.Parse(regex, effectiveOptions);
    var automaton = ConvertNode(parsed._root);

    if (keepBoundaryStates)
    {
        return automaton;
    }

    // Wrapping the condition in a delegate delays accessing it until it is requested.
    Func<bool, S> wordLetterCondition = ignored => categorizer.WordLetterCondition;
    automaton.EliminateWordBoundaries(wordLetterCondition);
    return automaton;
}
/// <summary>
/// Converts a regex pattern into an equivalent symbolic regex.
/// </summary>
/// <param name="regex">the .NET regex pattern to convert</param>
/// <param name="options">regular expression options for the pattern (default is RegexOptions.None)</param>
/// <param name="keepAnchors">if false (default) then anchors are replaced by equivalent regexes</param>
/// <returns>the converted symbolic regex, with anchors removed unless <paramref name="keepAnchors"/> is true</returns>
public SymbolicRegexNode<S> ConvertToSymbolicRegex(string regex, RegexOptions options = RegexOptions.None, bool keepAnchors = false)
{
    // RightToLeft only turns the parse tree around and has no semantic meaning
    // for the regex itself, so it is stripped before parsing.
    RegexOptions effectiveOptions = options & ~RegexOptions.RightToLeft;
    RegexTree parsed = RegexParser.Parse(regex, effectiveOptions);
    var symbolic = ConvertNodeToSymbolicRegex(parsed._root);

    if (keepAnchors)
    {
        return symbolic;
    }

    // Remove all anchors.
    return this.srBuilder.RemoveAnchors(symbolic, true, true);
}
/// <summary>Initializes the factory.</summary>
/// <param name="regexTree">The parsed regex tree; its root is converted to a symbolic regex and the tree is handed to the matcher.</param>
/// <param name="options">Regex options; must not include RightToLeft or ECMAScript (asserted in debug builds).</param>
/// <param name="matchTimeout">Timeout value passed through to the created matcher.</param>
/// <param name="culture">Culture passed to the <c>RegexNodeConverter</c>.</param>
public SymbolicRegexRunnerFactory(RegexTree regexTree, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
{
    Debug.Assert((options & (RegexOptions.RightToLeft | RegexOptions.ECMAScript)) == 0);

    var nodeConverter = new RegexNodeConverter(culture, regexTree.CaptureNumberSparseMapping);
    CharSetSolver charSetSolver = CharSetSolver.Instance;

    SymbolicRegexNode<BDD> bddRoot = nodeConverter.ConvertToSymbolicRegexNode(regexTree.Root, tryCreateFixedLengthMarker: true);
    BDD[] minterms = bddRoot.ComputeMinterms();

    if (minterms.Length <= 64)
    {
        // Use ulong to represent a predicate.
        var ulongAlgebra = new BitVector64Algebra(charSetSolver, minterms);
        var ulongBuilder = new SymbolicRegexBuilder<ulong>(ulongAlgebra)
        {
            // The default constructor sets the following predicates to False; this update happens after the fact.
            // Whether the predicates actually differ from False depends on whether anchors were used in the regex.
            _wordLetterPredicateForAnchors = ulongAlgebra.ConvertFromCharSet(charSetSolver, nodeConverter._builder._wordLetterPredicateForAnchors),
            _newLinePredicate = ulongAlgebra.ConvertFromCharSet(charSetSolver, nodeConverter._builder._newLinePredicate)
        };

        // Convert the BDD-based AST to a ulong-based AST.
        SymbolicRegexNode<ulong> ulongRoot = nodeConverter._builder.Transform(
            bddRoot,
            ulongBuilder,
            bdd => ulongBuilder._solver.ConvertFromCharSet(charSetSolver, bdd));

        _matcher = new SymbolicRegexMatcher<ulong>(ulongRoot, regexTree, minterms, matchTimeout);
    }
    else
    {
        // More than 64 minterms: use BitVector to represent a predicate.
        var bitVectorAlgebra = new BitVectorAlgebra(charSetSolver, minterms);
        var bitVectorBuilder = new SymbolicRegexBuilder<BitVector>(bitVectorAlgebra)
        {
            // The default constructor sets the following predicates to False; this update happens after the fact.
            // Whether the predicates actually differ from False depends on whether anchors were used in the regex.
            _wordLetterPredicateForAnchors = bitVectorAlgebra.ConvertFromCharSet(charSetSolver, nodeConverter._builder._wordLetterPredicateForAnchors),
            _newLinePredicate = bitVectorAlgebra.ConvertFromCharSet(charSetSolver, nodeConverter._builder._newLinePredicate)
        };

        // Convert the BDD-based AST to a BitVector-based AST.
        SymbolicRegexNode<BitVector> bitVectorRoot = nodeConverter._builder.Transform(
            bddRoot,
            bitVectorBuilder,
            bdd => bitVectorBuilder._solver.ConvertFromCharSet(charSetSolver, bdd));

        _matcher = new SymbolicRegexMatcher<BitVector>(bitVectorRoot, regexTree, minterms, matchTimeout);
    }
}
/// <summary>
/// Parses the stored pattern and lazily yields the mutations found for every node
/// of the resulting tree (all descendants first, then the root itself).
/// </summary>
/// <returns>The mutations, or an empty sequence when the pattern fails to parse.</returns>
public IEnumerable<RegexMutation> Mutate()
{
    try
    {
        RegexTree parsed = new Parser(_pattern).Parse();
        _root = parsed.Root;
    }
    catch (RegexParseException)
    {
        // A pattern that does not parse produces no mutations.
        yield break;
    }

    var candidates = _root.GetDescendantNodes().ToList();
    candidates.Add(_root);

    foreach (var candidate in candidates)
    {
        foreach (RegexMutation mutation in FindMutants(candidate, _root))
        {
            yield return mutation;
        }
    }
}
/// <summary>
/// Parses <paramref name="regex"/> with Singleline and ExplicitCapture options and converts
/// its captures into named automata, eliminating word boundaries from each one.
/// </summary>
/// <param name="regex">the regex pattern to parse</param>
/// <param name="isLoop">set to true when the first child of the parse tree root is a Loop node</param>
/// <returns>the (name, automaton) pairs produced for the captures</returns>
internal Tuple<string, Automaton<S>>[] ConvertCaptures(string regex, out bool isLoop)
{
    // NOTE: automBuilder.Reset() is not invoked here; only the isBeg/isEnd flags are cleared.
    automBuilder.isBeg = false;
    automBuilder.isEnd = false;

    RegexTree parsed = RegexParser.Parse(regex, RegexOptions.Singleline | RegexOptions.ExplicitCapture);

    // Start from the root's first child; when that is a loop, descend into the loop body.
    var captureRoot = parsed._root._children[0];
    isLoop = captureRoot._type == RegexNode.Loop;
    if (isLoop)
    {
        captureRoot = captureRoot._children[0];
    }

    // Wrapping the condition in a delegate delays accessing it until it is requested.
    Func<bool, S> wordLetterCondition = ignored => categorizer.WordLetterCondition;

    var results = new List<Tuple<string, Automaton<S>>>();
    foreach (var capture in ConvertCaptures(captureRoot, id => parsed._capslist[id]))
    {
        capture.Item2.EliminateWordBoundaries(wordLetterCondition);
        results.Add(capture);
    }

    return results.ToArray();
}
/// <summary>
/// Looks up the character-class node for <paramref name="ch"/> in <paramref name="tree"/> and,
/// when one is found, builds a result from its open and close bracket tokens.
/// </summary>
/// <returns>The brace-matching result, or null when no character-class node is found.</returns>
private static EmbeddedBraceMatchingResult? FindCharacterClassBraces(RegexTree tree, VirtualChar ch)
{
    var characterClass = FindCharacterClassNode(tree.Root, ch);
    if (characterClass == null)
    {
        return null;
    }

    return CreateResult(characterClass.OpenBracketToken, characterClass.CloseBracketToken);
}
/// <summary>
/// Looks up the grouping node for <paramref name="ch"/> in <paramref name="tree"/> and,
/// when one is found, builds a result from its open and close paren tokens.
/// </summary>
/// <returns>The brace-matching result, or null when no grouping node is found.</returns>
private static EmbeddedBraceMatchingResult? FindGroupingBraces(RegexTree tree, VirtualChar ch)
{
    var grouping = FindGroupingNode(tree.Root, ch);
    if (grouping == null)
    {
        return null;
    }

    return CreateResult(grouping.OpenParenToken, grouping.CloseParenToken);
}
/// <summary>
/// Parses <paramref name="pattern"/> with no options under the invariant culture and
/// runs the tree analyzer over the result.
/// </summary>
/// <returns>The parsed tree together with its analysis results.</returns>
private static (RegexTree Tree, AnalysisResults Analysis) Analyze(string pattern)
{
    RegexTree parsed = RegexParser.Parse(pattern, RegexOptions.None, CultureInfo.InvariantCulture);
    AnalysisResults analysis = RegexTreeAnalyzer.Analyze(parsed);
    return (parsed, analysis);
}
/// <summary>
/// Converts a regex pattern into an equivalent symbolic regex.
/// </summary>
/// <param name="regex">the .NET regex pattern to convert</param>
/// <param name="options">regular expression options for the pattern</param>
/// <param name="keepAnchors">if false (default) then anchors are replaced by equivalent regexes</param>
/// <returns>the symbolic regex produced by the node-based overload for the parsed tree's root</returns>
public SymbolicRegexNode<S> ConvertToSymbolicRegex(string regex, RegexOptions options, bool keepAnchors = false)
{
    RegexTree parsed = RegexParser.Parse(regex, options);
    var rootNode = parsed._root;
    return ConvertToSymbolicRegex(rootNode, keepAnchors);
}
/// <summary>
/// Parses <paramref name="pattern"/> with the given options under the invariant culture and
/// constructs the find-optimizations object for the resulting tree's root.
/// </summary>
private static RegexFindOptimizations ComputeOptimizations(string pattern, RegexOptions options)
{
    CultureInfo culture = CultureInfo.InvariantCulture;
    RegexTree parsed = RegexParser.Parse(pattern, options, culture);
    return new RegexFindOptimizations(parsed.Root, options, culture);
}