/// <summary>
/// Initializes the sampler's root node, its random number generator, and the ASCII helper
/// character sets (word characters, visible ASCII, and visible non-word ASCII).
/// </summary>
/// <param name="root">The node to sample from; its builder also supplies the solver stored by this sampler.</param>
/// <param name="randomseed">Seed for the random number generator. A value of 0 means no seed was supplied, so one is drawn at random.</param>
/// <param name="negative">When true, the stored root is <c>root._builder.MkNot(root)</c> instead of <paramref name="root"/> itself.</param>
public SymbolicRegexSampler(SymbolicRegexNode<S> root, int randomseed, bool negative)
{
    if (negative)
    {
        _root = root._builder.MkNot(root);
    }
    else
    {
        _root = root;
    }

    // A seed of 0 means "no seed supplied": draw one from an unseeded generator instead
    int seed = randomseed;
    if (seed == 0)
    {
        seed = new Random().Next();
    }

    RandomSeed = seed;
    _random = new Random(RandomSeed);
    _solver = root._builder._solver;

    ICharAlgebra<BDD> unicode = SymbolicRegexRunner.s_unicode._solver;

    // ASCII letters, underscore and digits
    BDD[] wordParts = new BDD[]
    {
        unicode.RangeConstraint('A', 'Z'),
        unicode.RangeConstraint('a', 'z'),
        unicode.CharConstraint('_'),
        unicode.RangeConstraint('0', '9')
    };
    _asciiWordCharacters = unicode.Or(wordParts);

    // Visible ASCII range for input character generation
    _ascii = unicode.RangeConstraint('\x20', '\x7E');

    // Visible ASCII characters that are not word characters
    BDD notWord = unicode.Not(_asciiWordCharacters);
    _asciiNonWordCharacters = unicode.And(_ascii, notWord);
}
/// <summary>
/// Converts a bit-vector predicate into a <see cref="BDD"/> by OR-ing together the partition
/// entries whose bit is set in <paramref name="pred"/>.
/// </summary>
/// <param name="solver">The algebra used to build the union.</param>
/// <param name="pred">The predicate; bit i selects the i'th partition entry.</param>
/// <returns>The union of the selected partition entries, or <c>solver.False</c> when none are selected.</returns>
public BDD ConvertToCharSet(ICharAlgebra<BDD> solver, BV pred)
{
    Debug.Assert(_partition is not null);

    // Start from the solver's False value; a predicate equal to False selects nothing
    BDD union = solver.False;
    if (pred.Equals(False))
    {
        return union;
    }

    for (int bit = 0; bit < _bits; bit++)
    {
        if (!pred[bit])
        {
            continue;
        }

        // The bit is set, so the corresponding minterm is part of the union
        union = solver.Or(union, _partition[bit]);
    }

    return union;
}
/// <summary>
/// Converts a <see cref="BitVector"/> predicate into a <see cref="BDD"/> by OR-ing together the
/// minterms whose bit is set in <paramref name="pred"/>.
/// </summary>
/// <param name="pred">The predicate; bit i selects the i'th minterm.</param>
/// <returns>The union of the selected minterms, or the solver's False value when none are selected.</returns>
public BDD ConvertToCharSet(BitVector pred)
{
    BDD[] minterms = _minterms;
    var charSets = CharSetSolver.Instance;

    BDD union = charSets.False;
    if (pred.Equals(False))
    {
        // Nothing is selected
        return union;
    }

    for (int bit = 0; bit < minterms.Length; bit++)
    {
        if (!pred[bit])
        {
            continue;
        }

        // The bit is set, so the corresponding minterm is part of the union
        union = charSets.Or(union, minterms[bit]);
    }

    return union;
}
/// <summary>
/// Converts a 64-bit predicate into a <see cref="BDD"/> by OR-ing together the minterms whose
/// bit is set in <paramref name="pred"/>.
/// </summary>
/// <param name="pred">The predicate; bit i selects the i'th minterm.</param>
/// <returns>The union of the selected minterms, or <see cref="BDD.False"/> when <paramref name="pred"/> is 0.</returns>
public BDD ConvertToCharSet(ulong pred)
{
    BDD[] minterms = _minterms;
    BDD union = BDD.False;

    if (pred == 0)
    {
        // No bits set: nothing to union
        return union;
    }

    for (int bit = 0; bit < minterms.Length; bit++)
    {
        ulong mask = 1UL << bit;
        if ((pred & mask) == 0)
        {
            continue;
        }

        // The bit is set, so the corresponding minterm is part of the union
        union = CharSetSolver.Instance.Or(union, minterms[bit]);
    }

    return union;
}
/// <summary>
/// Converts a 64-bit predicate into a <see cref="BDD"/> by OR-ing together the partition entries
/// whose bit is set in <paramref name="pred"/>.
/// </summary>
/// <param name="solver">The algebra used to build the union.</param>
/// <param name="pred">The predicate; bit i selects the i'th partition entry.</param>
/// <returns>The union of the selected partition entries, or <see cref="BDD.False"/> when <paramref name="pred"/> equals the false predicate.</returns>
public BDD ConvertToCharSet(ICharAlgebra<BDD> solver, ulong pred)
{
    Debug.Assert(_partition is not null);

    // the result will be the union of all minterms in the set
    BDD res = BDD.False;
    if (pred != _false)
    {
        for (int i = 0; i < _bits; i++)
        {
            // Include the i'th minterm in the union if the i'th bit is set. A bit test is
            // "masked value is non-zero", so compare against the literal 0 rather than the
            // _false predicate value; this matches the other ulong-based conversions.
            if ((pred & (1UL << i)) != 0)
            {
                res = solver.Or(res, _partition[i]);
            }
        }
    }

    return res;
}
/// <summary>
/// Converts a 64-bit set representation into a <see cref="BDD"/> by OR-ing together the
/// minterms whose bit is set in <paramref name="set"/>.
/// </summary>
/// <param name="set">The set; bit i selects the i'th minterm.</param>
/// <param name="solver">The solver used to build the union.</param>
/// <returns>The union of the selected minterms, or <see cref="BDD.False"/> when <paramref name="set"/> is 0.</returns>
public BDD ConvertToBDD(ulong set, CharSetSolver solver)
{
    BDD[] minterms = _minterms;
    BDD union = BDD.False;

    if (set == 0)
    {
        // No bits set: nothing to union
        return union;
    }

    for (int bit = 0; bit < minterms.Length; bit++)
    {
        bool selected = (set & (1UL << bit)) != 0;
        if (selected)
        {
            // The bit is set, so the corresponding minterm is part of the union
            union = solver.Or(union, minterms[bit]);
        }
    }

    return union;
}
/// <summary>
/// Converts a <see cref="BitVector"/> set representation into a <see cref="BDD"/> by OR-ing
/// together the minterms whose bit is set in <paramref name="set"/>.
/// </summary>
/// <param name="set">The set; bit i selects the i'th minterm.</param>
/// <param name="solver">The solver used to build the union.</param>
/// <returns>The union of the selected minterms, or <c>solver.Empty</c> when <paramref name="set"/> equals <c>Empty</c>.</returns>
public BDD ConvertToBDD(BitVector set, CharSetSolver solver)
{
    BDD[] minterms = _minterms;

    BDD union = solver.Empty;
    if (set.Equals(Empty))
    {
        // Nothing is selected
        return union;
    }

    for (int bit = 0; bit < minterms.Length; bit++)
    {
        if (!set[bit])
        {
            continue;
        }

        // The bit is set, so the corresponding minterm is part of the union
        union = solver.Or(union, minterms[bit]);
    }

    return union;
}
/// <summary>
/// Converts a <see cref="BDD"/> character set into a bit-vector predicate by OR-ing together
/// <c>_minterms[i]</c> for every partition entry i that overlaps <paramref name="set"/>.
/// </summary>
/// <param name="alg">The BDD algebra used to intersect and test satisfiability.</param>
/// <param name="set">The character set to convert; may be null.</param>
/// <returns>Null when <paramref name="set"/> is null; otherwise the combined predicate.</returns>
public BV? ConvertFromCharSet(BDDAlgebra alg, BDD set)
{
    if (set == null)
    {
        return null;
    }

    Debug.Assert(_partition is not null);

    BV result = False;
    for (int index = 0; index < _bits; index++)
    {
        // A partition entry contributes only if its intersection with the set is satisfiable
        BDD overlap = alg.And(_partition[index], set);
        if (!alg.IsSatisfiable(overlap))
        {
            continue;
        }

        result |= _minterms[index];
    }

    return result;
}
/// <summary>Generator for BDD Unicode category definitions.</summary> /// <param name="namespacename">namespace for the class</param> /// <param name="classname">name of the class</param> /// <param name="path">path where the file classname.cs is written</param> public static void Generate(string namespacename, string classname, string path) { Debug.Assert(namespacename != null); Debug.Assert(classname != null); Debug.Assert(path != null); using StreamWriter sw = new StreamWriter($"{Path.Combine(path, classname)}.cs"); sw.WriteLine( $@"// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // This is a programmatically generated file from Regex.GenerateUnicodeTables. // It provides serialized BDD Unicode category definitions for System.Environment.Version = {Environment.Version} using System.Globalization; namespace {namespacename} {{ internal static class {classname} {{"); var catMap = new Dictionary <UnicodeCategory, Ranges>(); foreach (UnicodeCategory c in Enum.GetValues <UnicodeCategory>()) { catMap[c] = new Ranges(); } Ranges whitespace = new Ranges(); Regex whitespaceRegex = new(@"\s"); for (int i = 0; i <= char.MaxValue; i++) { char ch = (char)i; catMap[char.GetUnicodeCategory(ch)].Add(ch); if (whitespaceRegex.IsMatch(ch.ToString())) { whitespace.Add(ch); } } var charSetSolver = new CharSetSolver(); sw.WriteLine(" /// <summary>Serialized BDD representation of the set of all whitespace characters.</summary>"); sw.Write($" public static ReadOnlySpan<byte> SerializedWhitespaceBDD => "); WriteByteArrayInitSyntax(sw, charSetSolver.CreateSetFromRanges(whitespace.ranges).SerializeToBytes()); sw.WriteLine(";"); // Generate a BDD representation of each UnicodeCategory. 
BDD[] catBDDs = new BDD[catMap.Count]; for (int c = 0; c < catBDDs.Length; c++) { catBDDs[c] = charSetSolver.CreateSetFromRanges(catMap[(UnicodeCategory)c].ranges); } sw.WriteLine(); sw.WriteLine(" /// <summary>Gets the serialized BDD representations of any defined UnicodeCategory.</summary>"); sw.WriteLine(" public static ReadOnlySpan<byte> GetSerializedCategory(UnicodeCategory category) =>"); sw.WriteLine(" (int)category switch"); sw.WriteLine(" {"); for (int i = 0; i < catBDDs.Length; i++) { sw.WriteLine($" {i} => SerializedCategory{i}_{(UnicodeCategory)i},"); } sw.WriteLine($" _ => default,"); sw.WriteLine(" };"); for (int i = 0; i < catBDDs.Length; i++) { sw.WriteLine(); sw.WriteLine($" /// <summary>Serialized BDD representation of the set of all characters in UnicodeCategory.{(UnicodeCategory)i}.</summary>"); sw.Write($" private static ReadOnlySpan<byte> SerializedCategory{i}_{(UnicodeCategory)i} => "); WriteByteArrayInitSyntax(sw, catBDDs[i].SerializeToBytes()); sw.WriteLine(";"); } sw.WriteLine($@" }} }}");
public override IEnumerable <string> SampleMatches(int k, int randomseed) { // Zero is treated as no seed, instead using a system provided one Random random = randomseed != 0 ? new Random(randomseed) : new Random(); ISolver <TSet> solver = _builder._solver; CharSetSolver charSetSolver = _builder._charSetSolver; // Create helper BDDs for handling anchors and preferentially generating ASCII inputs BDD asciiWordCharacters = charSetSolver.Or(new BDD[] { charSetSolver.CreateBDDFromRange('A', 'Z'), charSetSolver.CreateBDDFromRange('a', 'z'), charSetSolver.CreateBDDFromChar('_'), charSetSolver.CreateBDDFromRange('0', '9') }); // Visible ASCII range for input character generation BDD ascii = charSetSolver.CreateBDDFromRange('\x20', '\x7E'); BDD asciiNonWordCharacters = charSetSolver.And(ascii, charSetSolver.Not(asciiWordCharacters)); // Set up two sets of minterms, one with the additional special minterm for the last end-of-line Debug.Assert(_builder._minterms is not null); int[] mintermIdsWithoutZ = new int[_builder._minterms.Length]; int[] mintermIdsWithZ = new int[_builder._minterms.Length + 1]; for (int i = 0; i < _builder._minterms.Length; ++i) { mintermIdsWithoutZ[i] = i; mintermIdsWithZ[i] = i; } mintermIdsWithZ[_builder._minterms.Length] = _builder._minterms.Length; for (int i = 0; i < k; i++) { // Holds the generated input so far StringBuilder inputSoFar = new(); StringBuilder?latestCandidate = null; // Current set of states reached initially contains just the root NfaMatchingState states = new(_builder); // Here one could also consider previous characters for example for \b, \B, and ^ anchors // and initialize inputSoFar accordingly states.InitializeFrom(_initialStates[GetCharKind(ReadOnlySpan <char> .Empty, -1)]); CurrentState statesWrapper = new(states); // Used for end suffixes List <string> possibleEndings = new(); while (true) { Debug.Assert(states.NfaStateSet.Count > 0); // Gather the possible endings for satisfying nullability possibleEndings.Clear(); if 
(NfaStateHandler.CanBeNullable(ref statesWrapper)) { // Unconditionally final state or end of the input due to \Z anchor for example if (NfaStateHandler.IsNullable(ref statesWrapper) || NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.BeginningEnd)) { possibleEndings.Add(""); } // End of line due to end-of-line anchor if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.Newline)) { possibleEndings.Add("\n"); } // Related to wordborder due to \b or \B if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.WordLetter)) { possibleEndings.Add(ChooseChar(random, asciiWordCharacters, ascii, charSetSolver).ToString()); } // Related to wordborder due to \b or \B if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.General)) { possibleEndings.Add(ChooseChar(random, asciiNonWordCharacters, ascii, charSetSolver).ToString()); } } // If we have a possible ending, then store a candidate input if (possibleEndings.Count > 0) { latestCandidate ??= new(); latestCandidate.Clear(); latestCandidate.Append(inputSoFar); //Choose some suffix that allows some anchor (if any) to be nullable latestCandidate.Append(Choose(random, possibleEndings)); // Choose to stop here based on a coin-toss if (FlipBiasedCoin(random, SampleMatchesStoppingProbability)) { yield return(latestCandidate.ToString()); break; } } // Shuffle the minterms, including the last end-of-line marker if appropriate int[] mintermIds = NfaStateHandler.StartsWithLineAnchor(_builder, ref statesWrapper) ? 
Shuffle(random, mintermIdsWithZ) : Shuffle(random, mintermIdsWithoutZ); foreach (int mintermId in mintermIds) { bool success = NfaStateHandler.TakeTransition(_builder, ref statesWrapper, mintermId); Debug.Assert(success); if (states.NfaStateSet.Count > 0) { TSet minterm = _builder.GetMinterm(mintermId); // Append a random member of the minterm inputSoFar.Append(ChooseChar(random, ToBDD(minterm, solver, charSetSolver), ascii, charSetSolver)); break; } else { // The transition was a dead end, undo and continue to try another minterm NfaStateHandler.UndoTransition(ref statesWrapper); } } // In the case that there are no next states or input has become too large: stop here if (states.NfaStateSet.Count == 0 || inputSoFar.Length > SampleMatchesMaxInputLength) { // Ending up here without an ending is unlikely but possible for example for infeasible patterns // such as @"no\bway" or due to poor choice of c -- no anchor is enabled -- so this is a deadend. if (latestCandidate != null) { yield return(latestCandidate.ToString()); } break; } } }
/// <summary>Converts the root <see cref="RegexNode"/> into its corresponding <see cref="SymbolicRegexNode{S}"/>.</summary> /// <param name="root">The root node to convert.</param> /// <returns>The generated <see cref="SymbolicRegexNode{S}"/> that corresponds to the supplied <paramref name="root"/>.</returns> internal SymbolicRegexNode <BDD> ConvertToSymbolicRegexNode(RegexNode root) { Debug.Assert(_builder is not null); // Create the root list that will store the built-up result. DoublyLinkedList <SymbolicRegexNode <BDD> > rootResult = new(); // Create a stack to be processed in order to process iteratively rather than recursively, and push the root on. Stack <(RegexNode Node, bool TryToMarkFixedLength, DoublyLinkedList <SymbolicRegexNode <BDD> > Result, DoublyLinkedList <SymbolicRegexNode <BDD> >[]? ChildResults)> stack = new(); stack.Push((root, true, rootResult, CreateChildResultArray(root.ChildCount()))); // Continue to iterate until the stack is empty, popping the next item on each iteration. // Some popped items may be pushed back on as part of processing. while (stack.TryPop(out (RegexNode Node, bool TryToMarkFixedLength, DoublyLinkedList <SymbolicRegexNode <BDD> > Result, DoublyLinkedList <SymbolicRegexNode <BDD> >[]? ChildResults)popped)) { RegexNode node = popped.Node; DoublyLinkedList <SymbolicRegexNode <BDD> > result = popped.Result; DoublyLinkedList <SymbolicRegexNode <BDD> >[]? childResults = popped.ChildResults; Debug.Assert(childResults is null || childResults.Length != 0); if (childResults is null || childResults[0] is null) { // Child nodes have not been converted yet // Handle each node kind as-is appropriate. 
switch (node.Kind) { // Singletons and multis case RegexNodeKind.One: result.AddLast(_builder.CreateSingleton(_builder._solver.CreateFromChar(node.Ch))); break; case RegexNodeKind.Notone: result.AddLast(_builder.CreateSingleton(_builder._solver.Not(_builder._solver.CreateFromChar(node.Ch)))); break; case RegexNodeKind.Set: result.AddLast(ConvertSet(node)); break; case RegexNodeKind.Multi: { // Create a BDD for each character in the string and concatenate them. string?str = node.Str; Debug.Assert(str is not null); foreach (char c in str) { result.AddLast(_builder.CreateSingleton(_builder._solver.CreateFromChar(c))); } break; } // The following five cases are the only node kinds that are pushed twice: // Joins, general loops, and supported captures case RegexNodeKind.Concatenate: case RegexNodeKind.Alternate: case RegexNodeKind.Loop: case RegexNodeKind.Lazyloop: case RegexNodeKind.Capture when node.N == -1: // N == -1 because balancing groups (which have N >= 0) aren't supported { Debug.Assert(childResults is not null && childResults.Length == node.ChildCount()); // Push back the temporarily popped item. Next time this work item is seen, its ChildResults list will be ready. // Propagate the length mark check only in case of alternation. stack.Push(popped); bool mark = node.Kind == RegexNodeKind.Alternate && popped.TryToMarkFixedLength; // Push all the children to be converted for (int i = 0; i < node.ChildCount(); ++i) { childResults[i] = new DoublyLinkedList <SymbolicRegexNode <BDD> >(); stack.Push((node.Child(i), mark, childResults[i], CreateChildResultArray(node.Child(i).ChildCount()))); } break; } // Specialized loops case RegexNodeKind.Oneloop: case RegexNodeKind.Onelazy: case RegexNodeKind.Notoneloop: case RegexNodeKind.Notonelazy: { // Create a BDD that represents the character, then create a loop around it. 
BDD bdd = _builder._solver.CreateFromChar(node.Ch); if (node.IsNotoneFamily) { bdd = _builder._solver.Not(bdd); } result.AddLast(_builder.CreateLoop(_builder.CreateSingleton(bdd), node.Kind is RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy, node.M, node.N)); break; } case RegexNodeKind.Setloop: case RegexNodeKind.Setlazy: { // Create a BDD that represents the set string, then create a loop around it. string?set = node.Str; Debug.Assert(set is not null); BDD setBdd = CreateBDDFromSetString(set); result.AddLast(_builder.CreateLoop(_builder.CreateSingleton(setBdd), node.Kind == RegexNodeKind.Setlazy, node.M, node.N)); break; } case RegexNodeKind.Empty: case RegexNodeKind.UpdateBumpalong: // UpdateBumpalong is a directive relevant only to backtracking and can be ignored just like Empty break; case RegexNodeKind.Nothing: result.AddLast(_builder._nothing); break; // Anchors case RegexNodeKind.Beginning: result.AddLast(_builder.BeginningAnchor); break; case RegexNodeKind.Bol: EnsureNewlinePredicateInitialized(); result.AddLast(_builder.BolAnchor); break; case RegexNodeKind.End: // \z anchor result.AddLast(_builder.EndAnchor); break; case RegexNodeKind.EndZ: // \Z anchor EnsureNewlinePredicateInitialized(); result.AddLast(_builder.EndAnchorZ); break; case RegexNodeKind.Eol: EnsureNewlinePredicateInitialized(); result.AddLast(_builder.EolAnchor); break; case RegexNodeKind.Boundary: EnsureWordLetterPredicateInitialized(); result.AddLast(_builder.BoundaryAnchor); break; case RegexNodeKind.NonBoundary: EnsureWordLetterPredicateInitialized(); result.AddLast(_builder.NonBoundaryAnchor); break; // Unsupported default: throw new NotSupportedException(SR.Format(SR.NotSupported_NonBacktrackingConflictingExpression, node.Kind switch { RegexNodeKind.Atomic or RegexNodeKind.Setloopatomic or RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloopatomic => SR.ExpressionDescription_AtomicSubexpressions, RegexNodeKind.Backreference => SR.ExpressionDescription_Backreference, 
RegexNodeKind.BackreferenceConditional => SR.ExpressionDescription_Conditional, RegexNodeKind.Capture => SR.ExpressionDescription_BalancingGroup, RegexNodeKind.ExpressionConditional => SR.ExpressionDescription_IfThenElse, RegexNodeKind.NegativeLookaround => SR.ExpressionDescription_NegativeLookaround, RegexNodeKind.PositiveLookaround => SR.ExpressionDescription_PositiveLookaround, RegexNodeKind.Start => SR.ExpressionDescription_ContiguousMatches, _ => UnexpectedNodeType(node) }));
/// <summary>Gets a <see cref="BDD"/> that represents the specified <see cref="UnicodeCategory"/>.</summary>
/// <param name="category">The category whose BDD is requested; its integer value indexes the cache array.</param>
/// <returns>The cached BDD for the category, deserializing and publishing it first if the cache slot is still null.</returns>
public static BDD GetCategory(UnicodeCategory category)
{
    int slot = (int)category;

    // Already published?
    BDD? cached = Volatile.Read(ref s_categories[slot]);
    if (cached is not null)
    {
        return cached;
    }

    // Deserialize and try to publish; CompareExchange only stores our instance if the slot is still null
    BDD deserialized = BDD.Deserialize(UnicodeCategoryRanges.GetSerializedCategory(category));
    BDD? previous = Interlocked.CompareExchange(ref s_categories[slot], deserialized, null);
    if (previous is not null)
    {
        // The slot was already filled, so CompareExchange returned the existing value
        return previous;
    }

    // The slot was null at the exchange, so our value was stored; read it back
    return s_categories[slot]!;
}