/// <summary>
/// Creates the algebra for the given minterms. A new <see cref="MintermClassifier"/>, the domain sizes
/// computed by <paramref name="solver"/> and the minterms themselves are forwarded to the base constructor.
/// </summary>
/// <param name="solver">Character-set solver used to build the classifier and to compute the domain sizes.</param>
/// <param name="minterms">The BDD minterms the algebra is built over.</param>
public BVAlgebra(CharSetSolver solver, BDD[] minterms)
    : base(new MintermClassifier(solver, minterms), solver.ComputeDomainSizes(minterms), minterms)
{
    _mintermGenerator = new MintermGenerator<BV>(this);

    // The two constant predicates are created with a width of _bits.
    False = BV.CreateFalse(_bits);
    True = BV.CreateTrue(_bits);

    // One vector per bit position, each produced by BV.CreateSingleBit(_bits, position).
    var atoms = new BV[_bits];
    int position = 0;
    while (position < atoms.Length)
    {
        atoms[position] = BV.CreateSingleBit(_bits, position);
        position++;
    }

    _minterms = atoms;
}
/// <summary>Initializes the factory.</summary>
/// <param name="regexTree">Parsed regex; its root node and capture-number mapping are consumed here.</param>
/// <param name="options">Regex options; must not contain RightToLeft or ECMAScript.</param>
/// <param name="matchTimeout">Timeout forwarded to the created matcher.</param>
/// <param name="culture">Culture forwarded to the <c>RegexNodeConverter</c>.</param>
/// <exception cref="NotSupportedException">
/// <paramref name="options"/> contains <see cref="RegexOptions.RightToLeft"/> or <see cref="RegexOptions.ECMAScript"/>.
/// </exception>
public SymbolicRegexRunnerFactory(RegexTree regexTree, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
{
    // RightToLeft and ECMAScript are currently not supported in conjunction with NonBacktracking.
    const RegexOptions UnsupportedOptions = RegexOptions.RightToLeft | RegexOptions.ECMAScript;
    if ((options & UnsupportedOptions) != 0)
    {
        // RightToLeft is reported in preference to ECMAScript when both are present.
        string conflictingOption = (options & RegexOptions.RightToLeft) != 0
            ? nameof(RegexOptions.RightToLeft)
            : nameof(RegexOptions.ECMAScript);

        throw new NotSupportedException(
            SR.Format(SR.NotSupported_NonBacktrackingConflictingOption, conflictingOption));
    }

    var converter = new RegexNodeConverter(culture, regexTree.CaptureNumberSparseMapping);
    CharSetSolver solver = CharSetSolver.Instance;
    SymbolicRegexNode<BDD> root = converter.ConvertToSymbolicRegexNode(regexTree.Root, tryCreateFixedLengthMarker: true);
    BDD[] minterms = root.ComputeMinterms();

    if (minterms.Length <= 64)
    {
        // At most 64 minterms: use ulong to represent a predicate.
        var algebra64 = new BitVector64Algebra(solver, minterms);
        var builder64 = new SymbolicRegexBuilder<ulong>(algebra64)
        {
            // The default constructor sets the following predicates to False; they are updated here, after the fact.
            // Whether they actually differ from False depends on whether anchors were used in the regex.
            _wordLetterPredicateForAnchors = algebra64.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors),
            _newLinePredicate = algebra64.ConvertFromCharSet(solver, converter._builder._newLinePredicate)
        };

        // Convert the BDD-based AST to a ulong-based AST.
        SymbolicRegexNode<ulong> converted = converter._builder.Transform(
            root, builder64, bdd => builder64._solver.ConvertFromCharSet(solver, bdd));
        _matcher = new SymbolicRegexMatcher<ulong>(converted, regexTree, minterms, matchTimeout);
    }
    else
    {
        // More than 64 minterms: use BitVector to represent a predicate.
        var algebraBv = new BitVectorAlgebra(solver, minterms);
        var builderBv = new SymbolicRegexBuilder<BitVector>(algebraBv)
        {
            // The default constructor sets the following predicates to False; they are updated here, after the fact.
            // Whether they actually differ from False depends on whether anchors were used in the regex.
            _wordLetterPredicateForAnchors = algebraBv.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors),
            _newLinePredicate = algebraBv.ConvertFromCharSet(solver, converter._builder._newLinePredicate)
        };

        // Convert the BDD-based AST to a BitVector-based AST.
        SymbolicRegexNode<BitVector> converted = converter._builder.Transform(
            root, builderBv, bdd => builderBv._solver.ConvertFromCharSet(solver, bdd));
        _matcher = new SymbolicRegexMatcher<BitVector>(converted, regexTree, minterms, matchTimeout);
    }
}
/// <summary>Initializes the factory.</summary>
/// <param name="regexTree">Parsed regex; its root, capture information and find optimizations are consumed here.</param>
/// <param name="options">Regex options; asserted (debug builds) not to contain RightToLeft or ECMAScript.</param>
/// <param name="matchTimeout">Timeout forwarded to the created matcher.</param>
public SymbolicRegexRunnerFactory(RegexTree regexTree, RegexOptions options, TimeSpan matchTimeout)
{
    Debug.Assert((options & (RegexOptions.RightToLeft | RegexOptions.ECMAScript)) == 0);

    // Build the BDD-based representation of the pattern first.
    var charSetSolver = new CharSetSolver();
    var bddBuilder = new SymbolicRegexBuilder<BDD>(charSetSolver, charSetSolver);
    var converter = new RegexNodeConverter(bddBuilder, regexTree.CaptureNumberSparseMapping);
    SymbolicRegexNode<BDD> rootNode = converter.ConvertToSymbolicRegexNode(regexTree.Root);
    BDD[] minterms = rootNode.ComputeMinterms();

    // The number of minterms selects the set representation: BitVector above 64, ulong otherwise.
    if (minterms.Length > 64)
    {
        _matcher = SymbolicRegexMatcher<BitVector>.Create(
            regexTree.CaptureCount,
            regexTree.FindOptimizations,
            bddBuilder,
            rootNode,
            new BitVectorSolver(minterms, charSetSolver),
            matchTimeout);
    }
    else
    {
        _matcher = SymbolicRegexMatcher<ulong>.Create(
            regexTree.CaptureCount,
            regexTree.FindOptimizations,
            bddBuilder,
            rootNode,
            new UInt64Solver(minterms, charSetSolver),
            matchTimeout);
    }
}
/// <summary>
/// Builds the <see cref="BitVector"/> for a BDD by OR-ing together the vectors of every minterm
/// whose intersection with <paramref name="set"/> is not empty.
/// </summary>
/// <remarks>Assumes that <paramref name="set"/> is a union of some minterms (or empty).</remarks>
/// <param name="set">The BDD to convert.</param>
/// <param name="solver">Solver used for the intersection and emptiness checks.</param>
/// <returns>The combined vector; <c>Empty</c> when no minterm intersects <paramref name="set"/>.</returns>
public BitVector ConvertFromBDD(BDD set, CharSetSolver solver)
{
    BitVector union = Empty;
    BDD[] minterms = _minterms;

    for (int index = 0; index < minterms.Length; index++)
    {
        bool disjoint = solver.IsEmpty(solver.And(minterms[index], set));
        if (disjoint)
        {
            continue;
        }

        union = BitVector.Or(union, _mintermVectors[index]);
    }

    return union;
}
/// <summary>Gets a <see cref="BDD"/> that represents the \w character class.</summary>
/// <remarks>
/// \w is the union of the 8 categories: 0,1,2,3,4,5,8,18. The value is computed on first use and
/// cached in <c>s_wordLetter</c>.
/// </remarks>
/// <param name="solver">Solver used to union the category BDDs when the cached value is not yet set.</param>
public static BDD WordLetter(CharSetSolver solver)
{
    // Fast path: a previously published value is returned as is.
    if (s_wordLetter is BDD cached)
    {
        return cached;
    }

    var categories = new[]
    {
        GetCategory(UnicodeCategory.UppercaseLetter),
        GetCategory(UnicodeCategory.LowercaseLetter),
        GetCategory(UnicodeCategory.TitlecaseLetter),
        GetCategory(UnicodeCategory.ModifierLetter),
        GetCategory(UnicodeCategory.OtherLetter),
        GetCategory(UnicodeCategory.NonSpacingMark),
        GetCategory(UnicodeCategory.DecimalDigitNumber),
        GetCategory(UnicodeCategory.ConnectorPunctuation),
    };
    BDD union = solver.Or(categories);

    // CompareExchange stores the new value only if the field is still null and returns the previous
    // content: null means this call's value was stored, so the field is read back; otherwise the
    // value that was already there is returned.
    return Interlocked.CompareExchange(ref s_wordLetter, union, null) ?? s_wordLetter;
}
/// <summary>
/// Builds the <see cref="ulong"/> bit set for a BDD: bit i is set when the i'th minterm has a
/// nonempty intersection with <paramref name="set"/>.
/// </summary>
/// <remarks>Assumes that <paramref name="set"/> is a union of some minterms (or empty).</remarks>
/// <param name="set">The BDD to convert.</param>
/// <param name="solver">Solver used for the intersection and emptiness checks.</param>
/// <returns>The bit set; 0 when no minterm intersects <paramref name="set"/>.</returns>
public ulong ConvertFromBDD(BDD set, CharSetSolver solver)
{
    ulong bits = 0;
    BDD[] minterms = _minterms;

    for (int index = 0; index < minterms.Length; index++)
    {
        bool overlaps = !solver.IsEmpty(solver.And(minterms[index], set));
        if (overlaps)
        {
            // Set the bit that corresponds to this minterm.
            bits |= 1UL << index;
        }
    }

    return bits;
}
/// <summary>
/// Creates a solver over the given minterms: stores them, builds a <see cref="MintermClassifier"/>,
/// and precomputes one single-bit vector per minterm plus the empty and full vectors.
/// </summary>
/// <param name="minterms">The BDD minterms; their count is the width of every vector created here.</param>
/// <param name="solver">Character-set solver handed to the classifier.</param>
public BitVectorSolver(BDD[] minterms, CharSetSolver solver)
{
    _minterms = minterms;
    _classifier = new MintermClassifier(minterms, solver);

    int width = minterms.Length;

    // Entry i is the vector produced by BitVector.CreateSingleBit(width, i).
    var atoms = new BitVector[width];
    for (int bit = 0; bit < atoms.Length; bit++)
    {
        atoms[bit] = BitVector.CreateSingleBit(width, bit);
    }

    _mintermVectors = atoms;
    Empty = BitVector.CreateFalse(width);
    Full = BitVector.CreateTrue(width);
}
/// <summary>
/// Creates an algebra over the given minterms: stores them, builds a <see cref="MintermClassifier"/>
/// and a minterm generator, and precomputes one single-bit vector per minterm plus the False and True vectors.
/// </summary>
/// <param name="solver">Character-set solver handed to the classifier.</param>
/// <param name="minterms">The BDD minterms; their count is the width of every vector created here.</param>
public BitVectorAlgebra(CharSetSolver solver, BDD[] minterms)
{
    _minterms = minterms;
    _classifier = new MintermClassifier(solver, minterms);
    _mintermGenerator = new MintermGenerator<BitVector>(this);

    int width = minterms.Length;

    // Entry i is the vector produced by BitVector.CreateSingleBit(width, i).
    var atoms = new BitVector[width];
    for (int bit = 0; bit < atoms.Length; bit++)
    {
        atoms[bit] = BitVector.CreateSingleBit(width, bit);
    }

    _mintermVectors = atoms;
    False = BitVector.CreateFalse(width);
    True = BitVector.CreateTrue(width);
}
/// <summary>Creates a sampler for the given regex node.</summary>
/// <param name="root">The node to sample from.</param>
/// <param name="randomseed">Seed for the random number generator; 0 means "pick a seed".</param>
/// <param name="negative">When true, the complement of <paramref name="root"/> (built via its builder) is sampled instead.</param>
public SymbolicRegexSampler(SymbolicRegexNode<TSet> root, int randomseed, bool negative)
{
    if (negative)
    {
        _root = root._builder.Not(root);
    }
    else
    {
        _root = root;
    }

    // A seed of 0 is treated as "no seed": one is drawn from an unseeded Random instead.
    if (randomseed == 0)
    {
        RandomSeed = new Random().Next();
    }
    else
    {
        RandomSeed = randomseed;
    }

    _random = new Random(RandomSeed);
    _solver = root._builder._solver;
    _charSetSolver = new CharSetSolver();

    // A-Z, a-z, '_' and 0-9.
    BDD upperCase = _charSetSolver.CreateSetFromRange('A', 'Z');
    BDD lowerCase = _charSetSolver.CreateSetFromRange('a', 'z');
    BDD underscore = _charSetSolver.CreateFromChar('_');
    BDD digits = _charSetSolver.CreateSetFromRange('0', '9');
    _asciiWordCharacters = _charSetSolver.Or(new BDD[] { upperCase, lowerCase, underscore, digits });

    // Visible ASCII range for input character generation
    _ascii = _charSetSolver.CreateSetFromRange('\x20', '\x7E');

    // Visible ASCII minus the word characters above.
    BDD notWord = _charSetSolver.Not(_asciiWordCharacters);
    _asciiNonWordCharacters = _charSetSolver.And(_ascii, notWord);
}
/// <summary>Creates a sampler for the given regex node.</summary>
/// <param name="root">The node to sample from.</param>
/// <param name="randomseed">Seed for the random number generator; 0 means "pick a seed".</param>
/// <param name="negative">When true, the complement of <paramref name="root"/> (built via its builder) is sampled instead.</param>
public SymbolicRegexSampler(SymbolicRegexNode<S> root, int randomseed, bool negative)
{
    if (negative)
    {
        _root = root._builder.Not(root);
    }
    else
    {
        _root = root;
    }

    // A seed of 0 is treated as "no seed": one is drawn from an unseeded Random instead.
    if (randomseed == 0)
    {
        RandomSeed = new Random().Next();
    }
    else
    {
        RandomSeed = randomseed;
    }

    _random = new Random(RandomSeed);
    _solver = root._builder._solver;

    CharSetSolver bddSolver = CharSetSolver.Instance;

    // A-Z, a-z, '_' and 0-9.
    BDD upperCase = bddSolver.RangeConstraint('A', 'Z');
    BDD lowerCase = bddSolver.RangeConstraint('a', 'z');
    BDD underscore = bddSolver.CharConstraint('_');
    BDD digits = bddSolver.RangeConstraint('0', '9');
    _asciiWordCharacters = bddSolver.Or(new BDD[] { upperCase, lowerCase, underscore, digits });

    // Visible ASCII range for input character generation
    _ascii = bddSolver.RangeConstraint('\x20', '\x7E');

    // Visible ASCII minus the word characters above.
    BDD notWord = bddSolver.Not(_asciiWordCharacters);
    _asciiNonWordCharacters = bddSolver.And(_ascii, notWord);
}
/// <summary>
/// Converts a bit vector back into a BDD: the union of every minterm whose bit is set in <paramref name="set"/>.
/// </summary>
/// <param name="set">The bit vector to convert.</param>
/// <param name="solver">Solver that supplies the empty BDD and performs the unions.</param>
/// <returns>The union of the selected minterms; <c>solver.Empty</c> when <paramref name="set"/> equals <c>Empty</c>.</returns>
public BDD ConvertToBDD(BitVector set, CharSetSolver solver)
{
    BDD union = solver.Empty;

    // Nothing to collect for the empty vector.
    if (set.Equals(Empty))
    {
        return union;
    }

    BDD[] minterms = _minterms;
    for (int index = 0; index < minterms.Length; index++)
    {
        // Include the minterm in the union if its bit is set.
        if (set[index])
        {
            union = solver.Or(union, minterms[index]);
        }
    }

    return union;
}
/// <summary>
/// Converts a <see cref="ulong"/> bit set back into a BDD: the union of every minterm whose bit is set in <paramref name="set"/>.
/// </summary>
/// <param name="set">The bit set to convert.</param>
/// <param name="solver">Solver that performs the unions.</param>
/// <returns>The union of the selected minterms; <c>BDD.False</c> when <paramref name="set"/> is 0.</returns>
public BDD ConvertToBDD(ulong set, CharSetSolver solver)
{
    // Nothing to collect when no bit is set.
    if (set == 0)
    {
        return BDD.False;
    }

    BDD union = BDD.False;
    BDD[] minterms = _minterms;
    for (int index = 0; index < minterms.Length; index++)
    {
        // Include the minterm in the union if its bit is set.
        ulong mask = 1UL << index;
        if ((set & mask) != 0)
        {
            union = solver.Or(union, minterms[index]);
        }
    }

    return union;
}
/// <summary>Constructs matcher for given symbolic regex.</summary>
/// <param name="sr">The symbolic regex node to match; its builder and solver are adopted by the matcher.</param>
/// <param name="code">Regex code whose find optimizations may be adopted.</param>
/// <param name="css">Character-set solver (not read in this constructor body).</param>
/// <param name="minterms">Minterms used to build a classifier when the solver does not already carry one.</param>
/// <param name="matchTimeout">Match timeout; <see cref="Regex.InfiniteMatchTimeout"/> disables timeout checks.</param>
/// <param name="culture">Culture (not read in this constructor body).</param>
internal SymbolicRegexMatcher(SymbolicRegexNode<TSetType> sr, RegexCode code, CharSetSolver css, BDD[] minterms, TimeSpan matchTimeout, CultureInfo culture)
{
    Debug.Assert(sr._builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {sr._builder._solver}");

    _pattern = sr;
    _builder = sr._builder;

    // Timeout checks are skipped only for the infinite timeout.
    _checkTimeout = Regex.InfiniteMatchTimeout != matchTimeout;

    // 0.5 is added before the truncating cast to int.
    _timeout = (int)(matchTimeout.TotalMilliseconds + 0.5);

    // The bit-vector algebras already carry a classifier; otherwise one is built from the minterms.
    switch (_builder._solver)
    {
        case BV64Algebra bv64:
            _partitions = bv64._classifier;
            break;

        case BVAlgebra bv:
            _partitions = bv._classifier;
            break;

        default:
            _partitions = new MintermClassifier((CharSetSolver)(object)_builder._solver, minterms);
            break;
    }

    // If there are any anchors, we're better off letting the DFA quickly do its job of determining
    // whether there's a match, so the find optimizations are adopted only without a leading anchor.
    if (code.FindOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
        code.FindOptimizations.LeadingAnchor == 0)
    {
        _findOpts = code.FindOptimizations;
    }

    // Determine the number of initial states. If there's no anchor, only the default previous
    // character kind 0 is ever going to be used for all initial states.
    int statesCount;
    if (_pattern._info.ContainsSomeAnchor)
    {
        statesCount = CharKind.CharKindCount;
    }
    else
    {
        statesCount = 1;
    }

    // Initial states for the original pattern, one per previous-character kind.
    var patternStates = new DfaMatchingState<TSetType>[statesCount];
    for (uint kind = 0; kind < patternStates.Length; kind++)
    {
        patternStates[kind] = _builder.MkState(_pattern, kind);
    }

    _initialStates = patternStates;

    // The dot-star pattern (a concatenation of any* with the original pattern) and its initial states.
    _dotStarredPattern = _builder.MkConcat(_builder._anyStar, _pattern);
    var dotStarStates = new DfaMatchingState<TSetType>[statesCount];
    for (uint kind = 0; kind < dotStarStates.Length; kind++)
    {
        // The flag is used to detect if an initial state was reentered. The behavior from the state may
        // ultimately depend on the previous input char (e.g. possibly causing nullability of \b or \B or
        // of a start-of-line anchor), so there can be several "versions" of the initial state.
        DfaMatchingState<TSetType> initial = _builder.MkState(_dotStarredPattern, kind);
        initial.IsInitialState = true;
        dotStarStates[kind] = initial;
    }

    _dotstarredInitialStates = dotStarStates;

    // The reverse pattern (the original pattern in reverse order) and its initial states.
    _reversePattern = _pattern.Reverse();
    var reverseStates = new DfaMatchingState<TSetType>[statesCount];
    for (uint kind = 0; kind < reverseStates.Length; kind++)
    {
        reverseStates[kind] = _builder.MkState(_reversePattern, kind);
    }

    _reverseInitialStates = reverseStates;

    // Fast lookup for the character kind of ASCII characters. This is only required when the
    // pattern contains anchors, as otherwise there's only ever a single kind used.
    if (_pattern._info.ContainsSomeAnchor)
    {
        var kindsByAscii = new uint[128];
        for (int c = 0; c < kindsByAscii.Length; c++)
        {
            // '\n' is tested against the newline predicate, every other character against the
            // word-letter predicate.
            bool isNewline = c == '\n';
            TSetType relevantSet = isNewline ? _builder._newLinePredicate : _builder._wordLetterPredicateForAnchors;
            uint kindIfMember = isNewline ? CharKind.Newline : CharKind.WordLetter;

            TSetType overlap = _builder._solver.And(GetMinterm(c), relevantSet);
            kindsByAscii[c] = overlap.Equals(_builder._solver.False) ? 0 : kindIfMember;
        }

        _asciiCharKinds = kindsByAscii;
    }
}
/// <summary>Pretty print the bitvector bv as the character set it represents.</summary>
/// <param name="bv">The bit vector to print.</param>
/// <param name="solver">Solver used both for the conversion to a BDD and for the printing.</param>
/// <returns>The text produced by <paramref name="solver"/> for the converted set.</returns>
public string PrettyPrint(BitVector bv, CharSetSolver solver)
{
    BDD asBdd = ConvertToBDD(bv, solver);
    return solver.PrettyPrint(asBdd);
}
/// <summary>Generator for BDD Unicode category definitions.</summary>
/// <remarks>
/// Writes a C# source file named <paramref name="classname"/>.cs under <paramref name="path"/>. The file
/// declares a static class exposing the serialized whitespace BDD and one serialized BDD per
/// <see cref="UnicodeCategory"/> value, computed over every UTF-16 code unit (0 through <see cref="char.MaxValue"/>).
/// </remarks>
/// <param name="namespacename">namespace for the class</param>
/// <param name="classname">name of the class</param>
/// <param name="path">path where the file classname.cs is written</param>
public static void Generate(string namespacename, string classname, string path)
{
    Debug.Assert(namespacename != null);
    Debug.Assert(classname != null);
    Debug.Assert(path != null);

    // The writer is disposed when the method exits (using declaration).
    using StreamWriter sw = new StreamWriter($"{Path.Combine(path, classname)}.cs");

    // File header, namespace and class opening.
    // NOTE(review): this verbatim literal is reproduced exactly as found, on a single line; it looks as
    // if its line breaks were collapsed -- confirm against the upstream file.
    sw.WriteLine(
        $@"// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // This is a programmatically generated file from Regex.GenerateUnicodeTables. // It provides serialized BDD Unicode category definitions for System.Environment.Version = {Environment.Version} using System.Globalization; namespace {namespacename} {{ internal static class {classname} {{");

    // One (initially empty) range collection per UnicodeCategory value.
    var catMap = new Dictionary<UnicodeCategory, Ranges>();
    foreach (UnicodeCategory c in Enum.GetValues<UnicodeCategory>())
    {
        catMap[c] = new Ranges();
    }

    // Classify every char: record it under its Unicode category, and separately under
    // "whitespace" when the regex \s matches it.
    Ranges whitespace = new Ranges();
    Regex whitespaceRegex = new(@"\s");
    for (int i = 0; i <= char.MaxValue; i++)
    {
        char ch = (char)i;
        catMap[char.GetUnicodeCategory(ch)].Add(ch);
        if (whitespaceRegex.IsMatch(ch.ToString()))
        {
            whitespace.Add(ch);
        }
    }

    var charSetSolver = new CharSetSolver();

    // Emit the whitespace property.
    sw.WriteLine(" /// <summary>Serialized BDD representation of the set of all whitespace characters.</summary>");
    sw.Write($" public static ReadOnlySpan<byte> SerializedWhitespaceBDD => ");
    WriteByteArrayInitSyntax(sw, charSetSolver.CreateSetFromRanges(whitespace.ranges).SerializeToBytes());
    sw.WriteLine(";");

    // Generate a BDD representation of each UnicodeCategory.
    // The array index is cast to UnicodeCategory to look up the ranges, i.e. the category values are
    // used as 0..Count-1.
    BDD[] catBDDs = new BDD[catMap.Count];
    for (int c = 0; c < catBDDs.Length; c++)
    {
        catBDDs[c] = charSetSolver.CreateSetFromRanges(catMap[(UnicodeCategory)c].ranges);
    }

    // Emit the lookup method: a switch from the numeric category value to the matching property.
    sw.WriteLine();
    sw.WriteLine(" /// <summary>Gets the serialized BDD representations of any defined UnicodeCategory.</summary>");
    sw.WriteLine(" public static ReadOnlySpan<byte> GetSerializedCategory(UnicodeCategory category) =>");
    sw.WriteLine(" (int)category switch");
    sw.WriteLine(" {");
    for (int i = 0; i < catBDDs.Length; i++)
    {
        sw.WriteLine($" {i} => SerializedCategory{i}_{(UnicodeCategory)i},");
    }
    sw.WriteLine($" _ => default,");
    sw.WriteLine(" };");

    // Emit one private property per category holding its serialized BDD.
    for (int i = 0; i < catBDDs.Length; i++)
    {
        sw.WriteLine();
        sw.WriteLine($" /// <summary>Serialized BDD representation of the set of all characters in UnicodeCategory.{(UnicodeCategory)i}.</summary>");
        sw.Write($" private static ReadOnlySpan<byte> SerializedCategory{i}_{(UnicodeCategory)i} => ");
        WriteByteArrayInitSyntax(sw, catBDDs[i].SerializeToBytes());
        sw.WriteLine(";");
    }

    // Close the generated class and namespace.
    sw.WriteLine($@" }} }}");
/// <summary>Write the DFA or NFA in DGML format into the TextWriter.</summary>
/// <param name="writer">Writer to which the DGML is written.</param>
/// <param name="matcher">The matcher for the regular expression.</param>
/// <param name="nfa">True to create an NFA instead of a DFA.</param>
/// <param name="addDotStar">True to prepend .*? onto the pattern (outside of the implicit root capture).</param>
/// <param name="reverse">If true, then unwind the regex backwards (and <paramref name="addDotStar"/> is ignored).</param>
/// <param name="maxStates">The approximate maximum number of states to include; less than or equal to 0 for no maximum.</param>
/// <param name="maxLabelLength">
/// Maximum length of link labels; anything over that length is cut and marked with "..". A negative value
/// becomes a very large number in the <c>(uint)</c> comparison below, so no label is cut.
/// </param>
public static void Write(
    TextWriter writer, SymbolicRegexMatcher<TSet> matcher, bool nfa = false, bool addDotStar = true, bool reverse = false, int maxStates = -1, int maxLabelLength = -1)
{
    // NOTE(review): the whitespace inside the emitted literals is reproduced exactly as found;
    // confirm the intended indentation of the generated DGML.
    var charSetSolver = new CharSetSolver();
    var explorer = new DfaExplorer(matcher, nfa, addDotStar, reverse, maxStates);

    // Split the transitions: epsilon transitions are kept in encounter order, the others are
    // grouped by their (source, target) pair so that each pair becomes a single link.
    var nonEpsilonTransitions = new Dictionary<(int SourceState, int TargetState), List<(SymbolicRegexNode<TSet>?, TSet)>>();
    var epsilonTransitions = new List<Transition>();
    foreach (Transition edge in explorer.GetTransitions())
    {
        if (edge.IsEpsilon)
        {
            epsilonTransitions.Add(edge);
            continue;
        }

        (int SourceState, int TargetState) endpoints = (edge.SourceState, edge.TargetState);
        if (!nonEpsilonTransitions.TryGetValue(endpoints, out List<(SymbolicRegexNode<TSet>?, TSet)>? labels))
        {
            labels = new List<(SymbolicRegexNode<TSet>?, TSet)>();
            nonEpsilonTransitions[endpoints] = labels;
        }

        labels.Add(edge.Label);
    }

    // Document header and the nodes section.
    EmitLines(
        writer,
        "<?xml version=\"1.0\" encoding=\"utf-8\"?>",
        "<DirectedGraph xmlns=\"http://schemas.microsoft.com/vs/2009/dgml\" ZoomLevel=\"1.5\" GraphDirection=\"TopToBottom\" >",
        " <Nodes>");
    writer.WriteLine(" <Node Id=\"dfa\" Label=\" \" Group=\"Collapsed\" Category=\"DFA\" DFAInfo=\"{0}\" />", GetDFAInfo(explorer, charSetSolver));
    writer.WriteLine(" <Node Id=\"dfainfo\" Category=\"DFAInfo\" Label=\"{0}\"/>", GetDFAInfo(explorer, charSetSolver));

    // Each state produces a state node (tagged initial/final as applicable) and an info node.
    foreach (int state in explorer.GetStates())
    {
        writer.WriteLine(" <Node Id=\"{0}\" Label=\"{0}\" Category=\"State\" Group=\"Collapsed\" StateInfo=\"{1}\">", state, explorer.DescribeState(state));

        if (state == explorer.InitialState)
        {
            writer.WriteLine(" <Category Ref=\"InitialState\" />");
        }

        if (explorer.IsFinalState(state))
        {
            writer.WriteLine(" <Category Ref=\"FinalState\" />");
        }

        writer.WriteLine(" </Node>");
        writer.WriteLine(" <Node Id=\"{0}info\" Label=\"{1}\" Category=\"StateInfo\"/>", state, explorer.DescribeState(state));
    }

    // The links section.
    EmitLines(
        writer,
        " </Nodes>",
        " <Links>");
    writer.WriteLine(" <Link Source=\"dfa\" Target=\"{0}\" Label=\"\" Category=\"StartTransition\" />", explorer.InitialState);
    writer.WriteLine(" <Link Source=\"dfa\" Target=\"dfainfo\" Label=\"\" Category=\"Contains\" />");

    foreach (Transition epsilon in epsilonTransitions)
    {
        writer.WriteLine(" <Link Source=\"{0}\" Target=\"{1}\" Category=\"EpsilonTransition\" />", epsilon.SourceState, epsilon.TargetState);
    }

    foreach (KeyValuePair<(int, int), List<(SymbolicRegexNode<TSet>?, TSet)>> grouped in nonEpsilonTransitions)
    {
        string label = string.Join($",{Environment.NewLine} ", DescribeLabels(explorer, grouped.Value, charSetSolver));
        string info = "";

        // Over-long labels are cut; the full text is preserved in a FullLabel attribute.
        bool tooLong = label.Length > (uint)maxLabelLength;
        if (tooLong)
        {
            info = $"FullLabel = \"{label}\" ";
            label = string.Concat(label.AsSpan(0, maxLabelLength), "..");
        }

        writer.WriteLine($" <Link Source=\"{grouped.Key.Item1}\" Target=\"{grouped.Key.Item2}\" Label=\"{label}\" Category=\"NonEpsilonTransition\" {info}/>");
    }

    // Attach every state to its info node.
    foreach (int state in explorer.GetStates())
    {
        writer.WriteLine(" <Link Source=\"{0}\" Target=\"{0}info\" Category=\"Contains\" />", state);
    }

    // Fixed trailer: category declarations.
    EmitLines(
        writer,
        " </Links>",
        " <Categories>",
        " <Category Id=\"DFA\" Label=\"DFA\" IsTag=\"True\" />",
        " <Category Id=\"EpsilonTransition\" Label=\"Epsilon transition\" IsTag=\"True\" />",
        " <Category Id=\"StartTransition\" Label=\"Initial transition\" IsTag=\"True\" />",
        " <Category Id=\"FinalLabel\" Label=\"Final transition\" IsTag=\"True\" />",
        " <Category Id=\"FinalState\" Label=\"Final\" IsTag=\"True\" />",
        " <Category Id=\"SinkState\" Label=\"Sink state\" IsTag=\"True\" />",
        " <Category Id=\"EpsilonState\" Label=\"Epsilon state\" IsTag=\"True\" />",
        " <Category Id=\"InitialState\" Label=\"Initial\" IsTag=\"True\" />",
        " <Category Id=\"NonEpsilonTransition\" Label=\"Nonepsilon transition\" IsTag=\"True\" />",
        " <Category Id=\"State\" Label=\"State\" IsTag=\"True\" />",
        " </Categories>");

    // Fixed trailer: node and link styles, then the document end.
    EmitLines(
        writer,
        " <Styles>",
        " <Style TargetType=\"Node\" GroupLabel=\"InitialState\" ValueLabel=\"True\">",
        " <Condition Expression=\"HasCategory('InitialState')\" />",
        " <Setter Property=\"Background\" Value=\"lightgray\" />",
        " <Setter Property=\"MinWidth\" Value=\"0\" />",
        " </Style>",
        " <Style TargetType=\"Node\" GroupLabel=\"FinalState\" ValueLabel=\"True\">",
        " <Condition Expression=\"HasCategory('FinalState')\" />",
        " <Setter Property=\"Background\" Value=\"lightgreen\" />",
        " <Setter Property=\"StrokeThickness\" Value=\"4\" />",
        " </Style>",
        " <Style TargetType=\"Node\" GroupLabel=\"State\" ValueLabel=\"True\">",
        " <Condition Expression=\"HasCategory('State')\" />",
        " <Setter Property=\"Stroke\" Value=\"black\" />",
        " <Setter Property=\"Background\" Value=\"white\" />",
        " <Setter Property=\"MinWidth\" Value=\"0\" />",
        " <Setter Property=\"FontSize\" Value=\"12\" />",
        " <Setter Property=\"FontFamily\" Value=\"Arial\" />",
        " </Style>",
        " <Style TargetType=\"Link\" GroupLabel=\"NonEpsilonTransition\" ValueLabel=\"True\">",
        " <Condition Expression=\"HasCategory('NonEpsilonTransition')\" />",
        " <Setter Property=\"Stroke\" Value=\"black\" />",
        " <Setter Property=\"FontSize\" Value=\"18\" />",
        " <Setter Property=\"FontFamily\" Value=\"Arial\" />",
        " </Style>",
        " <Style TargetType=\"Link\" GroupLabel=\"StartTransition\" ValueLabel=\"True\">",
        " <Condition Expression=\"HasCategory('StartTransition')\" />",
        " <Setter Property=\"Stroke\" Value=\"black\" />",
        " </Style>",
        " <Style TargetType=\"Link\" GroupLabel=\"EpsilonTransition\" ValueLabel=\"True\">",
        " <Condition Expression=\"HasCategory('EpsilonTransition')\" />",
        " <Setter Property=\"Stroke\" Value=\"black\" />",
        " <Setter Property=\"StrokeDashArray\" Value=\"8 8\" />",
        " </Style>",
        " <Style TargetType=\"Link\" GroupLabel=\"FinalLabel\" ValueLabel=\"False\">",
        " <Condition Expression=\"HasCategory('FinalLabel')\" />",
        " <Setter Property=\"Stroke\" Value=\"black\" />",
        " <Setter Property=\"StrokeDashArray\" Value=\"8 8\" />",
        " </Style>",
        " <Style TargetType=\"Node\" GroupLabel=\"StateInfo\" ValueLabel=\"True\">",
        " <Setter Property=\"Stroke\" Value=\"white\" />",
        " <Setter Property=\"FontSize\" Value=\"18\" />",
        " <Setter Property=\"FontFamily\" Value=\"Arial\" />",
        " </Style>",
        " <Style TargetType=\"Node\" GroupLabel=\"DFAInfo\" ValueLabel=\"True\">",
        " <Setter Property=\"Stroke\" Value=\"white\" />",
        " <Setter Property=\"FontSize\" Value=\"18\" />",
        " <Setter Property=\"FontFamily\" Value=\"Arial\" />",
        " </Style>",
        " </Styles>",
        "</DirectedGraph>");

    // Writes each constant line, in order, with TextWriter.WriteLine(string).
    static void EmitLines(TextWriter target, params string[] lines)
    {
        foreach (string line in lines)
        {
            target.WriteLine(line);
        }
    }
}
/// <summary>Pretty print the bitvector bv as the character set it represents.</summary>
/// <param name="bv">The bit vector to print.</param>
/// <returns>The text produced by <c>CharSetSolver.Instance</c> for the converted set.</returns>
public string PrettyPrint(BV bv)
{
    // The shared solver instance is used for both the conversion and the printing.
    CharSetSolver sharedSolver = CharSetSolver.Instance;
    var charSet = ConvertToCharSet(sharedSolver, bv);
    return sharedSolver.PrettyPrint(charSet);
}
/// <summary>
/// Lazily generates sample input strings by randomly walking the NFA states of the pattern.
/// </summary>
/// <remarks>
/// The outer loop runs <paramref name="k"/> times and each round yields at most one string, so at most
/// <paramref name="k"/> strings are produced; a round that hits a dead end before any candidate was
/// recorded yields nothing.
/// </remarks>
/// <param name="k">Number of sampling rounds.</param>
/// <param name="randomseed">Seed for the random number generator; 0 selects an unseeded <see cref="Random"/>.</param>
/// <returns>The generated strings, produced as the sequence is enumerated.</returns>
public override IEnumerable<string> SampleMatches(int k, int randomseed)
{
    // Zero is treated as no seed, instead using a system provided one
    Random random = randomseed != 0 ? new Random(randomseed) : new Random();

    ISolver<TSet> solver = _builder._solver;
    CharSetSolver charSetSolver = _builder._charSetSolver;

    // Create helper BDDs for handling anchors and preferentially generating ASCII inputs
    BDD asciiWordCharacters = charSetSolver.Or(new BDD[] {
        charSetSolver.CreateBDDFromRange('A', 'Z'),
        charSetSolver.CreateBDDFromRange('a', 'z'),
        charSetSolver.CreateBDDFromChar('_'),
        charSetSolver.CreateBDDFromRange('0', '9') });

    // Visible ASCII range for input character generation
    BDD ascii = charSetSolver.CreateBDDFromRange('\x20', '\x7E');
    BDD asciiNonWordCharacters = charSetSolver.And(ascii, charSetSolver.Not(asciiWordCharacters));

    // Set up two sets of minterm ids, one with an additional id (equal to the minterm count) that
    // stands for the special minterm for the last end-of-line
    Debug.Assert(_builder._minterms is not null);
    int[] mintermIdsWithoutZ = new int[_builder._minterms.Length];
    int[] mintermIdsWithZ = new int[_builder._minterms.Length + 1];
    for (int i = 0; i < _builder._minterms.Length; ++i)
    {
        mintermIdsWithoutZ[i] = i;
        mintermIdsWithZ[i] = i;
    }
    mintermIdsWithZ[_builder._minterms.Length] = _builder._minterms.Length;

    for (int i = 0; i < k; i++)
    {
        // Holds the generated input so far
        StringBuilder inputSoFar = new();
        // Most recent complete candidate (input so far plus a chosen ending); null until one exists
        StringBuilder? latestCandidate = null;

        // Current set of states reached initially contains just the root
        NfaMatchingState states = new(_builder);
        // Here one could also consider previous characters for example for \b, \B, and ^ anchors
        // and initialize inputSoFar accordingly
        states.InitializeFrom(_initialStates[GetCharKind(ReadOnlySpan<char>.Empty, -1)]);
        CurrentState statesWrapper = new(states);

        // Used for end suffixes
        List<string> possibleEndings = new();

        while (true)
        {
            Debug.Assert(states.NfaStateSet.Count > 0);

            // Gather the possible endings for satisfying nullability
            possibleEndings.Clear();
            if (NfaStateHandler.CanBeNullable(ref statesWrapper))
            {
                // Unconditionally final state or end of the input due to \Z anchor for example
                if (NfaStateHandler.IsNullable(ref statesWrapper) ||
                    NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.BeginningEnd))
                {
                    possibleEndings.Add("");
                }

                // End of line due to end-of-line anchor
                if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.Newline))
                {
                    possibleEndings.Add("\n");
                }

                // Related to word border due to \b or \B: ending with a word character
                if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.WordLetter))
                {
                    possibleEndings.Add(ChooseChar(random, asciiWordCharacters, ascii, charSetSolver).ToString());
                }

                // Related to word border due to \b or \B: ending with a non-word character
                if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.General))
                {
                    possibleEndings.Add(ChooseChar(random, asciiNonWordCharacters, ascii, charSetSolver).ToString());
                }
            }

            // If we have a possible ending, then store a candidate input
            if (possibleEndings.Count > 0)
            {
                latestCandidate ??= new();
                latestCandidate.Clear();
                latestCandidate.Append(inputSoFar);
                // Choose some suffix that allows some anchor (if any) to be nullable
                latestCandidate.Append(Choose(random, possibleEndings));

                // Choose to stop here based on a coin-toss
                if (FlipBiasedCoin(random, SampleMatchesStoppingProbability))
                {
                    yield return(latestCandidate.ToString());
                    break;
                }
            }

            // Shuffle the minterms, including the last end-of-line marker if appropriate
            int[] mintermIds = NfaStateHandler.StartsWithLineAnchor(_builder, ref statesWrapper) ?
                Shuffle(random, mintermIdsWithZ) :
                Shuffle(random, mintermIdsWithoutZ);

            // Try the shuffled minterms in order and keep the first one that leaves some state alive
            foreach (int mintermId in mintermIds)
            {
                bool success = NfaStateHandler.TakeTransition(_builder, ref statesWrapper, mintermId);
                Debug.Assert(success);
                if (states.NfaStateSet.Count > 0)
                {
                    TSet minterm = _builder.GetMinterm(mintermId);
                    // Append a random member of the minterm
                    inputSoFar.Append(ChooseChar(random, ToBDD(minterm, solver, charSetSolver), ascii, charSetSolver));
                    break;
                }
                else
                {
                    // The transition was a dead end, undo and continue to try another minterm
                    NfaStateHandler.UndoTransition(ref statesWrapper);
                }
            }

            // In the case that there are no next states or input has become too large: stop here
            if (states.NfaStateSet.Count == 0 || inputSoFar.Length > SampleMatchesMaxInputLength)
            {
                // Ending up here without an ending is unlikely but possible for example for infeasible patterns
                // such as @"no\bway" or due to poor choice of c -- no anchor is enabled -- so this is a deadend.
                if (latestCandidate != null)
                {
                    yield return(latestCandidate.ToString());
                }
                break;
            }
        }
    }
// Converts a set of the solver's own representation into a BDD by delegating to
// ISolver<TSet>.ConvertToBDD with the given character-set solver.
static BDD ToBDD(TSet set, ISolver<TSet> solver, CharSetSolver charSetSolver)
{
    return solver.ConvertToBDD(set, charSetSolver);
}
/// <summary>
/// Gets a <see cref="BDD"/> that represents <see cref="WordLetter"/> together with the characters
/// \u200C (zero width non joiner) and \u200D (zero width joiner) that are treated as if they were
/// word characters in the context of the anchors \b and \B.
/// </summary>
/// <remarks>The value is computed on first use and cached in <c>s_wordLetterForAnchors</c>.</remarks>
/// <param name="solver">Solver used to build the set when the cached value is not yet set.</param>
public static BDD WordLetterForAnchors(CharSetSolver solver)
{
    // Fast path: a previously published value is returned as is.
    if (s_wordLetterForAnchors is BDD cached)
    {
        return cached;
    }

    BDD extended = solver.Or(WordLetter(solver), solver.CreateBDDFromRange('\u200C', '\u200D'));

    // CompareExchange stores the new value only if the field is still null and returns the previous
    // content: null means this call's value was stored, so the field is read back; otherwise the
    // value that was already there is returned.
    return Interlocked.CompareExchange(ref s_wordLetterForAnchors, extended, null) ?? s_wordLetterForAnchors;
}
/// <summary>Pretty print the bitvector bv as the character set it represents.</summary>
/// <param name="bv">The bit set to print.</param>
/// <param name="solver">Solver used both for the conversion to a BDD and for the printing.</param>
/// <returns>The text produced by <paramref name="solver"/> for the converted set.</returns>
public string PrettyPrint(ulong bv, CharSetSolver solver)
{
    BDD asBdd = ConvertToBDD(bv, solver);
    return solver.PrettyPrint(asBdd);
}