Exemple #1
0
        /// <summary>
        /// Creates the algebra for the given minterms and precomputes one single-bit
        /// vector for every bit position.
        /// </summary>
        /// <param name="solver">Character-set solver used to build the classifier and to compute the domain sizes.</param>
        /// <param name="minterms">The BDD minterms forwarded to the base algebra.</param>
        public BVAlgebra(CharSetSolver solver, BDD[] minterms)
            : base(new MintermClassifier(solver, minterms), solver.ComputeDomainSizes(minterms), minterms)
        {
            _mintermGenerator = new MintermGenerator<BV>(this);
            False = BV.CreateFalse(_bits);
            True = BV.CreateTrue(_bits);

            // One vector per bit position, each created through BV.CreateSingleBit.
            var atoms = new BV[_bits];
            int position = 0;
            while (position < atoms.Length)
            {
                atoms[position] = BV.CreateSingleBit(_bits, position);
                position++;
            }

            _minterms = atoms;
        }
Exemple #2
0
        /// <summary>Initializes the factory by converting the regex tree into a symbolic matcher.</summary>
        /// <param name="regexTree">The parsed regular expression whose root node is converted.</param>
        /// <param name="options">The regex options; RightToLeft and ECMAScript are rejected.</param>
        /// <param name="matchTimeout">The timeout forwarded to the created matcher.</param>
        /// <param name="culture">The culture handed to the node converter.</param>
        /// <exception cref="NotSupportedException">
        /// <paramref name="options"/> contains <see cref="RegexOptions.RightToLeft"/> or <see cref="RegexOptions.ECMAScript"/>.
        /// </exception>
        public SymbolicRegexRunnerFactory(RegexTree regexTree, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
        {
            // NonBacktracking cannot currently be combined with RightToLeft or ECMAScript.
            const RegexOptions UnsupportedOptions = RegexOptions.RightToLeft | RegexOptions.ECMAScript;
            if ((options & UnsupportedOptions) != 0)
            {
                // RightToLeft is the one reported when both options are present.
                string offendingOption = (options & RegexOptions.RightToLeft) != 0
                    ? nameof(RegexOptions.RightToLeft)
                    : nameof(RegexOptions.ECMAScript);

                throw new NotSupportedException(SR.Format(SR.NotSupported_NonBacktrackingConflictingOption, offendingOption));
            }

            var nodeConverter = new RegexNodeConverter(culture, regexTree.CaptureNumberSparseMapping);
            CharSetSolver charSets = CharSetSolver.Instance;
            SymbolicRegexNode<BDD> bddRoot = nodeConverter.ConvertToSymbolicRegexNode(regexTree.Root, tryCreateFixedLengthMarker: true);

            BDD[] minterms = bddRoot.ComputeMinterms();
            if (minterms.Length <= 64)
            {
                // At most 64 minterms: a ulong is used to represent a predicate.
                var ulongAlgebra = new BitVector64Algebra(charSets, minterms);
                var ulongBuilder = new SymbolicRegexBuilder<ulong>(ulongAlgebra);

                // The builder's constructor leaves these predicates as False, so they are updated after the fact.
                // Whether they actually differ from False depends on whether anchors were used in the regex.
                ulongBuilder._wordLetterPredicateForAnchors = ulongAlgebra.ConvertFromCharSet(charSets, nodeConverter._builder._wordLetterPredicateForAnchors);
                ulongBuilder._newLinePredicate = ulongAlgebra.ConvertFromCharSet(charSets, nodeConverter._builder._newLinePredicate);

                // Convert the BDD-based AST into a ulong-based AST.
                SymbolicRegexNode<ulong> ulongRoot = nodeConverter._builder.Transform(
                    bddRoot,
                    ulongBuilder,
                    charSet => ulongBuilder._solver.ConvertFromCharSet(charSets, charSet));

                _matcher = new SymbolicRegexMatcher<ulong>(ulongRoot, regexTree, minterms, matchTimeout);
            }
            else
            {
                // More than 64 minterms: a BitVector is used to represent a predicate.
                var vectorAlgebra = new BitVectorAlgebra(charSets, minterms);
                var vectorBuilder = new SymbolicRegexBuilder<BitVector>(vectorAlgebra);

                // The builder's constructor leaves these predicates as False, so they are updated after the fact.
                // Whether they actually differ from False depends on whether anchors were used in the regex.
                vectorBuilder._wordLetterPredicateForAnchors = vectorAlgebra.ConvertFromCharSet(charSets, nodeConverter._builder._wordLetterPredicateForAnchors);
                vectorBuilder._newLinePredicate = vectorAlgebra.ConvertFromCharSet(charSets, nodeConverter._builder._newLinePredicate);

                // Convert the BDD-based AST into a BitVector-based AST.
                SymbolicRegexNode<BitVector> vectorRoot = nodeConverter._builder.Transform(
                    bddRoot,
                    vectorBuilder,
                    charSet => vectorBuilder._solver.ConvertFromCharSet(charSets, charSet));

                _matcher = new SymbolicRegexMatcher<BitVector>(vectorRoot, regexTree, minterms, matchTimeout);
            }
        }
Exemple #3
0
        /// <summary>Initializes the factory by building a symbolic matcher for the given regex tree.</summary>
        /// <param name="regexTree">The parsed regular expression to convert.</param>
        /// <param name="options">The regex options; asserted not to contain RightToLeft or ECMAScript.</param>
        /// <param name="matchTimeout">The timeout forwarded to the created matcher.</param>
        public SymbolicRegexRunnerFactory(RegexTree regexTree, RegexOptions options, TimeSpan matchTimeout)
        {
            Debug.Assert((options & (RegexOptions.RightToLeft | RegexOptions.ECMAScript)) == 0);

            // The same CharSetSolver instance is passed for both builder arguments.
            var bddSolver = new CharSetSolver();
            var nodeBuilder = new SymbolicRegexBuilder<BDD>(bddSolver, bddSolver);
            var nodeConverter = new RegexNodeConverter(nodeBuilder, regexTree.CaptureNumberSparseMapping);

            SymbolicRegexNode<BDD> bddRoot = nodeConverter.ConvertToSymbolicRegexNode(regexTree.Root);
            BDD[] minterms = bddRoot.ComputeMinterms();

            // Up to 64 minterms use a ulong-based solver; anything larger uses a BitVector-based solver.
            if (minterms.Length > 64)
            {
                _matcher = SymbolicRegexMatcher<BitVector>.Create(
                    regexTree.CaptureCount,
                    regexTree.FindOptimizations,
                    nodeBuilder,
                    bddRoot,
                    new BitVectorSolver(minterms, bddSolver),
                    matchTimeout);
            }
            else
            {
                _matcher = SymbolicRegexMatcher<ulong>.Create(
                    regexTree.CaptureCount,
                    regexTree.FindOptimizations,
                    nodeBuilder,
                    bddRoot,
                    new UInt64Solver(minterms, bddSolver),
                    matchTimeout);
            }
        }
Exemple #4
0
        /// <summary>
        /// Builds the bit vector that is the union of the vectors of all minterms
        /// having a non-empty intersection with <paramref name="set"/>.
        /// </summary>
        /// <remarks>Assumes that <paramref name="set"/> is a union of some minterms (or empty).</remarks>
        /// <param name="set">The BDD to convert.</param>
        /// <param name="solver">The solver used to intersect BDDs and test them for emptiness.</param>
        /// <returns>The accumulated bit vector; <c>Empty</c> when no minterm overlaps the set.</returns>
        public BitVector ConvertFromBDD(BDD set, CharSetSolver solver)
        {
            BDD[] minterms = _minterms;
            BitVector accumulated = Empty;

            for (int index = 0; index < minterms.Length; index++)
            {
                BDD overlap = solver.And(minterms[index], set);
                if (solver.IsEmpty(overlap))
                {
                    continue;
                }

                // The index'th minterm overlaps the set, so add its vector to the result.
                accumulated = BitVector.Or(accumulated, _mintermVectors[index]);
            }

            return accumulated;
        }
 /// <summary>Gets a <see cref="BDD"/> that represents the \w character class.</summary>
 /// <remarks>
 /// \w is the union of the 8 categories: 0,1,2,3,4,5,8,18.
 /// The set is built on first use and then stored in <c>s_wordLetter</c>.
 /// </remarks>
 public static BDD WordLetter(CharSetSolver solver)
 {
     // Fast path: a previously stored set is returned as is.
     if (s_wordLetter is { } cached)
     {
         return cached;
     }

     var categories = new[]
     {
         GetCategory(UnicodeCategory.UppercaseLetter),
         GetCategory(UnicodeCategory.LowercaseLetter),
         GetCategory(UnicodeCategory.TitlecaseLetter),
         GetCategory(UnicodeCategory.ModifierLetter),
         GetCategory(UnicodeCategory.OtherLetter),
         GetCategory(UnicodeCategory.NonSpacingMark),
         GetCategory(UnicodeCategory.DecimalDigitNumber),
         GetCategory(UnicodeCategory.ConnectorPunctuation),
     };
     BDD union = solver.Or(categories);

     // Store the set only if the field is still null. CompareExchange returns the previous
     // value: non-null means another caller stored one first and that value is returned;
     // null means this store succeeded and the field is read back.
     return Interlocked.CompareExchange(ref s_wordLetter, union, null) ?? s_wordLetter;
 }
Exemple #6
0
        /// <summary>
        /// Builds the 64-bit mask whose i'th bit is set exactly when the i'th minterm
        /// has a non-empty intersection with <paramref name="set"/>.
        /// </summary>
        /// <remarks>Assumes that <paramref name="set"/> is a union of some minterms (or empty).</remarks>
        /// <param name="set">The BDD to convert.</param>
        /// <param name="solver">The solver used to intersect BDDs and test them for emptiness.</param>
        /// <returns>The resulting mask; 0 when no minterm overlaps the set.</returns>
        public ulong ConvertFromBDD(BDD set, CharSetSolver solver)
        {
            BDD[] minterms = _minterms;
            ulong mask = 0;

            for (int index = 0; index < minterms.Length; index++)
            {
                BDD overlap = solver.And(minterms[index], set);
                if (solver.IsEmpty(overlap))
                {
                    continue;
                }

                // The index'th minterm is in the set, so set the index'th bit.
                mask |= 1UL << index;
            }

            return mask;
        }
Exemple #7
0
        /// <summary>
        /// Creates a solver over the given minterms, precomputing a single-bit vector
        /// for each minterm index together with the empty and full vectors.
        /// </summary>
        /// <param name="minterms">The BDD minterms; their count is the length of every vector created here.</param>
        /// <param name="solver">The character-set solver passed to the minterm classifier.</param>
        public BitVectorSolver(BDD[] minterms, CharSetSolver solver)
        {
            _minterms = minterms;
            _classifier = new MintermClassifier(minterms, solver);

            int width = minterms.Length;

            // One vector per minterm index, each created through BitVector.CreateSingleBit.
            var vectors = new BitVector[width];
            for (int index = 0; index < vectors.Length; index++)
            {
                vectors[index] = BitVector.CreateSingleBit(width, index);
            }

            _mintermVectors = vectors;

            Empty = BitVector.CreateFalse(width);
            Full = BitVector.CreateTrue(width);
        }
        /// <summary>
        /// Creates an algebra over the given minterms, precomputing a single-bit vector
        /// for each minterm index together with the False and True vectors.
        /// </summary>
        /// <param name="solver">The character-set solver passed to the minterm classifier.</param>
        /// <param name="minterms">The BDD minterms; their count is the length of every vector created here.</param>
        public BitVectorAlgebra(CharSetSolver solver, BDD[] minterms)
        {
            _minterms = minterms;
            _classifier = new MintermClassifier(solver, minterms);
            _mintermGenerator = new MintermGenerator<BitVector>(this);

            int width = minterms.Length;

            // One vector per minterm index, each created through BitVector.CreateSingleBit.
            var vectors = new BitVector[width];
            for (int index = 0; index < vectors.Length; index++)
            {
                vectors[index] = BitVector.CreateSingleBit(width, index);
            }

            _mintermVectors = vectors;

            False = BitVector.CreateFalse(width);
            True = BitVector.CreateTrue(width);
        }
Exemple #9
0
 /// <summary>Creates a sampler for the given root node and prepares the helper ASCII character sets.</summary>
 /// <param name="root">The node to sample from; its builder supplies the solver.</param>
 /// <param name="randomseed">Seed for the random number generator; 0 means a seed is picked automatically.</param>
 /// <param name="negative">When true, the complement of <paramref name="root"/> (via the builder's Not) is used instead.</param>
 public SymbolicRegexSampler(SymbolicRegexNode<TSet> root, int randomseed, bool negative)
 {
     _root = negative ? root._builder.Not(root) : root;

     // A seed of 0 is treated as "no seed"; a fresh one is then drawn from an unseeded Random.
     RandomSeed = randomseed == 0 ? new Random().Next() : randomseed;
     _random = new Random(RandomSeed);
     _solver = root._builder._solver;

     var charSets = new CharSetSolver();
     _charSetSolver = charSets;

     // Word characters restricted to ASCII: letters, underscore and digits.
     BDD upperCase = charSets.CreateSetFromRange('A', 'Z');
     BDD lowerCase = charSets.CreateSetFromRange('a', 'z');
     BDD underscore = charSets.CreateFromChar('_');
     BDD digits = charSets.CreateSetFromRange('0', '9');
     _asciiWordCharacters = charSets.Or(new BDD[] { upperCase, lowerCase, underscore, digits });

     // Visible ASCII range for input character generation.
     _ascii = charSets.CreateSetFromRange('\x20', '\x7E');

     // Visible ASCII characters that are not word characters.
     BDD nonWord = charSets.Not(_asciiWordCharacters);
     _asciiNonWordCharacters = charSets.And(_ascii, nonWord);
 }
        /// <summary>Creates a sampler for the given root node and prepares the helper ASCII character sets.</summary>
        /// <param name="root">The node to sample from; its builder supplies the solver.</param>
        /// <param name="randomseed">Seed for the random number generator; 0 means a seed is picked automatically.</param>
        /// <param name="negative">When true, the complement of <paramref name="root"/> (via the builder's Not) is used instead.</param>
        public SymbolicRegexSampler(SymbolicRegexNode<S> root, int randomseed, bool negative)
        {
            _root = negative ? root._builder.Not(root) : root;

            // A seed of 0 is treated as "no seed"; a fresh one is then drawn from an unseeded Random.
            RandomSeed = randomseed == 0 ? new Random().Next() : randomseed;
            _random = new Random(RandomSeed);
            _solver = root._builder._solver;

            CharSetSolver charSets = CharSetSolver.Instance;

            // Word characters restricted to ASCII: letters, underscore and digits.
            BDD upperCase = charSets.RangeConstraint('A', 'Z');
            BDD lowerCase = charSets.RangeConstraint('a', 'z');
            BDD underscore = charSets.CharConstraint('_');
            BDD digits = charSets.RangeConstraint('0', '9');
            _asciiWordCharacters = charSets.Or(new BDD[] { upperCase, lowerCase, underscore, digits });

            // Visible ASCII range for input character generation.
            _ascii = charSets.RangeConstraint('\x20', '\x7E');

            // Visible ASCII characters that are not word characters.
            BDD nonWord = charSets.Not(_asciiWordCharacters);
            _asciiNonWordCharacters = charSets.And(_ascii, nonWord);
        }
Exemple #11
0
        /// <summary>Converts a bit vector into the BDD that is the union of the minterms whose bits are set.</summary>
        /// <param name="set">The bit vector to convert; bit i selects the i'th minterm.</param>
        /// <param name="solver">The solver supplying the empty BDD and the union operation.</param>
        /// <returns>The union of the selected minterms, or the solver's empty BDD when none is selected.</returns>
        public BDD ConvertToBDD(BitVector set, CharSetSolver solver)
        {
            BDD[] minterms = _minterms;
            BDD union = solver.Empty;

            // Nothing to collect when the vector equals Empty.
            if (set.Equals(Empty))
            {
                return union;
            }

            for (int index = 0; index < minterms.Length; index++)
            {
                // Include the index'th minterm only when the index'th bit is set.
                if (!set[index])
                {
                    continue;
                }

                union = solver.Or(union, minterms[index]);
            }

            return union;
        }
Exemple #12
0
        /// <summary>Converts a 64-bit mask into the BDD that is the union of the minterms whose bits are set.</summary>
        /// <param name="set">The mask to convert; bit i selects the i'th minterm.</param>
        /// <param name="solver">The solver supplying the union operation.</param>
        /// <returns>The union of the selected minterms, or <c>BDD.False</c> when the mask is 0.</returns>
        public BDD ConvertToBDD(ulong set, CharSetSolver solver)
        {
            BDD[] minterms = _minterms;
            BDD union = BDD.False;

            // Nothing to collect for an all-zero mask.
            if (set == 0)
            {
                return union;
            }

            for (int index = 0; index < minterms.Length; index++)
            {
                // Include the index'th minterm only when the index'th bit is set.
                bool selected = ((set >> index) & 1UL) != 0;
                if (selected)
                {
                    union = solver.Or(union, minterms[index]);
                }
            }

            return union;
        }
Exemple #13
0
        /// <summary>Constructs matcher for given symbolic regex.</summary>
        /// <remarks>
        /// Besides storing the pattern, builder and timeout settings, the constructor creates the
        /// initial states for three patterns: the original pattern, the dot-starred pattern (any* followed by
        /// the pattern) and the reversed pattern. When the pattern contains anchors it also fills a
        /// 128-entry lookup table with the character kind of every ASCII character.
        /// </remarks>
        /// <param name="sr">The root node of the pattern; its builder and solver are used throughout.</param>
        /// <param name="code">Supplies the find optimizations that may be adopted by the matcher.</param>
        /// <param name="css">Not referenced in this constructor body.</param>
        /// <param name="minterms">Minterms used to build a classifier when the solver is neither a BV64Algebra nor a BVAlgebra.</param>
        /// <param name="matchTimeout">The match timeout; timeout checking is enabled unless it equals <c>Regex.InfiniteMatchTimeout</c>.</param>
        /// <param name="culture">Not referenced in this constructor body.</param>
        internal SymbolicRegexMatcher(SymbolicRegexNode <TSetType> sr, RegexCode code, CharSetSolver css, BDD[] minterms, TimeSpan matchTimeout, CultureInfo culture)
        {
            Debug.Assert(sr._builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {sr._builder._solver}");

            _pattern      = sr;
            _builder      = sr._builder;
            _checkTimeout = Regex.InfiniteMatchTimeout != matchTimeout;
            _timeout      = (int)(matchTimeout.TotalMilliseconds + 0.5); // Adding 0.5 before the truncating cast rounds to the nearest millisecond
            // Reuse the classifier owned by the bit-vector algebras; for any other solver
            // (asserted above to be a CharSetSolver) build a new classifier from the minterms.
            _partitions   = _builder._solver switch
            {
                BV64Algebra bv64 => bv64._classifier,
                BVAlgebra bv => bv._classifier,
                            _ => new MintermClassifier((CharSetSolver)(object)_builder._solver, minterms),
            };

            // Adopt the find optimizations only when a search mode exists and there is no leading anchor.
            if (code.FindOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
                code.FindOptimizations.LeadingAnchor == 0) // If there are any anchors, we're better off letting the DFA quickly do its job of determining whether there's a match.
            {
                _findOpts = code.FindOptimizations;
            }

            // Determine the number of initial states. If there's no anchor, only the default previous
            // character kind 0 is ever going to be used for all initial states.
            int statesCount = _pattern._info.ContainsSomeAnchor ? CharKind.CharKindCount : 1;

            // Create the initial states for the original pattern.
            // The loop index is passed to MkState as the second argument (the previous character kind).
            var initialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < initialStates.Length; i++)
            {
                initialStates[i] = _builder.MkState(_pattern, i);
            }
            _initialStates = initialStates;

            // Create the dot-star pattern (a concatenation of any* with the original pattern)
            // and all of its initial states.
            _dotStarredPattern = _builder.MkConcat(_builder._anyStar, _pattern);
            var dotstarredInitialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < dotstarredInitialStates.Length; i++)
            {
                // Used to detect if initial state was reentered,
                // but observe that the behavior from the state may ultimately depend on the previous
                // input char e.g. possibly causing nullability of \b or \B or of a start-of-line anchor,
                // in that sense there can be several "versions" (not more than StateCount) of the initial state.
                DfaMatchingState <TSetType> state = _builder.MkState(_dotStarredPattern, i);
                state.IsInitialState       = true;
                dotstarredInitialStates[i] = state;
            }
            _dotstarredInitialStates = dotstarredInitialStates;

            // Create the reverse pattern (the original pattern in reverse order) and all of its
            // initial states.
            _reversePattern = _pattern.Reverse();
            var reverseInitialStates = new DfaMatchingState <TSetType> [statesCount];

            for (uint i = 0; i < reverseInitialStates.Length; i++)
            {
                reverseInitialStates[i] = _builder.MkState(_reversePattern, i);
            }
            _reverseInitialStates = reverseInitialStates;

            // Initialize our fast-lookup for determining the character kind of ASCII characters.
            // This is only required when the pattern contains anchors, as otherwise there's only
            // ever a single kind used.
            if (_pattern._info.ContainsSomeAnchor)
            {
                var asciiCharKinds = new uint[128];
                for (int i = 0; i < asciiCharKinds.Length; i++)
                {
                    TSetType predicate2;
                    uint     charKind;

                    // '\n' is tested against the newline predicate; every other ASCII
                    // character is tested against the word-letter predicate.
                    if (i == '\n')
                    {
                        predicate2 = _builder._newLinePredicate;
                        charKind   = CharKind.Newline;
                    }
                    else
                    {
                        predicate2 = _builder._wordLetterPredicateForAnchors;
                        charKind   = CharKind.WordLetter;
                    }

                    // Store 0 when the character's minterm does not intersect the predicate,
                    // otherwise store the candidate kind chosen above.
                    asciiCharKinds[i] = _builder._solver.And(GetMinterm(i), predicate2).Equals(_builder._solver.False) ? 0 : charKind;
                }
                _asciiCharKinds = asciiCharKinds;
            }
        }
Exemple #14
0
 /// <summary>Renders the bit vector <paramref name="bv"/> as the character set it represents.</summary>
 /// <param name="bv">The bit vector to print.</param>
 /// <param name="solver">The solver used for the conversion to a BDD and for the printing itself.</param>
 /// <returns>The solver's textual rendering of the converted set.</returns>
 public string PrettyPrint(BitVector bv, CharSetSolver solver)
 {
     BDD asCharSet = ConvertToBDD(bv, solver);
     return solver.PrettyPrint(asCharSet);
 }
Exemple #15
0
        /// <summary>Generator for BDD Unicode category definitions.</summary>
        /// <remarks>
        /// Writes a C# source file containing a static class with the serialized BDD of the set of
        /// whitespace characters (as matched by the regex <c>\s</c>) and one serialized BDD per
        /// <see cref="UnicodeCategory"/> value, computed over all chars from 0 to <see cref="char.MaxValue"/>.
        /// </remarks>
        /// <param name="namespacename">namespace for the class</param>
        /// <param name="classname">name of the class</param>
        /// <param name="path">path where the file classname.cs is written</param>
        public static void Generate(string namespacename, string classname, string path)
        {
            Debug.Assert(namespacename != null);
            Debug.Assert(classname != null);
            Debug.Assert(path != null);

            // The output file is <path>/<classname>.cs; the header below opens the namespace and the class.
            using StreamWriter sw = new StreamWriter($"{Path.Combine(path, classname)}.cs");
            sw.WriteLine(
                $@"// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// This is a programmatically generated file from Regex.GenerateUnicodeTables.
// It provides serialized BDD Unicode category definitions for System.Environment.Version = {Environment.Version}

using System.Globalization;

namespace {namespacename}
{{
    internal static class {classname}
    {{");
            // One range accumulator per UnicodeCategory value.
            var catMap = new Dictionary <UnicodeCategory, Ranges>();

            foreach (UnicodeCategory c in Enum.GetValues <UnicodeCategory>())
            {
                catMap[c] = new Ranges();
            }

            Ranges whitespace      = new Ranges();
            Regex  whitespaceRegex = new(@"\s");

            // Classify every char value: record it under its Unicode category and,
            // when the \s regex matches it, also under whitespace.
            for (int i = 0; i <= char.MaxValue; i++)
            {
                char ch = (char)i;
                catMap[char.GetUnicodeCategory(ch)].Add(ch);
                if (whitespaceRegex.IsMatch(ch.ToString()))
                {
                    whitespace.Add(ch);
                }
            }

            var charSetSolver = new CharSetSolver();

            sw.WriteLine("        /// <summary>Serialized BDD representation of the set of all whitespace characters.</summary>");
            sw.Write($"        public static ReadOnlySpan<byte> SerializedWhitespaceBDD => ");
            WriteByteArrayInitSyntax(sw, charSetSolver.CreateSetFromRanges(whitespace.ranges).SerializeToBytes());
            sw.WriteLine(";");

            // Generate a BDD representation of each UnicodeCategory.
            // NOTE(review): the index is cast directly to UnicodeCategory, which relies on the enum
            // values being exactly 0..catMap.Count-1 - confirm against the UnicodeCategory definition.
            BDD[] catBDDs = new BDD[catMap.Count];
            for (int c = 0; c < catBDDs.Length; c++)
            {
                catBDDs[c] = charSetSolver.CreateSetFromRanges(catMap[(UnicodeCategory)c].ranges);
            }

            // Emit the lookup method: a switch from the numeric category value to its serialized property.
            sw.WriteLine();
            sw.WriteLine("        /// <summary>Gets the serialized BDD representations of any defined UnicodeCategory.</summary>");
            sw.WriteLine("        public static ReadOnlySpan<byte> GetSerializedCategory(UnicodeCategory category) =>");
            sw.WriteLine("            (int)category switch");
            sw.WriteLine("            {");
            for (int i = 0; i < catBDDs.Length; i++)
            {
                sw.WriteLine($"                {i} => SerializedCategory{i}_{(UnicodeCategory)i},");
            }
            sw.WriteLine($"                _ => default,");
            sw.WriteLine("            };");

            // Emit one private property per category holding its serialized BDD bytes.
            for (int i = 0; i < catBDDs.Length; i++)
            {
                sw.WriteLine();
                sw.WriteLine($"        /// <summary>Serialized BDD representation of the set of all characters in UnicodeCategory.{(UnicodeCategory)i}.</summary>");
                sw.Write($"        private static ReadOnlySpan<byte> SerializedCategory{i}_{(UnicodeCategory)i} => ");
                WriteByteArrayInitSyntax(sw, catBDDs[i].SerializeToBytes());
                sw.WriteLine(";");
            }

            // Close the class and the namespace opened by the header.
            sw.WriteLine($@"    }}
}}");
Exemple #16
0
        /// <summary>Write the DFA or NFA in DGML format into the TextWriter.</summary>
        /// <remarks>
        /// The document is emitted in a fixed order: the XML header, the nodes (an overall "dfa" node,
        /// its info node, and a state node plus info node per explored state), the links (start link,
        /// epsilon transitions, non-epsilon transitions grouped per source/target pair, and containment
        /// links), and finally the static category and style definitions.
        /// </remarks>
        /// <param name="writer">Writer to which the DGML is written.</param>
        /// <param name="matcher">The <see cref="SymbolicRegexMatcher"/> for the regular expression.</param>
        /// <param name="nfa">True to create an NFA instead of a DFA.</param>
        /// <param name="addDotStar">True to prepend .*? onto the pattern (outside of the implicit root capture).</param>
        /// <param name="reverse">If true, then unwind the regex backwards (and <paramref name="addDotStar"/> is ignored).</param>
        /// <param name="maxStates">The approximate maximum number of states to include; less than or equal to 0 for no maximum.</param>
        /// <param name="maxLabelLength">
        /// Maximum length of a non-epsilon transition label. A longer label is cut to this length and suffixed
        /// with "..", and its full text is written to a FullLabel attribute. A negative value never truncates.
        /// </param>
        public static void Write(
            TextWriter writer, SymbolicRegexMatcher <TSet> matcher,
            bool nfa = false, bool addDotStar = true, bool reverse = false, int maxStates = -1, int maxLabelLength = -1)
        {
            var charSetSolver         = new CharSetSolver();
            var explorer              = new DfaExplorer(matcher, nfa, addDotStar, reverse, maxStates);
            var nonEpsilonTransitions = new Dictionary <(int SourceState, int TargetState), List <(SymbolicRegexNode <TSet>?, TSet)> >();
            var epsilonTransitions    = new List <Transition>();

            // Split the transitions: epsilon transitions are kept as a flat list, while the labels of
            // non-epsilon transitions are grouped by their (source, target) state pair.
            foreach (Transition transition in explorer.GetTransitions())
            {
                if (transition.IsEpsilon)
                {
                    epsilonTransitions.Add(transition);
                }
                else
                {
                    (int SourceState, int TargetState)p = (transition.SourceState, transition.TargetState);
                    if (!nonEpsilonTransitions.TryGetValue(p, out List <(SymbolicRegexNode <TSet>?, TSet)>?rules))
                    {
                        nonEpsilonTransitions[p] = rules = new List <(SymbolicRegexNode <TSet>?, TSet)>();
                    }

                    rules.Add(transition.Label);
                }
            }

            // Document header and the node section.
            writer.WriteLine("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
            writer.WriteLine("<DirectedGraph xmlns=\"http://schemas.microsoft.com/vs/2009/dgml\" ZoomLevel=\"1.5\" GraphDirection=\"TopToBottom\" >");
            writer.WriteLine("    <Nodes>");
            writer.WriteLine("        <Node Id=\"dfa\" Label=\" \" Group=\"Collapsed\" Category=\"DFA\" DFAInfo=\"{0}\" />", GetDFAInfo(explorer, charSetSolver));
            writer.WriteLine("        <Node Id=\"dfainfo\" Category=\"DFAInfo\" Label=\"{0}\"/>", GetDFAInfo(explorer, charSetSolver));
            // Each state gets a node (tagged as initial and/or final where applicable) and a companion "<id>info" node.
            foreach (int state in explorer.GetStates())
            {
                writer.WriteLine("        <Node Id=\"{0}\" Label=\"{0}\" Category=\"State\" Group=\"Collapsed\" StateInfo=\"{1}\">", state, explorer.DescribeState(state));
                if (state == explorer.InitialState)
                {
                    writer.WriteLine("            <Category Ref=\"InitialState\" />");
                }
                if (explorer.IsFinalState(state))
                {
                    writer.WriteLine("            <Category Ref=\"FinalState\" />");
                }
                writer.WriteLine("        </Node>");
                writer.WriteLine("        <Node Id=\"{0}info\" Label=\"{1}\" Category=\"StateInfo\"/>", state, explorer.DescribeState(state));
            }
            writer.WriteLine("    </Nodes>");
            // Link section: start link, containment of the info node, then the transitions.
            writer.WriteLine("    <Links>");
            writer.WriteLine("        <Link Source=\"dfa\" Target=\"{0}\" Label=\"\" Category=\"StartTransition\" />", explorer.InitialState);
            writer.WriteLine("        <Link Source=\"dfa\" Target=\"dfainfo\" Label=\"\" Category=\"Contains\" />");

            foreach (Transition transition in epsilonTransitions)
            {
                writer.WriteLine("        <Link Source=\"{0}\" Target=\"{1}\" Category=\"EpsilonTransition\" />", transition.SourceState, transition.TargetState);
            }

            foreach (KeyValuePair <(int, int), List <(SymbolicRegexNode <TSet>?, TSet)> > transition in nonEpsilonTransitions)
            {
                // All labels between the same pair of states are joined into a single link label.
                string label = string.Join($",{Environment.NewLine} ", DescribeLabels(explorer, transition.Value, charSetSolver));
                string info  = "";
                // The (uint) cast turns a negative maxLabelLength into a very large value (in the default
                // unchecked context), so negative limits never trigger truncation.
                if (label.Length > (uint)maxLabelLength)
                {
                    info  = $"FullLabel = \"{label}\" ";
                    label = string.Concat(label.AsSpan(0, maxLabelLength), "..");
                }

                writer.WriteLine($"        <Link Source=\"{transition.Key.Item1}\" Target=\"{transition.Key.Item2}\" Label=\"{label}\" Category=\"NonEpsilonTransition\" {info}/>");
            }

            // Attach every state node to its info node.
            foreach (int state in explorer.GetStates())
            {
                writer.WriteLine("        <Link Source=\"{0}\" Target=\"{0}info\" Category=\"Contains\" />", state);
            }

            writer.WriteLine("    </Links>");
            // Static category declarations referenced by the nodes and links above.
            writer.WriteLine("    <Categories>");
            writer.WriteLine("        <Category Id=\"DFA\" Label=\"DFA\" IsTag=\"True\" />");
            writer.WriteLine("        <Category Id=\"EpsilonTransition\" Label=\"Epsilon transition\" IsTag=\"True\" />");
            writer.WriteLine("        <Category Id=\"StartTransition\" Label=\"Initial transition\" IsTag=\"True\" />");
            writer.WriteLine("        <Category Id=\"FinalLabel\" Label=\"Final transition\" IsTag=\"True\" />");
            writer.WriteLine("        <Category Id=\"FinalState\" Label=\"Final\" IsTag=\"True\" />");
            writer.WriteLine("        <Category Id=\"SinkState\" Label=\"Sink state\" IsTag=\"True\" />");
            writer.WriteLine("        <Category Id=\"EpsilonState\" Label=\"Epsilon state\" IsTag=\"True\" />");
            writer.WriteLine("        <Category Id=\"InitialState\" Label=\"Initial\" IsTag=\"True\" />");
            writer.WriteLine("        <Category Id=\"NonEpsilonTransition\" Label=\"Nonepsilon transition\" IsTag=\"True\" />");
            writer.WriteLine("        <Category Id=\"State\" Label=\"State\" IsTag=\"True\" />");
            writer.WriteLine("    </Categories>");
            // Static style definitions, one per category of node or link.
            writer.WriteLine("    <Styles>");
            writer.WriteLine("        <Style TargetType=\"Node\" GroupLabel=\"InitialState\" ValueLabel=\"True\">");
            writer.WriteLine("            <Condition Expression=\"HasCategory('InitialState')\" />");
            writer.WriteLine("            <Setter Property=\"Background\" Value=\"lightgray\" />");
            writer.WriteLine("            <Setter Property=\"MinWidth\" Value=\"0\" />");
            writer.WriteLine("        </Style>");
            writer.WriteLine("        <Style TargetType=\"Node\" GroupLabel=\"FinalState\" ValueLabel=\"True\">");
            writer.WriteLine("            <Condition Expression=\"HasCategory('FinalState')\" />");
            writer.WriteLine("            <Setter Property=\"Background\" Value=\"lightgreen\" />");
            writer.WriteLine("            <Setter Property=\"StrokeThickness\" Value=\"4\" />");
            writer.WriteLine("        </Style>");
            writer.WriteLine("        <Style TargetType=\"Node\" GroupLabel=\"State\" ValueLabel=\"True\">");
            writer.WriteLine("            <Condition Expression=\"HasCategory('State')\" />");
            writer.WriteLine("            <Setter Property=\"Stroke\" Value=\"black\" />");
            writer.WriteLine("            <Setter Property=\"Background\" Value=\"white\" />");
            writer.WriteLine("            <Setter Property=\"MinWidth\" Value=\"0\" />");
            writer.WriteLine("            <Setter Property=\"FontSize\" Value=\"12\" />");
            writer.WriteLine("            <Setter Property=\"FontFamily\" Value=\"Arial\" />");
            writer.WriteLine("        </Style>");
            writer.WriteLine("        <Style TargetType=\"Link\" GroupLabel=\"NonEpsilonTransition\" ValueLabel=\"True\">");
            writer.WriteLine("            <Condition Expression=\"HasCategory('NonEpsilonTransition')\" />");
            writer.WriteLine("            <Setter Property=\"Stroke\" Value=\"black\" />");
            writer.WriteLine("            <Setter Property=\"FontSize\" Value=\"18\" />");
            writer.WriteLine("            <Setter Property=\"FontFamily\" Value=\"Arial\" />");
            writer.WriteLine("        </Style>");
            writer.WriteLine("        <Style TargetType=\"Link\" GroupLabel=\"StartTransition\" ValueLabel=\"True\">");
            writer.WriteLine("            <Condition Expression=\"HasCategory('StartTransition')\" />");
            writer.WriteLine("            <Setter Property=\"Stroke\" Value=\"black\" />");
            writer.WriteLine("        </Style>");
            writer.WriteLine("        <Style TargetType=\"Link\" GroupLabel=\"EpsilonTransition\" ValueLabel=\"True\">");
            writer.WriteLine("            <Condition Expression=\"HasCategory('EpsilonTransition')\" />");
            writer.WriteLine("            <Setter Property=\"Stroke\" Value=\"black\" />");
            writer.WriteLine("            <Setter Property=\"StrokeDashArray\" Value=\"8 8\" />");
            writer.WriteLine("        </Style>");
            writer.WriteLine("        <Style TargetType=\"Link\" GroupLabel=\"FinalLabel\" ValueLabel=\"False\">");
            writer.WriteLine("            <Condition Expression=\"HasCategory('FinalLabel')\" />");
            writer.WriteLine("            <Setter Property=\"Stroke\" Value=\"black\" />");
            writer.WriteLine("            <Setter Property=\"StrokeDashArray\" Value=\"8 8\" />");
            writer.WriteLine("        </Style>");
            writer.WriteLine("        <Style TargetType=\"Node\" GroupLabel=\"StateInfo\" ValueLabel=\"True\">");
            writer.WriteLine("            <Setter Property=\"Stroke\" Value=\"white\" />");
            writer.WriteLine("            <Setter Property=\"FontSize\" Value=\"18\" />");
            writer.WriteLine("            <Setter Property=\"FontFamily\" Value=\"Arial\" />");
            writer.WriteLine("        </Style>");
            writer.WriteLine("        <Style TargetType=\"Node\" GroupLabel=\"DFAInfo\" ValueLabel=\"True\">");
            writer.WriteLine("            <Setter Property=\"Stroke\" Value=\"white\" />");
            writer.WriteLine("            <Setter Property=\"FontSize\" Value=\"18\" />");
            writer.WriteLine("            <Setter Property=\"FontFamily\" Value=\"Arial\" />");
            writer.WriteLine("        </Style>");
            writer.WriteLine("    </Styles>");
            writer.WriteLine("</DirectedGraph>");
        }
Exemple #17
0
        /// <summary>Renders the bit vector <paramref name="bv"/> as the character set it represents.</summary>
        /// <param name="bv">The bit vector to print.</param>
        /// <returns>The textual rendering produced by the shared <c>CharSetSolver.Instance</c>.</returns>
        public string PrettyPrint(BV bv)
        {
            CharSetSolver charSets = CharSetSolver.Instance;

            // Convert to a character set first, then let the solver render it.
            var asCharSet = ConvertToCharSet(charSets, bv);
            return charSets.PrettyPrint(asCharSet);
        }
Exemple #18
0
        /// <summary>
        /// Lazily generates sample input strings by taking random transitions over the NFA state set of the pattern.
        /// Each of the <paramref name="k"/> attempts yields at most one string, so fewer than
        /// <paramref name="k"/> strings may be produced.
        /// </summary>
        /// <param name="k">Number of sampling attempts to make.</param>
        /// <param name="randomseed">Seed for the random number generator; zero means a system provided seed is used.</param>
        /// <returns>An iterator over the generated sample strings.</returns>
        public override IEnumerable <string> SampleMatches(int k, int randomseed)
        {
            // Zero is treated as no seed, instead using a system provided one
            Random random = randomseed != 0 ? new Random(randomseed) : new Random();

            ISolver <TSet> solver        = _builder._solver;
            CharSetSolver  charSetSolver = _builder._charSetSolver;

            // Create helper BDDs for handling anchors and preferentially generating ASCII inputs
            BDD asciiWordCharacters = charSetSolver.Or(new BDD[] {
                charSetSolver.CreateBDDFromRange('A', 'Z'),
                charSetSolver.CreateBDDFromRange('a', 'z'),
                charSetSolver.CreateBDDFromChar('_'),
                charSetSolver.CreateBDDFromRange('0', '9')
            });
            // Visible ASCII range for input character generation
            BDD ascii = charSetSolver.CreateBDDFromRange('\x20', '\x7E');
            BDD asciiNonWordCharacters = charSetSolver.And(ascii, charSetSolver.Not(asciiWordCharacters));

            // Set up two sets of minterms, one with the additional special minterm for the last end-of-line
            Debug.Assert(_builder._minterms is not null);
            int[] mintermIdsWithoutZ = new int[_builder._minterms.Length];
            int[] mintermIdsWithZ    = new int[_builder._minterms.Length + 1];
            for (int i = 0; i < _builder._minterms.Length; ++i)
            {
                mintermIdsWithoutZ[i] = i;
                mintermIdsWithZ[i]    = i;
            }
            // The extra id, one past the regular minterm ids, stands for the special last end-of-line minterm
            mintermIdsWithZ[_builder._minterms.Length] = _builder._minterms.Length;

            // Each iteration is one independent sampling attempt that yields at most one string
            for (int i = 0; i < k; i++)
            {
                // Holds the generated input so far
                StringBuilder inputSoFar      = new();
                // Most recent input (prefix plus chosen ending) for which some ending was possible; null until one is found
                StringBuilder?latestCandidate = null;

                // Current set of states reached initially contains just the root
                NfaMatchingState states = new(_builder);
                // Here one could also consider previous characters for example for \b, \B, and ^ anchors
                // and initialize inputSoFar accordingly
                states.InitializeFrom(_initialStates[GetCharKind(ReadOnlySpan <char> .Empty, -1)]);
                CurrentState statesWrapper = new(states);

                // Used for end suffixes
                List <string> possibleEndings = new();

                while (true)
                {
                    Debug.Assert(states.NfaStateSet.Count > 0);

                    // Gather the possible endings for satisfying nullability
                    possibleEndings.Clear();
                    if (NfaStateHandler.CanBeNullable(ref statesWrapper))
                    {
                        // Unconditionally final state or end of the input due to \Z anchor for example
                        if (NfaStateHandler.IsNullable(ref statesWrapper) ||
                            NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.BeginningEnd))
                        {
                            possibleEndings.Add("");
                        }

                        // End of line due to end-of-line anchor
                        if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.Newline))
                        {
                            possibleEndings.Add("\n");
                        }

                        // Related to wordborder due to \b or \B
                        if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.WordLetter))
                        {
                            possibleEndings.Add(ChooseChar(random, asciiWordCharacters, ascii, charSetSolver).ToString());
                        }

                        // Related to wordborder due to \b or \B
                        if (NfaStateHandler.IsNullableFor(_builder, ref statesWrapper, CharKind.General))
                        {
                            possibleEndings.Add(ChooseChar(random, asciiNonWordCharacters, ascii, charSetSolver).ToString());
                        }
                    }

                    // If we have a possible ending, then store a candidate input
                    if (possibleEndings.Count > 0)
                    {
                        latestCandidate ??= new();
                        latestCandidate.Clear();
                        latestCandidate.Append(inputSoFar);
                        //Choose some suffix that allows some anchor (if any) to be nullable
                        latestCandidate.Append(Choose(random, possibleEndings));

                        // Choose to stop here based on a coin-toss
                        if (FlipBiasedCoin(random, SampleMatchesStoppingProbability))
                        {
                            yield return(latestCandidate.ToString());

                            // This attempt is done; continue with the next iteration of the outer loop
                            break;
                        }
                    }

                    // Shuffle the minterms, including the last end-of-line marker if appropriate
                    int[] mintermIds = NfaStateHandler.StartsWithLineAnchor(_builder, ref statesWrapper) ?
                                       Shuffle(random, mintermIdsWithZ) :
                                       Shuffle(random, mintermIdsWithoutZ);
                    // Try the minterms in shuffled order and keep the first one that leaves a non-empty state set
                    foreach (int mintermId in mintermIds)
                    {
                        bool success = NfaStateHandler.TakeTransition(_builder, ref statesWrapper, mintermId);
                        Debug.Assert(success);
                        if (states.NfaStateSet.Count > 0)
                        {
                            TSet minterm = _builder.GetMinterm(mintermId);
                            // Append a random member of the minterm
                            inputSoFar.Append(ChooseChar(random, ToBDD(minterm, solver, charSetSolver), ascii, charSetSolver));
                            break;
                        }
                        else
                        {
                            // The transition was a dead end, undo and continue to try another minterm
                            NfaStateHandler.UndoTransition(ref statesWrapper);
                        }
                    }

                    // In the case that there are no next states or input has become too large: stop here
                    if (states.NfaStateSet.Count == 0 || inputSoFar.Length > SampleMatchesMaxInputLength)
                    {
                        // Ending up here without an ending is unlikely but possible for example for infeasible patterns
                        // such as @"no\bway" or due to poor choice of c -- no anchor is enabled -- so this is a deadend.
                        // Fall back to the last stored candidate, if any; otherwise this attempt yields nothing
                        if (latestCandidate != null)
                        {
                            yield return(latestCandidate.ToString());
                        }
                        break;
                    }
                }
            }
Exemple #19
0
 // Converts a set expressed in the given solver's representation into a BDD by delegating to the solver.
 static BDD ToBDD(TSet set, ISolver<TSet> solver, CharSetSolver charSetSolver)
 {
     return solver.ConvertToBDD(set, charSetSolver);
 }
 /// <summary>
 /// Returns the cached <see cref="BDD"/> for the union of <see cref="WordLetter"/> and the range
 /// \u200C (zero width non joiner) through \u200D (zero width joiner); those two characters are
 /// treated as word characters when evaluating the anchors \b and \B.
 /// </summary>
 /// <remarks>
 /// The value is built on first use and published with a compare-and-exchange, so a value that was
 /// already stored is never overwritten.
 /// </remarks>
 public static BDD WordLetterForAnchors(CharSetSolver solver)
 {
     if (s_wordLetterForAnchors is null)
     {
         BDD withJoiners = solver.Or(WordLetter(solver), solver.CreateBDDFromRange('\u200C', '\u200D'));

         // Only stores the new value when the field is still null; otherwise the existing value is kept.
         Interlocked.CompareExchange(ref s_wordLetterForAnchors, withJoiners, null);
     }

     return s_wordLetterForAnchors;
 }
Exemple #21
0
 /// <summary>
 /// Renders the bitvector <paramref name="bv"/> as text by converting it to a BDD with
 /// <paramref name="solver"/> and pretty printing the result.
 /// </summary>
 /// <param name="bv">The bitvector to describe.</param>
 /// <param name="solver">The solver used for both the conversion and the pretty printing.</param>
 /// <returns>The string produced by the solver's pretty printer.</returns>
 public string PrettyPrint(ulong bv, CharSetSolver solver)
 {
     var charSet = ConvertToBDD(bv, solver);
     return solver.PrettyPrint(charSet);
 }