Exemplo n.º 1
0
        /// <summary>Converts the root <see cref="RegexNode"/> into its corresponding <see cref="SymbolicRegexNode{S}"/>.</summary>
        /// <param name="root">The root node to convert.</param>
        /// <returns>The generated <see cref="SymbolicRegexNode{S}"/> that corresponds to the supplied <paramref name="root"/>.</returns>
        internal SymbolicRegexNode <BDD> ConvertToSymbolicRegexNode(RegexNode root)
        {
            Debug.Assert(_builder is not null);

            // Create the root list that will store the built-up result.
            DoublyLinkedList <SymbolicRegexNode <BDD> > rootResult = new();

            // Create a stack of work items so the tree is processed iteratively rather than recursively, and push the root on.
            Stack <(RegexNode Node, DoublyLinkedList <SymbolicRegexNode <BDD> > Result, DoublyLinkedList <SymbolicRegexNode <BDD> >[]? ChildResults)> stack = new();

            stack.Push((root, rootResult, CreateChildResultArray(root.ChildCount())));

            // Continue to iterate until the stack is empty, popping the next item on each iteration.
            // Some popped items may be pushed back on as part of processing.
            while (stack.TryPop(out (RegexNode Node, DoublyLinkedList <SymbolicRegexNode <BDD> > Result, DoublyLinkedList <SymbolicRegexNode <BDD> >[]? ChildResults)popped))
Exemplo n.º 2
0
        /// <summary>Converts the root <see cref="RegexNode"/> into its corresponding <see cref="SymbolicRegexNode{S}"/>.</summary>
        /// <param name="root">The root node to convert.</param>
        /// <returns>The generated <see cref="SymbolicRegexNode{S}"/> that corresponds to the supplied <paramref name="root"/>.</returns>
        internal SymbolicRegexNode <BDD> ConvertToSymbolicRegexNode(RegexNode root)
        {
            Debug.Assert(_builder is not null);

            // Create the root list that will store the built-up result.
            DoublyLinkedList <SymbolicRegexNode <BDD> > rootResult = new();

            // Create a stack of work items so the tree is processed iteratively rather than recursively, and push the root on.
            Stack <(RegexNode Node, bool TryToMarkFixedLength, DoublyLinkedList <SymbolicRegexNode <BDD> > Result, DoublyLinkedList <SymbolicRegexNode <BDD> >[]? ChildResults)> stack = new();

            stack.Push((root, true, rootResult, CreateChildResultArray(root.ChildCount())));

            // Continue to iterate until the stack is empty, popping the next item on each iteration.
            // Some popped items may be pushed back on as part of processing.
            while (stack.TryPop(out (RegexNode Node, bool TryToMarkFixedLength, DoublyLinkedList <SymbolicRegexNode <BDD> > Result, DoublyLinkedList <SymbolicRegexNode <BDD> >[]? ChildResults)popped))
            {
                RegexNode node = popped.Node;
                DoublyLinkedList <SymbolicRegexNode <BDD> > result = popped.Result;
                DoublyLinkedList <SymbolicRegexNode <BDD> >[]? childResults = popped.ChildResults;
                Debug.Assert(childResults is null || childResults.Length != 0);

                if (childResults is null || childResults[0] is null)
                {
                    // Child nodes have not been converted yet
                    // Handle each node kind as appropriate.
                    switch (node.Kind)
                    {
                    // Singletons and multis

                    case RegexNodeKind.One:
                        result.AddLast(_builder.CreateSingleton(_builder._solver.CreateFromChar(node.Ch)));
                        break;

                    case RegexNodeKind.Notone:
                        result.AddLast(_builder.CreateSingleton(_builder._solver.Not(_builder._solver.CreateFromChar(node.Ch))));
                        break;

                    case RegexNodeKind.Set:
                        result.AddLast(ConvertSet(node));
                        break;

                    case RegexNodeKind.Multi:
                    {
                        // Create a BDD for each character in the string and concatenate them.
                        string?str = node.Str;
                        Debug.Assert(str is not null);
                        foreach (char c in str)
                        {
                            result.AddLast(_builder.CreateSingleton(_builder._solver.CreateFromChar(c)));
                        }
                        break;
                    }

                    // The following five cases are the only node kinds that are pushed twice:
                    // Joins, general loops, and supported captures

                    case RegexNodeKind.Concatenate:
                    case RegexNodeKind.Alternate:
                    case RegexNodeKind.Loop:
                    case RegexNodeKind.Lazyloop:
                    case RegexNodeKind.Capture when node.N == -1:     // N == -1 because balancing groups (which have N >= 0) aren't supported
                    {
                        Debug.Assert(childResults is not null && childResults.Length == node.ChildCount());

                        // Push back the temporarily popped item. Next time this work item is seen, its ChildResults list will be ready.
                        // Propagate the length mark check only in case of alternation.
                        stack.Push(popped);
                        bool mark = node.Kind == RegexNodeKind.Alternate && popped.TryToMarkFixedLength;

                        // Push all the children to be converted
                        for (int i = 0; i < node.ChildCount(); ++i)
                        {
                            childResults[i] = new DoublyLinkedList <SymbolicRegexNode <BDD> >();
                            stack.Push((node.Child(i), mark, childResults[i], CreateChildResultArray(node.Child(i).ChildCount())));
                        }
                        break;
                    }

                    // Specialized loops

                    case RegexNodeKind.Oneloop:
                    case RegexNodeKind.Onelazy:
                    case RegexNodeKind.Notoneloop:
                    case RegexNodeKind.Notonelazy:
                    {
                        // Create a BDD that represents the character, then create a loop around it.
                        BDD bdd = _builder._solver.CreateFromChar(node.Ch);
                        if (node.IsNotoneFamily)
                        {
                            bdd = _builder._solver.Not(bdd);
                        }
                        result.AddLast(_builder.CreateLoop(_builder.CreateSingleton(bdd), node.Kind is RegexNodeKind.Onelazy or RegexNodeKind.Notonelazy, node.M, node.N));
                        break;
                    }

                    case RegexNodeKind.Setloop:
                    case RegexNodeKind.Setlazy:
                    {
                        // Create a BDD that represents the set string, then create a loop around it.
                        string?set = node.Str;
                        Debug.Assert(set is not null);
                        BDD setBdd = CreateBDDFromSetString(set);
                        result.AddLast(_builder.CreateLoop(_builder.CreateSingleton(setBdd), node.Kind == RegexNodeKind.Setlazy, node.M, node.N));
                        break;
                    }

                    case RegexNodeKind.Empty:
                    case RegexNodeKind.UpdateBumpalong:     // UpdateBumpalong is a directive relevant only to backtracking and can be ignored just like Empty
                        break;

                    case RegexNodeKind.Nothing:
                        result.AddLast(_builder._nothing);
                        break;

                    // Anchors

                    case RegexNodeKind.Beginning:
                        result.AddLast(_builder.BeginningAnchor);
                        break;

                    case RegexNodeKind.Bol:
                        EnsureNewlinePredicateInitialized();
                        result.AddLast(_builder.BolAnchor);
                        break;

                    case RegexNodeKind.End:      // \z anchor
                        result.AddLast(_builder.EndAnchor);
                        break;

                    case RegexNodeKind.EndZ:     // \Z anchor
                        EnsureNewlinePredicateInitialized();
                        result.AddLast(_builder.EndAnchorZ);
                        break;

                    case RegexNodeKind.Eol:
                        EnsureNewlinePredicateInitialized();
                        result.AddLast(_builder.EolAnchor);
                        break;

                    case RegexNodeKind.Boundary:
                        EnsureWordLetterPredicateInitialized();
                        result.AddLast(_builder.BoundaryAnchor);
                        break;

                    case RegexNodeKind.NonBoundary:
                        EnsureWordLetterPredicateInitialized();
                        result.AddLast(_builder.NonBoundaryAnchor);
                        break;

                    // Unsupported

                    default:
                        throw new NotSupportedException(SR.Format(SR.NotSupported_NonBacktrackingConflictingExpression, node.Kind switch
                        {
                            RegexNodeKind.Atomic or RegexNodeKind.Setloopatomic or RegexNodeKind.Oneloopatomic or RegexNodeKind.Notoneloopatomic => SR.ExpressionDescription_AtomicSubexpressions,
                            RegexNodeKind.Backreference => SR.ExpressionDescription_Backreference,
                            RegexNodeKind.BackreferenceConditional => SR.ExpressionDescription_Conditional,
                            RegexNodeKind.Capture => SR.ExpressionDescription_BalancingGroup,
                            RegexNodeKind.ExpressionConditional => SR.ExpressionDescription_IfThenElse,
                            RegexNodeKind.NegativeLookaround => SR.ExpressionDescription_NegativeLookaround,
                            RegexNodeKind.PositiveLookaround => SR.ExpressionDescription_PositiveLookaround,
                            RegexNodeKind.Start => SR.ExpressionDescription_ContiguousMatches,
                            _ => UnexpectedNodeType(node)
                        }));