Ejemplo n.º 1
0
        /// <summary>
        /// Variant of <see cref="Match(byte[], int, int, int)"/> that records its outcome in a
        /// caller-supplied <see cref="MatchResult"/> rather than producing a new object, so
        /// repeated lookups do not create garbage.
        /// </summary>
        /// <param name="reuse">The <see cref="MatchResult"/> instance that is reset and handed back.</param>
        /// <param name="sequence">Array holding the bytes to look up in the automaton.</param>
        /// <param name="start">Index in <paramref name="sequence"/> of the first byte to match.</param>
        /// <param name="length">Number of bytes to match, must be at least 1.</param>
        /// <param name="node">Node at which traversal begins, typically the root node (<see cref="FSA.GetRootNode()"/>).</param>
        /// <returns>The <paramref name="reuse"/> instance, reset with the match <see cref="MatchResult.Kind"/>
        /// together with the index and node passed to its reset call.</returns>
        public MatchResult Match(MatchResult reuse, byte[] sequence, int start, int length, int node)
        {
            // Node 0 has nothing to traverse: report a mismatch at the starting index.
            if (node == 0)
            {
                reuse.Reset(MatchResult.NoMatch, start, node);
                return reuse;
            }

            FSA automaton = this.fsa;
            int limit = start + length;
            int index = start;

            while (index < limit)
            {
                int arc = automaton.GetArc(node, sequence[index]);

                if (arc == 0)
                {
                    // No outgoing arc carries this byte. If at least one byte was consumed
                    // the automaton covered a prefix of the input, otherwise nothing matched.
                    if (index > start)
                    {
                        reuse.Reset(MatchResult.AutomatonHasPrefix, index, node);
                    }
                    else
                    {
                        reuse.Reset(MatchResult.NoMatch, index, node);
                    }

                    return reuse;
                }

                // Last input byte sits on a final arc: the whole sequence is in the automaton.
                bool isLastByte = index + 1 == limit;
                if (isLastByte && automaton.IsArcFinal(arc))
                {
                    reuse.Reset(MatchResult.ExactMatch, index, node);
                    return reuse;
                }

                // A terminal arc cannot be followed any further, so the automaton
                // holds only a prefix of the input sequence.
                if (automaton.IsArcTerminal(arc))
                {
                    reuse.Reset(MatchResult.AutomatonHasPrefix, index + 1, node);
                    return reuse;
                }

                // Follow the arc to its target node and move on to the next byte.
                node = automaton.GetEndNode(arc);
                index++;
            }

            // Every byte was consumed without reaching a final arc: the input is a prefix
            // of at least one stored sequence. The index is reported as 0 in this case.
            reuse.Reset(MatchResult.SequenceIsAPrefix, 0, node);
            return reuse;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Moves the traversal forward until an arc flagged as final is encountered.
        /// </summary>
        /// <returns>
        /// The shared buffer wrapper, cleared and limited to the labels collected up to and
        /// including the final arc, or <c>null</c> once no pending arcs remain.
        /// </returns>
        private ByteBuffer Advance()
        {
            // An empty stack (position == 0) skips the loop and yields null.
            while (position > 0)
            {
                int depth = position - 1;
                int currentArc = arcs[depth];

                if (currentArc != 0)
                {
                    // Replace the stack entry with the sibling arc instead of popping it,
                    // so the stack height keeps reflecting the current depth.
                    arcs[depth] = fsa.GetNextArc(currentArc);

                    // Grow the label buffer (and re-wrap it) when this depth does not fit.
                    int capacity = buffer.Length;
                    if (depth >= capacity)
                    {
                        Array.Resize(ref buffer, capacity + ExpectedMaxStates);
                        bufferWrapper = ByteBuffer.Wrap(buffer);
                    }

                    buffer[depth] = fsa.GetArcLabel(currentArc);

                    // Schedule the arc's target node unless the arc leads nowhere.
                    if (!fsa.IsArcTerminal(currentArc))
                    {
                        PushNode(fsa.GetEndNode(currentArc));
                    }

                    // A final arc completes a sequence: expose the labels gathered so far.
                    if (fsa.IsArcFinal(currentArc))
                    {
                        bufferWrapper.Clear();
                        bufferWrapper.Limit = depth + 1;
                        return bufferWrapper;
                    }
                }
                else
                {
                    // All arcs at this level are used up; drop the level from the stack.
                    position--;
                }
            }

            return null;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Computes the perfect hash of a byte sequence. This requires an <see cref="FSA"/> built
        /// with <see cref="FSAFlags.Numbers"/>; the resulting value corresponds to the sequential
        /// order of the input sequences used when the automaton was constructed.
        /// </summary>
        /// <param name="sequence">Array holding the bytes to hash.</param>
        /// <param name="start">Index in <paramref name="sequence"/> of the first byte to process.</param>
        /// <param name="length">Number of bytes to process, must be at least 1.</param>
        /// <param name="node">Node at which traversal begins, typically the root node (<see cref="FSA.GetRootNode()"/>).</param>
        /// <returns>
        /// The unique integer assigned to the sequence in the automaton (its number in the input
        /// the automaton was built from), or a negative value when the sequence was not part of
        /// that input. The negative value is one of the mismatch constants defined in
        /// <see cref="MatchResult"/>.
        /// </returns>
        /// <seealso cref="PerfectHash(byte[])"/>
        public int PerfectHash(byte[] sequence, int start, int length, int node)
        {
            Debug.Assert((fsa.Flags & FSAFlags.Numbers) != 0, $"FSA not built with {FSAFlags.Numbers} option.");
            Debug.Assert(length > 0, "Must be a non-empty sequence.");

            int hash = 0;
            int lastIndex = start + length - 1;

            int cursor = start;
            byte wanted = sequence[cursor];
            int arc = fsa.GetFirstArc(node);

            // Scan the arcs of the current node for one labelled with the wanted byte,
            // accumulating the hash over every arc that is passed by.
            while (arc != 0)
            {
                if (fsa.GetArcLabel(arc) != wanted)
                {
                    // Skipped arc: add one if the arc itself is final, plus the right
                    // language count of its target node when the arc has one.
                    if (fsa.IsArcFinal(arc))
                    {
                        hash++;
                    }

                    if (!fsa.IsArcTerminal(arc))
                    {
                        hash += fsa.GetRightLanguageCount(fsa.GetEndNode(arc));
                    }

                    arc = fsa.GetNextArc(arc);
                    continue;
                }

                bool atLastByte = cursor == lastIndex;

                if (fsa.IsArcFinal(arc))
                {
                    // Final arc on the last byte: the accumulated value is the hash.
                    if (atLastByte)
                    {
                        return hash;
                    }

                    hash++;
                }

                // The arc cannot be followed further: the automaton contains only
                // a prefix of the input sequence.
                if (fsa.IsArcTerminal(arc))
                {
                    return MatchResult.AutomatonHasPrefix;
                }

                // Input exhausted on a non-final arc: the sequence is a prefix of
                // one of the sequences stored in the automaton.
                if (atLastByte)
                {
                    return MatchResult.SequenceIsAPrefix;
                }

                // Follow the arc: continue from the first arc of its target node
                // with the next input byte.
                arc = fsa.GetFirstArc(fsa.GetEndNode(arc));
                cursor++;
                wanted = sequence[cursor];
            }

            // The node's arcs ran out without matching the wanted byte.
            if (cursor > start)
            {
                return MatchResult.AutomatonHasPrefix;
            }

            // Not even the first byte matched; no perfect hash exists.
            return MatchResult.NoMatch;
        }