Exemplo n.º 1
0
        //
        // Aho-Corasick
        //
        // q = root
        // FOR i = 1 TO n
        //   WHILE q != fail AND g(q, text[i]) == fail
        //     q = h(q)
        //   ENDWHILE
        //   IF q == fail
        //     q = root
        //   ELSE
        //     q = g(q, text[i])
        //   ENDIF
        //   IF isElement(q, final)
        //     RETURN TRUE
        //   ENDIF
        // ENDFOR
        // RETURN FALSE
        //

        /// <summary>
        /// Search the text for any of the patterns added to the trie.
        /// </summary>
        /// <remarks>
        /// Searches the text for any of the patterns added to the trie.
        /// </remarks>
        /// <returns>The first index of a matched pattern if successful; otherwise, <c>-1</c>.</returns>
        /// <param name="text">The text to search.</param>
        /// <param name="startIndex">The starting index of the text.</param>
        /// <param name="count">The number of characters to search, starting at <paramref name="startIndex"/>.</param>
        /// <param name="pattern">The pattern that was matched.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="text"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="startIndex"/> and <paramref name="count"/> do not specify
        /// a valid range in the <paramref name="text"/> string.
        /// </exception>
        public int Search(char[] text, int startIndex, int count, out string pattern)
        {
            // Argument checking is delegated to a helper defined elsewhere
            // (presumably the source of the documented exceptions -- verify).
            ValidateArguments(text, startIndex, count);

            // Never scan past the end of the buffer, whatever count says.
            int       endIndex = Math.Min(text.Length, startIndex + count);
            TrieState state    = root;
            TrieMatch match    = null;
            // Largest state.Depth seen so far; stays 0 until a state with a
            // non-zero Depth is reached.
            int       matched  = 0;
            // Index that will be returned once a match has been recorded.
            int       offset   = -1;
            char      c;

            pattern = null;

            for (int i = startIndex; i < endIndex; i++)
            {
                // Fold case only when the trie is case-insensitive.
                c = icase ? char.ToLower(text[i]) : text[i];

                // Follow failure links until some state has a transition on c.
                // Because of the "matched == 0" term the loop body is never
                // entered once a match has been recorded, so a recorded match
                // is never abandoned by falling back to a shorter prefix.
                while (state != null && (match = FindMatch(state, c)) == null && matched == 0)
                {
                    state = state.Fail;
                }

                // Sitting at the root: report an already-recorded match,
                // otherwise remember this index as the start of a new attempt.
                if (state == root)
                {
                    if (matched > 0)
                    {
                        return(offset);
                    }

                    offset = i;
                }

                if (state == null)
                {
                    // The failure chain was exhausted: no state accepts c.
                    if (matched > 0)
                    {
                        return(offset);
                    }

                    // Restart from the root at this index.
                    state  = root;
                    offset = i;
                }
                else if (match != null)
                {
                    // Take the transition on c.
                    state = match.State;

                    // Keep the deepest pattern seen; the scan continues so a
                    // longer pattern sharing this prefix can still replace it.
                    if (state.Depth > matched)
                    {
                        pattern = state.Pattern;
                        matched = state.Depth;
                    }
                }
                else if (matched > 0)
                {
                    // No transition on c and a match is already recorded:
                    // it cannot be extended, so report it.
                    return(offset);
                }
            }

            // Input exhausted: report a pending match, if any.
            return(matched > 0 ? offset : -1);
        }
Exemplo n.º 2
0
        // Creates a new state reachable from %q on character %c, records it
        // in fail_states at index %depth, and returns it.
        TrieState InsertMatchAtState(int depth, TrieState q, char c)
        {
            // A fresh state initially fails back to the root.
            TrieState created = new TrieState();

            created.Fail = root;

            if (depth >= fail_states.Count)
            {
                // No entry at this depth yet: the new state becomes it.
                fail_states.Insert(depth, created);
            }
            else
            {
                // Push the new state onto the front of the chain at %depth.
                created.Next        = fail_states [depth];
                fail_states [depth] = created;
            }

            // Build the transition on %c and prepend it to %q's match list.
            TrieMatch edge = new TrieMatch();

            edge.Next  = q.FirstMatch;
            edge.State = created;
            edge.Value = c;

            q.FirstMatch = edge;

            return created;
        }
Exemplo n.º 3
0
        /*
         * Aho-Corasick
         *
         * q = root
         * FOR i = 1 TO n
         *   WHILE q != fail AND g(q, text[i]) == fail
         *     q = h(q)
         *   ENDWHILE
         *   IF q == fail
         *     q = root
         *   ELSE
         *     q = g(q, text[i])
         *   ENDIF
         *   IF isElement(q, final)
         *     RETURN TRUE
         *   ENDIF
         * ENDFOR
         * RETURN FALSE
         */
        /// <summary>
        /// Scans <paramref name="haystack"/> once, left to right, and records a hit
        /// every time the automaton enters a state whose <c>Final</c> is non-zero.
        /// </summary>
        /// <param name="haystack">The text to scan.</param>
        /// <returns>The hits in the order they were found; empty when nothing matched.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="haystack"/> is <c>null</c>.
        /// </exception>
        public IList <TrieHit> FindMatches(string haystack)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // on the first haystack.Length access.
            if (haystack == null)
            {
                throw new ArgumentNullException("haystack");
            }

            List <TrieHit> matches = new List <TrieHit> ();
            TrieState      q = root;
            TrieMatch      m = null;
            int            idx = 0, start_idx = 0, last_idx = 0;

            while (idx < haystack.Length)
            {
                // idx is advanced immediately, so below it is the index just
                // past the character being processed.
                char c = haystack [idx++];
                if (!case_sensitive)
                {
                    c = Char.ToLower(c);
                }

                // Follow failure links until a state has a transition on c,
                // or the chain runs out (q becomes null).
                while (q != null)
                {
                    m = FindMatchAtState(q, c);
                    if (m == null)
                    {
                        q = q.Fail;
                    }
                    else
                    {
                        break;
                    }
                }

                // At the root a new candidate begins at the current character
                // (last_idx holds idx from the end of the previous iteration).
                if (q == root)
                {
                    start_idx = last_idx;
                }

                if (q == null)
                {
                    // Nothing accepts c: restart from the root after it.
                    q         = root;
                    start_idx = idx;
                }
                else if (m != null)
                {
                    q = m.State;

                    // Got a match!
                    if (q.Final != 0)
                    {
                        string key = haystack.Substring(start_idx,
                                                        idx - start_idx);
                        TrieHit hit =
                            new TrieHit(start_idx, idx, key, q.Id);
                        matches.Add(hit);
                    }
                }

                last_idx = idx;
            }

            return(matches);
        }
Exemplo n.º 4
0
        // Iterate the matches at state %s looking for the first match
        // containing %c.
        TrieMatch FindMatchAtState(TrieState s, char c)
        {
            // Walk the singly linked match list hanging off %s.
            for (TrieMatch candidate = s.FirstMatch; candidate != null; candidate = candidate.Next)
            {
                if (candidate.Value == c)
                {
                    return candidate;
                }
            }

            // No match at %s carries %c.
            return null;
        }
Exemplo n.º 5
0
        // Adds a transition on `value` from `state` to a newly created state,
        // records the new state in failStates[depth], and returns it.
        TrieState Insert(TrieState state, int depth, char value)
        {
            TrieState child = new TrieState(root);
            TrieMatch edge  = new TrieMatch(value);

            // Prepend the new transition to the state's match list.
            edge.Next   = state.Match;
            edge.State  = child;
            state.Match = edge;

            // Make sure a slot exists for this depth before indexing into it.
            if (depth + 1 > failStates.Count)
            {
                failStates.Add(null);
            }

            // Push the new state onto the front of the chain for this depth.
            child.Next        = failStates[depth];
            failStates[depth] = child;

            return child;
        }
Exemplo n.º 6
0
        //
        // final = empty set
        // FOR p = 1 TO #pat
        //   q = root
        //   FOR j = 1 TO m[p]
        //     IF g(q, pat[p][j]) == null
        //       insert(q, pat[p][j])
        //     ENDIF
        //     q = g(q, pat[p][j])
        //   ENDFOR
        //   final = union(final, q)
        // ENDFOR
        //

        /// <summary>
        /// Add the specified search pattern.
        /// </summary>
        /// <remarks>
        /// Adds the specified search pattern.
        /// </remarks>
        /// <param name="pattern">The search pattern.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="pattern"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="pattern"/> cannot be an empty string.
        /// </exception>
        public void Add(string pattern)
        {
            if (pattern == null)
            {
                throw new ArgumentNullException("pattern");
            }

            if (pattern.Length == 0)
            {
                throw new ArgumentException("The pattern cannot be empty.", "pattern");
            }

            // Phase 1: walk the pattern down from the root, reusing existing
            // transitions and creating states for the characters that are missing.
            TrieState node  = root;
            int       level = 0;

            foreach (char raw in pattern)
            {
                char      key  = icase ? char.ToLower(raw) : raw;
                TrieMatch step = FindMatch(node, key);

                node = step != null ? step.State : Insert(node, level, key);
                level++;
            }

            // The state reached last stands for the complete pattern.
            node.Pattern = pattern;
            node.Depth   = level;

            // Phase 2: recompute the failure link of every state reachable
            // through the chains stored in failStates.
            for (int i = 0; i < failStates.Count; i++)
            {
                for (TrieState parent = failStates[i]; parent != null; parent = parent.Next)
                {
                    for (TrieMatch edge = parent.Match; edge != null; edge = edge.Next)
                    {
                        TrieState target   = edge.State;
                        TrieState fallback = parent.Fail;
                        TrieMatch via      = null;
                        char      symbol   = edge.Value;

                        // Climb the parent's failure chain until some state
                        // also has a transition on the same character.
                        while (fallback != null && (via = FindMatch(fallback, symbol)) == null)
                        {
                            fallback = fallback.Fail;
                        }

                        if (fallback == null)
                        {
                            // Chain exhausted: fall back to the root's transition
                            // on this character, or to the root itself.
                            via         = FindMatch(root, symbol);
                            target.Fail = via != null ? via.State : root;
                            continue;
                        }

                        target.Fail = via.State;

                        // Inherit a larger Depth from the failure state.
                        if (target.Fail.Depth > target.Depth)
                        {
                            target.Depth = target.Fail.Depth;
                        }
                    }
                }
            }
        }
Exemplo n.º 7
0
Arquivo: Trie.cs Projeto: dcga/MimeKit
		// Adds a transition on `value` from `state` to a newly created state,
		// records the new state in failStates[depth], and returns it.
		TrieState Insert (TrieState state, int depth, char value)
		{
			TrieState child = new TrieState (root);
			TrieMatch edge = new TrieMatch (value);

			// Prepend the new transition to the state's match list.
			edge.Next = state.Match;
			edge.State = child;
			state.Match = edge;

			// Make sure a slot exists for this depth before indexing into it.
			if (depth + 1 > failStates.Count) {
				failStates.Add (null);
			}

			// Push the new state onto the front of the chain for this depth.
			child.Next = failStates[depth];
			failStates[depth] = child;

			return child;
		}
Exemplo n.º 8
0
        /*
         * final = empty set
         * FOR p = 1 TO #pat
         *   q = root
         *   FOR j = 1 TO m[p]
         *     IF g(q, pat[p][j]) == null
         *       insert(q, pat[p][j])
         *     ENDIF
         *     q = g(q, pat[p][j])
         *   ENDFOR
         *   final = union(final, q)
         * ENDFOR
         */
        /// <summary>
        /// Adds <paramref name="needle"/> to the trie, tags its final state with
        /// <paramref name="pattern_id"/>, and recomputes the failure links.
        /// </summary>
        /// <param name="needle">The keyword to add.</param>
        /// <param name="pattern_id">Value stored in the <c>Id</c> of the keyword's final state.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="needle"/> is <c>null</c>.
        /// </exception>
        public void AddKeyword(string needle, object pattern_id)
        {
            // Reject null up front with a descriptive exception; nothing has
            // been modified yet at this point.
            if (needle == null)
            {
                throw new ArgumentNullException("needle");
            }

            TrieState q     = root;
            int       depth = 0;

            // Step 1: add the pattern to the trie...

            for (int idx = 0; idx < needle.Length; idx++)
            {
                char c = needle [idx];
                if (!case_sensitive)
                {
                    c = Char.ToLower(c);
                }

                // Reuse an existing transition on c, or create a new state.
                TrieMatch m = FindMatchAtState(q, c);
                if (m == null)
                {
                    q = InsertMatchAtState(depth, q, c);
                }
                else
                {
                    q = m.State;
                }

                depth++;
            }

            // Mark the last state reached as final for this keyword.
            q.Final = depth;
            q.Id    = pattern_id;

            // Step 2: compute failure graph...

            for (int idx = 0; idx < fail_states.Count; idx++)
            {
                q = fail_states [idx];

                // Walk every state chained at this index of fail_states.
                while (q != null)
                {
                    TrieMatch m = q.FirstMatch;

                    // For each transition q --m.Value--> q1, find q1's failure state.
                    while (m != null)
                    {
                        TrieState q1 = m.State;
                        TrieState r  = q.Fail;
                        TrieMatch n  = null;

                        // Climb q's failure chain until a state also has a
                        // transition on m.Value.
                        while (r != null)
                        {
                            n = FindMatchAtState(r, m.Value);
                            if (n == null)
                            {
                                r = r.Fail;
                            }
                            else
                            {
                                break;
                            }
                        }

                        if (r != null && n != null)
                        {
                            q1.Fail = n.State;

                            // Inherit a larger Final from the failure state.
                            if (q1.Fail.Final > q1.Final)
                            {
                                q1.Final = q1.Fail.Final;
                            }
                        }
                        else
                        {
                            // Chain exhausted: use the root's transition on
                            // m.Value if it has one, otherwise the root itself.
                            n = FindMatchAtState(root, m.Value);
                            if (n == null)
                            {
                                q1.Fail = root;
                            }
                            else
                            {
                                q1.Fail = n.State;
                            }
                        }

                        m = m.Next;
                    }

                    q = q.Next;
                }
            }

            // Update max_length
            max_length = Math.Max(max_length, needle.Length);
        }