Esempio n. 1
0
        /// <summary>
        /// Lazily yields every node whose <c>Path</c> is not reported as contained by a
        /// <see cref="SubStringTree"/> that is filled with the paths of the nodes yielded before it.
        /// </summary>
        /// <param name="nodes">The candidate nodes to filter.</param>
        /// <param name="isSorted">
        /// When <c>false</c> (the default), the nodes are copied into a list and ordered by descending path length
        /// before filtering. When <c>true</c>, they are consumed in the order given; presumably the caller is then
        /// expected to supply them longest path first (TODO confirm against callers).
        /// </param>
        /// <returns>
        /// A deferred sequence: because this is an iterator, nothing is copied, sorted, or filtered
        /// until the result is enumerated.
        /// </returns>
        public static IEnumerable <TrieNode> FilterTransitive(IEnumerable <TrieNode> nodes, bool isSorted = false)
        {
            var candidates = nodes;

            if (!isSorted)
            {
                // Work on a private copy so the caller's sequence is never reordered.
                var byLength = new List<TrieNode>(nodes);
                byLength.Sort((x, y) => y.Path.Length.CompareTo(x.Path.Length));
                candidates = byLength;
            }

            var seenPaths = new SubStringTree();

            foreach (var candidate in candidates)
            {
                var candidatePath = candidate.Path;

                // Skip nodes whose path the tree already reports as contained.
                if (seenPaths.Contains(candidatePath))
                {
                    continue;
                }

                yield return candidate;

                // Registered only after the consumer resumes the iterator.
                seenPaths.Add(candidatePath);
            }
        }
        /// <summary>
        /// Returns a list of disjoint selections for the given substrings.
        /// Substrings (and node paths) that <paramref name="processedSubStrings"/> already reports as contained
        /// contribute nothing to the result.
        /// <para>
        /// Side effect: the path of every node whose selection is picked is added to
        /// <paramref name="processedSubStrings"/>. Paths added before an unknown substring is encountered
        /// remain in the tree even though the collected selections are then discarded.
        /// </para>
        /// </summary>
        /// <param name="subStrings">The substrings to resolve against this trie.</param>
        /// <param name="processedSubStrings">
        /// The tree of substrings that were already handled; it is both queried and extended by this call.
        /// </param>
        /// <returns>
        /// An empty array when the root is a leaf; <c>ListWithEmptySelection</c> when <c>GetNode</c> returned
        /// <c>null</c> for some part of a substring; otherwise the list of collected selections.
        /// </returns>
        public ICollection <Selection <int> > GetSelection(IEnumerable <SubString> subStrings, SubStringTree processedSubStrings)
        {
            if (root.IsLeaf)
            {
                return Array.Empty<Selection<int>>();
            }

            var collected  = new List<Selection<int>>(4);
            var sawUnknown = false;

            // Resolves one substring: picks the node with the smallest selection, then recurses on what
            // lies to the left and to the right of it.
            // The original author notes this runs in O(n*log(n)), n being the number of characters of the substring.
            void Collect(SubString part)
            {
                // Once an unknown substring was seen the result is fixed, and an empty part has nothing to offer.
                if (sawUnknown || part.Length == 0)
                {
                    return;
                }

                // The whole part is already covered by an earlier selection.
                if (processedSubStrings.Contains(part.ToString()))
                {
                    return;
                }

                // Among the nodes found at each start offset, keep the one with the fewest selected items.
                TrieNode best      = null;
                var      bestStart = -1;
                var      start     = 0;

                while (start < part.Length)
                {
                    var candidate = GetNode(part.ToSubString(start));
                    if (candidate == null)
                    {
                        // A part that is not in the trie decides the overall result, so stop right away.
                        sawUnknown = true;
                        return;
                    }

                    // Paths that were already processed must not be selected again.
                    var isNew = !processedSubStrings.Contains(candidate.Path);
                    if (isNew && (best == null || candidate.Selection.Count < best.Selection.Count))
                    {
                        best      = candidate;
                        bestStart = start;
                    }

                    // This node reaches the end of the part; the remaining shorter suffixes are
                    // already included by it and need no lookup.
                    if (start + candidate.Depth == part.Length)
                    {
                        break;
                    }

                    start++;
                }

                if (best == null)
                {
                    // Every node found for this part was already processed.
                    return;
                }

                collected.Add(best.Selection);
                processedSubStrings.Add(best.Path);

                // Continue with the text before and after the chosen node, in that order.
                Collect(part.ToSubString(0, bestStart));
                Collect(part.ToSubString(bestStart + best.Depth));
            }

            foreach (var subString in subStrings)
            {
                Collect(subString);
            }

            if (!sawUnknown)
            {
                return collected;
            }

            return ListWithEmptySelection;
        }