/// <summary>
/// Lazily yields every node whose <c>Path</c> is not yet reported as contained by a
/// <see cref="SubStringTree"/> that is filled with the paths of the nodes yielded so far.
/// </summary>
/// <param name="nodes">The candidate nodes. They are copied and sorted unless <paramref name="isSorted"/> is set.</param>
/// <param name="isSorted">
/// When <c>false</c> (the default), the nodes are first copied into a list and ordered by descending
/// path length. When <c>true</c>, the nodes are processed in the order given
/// (presumably the caller guarantees longest-path-first ordering; confirm against callers).
/// </param>
/// <returns>The nodes that passed the containment check, in processing order.</returns>
public static IEnumerable<TrieNode> FilterTransitive(IEnumerable<TrieNode> nodes, bool isSorted = false)
{
    if (!isSorted)
    {
        // Longest paths first, so that shorter paths are checked against the longer ones.
        var longestFirst = new List<TrieNode>(nodes);
        longestFirst.Sort((left, right) => right.Path.Length.CompareTo(left.Path.Length));
        nodes = longestFirst;
    }

    var acceptedPaths = new SubStringTree();
    foreach (var candidate in nodes)
    {
        var candidatePath = candidate.Path;
        if (acceptedPaths.Contains(candidatePath))
        {
            continue; // already covered by a previously yielded node
        }

        yield return candidate;

        // Registered only after the consumer resumes the iterator.
        acceptedPaths.Add(candidatePath);
    }
}
/// <summary>
/// Returns a list of disjoint selections for the given substrings.
/// The substrings behind the returned selections are guaranteed not to have been contained in
/// <paramref name="processedSubStrings"/> beforehand.
/// <para>
/// This will modify <paramref name="processedSubStrings"/>, adding the path of every node whose
/// selection was picked. Paths added before an unknown substring is encountered stay in the tree
/// even though the collected selections are discarded in that case.
/// </para>
/// </summary>
/// <param name="subStrings">The substrings to find selections for.</param>
/// <param name="processedSubStrings">The substrings that are already handled. It is read and extended by this method.</param>
/// <returns>
/// An empty collection if the root is a leaf; <c>ListWithEmptySelection</c> if any part of a substring
/// could not be resolved to a node; otherwise the collected selections.
/// </returns>
public ICollection<Selection<int>> GetSelection(IEnumerable<SubString> subStrings, SubStringTree processedSubStrings)
{
    if (root.IsLeaf)
    {
        return Array.Empty<Selection<int>>();
    }

    var collected = new List<Selection<int>>(4);
    var hitUnknownSubString = false;

    // Original author's note: this operates in O(n*log(n)) where n is the number of characters in the substring.
    void Collect(SubString part)
    {
        // Skip when a previous part was unknown, when there is nothing to do,
        // or when the part is already in the selection.
        if (hitUnknownSubString || part.Length == 0 || processedSubStrings.Contains(part.ToString()))
        {
            return;
        }

        // Find the node with the smallest selection among all candidate nodes for this substring.
        TrieNode bestNode = null;
        int bestStart = -1;

        var start = 0;
        while (start < part.Length)
        {
            var candidate = GetNode(part.ToSubString(start));
            if (candidate == null)
            {
                // We can abort immediately once we find a substring that is not included in the
                // list of words used to create the trie.
                hitUnknownSubString = true;
                return;
            }

            // Don't include substrings we already have.
            if (!processedSubStrings.Contains(candidate.Path))
            {
                if (bestNode == null || candidate.Selection.Count < bestNode.Selection.Count)
                {
                    bestNode = candidate;
                    bestStart = start;
                }
            }

            // The remaining shorter suffixes need no check: they are already covered by the first
            // node that reaches the end of the substring.
            if (start + candidate.Depth == part.Length)
            {
                break;
            }

            start++;
        }

        if (bestNode == null)
        {
            return; // the substring did not include any new substrings
        }

        // Add the minimal selection to the other selections ...
        collected.Add(bestNode.Selection);
        processedSubStrings.Add(bestNode.Path);

        // ... and recursively handle what remains to the left and to the right of the chosen node.
        Collect(part.ToSubString(0, bestStart));
        Collect(part.ToSubString(bestStart + bestNode.Depth));
    }

    foreach (var subString in subStrings)
    {
        Collect(subString);
    }

    if (!hitUnknownSubString)
    {
        return collected;
    }

    return ListWithEmptySelection;
}