/// <summary>
/// Creates a new suffix tree and adds one suffix of the reference for every
/// entry in <paramref name="startIndices"/>.
/// For each suffix the tree is descended from the root, one edge at a time,
/// until one of the following happens:
/// 1. No child edge starts with the current symbol: a new edge is inserted
///    under the current parent.
/// 2. A symbol along an edge differs from the suffix: the edge is split and
///    a new edge is inserted under the node returned by the split.
/// 3. The running index reaches <c>ReferenceLength</c>.
/// </summary>
/// <param name="startIndices">Index of the first character of each suffix to add</param>
/// <returns>The suffix tree holding the added suffixes</returns>
private IMultiWaySuffixTree AppendSuffix(IList<int> startIndices)
{
    IMultiWaySuffixTree suffixTree = CreateSuffixTree();

    foreach (int suffixStart in startIndices)
    {
        // Position in the reference of the next suffix symbol still to be placed.
        int position = suffixStart;
        IEdge parent = suffixTree.Root;
        bool inserted = false;

        do
        {
            IEdge matched = suffixTree.Find(parent, GetReferenceSymbol(position));
            if (matched == null)
            {
                // Nothing under this parent starts with the current symbol:
                // hang the remainder of the suffix here and finish this suffix.
                suffixTree.Insert(parent, position, ReferenceLength - 1);
                break;
            }

            // Find already matched the first symbol of the edge.
            position++;

            // Compare the remaining symbols on the edge (if any) with the suffix.
            int labelIndex = matched.StartIndex + 1;
            while (labelIndex <= matched.EndIndex)
            {
                if (GetReferenceSymbol(position) != GetReferenceSymbol(labelIndex))
                {
                    // Mismatch part-way along the edge: split just before the
                    // differing symbol and add the rest of the suffix there.
                    IEdge splitParent = suffixTree.Split(matched, labelIndex - 1);
                    suffixTree.Insert(splitParent, position, ReferenceLength - 1);
                    inserted = true;
                    break;
                }

                position++;
                labelIndex++;
            }

            // Whole edge matched (or the suffix was just inserted): descend.
            parent = matched;
        }
        while (position < ReferenceLength && !inserted);
    }

    return suffixTree;
}
/// <summary>
/// Walks the suffix tree with the query symbols beginning at
/// <paramref name="startIndex"/> and builds a match (via <c>CreateMUM</c>)
/// for the stretch of the query that could be followed in the tree.
/// The edge on which the walk stopped and the match that was built are
/// stored in <c>_lastEdge</c> and <c>_lastMatch</c>.
/// </summary>
/// <param name="startIndex">Index of first suffix character in search sequence</param>
/// <returns>
/// The match that was built, or null when the query sequence is empty, when
/// no edge under the root starts with the first symbol, or when the walk
/// stopped on a non-leaf edge while <c>_findMaximumMatch</c> is false.
/// </returns>
private MaxUniqueMatch Search(int startIndex)
{
    // An empty query cannot match anything.
    if (_querySequence.Count == 0)
    {
        return null;
    }

    // No edge under the root begins with the first query symbol.
    IEdge current = _suffixTree.Find(_suffixTree.Root, GetQuerySymbol(startIndex));
    if (current == null)
    {
        return null;
    }

    int queryPos = startIndex;
    MaxUniqueMatch result = null;
    bool done = false;

    do
    {
        // Compare the symbols along the current edge with the query.
        int refPos = current.StartIndex;
        while (refPos <= current.EndIndex)
        {
            // The bounds checks come first so that neither symbol is read
            // once the query or the reference has been exhausted.
            bool exhausted = queryPos == _querySequence.Count || refPos == ReferenceLength;
            if (exhausted || GetReferenceSymbol(refPos) != GetQuerySymbol(queryPos))
            {
                result = CreateMUM(refPos - 1, startIndex, queryPos - startIndex);
                done = true;
                break;
            }

            queryPos++;
            refPos++;
        }

        if (done)
        {
            break;
        }

        // The whole edge matched: try to descend along the next query symbol,
        // provided there is one left.
        IEdge child = queryPos < _querySequence.Count
            ? _suffixTree.Find(current, GetQuerySymbol(queryPos))
            : null;

        if (child == null)
        {
            // Query exhausted or no continuing edge: the match ends at the
            // end of the current edge.
            result = CreateMUM(current.EndIndex, startIndex, queryPos - startIndex);
            done = true;
        }
        else
        {
            current = child;
        }
    }
    while (!done);

    if (result == null)
    {
        return null;
    }

    _lastEdge = current;
    _lastMatch = result;

    // A walk that stops on a non-leaf edge means there is a split below it, so
    // the match is a duplicate; it is only reported when _findMaximumMatch is set.
    return (current.IsLeaf || _findMaximumMatch) ? result : null;
}