/// <summary>
/// Computes a coverage of two input sequences of element type <typeparamref name="T"/>
/// by longest common substrings. Items are compared by calling .Equals() on the objects.
/// A common substring is always contiguous (unlike a longest common subsequence, which
/// is not necessarily contiguous).
/// </summary>
/// <remarks>
/// The longest common substring is located first. Unlike
/// <see cref="ComputeLongestCommonSubstring"/>, the search then continues in the remaining
/// (uncovered) portions of the sequences, where further (possibly shorter) common
/// substrings are identified. The coverage is not necessarily optimal: if several longest
/// common substrings exist during an iteration, the first one found is picked, which may
/// lead to a suboptimal overall coverage.
/// </remarks>
/// <param name="sequenceA">The first (source) sequence</param>
/// <param name="sequenceB">The second (target) sequence</param>
/// <returns>
/// <c>null</c> if either argument is null or empty; otherwise the list produced by the
/// coverage computation, which may be empty when no common substring exists.
/// </returns>
public static List<AlignedSubstring> ComputeLongestCommonSubstringCoverage(IList<T> sequenceA, IList<T> sequenceB)
{
    // Background: http://en.wikipedia.org/wiki/Longest_common_substring_problem

    // Nothing to align unless both sequences contain at least one item.
    if (sequenceA == null || sequenceA.Count == 0)
    {
        return null;
    }

    if (sequenceB == null || sequenceB.Count == 0)
    {
        return null;
    }

    // Uses the four-argument ComputeCoverage overload: both sequences, the simple LCS
    // scorer, and null as the final argument.
    List<AlignedSubstring> coverage = SequenceAlignmentComputer<T>.ComputeCoverage(
        sequenceA,
        sequenceB,
        new SimpleLCSScoreProvider<T>(),
        null);

#if DEBUG
    // Debug-only sanity check: every reported pair must span equally many items, start
    // inside its sequence, and match item by item.
    if (coverage != null)
    {
        for (int index = 0; index < coverage.Count; ++index)
        {
            AlignedSubstring match = coverage[index];

            System.Diagnostics.Debug.Assert(match.Source.Length == match.Target.Length);
            System.Diagnostics.Debug.Assert(match.Source.Start >= 0 && match.Source.Start < sequenceA.Count);
            System.Diagnostics.Debug.Assert(match.Target.Start >= 0 && match.Target.Start < sequenceB.Count);

            for (int offset = 0; offset < match.Source.Length; ++offset)
            {
                System.Diagnostics.Debug.Assert(
                    sequenceA[match.Source.Start + offset].Equals(sequenceB[match.Target.Start + offset]));
            }
        }
    }
#endif

    return coverage;
}
/// <summary>
/// Computes the longest local alignment coverage of the two sequences.
/// </summary>
/// <param name="source">The source sequence</param>
/// <param name="target">The target sequence</param>
/// <param name="minLength">The minimum length of an aligned substring</param>
/// <param name="scorer">The score provider to use</param>
/// <param name="picker">An extension disambiguator (may be null)</param>
/// <param name="maxItems">The maximum number of items in the result coverage. If 1,
/// no coverage is computed, only the longest substring. If 0, the full coverage
/// is computed.</param>
/// <returns>The value returned by <c>Compute()</c> on an aligner constructed from
/// the given arguments.</returns>
public static List<AlignedSubstring> ComputeCoverage(IList<T> source, IList<T> target, int minLength, ISequenceAlignmentItemScoreProvider<T> scorer, IExtensionDisambiguator picker, int maxItems)
{
    // The constructor's argument order differs from this method's parameter order:
    // scorer and picker come before minLength and maxItems.
    return new SequenceAlignmentComputer<T>(
        source,
        target,
        scorer,
        picker,
        minLength,
        maxItems).Compute();
}