Ejemplo n.º 1
0
        /// <summary>
        /// Computes a coverage of two input sequences of a common element type <typeparamref name="T"/>
        /// by longest common substrings. Items are compared by calling <c>Equals()</c> on the objects.
        /// A common substring is always contiguous (unlike a longest common subsequence, which is not
        /// necessarily contiguous). The longest common substring is located first; unlike
        /// <see cref="ComputeLongestCommonSubstring"/>, the search then continues and further (possibly
        /// shorter) longest substrings are identified in the remaining, still uncovered portion of the
        /// sequences. The coverage is not guaranteed to be optimal: whenever more than one longest common
        /// substring exists during an iteration, the first one found is picked, which may lead to a
        /// suboptimal overall coverage.
        /// </summary>
        /// <param name="sequenceA">The first (source) sequence</param>
        /// <param name="sequenceB">The second (target) sequence</param>
        /// <returns>A list of substring objects. The list is <c>null</c> if either argument is null or
        /// empty, may be empty if there is no common substring, and otherwise contains a good coverage
        /// of the input sequences.
        /// </returns>
        public static List<AlignedSubstring> ComputeLongestCommonSubstringCoverage(
            IList<T> sequenceA,
            IList<T> sequenceB)
        {
            // http://en.wikipedia.org/wiki/Longest_common_substring_problem

            // Nothing to align when either side is missing or empty; callers receive null in that case.
            if (sequenceA == null || sequenceA.Count == 0)
            {
                return null;
            }

            if (sequenceB == null || sequenceB.Count == 0)
            {
                return null;
            }

            SimpleLCSScoreProvider<T> lcsScorer = new SimpleLCSScoreProvider<T>();
            List<AlignedSubstring> coverage =
                SequenceAlignmentComputer<T>.ComputeCoverage(sequenceA, sequenceB, lcsScorer, null);

#if DEBUG
            // Debug-only sanity check: every reported pair must have equal lengths, start inside
            // its sequence, and match element by element.
            if (coverage != null)
            {
                for (int index = 0; index < coverage.Count; ++index)
                {
                    AlignedSubstring pair = coverage[index];

                    System.Diagnostics.Debug.Assert(pair.Source.Length == pair.Target.Length);
                    System.Diagnostics.Debug.Assert(pair.Source.Start >= 0 && pair.Source.Start < sequenceA.Count);
                    System.Diagnostics.Debug.Assert(pair.Target.Start >= 0 && pair.Target.Start < sequenceB.Count);

                    for (int offset = 0; offset < pair.Source.Length; ++offset)
                    {
                        System.Diagnostics.Debug.Assert(
                            sequenceA[pair.Source.Start + offset].Equals(sequenceB[pair.Target.Start + offset]));
                    }
                }
            }
#endif

            return coverage;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Computes the longest local alignment coverage of the two sequences.
        /// </summary>
        /// <param name="source">The source sequence</param>
        /// <param name="target">The target sequence</param>
        /// <param name="minLength">The minimum length of an aligned substring</param>
        /// <param name="scorer">The score provider to use</param>
        /// <param name="picker">An extension disambiguator (may be null)</param>
        /// <param name="maxItems">The maximum number of items in the result coverage. If 1,
        /// no coverage, but only the longest subsequence will be computed. If 0, the full
        /// coverage will be computed.</param>
        /// <returns>The result of running <c>Compute()</c> on an aligner configured with the
        /// given arguments.</returns>
        public static List<AlignedSubstring> ComputeCoverage(
            IList<T> source,
            IList<T> target,
            int minLength,
            ISequenceAlignmentItemScoreProvider<T> scorer,
            IExtensionDisambiguator picker,
            int maxItems)
        {
            // The constructor receives scorer and picker before minLength, which differs
            // from this method's own parameter order.
            return new SequenceAlignmentComputer<T>(source, target, scorer, picker, minLength, maxItems)
                .Compute();
        }