Пример #1
0
        /// <summary>
        /// Builds clusters from the supplied maximum unique matches (MUMs).
        /// Every match is wrapped in a <see cref="MaxUniqueMatchExtension"/>, the
        /// union-find list is reset to hold one zero entry per match, and the wrapped
        /// matches are passed to GetClusters, which performs the clustering.
        /// As documented for this routine, the clustering steps are:
        ///     1. Sort MUMs based on query sequence start.
        ///     2. Remove overlapping MUMs (in both sequences) and MUMs with the same
        ///         diagonal offset (usually adjacent).
        ///     3. Check for separation between two MUMs.
        ///     4. Check the diagonal separation.
        ///     5. If MUMs pass the above conditions, merge them into one cluster.
        ///     6. Sort MUMs using cluster id.
        ///     7. Process clusters (joining clusters).
        /// </summary>
        /// <param name="matches">List of maximum unique matches</param>
        /// <returns>
        /// List of clusters, or null when <paramref name="matches"/> is null or empty.
        /// </returns>
        public IList <Cluster> BuildClusters(IList <MaxUniqueMatch> matches)
        {
            // Nothing to cluster: a missing or empty input yields null.
            if (matches == null || matches.Count == 0)
            {
                return null;
            }

            IList <MaxUniqueMatchExtension> wrappedMatches = new List <MaxUniqueMatchExtension>();

            // Start from a fresh union-find list for this run.
            _unionFind = new List <int>();

            foreach (MaxUniqueMatch mum in matches)
            {
                MaxUniqueMatchExtension wrapped = new MaxUniqueMatchExtension(mum);

                // One union-find slot (initialised to 0) per wrapped match.
                _unionFind.Add(0);
                wrappedMatches.Add(wrapped);
            }

            // The actual clustering is delegated to GetClusters.
            IList <Cluster> clusters = GetClusters(wrappedMatches);
            return clusters;
        }
Пример #2
0
        /// <summary>
        /// Creates a delta alignment that initially covers exactly one match.
        /// The start coordinates are copied from the match, the end coordinates are
        /// the inclusive last positions of the match (start + length - 1), and the
        /// query direction is taken from the cluster.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequence">Query sequence</param>
        /// <param name="cluster">Cluster that supplies the query direction</param>
        /// <param name="match">Match that supplies the start positions and length</param>
        /// <returns>Newly created DeltaAlignment object</returns>
        internal static DeltaAlignment NewAlignment(
            ISequence referenceSequence,
            ISequence querySequence,
            Cluster cluster,
            MaxUniqueMatchExtension match)
        {
            // Properties are assigned in the same order as a statement-by-statement
            // initialisation would: starts first, then inclusive ends, then direction.
            return new DeltaAlignment(referenceSequence, querySequence)
            {
                FirstSequenceStart  = match.FirstSequenceStart,
                SecondSequenceStart = match.SecondSequenceStart,
                FirstSequenceEnd    = match.FirstSequenceStart + match.Length - 1,
                SecondSequenceEnd   = match.SecondSequenceStart + match.Length - 1,
                QueryDirection      = cluster.QueryDirection
            };
        }
Пример #3
0
        /// <summary>
        /// Finds the longest increasing subsequence (the highest-scoring chain) in the
        /// given set of MUMs.
        /// </summary>
        /// <remarks>
        /// The input list is modified in place: it is cleared and refilled with only
        /// the MUMs that belong to the best chain. For each kept MUM, the overlap with
        /// its predecessor in the chain is trimmed from its start (both start positions
        /// are advanced and the length reduced); MUMs whose length is no longer positive
        /// after trimming are dropped. When there are no MUMs to process the input is
        /// returned unchanged.
        /// </remarks>
        /// <param name="sortedMums">List of sorted MUMs</param>
        /// <returns>
        /// The same list instance, now holding the longest increasing subsequence.
        /// </returns>
        public IList <MaxUniqueMatch> GetLongestSequence(IList <MaxUniqueMatch> sortedMums)
        {
            MaxUniqueMatchExtension[] matches = ConvertToMUMExtension(sortedMums);

            // With no MUMs there is no chain to build, and reading matches[0] further
            // down would throw; hand the (empty) input straight back.
            if (matches.Length == 0)
            {
                return sortedMums;
            }

            for (var counteri = 0; counteri < matches.Length; counteri++)
            {
                MaxUniqueMatchExtension current = matches[counteri];

                // Initialize the MUM extension: a chain consisting of this MUM alone.
                current.Score     = current.Length;
                current.WrapScore = current.Length;
                current.Adjacent  = 0;
                current.From      = -1;

                for (var counterj = 0; counterj < counteri; counterj++)
                {
                    MaxUniqueMatchExtension previous = matches[counterj];

                    // Overlap in the query (second) sequence, clamped at zero.
                    var queryOverlap = previous.SecondSequenceStart + previous.Length;
                    queryOverlap -= current.SecondSequenceStart;
                    var overlap = queryOverlap > 0 ? queryOverlap : 0;

                    // Wrap score candidate that only accounts for the query overlap.
                    var score = previous.Score
                                + current.Length
                                - overlap;
                    if (score > current.WrapScore)
                    {
                        current.WrapScore = score;
                    }

                    // Overlap in the reference (first) sequence; from here on the
                    // larger of the two overlaps is used.
                    var referenceOverlap = previous.FirstSequenceStart
                                           + previous.Length
                                           - current.FirstSequenceStart;

                    overlap = overlap > referenceOverlap ? overlap : referenceOverlap;

                    // Chain score when appending the current MUM after the previous one.
                    score = previous.Score
                            + current.Length
                            - overlap;
                    if (score > current.Score)
                    {
                        // To remove crosses, mark counteri as the next MUM after counterj
                        // and remember the new score and the overlap to trim later.
                        current.From     = counterj;
                        current.Score    = score;
                        current.Adjacent = overlap;
                    }

                    // Wrap score candidate built on the previous MUM's wrap score.
                    score = previous.WrapScore
                            + current.Length
                            - overlap;
                    if (score >= current.WrapScore)
                    {
                        current.WrapScore = score;
                    }
                }
            }

            // The chain ending at the MUM with the highest score is the longest
            // increasing subsequence; ties keep the earliest MUM.
            var best      = 0;
            var bestScore = matches[best].Score;

            for (var counteri = 1; counteri < matches.Length; counteri++)
            {
                if (matches[counteri].Score > bestScore)
                {
                    best      = counteri;
                    bestScore = matches[best].Score;
                }
            }

            // Walk the From links back from the best MUM (From is -1 at the chain
            // head) and mark every MUM on the way as "Good".
            for (var counteri = best; counteri >= 0; counteri = matches[counteri].From)
            {
                matches[counteri].IsGood = true;
            }

            // Rebuild the caller's list from the good MUMs, trimming the recorded
            // overlap from the start of each one.
            sortedMums.Clear();
            foreach (var mum in matches)
            {
                if (!mum.IsGood)
                {
                    continue;
                }

                var adjacent = mum.Adjacent;
                if (0 != adjacent)
                {
                    mum.FirstSequenceStart  += adjacent;
                    mum.SecondSequenceStart += adjacent;
                    mum.Length -= adjacent;
                }

                // A MUM fully consumed by the overlap is not reported.
                if (0 < mum.Length)
                {
                    sortedMums.Add((MaxUniqueMatch)mum);
                }
            }

            // Return the list of MUMs that represent the longest increasing subsequence
            return sortedMums;
        }
Пример #4
0
        /// <summary>
        /// Extends the clusters of a synteny into delta alignments.
        /// </summary>
        /// <remarks>
        /// The clusters are processed in the order returned by SortCluster (keyed on
        /// FirstSequenceStart). While no extension is in progress, a cluster that is
        /// already fused or is reported as shadowed by IsClusterShadowed is marked
        /// fused and skipped. For every other cluster each match either starts a new
        /// delta alignment (which is first offered to ExtendToPreviousSequence) or,
        /// when the previous extension succeeded, is appended to the running
        /// alignment if it starts exactly where that alignment ends. The alignment is
        /// then extended forward, either to the next match of the same cluster or to
        /// the target returned by GetNextCluster.
        /// </remarks>
        /// <param name="synteny">Synteny in which clusters need to be extended.</param>
        /// <returns>List of delta alignments</returns>
        private IList <DeltaAlignment> ExtendClusters(Synteny synteny)
        {
            bool isClusterExtended = false;
            IList <DeltaAlignment> deltaAlignments = new List <DeltaAlignment>();
            DeltaAlignment deltaAlignment = null;

            // Taken before sorting; only replaced once GetNextCluster has been called.
            Cluster targetCluster = synteny.Clusters.Last();

            // Sort the clusters by first sequence start
            IList <Cluster> clusters = synteny.Clusters;
            clusters = SortCluster(clusters, FirstSequenceStart);

            // foreach disposes the enumerator for us.
            foreach (Cluster currentCluster in clusters)
            {
                if (!isClusterExtended &&
                    (currentCluster.IsFused ||
                     IsClusterShadowed(deltaAlignments, currentCluster, deltaAlignment)))
                {
                    currentCluster.IsFused = true;
                    continue;
                }

                // Extend the matches of this cluster
                foreach (MaxUniqueMatchExtension match in currentCluster.Matches)
                {
                    if (isClusterExtended)
                    {
                        // The running alignment can only absorb a match that begins
                        // exactly at its current end in both sequences.
                        if (deltaAlignment.FirstSequenceEnd != match.FirstSequenceStart ||
                            deltaAlignment.SecondSequenceEnd != match.SecondSequenceStart)
                        {
                            continue;
                        }

                        deltaAlignment.FirstSequenceEnd  += match.Length - 1;
                        deltaAlignment.SecondSequenceEnd += match.Length - 1;
                    }
                    else
                    {
                        deltaAlignment = DeltaAlignment.NewAlignment(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            currentCluster,
                            match);
                        deltaAlignments.Add(deltaAlignment);

                        // Find the alignment which is a good candidate for extension
                        // in reverse direction
                        DeltaAlignment targetAlignment =
                            GetPreviousAlignment(deltaAlignments, deltaAlignment);

                        if (ExtendToPreviousSequence(
                                synteny.ReferenceSequence,
                                synteny.QuerySequence,
                                deltaAlignments,
                                deltaAlignment,
                                targetAlignment))
                        {
                            // Continue working on the alignment that was extended into.
                            deltaAlignment = targetAlignment;
                        }
                    }

                    int alignFlags = NUCmerAligner.ForwardAlignFlag;
                    int targetReference;
                    int targetQuery;

                    // Position of this match inside its cluster, looked up once.
                    int matchIndex = currentCluster.Matches.IndexOf(match);

                    if (matchIndex < currentCluster.Matches.Count - 1)
                    {
                        // extend till the next match in the current cluster
                        MaxUniqueMatchExtension nextMatch =
                            currentCluster.Matches[matchIndex + 1];
                        targetReference = nextMatch.FirstSequenceStart;
                        targetQuery     = nextMatch.SecondSequenceStart;
                    }
                    else
                    {
                        // extend till next cluster; the sequence ends are the default
                        // targets and GetNextCluster may adjust them through the refs
                        targetReference = synteny.ReferenceSequence.Count - 1;
                        targetQuery     = synteny.QuerySequence.Count - 1;

                        targetCluster = GetNextCluster(
                            clusters,
                            currentCluster,
                            ref targetReference,
                            ref targetQuery);

                        // The target is not one of this synteny's clusters: request
                        // the optimal alignment mode in addition to forward.
                        if (!synteny.Clusters.Contains(targetCluster))
                        {
                            alignFlags |= NUCmerAligner.OptimalFlag;
                        }
                    }

                    isClusterExtended = ExtendToNextSequence(
                        synteny.ReferenceSequence,
                        synteny.QuerySequence,
                        deltaAlignment,
                        targetReference,
                        targetQuery,
                        alignFlags);
                }

                // An extension can only carry over into a cluster of this synteny.
                if (!synteny.Clusters.Contains(targetCluster))
                {
                    isClusterExtended = false;
                }

                currentCluster.IsFused = true;
            }

            return deltaAlignments;
        }