예제 #1
0
        /// <summary>
        /// Builds a delta alignment that covers exactly one match.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequence">Query sequence</param>
        /// <param name="cluster">Cluster whose query direction is copied to the alignment</param>
        /// <param name="match">Match supplying the start offsets and the length</param>
        /// <returns>Newly created DeltaAlignment object</returns>
        internal static DeltaAlignment NewAlignment(
            ISequence referenceSequence,
            ISequence querySequence,
            Cluster cluster,
            MatchExtension match)
        {
            var referenceStart = match.ReferenceSequenceOffset;
            var queryStart     = match.QuerySequenceOffset;
            var matchLength    = match.Length;

            var alignment = new DeltaAlignment(referenceSequence, querySequence);

            // Start positions are the match offsets as-is.
            alignment.FirstSequenceStart  = referenceStart;
            alignment.SecondSequenceStart = queryStart;

            // End positions are inclusive, hence the "- 1".
            alignment.FirstSequenceEnd  = referenceStart + matchLength - 1;
            alignment.SecondSequenceEnd = queryStart + matchLength - 1;

            alignment.QueryDirection = cluster.QueryDirection;

            return alignment;
        }
예제 #2
0
        /// <summary>
        /// Verifies that Match.ToString() and MatchExtension.ToString() produce
        /// the expected text for a match of length 20 at query offset 33.
        /// </summary>
        public void TestMatchAndMatchExtensionToString()
        {
            Match match = new Match();

            match.Length = 20;
            match.QuerySequenceOffset = 33;

            MatchExtension matchExtn = new MatchExtension(match);

            matchExtn.ID     = 1;
            matchExtn.Length = 20;

            string actualMatchExtnString   = matchExtn.ToString();
            string actualMatchString       = match.ToString();
            string expectedMatchExtnString = "RefStart=0 QueryStart=33 Length=20 Score=0 WrapScore=0 IsGood=False";
            string expectedMatchString     = "RefStart=0 QueryStart=33 Length=20";

            // Assert.AreEqual takes (expected, actual); keeping that order makes
            // the failure message label the two values correctly.
            Assert.AreEqual(expectedMatchExtnString, actualMatchExtnString);
            Assert.AreEqual(expectedMatchString, actualMatchString);
        }
예제 #3
0
        /// <summary>
        /// Verifies that Match.ToString() and MatchExtension.ToString() produce
        /// the expected text stored in the test configuration XML for a match of
        /// length 20 at query offset 33.
        /// </summary>
        public void ValidateMatchAndMatchExtensionToString()
        {
            var match = new Match();

            match.Length = 20;
            match.QuerySequenceOffset = 33;

            var matchExtn = new MatchExtension(match);

            matchExtn.ID     = 1;
            matchExtn.Length = 20;

            string actualMatchExtnString = matchExtn.ToString();
            string actualMatchString     = match.ToString();

            // Expected values are read from the ToString node of the test data file.
            string expectedMatchExtnString = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ToStringNodeName,
                Constants.ExpectedMatchExtnStringNode);
            string expectedMatchString = this.utilityObj.xmlUtil.GetTextValue(
                Constants.ToStringNodeName,
                Constants.ExpectedMatchStringNode);

            // Assert.AreEqual takes (expected, actual); keeping that order makes
            // the failure message label the two values correctly.
            Assert.AreEqual(expectedMatchExtnString, actualMatchExtnString);
            Assert.AreEqual(expectedMatchString, actualMatchString);
        }
예제 #4
0
        /// <summary>
        /// Verifies that Cluster.ToString() emits one line per match extension,
        /// each terminated by the platform newline.
        /// </summary>
        public void TestClusterToString()
        {
            Match match = new Match();

            MatchExtension matchExtn1 = new MatchExtension(match);

            matchExtn1.ID     = 1;
            matchExtn1.Length = 20;

            MatchExtension matchExtn2 = new MatchExtension(match);

            matchExtn2.ID     = 2;
            matchExtn2.Length = 30;

            IList<MatchExtension> extnList = new List<MatchExtension> { matchExtn1, matchExtn2 };

            Cluster clust        = new Cluster(extnList);
            string  actualString = clust.ToString();

            // The literal uses "\r\n"; normalise it to the platform newline before comparing.
            string expectedString = "RefStart=0 QueryStart=0 Length=20 Score=0 WrapScore=0 IsGood=False\r\nRefStart=0 QueryStart=0 Length=30 Score=0 WrapScore=0 IsGood=False\r\n".Replace("\r\n", Environment.NewLine);

            // Assert.AreEqual takes (expected, actual); keeping that order makes
            // the failure message label the two values correctly.
            Assert.AreEqual(expectedString, actualString);
        }
예제 #5
0
        /// <summary>
        /// Verifies that Cluster.ToString() matches the expected text stored in
        /// the test configuration XML for a cluster of two match extensions.
        /// </summary>
        public void ValidateClusterToString()
        {
            var match = new Match();

            var matchExtn1 = new MatchExtension(match);

            matchExtn1.ID     = 1;
            matchExtn1.Length = 20;

            var matchExtn2 = new MatchExtension(match);

            matchExtn2.ID     = 2;
            matchExtn2.Length = 30;

            IList<MatchExtension> extnList = new List<MatchExtension> { matchExtn1, matchExtn2 };

            var    clust        = new Cluster(extnList);
            string actualString = clust.ToString();

            // The stored value contains the literal four characters \r\n;
            // turn each occurrence into the platform newline before comparing.
            string expectedString = this.utilityObj.xmlUtil
                                    .GetTextValue(Constants.ToStringNodeName, Constants.ClusterExpectedNode)
                                    .Replace("\\r\\n", System.Environment.NewLine);

            // Assert.AreEqual takes (expected, actual); keeping that order makes
            // the failure message label the two values correctly.
            Assert.AreEqual(expectedString, actualString);
        }
예제 #6
0
        /// <summary>
        /// Extend the clusters of a synteny into delta alignments.
        /// Clusters are visited in the order returned by SortCluster; for every
        /// cluster that is not skipped, each match either starts a new delta
        /// alignment or, when the previous extension succeeded, grows the current
        /// one. Every visited cluster ends up with IsFused set to true.
        /// </summary>
        /// <param name="synteny">Synteny in which clusters need to be extended.</param>
        /// <returns>List of delta alignments</returns>
        private List <DeltaAlignment> ExtendClusters(Synteny synteny)
        {
            // True when the most recent ExtendToNextSequence call reported success.
            bool isClusterExtended = false;
            List <DeltaAlignment> deltaAlignments = new List <DeltaAlignment>();
            DeltaAlignment        deltaAlignment  = null;
            Cluster currentCluster;

            IList <Cluster> clusters = synteny.Clusters;

            // Sort the cluster by first sequence start
            clusters = SortCluster(clusters, FirstSequenceStart);

            // Starts as the last cluster of the (unsorted) synteny list and is
            // replaced by GetNextCluster once the last match of a cluster is reached.
            Cluster targetCluster = synteny.Clusters.Last();

            // NOTE(review): neither enumerator is disposed; both are advanced by hand.
            IEnumerator <Cluster> previousCluster = clusters.GetEnumerator();

            previousCluster.MoveNext();
            IEnumerator <Cluster> cluster = clusters.GetEnumerator();

            while (cluster.MoveNext())
            {
                currentCluster = cluster.Current;

                // Skip clusters that are already fused or reported as shadowed,
                // unless an extension is still in progress.
                if (!isClusterExtended &&
                    (currentCluster.IsFused ||
                     IsClusterShadowed(deltaAlignments, currentCluster, deltaAlignment)))
                {
                    currentCluster.IsFused = true;
                    previousCluster.MoveNext();
                    // NOTE(review): this assignment has no effect; currentCluster is
                    // overwritten from cluster.Current at the top of the next iteration.
                    currentCluster = previousCluster.Current;
                    continue;
                }

                // Extend the match
                foreach (MatchExtension match in currentCluster.Matches)
                {
                    if (isClusterExtended)
                    {
                        // Only a match that begins exactly where the current
                        // alignment ends (on both sequences) can be appended.
                        if (deltaAlignment.FirstSequenceEnd != match.ReferenceSequenceOffset ||
                            deltaAlignment.SecondSequenceEnd != match.QuerySequenceOffset)
                        {
                            continue;
                        }

                        deltaAlignment.FirstSequenceEnd  += match.Length - 1;
                        deltaAlignment.SecondSequenceEnd += match.Length - 1;
                    }
                    else
                    {
                        //TODO: Do we need sequence here? Changed to sequence id.
                        deltaAlignment = DeltaAlignment.NewAlignment(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            currentCluster,
                            match);
                        deltaAlignments.Add(deltaAlignment);

                        // Find the MUM which is a good candidate for extension in reverse direction
                        DeltaAlignment targetAlignment = GetPreviousAlignment(deltaAlignments, deltaAlignment);
                        if (targetAlignment != deltaAlignment &&
                            //TODO: NEED TO VERIFY THIS!!!
                            //if (
                            ExtendToPreviousSequence(
                                synteny.ReferenceSequence,
                                synteny.QuerySequence,
                                deltaAlignments,
                                deltaAlignment,
                                targetAlignment))
                        {
                            // Continue working on the earlier alignment from here on.
                            deltaAlignment = targetAlignment;
                        }
                    }

                    // Flag set handed to ExtendToNextSequence; OptimalFlag may be OR-ed in below.
                    int methodName = ModifiedSmithWaterman.ForwardAlignFlag;

                    long targetReference;
                    long targetQuery;
                    // NOTE(review): IndexOf is a linear search and is evaluated up to
                    // twice per match; it also returns the first equal entry only.
                    if (currentCluster.Matches.IndexOf(match) < currentCluster.Matches.Count - 1)
                    {
                        // extend till the match in the current cluster
                        MatchExtension nextMatch =
                            currentCluster.Matches[currentCluster.Matches.IndexOf(match) + 1];
                        targetReference = nextMatch.ReferenceSequenceOffset;
                        targetQuery     = nextMatch.QuerySequenceOffset;

                        isClusterExtended = ExtendToNextSequence(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            deltaAlignment,
                            targetReference,
                            targetQuery,
                            methodName);
                    }
                    else
                    {
                        // extend till next cluster
                        // Default targets are the last positions of both sequences;
                        // GetNextCluster receives them by ref and may change them.
                        targetReference = synteny.ReferenceSequence.Count - 1;
                        targetQuery     = synteny.QuerySequence.Count() - 1;

                        targetCluster = GetNextCluster(
                            clusters,
                            currentCluster,
                            ref targetReference,
                            ref targetQuery);

                        // A target that is not one of the synteny's clusters
                        // switches the extension to the optimal-alignment mode.
                        if (!synteny.Clusters.Contains(targetCluster))
                        {
                            methodName |= ModifiedSmithWaterman.OptimalFlag;
                        }

                        isClusterExtended = ExtendToNextSequence(
                            synteny.ReferenceSequence,
                            synteny.QuerySequence,
                            deltaAlignment,
                            targetReference,
                            targetQuery,
                            methodName);
                    }
                }

                // Without a real next cluster the following cluster must start a
                // fresh alignment, whatever the last extension reported.
                if (!synteny.Clusters.Contains(targetCluster))
                {
                    isClusterExtended = false;
                }

                currentCluster.IsFused = true;

                // NOTE(review): both assignments to currentCluster below are
                // overwritten from cluster.Current at the top of the next iteration;
                // only the previousCluster.MoveNext() call has a lasting effect.
                if (!isClusterExtended)
                {
                    previousCluster.MoveNext();
                    currentCluster = previousCluster.Current;
                }
                else
                {
                    currentCluster = targetCluster;
                }
            }

            return(deltaAlignments);
        }
예제 #7
0
        /// <summary>
        /// Process the clusters.
        /// Repeatedly picks the best-scoring chain of matches inside the window
        /// matches[indexToSkip .. indexToSkip + clusterSize - 1], adds it to
        /// <paramref name="clusters"/> when its score reaches MinimumScore, then
        /// shrinks the window to the matches that were not on the chain and
        /// starts over until the window is empty.
        /// The Score, Adjacent, From and IsGood members of the matches in the
        /// window are overwritten, and the window entries are reordered in place.
        /// </summary>
        /// <param name="clusters">List of clusters; new clusters are appended to it</param>
        /// <param name="matches">List of matches</param>
        /// <param name="indexToSkip">Index in matches of the first match of the window; entries before it are not accessed.</param>
        /// <param name="clusterSize">Number of consecutive matches in the window</param>
        private void ProcessCluster(
            List <Cluster> clusters,
            List <MatchExtension> matches,
            int indexToSkip,
            int clusterSize)
        {
            List <MatchExtension> clusterMatches;
            long total, endIndex, startIndex, score;
            int  counter1, counter2, counter3, best;

            // NOTE(review): the body runs once even when clusterSize is 0; in that
            // case matches[indexToSkip] is still read and marked IsGood below.
            // Confirm callers never pass an empty window.
            do
            {
                // remove cluster overlaps
                // For every match, find the best predecessor among the earlier
                // matches of the window. From holds the predecessor's position
                // relative to indexToSkip, or -1 when the match starts a chain.
                for (counter1 = 0; counter1 < clusterSize; counter1++)
                {
                    matches[indexToSkip + counter1].Score    = matches[indexToSkip + counter1].Length;
                    matches[indexToSkip + counter1].Adjacent = 0;
                    matches[indexToSkip + counter1].From     = -1;

                    for (counter2 = 0; counter2 < counter1; counter2++)
                    {
                        long cost, overlap, overlap1, overlap2;

                        // overlap = max(0, overlap on the reference, overlap on the query)
                        overlap1 = matches[indexToSkip + counter2].ReferenceSequenceOffset
                                   + matches[indexToSkip + counter2].Length
                                   - matches[indexToSkip + counter1].ReferenceSequenceOffset;
                        overlap  = Math.Max(0, overlap1);
                        overlap2 = matches[indexToSkip + counter2].QuerySequenceOffset
                                   + matches[indexToSkip + counter2].Length -
                                   matches[indexToSkip + counter1].QuerySequenceOffset;
                        overlap = Math.Max(overlap, overlap2);

                        // cost matches which are not on same diagonal
                        // (diagonal = query offset - reference offset)
                        cost = overlap
                               + Math.Abs((matches[indexToSkip + counter1].QuerySequenceOffset - matches[indexToSkip + counter1].ReferenceSequenceOffset)
                                          - (matches[indexToSkip + counter2].QuerySequenceOffset - matches[indexToSkip + counter2].ReferenceSequenceOffset));

                        // Chain counter1 after counter2 when that gives a strictly better score.
                        if (matches[indexToSkip + counter2].Score + matches[indexToSkip + counter1].Length - cost > matches[indexToSkip + counter1].Score)
                        {
                            matches[indexToSkip + counter1].From  = counter2;
                            matches[indexToSkip + counter1].Score = matches[indexToSkip + counter2].Score
                                                                    + matches[indexToSkip + counter1].Length
                                                                    - cost;
                            matches[indexToSkip + counter1].Adjacent = overlap;
                        }
                    }
                }

                // Find the match which has highest score
                // (ties keep the earliest match because the comparison is strict)
                best = 0;
                for (counter1 = 1; counter1 < clusterSize; counter1++)
                {
                    if (matches[indexToSkip + counter1].Score > matches[indexToSkip + best].Score)
                    {
                        best = counter1;
                    }
                }

                total      = 0;
                endIndex   = int.MinValue;
                startIndex = int.MaxValue;

                // Walk the chain backwards through From, marking its matches and
                // accumulating total length and the reference span it covers.
                // TODO: remove below cast
                for (counter1 = best; counter1 >= 0; counter1 = (int)matches[indexToSkip + counter1].From)
                {
                    matches[indexToSkip + counter1].IsGood = true;
                    total += matches[indexToSkip + counter1].Length;
                    if (matches[indexToSkip + counter1].ReferenceSequenceOffset + matches[indexToSkip + counter1].Length > endIndex)
                    {
                        // Set the cluster end index
                        endIndex = matches[indexToSkip + counter1].ReferenceSequenceOffset + matches[indexToSkip + counter1].Length;
                    }

                    if (matches[indexToSkip + counter1].ReferenceSequenceOffset < startIndex)
                    {
                        // Set the cluster start index
                        startIndex = matches[indexToSkip + counter1].ReferenceSequenceOffset;
                    }
                }

                // The chain is scored either by summed match length or by the
                // reference span, depending on ScoreMethod.
                if (this.ScoreMethod == ClusterScoreMethod.MatchLength)
                {
                    score = total;
                }
                else
                {
                    score = endIndex - startIndex;
                }

                // If the current score reaches the minimum score,
                // add the matches of the chain to a new cluster
                if (score >= this.MinimumScore)
                {
                    clusterMatches = new List <MatchExtension>();

                    for (counter1 = 0; counter1 < clusterSize; counter1++)
                    {
                        if (matches[indexToSkip + counter1].IsGood)
                        {
                            MatchExtension match = matches[indexToSkip + counter1];
                            if (matches[indexToSkip + counter1].Adjacent != 0)
                            {
                                // Trim the overlap off a copy so the entry in
                                // matches keeps its original offsets and length.
                                // Assumes CopyTo also copies Adjacent - TODO confirm.
                                match = new MatchExtension();
                                matches[indexToSkip + counter1].CopyTo(match);
                                match.ReferenceSequenceOffset += match.Adjacent;
                                match.QuerySequenceOffset     += match.Adjacent;
                                match.Length -= match.Adjacent;
                            }

                            clusterMatches.Add(match);
                        }
                    }

                    // adding the cluster to list
                    if (0 < clusterMatches.Count)
                    {
                        clusters.Add(new Cluster(clusterMatches));
                    }
                }

                // Correcting the cluster indices
                // Compact the matches that were not on the chain to the front of
                // the window; counter3 counts them and becomes the new window size.
                for (counter1 = counter3 = 0; counter1 < clusterSize; counter1++)
                {
                    if (!matches[indexToSkip + counter1].IsGood)
                    {
                        if (counter1 != counter3)
                        {
                            matches[indexToSkip + counter3] = matches[indexToSkip + counter1];
                        }

                        counter3++;
                    }
                }

                clusterSize = counter3;
            }while (clusterSize > 0);
        }
        /// <summary>
        /// Find the longest increasing sub sequence from the given set of MUMs.
        /// Each MUM is scored by its length plus the best score of an earlier MUM
        /// it can follow, minus the overlap between the two; the chain ending at
        /// the highest-scoring MUM is returned.
        /// </summary>
        /// <param name="sortedMums">List of sorted MUMs.</param>
        /// <returns>
        /// The MUMs on the best chain, each trimmed by its overlap with its
        /// predecessor (MUMs trimmed to zero or negative length are dropped).
        /// Returns null when <paramref name="sortedMums"/> is null and an empty
        /// list when there are no MUMs to process.
        /// </returns>
        public IList <Match> GetLongestSequence(IList <Match> sortedMums)
        {
            if (sortedMums == null)
            {
                return null;
            }

            MatchExtension[] matches = ConvertToMUMExtension(sortedMums);

            // Nothing to chain. Without this guard the best-score lookup below
            // would index matches[0] on an empty array and throw.
            if (matches.Length == 0)
            {
                return new List<Match>();
            }

            for (var currentIndex = 0; currentIndex < matches.Length; currentIndex++)
            {
                MatchExtension current = matches[currentIndex];

                // Initialize the MUM Extension: a chain consisting of this MUM alone
                current.Score     = current.Length;
                current.WrapScore = current.Length;
                current.Adjacent  = 0;
                current.From      = -1;

                for (var previousIndex = 0; previousIndex < currentIndex; previousIndex++)
                {
                    MatchExtension previous = matches[previousIndex];

                    // Find the overlap in query sequence of MUM (never negative)
                    var queryOverlap = previous.QuerySequenceOffset + previous.Length;

                    queryOverlap -= current.QuerySequenceOffset;
                    var overlap = queryOverlap > 0 ? queryOverlap : 0;

                    // Calculate the score for query sequence of MUM
                    var score = previous.Score
                                + current.Length
                                - overlap;
                    if (score > current.WrapScore)
                    {
                        current.WrapScore = score;
                    }

                    // Find the overlap in reference sequence of MUM and keep the
                    // larger of the query and reference overlaps
                    var referenceOverlap = previous.ReferenceSequenceOffset
                                           + previous.Length
                                           - current.ReferenceSequenceOffset;

                    overlap = overlap > referenceOverlap ? overlap : referenceOverlap;

                    score = previous.Score
                            + current.Length
                            - overlap;
                    if (score > current.Score)
                    {
                        // To remove crosses, mark the current MUM as following
                        // the previous one without any crosses
                        current.From = previousIndex;

                        // Set the new score and overlap after removing the cross
                        current.Score    = score;
                        current.Adjacent = overlap;
                    }

                    // Calculate the score for reference sequence of MUM
                    score = previous.WrapScore
                            + current.Length
                            - overlap;
                    if (score >= current.WrapScore)
                    {
                        current.WrapScore = score;
                    }
                }
            }

            // Find the best longest increasing subsequence
            // Sequence with highest score is the longest increasing subsequence;
            // ties keep the earliest MUM because the comparison is strict
            long best      = 0;
            long bestScore = matches[best].Score;

            for (long candidate = 1; candidate < matches.Length; candidate++)
            {
                if (matches[candidate].Score > bestScore)
                {
                    best      = candidate;
                    bestScore = matches[best].Score;
                }
            }

            // Mark the MUMs in longest increasing subsequence as "Good" by
            // following the From links back until the chain start (-1)
            for (long link = best; link >= 0; link = matches[link].From)
            {
                matches[link].IsGood = true;
            }

            IList<Match> outputMums = new List<Match>();

            foreach (MatchExtension extension in matches)
            {
                if (!extension.IsGood)
                {
                    continue;
                }

                // Trim the part that overlaps the predecessor on the chain
                var adjacent = extension.Adjacent;
                if (0 != adjacent)
                {
                    extension.ReferenceSequenceOffset += adjacent;
                    extension.QuerySequenceOffset     += adjacent;
                    extension.Length -= adjacent;
                }

                // A MUM trimmed away completely contributes nothing
                if (0 < extension.Length)
                {
                    Match match = new Match();
                    match.Length = extension.Length;
                    match.QuerySequenceOffset     = extension.QuerySequenceOffset;
                    match.ReferenceSequenceOffset = extension.ReferenceSequenceOffset;
                    outputMums.Add(match);
                }
            }

            // Return the list of MUMs that represent the longest increasing subsequence
            return outputMums;
        }