/// <summary>
/// Builds a delta alignment that covers exactly one match of a cluster.
/// </summary>
/// <param name="referenceSequence">Reference sequence handed to the DeltaAlignment constructor.</param>
/// <param name="querySequence">Query sequence handed to the DeltaAlignment constructor.</param>
/// <param name="cluster">Cluster that supplies the query direction.</param>
/// <param name="match">Match that supplies the start offsets and the length.</param>
/// <returns>Newly created DeltaAlignment object</returns>
internal static DeltaAlignment NewAlignment(
    ISequence referenceSequence,
    ISequence querySequence,
    Cluster cluster,
    MatchExtension match)
{
    var alignment = new DeltaAlignment(referenceSequence, querySequence);

    // Starts are the match offsets on each sequence.
    alignment.FirstSequenceStart = match.ReferenceSequenceOffset;
    alignment.SecondSequenceStart = match.QuerySequenceOffset;

    // Ends are start + length - 1, i.e. the offset of the last symbol of the match.
    alignment.FirstSequenceEnd = match.ReferenceSequenceOffset + match.Length - 1;
    alignment.SecondSequenceEnd = match.QuerySequenceOffset + match.Length - 1;

    alignment.QueryDirection = cluster.QueryDirection;

    return alignment;
}
/// <summary>
/// Checks the ToString() output of a Match and of a MatchExtension built from it
/// against hard-coded expected strings.
/// </summary>
public void TestMatchAndMatchExtensionToString()
{
    Match match = new Match();
    match.Length = 20;
    match.QuerySequenceOffset = 33;

    MatchExtension matchExtn = new MatchExtension(match);
    matchExtn.ID = 1;
    matchExtn.Length = 20;

    string actualMatchExtnString = matchExtn.ToString();
    string actualMatchString = match.ToString();

    string expectedMatchExtnString = "RefStart=0 QueryStart=33 Length=20 Score=0 WrapScore=0 IsGood=False";
    string expectedMatchString = "RefStart=0 QueryStart=33 Length=20";

    // Assert.AreEqual takes (expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    Assert.AreEqual(expectedMatchExtnString, actualMatchExtnString);
    Assert.AreEqual(expectedMatchString, actualMatchString);
}
/// <summary>
/// Checks the ToString() output of a Match and of a MatchExtension built from it
/// against the expected strings read from the test configuration XML.
/// </summary>
public void ValidateMatchAndMatchExtensionToString()
{
    var match = new Match();
    match.Length = 20;
    match.QuerySequenceOffset = 33;

    var matchExtn = new MatchExtension(match);
    matchExtn.ID = 1;
    matchExtn.Length = 20;

    string actualMatchExtnString = matchExtn.ToString();
    string actualMatchString = match.ToString();

    string expectedMatchExtnString = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.ExpectedMatchExtnStringNode);
    string expectedMatchString = this.utilityObj.xmlUtil.GetTextValue(
        Constants.ToStringNodeName,
        Constants.ExpectedMatchStringNode);

    // Assert.AreEqual takes (expected, actual); keeping that order makes the
    // failure message label the two values correctly.
    Assert.AreEqual(expectedMatchExtnString, actualMatchExtnString);
    Assert.AreEqual(expectedMatchString, actualMatchString);
}
/// <summary>
/// Checks that Cluster.ToString() emits one line per contained MatchExtension,
/// each terminated by the platform newline.
/// </summary>
public void TestClusterToString()
{
    Match match = new Match();

    MatchExtension matchExtn1 = new MatchExtension(match);
    matchExtn1.ID = 1;
    matchExtn1.Length = 20;

    MatchExtension matchExtn2 = new MatchExtension(match);
    matchExtn2.ID = 2;
    matchExtn2.Length = 30;

    IList<MatchExtension> extnList = new List<MatchExtension>();
    extnList.Add(matchExtn1);
    extnList.Add(matchExtn2);

    Cluster clust = new Cluster(extnList);
    string actualString = clust.ToString();

    // The literal is written with "\r\n" and normalised to the platform newline.
    string expectedString = "RefStart=0 QueryStart=0 Length=20 Score=0 WrapScore=0 IsGood=False\r\nRefStart=0 QueryStart=0 Length=30 Score=0 WrapScore=0 IsGood=False\r\n".Replace("\r\n", Environment.NewLine);

    // Assert.AreEqual takes (expected, actual); the original call had them swapped.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Checks Cluster.ToString() for a two-match cluster against the expected string
/// read from the test configuration XML.
/// </summary>
public void ValidateClusterToString()
{
    var match = new Match();

    var matchExtn1 = new MatchExtension(match);
    matchExtn1.ID = 1;
    matchExtn1.Length = 20;

    var matchExtn2 = new MatchExtension(match);
    matchExtn2.ID = 2;
    matchExtn2.Length = 30;

    IList<MatchExtension> extnList = new List<MatchExtension>();
    extnList.Add(matchExtn1);
    extnList.Add(matchExtn2);

    var clust = new Cluster(extnList);
    string actualString = clust.ToString();

    // The configured value carries the four characters \r\n literally; turn each
    // occurrence into the platform newline before comparing.
    string expectedString = this.utilityObj.xmlUtil
        .GetTextValue(Constants.ToStringNodeName, Constants.ClusterExpectedNode)
        .Replace("\\r\\n", System.Environment.NewLine);

    // Assert.AreEqual takes (expected, actual); the original call had them swapped.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Extend the clusters of a synteny into delta alignments.
/// Clusters are visited in the order returned by SortCluster. While no extension is
/// in progress, each match starts a new delta alignment (which may be replaced by an
/// earlier alignment when the backward extension succeeds); while an extension is in
/// progress, a match is appended only if it starts exactly where the current
/// alignment ends. After each match a forward extension is attempted, either to the
/// next match of the same cluster or to the target returned by GetNextCluster.
/// </summary>
/// <param name="synteny">Synteny in which clusters need to be extended.</param>
/// <returns>List of delta alignments</returns>
private List <DeltaAlignment> ExtendClusters(Synteny synteny)
{
    // Result of the most recent ExtendToNextSequence call
    // (presumably true when the target was reached - TODO confirm).
    bool isClusterExtended = false;
    List <DeltaAlignment> deltaAlignments = new List <DeltaAlignment>();
    DeltaAlignment deltaAlignment = null;
    Cluster currentCluster;

    IList <Cluster> clusters = synteny.Clusters;

    // Sort the cluster by first sequence start
    clusters = SortCluster(clusters, FirstSequenceStart);

    // Initial target is the last cluster of the synteny's own (unsorted) list.
    Cluster targetCluster = synteny.Clusters.Last();

    // NOTE(review): previousCluster is advanced in step with the main loop, but its
    // Current value is only ever copied into currentCluster, which is overwritten at
    // the top of the next iteration. Neither enumerator is disposed. Confirm whether
    // this enumerator is still needed.
    IEnumerator <Cluster> previousCluster = clusters.GetEnumerator();
    previousCluster.MoveNext();

    IEnumerator <Cluster> cluster = clusters.GetEnumerator();
    while (cluster.MoveNext())
    {
        currentCluster = cluster.Current;

        // When no extension is in progress, skip clusters that are already fused or
        // that IsClusterShadowed reports as shadowed, and mark them fused.
        if (!isClusterExtended
            && (currentCluster.IsFused
                || IsClusterShadowed(deltaAlignments, currentCluster, deltaAlignment)))
        {
            currentCluster.IsFused = true;
            previousCluster.MoveNext();

            // NOTE(review): dead store - overwritten by cluster.Current after continue.
            currentCluster = previousCluster.Current;
            continue;
        }

        // Extend the match
        foreach (MatchExtension match in currentCluster.Matches)
        {
            if (isClusterExtended)
            {
                // Only a match that begins exactly at the current alignment end on
                // both sequences is appended; any other match is skipped.
                if (deltaAlignment.FirstSequenceEnd != match.ReferenceSequenceOffset
                    || deltaAlignment.SecondSequenceEnd != match.QuerySequenceOffset)
                {
                    continue;
                }

                deltaAlignment.FirstSequenceEnd += match.Length - 1;
                deltaAlignment.SecondSequenceEnd += match.Length - 1;
            }
            else
            {
                //TODO: Do we need sequence here? Changed to sequence id.
                deltaAlignment = DeltaAlignment.NewAlignment(
                    synteny.ReferenceSequence,
                    synteny.QuerySequence,
                    currentCluster,
                    match);
                deltaAlignments.Add(deltaAlignment);

                // Find the MUM which is a good candidate for extension in reverse direction
                DeltaAlignment targetAlignment = GetPreviousAlignment(deltaAlignments, deltaAlignment);

                // Continue with the previous alignment when it differs from the new one
                // and ExtendToPreviousSequence returns true.
                if (targetAlignment != deltaAlignment && //TODO: NEED TO VERIFY THIS!!!
                    //if (
                    ExtendToPreviousSequence(
                        synteny.ReferenceSequence,
                        synteny.QuerySequence,
                        deltaAlignments,
                        deltaAlignment,
                        targetAlignment))
                {
                    deltaAlignment = targetAlignment;
                }
            }

            // Despite its name this holds alignment flags for ExtendToNextSequence.
            int methodName = ModifiedSmithWaterman.ForwardAlignFlag;

            long targetReference;
            long targetQuery;

            // NOTE(review): IndexOf is evaluated twice per match (a linear search each
            // time) - candidate for hoisting into a local.
            if (currentCluster.Matches.IndexOf(match) < currentCluster.Matches.Count - 1)
            {
                // extend till the match in the current cluster
                MatchExtension nextMatch =
                    currentCluster.Matches[currentCluster.Matches.IndexOf(match) + 1];
                targetReference = nextMatch.ReferenceSequenceOffset;
                targetQuery = nextMatch.QuerySequenceOffset;

                isClusterExtended = ExtendToNextSequence(
                    synteny.ReferenceSequence,
                    synteny.QuerySequence,
                    deltaAlignment,
                    targetReference,
                    targetQuery,
                    methodName);
            }
            else
            {
                // extend till next cluster
                // Targets start at the last index of each sequence; GetNextCluster
                // receives them by ref and may replace them.
                targetReference = synteny.ReferenceSequence.Count - 1;
                targetQuery = synteny.QuerySequence.Count() - 1;

                targetCluster = GetNextCluster(
                    clusters,
                    currentCluster,
                    ref targetReference,
                    ref targetQuery);

                // A target that is not one of the synteny's clusters adds OptimalFlag.
                if (!synteny.Clusters.Contains(targetCluster))
                {
                    methodName |= ModifiedSmithWaterman.OptimalFlag;
                }

                isClusterExtended = ExtendToNextSequence(
                    synteny.ReferenceSequence,
                    synteny.QuerySequence,
                    deltaAlignment,
                    targetReference,
                    targetQuery,
                    methodName);
            }
        }

        // Without a real target cluster the extension is treated as finished.
        if (!synteny.Clusters.Contains(targetCluster))
        {
            isClusterExtended = false;
        }

        currentCluster.IsFused = true;

        // NOTE(review): both assignments to currentCluster below are overwritten by
        // cluster.Current at the start of the next iteration, so they do not change
        // which cluster is visited next - confirm against the intended algorithm.
        if (!isClusterExtended)
        {
            previousCluster.MoveNext();
            currentCluster = previousCluster.Current;
        }
        else
        {
            currentCluster = targetCluster;
        }
    }

    return(deltaAlignments);
}
/// <summary>
/// Process the clusters.
/// Works on the window matches[indexToSkip .. indexToSkip + clusterSize - 1] and
/// repeats the following until the window is empty:
/// score every match as the best chain ending at it, mark the matches of the
/// highest-scoring chain as good, add them to <paramref name="clusters"/> as a new
/// Cluster when the chain score reaches MinimumScore, then compact the matches that
/// are not good to the front of the window and shrink it.
/// </summary>
/// <param name="clusters">List of clusters; accepted clusters are appended to it.</param>
/// <param name="matches">List of matches; Score, Adjacent, From and IsGood are updated and
/// entries inside the window are moved during compaction.</param>
/// <param name="indexToSkip">Start index upto which match extension to be ignored.</param>
/// <param name="clusterSize">Size of cluster</param>
private void ProcessCluster(
    List<Cluster> clusters,
    List<MatchExtension> matches,
    int indexToSkip,
    int clusterSize)
{
    // An empty window has nothing to chain. Without this guard the do/while body
    // would still run once and mark matches[indexToSkip], which is outside the window.
    if (clusterSize <= 0)
    {
        return;
    }

    do
    {
        // remove cluster overlaps: chain each match to the earlier match that yields
        // the highest score
        for (int current = 0; current < clusterSize; current++)
        {
            MatchExtension currentMatch = matches[indexToSkip + current];
            currentMatch.Score = currentMatch.Length;
            currentMatch.Adjacent = 0;
            currentMatch.From = -1;

            for (int earlier = 0; earlier < current; earlier++)
            {
                MatchExtension earlierMatch = matches[indexToSkip + earlier];

                // Overlap is the larger of the reference and query overlaps, never negative.
                long referenceOverlap = earlierMatch.ReferenceSequenceOffset
                    + earlierMatch.Length
                    - currentMatch.ReferenceSequenceOffset;
                long queryOverlap = earlierMatch.QuerySequenceOffset
                    + earlierMatch.Length
                    - currentMatch.QuerySequenceOffset;
                long overlap = Math.Max(Math.Max(0, referenceOverlap), queryOverlap);

                // cost matches which are not on same diagonal
                long cost = overlap + Math.Abs(
                    (currentMatch.QuerySequenceOffset - currentMatch.ReferenceSequenceOffset)
                    - (earlierMatch.QuerySequenceOffset - earlierMatch.ReferenceSequenceOffset));

                long chainedScore = earlierMatch.Score + currentMatch.Length - cost;
                if (chainedScore > currentMatch.Score)
                {
                    currentMatch.From = earlier;
                    currentMatch.Score = chainedScore;
                    currentMatch.Adjacent = overlap;
                }
            }
        }

        // Find the match which has highest score (first one wins on ties)
        int best = 0;
        for (int candidate = 1; candidate < clusterSize; candidate++)
        {
            if (matches[indexToSkip + candidate].Score > matches[indexToSkip + best].Score)
            {
                best = candidate;
            }
        }

        // Walk the chain back through From, marking its matches and measuring it.
        // The sentinels use the long range because offsets are long; the former
        // int.MaxValue seed left startIndex untouched for offsets >= int.MaxValue.
        long total = 0;
        long endIndex = long.MinValue;
        long startIndex = long.MaxValue;

        // From holds a window-relative index assigned from an int above, so the cast
        // does not lose information.
        for (int index = best; index >= 0; index = (int)matches[indexToSkip + index].From)
        {
            MatchExtension chained = matches[indexToSkip + index];
            chained.IsGood = true;
            total += chained.Length;

            long chainedEnd = chained.ReferenceSequenceOffset + chained.Length;
            if (chainedEnd > endIndex)
            {
                // Set the cluster end index
                endIndex = chainedEnd;
            }

            if (chained.ReferenceSequenceOffset < startIndex)
            {
                // Set the cluster start index
                startIndex = chained.ReferenceSequenceOffset;
            }
        }

        // Score is either the summed match length or the reference span of the chain.
        long score = this.ScoreMethod == ClusterScoreMethod.MatchLength
            ? total
            : endIndex - startIndex;

        // If the current score reaches the minimum score, add the good matches as a cluster
        if (score >= this.MinimumScore)
        {
            List<MatchExtension> clusterMatches = new List<MatchExtension>();
            for (int index = 0; index < clusterSize; index++)
            {
                MatchExtension source = matches[indexToSkip + index];
                if (!source.IsGood)
                {
                    continue;
                }

                MatchExtension match = source;
                if (source.Adjacent != 0)
                {
                    // Trim the overlap with the chain predecessor from the front of a
                    // copy, leaving the entry in matches unchanged.
                    match = new MatchExtension();
                    source.CopyTo(match);
                    match.ReferenceSequenceOffset += match.Adjacent;
                    match.QuerySequenceOffset += match.Adjacent;
                    match.Length -= match.Adjacent;
                }

                clusterMatches.Add(match);
            }

            // adding the cluster to list
            if (0 < clusterMatches.Count)
            {
                clusters.Add(new Cluster(clusterMatches));
            }
        }

        // Correcting the cluster indices: move the matches that are not good to the
        // front of the window; they form the window of the next round.
        int remaining = 0;
        for (int index = 0; index < clusterSize; index++)
        {
            if (!matches[indexToSkip + index].IsGood)
            {
                if (index != remaining)
                {
                    matches[indexToSkip + remaining] = matches[indexToSkip + index];
                }

                remaining++;
            }
        }

        clusterSize = remaining;
    }
    while (clusterSize > 0);
}
/// <summary>
/// Find the longest increasing sub sequence from the given set of MUMs.
/// Each MUM is scored as the best chain ending at it, where chaining to an earlier
/// MUM costs the larger of their query and reference overlaps. The chain ending at
/// the highest-scoring MUM is returned, with each overlap trimmed from the front of
/// the later MUM.
/// </summary>
/// <param name="sortedMums">List of sorted MUMs.</param>
/// <returns>
/// Longest Increasing Subsequence; null when <paramref name="sortedMums"/> is null,
/// and an empty list when there are no MUMs.
/// </returns>
public IList<Match> GetLongestSequence(IList<Match> sortedMums)
{
    if (sortedMums == null)
    {
        return null;
    }

    MatchExtension[] matches = ConvertToMUMExtension(sortedMums);

    // With no MUMs there is no chain to find; the best-score lookup below would
    // otherwise index an empty array and throw.
    if (matches.Length == 0)
    {
        return new List<Match>();
    }

    for (var counteri = 0; counteri < matches.Length; counteri++)
    {
        MatchExtension current = matches[counteri];

        // Initialize the MUM Extension: a chain consisting of this MUM alone
        current.Score = current.Length;
        current.WrapScore = current.Length;
        current.Adjacent = 0;
        current.From = -1;

        for (var counterj = 0; counterj < counteri; counterj++)
        {
            MatchExtension earlier = matches[counterj];

            // Find the overlap in query sequence of MUM (never negative)
            var overlap2 = earlier.QuerySequenceOffset + earlier.Length;
            overlap2 -= current.QuerySequenceOffset;
            var overlap = overlap2 > 0 ? overlap2 : 0;

            // Wrap score candidate that charges only the query overlap
            var score = earlier.Score + current.Length - overlap;
            if (score > current.WrapScore)
            {
                current.WrapScore = score;
            }

            // Find the overlap in reference sequence of MUM and keep the larger of the two
            var overlap1 = earlier.ReferenceSequenceOffset
                + earlier.Length
                - current.ReferenceSequenceOffset;
            overlap = overlap > overlap1 ? overlap : overlap1;

            score = earlier.Score + current.Length - overlap;
            if (score > current.Score)
            {
                // To remove crosses, mark counteri as next MUM From counterj
                // without any crosses
                current.From = counterj;

                // Set the new score and overlap after removing the cross
                current.Score = score;
                current.Adjacent = overlap;
            }

            // Wrap score candidate built on the earlier MUM's wrap score
            score = earlier.WrapScore + current.Length - overlap;
            if (score >= current.WrapScore)
            {
                current.WrapScore = score;
            }
        }
    }

    // Find the best longest increasing subsequence
    // Sequence with highest score is the longest increasing subsequence
    // (the first MUM wins on ties)
    long best = 0;
    long bestScore = matches[best].Score;
    for (long counteri = 1; counteri < matches.Length; counteri++)
    {
        if (matches[counteri].Score > bestScore)
        {
            best = counteri;
            bestScore = matches[best].Score;
        }
    }

    // Mark the MUMs in longest increasing subsequence as "Good";
    // From is -1 at the head of the chain, which ends the walk
    for (long counteri = best; counteri >= 0; counteri = matches[counteri].From)
    {
        matches[counteri].IsGood = true;
    }

    IList<Match> outputMums = new List<Match>();
    foreach (MatchExtension t in matches)
    {
        if (!t.IsGood)
        {
            continue;
        }

        // Trim the overlap with the chain predecessor from the front of the MUM
        var adjacent = t.Adjacent;
        if (0 != adjacent)
        {
            t.ReferenceSequenceOffset += adjacent;
            t.QuerySequenceOffset += adjacent;
            t.Length -= adjacent;
        }

        // A MUM whose length is not positive after trimming is dropped
        if (0 < t.Length)
        {
            Match match = new Match();
            match.Length = t.Length;
            match.QuerySequenceOffset = t.QuerySequenceOffset;
            match.ReferenceSequenceOffset = t.ReferenceSequenceOffset;
            outputMums.Add(match);
        }
    }

    // Return the list of MUMs that represent the longest increasing subsequence
    return outputMums;
}