/// <summary>
/// Extend the clusters of a synteny into delta alignments.
/// Clusters are visited in order of their first sequence start; every match of a
/// cluster either starts a new alignment or, when the previous extension reached
/// it, is appended to the alignment currently being extended.
/// </summary>
/// <param name="synteny">Synteny in which clusters need to be extended.</param>
/// <returns>List of delta alignments</returns>
private List<DeltaAlignment> ExtendClusters(Synteny synteny)
{
    List<DeltaAlignment> deltaAlignments = new List<DeltaAlignment>();

    // A synteny without clusters has nothing to extend; returning here also keeps
    // the Last() call below from throwing on an empty list.
    if (synteny.Clusters.Count == 0)
    {
        return deltaAlignments;
    }

    bool isClusterExtended = false;
    DeltaAlignment deltaAlignment = null;
    Cluster targetCluster = synteny.Clusters.Last();

    // Sort the cluster by first sequence start
    IList<Cluster> clusters = SortCluster(synteny.Clusters, FirstSequenceStart);

    // foreach disposes the enumerator. The loop always advances to the next cluster
    // in sorted order; targetCluster only decides whether the "extended" state is
    // carried over into the next iteration.
    foreach (Cluster currentCluster in clusters)
    {
        if (!isClusterExtended
            && (currentCluster.IsFused
                || IsClusterShadowed(deltaAlignments, currentCluster, deltaAlignment)))
        {
            currentCluster.IsFused = true;
            continue;
        }

        // Extend the match. An index loop is used so the position of the match is
        // known without searching the list again for every match.
        IList<MatchExtension> matches = currentCluster.Matches;
        for (int matchIndex = 0; matchIndex < matches.Count; matchIndex++)
        {
            MatchExtension match = matches[matchIndex];

            if (isClusterExtended)
            {
                // Only a match that starts exactly where the running alignment
                // ended can be appended to it.
                if (deltaAlignment.FirstSequenceEnd != match.ReferenceSequenceOffset
                    || deltaAlignment.SecondSequenceEnd != match.QuerySequenceOffset)
                {
                    continue;
                }

                deltaAlignment.FirstSequenceEnd += match.Length - 1;
                deltaAlignment.SecondSequenceEnd += match.Length - 1;
            }
            else
            {
                //TODO: Do we need sequence here? Changed to sequence id.
                deltaAlignment = DeltaAlignment.NewAlignment(
                    synteny.ReferenceSequence,
                    synteny.QuerySequence,
                    currentCluster,
                    match);
                deltaAlignments.Add(deltaAlignment);

                // Find the MUM which is a good candidate for extension in reverse direction
                DeltaAlignment targetAlignment = GetPreviousAlignment(deltaAlignments, deltaAlignment);

                if (ExtendToPreviousSequence(
                        synteny.ReferenceSequence,
                        synteny.QuerySequence,
                        deltaAlignments,
                        deltaAlignment,
                        targetAlignment))
                {
                    deltaAlignment = targetAlignment;
                }
            }

            int methodName = ModifiedSmithWaterman.ForwardAlignFlag;
            long targetReference;
            long targetQuery;

            if (matchIndex < matches.Count - 1)
            {
                // extend till the match in the current cluster
                MatchExtension nextMatch = matches[matchIndex + 1];
                targetReference = nextMatch.ReferenceSequenceOffset;
                targetQuery = nextMatch.QuerySequenceOffset;
            }
            else
            {
                // extend till next cluster
                // Use the Count property for both sequences; the LINQ Count()
                // extension may walk the whole sequence and yields an int.
                targetReference = synteny.ReferenceSequence.Count - 1;
                targetQuery = synteny.QuerySequence.Count - 1;

                targetCluster = GetNextCluster(
                    clusters,
                    currentCluster,
                    ref targetReference,
                    ref targetQuery);

                // No cluster of this synteny follows: add the optimal flag.
                if (!synteny.Clusters.Contains(targetCluster))
                {
                    methodName |= ModifiedSmithWaterman.OptimalFlag;
                }
            }

            isClusterExtended = ExtendToNextSequence(
                synteny.ReferenceSequence,
                synteny.QuerySequence,
                deltaAlignment,
                targetReference,
                targetQuery,
                methodName);
        }

        // An extension that did not end on a cluster of this synteny is not carried over.
        if (!synteny.Clusters.Contains(targetCluster))
        {
            isClusterExtended = false;
        }

        currentCluster.IsFused = true;
    }

    return deltaAlignments;
}
/// <summary>
/// Process the clusters of a read:
/// 1. Re-map reference offsets that lie past the internal reference sequence
/// 2. Group the matches into syntenies (one per reference/query ID pair)
/// 3. Process the syntenies
/// </summary>
/// <param name="clusters">List of clusters of a read.</param>
/// <returns>List of delta alignments</returns>
/// <exception cref="ArgumentNullException">Thrown when clusters is null.</exception>
public IEnumerable<DeltaAlignment> ProcessCluster(IList<Cluster> clusters)
{
    if (clusters == null)
    {
        throw new ArgumentNullException("clusters");
    }

    if (clusters.Count == 0)
    {
        return Enumerable.Empty<DeltaAlignment>();
    }

    // Every cluster created below gets the same direction flag.
    bool anyReversed = clusters.Any(c => c.IsReverseQueryDirection);

    _nucmerAligner.SimilarityMatrix = SimilarityMatrix;
    _nucmerAligner.BreakLength = BreakLength;
    long referenceLength = _internalReferenceSequence.Count;

    List<Synteny> syntenies = new List<Synteny>();
    Synteny activeSynteny = null;
    ISequence activeReference = null;
    ISequence lastReference = null;
    ISequence lastQuery = null;

    foreach (Cluster inputCluster in clusters)
    {
        List<MatchExtension> freshMatches;

        // Each input cluster starts a new cluster in the synteny currently being filled.
        if (activeSynteny != null)
        {
            // Remove the trailing cluster if it never received a match
            if (activeSynteny.Clusters.Count > 0
                && activeSynteny.Clusters.Last().Matches.Count == 0)
            {
                activeSynteny.Clusters.Remove(activeSynteny.Clusters.Last());
            }

            freshMatches = new List<MatchExtension>();
            activeSynteny.Clusters.Add(new Cluster(freshMatches, anyReversed));
        }

        foreach (MatchExtension match in inputCluster.Matches)
        {
            ISequence matchQuery = match.Query;

            if (match.ReferenceSequenceOffset >= referenceLength)
            {
                // Offset lies past the internal reference: shift it back.
                match.ReferenceSequenceOffset -= referenceLength + 1;
            }
            else
            {
                activeReference = _internalReferenceSequence;
            }

            bool samePairAsBefore =
                lastReference != null
                && lastQuery != null
                && string.Equals(lastReference.ID, activeReference.ID, StringComparison.OrdinalIgnoreCase)
                && string.Equals(lastQuery.ID, matchQuery.ID, StringComparison.OrdinalIgnoreCase);

            if (!samePairAsBefore)
            {
                bool reusedExisting = false;

                if (lastQuery != null
                    && string.Equals(lastQuery.ID, matchQuery.ID, StringComparison.OrdinalIgnoreCase))
                {
                    // Same query as before: look for a synteny already created for this pair.
                    Synteny existing = syntenies.FirstOrDefault(s =>
                        string.Equals(s.ReferenceSequence.ID, activeReference.ID, StringComparison.OrdinalIgnoreCase)
                        && string.Equals(s.QuerySequence.ID, matchQuery.ID, StringComparison.OrdinalIgnoreCase));

                    if (existing != null)
                    {
                        activeSynteny = existing;
                        reusedExisting = true;
                    }
                }
                else
                {
                    // First match or a different query. The result of this
                    // intermediate call is not used here.
                    ProcessSynteny(syntenies);
                }

                lastReference = activeReference;
                lastQuery = matchQuery;

                // Remove the trailing cluster if it never received a match
                if (activeSynteny != null
                    && activeSynteny.Clusters.Count > 0
                    && activeSynteny.Clusters.Last().Matches.Count == 0)
                {
                    activeSynteny.Clusters.Remove(activeSynteny.Clusters.Last());
                }

                if (!reusedExisting)
                {
                    // No synteny for this pair yet: create and register one.
                    activeSynteny = new Synteny(activeReference, matchQuery);
                    syntenies.Add(activeSynteny);
                }

                freshMatches = new List<MatchExtension>();
                activeSynteny.Clusters.Add(new Cluster(freshMatches, anyReversed));
            }

            // Matches of length 1 or less are not added.
            if (match.Length > 1)
            {
                activeSynteny.Clusters.Last().Matches.Add(match);
            }
        }
    }

    return ProcessSynteny(syntenies);
}
/// <summary>
/// Process the clusters of a read.
/// Reference offsets beyond the internal reference sequence are shifted back,
/// the matches are grouped into syntenies keyed by reference and query ID, and
/// the syntenies are then processed.
/// </summary>
/// <param name="clusters">List of clusters of a read.</param>
/// <returns>List of delta alignments</returns>
/// <exception cref="ArgumentNullException">Thrown when clusters is null.</exception>
public IEnumerable<DeltaAlignment> ProcessCluster(IList<Cluster> clusters)
{
    if (clusters == null)
    {
        throw new ArgumentNullException("clusters");
    }

    if (clusters.Count == 0)
    {
        return Enumerable.Empty<DeltaAlignment>();
    }

    // The direction flag is shared by every cluster created in this call.
    bool reverseDirection = clusters.Any(c => c.IsReverseQueryDirection);

    _nucmerAligner.SimilarityMatrix = SimilarityMatrix;
    _nucmerAligner.BreakLength = BreakLength;
    long internalReferenceLength = _internalReferenceSequence.Count;

    List<Synteny> syntenies = new List<Synteny>();
    Synteny synteny = null;
    ISequence reference = null;
    ISequence previousReference = null;
    ISequence previousQuery = null;

    foreach (Cluster sourceCluster in clusters)
    {
        List<MatchExtension> newMatches;

        if (synteny != null)
        {
            // Discard a trailing cluster that stayed empty, then open a new one
            bool trailingClusterIsEmpty =
                synteny.Clusters.Count > 0
                && synteny.Clusters.Last().Matches.Count == 0;
            if (trailingClusterIsEmpty)
            {
                synteny.Clusters.Remove(synteny.Clusters.Last());
            }

            newMatches = new List<MatchExtension>();
            synteny.Clusters.Add(new Cluster(newMatches, reverseDirection));
        }

        foreach (MatchExtension sourceMatch in sourceCluster.Matches)
        {
            ISequence query = sourceMatch.Query;

            if (sourceMatch.ReferenceSequenceOffset < internalReferenceLength)
            {
                reference = _internalReferenceSequence;
            }
            else
            {
                // Shift an offset that lies past the internal reference back.
                sourceMatch.ReferenceSequenceOffset -= internalReferenceLength + 1;
            }

            bool pairChanged =
                previousReference == null
                || previousQuery == null
                || !string.Equals(previousReference.ID, reference.ID, StringComparison.OrdinalIgnoreCase)
                || !string.Equals(previousQuery.ID, query.ID, StringComparison.OrdinalIgnoreCase);

            if (pairChanged)
            {
                bool alreadyKnown = false;
                bool queryUnchanged =
                    previousQuery != null
                    && string.Equals(previousQuery.ID, query.ID, StringComparison.OrdinalIgnoreCase);

                if (queryUnchanged)
                {
                    // Pick the first existing synteny with the same reference and query IDs
                    for (int index = 0; index < syntenies.Count && !alreadyKnown; index++)
                    {
                        Synteny candidate = syntenies[index];
                        if (string.Equals(candidate.ReferenceSequence.ID, reference.ID, StringComparison.OrdinalIgnoreCase)
                            && string.Equals(candidate.QuerySequence.ID, query.ID, StringComparison.OrdinalIgnoreCase))
                        {
                            synteny = candidate;
                            alreadyKnown = true;
                        }
                    }
                }
                else
                {
                    // Runs for the first match and whenever the query differs;
                    // the value returned by this call is discarded.
                    ProcessSynteny(syntenies);
                }

                previousReference = reference;
                previousQuery = query;

                // Discard a trailing cluster that stayed empty
                if (synteny != null
                    && synteny.Clusters.Count > 0
                    && synteny.Clusters.Last().Matches.Count == 0)
                {
                    synteny.Clusters.Remove(synteny.Clusters.Last());
                }

                if (!alreadyKnown)
                {
                    // Create a synteny for the new pair and register it
                    synteny = new Synteny(reference, query);
                    syntenies.Add(synteny);
                }

                newMatches = new List<MatchExtension>();
                synteny.Clusters.Add(new Cluster(newMatches, reverseDirection));
            }

            // Only matches longer than 1 are added.
            if (sourceMatch.Length > 1)
            {
                synteny.Clusters.Last().Matches.Add(sourceMatch);
            }
        }
    }

    return ProcessSynteny(syntenies);
}
/// <summary>
/// Extend the clusters of a synteny into delta alignments.
/// Clusters are visited in order of their first sequence start; every match of a
/// cluster either starts a new alignment or, when the previous extension reached
/// it, is appended to the alignment currently being extended.
/// </summary>
/// <param name="synteny">Synteny in which clusters need to be extended.</param>
/// <returns>List of delta alignments</returns>
private List<DeltaAlignment> ExtendClusters(Synteny synteny)
{
    List<DeltaAlignment> deltaAlignments = new List<DeltaAlignment>();

    // A synteny without clusters has nothing to extend; returning here also keeps
    // the Last() call below from throwing on an empty list.
    if (synteny.Clusters.Count == 0)
    {
        return deltaAlignments;
    }

    bool isClusterExtended = false;
    DeltaAlignment deltaAlignment = null;

    // Sort the cluster by first sequence start
    IList<Cluster> clusters = SortCluster(synteny.Clusters, FirstSequenceStart);

    Cluster targetCluster = synteny.Clusters.Last();

    // foreach disposes the enumerator. The loop always advances to the next cluster
    // in sorted order; targetCluster only decides whether the "extended" state is
    // carried over into the next iteration.
    foreach (Cluster currentCluster in clusters)
    {
        if (!isClusterExtended
            && (currentCluster.IsFused
                || IsClusterShadowed(deltaAlignments, currentCluster, deltaAlignment)))
        {
            currentCluster.IsFused = true;
            continue;
        }

        // Extend the match. An index loop is used so the position of the match is
        // known without searching the list again for every match.
        IList<MatchExtension> matches = currentCluster.Matches;
        for (int matchIndex = 0; matchIndex < matches.Count; matchIndex++)
        {
            MatchExtension match = matches[matchIndex];

            if (isClusterExtended)
            {
                // Only a match that starts exactly where the running alignment
                // ended can be appended to it.
                if (deltaAlignment.FirstSequenceEnd != match.ReferenceSequenceOffset
                    || deltaAlignment.SecondSequenceEnd != match.QuerySequenceOffset)
                {
                    continue;
                }

                deltaAlignment.FirstSequenceEnd += match.Length - 1;
                deltaAlignment.SecondSequenceEnd += match.Length - 1;
            }
            else
            {
                //TODO: Do we need sequence here? Changed to sequence id.
                deltaAlignment = DeltaAlignment.NewAlignment(
                    synteny.ReferenceSequence,
                    synteny.QuerySequence,
                    currentCluster,
                    match);
                deltaAlignments.Add(deltaAlignment);

                // Find the MUM which is a good candidate for extension in reverse direction
                DeltaAlignment targetAlignment = GetPreviousAlignment(deltaAlignments, deltaAlignment);

                //TODO: NEED TO VERIFY THIS!!!
                // The backward extension is only attempted when the candidate is
                // not the alignment that was just created.
                if (targetAlignment != deltaAlignment
                    && ExtendToPreviousSequence(
                        synteny.ReferenceSequence,
                        synteny.QuerySequence,
                        deltaAlignments,
                        deltaAlignment,
                        targetAlignment))
                {
                    deltaAlignment = targetAlignment;
                }
            }

            int methodName = ModifiedSmithWaterman.ForwardAlignFlag;
            long targetReference;
            long targetQuery;

            if (matchIndex < matches.Count - 1)
            {
                // extend till the match in the current cluster
                MatchExtension nextMatch = matches[matchIndex + 1];
                targetReference = nextMatch.ReferenceSequenceOffset;
                targetQuery = nextMatch.QuerySequenceOffset;
            }
            else
            {
                // extend till next cluster
                // Use the Count property for both sequences; the LINQ Count()
                // extension may walk the whole sequence and yields an int.
                targetReference = synteny.ReferenceSequence.Count - 1;
                targetQuery = synteny.QuerySequence.Count - 1;

                targetCluster = GetNextCluster(
                    clusters,
                    currentCluster,
                    ref targetReference,
                    ref targetQuery);

                // No cluster of this synteny follows: add the optimal flag.
                if (!synteny.Clusters.Contains(targetCluster))
                {
                    methodName |= ModifiedSmithWaterman.OptimalFlag;
                }
            }

            isClusterExtended = ExtendToNextSequence(
                synteny.ReferenceSequence,
                synteny.QuerySequence,
                deltaAlignment,
                targetReference,
                targetQuery,
                methodName);
        }

        // An extension that did not end on a cluster of this synteny is not carried over.
        if (!synteny.Clusters.Contains(targetCluster))
        {
            isClusterExtended = false;
        }

        currentCluster.IsFused = true;
    }

    return deltaAlignments;
}
/// <summary>
/// Process the clusters:
/// 1. Re-map each match's reference start onto the reference sequence it falls in
/// 2. Group the matches into syntenies (one per reference/query ID pair)
/// 3. Process the syntenies
/// </summary>
/// <param name="referenceSequenceList">List of reference sequences</param>
/// <param name="clusters">List of clusters</param>
/// <returns>List of delta alignments</returns>
protected override IList<DeltaAlignment> ProcessCluster(
    IList<ISequence> referenceSequenceList,
    IList<Cluster> clusters)
{
    IList<Synteny> syntenies = new List<Synteny>();
    Synteny activeSynteny = null;
    ISequence activeReference = null;
    ISequence lastReference = null;
    ISequence lastQuery = null;

    nucmerAligner.SimilarityMatrix = SimilarityMatrix;
    nucmerAligner.BreakLength = BreakLength;

    foreach (Cluster inputCluster in clusters)
    {
        IList<MaxUniqueMatchExtension> freshMatches;

        // Each input cluster starts a new cluster in the synteny currently being filled.
        if (activeSynteny != null)
        {
            // Remove the trailing cluster if it never received a match
            if (activeSynteny.Clusters.Count > 0
                && activeSynteny.Clusters.Last().Matches.Count == 0)
            {
                activeSynteny.Clusters.Remove(activeSynteny.Clusters.Last());
            }

            freshMatches = new List<MaxUniqueMatchExtension>();
            activeSynteny.Clusters.Add(new Cluster(freshMatches));
        }

        foreach (MaxUniqueMatchExtension match in inputCluster.Matches)
        {
            ISequence matchQuery = match.Query;

            // Walk the reference sequences, subtracting each one's length plus one
            // from the start until the start falls inside a sequence.
            foreach (ISequence candidate in referenceSequenceList)
            {
                if (match.FirstSequenceStart < candidate.Count)
                {
                    activeReference = candidate;
                    break;
                }

                match.FirstSequenceStart -= candidate.Count + 1;
            }

            bool samePairAsBefore =
                lastReference != null
                && lastQuery != null
                && string.Equals(lastReference.ID, activeReference.ID, StringComparison.OrdinalIgnoreCase)
                && string.Equals(lastQuery.ID, matchQuery.ID, StringComparison.OrdinalIgnoreCase);

            if (!samePairAsBefore)
            {
                bool reusedExisting = false;

                if (lastQuery != null
                    && string.Equals(lastQuery.ID, matchQuery.ID, StringComparison.OrdinalIgnoreCase))
                {
                    // Same query as before: look for a synteny already created for this pair.
                    Synteny existing = syntenies.FirstOrDefault(s =>
                        string.Equals(s.ReferenceSequence.ID, activeReference.ID, StringComparison.OrdinalIgnoreCase)
                        && string.Equals(s.QuerySequence.ID, matchQuery.ID, StringComparison.OrdinalIgnoreCase));

                    if (existing != null)
                    {
                        activeSynteny = existing;
                        reusedExisting = true;
                    }
                }
                else
                {
                    // First match or a different query. The result of this
                    // intermediate call is not used here.
                    ProcessSynteny(syntenies);
                }

                lastReference = activeReference;
                lastQuery = matchQuery;

                // Remove the trailing cluster if it never received a match
                if (activeSynteny != null
                    && activeSynteny.Clusters.Count > 0
                    && activeSynteny.Clusters.Last().Matches.Count == 0)
                {
                    activeSynteny.Clusters.Remove(activeSynteny.Clusters.Last());
                }

                if (!reusedExisting)
                {
                    // No synteny for this pair yet: create and register one.
                    activeSynteny = new Synteny(activeReference, matchQuery);
                    syntenies.Add(activeSynteny);
                }

                freshMatches = new List<MaxUniqueMatchExtension>();
                activeSynteny.Clusters.Add(new Cluster(freshMatches));
            }

            // Matches of length 1 or less are not added.
            if (match.Length > 1)
            {
                activeSynteny.Clusters.Last().Matches.Add(match);
            }
        }
    }

    return ProcessSynteny(syntenies);
}