/// <summary>
/// Refines alignment layout by taking in consideration indels (insertions and deletions)
/// and rearrangements between two genomes.
/// Requires mate-pair information to resolve ambiguity.
/// </summary>
/// <remarks>
/// The argument is validated immediately when this method is called. The refinement itself
/// is lazy: it runs as the returned sequence is enumerated. Deltas are modified in place
/// (their FirstSequenceStart / FirstSequenceEnd are shifted).
/// </remarks>
/// <param name="orderedDeltas">
/// Deltas to refine. Each delta is compared only against the group built from the deltas
/// before it, so the collection is presumably sorted by FirstSequenceStart (confirm with callers).
/// </param>
/// <returns>
/// The deltas, yielded in the order the windowed delta cache releases them, followed by
/// whatever is still cached once all deltas have been processed.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="orderedDeltas"/> is null.</exception>
public static IEnumerable<DeltaAlignment> RefineLayout(DeltaAlignmentCollection orderedDeltas)
{
    // Validate here rather than inside the iterator: an iterator body does not start
    // executing until the first MoveNext(), which would delay the exception.
    if (orderedDeltas == null)
    {
        throw new ArgumentNullException("orderedDeltas");
    }

    return RefineLayoutCore(orderedDeltas);
}

/// <summary>
/// Iterator that performs the layout refinement for <see cref="RefineLayout"/>.
/// Expects a non-null collection.
/// </summary>
/// <param name="orderedDeltas">Deltas to refine (already validated by the caller).</param>
/// <returns>The refined deltas as they are released by the delta cache.</returns>
private static IEnumerable<DeltaAlignment> RefineLayoutCore(DeltaAlignmentCollection orderedDeltas)
{
    if (orderedDeltas.Count == 0)
    {
        yield break;
    }

    // The maximum possible insertion/deletion size is not known up front, so a window of
    // 1,000 deltas is assumed to be sufficient for the operation.
    int windowSize = 1000;

    VirtualDeltaAlignmentCollection deltaCache = new VirtualDeltaAlignmentCollection(orderedDeltas, windowSize);

    List<DeltaAlignment> deltasOverlappingAtCurrentIndex = new List<DeltaAlignment>();
    List<DeltaAlignment> leftSideDeltas = new List<DeltaAlignment>();
    List<DeltaAlignment> rightSideDeltas = new List<DeltaAlignment>();
    List<DeltaAlignment> unloadedDeltas;

    try
    {
        // Accumulated shift applied to the first-sequence coordinates of every later delta.
        long currentProcessedOffset = 0;

        DeltaAlignment alignment = deltaCache[0];
        deltasOverlappingAtCurrentIndex.Add(alignment);
        DeltaAlignment deltaWithLargestEndIndex = alignment;

        for (int currentIndex = 0; currentIndex < deltaCache.Count - 1; currentIndex++)
        {
            DeltaAlignment nextDelta = deltaCache[currentIndex + 1];

            // Emit whatever the cache is able to release at this position.
            if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
            {
                for (int i = 0; i < unloadedDeltas.Count; i++)
                {
                    yield return unloadedDeltas[i];
                }

                unloadedDeltas.Clear();
            }

            if (currentProcessedOffset != 0)
            {
                nextDelta.FirstSequenceStart += currentProcessedOffset;
                nextDelta.FirstSequenceEnd += currentProcessedOffset;
            }

            // Check if next delta is just adjacent
            if (nextDelta.FirstSequenceStart - 1 == deltaWithLargestEndIndex.FirstSequenceEnd)
            {
                // If next delta is adjacent there is a possible insertion in target (deletion in reference)
                // Try to extend the deltas from both sides and make them meet
                leftSideDeltas.Clear();
                for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                {
                    DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                    if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        leftSideDeltas.Add(delta);
                    }
                }

                // Find all deltas starting at the adjacent right side
                rightSideDeltas.Clear();
                for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                {
                    DeltaAlignment delta = deltaCache[index];

                    // NOTE(review): the unload position is currentIndex + 1, not index; this
                    // looks deliberate (look-ahead only) - confirm.
                    if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                    {
                        break;
                    }

                    rightSideDeltas.Add(delta);
                }

                long offset = ExtendDeltas(leftSideDeltas, rightSideDeltas);

                if (offset != 0)
                {
                    nextDelta.FirstSequenceStart += offset;
                    nextDelta.FirstSequenceEnd += offset;
                }

                currentProcessedOffset += offset;
            }
            else if (nextDelta.FirstSequenceStart <= deltaWithLargestEndIndex.FirstSequenceEnd)
            {
                // Check if next delta overlaps with current overlap group
                deltasOverlappingAtCurrentIndex.Add(nextDelta);

                // Check if nextDelta is reaching farther than the current farthest delta
                if (nextDelta.FirstSequenceEnd > deltaWithLargestEndIndex.FirstSequenceEnd)
                {
                    deltaWithLargestEndIndex = nextDelta;
                }

                // Keep the overlap group bounded: once it outgrows the window, drop every
                // delta that ends before the farthest-reaching one. Iterate backwards so
                // RemoveAt does not disturb the indices still to be visited.
                if (deltasOverlappingAtCurrentIndex.Count > windowSize)
                {
                    for (int i = deltasOverlappingAtCurrentIndex.Count - 1; i >= 0; i--)
                    {
                        if (deltasOverlappingAtCurrentIndex[i].FirstSequenceEnd < deltaWithLargestEndIndex.FirstSequenceEnd)
                        {
                            deltasOverlappingAtCurrentIndex.RemoveAt(i);
                        }
                    }
                }
            }
            else
            {
                // No overlap with nextDelta, so there is a gap at the end of deltaWithLargestEndIndex
                // Try fix insertion in reference by pulling together two ends of deltas on both sides of the gap
                leftSideDeltas.Clear();
                for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                {
                    DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                    if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        leftSideDeltas.Add(delta);
                    }
                }

                // Find all deltas starting at the right end of the gap
                rightSideDeltas.Clear();
                for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                {
                    DeltaAlignment delta = deltaCache[index];

                    // NOTE(review): the unload position is currentIndex + 1, not index; this
                    // looks deliberate (look-ahead only) - confirm.
                    if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                    {
                        break;
                    }

                    rightSideDeltas.Add(delta);
                }

                // +1 for every left-side delta whose query sequence ID also appears on the
                // right side of the gap, -1 for every left-side delta without a match.
                int score = 0;
                for (int i = 0; i < leftSideDeltas.Count; i++)
                {
                    var l = leftSideDeltas[i];
                    int j = 0;

                    for (; j < rightSideDeltas.Count; j++)
                    {
                        var r = rightSideDeltas[j];

                        // A reference comparison of the query sequences is not possible here,
                        // so IDs are compared instead; IDs are unique for a given read.
                        if (l.QuerySequence.ID == r.QuerySequence.ID)
                        {
                            score++;
                            break;
                        }
                    }

                    // The inner loop ran to completion: no right-side match for this delta.
                    if (j == rightSideDeltas.Count)
                    {
                        score--;
                    }
                }

                // Score > 0 means most deltas share same query sequence at both ends, so close this gap
                if (score > 0)
                {
                    long gapLength = (nextDelta.FirstSequenceStart - deltaWithLargestEndIndex.FirstSequenceEnd) - 1;
                    currentProcessedOffset -= gapLength;

                    // Pull deltas on right side to close the gap
                    for (int i = 0; i < rightSideDeltas.Count; i++)
                    {
                        DeltaAlignment delta = rightSideDeltas[i];
                        delta.FirstSequenceStart -= gapLength;
                        delta.FirstSequenceEnd -= gapLength;
                    }
                }

                // Start a new group from the right side of the gap
                deltaWithLargestEndIndex = nextDelta;
                deltasOverlappingAtCurrentIndex.Clear();
                deltasOverlappingAtCurrentIndex.Add(nextDelta);
            }
        }

        // Everything has been processed: emit the deltas still held by the cache.
        unloadedDeltas = deltaCache.GetCachedDeltas();

        for (int i = 0; i < unloadedDeltas.Count; i++)
        {
            yield return unloadedDeltas[i];
        }

        unloadedDeltas.Clear();
    }
    finally
    {
        // Release working state when enumeration completes or the enumerator is disposed early.
        deltasOverlappingAtCurrentIndex.Clear();
        deltasOverlappingAtCurrentIndex = null;

        leftSideDeltas.Clear();
        leftSideDeltas = null;

        rightSideDeltas.Clear();
        rightSideDeltas = null;

        deltaCache = null;
    }
}
/// <summary>
/// Refines alignment layout by taking in consideration indels (insertions and deletions)
/// and rearrangements between two genomes.
/// Requires mate-pair information to resolve ambiguity.
/// </summary>
/// <remarks>
/// The argument is validated immediately when this method is called. The refinement itself
/// is lazy: it runs as the returned sequence is enumerated. Deltas are modified in place
/// (their FirstSequenceStart / FirstSequenceEnd are shifted).
/// </remarks>
/// <param name="orderedDeltas">
/// Deltas to refine. Each delta is compared only against the group built from the deltas
/// before it, so the collection is presumably sorted by FirstSequenceStart (confirm with callers).
/// </param>
/// <returns>
/// The deltas, yielded in the order the windowed delta cache releases them, followed by
/// whatever is still cached once all deltas have been processed.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="orderedDeltas"/> is null.</exception>
public static IEnumerable<DeltaAlignment> RefineLayout(DeltaAlignmentCollection orderedDeltas)
{
    // Validate here rather than inside the iterator: an iterator body does not start
    // executing until the first MoveNext(), which would delay the exception.
    if (orderedDeltas == null)
    {
        throw new ArgumentNullException("orderedDeltas");
    }

    return RefineLayoutCore(orderedDeltas);
}

/// <summary>
/// Iterator that performs the layout refinement for <see cref="RefineLayout"/>.
/// Expects a non-null collection.
/// </summary>
/// <param name="orderedDeltas">Deltas to refine (already validated by the caller).</param>
/// <returns>The refined deltas as they are released by the delta cache.</returns>
private static IEnumerable<DeltaAlignment> RefineLayoutCore(DeltaAlignmentCollection orderedDeltas)
{
    if (orderedDeltas.Count == 0)
    {
        yield break;
    }

    // The maximum possible insertion/deletion size is not known up front, so a window of
    // 1,000 deltas is assumed to be sufficient for the operation.
    int windowSize = 1000;

    VirtualDeltaAlignmentCollection deltaCache = new VirtualDeltaAlignmentCollection(orderedDeltas, windowSize);

    List<DeltaAlignment> deltasOverlappingAtCurrentIndex = new List<DeltaAlignment>();
    List<DeltaAlignment> leftSideDeltas = new List<DeltaAlignment>();
    List<DeltaAlignment> rightSideDeltas = new List<DeltaAlignment>();
    List<DeltaAlignment> unloadedDeltas;

    try
    {
        // Accumulated shift applied to the first-sequence coordinates of every later delta.
        long currentProcessedOffset = 0;

        DeltaAlignment alignment = deltaCache[0];
        deltasOverlappingAtCurrentIndex.Add(alignment);
        DeltaAlignment deltaWithLargestEndIndex = alignment;

        for (int currentIndex = 0; currentIndex < deltaCache.Count - 1; currentIndex++)
        {
            DeltaAlignment nextDelta = deltaCache[currentIndex + 1];

            // Emit whatever the cache is able to release at this position.
            if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
            {
                for (int i = 0; i < unloadedDeltas.Count; i++)
                {
                    yield return unloadedDeltas[i];
                }

                unloadedDeltas.Clear();
            }

            if (currentProcessedOffset != 0)
            {
                nextDelta.FirstSequenceStart += currentProcessedOffset;
                nextDelta.FirstSequenceEnd += currentProcessedOffset;
            }

            // Check if next delta is just adjacent
            if (nextDelta.FirstSequenceStart - 1 == deltaWithLargestEndIndex.FirstSequenceEnd)
            {
                // If next delta is adjacent there is a possible insertion in target (deletion in reference)
                // Try to extend the deltas from both sides and make them meet
                leftSideDeltas.Clear();
                for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                {
                    DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                    if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        leftSideDeltas.Add(delta);
                    }
                }

                // Find all deltas starting at the adjacent right side
                rightSideDeltas.Clear();
                for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                {
                    DeltaAlignment delta = deltaCache[index];

                    // NOTE(review): the unload position is currentIndex + 1, not index; this
                    // looks deliberate (look-ahead only) - confirm.
                    if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                    {
                        break;
                    }

                    rightSideDeltas.Add(delta);
                }

                long offset = ExtendDeltas(leftSideDeltas, rightSideDeltas);

                if (offset != 0)
                {
                    nextDelta.FirstSequenceStart += offset;
                    nextDelta.FirstSequenceEnd += offset;
                }

                currentProcessedOffset += offset;
            }
            else if (nextDelta.FirstSequenceStart <= deltaWithLargestEndIndex.FirstSequenceEnd)
            {
                // Check if next delta overlaps with current overlap group
                deltasOverlappingAtCurrentIndex.Add(nextDelta);

                // Check if nextDelta is reaching farther than the current farthest delta
                if (nextDelta.FirstSequenceEnd > deltaWithLargestEndIndex.FirstSequenceEnd)
                {
                    deltaWithLargestEndIndex = nextDelta;
                }

                // Keep the overlap group bounded: once it outgrows the window, drop every
                // delta that ends before the farthest-reaching one. Iterate backwards so
                // RemoveAt does not disturb the indices still to be visited.
                if (deltasOverlappingAtCurrentIndex.Count > windowSize)
                {
                    for (int i = deltasOverlappingAtCurrentIndex.Count - 1; i >= 0; i--)
                    {
                        if (deltasOverlappingAtCurrentIndex[i].FirstSequenceEnd < deltaWithLargestEndIndex.FirstSequenceEnd)
                        {
                            deltasOverlappingAtCurrentIndex.RemoveAt(i);
                        }
                    }
                }
            }
            else
            {
                // No overlap with nextDelta, so there is a gap at the end of deltaWithLargestEndIndex
                // Try fix insertion in reference by pulling together two ends of deltas on both sides of the gap
                leftSideDeltas.Clear();
                for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                {
                    DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                    if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        leftSideDeltas.Add(delta);
                    }
                }

                // Find all deltas starting at the right end of the gap
                rightSideDeltas.Clear();
                for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                {
                    DeltaAlignment delta = deltaCache[index];

                    // NOTE(review): the unload position is currentIndex + 1, not index; this
                    // looks deliberate (look-ahead only) - confirm.
                    if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                    {
                        break;
                    }

                    rightSideDeltas.Add(delta);
                }

                // +1 for every left-side delta whose query sequence ID also appears on the
                // right side of the gap, -1 for every left-side delta without a match.
                int score = 0;
                for (int i = 0; i < leftSideDeltas.Count; i++)
                {
                    var l = leftSideDeltas[i];
                    int j = 0;

                    for (; j < rightSideDeltas.Count; j++)
                    {
                        var r = rightSideDeltas[j];

                        // A reference comparison of the query sequences is not possible here,
                        // so IDs are compared instead; IDs are unique for a given read.
                        if (l.QuerySequence.ID == r.QuerySequence.ID)
                        {
                            score++;
                            break;
                        }
                    }

                    // The inner loop ran to completion: no right-side match for this delta.
                    if (j == rightSideDeltas.Count)
                    {
                        score--;
                    }
                }

                // Score > 0 means most deltas share same query sequence at both ends, so close this gap
                if (score > 0)
                {
                    long gapLength = (nextDelta.FirstSequenceStart - deltaWithLargestEndIndex.FirstSequenceEnd) - 1;
                    currentProcessedOffset -= gapLength;

                    // Pull deltas on right side to close the gap
                    for (int i = 0; i < rightSideDeltas.Count; i++)
                    {
                        DeltaAlignment delta = rightSideDeltas[i];
                        delta.FirstSequenceStart -= gapLength;
                        delta.FirstSequenceEnd -= gapLength;
                    }
                }

                // Start a new group from the right side of the gap
                deltaWithLargestEndIndex = nextDelta;
                deltasOverlappingAtCurrentIndex.Clear();
                deltasOverlappingAtCurrentIndex.Add(nextDelta);
            }
        }

        // Everything has been processed: emit the deltas still held by the cache.
        unloadedDeltas = deltaCache.GetCachedDeltas();

        for (int i = 0; i < unloadedDeltas.Count; i++)
        {
            yield return unloadedDeltas[i];
        }

        unloadedDeltas.Clear();
    }
    finally
    {
        // Release working state when enumeration completes or the enumerator is disposed early.
        deltasOverlappingAtCurrentIndex.Clear();
        deltasOverlappingAtCurrentIndex = null;

        leftSideDeltas.Clear();
        leftSideDeltas = null;

        rightSideDeltas.Clear();
        rightSideDeltas = null;

        deltaCache = null;
    }
}