コード例 #1
0
ファイル: LayoutRefiner.cs プロジェクト: sjmercer65/bio
        /// <summary>
        /// Refines alignment layout by taking into consideration indels (insertions and deletions)
        /// and rearrangements between two genomes.
        /// Requires mate-pair information to resolve ambiguity.
        /// </summary>
        /// <param name="orderedDeltas">Ordered deltas.</param>
        /// <returns>
        /// A lazily evaluated sequence of deltas: those released by the windowed cache while the
        /// layout is processed, followed by the deltas still cached when processing ends.
        /// The FirstSequenceStart/FirstSequenceEnd values of the deltas are adjusted in place.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown at call time (not on first enumeration) when <paramref name="orderedDeltas"/> is null.
        /// </exception>
        public static IEnumerable<DeltaAlignment> RefineLayout(DeltaAlignmentCollection orderedDeltas)
        {
            // Validate eagerly: inside an iterator method the check would be deferred
            // until the caller starts enumerating the result.
            if (orderedDeltas == null)
            {
                throw new ArgumentNullException("orderedDeltas");
            }

            return RefineLayoutIterator(orderedDeltas);
        }

        /// <summary>
        /// Iterator that performs the actual layout refinement for <see cref="RefineLayout"/>.
        /// Walks the deltas in order, tracking the group of deltas that overlap the current
        /// position, and handles three cases for each next delta: adjacent to the group,
        /// overlapping the group, or separated from the group by a gap.
        /// </summary>
        /// <param name="orderedDeltas">Ordered deltas; must not be null (checked by the caller).</param>
        /// <returns>Deltas as they are unloaded from the cache, then the remaining cached deltas.</returns>
        private static IEnumerable<DeltaAlignment> RefineLayoutIterator(DeltaAlignmentCollection orderedDeltas)
        {
            if (orderedDeltas.Count == 0)
            {
                yield break;
            }

            // The maximum possible insertion/deletion size is not known in advance;
            // a window of 1,000 deltas is assumed to be sufficient for the operation.
            // NOTE(review): the original comment said 1,000,000 while the value used is 1000 - confirm the intended size.
            int windowSize = 1000;

            VirtualDeltaAlignmentCollection deltaCache = new VirtualDeltaAlignmentCollection(orderedDeltas, windowSize);

            List<DeltaAlignment> deltasOverlappingAtCurrentIndex = null;
            List<DeltaAlignment> leftSideDeltas = null;
            List<DeltaAlignment> rightSideDeltas = null;
            List<DeltaAlignment> unloadedDeltas;

            try
            {
                deltasOverlappingAtCurrentIndex = new List<DeltaAlignment>();
                leftSideDeltas = new List<DeltaAlignment>();
                rightSideDeltas = new List<DeltaAlignment>();

                // Cumulative shift applied to every subsequent delta's first-sequence coordinates.
                long currentProcessedOffset = 0;
                DeltaAlignment alignment = deltaCache[0];
                deltasOverlappingAtCurrentIndex.Add(alignment);
                DeltaAlignment deltaWithLargestEndIndex = alignment;

                for (int currentIndex = 0; currentIndex < deltaCache.Count - 1; currentIndex++)
                {
                    DeltaAlignment nextDelta = deltaCache[currentIndex + 1];

                    // Emit whatever the cache releases at this point.
                    if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (currentProcessedOffset != 0)
                    {
                        nextDelta.FirstSequenceStart += currentProcessedOffset;
                        nextDelta.FirstSequenceEnd += currentProcessedOffset;
                    }

                    // Check if next delta is just adjacent
                    if (nextDelta.FirstSequenceStart - 1 == deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        // If next delta is adjacent there is a possible insertion in target (deletion in reference)
                        // Try to extend the deltas from both sides and make them meet
                        leftSideDeltas.Clear();
                        for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                        {
                            DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                            if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                            {
                                leftSideDeltas.Add(delta);
                            }
                        }

                        // Find all deltas starting at the adjacent right side
                        rightSideDeltas.Clear();
                        for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                        {
                            DeltaAlignment delta = deltaCache[index];
                            if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                            {
                                for (int i = 0; i < unloadedDeltas.Count; i++)
                                {
                                    yield return unloadedDeltas[i];
                                }

                                unloadedDeltas.Clear();
                            }

                            if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                            {
                                break;
                            }

                            rightSideDeltas.Add(delta);
                        }

                        long offset = ExtendDeltas(leftSideDeltas, rightSideDeltas);

                        if (offset != 0)
                        {
                            nextDelta.FirstSequenceStart += offset;
                            nextDelta.FirstSequenceEnd += offset;
                        }

                        currentProcessedOffset += offset;
                    }
                    else if (nextDelta.FirstSequenceStart <= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        // Next delta overlaps with the current overlap group
                        deltasOverlappingAtCurrentIndex.Add(nextDelta);

                        // Check if nextDelta is reaching farther than the current farthest delta
                        if (nextDelta.FirstSequenceEnd > deltaWithLargestEndIndex.FirstSequenceEnd)
                        {
                            deltaWithLargestEndIndex = nextDelta;
                        }

                        // Keep the group bounded: once it outgrows the window, drop every delta
                        // that ends before the farthest end seen so far.
                        if (deltasOverlappingAtCurrentIndex.Count > windowSize)
                        {
                            for (int i = deltasOverlappingAtCurrentIndex.Count - 1; i >= 0; i--)
                            {
                                if (deltasOverlappingAtCurrentIndex[i].FirstSequenceEnd < deltaWithLargestEndIndex.FirstSequenceEnd)
                                {
                                    deltasOverlappingAtCurrentIndex.RemoveAt(i);
                                }
                            }
                        }
                    }
                    else
                    {
                        // No overlap with nextDelta, so there is a gap at the end of deltaWithLargestEndIndex
                        // Try fix insertion in reference by pulling together two ends of deltas on both sides of the gap
                        leftSideDeltas.Clear();
                        for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                        {
                            DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                            if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                            {
                                leftSideDeltas.Add(delta);
                            }
                        }

                        // Find all deltas starting at the right end of the gap
                        rightSideDeltas.Clear();
                        for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                        {
                            DeltaAlignment delta = deltaCache[index];
                            if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                            {
                                for (int i = 0; i < unloadedDeltas.Count; i++)
                                {
                                    yield return unloadedDeltas[i];
                                }

                                unloadedDeltas.Clear();
                            }

                            if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                            {
                                break;
                            }

                            rightSideDeltas.Add(delta);
                        }

                        // +1 for each left-side delta whose query sequence also appears on the
                        // right side of the gap, -1 for each one that has no match there.
                        int score = 0;
                        for (int i = 0; i < leftSideDeltas.Count; i++)
                        {
                            var l = leftSideDeltas[i];
                            int j = 0;

                            for (; j < rightSideDeltas.Count; j++)
                            {
                                var r = rightSideDeltas[j];

                                // A reference-equality check is not possible here, so the IDs are compared
                                // instead; IDs are unique for a given read.
                                if (l.QuerySequence.ID == r.QuerySequence.ID)
                                {
                                    score++;
                                    break;
                                }
                            }

                            if (j == rightSideDeltas.Count)
                            {
                                score--;
                            }
                        }

                        // Score > 0 means most deltas share same query sequence at both ends, so close this gap
                        if (score > 0)
                        {
                            long gaplength = (nextDelta.FirstSequenceStart - deltaWithLargestEndIndex.FirstSequenceEnd) - 1;
                            currentProcessedOffset -= gaplength;

                            // Pull deltas on right side to close the gap
                            for (int i = 0; i < rightSideDeltas.Count; i++)
                            {
                                DeltaAlignment delta = rightSideDeltas[i];
                                delta.FirstSequenceStart -= gaplength;
                                delta.FirstSequenceEnd -= gaplength;
                            }
                        }

                        // Start a new group from the right side of the gap
                        deltaWithLargestEndIndex = nextDelta;
                        deltasOverlappingAtCurrentIndex.Clear();
                        deltasOverlappingAtCurrentIndex.Add(nextDelta);
                    }
                }

                // Emit everything that is still held by the cache.
                unloadedDeltas = deltaCache.GetCachedDeltas();

                for (int i = 0; i < unloadedDeltas.Count; i++)
                {
                    yield return unloadedDeltas[i];
                }

                unloadedDeltas.Clear();
            }
            finally
            {
                // Runs when enumeration completes or the enumerator is disposed early;
                // drops the references held by the working lists and the cache.
                if (deltasOverlappingAtCurrentIndex != null)
                {
                    deltasOverlappingAtCurrentIndex.Clear();
                    deltasOverlappingAtCurrentIndex = null;
                }

                if (leftSideDeltas != null)
                {
                    leftSideDeltas.Clear();
                    leftSideDeltas = null;
                }

                if (rightSideDeltas != null)
                {
                    rightSideDeltas.Clear();
                    rightSideDeltas = null;
                }

                deltaCache = null;
            }
        }
コード例 #2
0
ファイル: LayoutRefiner.cs プロジェクト: cpatmoore/bio
        /// <summary>
        /// Refines alignment layout by taking into consideration indels (insertions and deletions)
        /// and rearrangements between two genomes.
        /// Requires mate-pair information to resolve ambiguity.
        /// </summary>
        /// <param name="orderedDeltas">Ordered deltas.</param>
        /// <returns>
        /// A lazily evaluated sequence of deltas: those released by the windowed cache while the
        /// layout is processed, followed by the deltas still cached when processing ends.
        /// The FirstSequenceStart/FirstSequenceEnd values of the deltas are adjusted in place.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown at call time (not on first enumeration) when <paramref name="orderedDeltas"/> is null.
        /// </exception>
        public static IEnumerable<DeltaAlignment> RefineLayout(DeltaAlignmentCollection orderedDeltas)
        {
            // Validate eagerly: inside an iterator method the check would be deferred
            // until the caller starts enumerating the result.
            if (orderedDeltas == null)
            {
                throw new ArgumentNullException("orderedDeltas");
            }

            return RefineLayoutIterator(orderedDeltas);
        }

        /// <summary>
        /// Iterator that performs the actual layout refinement for <see cref="RefineLayout"/>.
        /// Walks the deltas in order, tracking the group of deltas that overlap the current
        /// position, and handles three cases for each next delta: adjacent to the group,
        /// overlapping the group, or separated from the group by a gap.
        /// </summary>
        /// <param name="orderedDeltas">Ordered deltas; must not be null (checked by the caller).</param>
        /// <returns>Deltas as they are unloaded from the cache, then the remaining cached deltas.</returns>
        private static IEnumerable<DeltaAlignment> RefineLayoutIterator(DeltaAlignmentCollection orderedDeltas)
        {
            if (orderedDeltas.Count == 0)
            {
                yield break;
            }

            // The maximum possible insertion/deletion size is not known in advance;
            // a window of 1,000 deltas is assumed to be sufficient for the operation.
            // NOTE(review): the original comment said 1,000,000 while the value used is 1000 - confirm the intended size.
            int windowSize = 1000;

            VirtualDeltaAlignmentCollection deltaCache = new VirtualDeltaAlignmentCollection(orderedDeltas, windowSize);

            List<DeltaAlignment> deltasOverlappingAtCurrentIndex = null;
            List<DeltaAlignment> leftSideDeltas = null;
            List<DeltaAlignment> rightSideDeltas = null;
            List<DeltaAlignment> unloadedDeltas;

            try
            {
                deltasOverlappingAtCurrentIndex = new List<DeltaAlignment>();
                leftSideDeltas = new List<DeltaAlignment>();
                rightSideDeltas = new List<DeltaAlignment>();

                // Cumulative shift applied to every subsequent delta's first-sequence coordinates.
                long currentProcessedOffset = 0;
                DeltaAlignment alignment = deltaCache[0];
                deltasOverlappingAtCurrentIndex.Add(alignment);
                DeltaAlignment deltaWithLargestEndIndex = alignment;

                for (int currentIndex = 0; currentIndex < deltaCache.Count - 1; currentIndex++)
                {
                    DeltaAlignment nextDelta = deltaCache[currentIndex + 1];

                    // Emit whatever the cache releases at this point.
                    if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                    {
                        for (int i = 0; i < unloadedDeltas.Count; i++)
                        {
                            yield return unloadedDeltas[i];
                        }

                        unloadedDeltas.Clear();
                    }

                    if (currentProcessedOffset != 0)
                    {
                        nextDelta.FirstSequenceStart += currentProcessedOffset;
                        nextDelta.FirstSequenceEnd += currentProcessedOffset;
                    }

                    // Check if next delta is just adjacent
                    if (nextDelta.FirstSequenceStart - 1 == deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        // If next delta is adjacent there is a possible insertion in target (deletion in reference)
                        // Try to extend the deltas from both sides and make them meet
                        leftSideDeltas.Clear();
                        for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                        {
                            DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                            if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                            {
                                leftSideDeltas.Add(delta);
                            }
                        }

                        // Find all deltas starting at the adjacent right side
                        rightSideDeltas.Clear();
                        for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                        {
                            DeltaAlignment delta = deltaCache[index];
                            if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                            {
                                for (int i = 0; i < unloadedDeltas.Count; i++)
                                {
                                    yield return unloadedDeltas[i];
                                }

                                unloadedDeltas.Clear();
                            }

                            if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                            {
                                break;
                            }

                            rightSideDeltas.Add(delta);
                        }

                        long offset = ExtendDeltas(leftSideDeltas, rightSideDeltas);

                        if (offset != 0)
                        {
                            nextDelta.FirstSequenceStart += offset;
                            nextDelta.FirstSequenceEnd += offset;
                        }

                        currentProcessedOffset += offset;
                    }
                    else if (nextDelta.FirstSequenceStart <= deltaWithLargestEndIndex.FirstSequenceEnd)
                    {
                        // Next delta overlaps with the current overlap group
                        deltasOverlappingAtCurrentIndex.Add(nextDelta);

                        // Check if nextDelta is reaching farther than the current farthest delta
                        if (nextDelta.FirstSequenceEnd > deltaWithLargestEndIndex.FirstSequenceEnd)
                        {
                            deltaWithLargestEndIndex = nextDelta;
                        }

                        // Keep the group bounded: once it outgrows the window, drop every delta
                        // that ends before the farthest end seen so far.
                        if (deltasOverlappingAtCurrentIndex.Count > windowSize)
                        {
                            for (int i = deltasOverlappingAtCurrentIndex.Count - 1; i >= 0; i--)
                            {
                                if (deltasOverlappingAtCurrentIndex[i].FirstSequenceEnd < deltaWithLargestEndIndex.FirstSequenceEnd)
                                {
                                    deltasOverlappingAtCurrentIndex.RemoveAt(i);
                                }
                            }
                        }
                    }
                    else
                    {
                        // No overlap with nextDelta, so there is a gap at the end of deltaWithLargestEndIndex
                        // Try fix insertion in reference by pulling together two ends of deltas on both sides of the gap
                        leftSideDeltas.Clear();
                        for (int index = 0; index < deltasOverlappingAtCurrentIndex.Count; index++)
                        {
                            DeltaAlignment delta = deltasOverlappingAtCurrentIndex[index];
                            if (delta.FirstSequenceEnd >= deltaWithLargestEndIndex.FirstSequenceEnd)
                            {
                                leftSideDeltas.Add(delta);
                            }
                        }

                        // Find all deltas starting at the right end of the gap
                        rightSideDeltas.Clear();
                        for (long index = currentIndex + 1; index < deltaCache.Count; index++)
                        {
                            DeltaAlignment delta = deltaCache[index];
                            if (deltaCache.TryUnload(currentIndex + 1, out unloadedDeltas))
                            {
                                for (int i = 0; i < unloadedDeltas.Count; i++)
                                {
                                    yield return unloadedDeltas[i];
                                }

                                unloadedDeltas.Clear();
                            }

                            if (delta.FirstSequenceStart != nextDelta.FirstSequenceStart)
                            {
                                break;
                            }

                            rightSideDeltas.Add(delta);
                        }

                        // +1 for each left-side delta whose query sequence also appears on the
                        // right side of the gap, -1 for each one that has no match there.
                        int score = 0;
                        for (int i = 0; i < leftSideDeltas.Count; i++)
                        {
                            var l = leftSideDeltas[i];
                            int j = 0;

                            for (; j < rightSideDeltas.Count; j++)
                            {
                                var r = rightSideDeltas[j];

                                // A reference-equality check is not possible here, so the IDs are compared
                                // instead; IDs are unique for a given read.
                                if (l.QuerySequence.ID == r.QuerySequence.ID)
                                {
                                    score++;
                                    break;
                                }
                            }

                            if (j == rightSideDeltas.Count)
                            {
                                score--;
                            }
                        }

                        // Score > 0 means most deltas share same query sequence at both ends, so close this gap
                        if (score > 0)
                        {
                            long gaplength = (nextDelta.FirstSequenceStart - deltaWithLargestEndIndex.FirstSequenceEnd) - 1;
                            currentProcessedOffset -= gaplength;

                            // Pull deltas on right side to close the gap
                            for (int i = 0; i < rightSideDeltas.Count; i++)
                            {
                                DeltaAlignment delta = rightSideDeltas[i];
                                delta.FirstSequenceStart -= gaplength;
                                delta.FirstSequenceEnd -= gaplength;
                            }
                        }

                        // Start a new group from the right side of the gap
                        deltaWithLargestEndIndex = nextDelta;
                        deltasOverlappingAtCurrentIndex.Clear();
                        deltasOverlappingAtCurrentIndex.Add(nextDelta);
                    }
                }

                // Emit everything that is still held by the cache.
                unloadedDeltas = deltaCache.GetCachedDeltas();

                for (int i = 0; i < unloadedDeltas.Count; i++)
                {
                    yield return unloadedDeltas[i];
                }

                unloadedDeltas.Clear();
            }
            finally
            {
                // Runs when enumeration completes or the enumerator is disposed early;
                // drops the references held by the working lists and the cache.
                if (deltasOverlappingAtCurrentIndex != null)
                {
                    deltasOverlappingAtCurrentIndex.Clear();
                    deltasOverlappingAtCurrentIndex = null;
                }

                if (leftSideDeltas != null)
                {
                    leftSideDeltas.Clear();
                    leftSideDeltas = null;
                }

                if (rightSideDeltas != null)
                {
                    rightSideDeltas.Clear();
                    rightSideDeltas = null;
                }

                deltaCache = null;
            }
        }