/// <summary>
        /// This takes a specific starting location in the scoring matrix and generates
        /// an alignment from it using the traceback scores.
        /// </summary>
        /// <param name="startingCell">Starting point</param>
        /// <returns>Pairwise alignment</returns>
        private PairwiseAlignedSequence CreateAlignmentFromCell(OptScoreMatrixCell startingCell)
        {
            // Throws InvalidOperationException when the traceback matrix holds a direction
            // other than Diagonal, Left or Up at a cell where the traceback is not complete.

            // Get the starting cell position and record the optimal score found there.
            int i          = startingCell.Row;
            int j          = startingCell.Col;
            var finalScore = startingCell.Score;

            // Each traceback step below consumes one reference symbol, one query symbol,
            // or both, so the alignment cannot have more columns than the two lengths
            // combined. Sizing the buffers by the product of the lengths instead needs
            // quadratic memory, can overflow, and is too small when a sequence holds
            // fewer than two symbols.
            long maxColumns      = (long)ReferenceSequence.Length + QuerySequence.Length;
            var  firstAlignment  = new byte[maxColumns];
            var  secondAlignment = new byte[maxColumns];

            long rowGaps = 0, colGaps = 0, identicalCount = 0, similarityCount = 0;

            // Walk the traceback matrix and build the alignments. Both rows receive
            // exactly one symbol per step, so a single column counter covers both.
            int columns = 0;

            while (!TracebackIsComplete(i, j))
            {
                sbyte tracebackDirection = Traceback[i][j];

                switch (tracebackDirection)
                {
                case SourceDirection.Diagonal:
                {
                    // Both sequences contribute their current symbol.
                    byte n1 = ReferenceSequence[j - 1];
                    byte n2 = QuerySequence[i - 1];
                    firstAlignment[columns]  = n1;
                    secondAlignment[columns] = n2;

                    // Track some useful statistics. Only symbol-vs-symbol columns are
                    // scored; gap columns are never looked up in the similarity matrix.
                    if (n1 == n2 && n1 != _gap)
                    {
                        identicalCount++;
                        similarityCount++;
                    }
                    else if (SimilarityMatrix[n2, n1] > 0)
                    {
                        similarityCount++;
                    }

                    i--;
                    j--;
                    break;
                }

                case SourceDirection.Left:
                    // Reference symbol aligned against a gap in the query row.
                    firstAlignment[columns]  = ReferenceSequence[j - 1];
                    secondAlignment[columns] = _gap;
                    rowGaps++;
                    j--;
                    break;

                case SourceDirection.Up:
                    // Query symbol aligned against a gap in the reference row.
                    firstAlignment[columns]  = _gap;
                    secondAlignment[columns] = QuerySequence[i - 1];
                    colGaps++;
                    i--;
                    break;

                default:
                    // Neither index would move, so the loop could never finish.
                    throw new InvalidOperationException(
                        "Unexpected traceback direction " + tracebackDirection +
                        " at row " + i + ", column " + j + ".");
                }

                columns++;
            }

            // We build the alignments in reverse since we were
            // walking backwards through the matrix table. To create
            // the proper alignments we need to trim and reverse
            // both underlying arrays.
            Array.Resize(ref firstAlignment, columns);
            Array.Reverse(firstAlignment);
            Array.Resize(ref secondAlignment, columns);
            Array.Reverse(secondAlignment);

            // Create the Consensus sequence, one symbol per alignment column.
            byte[] consensus = new byte[columns];
            for (int n = 0; n < consensus.Length; n++)
            {
                consensus[n] = ConsensusResolver.GetConsensus(new[] { firstAlignment[n], secondAlignment[n] });
            }

            // Create the result alignment
            var pairwiseAlignedSequence = new PairwiseAlignedSequence
            {
                Score         = finalScore,
                FirstSequence = new Sequence(_sequence1.Alphabet, firstAlignment)
                {
                    ID = _sequence1.ID
                },
                SecondSequence = new Sequence(_sequence2.Alphabet, secondAlignment)
                {
                    ID = _sequence2.ID
                },
                Consensus = new Sequence(ConsensusResolver.SequenceAlphabet, consensus),
            };

            // Offset is start of alignment in input sequence with respect to other sequence.
            // At this point (i, j) is the cell where the traceback stopped.
            if (i >= j)
            {
                pairwiseAlignedSequence.FirstOffset  = i - j;
                pairwiseAlignedSequence.SecondOffset = 0;
            }
            else
            {
                pairwiseAlignedSequence.FirstOffset  = 0;
                pairwiseAlignedSequence.SecondOffset = j - i;
            }

            // Add in ISequenceAlignment metadata
            pairwiseAlignedSequence.Metadata["Score"]           = pairwiseAlignedSequence.Score;
            pairwiseAlignedSequence.Metadata["FirstOffset"]     = pairwiseAlignedSequence.FirstOffset;
            pairwiseAlignedSequence.Metadata["SecondOffset"]    = pairwiseAlignedSequence.SecondOffset;
            pairwiseAlignedSequence.Metadata["Consensus"]       = pairwiseAlignedSequence.Consensus;
            pairwiseAlignedSequence.Metadata["StartOffsets"]    = new List<long> { j, i };
            pairwiseAlignedSequence.Metadata["EndOffsets"]      = new List<long> { startingCell.Col - 1, startingCell.Row - 1 };
            pairwiseAlignedSequence.Metadata["Insertions"]      = new List<long> { colGaps, rowGaps }; // ref, query insertions
            pairwiseAlignedSequence.Metadata["IdenticalCount"]  = identicalCount;
            pairwiseAlignedSequence.Metadata["SimilarityCount"] = similarityCount;

            return pairwiseAlignedSequence;
        }
Exemplo n.º 2
0
        /// <summary>
        /// This takes a specific starting location in the scoring matrix and generates
        /// an alignment from it using the traceback scores.
        /// </summary>
        /// <param name="startingCell">Starting point</param>
        /// <returns>Pairwise alignment</returns>
        protected PairwiseAlignedSequence CreateAlignmentFromCell(OptScoreMatrixCell startingCell)
        {
            // Throws InvalidOperationException when the traceback cannot advance: either the
            // matrix holds a direction other than Diagonal, Left or Up at a cell where the
            // traceback is not complete, or an affine gap is recorded with a length below 1.

            // Row stride used to index the flattened h_Gap_Length / v_Gap_Length tables.
            int gapStride = Cols + 1;

            // Lists grow on demand, so this is only an initial capacity hint.
            int estimatedLength = (int)(1.1 * Math.Max(ReferenceSequence.Length, QuerySequence.Length));
            var firstAlignment  = new List<byte>(estimatedLength);
            var secondAlignment = new List<byte>(estimatedLength);

            // Get the starting cell position and record the optimal score found there.
            int i          = startingCell.Row;
            int j          = startingCell.Col;
            var finalScore = startingCell.Score;

            long rowGaps = 0, colGaps = 0, identicalCount = 0, similarityCount = 0;

            // Walk the traceback matrix and build the alignments.
            while (!TracebackIsComplete(i, j))
            {
                sbyte tracebackDirection = Traceback[i][j];

                switch (tracebackDirection)
                {
                case SourceDirection.Diagonal:
                {
                    byte n1 = ReferenceSequence[j - 1];
                    byte n2 = QuerySequence[i - 1];
                    firstAlignment.Add(n1);
                    secondAlignment.Add(n2);
                    i--;
                    j--;

                    // Track some useful statistics
                    if (n1 == n2 && n1 != _gap)
                    {
                        identicalCount++;
                        similarityCount++;
                    }
                    else if (SimilarityMatrix[n2, n1] > 0)
                    {
                        similarityCount++;
                    }
                    break;
                }

                case SourceDirection.Left:
                {
                    // With affine gaps the whole run is emitted at once, using the stored
                    // length as the number of columns; otherwise one column per step.
                    int gapLength = 1;
                    if (usingAffineGapModel)
                    {
                        gapLength = h_Gap_Length[i * gapStride + j];
                    }

                    // A length below 1 would leave j unchanged and the loop would never end.
                    if (gapLength < 1)
                    {
                        throw new InvalidOperationException(
                            "Invalid horizontal gap length " + gapLength +
                            " at row " + i + ", column " + j + ".");
                    }

                    for (int k = 0; k < gapLength; k++)
                    {
                        firstAlignment.Add(ReferenceSequence[--j]);
                        secondAlignment.Add(_gap);
                        rowGaps++;
                    }
                    break;
                }

                case SourceDirection.Up:
                {
                    // Same scheme as the Left case, but consuming query symbols.
                    int gapLength = 1;
                    if (usingAffineGapModel)
                    {
                        gapLength = v_Gap_Length[i * gapStride + j];
                    }

                    // A length below 1 would leave i unchanged and the loop would never end.
                    if (gapLength < 1)
                    {
                        throw new InvalidOperationException(
                            "Invalid vertical gap length " + gapLength +
                            " at row " + i + ", column " + j + ".");
                    }

                    for (int k = 0; k < gapLength; k++)
                    {
                        firstAlignment.Add(_gap);
                        colGaps++;
                        secondAlignment.Add(QuerySequence[--i]);
                    }
                    break;
                }

                default:
                    // Neither index would move, so the loop could never finish.
                    throw new InvalidOperationException(
                        "Unexpected traceback direction " + tracebackDirection +
                        " at row " + i + ", column " + j + ".");
                }
            }

            // We build the alignments in reverse since we were
            // walking backwards through the matrix table, so both
            // lists are reversed to get the proper alignments.
            firstAlignment.Reverse();
            secondAlignment.Reverse();

            // Create the Consensus sequence. Every step above adds the same number of
            // symbols to both lists, so either count gives the number of columns.
            byte[] consensus = new byte[firstAlignment.Count];
            for (int n = 0; n < consensus.Length; n++)
            {
                consensus[n] = ConsensusResolver.GetConsensus(new[] { firstAlignment[n], secondAlignment[n] });
            }

            // Create the result alignment
            var pairwiseAlignedSequence = new PairwiseAlignedSequence
            {
                Score         = finalScore,
                FirstSequence = new Sequence(_sequence1.Alphabet, firstAlignment.ToArray())
                {
                    ID = _sequence1.ID
                },
                SecondSequence = new Sequence(_sequence2.Alphabet, secondAlignment.ToArray())
                {
                    ID = _sequence2.ID
                },
                Consensus = new Sequence(ConsensusResolver.SequenceAlphabet, consensus),
            };

            // Offset is start of alignment in input sequence with respect to other sequence.
            // At this point (i, j) is the cell where the traceback stopped.
            if (i >= j)
            {
                pairwiseAlignedSequence.FirstOffset  = i - j;
                pairwiseAlignedSequence.SecondOffset = 0;
            }
            else
            {
                pairwiseAlignedSequence.FirstOffset  = 0;
                pairwiseAlignedSequence.SecondOffset = j - i;
            }

            // Add in ISequenceAlignment metadata
            pairwiseAlignedSequence.Metadata["Score"]           = pairwiseAlignedSequence.Score;
            pairwiseAlignedSequence.Metadata["FirstOffset"]     = pairwiseAlignedSequence.FirstOffset;
            pairwiseAlignedSequence.Metadata["SecondOffset"]    = pairwiseAlignedSequence.SecondOffset;
            pairwiseAlignedSequence.Metadata["Consensus"]       = pairwiseAlignedSequence.Consensus;
            pairwiseAlignedSequence.Metadata["StartOffsets"]    = new List<long> { j, i };
            pairwiseAlignedSequence.Metadata["EndOffsets"]      = new List<long> { startingCell.Col - 1, startingCell.Row - 1 };
            pairwiseAlignedSequence.Metadata["Insertions"]      = new List<long> { colGaps, rowGaps }; // ref, query insertions
            pairwiseAlignedSequence.Metadata["IdenticalCount"]  = identicalCount;
            pairwiseAlignedSequence.Metadata["SimilarityCount"] = similarityCount;

            return pairwiseAlignedSequence;
        }
Exemplo n.º 3
0
        /// <summary>
        /// This takes a specific starting location in the scoring matrix and generates
        /// an alignment from it using the traceback scores.
        /// </summary>
        /// <param name="startingCell">Starting point</param>
        /// <returns>Pairwise alignment</returns>
        protected PairwiseAlignedSequence CreateAlignmentFromCell(OptScoreMatrixCell startingCell)
        {
            // Throws InvalidOperationException when a traceback step moves neither the row
            // nor the column (unknown direction code, or an affine gap whose stored length
            // is not positive), since the walk could then never complete.

            // Row stride used to index the flattened h_Gap_Length / v_Gap_Length tables.
            int gapStride = Cols + 1;

            // Lists grow on demand, so this is only an initial capacity hint.
            int estimatedLength = (int)(1.1 * Math.Max(ReferenceSequence.Length, QuerySequence.Length));
            var firstAlignment = new List<byte>(estimatedLength);
            var secondAlignment = new List<byte>(estimatedLength);

            // Get the starting cell position and record the optimal score found there.
            int i = startingCell.Row;
            int j = startingCell.Col;
            var finalScore = startingCell.Score;

            long rowGaps = 0, colGaps = 0, identicalCount = 0, similarityCount = 0;

            // Walk the traceback matrix and build the alignments.
            while (!TracebackIsComplete(i, j))
            {
                // Remember where this step started so a stalled step can be detected.
                int stepRow = i;
                int stepCol = j;
                sbyte tracebackDirection = Traceback[i][j];

                int gapLength;
                switch (tracebackDirection)
                {
                    case SourceDirection.Diagonal:
                        byte n1 = ReferenceSequence[j - 1];
                        byte n2 = QuerySequence[i - 1];
                        firstAlignment.Add(n1);
                        secondAlignment.Add(n2);
                        i--;
                        j--;
                        // Track some useful statistics
                        if (n1 == n2 && n1 != _gap)
                        {
                            identicalCount++;
                            similarityCount++;
                        }
                        else if (SimilarityMatrix[n2, n1] > 0)
                        {
                            similarityCount++;
                        }
                        break;
                    case SourceDirection.Left:
                        // With affine gaps the whole run is emitted at once, using the
                        // stored length as the number of columns; otherwise one column.
                        gapLength = 1;
                        if (usingAffineGapModel)
                        {
                            gapLength = h_Gap_Length[i * gapStride + j];
                        }
                        for (int k = 0; k < gapLength; k++)
                        {
                            firstAlignment.Add(ReferenceSequence[--j]);
                            secondAlignment.Add(_gap);
                            rowGaps++;
                        }
                        break;
                    case SourceDirection.Up:
                        // Same scheme as the Left case, but consuming query symbols.
                        gapLength = 1;
                        if (usingAffineGapModel)
                        {
                            gapLength = v_Gap_Length[i * gapStride + j];
                        }
                        for (int k = 0; k < gapLength; k++)
                        {
                            firstAlignment.Add(_gap);
                            colGaps++;
                            secondAlignment.Add(QuerySequence[--i]);
                        }
                        break;
                    default:
                        // Unknown direction: nothing is consumed; the check below reports it.
                        break;
                }

                // TracebackIsComplete is only given (i, j); if neither changed, the same
                // cell would be revisited on every iteration, so fail instead of spinning.
                if (i == stepRow && j == stepCol)
                {
                    throw new InvalidOperationException(
                        "Traceback cannot advance from row " + i + ", column " + j +
                        " (direction " + tracebackDirection + ").");
                }
            }

            // We build the alignments in reverse since we were
            // walking backwards through the matrix table, so both
            // lists are reversed to get the proper alignments.
            firstAlignment.Reverse();
            secondAlignment.Reverse();

            // Create the Consensus sequence. Every step above adds the same number of
            // symbols to both lists, so either count gives the number of columns.
            byte[] consensus = new byte[firstAlignment.Count];
            for (int n = 0; n < consensus.Length; n++)
            {
                consensus[n] = ConsensusResolver.GetConsensus(new[] { firstAlignment[n], secondAlignment[n] });
            }

            // Create the result alignment
            var pairwiseAlignedSequence = new PairwiseAlignedSequence
            {
                Score = finalScore,
                FirstSequence = new Sequence(_sequence1.Alphabet, firstAlignment.ToArray()) { ID = _sequence1.ID },
                SecondSequence = new Sequence(_sequence2.Alphabet, secondAlignment.ToArray()) { ID = _sequence2.ID },
                Consensus = new Sequence(ConsensusResolver.SequenceAlphabet, consensus),
            };

            // Offset is start of alignment in input sequence with respect to other sequence.
            // At this point (i, j) is the cell where the traceback stopped.
            if (i >= j)
            {
                pairwiseAlignedSequence.FirstOffset = i - j;
                pairwiseAlignedSequence.SecondOffset = 0;
            }
            else
            {
                pairwiseAlignedSequence.FirstOffset = 0;
                pairwiseAlignedSequence.SecondOffset = j - i;
            }

            // Add in ISequenceAlignment metadata
            pairwiseAlignedSequence.Metadata["Score"] = pairwiseAlignedSequence.Score;
            pairwiseAlignedSequence.Metadata["FirstOffset"] = pairwiseAlignedSequence.FirstOffset;
            pairwiseAlignedSequence.Metadata["SecondOffset"] = pairwiseAlignedSequence.SecondOffset;
            pairwiseAlignedSequence.Metadata["Consensus"] = pairwiseAlignedSequence.Consensus;
            pairwiseAlignedSequence.Metadata["StartOffsets"] = new List<long> { j, i };
            pairwiseAlignedSequence.Metadata["EndOffsets"] = new List<long> { startingCell.Col - 1, startingCell.Row - 1 };
            pairwiseAlignedSequence.Metadata["Insertions"] = new List<long> { colGaps, rowGaps }; // ref, query insertions
            pairwiseAlignedSequence.Metadata["IdenticalCount"] = identicalCount;
            pairwiseAlignedSequence.Metadata["SimilarityCount"] = similarityCount;

            return pairwiseAlignedSequence;
        }