Exemple #1
0
        /// <summary>
        /// Aligns the region lying between two consecutive MUMs and appends the outcome,
        /// followed by the second MUM itself, to the three result sequences.
        /// If both sequences have symbols in the region it is passed to RunPairWise;
        /// if only one of them has symbols, the other result is padded with
        /// CreateDefaultGap and the padding is scored with the configured gap costs.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="querySequence">Query sequence</param>
        /// <param name="sequenceResult1">Editable sequence that receives the aligned reference symbols</param>
        /// <param name="sequenceResult2">Editable sequence that receives the aligned query symbols</param>
        /// <param name="consensusResult">Editable sequence that receives the consensus symbols</param>
        /// <param name="mum1">MUM in front of the gap; null makes the gap start at index 0 of both sequences</param>
        /// <param name="mum2">MUM behind the gap; null makes the gap run to the end of both sequences</param>
        /// <param name="insertions">
        /// Two-element list of insertions made to the aligned sequences for this gap:
        /// element 0 grows when the first result is padded, element 1 when the second is.
        /// </param>
        /// <returns>Score of the gap alignment plus the score of the second MUM</returns>
        private int AlignGap(
            ISequence referenceSequence,
            ISequence querySequence,
            ISequence sequenceResult1,
            ISequence sequenceResult2,
            ISequence consensusResult,
            MaxUniqueMatch mum1,
            MaxUniqueMatch mum2,
            out List<int> insertions)
        {
            insertions = new List<int>(2) { 0, 0 };
            int total = 0;

            // Without a leading MUM every value is 0, so the gap opens at the very start.
            int leadReferenceStart = (mum1 != null) ? mum1.FirstSequenceStart : 0;
            int leadQueryStart = (mum1 != null) ? mum1.SecondSequenceStart : 0;
            int leadLength = (mum1 != null) ? mum1.Length : 0;

            // Without a trailing MUM the gap extends to the end of each sequence and
            // there is no MUM text to append afterwards (length 0).
            int tailReferenceStart = (mum2 != null) ? mum2.FirstSequenceStart : referenceSequence.Count;
            int tailQueryStart = (mum2 != null) ? mum2.SecondSequenceStart : querySequence.Count;
            int tailLength = (mum2 != null) ? mum2.Length : 0;

            int referenceGapStart = leadReferenceStart + leadLength;
            int queryGapStart = leadQueryStart + leadLength;

            bool referenceHasGap = tailReferenceStart > referenceGapStart;
            bool queryHasGap = tailQueryStart > queryGapStart;

            if (referenceHasGap && queryHasGap)
            {
                // Symbols on both sides: delegate the region to the pairwise aligner.
                ISequence referenceGap = referenceSequence.Range(
                    referenceGapStart,
                    tailReferenceStart - referenceGapStart);
                ISequence queryGap = querySequence.Range(
                    queryGapStart,
                    tailQueryStart - queryGapStart);

                IList<IPairwiseSequenceAlignment> gapAlignments = RunPairWise(referenceGap, queryGap);

                if (gapAlignments != null)
                {
                    foreach (IPairwiseSequenceAlignment gapAlignment in gapAlignments)
                    {
                        foreach (PairwiseAlignedSequence aligned in gapAlignment.PairwiseAlignedSequences)
                        {
                            sequenceResult1.InsertRange(
                                sequenceResult1.Count,
                                aligned.FirstSequence.ToString());
                            sequenceResult2.InsertRange(
                                sequenceResult2.Count,
                                aligned.SecondSequence.ToString());
                            consensusResult.InsertRange(
                                consensusResult.Count,
                                aligned.Consensus.ToString());

                            total += aligned.Score;

                            // Carry over the insertion counts only when the alignment
                            // supplies them as a List<int>.
                            if (!aligned.Metadata.ContainsKey("Insertions"))
                            {
                                continue;
                            }

                            List<int> reported = aligned.Metadata["Insertions"] as List<int>;
                            if (reported == null)
                            {
                                continue;
                            }

                            // At most the first two entries are used; shorter lists are tolerated.
                            for (int slot = 0; slot < insertions.Count && slot < reported.Count; slot++)
                            {
                                insertions[slot] += reported[slot];
                            }
                        }
                    }
                }
            }
            else if (referenceHasGap)
            {
                // Only the reference has symbols here: pad the query result with gaps.
                ISequence referenceGap = referenceSequence.Range(
                    referenceGapStart,
                    tailReferenceStart - referenceGapStart);
                string referenceText = referenceGap.ToString();
                int paddedLength = referenceGap.Count;

                sequenceResult1.InsertRange(sequenceResult1.Count, referenceText);
                sequenceResult2.InsertRange(sequenceResult2.Count, CreateDefaultGap(paddedLength));
                consensusResult.InsertRange(consensusResult.Count, referenceText);

                insertions[1] += paddedLength;

                // One opening plus extensions, or a flat open cost per padded symbol.
                total = UseGapExtensionCost
                    ? GapOpenCost + ((paddedLength - 1) * GapExtensionCost)
                    : paddedLength * GapOpenCost;
            }
            else if (queryHasGap)
            {
                // Only the query has symbols here: pad the reference result with gaps.
                ISequence queryGap = querySequence.Range(
                    queryGapStart,
                    tailQueryStart - queryGapStart);
                string queryText = queryGap.ToString();
                int paddedLength = queryGap.Count;

                sequenceResult1.InsertRange(sequenceResult1.Count, CreateDefaultGap(paddedLength));
                sequenceResult2.InsertRange(sequenceResult2.Count, queryText);
                consensusResult.InsertRange(consensusResult.Count, queryText);

                insertions[0] += paddedLength;

                // One opening plus extensions, or a flat open cost per padded symbol.
                total = UseGapExtensionCost
                    ? GapOpenCost + ((paddedLength - 1) * GapExtensionCost)
                    : paddedLength * GapOpenCost;
            }

            // Append the trailing MUM, if there is one, after the gap.
            if (tailLength > 0)
            {
                string matchedReference = referenceSequence.Range(
                    tailReferenceStart,
                    tailLength).ToString();
                sequenceResult1.InsertRange(sequenceResult1.Count, matchedReference);

                string matchedQuery = querySequence.Range(
                    tailQueryStart,
                    tailLength).ToString();
                sequenceResult2.InsertRange(sequenceResult2.Count, matchedQuery);

                // The reference text of the MUM doubles as its consensus.
                consensusResult.InsertRange(consensusResult.Count, matchedReference);

                // Score every MUM symbol against itself in the similarity matrix.
                foreach (byte symbolIndex in SimilarityMatrix.ToByteArray(matchedReference))
                {
                    total += SimilarityMatrix[symbolIndex, symbolIndex];
                }
            }

            return total;
        }
Exemple #2
0
        /// <summary>
        /// Walks the final MUM list and aligns every gap around the MUMs: the region
        /// before the first MUM, each region between neighbouring MUMs, and the region
        /// after the last MUM. All pieces are appended to one aligned result.
        /// </summary>
        /// <param name="referenceSequence">Reference sequence</param>
        /// <param name="sequence">Query sequence</param>
        /// <returns>
        /// Aligned sequences carrying the accumulated score, the consensus, and the
        /// "StartOffsets", "EndOffsets" and "Insertions" metadata entries
        /// </returns>
        private PairwiseAlignedSequence ProcessGaps(
            ISequence referenceSequence,
            ISequence sequence)
        {
            PairwiseAlignedSequence result = new PairwiseAlignedSequence();

            // NOTE(review): all three outputs are created with the reference alphabet,
            // presumably because both inputs share one alphabet - confirm.
            Sequence alignedReference = new Sequence(referenceSequence.Alphabet)
            {
                IsReadOnly = false,
                ID = referenceSequence.ID,
                DisplayID = referenceSequence.DisplayID
            };

            Sequence alignedQuery = new Sequence(referenceSequence.Alphabet)
            {
                IsReadOnly = false,
                ID = sequence.ID,
                DisplayID = sequence.DisplayID
            };

            // The consensus is labelled with the query's identifiers.
            Sequence consensus = new Sequence(referenceSequence.Alphabet)
            {
                IsReadOnly = false,
                ID = sequence.ID,
                DisplayID = sequence.DisplayID
            };

            // Running totals of the insertion counts reported by each AlignGap call.
            List<int> totalInsertions = new List<int>(2) { 0, 0 };
            List<int> stepInsertions;

            // Gap in front of the first MUM: there is no preceding MUM, hence null.
            // Element 0 is read unconditionally, so the list must hold at least one MUM.
            MaxUniqueMatch previousMum = _finalMumList[0];
            result.Score += AlignGap(
                referenceSequence,
                sequence,
                alignedReference,
                alignedQuery,
                consensus,
                null,
                previousMum,
                out stepInsertions);

            totalInsertions[0] += stepInsertions[0];
            totalInsertions[1] += stepInsertions[1];

            // Gaps between each pair of neighbouring MUMs.
            for (int position = 1; position < _finalMumList.Count; position++)
            {
                MaxUniqueMatch currentMum = _finalMumList[position];

                result.Score += AlignGap(
                    referenceSequence,
                    sequence,
                    alignedReference,
                    alignedQuery,
                    consensus,
                    previousMum,
                    currentMum,
                    out stepInsertions);

                totalInsertions[0] += stepInsertions[0];
                totalInsertions[1] += stepInsertions[1];

                previousMum = currentMum;
            }

            // Gap behind the last MUM: there is no following MUM, hence null.
            result.Score += AlignGap(
                referenceSequence,
                sequence,
                alignedReference,
                alignedQuery,
                consensus,
                previousMum,
                null,
                out stepInsertions);

            totalInsertions[0] += stepInsertions[0];
            totalInsertions[1] += stepInsertions[1];

            result.FirstSequence = alignedReference;
            result.SecondSequence = alignedQuery;
            result.Consensus = consensus;

            // Offsets are filled in for Needleman-Wunsch only. Per the original author's
            // note they are not required for Smith-Waterman, which gives a fragmented
            // alignment. An offset is the position of the first non-gap symbol in the
            // aligned result relative to that in the corresponding input sequence.
            if (PairWiseAlgorithm is NeedlemanWunschAligner)
            {
                result.FirstOffset = alignedReference.IndexOfNonGap() - referenceSequence.IndexOfNonGap();
                result.SecondOffset = alignedQuery.IndexOfNonGap() - sequence.IndexOfNonGap();
            }

            // The alignment covers both inputs from their first to their last index.
            List<int> startOffsets = new List<int>(2) { 0, 0 };
            List<int> endOffsets = new List<int>(2)
            {
                referenceSequence.Count - 1,
                sequence.Count - 1
            };

            result.Metadata["StartOffsets"] = startOffsets;
            result.Metadata["EndOffsets"] = endOffsets;
            result.Metadata["Insertions"] = totalInsertions;

            return result;
        }