/// <summary>
/// Aligns the gap that lies between two consecutive MUMs and appends the outcome,
/// followed by the second MUM itself, to the three result sequences.
/// </summary>
/// <remarks>
/// The gap starts right after <paramref name="mum1"/> (or at position 0 of both sequences
/// when it is null) and ends right before <paramref name="mum2"/> (or at the end of both
/// sequences when it is null). Three situations are handled:
/// <list type="bullet">
/// <item>both sequences have symbols in the gap: the two ranges are passed to RunPairWise
/// and every returned aligned sequence is appended;</item>
/// <item>only the reference has symbols: they are copied to the first result and the
/// output of CreateDefaultGap is appended to the second result;</item>
/// <item>only the query has symbols: the mirror image of the previous case.</item>
/// </list>
/// If neither sequence has symbols in the gap, only the second MUM is appended.
/// </remarks>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="querySequence">Query sequence</param>
/// <param name="sequenceResult1">Editable sequence receiving the aligned reference symbols</param>
/// <param name="sequenceResult2">Editable sequence receiving the aligned query symbols</param>
/// <param name="consensusResult">Editable sequence receiving the consensus symbols</param>
/// <param name="mum1">First MUM of the gap; null when the gap precedes the first MUM</param>
/// <param name="mum2">Second MUM of the gap; null when the gap follows the last MUM</param>
/// <param name="insertions">
/// Always a new two-element list. Element 0 is the number of insertions attributed to the
/// first result sequence and element 1 the number attributed to the second result sequence
/// while processing this gap.
/// </param>
/// <returns>
/// Score of the gap (pairwise alignment scores, or the gap cost when only one sequence has
/// symbols) plus the similarity-matrix score of every symbol of the second MUM against itself.
/// </returns>
private int AlignGap(
    ISequence referenceSequence,
    ISequence querySequence,
    ISequence sequenceResult1,
    ISequence sequenceResult2,
    ISequence consensusResult,
    MaxUniqueMatch mum1,
    MaxUniqueMatch mum2,
    out List<int> insertions)
{
    int score = 0;

    insertions = new List<int>(2);
    insertions.Add(0);
    insertions.Add(0);

    // Without a first MUM the gap begins at the start of both sequences.
    int mum1ReferenceStartIndex = 0;
    int mum1QueryStartIndex = 0;
    int mum1Length = 0;
    if (null != mum1)
    {
        mum1ReferenceStartIndex = mum1.FirstSequenceStart;
        mum1QueryStartIndex = mum1.SecondSequenceStart;
        mum1Length = mum1.Length;
    }

    // Without a second MUM the gap runs to the end of both sequences and
    // there is no MUM to append afterwards (length stays 0).
    int mum2ReferenceStartIndex;
    int mum2QueryStartIndex;
    int mum2Length = 0;
    if (null != mum2)
    {
        mum2ReferenceStartIndex = mum2.FirstSequenceStart;
        mum2QueryStartIndex = mum2.SecondSequenceStart;
        mum2Length = mum2.Length;
    }
    else
    {
        mum2ReferenceStartIndex = referenceSequence.Count;
        mum2QueryStartIndex = querySequence.Count;
    }

    int referenceGapStartIndex = mum1ReferenceStartIndex + mum1Length;
    int queryGapStartIndex = mum1QueryStartIndex + mum1Length;

    bool referenceHasGap = mum2ReferenceStartIndex > referenceGapStartIndex;
    bool queryHasGap = mum2QueryStartIndex > queryGapStartIndex;

    if (referenceHasGap && queryHasGap)
    {
        // Both sequences contribute symbols: delegate to the pairwise aligner.
        ISequence referenceGap = referenceSequence.Range(
            referenceGapStartIndex,
            mum2ReferenceStartIndex - referenceGapStartIndex);
        ISequence queryGap = querySequence.Range(
            queryGapStartIndex,
            mum2QueryStartIndex - queryGapStartIndex);

        IList<IPairwiseSequenceAlignment> gapAlignments = RunPairWise(referenceGap, queryGap);

        // A null result is tolerated: nothing is appended for the gap in that case.
        if (gapAlignments != null)
        {
            foreach (IPairwiseSequenceAlignment pairwiseAlignment in gapAlignments)
            {
                foreach (PairwiseAlignedSequence alignment in pairwiseAlignment.PairwiseAlignedSequences)
                {
                    sequenceResult1.InsertRange(
                        sequenceResult1.Count,
                        alignment.FirstSequence.ToString());
                    sequenceResult2.InsertRange(
                        sequenceResult2.Count,
                        alignment.SecondSequence.ToString());
                    consensusResult.InsertRange(
                        consensusResult.Count,
                        alignment.Consensus.ToString());

                    score += alignment.Score;

                    // Single lookup instead of ContainsKey followed by the indexer.
                    object insertionsMetadata;
                    if (alignment.Metadata.TryGetValue("Insertions", out insertionsMetadata))
                    {
                        List<int> gapInsertions = insertionsMetadata as List<int>;
                        if (gapInsertions != null)
                        {
                            if (gapInsertions.Count > 0)
                            {
                                insertions[0] += gapInsertions[0];
                            }

                            if (gapInsertions.Count > 1)
                            {
                                insertions[1] += gapInsertions[1];
                            }
                        }
                    }
                }
            }
        }
    }
    else if (referenceHasGap)
    {
        // Only the reference has symbols here: pad the query side.
        ISequence referenceGap = referenceSequence.Range(
            referenceGapStartIndex,
            mum2ReferenceStartIndex - referenceGapStartIndex);
        string referenceGapText = referenceGap.ToString();

        sequenceResult1.InsertRange(sequenceResult1.Count, referenceGapText);
        sequenceResult2.InsertRange(sequenceResult2.Count, CreateDefaultGap(referenceGap.Count));
        consensusResult.InsertRange(consensusResult.Count, referenceGapText);

        insertions[1] += referenceGap.Count;

        score = UseGapExtensionCost
            ? GapOpenCost + ((referenceGap.Count - 1) * GapExtensionCost)
            : referenceGap.Count * GapOpenCost;
    }
    else if (queryHasGap)
    {
        // Only the query has symbols here: pad the reference side.
        ISequence queryGap = querySequence.Range(
            queryGapStartIndex,
            mum2QueryStartIndex - queryGapStartIndex);
        string queryGapText = queryGap.ToString();

        sequenceResult1.InsertRange(sequenceResult1.Count, CreateDefaultGap(queryGap.Count));
        sequenceResult2.InsertRange(sequenceResult2.Count, queryGapText);
        consensusResult.InsertRange(consensusResult.Count, queryGapText);

        insertions[0] += queryGap.Count;

        score = UseGapExtensionCost
            ? GapOpenCost + ((queryGap.Count - 1) * GapExtensionCost)
            : queryGap.Count * GapOpenCost;
    }

    // Add the second MUM to the result.
    if (0 < mum2Length)
    {
        string referenceMumText = referenceSequence.Range(
            mum2ReferenceStartIndex,
            mum2Length).ToString();
        sequenceResult1.InsertRange(sequenceResult1.Count, referenceMumText);

        string queryMumText = querySequence.Range(
            mum2QueryStartIndex,
            mum2Length).ToString();
        sequenceResult2.InsertRange(sequenceResult2.Count, queryMumText);

        // The consensus of the MUM region is taken from the reference side.
        consensusResult.InsertRange(consensusResult.Count, referenceMumText);

        // Get the byte array (indices of the MUM symbols in the similarity matrix).
        byte[] indices = SimilarityMatrix.ToByteArray(referenceMumText);

        // Each MUM symbol is scored against itself (matrix diagonal).
        foreach (byte index in indices)
        {
            score += SimilarityMatrix[index, index];
        }
    }

    return score;
}
/// <summary>
/// Walks the final MUM list and aligns every gap around it: the gap before the
/// first MUM, the gaps between consecutive MUMs and the gap after the last MUM.
/// The pieces are concatenated into a single pairwise aligned sequence.
/// </summary>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="sequence">Query sequence</param>
/// <returns>Aligned sequences</returns>
private PairwiseAlignedSequence ProcessGaps(
    ISequence referenceSequence,
    ISequence sequence)
{
    PairwiseAlignedSequence result = new PairwiseAlignedSequence();

    // All three outputs share the reference alphabet and start out editable.
    Sequence alignedReference = new Sequence(referenceSequence.Alphabet)
    {
        IsReadOnly = false,
        ID = referenceSequence.ID,
        DisplayID = referenceSequence.DisplayID
    };

    Sequence alignedQuery = new Sequence(referenceSequence.Alphabet)
    {
        IsReadOnly = false,
        ID = sequence.ID,
        DisplayID = sequence.DisplayID
    };

    Sequence consensus = new Sequence(referenceSequence.Alphabet)
    {
        IsReadOnly = false,
        ID = sequence.ID,
        DisplayID = sequence.DisplayID
    };

    // Running totals of the per-gap insertion counts reported by AlignGap.
    List<int> totalInsertions = new List<int>(2) { 0, 0 };
    List<int> gapInsertions;

    // Gap in front of the first MUM (there is no preceding MUM).
    MaxUniqueMatch previousMum = _finalMumList[0];
    result.Score += AlignGap(
        referenceSequence,
        sequence,
        alignedReference,
        alignedQuery,
        consensus,
        null,
        previousMum,
        out gapInsertions);
    totalInsertions[0] += gapInsertions[0];
    totalInsertions[1] += gapInsertions[1];

    // Gaps between each pair of neighbouring MUMs.
    int mumIndex = 1;
    while (mumIndex < _finalMumList.Count)
    {
        MaxUniqueMatch currentMum = _finalMumList[mumIndex];

        result.Score += AlignGap(
            referenceSequence,
            sequence,
            alignedReference,
            alignedQuery,
            consensus,
            previousMum,
            currentMum,
            out gapInsertions);
        totalInsertions[0] += gapInsertions[0];
        totalInsertions[1] += gapInsertions[1];

        previousMum = currentMum;
        mumIndex++;
    }

    // Gap behind the last MUM (there is no following MUM).
    result.Score += AlignGap(
        referenceSequence,
        sequence,
        alignedReference,
        alignedQuery,
        consensus,
        previousMum,
        null,
        out gapInsertions);
    totalInsertions[0] += gapInsertions[0];
    totalInsertions[1] += gapInsertions[1];

    result.FirstSequence = alignedReference;
    result.SecondSequence = alignedQuery;
    result.Consensus = consensus;

    // Offsets are only filled in for Needleman-Wunsch; they are not required for
    // Smith-Waterman because it produces a fragmented alignment.
    // An offset is the starting position of the alignment of sequence1 with respect to sequence2.
    if (PairWiseAlgorithm is NeedlemanWunschAligner)
    {
        result.FirstOffset = alignedReference.IndexOfNonGap() - referenceSequence.IndexOfNonGap();
        result.SecondOffset = alignedQuery.IndexOfNonGap() - sequence.IndexOfNonGap();
    }

    // The alignment spans both inputs from their first to their last symbol.
    List<int> startOffsets = new List<int>(2) { 0, 0 };
    List<int> endOffsets = new List<int>(2)
    {
        referenceSequence.Count - 1,
        sequence.Count - 1
    };

    result.Metadata["StartOffsets"] = startOffsets;
    result.Metadata["EndOffsets"] = endOffsets;
    result.Metadata["Insertions"] = totalInsertions;

    return result;
}