/// <summary>
/// Aligns two short protein sequences with the Smith-Waterman aligner using
/// BLOSUM62, gap open cost -8 and gap extension cost -1, and compares the
/// result with the expected local alignment.
/// </summary>
public void SmithWatermanProteinSeqAffineGap()
{
    // Configure the aligner under test.
    IPairwiseSequenceAligner sw = new SmithWatermanAligner
    {
        SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum62),
        GapOpenCost = -8,
        GapExtensionCost = -1,
    };

    ISequence sequence1 = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    ISequence sequence2 = new Sequence(Alphabets.Protein, "PAWHEAE");

    // Run the alignment and log it.
    IList<IPairwiseSequenceAlignment> result = sw.Align(sequence1, sequence2);
    AlignmentHelpers.LogResult(sw, result);

    // Build the single expected aligned pair.
    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
    expectedPair.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
    expectedPair.Consensus = new Sequence(Alphabets.AmbiguousProtein, "AWGHE");
    expectedPair.Score = 20;
    expectedPair.FirstOffset = 0;
    expectedPair.SecondOffset = 3;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    expectedOutput.Add(expectedAlignment);

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Converts each delta alignment into a pairwise aligned sequence and sets its
/// first/second offsets from the start positions stored in the delta.
/// </summary>
/// <param name="alignments">Delta alignments to convert.</param>
/// <returns>One aligned sequence per delta alignment, in enumeration order.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="alignments"/> is null.</exception>
private static IList<PairwiseAlignedSequence> ConvertDeltaToAlignment(
    IEnumerable<DeltaAlignment> alignments)
{
    if (alignments == null)
    {
        throw new ArgumentNullException("alignments");
    }

    IList<PairwiseAlignedSequence> converted = new List<PairwiseAlignedSequence>();

    foreach (DeltaAlignment delta in alignments)
    {
        PairwiseAlignedSequence aligned = delta.ConvertDeltaToSequences();

        // Only the sequence that starts earlier gets a non-zero offset,
        // equal to the distance between the two start positions.
        long firstStart = delta.FirstSequenceStart;
        long secondStart = delta.SecondSequenceStart;
        long shift = firstStart - secondStart;
        bool firstStartsLater = shift > 0;

        aligned.FirstOffset = firstStartsLater ? 0 : -1 * shift;
        aligned.SecondOffset = firstStartsLater ? shift : 0;

        converted.Add(aligned);
    }

    return converted;
}
/// <summary>
/// Appends an aligned sequence to this PairwiseSequenceAlignment's list of
/// aligned sequences.
/// </summary>
/// <param name="item">PairwiseAlignedSequence to add.</param>
/// <exception cref="NotSupportedException">Thrown when the alignment is read only.</exception>
public void Add(PairwiseAlignedSequence item)
{
    if (!IsReadOnly)
    {
        alignedSequences.Add(item);
        return;
    }

    throw new NotSupportedException(Properties.Resource.READ_ONLY_COLLECTION_MESSAGE);
}
/// <summary>
/// Removes an aligned sequence from this PairwiseSequenceAlignment's list of
/// aligned sequences.
/// </summary>
/// <param name="item">Aligned sequence object to remove.</param>
/// <returns>The result of the underlying list removal: true if the item was removed, false if it was not found.</returns>
/// <exception cref="NotSupportedException">Thrown when the alignment is read only.</exception>
public bool Remove(PairwiseAlignedSequence item)
{
    if (!IsReadOnly)
    {
        bool removed = alignedSequences.Remove(item);
        return removed;
    }

    throw new NotSupportedException(Properties.Resource.READ_ONLY_COLLECTION_MESSAGE);
}
/// <summary>
/// Appends a new aligned sequence object to the end of the list.
/// </summary>
/// <param name="pairwiseAlignedSequence">The sequence to add.</param>
/// <exception cref="NotSupportedException">
/// Thrown (after the message is reported through Trace) when the collection is read only.
/// </exception>
public void AddSequence(PairwiseAlignedSequence pairwiseAlignedSequence)
{
    if (!IsReadOnly)
    {
        _alignedSequences.Add(pairwiseAlignedSequence);
        return;
    }

    // Report the failure before raising it.
    Trace.Report(Resource.READ_ONLY_COLLECTION_MESSAGE);
    throw new NotSupportedException(Resource.READ_ONLY_COLLECTION_MESSAGE);
}
/// <summary>
/// Turns the raw aligned byte arrays back into Sequence objects and packs them,
/// together with their offsets and metadata, into a single pairwise alignment.
/// </summary>
/// <remarks>
/// All list arguments are read in pairs: index i belongs to the first sequence
/// and index i + 1 to the second sequence of the same aligned pair.
/// </remarks>
/// <param name="aInput">First input sequence.</param>
/// <param name="bInput">Second input sequence.</param>
/// <param name="alignedSequences">Aligned sequences, stored as consecutive first/second pairs.</param>
/// <param name="offsets">Offsets for each aligned sequence.</param>
/// <param name="optScore">Optimum alignment score; assigned to every aligned pair.</param>
/// <param name="startOffsets">Start indices of aligned sequences with respect to input sequences.</param>
/// <param name="endOffsets">End indices of aligned sequences with respect to input sequences.</param>
/// <param name="insertions">Insertions made to the aligned sequences.</param>
/// <returns>
/// A list holding one alignment with every aligned pair, or an empty list when
/// <paramref name="alignedSequences"/> is empty.
/// </returns>
private IList<IPairwiseSequenceAlignment> CollateResults(
    ISequence aInput,
    ISequence bInput,
    List<byte[]> alignedSequences,
    List<int> offsets,
    int optScore,
    List<int> startOffsets,
    List<int> endOffsets,
    List<int> insertions)
{
    List<IPairwiseSequenceAlignment> collated = new List<IPairwiseSequenceAlignment>();
    if (alignedSequences.Count <= 0)
    {
        return collated;
    }

    PairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(aInput, bInput);

    for (int firstIndex = 0; firstIndex < alignedSequences.Count; firstIndex += 2)
    {
        int secondIndex = firstIndex + 1;
        byte[] firstBytes = alignedSequences[firstIndex];
        byte[] secondBytes = alignedSequences[secondIndex];

        PairwiseAlignedSequence pair = new PairwiseAlignedSequence();
        pair.Score = optScore;

        // Rebuild each aligned sequence and carry over the identifiers of its input.
        pair.FirstSequence = new Sequence(aInput.Alphabet, _similarityMatrix.ToString(firstBytes))
        {
            ID = aInput.ID,
            DisplayID = aInput.DisplayID
        };
        pair.SecondSequence = new Sequence(bInput.Alphabet, _similarityMatrix.ToString(secondBytes))
        {
            ID = bInput.ID,
            DisplayID = bInput.DisplayID
        };

        // Must run after both sequences are set: the consensus is derived from them.
        AddSimpleConsensusToResult(pair);

        pair.FirstOffset = offsets[firstIndex];
        pair.SecondOffset = offsets[secondIndex];

        pair.Metadata["StartOffsets"] = new List<int> { startOffsets[firstIndex], startOffsets[secondIndex] };
        pair.Metadata["EndOffsets"] = new List<int> { endOffsets[firstIndex], endOffsets[secondIndex] };
        pair.Metadata["Insertions"] = new List<int> { insertions[firstIndex], insertions[secondIndex] };

        alignment.PairwiseAlignedSequences.Add(pair);
    }

    collated.Add(alignment);
    return collated;
}
/// <summary>
/// Computes a simple column-by-column consensus for an aligned pair and stores
/// it on the pair. Each consensus symbol is whatever ConsensusResolver returns
/// for the two symbols found at that position.
/// </summary>
/// <param name="alignment">
/// Aligned pair whose FirstSequence and SecondSequence are already set and
/// whose Consensus is to be filled in.
/// </param>
private void AddSimpleConsensusToResult(PairwiseAlignedSequence alignment)
{
    ISequence first = alignment.FirstSequence;
    ISequence second = alignment.SecondSequence;

    // The consensus is as long as the first aligned sequence.
    byte[] symbols = new byte[first.Count];

    int position = 0;
    while (position < first.Count)
    {
        byte[] column = new byte[] { first[position], second[position] };
        symbols[position] = ConsensusResolver.GetConsensus(column);
        position++;
    }

    // Let the library choose an alphabet for the consensus symbols,
    // passing the first sequence's alphabet as the last argument.
    IAlphabet detected = Alphabets.AutoDetectAlphabet(symbols, 0, symbols.GetLongLength(), first.Alphabet);
    alignment.Consensus = new Sequence(detected, symbols, false);
}
/// <summary>
/// Runs the pairwise overlap aligner (simple gap model, BLOSUM50, gap cost -8)
/// on two short protein sequences, logs the outcome and compares it with the
/// expected overlap alignment.
/// </summary>
public void PairwiseOverlapProteinSeqSimpleGap()
{
    Sequence sequence1 = new Sequence(Alphabets.Protein, "HEAGAWGHEE");
    Sequence sequence2 = new Sequence(Alphabets.Protein, "PAWHEAE");

    SimilarityMatrix sm = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    int gapPenalty = -8;

    PairwiseOverlapAligner overlap = new PairwiseOverlapAligner
    {
        SimilarityMatrix = sm,
        GapOpenCost = gapPenalty
    };

    IList<IPairwiseSequenceAlignment> result = overlap.AlignSimple(sequence1, sequence2);

    // Log the aligner settings followed by the first aligned pair of every alignment.
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "{0}, Simple; Matrix {1}; GapOpenCost {2}",
        overlap.Name, overlap.SimilarityMatrix.Name, overlap.GapOpenCost));

    foreach (IPairwiseSequenceAlignment sequenceResult in result)
    {
        PairwiseAlignedSequence top = sequenceResult.PairwiseAlignedSequences[0];

        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", top.Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0 {0}", sequenceResult.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1 {0}", sequenceResult.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0 {0}", top.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1 {0}", top.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus {0}", top.Consensus.ToString()));
    }

    // Expected output: one alignment holding one aligned pair.
    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.Protein, "GAWGHEE"),
        SecondSequence = new Sequence(Alphabets.Protein, "PAW-HEA"),
        Consensus = new Sequence(Alphabets.AmbiguousProtein, "XAWGHEX"),
        Score = 25,
        FirstOffset = 0,
        SecondOffset = 3
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    expectedOutput.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Builds a simple position-by-position consensus for an aligned pair and
/// stores it on the pair. Each consensus item is whatever ConsensusResolver
/// returns for the two items found at that position.
/// </summary>
/// <param name="alignment">
/// Aligned pair whose FirstSequence and SecondSequence are already set and
/// whose Consensus is to be filled in.
/// </param>
private void AddSimpleConsensusToResult(PairwiseAlignedSequence alignment)
{
    ISequence first = alignment.FirstSequence;
    ISequence second = alignment.SecondSequence;

    // The consensus uses the first sequence's alphabet and has the same length.
    Sequence merged = new Sequence(first.Alphabet);

    int position = 0;
    while (position < first.Count)
    {
        List<ISequenceItem> column = new List<ISequenceItem>() { first[position], second[position] };
        merged.Add(ConsensusResolver.GetConsensus(column));
        position++;
    }

    alignment.Consensus = merged;
}
/// <summary>
/// Aligns one query sequence against a reference with the MUMmer aligner
/// (MUM length 3, Needleman-Wunsch as the pairwise algorithm, gap extension
/// cost -2) and compares the result with the expected alignment.
/// </summary>
public void TestMUMmerAlignerSingleMum()
{
    const string reference = "TTAATTTTAG";
    const string search = "AGTTTAGAG";

    ISequence referenceSeq = new Sequence(Alphabets.DNA, reference);
    ISequence searchSeq = new Sequence(Alphabets.DNA, search);

    var searchSeqs = new List<ISequence>();
    searchSeqs.Add(searchSeq);

    MUMmerAligner mummer = new MUMmerAligner();
    mummer.LengthOfMUM = 3;
    mummer.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mummer.GapExtensionCost = -2;

    IList<IPairwiseSequenceAlignment> result = mummer.Align(referenceSeq, searchSeqs);

    // The aligner must return a result object.
    Assert.AreNotEqual(null, result);

    // Expected output: one alignment holding one aligned pair.
    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence();
    expectedPair.FirstSequence = new Sequence(Alphabets.DNA, "TTAATTTTAG--");
    expectedPair.SecondSequence = new Sequence(Alphabets.DNA, "---AGTTTAGAG");
    expectedPair.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "TTAAKTTTAGAG");
    expectedPair.Score = -6;
    expectedPair.FirstOffset = 0;
    expectedPair.SecondOffset = 3;

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    expectedOutput.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Validates the PairwiseOverlapAligner for the parameters passed: reads the
/// inputs and expected values from the xml node, runs the requested
/// Align/AlignSimple overload and compares the result with the expectation.
/// </summary>
/// <param name="nodeName">Node Name in the xml.</param>
/// <param name="alignParam">
/// Selects the overload to call. Values whose name contains "Code" take the
/// input sequences from the xml text; the others read them from FASTA files.
/// </param>
/// <param name="similarityMatrixParam">Selects how the similarity matrix is created.</param>
/// <param name="alignType">Align (with gap extension cost) or the simple alignment.</param>
private void ValidatePairwiseOverlapAlignment(string nodeName, AlignParameters alignParam,
    SimilarityMatrixParameters similarityMatrixParam, AlignmentType alignType)
{
    ISequence aInput;
    ISequence bInput;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (alignParam.ToString().Contains("Code"))
    {
        // Sequences are given inline in the xml.
        string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        aInput = new Sequence(alphabet, sequence1);
        bInput = new Sequence(alphabet, sequence2);
    }
    else
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parser1 = new FastAParser { Alphabet = alphabet };
        aInput = parser1.Parse(filePath1).ElementAt(0);
        bInput = parser1.Parse(filePath2).ElementAt(0);
    }

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;

        default:
            // TextReader and every remaining option load the matrix from the BLOSUM file.
            // The reader is always disposed so the file handle is not leaked
            // (previously only the TextReader case disposed it).
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    var pairwiseOverlapObj = new PairwiseOverlapAligner();
    if (AlignParameters.AllParam != alignParam)
    {
        // For AllParam the matrix and gap cost are passed to the Align call instead.
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignParameters.AlignList:
        case AlignParameters.AlignListCode:
            var sequences = new List<ISequence> { aInput, bInput };
            switch (alignType)
            {
                case AlignmentType.Align:
                    pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                    result = pairwiseOverlapObj.Align(sequences);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
            }
            break;

        case AlignParameters.AllParam:
        case AlignParameters.AllParamCode:
            switch (alignType)
            {
                case AlignmentType.Align:
                    pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;

        case AlignParameters.AlignTwo:
        case AlignParameters.AlignTwoCode:
            switch (alignType)
            {
                case AlignmentType.Align:
                    pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;

        default:
            break;
    }

    // Read the expected score and aligned sequences for the chosen alignment type.
    string expectedSequence1;
    string expectedSequence2;
    string expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    // Several expected alignments may be listed, separated by ';'.
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    var seperators = new[] { ';' };
    string[] expectedSequences1 = expectedSequence1.Split(seperators);
    string[] expectedSequences2 = expectedSequence2.Split(seperators);

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = Convert.ToInt32(expectedScore, null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput, true));

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P1 : Aligned Second Sequence is '{0}'.", expectedSequence2));
}
/// <summary>
/// Builds a pairwise alignment by walking the traceback matrix backwards from
/// the given cell until TracebackIsComplete reports that the walk is finished.
/// </summary>
/// <param name="startingCell">Cell (row, column, score) at which the traceback starts.</param>
/// <returns>
/// The aligned pair with its score, consensus, offsets and metadata
/// (start/end offsets, insertions, identical and similarity counts).
/// </returns>
protected PairwiseAlignedSequence CreateAlignmentFromCell(OptScoreMatrixCell startingCell)
{
    // Row width used to index the flattened gap-length tables.
    int gapStride = Cols + 1;

    // Lists grow on demand; start with a capacity slightly above the longer input.
    int initialCapacity = (int)(1.1 * Math.Max(ReferenceSequence.Length, QuerySequence.Length));
    var referenceSide = new List<byte>(initialCapacity);
    var querySide = new List<byte>(initialCapacity);

    // Current position in the matrix and the score recorded at the start cell.
    int row = startingCell.Row;
    int col = startingCell.Col;
    var finalScore = startingCell.Score;

    long rowGaps = 0;
    long colGaps = 0;
    long identicalCount = 0;
    long similarityCount = 0;

    // Walk the traceback matrix; symbols are collected in reverse order.
    while (!TracebackIsComplete(row, col))
    {
        sbyte direction = Traceback[row][col];

        switch (direction)
        {
            case SourceDirection.Diagonal:
            {
                // Both sequences contribute a symbol.
                byte referenceSymbol = ReferenceSequence[col - 1];
                byte querySymbol = QuerySequence[row - 1];
                referenceSide.Add(referenceSymbol);
                querySide.Add(querySymbol);
                row--;
                col--;

                // Track some useful statistics.
                bool identical = referenceSymbol == querySymbol && referenceSymbol != _gap;
                if (identical)
                {
                    identicalCount++;
                    similarityCount++;
                }
                else if (SimilarityMatrix[querySymbol, referenceSymbol] > 0)
                {
                    similarityCount++;
                }

                break;
            }

            case SourceDirection.Left:
            {
                // Reference symbols aligned against gaps in the query. With the affine
                // model the whole run recorded in h_Gap_Length is emitted at once.
                // NOTE(review): an earlier comment here mentioned adding 1 to the stored
                // length, but the stored value is used as-is - confirm the table holds
                // the full run length.
                int run = 1;
                if (usingAffineGapModel)
                {
                    run = h_Gap_Length[row * gapStride + col];
                }

                for (int step = 0; step < run; step++)
                {
                    referenceSide.Add(ReferenceSequence[--col]);
                    querySide.Add(_gap);
                    rowGaps++;
                }

                break;
            }

            case SourceDirection.Up:
            {
                // Query symbols aligned against gaps in the reference; same run
                // handling as above, using v_Gap_Length (same review note applies).
                int run = 1;
                if (usingAffineGapModel)
                {
                    run = v_Gap_Length[row * gapStride + col];
                }

                for (int step = 0; step < run; step++)
                {
                    querySide.Add(QuerySequence[--row]);
                    referenceSide.Add(_gap);
                    colGaps++;
                }

                break;
            }

            default:
                break;
        }
    }

    // The walk ran backwards, so flip both sides into reading order.
    referenceSide.Reverse();
    querySide.Reverse();

    // Consensus, column by column, over the common length of both sides.
    int consensusLength = Math.Min(referenceSide.Count, querySide.Count);
    byte[] consensus = new byte[consensusLength];
    for (int n = 0; n < consensus.Length; n++)
    {
        consensus[n] = ConsensusResolver.GetConsensus(new[] { referenceSide[n], querySide[n] });
    }

    // Create the result alignment.
    var aligned = new PairwiseAlignedSequence();
    aligned.Score = finalScore;
    aligned.FirstSequence = new Sequence(_sequence1.Alphabet, referenceSide.ToArray()) { ID = _sequence1.ID };
    aligned.SecondSequence = new Sequence(_sequence2.Alphabet, querySide.ToArray()) { ID = _sequence2.ID };
    aligned.Consensus = new Sequence(ConsensusResolver.SequenceAlphabet, consensus);

    // Offset is start of alignment in input sequence with respect to other sequence:
    // only one of the two is non-zero, and it equals the distance between row and column.
    bool rowAtOrBeyondCol = row >= col;
    aligned.FirstOffset = rowAtOrBeyondCol ? row - col : 0;
    aligned.SecondOffset = rowAtOrBeyondCol ? 0 : col - row;

    // Add in ISequenceAlignment metadata.
    aligned.Metadata["Score"] = aligned.Score;
    aligned.Metadata["FirstOffset"] = aligned.FirstOffset;
    aligned.Metadata["SecondOffset"] = aligned.SecondOffset;
    aligned.Metadata["Consensus"] = aligned.Consensus;
    aligned.Metadata["StartOffsets"] = new List<long> { col, row };
    aligned.Metadata["EndOffsets"] = new List<long> { startingCell.Col - 1, startingCell.Row - 1 };
    aligned.Metadata["Insertions"] = new List<long> { colGaps, rowGaps }; // ref, query insertions
    aligned.Metadata["IdenticalCount"] = identicalCount;
    aligned.Metadata["SimilarityCount"] = similarityCount;

    return aligned;
}
/// <summary>
/// Checks whether the given aligned sequence is present in this
/// PairwiseSequenceAlignment's list of aligned sequences.
/// </summary>
/// <param name="item">PairwiseAlignedSequence object to look for.</param>
/// <returns>True if the list contains the item, otherwise false.</returns>
public bool Contains(PairwiseAlignedSequence item)
{
    bool isPresent = alignedSequences.Contains(item);
    return isPresent;
}
/// <summary>
/// Validates one general method of the sequence alignment class
/// (Add, Clear, Contains, CopyTo, Remove, AddSequence or GetEnumerator)
/// using inputs and expected counts read from the xml node.
/// </summary>
/// <param name="nodeName">Node Name in the xml.</param>
/// <param name="methodName">Name of the SequenceAlignment method to be validated.</param>
/// <param name="isSeqAlignDefCtr">
/// True to create the alignment with the default constructor, false to create
/// it from the two input sequences.
/// </param>
private void ValidateSequenceAlignmentGeneralMethods(string nodeName, SeqAlignmentMethods methodName,
    bool isSeqAlignDefCtr)
{
    // Read the input sequences and the expected values from the xml file.
    string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
    string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    string seqCount = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SeqCountNode);
    string alignedSeqCountAfterAddSeq = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlignedSeqCountAfterAddAlignedSeqNode);
    string arrayLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ArraySizeNode);

    // Target array for the CopyTo validation.
    var alignedSeqItems = new PairwiseAlignedSequence[int.Parse(arrayLength, null)];
    const int Index = 0;

    // Create two sequences.
    ISequence aInput = new Sequence(alphabet, origSequence1);
    ISequence bInput = new Sequence(alphabet, origSequence2);

    var alignSeq = new PairwiseAlignedSequence();
    alignSeq.FirstSequence = aInput;
    alignSeq.SecondSequence = bInput;

    IPairwiseSequenceAlignment seqAlignObj;
    if (isSeqAlignDefCtr)
    {
        seqAlignObj = new PairwiseSequenceAlignment();
    }
    else
    {
        seqAlignObj = new PairwiseSequenceAlignment(aInput, bInput);
    }

    // Seed the alignment with one aligned pair and keep it in a list of alignments.
    seqAlignObj.Add(alignSeq);

    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();
    sequenceAlignmentObj.Add(seqAlignObj);

    IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;

    switch (methodName)
    {
        case SeqAlignmentMethods.Add:
            seqAlignObj.Add(alignSeq);
            Assert.AreEqual(seqCount, seqAlignObj.PairwiseAlignedSequences.Count.ToString((IFormatProvider)null));
            break;

        case SeqAlignmentMethods.Clear:
            seqAlignObj.Clear();
            Assert.AreEqual(0, seqAlignObj.PairwiseAlignedSequences.Count);
            break;

        case SeqAlignmentMethods.Contains:
            Assert.IsTrue(seqAlignObj.Contains(newAlignedSequences[0]));
            break;

        case SeqAlignmentMethods.CopyTo:
            seqAlignObj.CopyTo(alignedSeqItems, Index);

            // Validate Copied array.
            Assert.AreEqual(alignedSeqItems[Index].FirstSequence, seqAlignObj.FirstSequence);
            Assert.AreEqual(alignedSeqItems[Index].SecondSequence, seqAlignObj.SecondSequence);
            break;

        case SeqAlignmentMethods.Remove:
            seqAlignObj.Remove(newAlignedSequences[0]);

            // Validate whether removed item is deleted from SequenceAlignment.
            Assert.AreEqual(0, newAlignedSequences.Count);
            break;

        case SeqAlignmentMethods.AddSequence:
            seqAlignObj.AddSequence(newAlignedSequences[0]);

            // Validate SeqAlignObj after adding aligned sequence.
            Assert.AreEqual(alignedSeqCountAfterAddSeq, seqAlignObj.Count.ToString((IFormatProvider)null));
            break;

        case SeqAlignmentMethods.GetEnumerator:
            IEnumerator<PairwiseAlignedSequence> alignedSeqList = seqAlignObj.GetEnumerator();

            // The enumerator over the aligned sequence collection must exist.
            Assert.IsNotNull(alignedSeqList);
            break;

        default:
            break;
    }

    ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
    ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
    ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
}
/// <summary>
/// Runs the pairwise overlap aligner (simple gap model, diagonal matrix 5/-20,
/// gap cost -10) on DNA input for which two alignments are expected, logs the
/// outcome and compares it with the expected output.
/// </summary>
public void PairwiseOverlapMultipleAlignments()
{
    Sequence sequence1 = new Sequence(Alphabets.DNA, "CCCAACCC");
    Sequence sequence2 = new Sequence(Alphabets.DNA, "CCC");

    SimilarityMatrix sm = new DiagonalSimilarityMatrix(5, -20);
    int gapPenalty = -10;

    PairwiseOverlapAligner overlap = new PairwiseOverlapAligner
    {
        SimilarityMatrix = sm,
        GapOpenCost = gapPenalty
    };

    IList<IPairwiseSequenceAlignment> result = overlap.AlignSimple(sequence1, sequence2);

    // Log the aligner settings followed by the first aligned pair of every alignment.
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "{0}, Simple; Matrix {1}; GapOpenCost {2}",
        overlap.Name, overlap.SimilarityMatrix.Name, overlap.GapOpenCost));

    foreach (IPairwiseSequenceAlignment sequenceResult in result)
    {
        PairwiseAlignedSequence top = sequenceResult.PairwiseAlignedSequences[0];

        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "score {0}", top.Score));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 0 {0}", sequenceResult.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "input 1 {0}", sequenceResult.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 0 {0}", top.FirstSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "result 1 {0}", top.SecondSequence.ToString()));
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null, "consesus {0}", top.Consensus.ToString()));
    }

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();

    // First alignment
    PairwiseAlignedSequence firstExpected = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "CCC"),
        Consensus = new Sequence(Alphabets.DNA, "CCC"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 0
    };
    expectedAlignment.PairwiseAlignedSequences.Add(firstExpected);

    // Second alignment
    PairwiseAlignedSequence secondExpected = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "CCC"),
        Consensus = new Sequence(Alphabets.DNA, "CCC"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 5
    };
    expectedAlignment.PairwiseAlignedSequences.Add(secondExpected);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    expectedOutput.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Validates the PairwiseOverlapAligner for the parameters passed: builds the
/// input sequences in the requested case type, runs the requested
/// Align/AlignSimple overload and compares the result with the values expected
/// in the xml node.
/// </summary>
/// <param name="nodeName">Xml node name</param>
/// <param name="isTextFile">True to read the input sequences from FASTA files, false to take them from the xml text.</param>
/// <param name="caseType">Case type passed to GetSequenceWithCaseType when building the inputs.</param>
/// <param name="additionalParameter">Selects which aligner overload is called.</param>
/// <param name="alignType">Is the Align type Simple or Align with Gap Extension cost?</param>
/// <param name="similarityMatrixParam">Selects how the similarity matrix is created.</param>
private void ValidatePairwiseOverlapAlignment(string nodeName, bool isTextFile,
    SequenceCaseType caseType, AlignParameters additionalParameter,
    AlignmentType alignType, SimilarityMatrixParameters similarityMatrixParam)
{
    Sequence aInput;
    Sequence bInput;

    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parser1 = new FastAParser();
        ISequence originalSequence1 = parser1.Parse(filePath1).ElementAt(0);
        ISequence originalSequence2 = parser1.Parse(filePath2).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(new string(originalSequence1.Select(a => (char) a).ToArray()),
            new string(originalSequence2.Select(a => (char) a).ToArray()),
            alphabet, caseType, out aInput, out bInput);
    }
    else
    {
        string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(originalSequence1, originalSequence2, alphabet, caseType, out aInput, out bInput);
    }

    var aInputString = new string(aInput.Select(a => (char) a).ToArray());
    var bInputString = new string(bInput.Select(a => (char) a).ToArray());

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : First sequence used is '{0}'.", aInputString));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Second sequence used is '{0}'.", bInputString));

    // Create similarity matrix object for a given file.
    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;

        default:
            // TextReader and every remaining option load the matrix from the BLOSUM file.
            // The reader is always disposed so the file handle is not leaked
            // (previously only the TextReader case disposed it).
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    // Create PairwiseOverlapAligner instance and set its values.
    // For AllParam the values are passed to the Align call instead.
    var pairwiseOverlapObj = new PairwiseOverlapAligner();
    if (additionalParameter != AlignParameters.AllParam)
    {
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
        pairwiseOverlapObj.GapExtensionCost = gapExtensionCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    // Align the input sequences.
    switch (additionalParameter)
    {
        case AlignParameters.AlignList:
            var sequences = new List<ISequence>();
            sequences.Add(aInput);
            sequences.Add(bInput);
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sequences);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
            }
            break;

        case AlignParameters.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;

        case AlignParameters.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;

        default:
            break;
    }

    // Get the expected sequences and score from xml config.
    string expectedSequence1;
    string expectedSequence2;
    string expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    // Several expected alignments may be listed, separated by ';'.
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    var seperators = new char[1] {';'};
    string[] expectedSequences1 = expectedSequence1.Split(seperators);
    string[] expectedSequences2 = expectedSequence2.Split(seperators);

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = Convert.ToInt32(expectedScore, null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput, true));

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner P2 : Aligned Second Sequence is '{0}'.", expectedSequence2));
}
/// <summary>
/// Builds one pairwise alignment by walking the traceback matrix backwards from
/// the given scoring-matrix cell until <c>TracebackIsComplete</c> reports the end.
/// </summary>
/// <param name="startingCell">Cell (row, column and score) where the traceback starts.</param>
/// <returns>
/// The aligned first/second sequences, their consensus, the score stored in
/// <paramref name="startingCell"/>, the offsets and the alignment metadata.
/// </returns>
private PairwiseAlignedSequence CreateAlignmentFromCell(OptScoreMatrixCell startingCell)
{
    // Every recognised traceback step decrements the row, the column or both, so a
    // path can never be longer than the two sequence lengths added together.
    // Summing in 64-bit avoids the overflow (and the huge or too-small buffers)
    // that multiplying the two lengths produced.
    long maxAlignmentLength = (long)ReferenceSequence.Length + QuerySequence.Length;
    var firstAlignment = new byte[maxAlignmentLength];
    var secondAlignment = new byte[maxAlignmentLength];

    // Get the starting cell position and record the optimal score found there.
    int i = startingCell.Row;
    int j = startingCell.Col;
    var finalScore = startingCell.Score;

    long rowGaps = 0, colGaps = 0, identicalCount = 0, similarityCount = 0;

    // Walk the traceback matrix and build the alignments (in reverse order).
    int faLength = 0, saLength = 0;
    while (!TracebackIsComplete(i, j))
    {
        sbyte tracebackDirection = Traceback[i][j];

        // Reference sequence uses the current cell if we moved diagonal or left.
        if (tracebackDirection == SourceDirection.Left || tracebackDirection == SourceDirection.Diagonal)
        {
            firstAlignment[faLength++] = ReferenceSequence[j - 1];
        }
        else
        {
            firstAlignment[faLength++] = _gap;
            colGaps++;
        }

        // Query sequence uses the current cell if we moved diagonal or up.
        if (tracebackDirection == SourceDirection.Up || tracebackDirection == SourceDirection.Diagonal)
        {
            secondAlignment[saLength++] = QuerySequence[i - 1];
        }
        else
        {
            secondAlignment[saLength++] = _gap;
            rowGaps++;
        }

        // Track some useful statistics for the column that was just written.
        // Each buffer is read through its own length counter.
        byte n1 = firstAlignment[faLength - 1];
        byte n2 = secondAlignment[saLength - 1];
        if (n1 == n2 && n1 != _gap)
        {
            identicalCount++;
            similarityCount++;
        }
        else if (SimilarityMatrix[n2, n1] > 0)
        {
            similarityCount++;
        }

        // Walk backwards through the trace back
        switch (tracebackDirection)
        {
            case SourceDirection.Diagonal:
                i--;
                j--;
                break;
            case SourceDirection.Left:
                j--;
                break;
            case SourceDirection.Up:
                i--;
                break;
            default:
                break;
        }
    }

    // We build the alignments in reverse since we were
    // walking backwards through the matrix table. To create
    // the proper alignments we need to resize and reverse
    // both underlying arrays.
    Array.Resize(ref firstAlignment, faLength);
    Array.Reverse(firstAlignment);
    Array.Resize(ref secondAlignment, saLength);
    Array.Reverse(secondAlignment);

    // Create the Consensus sequence
    byte[] consensus = new byte[Math.Min(faLength, saLength)];
    for (int n = 0; n < consensus.Length; n++)
    {
        consensus[n] = ConsensusResolver.GetConsensus(new[] { firstAlignment[n], secondAlignment[n] });
    }

    // Create the result alignment
    var pairwiseAlignedSequence = new PairwiseAlignedSequence
    {
        Score = finalScore,
        FirstSequence = new Sequence(_sequence1.Alphabet, firstAlignment) { ID = _sequence1.ID },
        SecondSequence = new Sequence(_sequence2.Alphabet, secondAlignment) { ID = _sequence2.ID },
        Consensus = new Sequence(ConsensusResolver.SequenceAlphabet, consensus),
    };

    // Offset is start of alignment in input sequence with respect to other sequence.
    // At this point (i, j) is the cell where the traceback stopped.
    if (i >= j)
    {
        pairwiseAlignedSequence.FirstOffset = i - j;
        pairwiseAlignedSequence.SecondOffset = 0;
    }
    else
    {
        pairwiseAlignedSequence.FirstOffset = 0;
        pairwiseAlignedSequence.SecondOffset = j - i;
    }

    // Add in ISequenceAlignment metadata
    pairwiseAlignedSequence.Metadata["Score"] = pairwiseAlignedSequence.Score;
    pairwiseAlignedSequence.Metadata["FirstOffset"] = pairwiseAlignedSequence.FirstOffset;
    pairwiseAlignedSequence.Metadata["SecondOffset"] = pairwiseAlignedSequence.SecondOffset;
    pairwiseAlignedSequence.Metadata["Consensus"] = pairwiseAlignedSequence.Consensus;
    pairwiseAlignedSequence.Metadata["StartOffsets"] = new List<long> { j, i };
    pairwiseAlignedSequence.Metadata["EndOffsets"] = new List<long> { startingCell.Col - 1, startingCell.Row - 1 };
    pairwiseAlignedSequence.Metadata["Insertions"] = new List<long> { colGaps, rowGaps }; // ref, query insertions
    pairwiseAlignedSequence.Metadata["IdenticalCount"] = identicalCount;
    pairwiseAlignedSequence.Metadata["SimilarityCount"] = similarityCount;

    return pairwiseAlignedSequence;
}
/// <summary>
/// Launches the alignment algorithm: scores the grid block by block, then traces
/// back from every optimal-score cell to build the aligned sequences.
/// </summary>
/// <returns>
/// An empty list when no optimal-score cell was recorded; otherwise a list holding a
/// single <see cref="PairwiseSequenceAlignment"/> with one aligned pair per traceback
/// that reached a <c>Stop</c> cell.
/// </returns>
public virtual List<IPairwiseSequenceAlignment> Align()
{
    InitializeCache();

    // Grid: sweep the blocks one anti-diagonal at a time. The bottom-right corner block
    // lies on diagonal (gridRows + gridCols - 2) and is deliberately left out here;
    // it is handled by ComputeCornerBlock below.
    for (int diagonal = 0; diagonal < gridCols + gridRows - 2; diagonal++)
    {
        for (int blockRow = 0; blockRow < gridRows; blockRow++)
        {
            int blockCol = diagonal - blockRow;

            if ((blockCol >= 0) && (blockCol < gridCols))
            {
                // Blocks in the last grid row/column only extend to the matrix edge.
                int lastRow = (blockRow == gridRows - 1) ? (int)(colHeight - Math.BigMul(blockRow, gridStride) - 1) : gridStride;
                int lastCol = (blockCol == gridCols - 1) ? (int)(rowWidth - Math.BigMul(blockCol, gridStride) - 1) : gridStride;

                ComputeIntermediateBlock(blockRow, blockCol, lastRow, lastCol);
            }
        }
    }

    // Traceback buffer for one block: (gridStride + 1) x (gridStride + 1).
    sbyte[][] trace = new sbyte[gridStride + 1][];
    for (int i = 0; i <= gridStride; i++)
    {
        trace[i] = new sbyte[gridStride + 1];
    }

    // Last Block - grid calculation and Traceback combined
    int completeTraceRow = gridRows - 1;
    int completeTraceCol = gridCols - 1;
    int completeLastRow = (int)(colHeight - Math.BigMul(completeTraceRow, gridStride) - 1);
    int completeLastCol = (int)(rowWidth - Math.BigMul(completeTraceCol, gridStride) - 1);

    ComputeCornerBlock(completeTraceRow, completeTraceCol, completeLastRow, completeLastCol, trace);

    // Traceback
    if (optScoreCells.Count == 0)
    {
        return new List<IPairwiseSequenceAlignment>();
    }

    PairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(sequenceI, sequenceJ);

    for (int alignmentCount = 0; alignmentCount < optScoreCells.Count; alignmentCount++)
    {
        PairwiseAlignedSequence result = new PairwiseAlignedSequence();
        result.Score = optScore;

        long alignmentRow = optScoreCells[alignmentCount].Item1;
        long alignmentCol = optScoreCells[alignmentCount].Item2;

        // Split the matrix position into a block index and a position inside that block.
        int blockRow = (int)(alignmentRow / gridStride);
        int blockCol = (int)(alignmentCol / gridStride);

        int lastRow = (int)(alignmentRow - Math.BigMul(blockRow, gridStride));
        int lastCol = (int)(alignmentCol - Math.BigMul(blockCol, gridStride));

        result.Metadata["EndOffsets"] = new List<long> { alignmentRow - 1, alignmentCol - 1 };

        // The aligned symbols are collected back to front; colHeight + rowWidth is
        // used as the upper bound on the alignment length.
        long alignmentLength = 0;
        byte[] sequence1 = new byte[colHeight + rowWidth];
        byte[] sequence2 = new byte[colHeight + rowWidth];

        int colGaps = 0;
        int rowGaps = 0;

        while ((blockRow >= 0) && (blockCol >= 0))
        {
            // Recompute the trace unless the buffer already holds this block
            // up to at least (lastRow, lastCol).
            if ((blockRow != completeTraceRow) || (blockCol != completeTraceCol) || (lastRow > completeLastRow) || (lastCol > completeLastCol))
            {
                ComputeTraceBlock(blockRow, blockCol, lastRow, lastCol, trace);

                completeTraceRow = blockRow;
                completeTraceCol = blockCol;
                completeLastRow = lastRow;
                completeLastCol = lastCol;
            }

            // Index in the input sequences of the symbol just before this block.
            // BigMul keeps the product in 64-bit, matching the other block-origin
            // computations in this method; a plain int multiplication could overflow
            // before being widened to long.
            long startPositionI = Math.BigMul(blockRow, gridStride) - 1;
            long startPositionJ = Math.BigMul(blockCol, gridStride) - 1;

            while ((trace[lastRow][lastCol] != SourceDirection.Stop) && (trace[lastRow][lastCol] != SourceDirection.Block))
            {
                switch (trace[lastRow][lastCol])
                {
                    case SourceDirection.Diagonal:
                        // diagonal, no gap, use both sequence residues
                        sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                        sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                        alignmentLength++;
                        lastRow--;
                        lastCol--;
                        break;

                    case SourceDirection.Up:
                        // up, gap in J
                        sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                        sequence2[alignmentLength] = this.gapCode;
                        alignmentLength++;
                        lastRow--;
                        colGaps++;
                        break;

                    case SourceDirection.Left:
                        // left, gap in I
                        sequence1[alignmentLength] = this.gapCode;
                        sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                        alignmentLength++;
                        lastCol--;
                        rowGaps++;
                        break;
                }
            }

            if (trace[lastRow][lastCol] == SourceDirection.Stop)
            {
                // Be nice, turn aligned solutions around so that they match the input sequences
                byte[] alignedA = new byte[alignmentLength];
                byte[] alignedB = new byte[alignmentLength];
                for (long i = 0, j = alignmentLength - 1; i < alignmentLength; i++, j--)
                {
                    alignedA[i] = sequence1[j];
                    alignedB[i] = sequence2[j];
                }

                // If alphabet of inputA is DnaAlphabet then alphabet of alignedA may be Dna or AmbiguousDna.
                IAlphabet alphabet = Alphabets.AutoDetectAlphabet(alignedA, 0, alignedA.LongLength, sequenceI.Alphabet);
                Sequence seq = new Sequence(alphabet, alignedA, false);
                seq.ID = sequenceI.ID;
                // seq.DisplayID = aInput.DisplayID;
                result.FirstSequence = seq;

                alphabet = Alphabets.AutoDetectAlphabet(alignedB, 0, alignedB.LongLength, sequenceJ.Alphabet);
                seq = new Sequence(alphabet, alignedB, false);
                seq.ID = sequenceJ.ID;
                // seq.DisplayID = bInput.DisplayID;
                result.SecondSequence = seq;

                // Offset is start of alignment in input sequence with respect to other sequence.
                // NOTE(review): lastRow/lastCol are positions inside the current block here;
                // confirm that block-relative values are intended when the alignment
                // starts outside block (0, 0).
                if (lastCol >= lastRow)
                {
                    result.FirstOffset = lastCol - lastRow;
                    result.SecondOffset = 0;
                }
                else
                {
                    result.FirstOffset = 0;
                    result.SecondOffset = lastRow - lastCol;
                }

                result.Metadata["StartOffsets"] = new List<long> { lastRow, lastCol };
                result.Metadata["Insertions"] = new List<long> { rowGaps, colGaps };
                alignment.PairwiseAlignedSequences.Add(result);

                break;
            }
            else
            {
                // Hit a block boundary: continue in the neighbouring block,
                // entering it at its far edge.
                if (lastRow == 0 && lastCol == 0)
                {
                    blockRow--;
                    blockCol--;
                    lastRow = gridStride;
                    lastCol = gridStride;
                }
                else
                {
                    if (lastRow == 0)
                    {
                        blockRow--;
                        lastRow = gridStride;
                    }
                    else
                    {
                        blockCol--;
                        lastCol = gridStride;
                    }
                }
            }
        }
    }

    return new List<IPairwiseSequenceAlignment>() { alignment };
}
/// <summary>
/// Validates the MUMmer align method for several test cases for the parameters passed.
/// Reads the inputs and the expected score/sequences from the xml configuration node,
/// runs the aligner and compares the result with the expected alignment.
/// </summary>
/// <param name="nodeName">Node name to be read from xml</param>
/// <param name="isFilePath">
/// True to read reference and query sequences from FASTA files named in the node;
/// false to read the sequence strings and alphabets directly from the node.
/// </param>
/// <param name="isSeqList">Is MUMmer alignment with List of sequences</param>
void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath, bool isSeqList)
{
    ISequence referenceSeq;
    ISequence querySeq;
    IList<ISequence> querySeqs;
    string referenceSequence;
    string querySequence;
    IList<IPairwiseSequenceAlignment> align;

    if (isFilePath)
    {
        // Gets the reference sequence from the configuration file
        string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the File Path '{0}'.", filePath));

        // Close the parser even when an assertion below throws.
        FastAParser parser = new FastAParser();
        try
        {
            IEnumerable<ISequence> referenceSeqs = parser.Parse(filePath);
            referenceSeq = referenceSeqs.FirstOrDefault();
            Assert.IsNotNull(referenceSeq);
            referenceSequence = referenceSeq.ConvertToString();
        }
        finally
        {
            parser.Close();
        }

        // Gets the query sequence from the configuration file
        string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryFilePath));

        FastAParser queryParserObj = new FastAParser();
        try
        {
            querySeqs = queryParserObj.Parse(queryFilePath).ToList();
            querySeq = querySeqs.FirstOrDefault();
            Assert.IsNotNull(querySeq);
            querySequence = querySeq.ConvertToString();
        }
        finally
        {
            queryParserObj.Close();
        }
    }
    else
    {
        // Gets the reference sequence from the configuration file
        referenceSequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceSeqAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceSeqAlphabet), referenceSequence);

        // Gets the query sequence from the configuration file
        querySequence = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string querySeqAlphabet = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
        querySeq = new Sequence(Utility.GetAlphabet(querySeqAlphabet), querySequence);
        querySeqs = new List<ISequence> { querySeq };
    }

    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    var mumAlignObj = new Bio.Algorithms.MUMmer.MUMmerAligner
    {
        LengthOfMUM = long.Parse(mumLength, null),
        GapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null)
    };

    if (isSeqList)
    {
        // The list overload receives the query sequences followed by the reference.
        querySeqs.Add(referenceSeq);
        align = mumAlignObj.Align(querySeqs);
    }
    else
    {
        align = mumAlignObj.AlignSimple(referenceSeq, querySeqs);
    }

    // Validate the score of the first aligned pair.
    string expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    Assert.AreEqual(expectedScore, align[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null));
    ApplicationLog.WriteLine(string.Format(null, "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.", referenceSequence, querySequence));

    // Build the expected alignment; offsets are set to Int32.MinValue placeholders.
    string[] expectedSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment seqAlign = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
        SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue
    };
    seqAlign.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(seqAlign);
    Assert.IsTrue(CompareAlignment(align, expectedOutput));
    ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
}
/// <summary>
/// Launches the alignment algorithm: scores the grid block by block, then traces
/// back from every optimal-score cell to build the aligned sequences.
/// </summary>
/// <returns>
/// An empty list when no optimal-score cell was recorded; otherwise a list holding a
/// single <see cref="PairwiseSequenceAlignment"/> with one aligned pair per traceback
/// that reached a <c>Stop</c> cell.
/// </returns>
public virtual List<IPairwiseSequenceAlignment> Align()
{
    InitializeCache();

    // Grid: sweep the blocks one anti-diagonal at a time. The bottom-right corner block
    // lies on diagonal (gridRows + gridCols - 2) and is deliberately left out here;
    // it is handled by ComputeCornerBlock below.
    for (int diagonal = 0; diagonal < gridCols + gridRows - 2; diagonal++)
    {
        for (int blockRow = 0; blockRow < gridRows; blockRow++)
        {
            int blockCol = diagonal - blockRow;

            if ((blockCol >= 0) && (blockCol < gridCols))
            {
                // Blocks in the last grid row/column only extend to the matrix edge.
                int lastRow = (blockRow == gridRows - 1) ? (int)(colHeight - Helper.BigMul(blockRow, gridStride) - 1) : gridStride;
                int lastCol = (blockCol == gridCols - 1) ? (int)(rowWidth - Helper.BigMul(blockCol, gridStride) - 1) : gridStride;

                ComputeIntermediateBlock(blockRow, blockCol, lastRow, lastCol);
            }
        }
    }

    // Traceback buffer for one block: (gridStride + 1) x (gridStride + 1).
    sbyte[][] trace = new sbyte[gridStride + 1][];
    for (int i = 0; i <= gridStride; i++)
    {
        trace[i] = new sbyte[gridStride + 1];
    }

    // Last Block - grid calculation and Traceback combined
    int completeTraceRow = gridRows - 1;
    int completeTraceCol = gridCols - 1;
    int completeLastRow = (int)(colHeight - Helper.BigMul(completeTraceRow, gridStride) - 1);
    int completeLastCol = (int)(rowWidth - Helper.BigMul(completeTraceCol, gridStride) - 1);

    ComputeCornerBlock(completeTraceRow, completeTraceCol, completeLastRow, completeLastCol, trace);

    // Traceback
    if (optScoreCells.Count == 0)
    {
        return new List<IPairwiseSequenceAlignment>();
    }

    PairwiseSequenceAlignment alignment = new PairwiseSequenceAlignment(sequenceI, sequenceJ);

    for (int alignmentCount = 0; alignmentCount < optScoreCells.Count; alignmentCount++)
    {
        PairwiseAlignedSequence result = new PairwiseAlignedSequence();
        result.Score = optScore;

        long alignmentRow = optScoreCells[alignmentCount].Item1;
        long alignmentCol = optScoreCells[alignmentCount].Item2;

        // Split the matrix position into a block index and a position inside that block.
        int blockRow = (int)(alignmentRow / gridStride);
        int blockCol = (int)(alignmentCol / gridStride);

        int lastRow = (int)(alignmentRow - Helper.BigMul(blockRow, gridStride));
        int lastCol = (int)(alignmentCol - Helper.BigMul(blockCol, gridStride));

        result.Metadata["EndOffsets"] = new List<long> { alignmentRow - 1, alignmentCol - 1 };

        // The aligned symbols are collected back to front; colHeight + rowWidth is
        // used as the upper bound on the alignment length.
        long alignmentLength = 0;
        byte[] sequence1 = new byte[colHeight + rowWidth];
        byte[] sequence2 = new byte[colHeight + rowWidth];

        int colGaps = 0;
        int rowGaps = 0;

        while ((blockRow >= 0) && (blockCol >= 0))
        {
            // Recompute the trace unless the buffer already holds this block
            // up to at least (lastRow, lastCol).
            if ((blockRow != completeTraceRow) || (blockCol != completeTraceCol) || (lastRow > completeLastRow) || (lastCol > completeLastCol))
            {
                ComputeTraceBlock(blockRow, blockCol, lastRow, lastCol, trace);

                completeTraceRow = blockRow;
                completeTraceCol = blockCol;
                completeLastRow = lastRow;
                completeLastCol = lastCol;
            }

            // Index in the input sequences of the symbol just before this block.
            // Helper.BigMul keeps the product in 64-bit, matching the other block-origin
            // computations in this method; a plain int multiplication could overflow
            // before being widened to long.
            long startPositionI = Helper.BigMul(blockRow, gridStride) - 1;
            long startPositionJ = Helper.BigMul(blockCol, gridStride) - 1;

            while ((trace[lastRow][lastCol] != SourceDirection.Stop) && (trace[lastRow][lastCol] != SourceDirection.Block))
            {
                switch (trace[lastRow][lastCol])
                {
                    case SourceDirection.Diagonal:
                        // diagonal, no gap, use both sequence residues
                        sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                        sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                        alignmentLength++;
                        lastRow--;
                        lastCol--;
                        break;

                    case SourceDirection.Up:
                        // up, gap in J
                        sequence1[alignmentLength] = sequenceI[startPositionI + lastRow];
                        sequence2[alignmentLength] = this.gapCode;
                        alignmentLength++;
                        lastRow--;
                        colGaps++;
                        break;

                    case SourceDirection.Left:
                        // left, gap in I
                        sequence1[alignmentLength] = this.gapCode;
                        sequence2[alignmentLength] = sequenceJ[startPositionJ + lastCol];
                        alignmentLength++;
                        lastCol--;
                        rowGaps++;
                        break;
                }
            }

            if (trace[lastRow][lastCol] == SourceDirection.Stop)
            {
                // Be nice, turn aligned solutions around so that they match the input sequences
                byte[] alignedA = new byte[alignmentLength];
                byte[] alignedB = new byte[alignmentLength];
                for (long i = 0, j = alignmentLength - 1; i < alignmentLength; i++, j--)
                {
                    alignedA[i] = sequence1[j];
                    alignedB[i] = sequence2[j];
                }

                // If alphabet of inputA is DnaAlphabet then alphabet of alignedA may be Dna or AmbiguousDna.
                IAlphabet alphabet = Alphabets.AutoDetectAlphabet(alignedA, 0, alignedA.GetLongLength(), sequenceI.Alphabet);
                Sequence seq = new Sequence(alphabet, alignedA, false);
                seq.ID = sequenceI.ID;
                // seq.DisplayID = aInput.DisplayID;
                result.FirstSequence = seq;

                alphabet = Alphabets.AutoDetectAlphabet(alignedB, 0, alignedB.GetLongLength(), sequenceJ.Alphabet);
                seq = new Sequence(alphabet, alignedB, false);
                seq.ID = sequenceJ.ID;
                // seq.DisplayID = bInput.DisplayID;
                result.SecondSequence = seq;

                // Offset is start of alignment in input sequence with respect to other sequence.
                // NOTE(review): lastRow/lastCol are positions inside the current block here;
                // confirm that block-relative values are intended when the alignment
                // starts outside block (0, 0).
                if (lastCol >= lastRow)
                {
                    result.FirstOffset = lastCol - lastRow;
                    result.SecondOffset = 0;
                }
                else
                {
                    result.FirstOffset = 0;
                    result.SecondOffset = lastRow - lastCol;
                }

                result.Metadata["StartOffsets"] = new List<long> { lastRow, lastCol };
                result.Metadata["Insertions"] = new List<long> { rowGaps, colGaps };
                alignment.PairwiseAlignedSequences.Add(result);

                break;
            }
            else
            {
                // Hit a block boundary: continue in the neighbouring block,
                // entering it at its far edge.
                if (lastRow == 0 && lastCol == 0)
                {
                    blockRow--;
                    blockCol--;
                    lastRow = gridStride;
                    lastCol = gridStride;
                }
                else
                {
                    if (lastRow == 0)
                    {
                        blockRow--;
                        lastRow = gridStride;
                    }
                    else
                    {
                        blockCol--;
                        lastCol = gridStride;
                    }
                }
            }
        }
    }

    return new List<IPairwiseSequenceAlignment>() { alignment };
}
/// <summary>
/// Aligns every gap around and between the MUMs in the final MUM list and
/// assembles the pieces into one pairwise aligned sequence.
/// </summary>
/// <param name="referenceSequence">Reference sequence</param>
/// <param name="sequence">Query sequence</param>
/// <returns>Aligned sequences with accumulated score, offsets and metadata</returns>
private PairwiseAlignedSequence ProcessGaps(
    ISequence referenceSequence,
    ISequence sequence)
{
    PairwiseAlignedSequence alignedSequence = new PairwiseAlignedSequence();

    // Writable result sequences; all three are created with the reference alphabet.
    Sequence alignedReference = new Sequence(referenceSequence.Alphabet)
    {
        IsReadOnly = false,
        ID = referenceSequence.ID,
        DisplayID = referenceSequence.DisplayID
    };

    Sequence alignedQuery = new Sequence(referenceSequence.Alphabet)
    {
        IsReadOnly = false,
        ID = sequence.ID,
        DisplayID = sequence.DisplayID
    };

    Sequence consensus = new Sequence(referenceSequence.Alphabet)
    {
        IsReadOnly = false,
        ID = sequence.ID,
        DisplayID = sequence.DisplayID
    };

    // Running totals of the insertion counts reported by each AlignGap call.
    List<int> totalInsertions = new List<int>(2) { 0, 0 };
    List<int> gapInsertions;

    // Gap in front of the first MUM (there is no preceding MUM, hence null).
    MaxUniqueMatch previousMum = _finalMumList[0];
    alignedSequence.Score += AlignGap(
        referenceSequence,
        sequence,
        alignedReference,
        alignedQuery,
        consensus,
        null,
        previousMum,
        out gapInsertions);
    totalInsertions[0] += gapInsertions[0];
    totalInsertions[1] += gapInsertions[1];

    // Gaps between each pair of consecutive MUMs.
    for (int mumIndex = 1; mumIndex < _finalMumList.Count; mumIndex++)
    {
        MaxUniqueMatch currentMum = _finalMumList[mumIndex];

        alignedSequence.Score += AlignGap(
            referenceSequence,
            sequence,
            alignedReference,
            alignedQuery,
            consensus,
            previousMum,
            currentMum,
            out gapInsertions);
        totalInsertions[0] += gapInsertions[0];
        totalInsertions[1] += gapInsertions[1];

        previousMum = currentMum;
    }

    // Gap behind the last MUM (there is no following MUM, hence null).
    alignedSequence.Score += AlignGap(
        referenceSequence,
        sequence,
        alignedReference,
        alignedQuery,
        consensus,
        previousMum,
        null,
        out gapInsertions);
    totalInsertions[0] += gapInsertions[0];
    totalInsertions[1] += gapInsertions[1];

    alignedSequence.FirstSequence = alignedReference;
    alignedSequence.SecondSequence = alignedQuery;
    alignedSequence.Consensus = consensus;

    // Offsets are only filled in when the pairwise algorithm is Needleman-Wunsch.
    // Offset is the starting position of alignment of sequence1 with respect to sequence2.
    if (PairWiseAlgorithm is NeedlemanWunschAligner)
    {
        alignedSequence.FirstOffset = alignedReference.IndexOfNonGap() - referenceSequence.IndexOfNonGap();
        alignedSequence.SecondOffset = alignedQuery.IndexOfNonGap() - sequence.IndexOfNonGap();
    }

    // Start/end offsets span both input sequences in full.
    List<int> startOffsets = new List<int>(2) { 0, 0 };
    List<int> endOffsets = new List<int>(2)
    {
        referenceSequence.Count - 1,
        sequence.Count - 1
    };

    alignedSequence.Metadata["StartOffsets"] = startOffsets;
    alignedSequence.Metadata["EndOffsets"] = endOffsets;
    alignedSequence.Metadata["Insertions"] = totalInsertions;

    return alignedSequence;
}
/// <summary>
/// Deletes the given aligned sequence from this PairwiseSequenceAlignment.
/// A read-only alignment rejects the call with an exception.
/// </summary>
/// <param name="item">Aligned sequence object.</param>
/// <returns>True if item was removed, false if item was not found.</returns>
public bool Remove(PairwiseAlignedSequence item)
{
    if (!IsReadOnly)
    {
        return alignedSequences.Remove(item);
    }

    throw new NotSupportedException(Properties.Resource.READ_ONLY_COLLECTION_MESSAGE);
}
/// <summary>
/// Copies the aligned sequences from the PairwiseSequenceAlignment into an existing aligned sequence array.
/// </summary>
/// <param name="array">Array into which to copy the sequences.</param>
/// <param name="arrayIndex">Starting index in array at which to begin the copy.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="array"/> is null.</exception>
public void CopyTo(PairwiseAlignedSequence[] array, int arrayIndex)
{
    if (array == null)
    {
        throw new ArgumentNullException(Properties.Resource.ParameterNameArray);
    }

    // Delegate to the underlying collection so that a bad index or a too-small
    // destination is rejected up front with the standard argument exceptions,
    // instead of failing with IndexOutOfRangeException after a partial copy.
    alignedSequences.CopyTo(array, arrayIndex);
}
/// <summary>
/// Runs SmithWatermanAligner on the sequences described by an xml configuration node
/// and compares the result with the expected alignment stored in the same node.
/// </summary>
/// <param name="nodeName">Node name to be read from xml.</param>
/// <param name="isTextFile">True to read the input sequences from FASTA files, false to read sequence strings from the node.</param>
/// <param name="caseType">Letter case in which the input sequences are created.</param>
/// <param name="additionalParameter">Selects which Align/AlignSimple overload is called.</param>
/// <param name="alignType">Selects Align (gap extension expectations) or AlignSimple.</param>
/// <param name="similarityMatrixParam">Selects how the similarity matrix is constructed.</param>
private void ValidateSmithWatermanAlignment(string nodeName, bool isTextFile, SequenceCaseType caseType,
    AlignParameters additionalParameter, AlignmentType alignType,
    SimilarityMatrixParameters similarityMatrixParam)
{
    Sequence aInput, bInput;
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the xml file for getting both the files for aligning.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        // Parse the files and get the sequence.
        ISequence originalSequence1 = null;
        ISequence originalSequence2 = null;

        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        originalSequence1 = parseObjectForFile1.Parse(filePath1).ElementAt(0);
        originalSequence2 = parseObjectForFile1.Parse(filePath2).ElementAt(0);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(originalSequence1.ConvertToString(), originalSequence2.ConvertToString(),
            alphabet, caseType, out aInput, out bInput);
    }
    else
    {
        string originalSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string originalSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        // Create input sequence for sequence string in different cases.
        GetSequenceWithCaseType(
            originalSequence1,
            originalSequence2,
            alphabet,
            caseType,
            out aInput,
            out bInput);
    }

    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : First sequence used is '{0}'.", aInput.ConvertToString()));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Second sequence used is '{0}'.", bInput.ConvertToString()));

    // Create similarity matrix object for a given file.
    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;

    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.TextReader:
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }
            break;
        case SimilarityMatrixParameters.DiagonalMatrix:
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;
        default:
            // Dispose the reader once the matrix is built so the file handle is not leaked.
            using (StreamReader matrixReader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(matrixReader);
            }
            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    // Create SmithWatermanAligner instance and set its values.
    // For AllParam the matrix and gap costs are passed to the Align call instead.
    var smithWatermanObj = new SmithWatermanAligner();
    if (additionalParameter != AlignParameters.AllParam)
    {
        smithWatermanObj.SimilarityMatrix = sm;
        smithWatermanObj.GapOpenCost = gapOpenCost;
        smithWatermanObj.GapExtensionCost = gapExtensionCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    // Align the input sequences.
    switch (additionalParameter)
    {
        case AlignParameters.AlignList:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(new List<ISequence> { aInput, bInput });
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(new List<ISequence> { aInput, bInput });
                    break;
            }
            break;
        case AlignParameters.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        case AlignParameters.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        default:
            break;
    }

    // Get the expected sequence and score from xml config.
    string expectedSequence1, expectedSequence2, expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);

            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1InLower);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2InLower);
                    break;
                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
                    break;
            }
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);

            switch (caseType)
            {
                case SequenceCaseType.LowerCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence2inLowerNode);
                    break;
                case SequenceCaseType.LowerUpperCase:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequence1inLowerNode);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
                default:
                    expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
                    expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
                    break;
            }
            break;
    }

    // Match the alignment result with expected result.
    // Offsets are set to Int32.MinValue placeholders in the expected alignment.
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue,
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner P2 : Aligned Second Sequence is '{0}'.", expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Checks whether the given aligned sequence is present in the list of
/// aligned sequences held by this PairwiseSequenceAlignment.
/// </summary>
/// <param name="item">PairwiseAlignedSequence object.</param>
/// <returns>True if contains item, otherwise returns false.</returns>
public bool Contains(PairwiseAlignedSequence item)
{
    bool isPresent = alignedSequences.Contains(item);
    return isPresent;
}
/// <summary>
/// Aligns a reference and a query that share multiple MUMs using MUMmerAligner with a
/// Needleman-Wunsch pairwise aligner, a match/mismatch scoring matrix and explicit gap
/// costs, then compares the result with the expected alignment.
/// </summary>
public void TestMUMmer3MultipleMumWithCustomMatrix()
{
    string reference = "ATGCGCATCCCCTT";
    string search = "GCGCCCCCTA";

    Sequence referenceSeq = new Sequence(Alphabets.DNA, reference);
    Sequence searchSeq = new Sequence(Alphabets.DNA, search);

    List<ISequence> searchSeqs = new List<ISequence>();
    searchSeqs.Add(searchSeq);

    // Scoring: +3 for a match, -2 for a mismatch. The hand-filled 256x256 table that
    // used to be built here was never passed to the aligner, so it has been removed.
    DiagonalSimilarityMatrix matrix = new DiagonalSimilarityMatrix(3, -2);

    int gapOpenCost = -6;

    MUMmerAligner mummer = new MUMmerAligner();
    mummer.LengthOfMUM = 4;
    mummer.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mummer.SimilarityMatrix = matrix;
    mummer.GapOpenCost = gapOpenCost;
    mummer.GapExtensionCost = -2;

    IList<IPairwiseSequenceAlignment> result = mummer.AlignSimple(referenceSeq, searchSeqs);

    // Check if output is not null
    Assert.IsNotNull(result);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
    alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA");
    alignedSeq.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW");
    alignedSeq.Score = 1;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 2;
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);
    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Validates that PairwiseSequenceAlignment.ToString() emits the consensus, first and
/// second sequence on separate lines followed by a blank line.
/// </summary>
public void ValidatePairwiseSequenceAlignmentToString()
{
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.SecondSequence = new Sequence(Alphabets.Protein, "AW-HE");
    alignedSeq.Consensus = new Sequence(Alphabets.Protein, "AWGHE");
    alignedSeq.Score = 28;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 3;
    align.PairwiseAlignedSequences.Add(alignedSeq);

    string actualString = align.ToString();

    // The literal uses "\r\n"; normalise it to the platform line ending before comparing.
    string expectedString = "AWGHE\r\nAWGHE\r\nAW-HE\r\n\r\n".Replace("\r\n", System.Environment.NewLine);

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Aligns a reference against one query with MUMmer (minimum MUM length 4,
/// Needleman-Wunsch as the pairwise algorithm, default scoring) and compares
/// the result with the expected alignment.
/// </summary>
public void TestMUMmerAlignerMultipleMum()
{
    Sequence referenceSeq = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
    Sequence querySeq = new Sequence(Alphabets.DNA, "GCGCCCCCTA");
    List<ISequence> queries = new List<ISequence> { querySeq };

    MUMmerAligner aligner = new MUMmerAligner
    {
        LengthOfMUM = 4,
        PairWiseAlgorithm = new NeedlemanWunschAligner()
    };

    IList<IPairwiseSequenceAlignment> result = aligner.AlignSimple(referenceSeq, queries);

    // The aligner must hand back a result object.
    Assert.AreNotEqual(null, result);

    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT"),
        SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA"),
        Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW"),
        Score = -11,
        FirstOffset = 0,
        SecondOffset = 2
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    expectedOutput.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Builds one pairwise alignment by running the gap aligner over every stretch
/// that lies before the first MUM, between consecutive MUMs and after the last MUM.
/// </summary>
/// <param name="referenceSequence">Reference sequence.</param>
/// <param name="sequence">Query sequence.</param>
/// <param name="mums">List of MUMs.</param>
/// <returns>Aligned sequences.</returns>
private PairwiseAlignedSequence ProcessGaps(
    ISequence referenceSequence,
    ISequence sequence,
    IList<Match> mums)
{
    List<byte> alignedReference = new List<byte>();
    List<byte> alignedQuery = new List<byte>();
    List<byte> alignedConsensus = new List<byte>();
    PairwiseAlignedSequence alignedSequence = new PairwiseAlignedSequence();

    // Running totals of the two insertion counts reported by each AlignGap call.
    List<long> insertions = new List<long>(2) { 0, 0 };

    // Fetch the first MUM up front: an empty list fails here, before any alignment work.
    Match firstMum = mums.First();

    // Pad the MUM list with a zero-length match on each side so that the leading
    // and trailing stretches are handled by the same loop as the inner ones.
    List<Match> anchors = new List<Match>(mums.Count + 2);
    anchors.Add(new Match() { Length = 0 }); // nothing precedes the first MUM
    anchors.Add(firstMum);
    for (int index = 1; index < mums.Count; index++)
    {
        anchors.Add(mums[index]);
    }

    anchors.Add(new Match() { Length = 0 }); // nothing follows the last MUM

    for (int index = 1; index < anchors.Count; index++)
    {
        List<long> gapInsertions;
        alignedSequence.Score += this.AlignGap(
            referenceSequence,
            sequence,
            alignedReference,
            alignedQuery,
            alignedConsensus,
            anchors[index - 1],
            anchors[index],
            out gapInsertions);

        insertions[0] += gapInsertions[0];
        insertions[1] += gapInsertions[1];
    }

    // Wrap the accumulated bytes in sequences. Only the ID is carried over from the
    // inputs; the metadata dictionary is deliberately not shallow-copied.
    byte[] referenceBytes = alignedReference.ToArray();
    IAlphabet detected = Alphabets.AutoDetectAlphabet(
        referenceBytes, 0, referenceBytes.GetLongLength(), referenceSequence.Alphabet);
    alignedSequence.FirstSequence = new Sequence(detected, referenceBytes)
    {
        ID = referenceSequence.ID
    };

    byte[] queryBytes = alignedQuery.ToArray();
    detected = Alphabets.AutoDetectAlphabet(
        queryBytes, 0, queryBytes.GetLongLength(), sequence.Alphabet);
    alignedSequence.SecondSequence = new Sequence(detected, queryBytes)
    {
        ID = sequence.ID
    };

    byte[] consensusBytes = alignedConsensus.ToArray();
    detected = Alphabets.AutoDetectAlphabet(
        consensusBytes, 0, consensusBytes.GetLongLength(), referenceSequence.Alphabet);
    alignedSequence.Consensus = new Sequence(detected, consensusBytes);

    // Offsets are only filled in for Needleman-Wunsch. They are not required for
    // Smith-Waterman, which produces fragmented alignments.
    // Offset is the starting position of alignment of sequence1 with respect to sequence2.
    if (this.PairWiseAlgorithm is NeedlemanWunschAligner)
    {
        alignedSequence.FirstOffset =
            alignedSequence.FirstSequence.IndexOfNonGap() - referenceSequence.IndexOfNonGap();
        alignedSequence.SecondOffset =
            alignedSequence.SecondSequence.IndexOfNonGap() - sequence.IndexOfNonGap();
    }

    // The recorded start/end offsets span both input sequences in full.
    List<long> startOffsets = new List<long>(2) { 0, 0 };
    List<long> endOffsets = new List<long>(2);
    endOffsets.Add(referenceSequence.Count - 1);
    endOffsets.Add(sequence.Count - 1);

    alignedSequence.Metadata["StartOffsets"] = startOffsets;
    alignedSequence.Metadata["EndOffsets"] = endOffsets;
    alignedSequence.Metadata["Insertions"] = insertions;

    return alignedSequence;
}
/// <summary>
/// Runs a MUMmer alignment (Needleman-Wunsch pairwise algorithm) on the reference
/// and query FASTA files named under the given configuration node and compares
/// the result with the expected sequences and score stored under the same node.
/// </summary>
/// <param name="nodeName">Configuration node holding the inputs and expected values.</param>
private void ValidateMUMmerAlignGeneralTestCases(string nodeName)
{
    // Reference sequence: path comes from the configuration file.
    string referencePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    Assert.IsNotNull(referencePath);
    ApplicationLog.WriteLine(string.Format(null,
        "MUMmer P2 : Successfully validated the File Path '{0}'.", referencePath));

    FastAParser referenceParser = new FastAParser();
    ISequence referenceSeq = referenceParser.Parse(referencePath).ElementAt(0);

    // Query sequences: path comes from the configuration file as well.
    string queryPath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
    Assert.IsNotNull(queryPath);
    ApplicationLog.WriteLine(string.Format(null,
        "MUMmer P2 : Successfully validated the Search File Path '{0}'.", queryPath));

    FastAParser queryParser = new FastAParser();
    IEnumerable<ISequence> querySeqs = queryParser.Parse(queryPath);

    // Aligner settings read from the configuration node.
    string mumLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);
    int gapOpenCost = int.Parse(utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

    MUMmerAligner aligner = new MUMmerAligner
    {
        LengthOfMUM = long.Parse(mumLength, null),
        StoreMUMs = true,
        PairWiseAlgorithm = new NeedlemanWunschAligner(),
        GapOpenCost = gapOpenCost
    };

    IList<IPairwiseSequenceAlignment> actualAlignments = aligner.Align(referenceSeq, querySeqs);

    // With StoreMUMs switched on, the MUMs property must be populated.
    Assert.IsNotNull(aligner.MUMs);

    string expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    string[] expectedSequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
        SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue,
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment> { expectedAlignment };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(actualAlignments, expectedOutput));
    ApplicationLog.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
}
/// <summary>
/// Left-aligns the indels of a hand-written alignment and checks both the shifted
/// sequences and the three indel variants that are called from it.
/// </summary>
public static void TestLeftAlignmentStep()
{
    var reference = new Sequence(DnaAlphabet.Instance, "ACAATAAAAGCGCGCGCGCGTTACGTATAT--ATGGATAT");
    var query = new Sequence(DnaAlphabet.Instance, "ACAATAA-AGC--GCGC--GTTACGTATATATATGGATAT");

    var alignment = new PairwiseSequenceAlignment(reference, query);
    var inputPair = new PairwiseAlignedSequence();
    inputPair.FirstSequence = reference;
    inputPair.SecondSequence = query;
    alignment.Add(inputPair);

    var outcome = VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true);

    // Check the left alignment
    alignment = outcome.Item1 as PairwiseSequenceAlignment;
    var shiftedReference = alignment.PairwiseAlignedSequences[0].FirstSequence.ConvertToString();
    var shiftedQuery = alignment.PairwiseAlignedSequences[0].SecondSequence.ConvertToString();
    Assert.AreEqual("ACAATAAAAGCGCGCGCGCGTTACG--TATATATGGATAT", shiftedReference);
    Assert.AreEqual("ACAAT-AAA----GCGCGCGTTACGTATATATATGGATAT", shiftedQuery);

    // And it's hard, so we might as well check the variants
    var variants = outcome.Item2;
    Assert.AreEqual(3, variants.Count);

    // One row per expected variant, in the order the caller reports them.
    var expected = new[]
    {
        new { Bases = "A", HpBase = 'A', InHp = true, Length = 1, Start = 4, Kind = IndelType.Deletion },
        new { Bases = "GCGC", HpBase = 'G', InHp = false, Length = 4, Start = 8, Kind = IndelType.Deletion },
        new { Bases = "TA", HpBase = 'T', InHp = false, Length = 2, Start = 24, Kind = IndelType.Insertion }
    };

    for (int index = 0; index < expected.Length; index++)
    {
        var want = expected[index];
        Assert.AreEqual(VariantType.INDEL, variants[index].Type);

        var indel = variants[index] as IndelVariant;
        Assert.AreEqual(want.HpBase, indel.HomopolymerBase);
        Assert.AreEqual(want.Start, indel.StartPosition);
        Assert.AreEqual(want.Length, indel.Length);
        Assert.AreEqual(want.Bases, indel.InsertedOrDeletedBases);
        Assert.AreEqual(want.InHp, indel.InHomopolymer);
        Assert.AreEqual(want.Kind, indel.InsertionOrDeletion);
    }
}
/// <summary>
/// Aligns two reference sequences against two query sequences with the NUCmer
/// pairwise aligner and compares the result with the two expected alignments.
/// </summary>
public void ValidatePairwiseAlignedSequenceMultipleRefQuery()
{
    var references = new List<ISequence>();
    references.Add(new Sequence(Alphabets.DNA, "ATGCGCATCCCC") { ID = "R1" });
    references.Add(new Sequence(Alphabets.DNA, "TAGCT") { ID = "R2" });

    var queries = new List<ISequence>();
    queries.Add(new Sequence(Alphabets.DNA, "CCGCGCCCCCTC") { ID = "Q1" });
    queries.Add(new Sequence(Alphabets.DNA, "AGCT") { ID = "Q2" });

    var aligner = new NucmerPairwiseAligner
    {
        FixedSeparation = 0,
        MinimumScore = 2,
        SeparationFactor = -1,
        LengthOfMUM = 3,
        ForwardOnly = true,
    };

    IList<IPairwiseSequenceAlignment> result = aligner
        .Align(references, queries)
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // Check if output is not null
    Assert.AreNotEqual(null, result);

    // First expected alignment: gapped match.
    IPairwiseSequenceAlignment gappedAlignment = new PairwiseSequenceAlignment();
    gappedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC"),
        Consensus = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        Score = -5,
        FirstOffset = 0,
        SecondOffset = 0
    });

    // Second expected alignment: exact match.
    IPairwiseSequenceAlignment exactAlignment = new PairwiseSequenceAlignment();
    exactAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AGCT"),
        SecondSequence = new Sequence(Alphabets.DNA, "AGCT"),
        Consensus = new Sequence(Alphabets.DNA, "AGCT"),
        Score = 12,
        FirstOffset = 0,
        SecondOffset = 1
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>
    {
        gappedAlignment,
        exactAlignment
    };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
    ApplicationLog.WriteLine("PairwiseAlignedSequence P1: Successfully validated Sequence with Multiple Reference.");
}
/// <summary>
/// Builds a pairwise alignment from the two sequences stored under the given
/// configuration node, reads it back, and validates either the container
/// properties or the stored sequences.
/// </summary>
/// <param name="nodeName">Configuration node holding the sequences and expected values.</param>
/// <param name="validateProperty">
/// True to check IsReadOnly, Count and Score against the configured values;
/// false to check that the sequences read back equal the configured sequences.
/// </param>
private void ValidateGeneralSequenceAlignment(string nodeName, bool validateProperty)
{
    // Read the xml file for getting both the sequences for aligning.
    string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
    string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : First sequence used is '{0}'.", origSequence1));
    ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : Second sequence used is '{0}'.", origSequence2));

    // Create two sequences
    ISequence aInput = new Sequence(alphabet, origSequence1);
    ISequence bInput = new Sequence(alphabet, origSequence2);

    // Wrap the pair in an alignment and put that alignment into a list.
    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();
    var alignSeq = new PairwiseAlignedSequence { FirstSequence = aInput, SecondSequence = bInput };
    IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment();
    seqAlignObj.Add(alignSeq);
    sequenceAlignmentObj.Add(seqAlignObj);

    // Read the output back and validate the same.
    IList<PairwiseAlignedSequence> newAlignedSequences = sequenceAlignmentObj[0].PairwiseAlignedSequences;
    string readSequence1 = new String(newAlignedSequences[0].FirstSequence.Select(a => (char) a).ToArray());
    string readSequence2 = new String(newAlignedSequences[0].SecondSequence.Select(a => (char) a).ToArray());

    // Log what was actually read back, not the inputs again.
    ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : First sequence read is '{0}'.", readSequence1));
    ApplicationLog.WriteLine(string.Format("SequenceAlignment P1 : Second sequence read is '{0}'.", readSequence2));

    if (validateProperty)
    {
        string score = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
        string seqCount = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceCountNode);

        // Expected values go first so failure messages label the two sides correctly.
        Assert.IsFalse(sequenceAlignmentObj.IsReadOnly);
        Assert.AreEqual(seqCount, sequenceAlignmentObj.Count.ToString((IFormatProvider) null));
        Assert.AreEqual(
            score,
            sequenceAlignmentObj[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider) null));

        ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
        ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
        ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
    }
    else
    {
        Assert.AreEqual(origSequence1, readSequence1);
        Assert.AreEqual(origSequence2, readSequence2);
    }
}
/// <summary>
/// Builds a pairwise alignment from the two DNA sequences in the configuration
/// file and validates its Count, FirstSequence, SecondSequence, IsReadOnly,
/// Documentation and PairwiseAlignedSequences properties.
/// </summary>
public void ValidateSequenceAlignmentProperties()
{
    // Pull the inputs and the expected count from the DNA alignment node.
    string nodeName = Constants.AlignDnaAlgorithmNodeName;
    string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
    string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);
    string alphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
    IAlphabet alphabet = Utility.GetAlphabet(alphabetName);
    string seqCount = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceCountNode);

    // Create two sequences
    ISequence aInput = new Sequence(alphabet, origSequence1);
    ISequence bInput = new Sequence(alphabet, origSequence2);

    // Put the pair into an alignment constructed over the same two sequences.
    var alignSeq = new PairwiseAlignedSequence { FirstSequence = aInput, SecondSequence = bInput };
    IPairwiseSequenceAlignment seqAlignObj = new PairwiseSequenceAlignment(aInput, bInput);
    seqAlignObj.Add(alignSeq);

    IList<IPairwiseSequenceAlignment> sequenceAlignmentObj = new List<IPairwiseSequenceAlignment>();
    sequenceAlignmentObj.Add(seqAlignObj);

    // Validate all properties of sequence alignment class.
    string actualFirst = new string(seqAlignObj.FirstSequence.Select(a => (char) a).ToArray());
    string actualSecond = new string(seqAlignObj.SecondSequence.Select(a => (char) a).ToArray());

    Assert.AreEqual(seqCount, seqAlignObj.Count.ToString((IFormatProvider) null));
    Assert.AreEqual(origSequence1, actualFirst);
    Assert.AreEqual(origSequence2, actualSecond);
    Assert.IsFalse(seqAlignObj.IsReadOnly);
    Assert.IsNull(seqAlignObj.Documentation);
    Assert.AreEqual(seqCount, seqAlignObj.PairwiseAlignedSequences.Count.ToString((IFormatProvider) null));

    ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the IsRead Property");
    ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Count Property");
    ApplicationLog.WriteLine("SequenceAlignment P1 : Successfully validated the Sequences Property");
}
/// <summary>
/// Runs a Needleman-Wunsch alignment through the overload selected by
/// <paramref name="alignParam"/> and <paramref name="alignType"/> and compares the
/// result with the expected sequences and score from the configuration node.
/// </summary>
/// <param name="nodeName">Configuration node holding inputs and expected values.</param>
/// <param name="alignParam">
/// Selects the Align/AlignSimple overload. Members whose name contains "Code" take
/// the input sequences from the node itself rather than from FASTA files.
/// </param>
/// <param name="similarityMatrixParam">How the similarity matrix is constructed.</param>
/// <param name="alignType">
/// <c>AlignmentType.Align</c> uses the gap-extension overloads and expectations;
/// any other value uses AlignSimple.
/// </param>
private void ValidateNeedlemanWunschAlignment(string nodeName, AlignParameters alignParam,
                                              SimilarityMatrixParameters similarityMatrixParam,
                                              AlignmentType alignType)
{
    ISequence aInput, bInput;
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));

    if (alignParam.ToString().Contains("Code"))
    {
        // Sequences are given inline in the configuration node.
        string sequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode1);
        string sequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode2);

        aInput = new Sequence(alphabet, sequence1);
        bInput = new Sequence(alphabet, sequence2);
    }
    else
    {
        // Sequences come from the first record of two FASTA files.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode2);

        var parseObjectForFile1 = new FastAParser { Alphabet = alphabet };
        ISequence originalSequence1 = parseObjectForFile1.Parse(filePath1).FirstOrDefault();
        Assert.IsNotNull(originalSequence1);
        aInput = new Sequence(alphabet, originalSequence1.ConvertToString());

        var parseObjectForFile2 = new FastAParser { Alphabet = alphabet };
        ISequence originalSequence2 = parseObjectForFile2.Parse(filePath2).FirstOrDefault();
        Assert.IsNotNull(originalSequence2);
        bInput = new Sequence(alphabet, originalSequence2.ConvertToString());
    }

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

    SimilarityMatrix sm;
    switch (similarityMatrixParam)
    {
        case SimilarityMatrixParameters.DiagonalMatrix:
        {
            string matchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode);
            string misMatchValue = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode);
            sm = new DiagonalSimilarityMatrix(int.Parse(matchValue, null), int.Parse(misMatchValue, null));
            break;
        }

        case SimilarityMatrixParameters.TextReader:
        default:
            // Every other option loads the matrix from the BLOSUM file. The reader
            // is always disposed so the file handle is not leaked.
            using (TextReader reader = new StreamReader(blosumFilePath))
            {
                sm = new SimilarityMatrix(reader);
            }

            break;
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapExtensionCostNode), null);

    var needlemanWunschObj = new NeedlemanWunschAligner();

    // For AllParam the matrix and gap cost are supplied as method arguments instead.
    if (alignParam != AlignParameters.AllParam)
    {
        needlemanWunschObj.SimilarityMatrix = sm;
        needlemanWunschObj.GapOpenCost = gapOpenCost;
    }

    bool useGapExtension = alignType == AlignmentType.Align;
    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignParameters.AlignList:
        case AlignParameters.AlignListCode:
            var sequences = new List<ISequence> { aInput, bInput };
            if (useGapExtension)
            {
                needlemanWunschObj.GapExtensionCost = gapExtensionCost;
                result = needlemanWunschObj.Align(sequences);
            }
            else
            {
                result = needlemanWunschObj.AlignSimple(sequences);
            }

            break;

        case AlignParameters.AllParam:
        case AlignParameters.AllParamCode:
            if (useGapExtension)
            {
                needlemanWunschObj.GapExtensionCost = gapExtensionCost;
                result = needlemanWunschObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
            }
            else
            {
                result = needlemanWunschObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
            }

            break;

        case AlignParameters.AlignTwo:
        case AlignParameters.AlignTwoCode:
            if (useGapExtension)
            {
                needlemanWunschObj.GapExtensionCost = gapExtensionCost;
                result = needlemanWunschObj.Align(aInput, bInput);
            }
            else
            {
                result = needlemanWunschObj.AlignSimple(aInput, bInput);
            }

            break;

        default:
            // Unrecognised option: result stays null and the comparison below decides.
            break;
    }

    // Expected values differ between the gap-extension and the simple alignment.
    string expectedSequence1, expectedSequence2, expectedScore;
    if (useGapExtension)
    {
        expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionScoreNode);
        expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence1Node);
        expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedGapExtensionSequence2Node);
    }
    else
    {
        expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedScoreNode);
        expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode1);
        expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ExpectedSequenceNode2);
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment(aInput, bInput);
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null)
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format("NeedlemanWunschAligner P1 : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format("NeedlemanWunschAligner P1 : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format("NeedlemanWunschAligner P1 : Aligned Second Sequence is '{0}'.", expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// This takes a specific starting location in the scoring matrix and generates
/// an alignment from it using the traceback scores.
/// </summary>
/// <param name="startingCell">Starting point</param>
/// <returns>Pairwise alignment</returns>
/// <exception cref="InvalidOperationException">
/// The traceback matrix holds a direction other than diagonal, left or up at a cell
/// where the traceback is not yet complete.
/// </exception>
protected PairwiseAlignedSequence CreateAlignmentFromCell(OptScoreMatrixCell startingCell)
{
    // Row width used to index the flattened gap-length tables.
    int gapStride = Cols + 1;

    // Using list to avoid allocation issues
    int estimatedLength = (int)(1.1 * Math.Max(ReferenceSequence.Length, QuerySequence.Length));
    var firstAlignment = new List<byte>(estimatedLength);
    var secondAlignment = new List<byte>(estimatedLength);

    // Get the starting cell position and record the optimal score found there.
    // Rows index the query sequence, columns index the reference sequence.
    int i = startingCell.Row;
    int j = startingCell.Col;
    var finalScore = startingCell.Score;

    long rowGaps = 0, colGaps = 0, identicalCount = 0, similarityCount = 0;

    // Walk the traceback matrix and build the alignments.
    while (!TracebackIsComplete(i, j))
    {
        sbyte tracebackDirection = Traceback[i][j];

        // A non-affine gap step always covers exactly one position; with the affine
        // model the stored gap length says how many positions to consume at once.
        int gapLength = 1;

        switch (tracebackDirection)
        {
            case SourceDirection.Diagonal:
            {
                byte n1 = ReferenceSequence[j - 1];
                byte n2 = QuerySequence[i - 1];
                firstAlignment.Add(n1);
                secondAlignment.Add(n2);
                i--;
                j--;

                // Track some useful statistics
                if (n1 == n2 && n1 != _gap)
                {
                    identicalCount++;
                    similarityCount++;
                }
                else if (SimilarityMatrix[n2, n1] > 0)
                {
                    similarityCount++;
                }

                break;
            }

            case SourceDirection.Left:
                // Consume reference symbols against gaps in the query.
                if (usingAffineGapModel)
                {
                    gapLength = h_Gap_Length[i * gapStride + j];
                }

                for (int k = 0; k < gapLength; k++)
                {
                    firstAlignment.Add(ReferenceSequence[--j]);
                    secondAlignment.Add(_gap);
                    rowGaps++;
                }

                break;

            case SourceDirection.Up:
                // Consume query symbols against gaps in the reference.
                if (usingAffineGapModel)
                {
                    gapLength = v_Gap_Length[i * gapStride + j];
                }

                for (int k = 0; k < gapLength; k++)
                {
                    firstAlignment.Add(_gap);
                    colGaps++;
                    secondAlignment.Add(QuerySequence[--i]);
                }

                break;

            default:
                // Neither index would move, so continuing would loop forever.
                throw new InvalidOperationException(
                    "Unexpected traceback direction " + tracebackDirection
                    + " at row " + i + ", column " + j + ".");
        }
    }

    // We build the alignments in reverse since we were walking backwards through
    // the matrix table, so flip both lists to get the proper order.
    firstAlignment.Reverse();
    secondAlignment.Reverse();

    // Create the Consensus sequence
    byte[] consensus = new byte[Math.Min(firstAlignment.Count, secondAlignment.Count)];
    for (int n = 0; n < consensus.Length; n++)
    {
        consensus[n] = ConsensusResolver.GetConsensus(new[] { firstAlignment[n], secondAlignment[n] });
    }

    // Create the result alignment
    var pairwiseAlignedSequence = new PairwiseAlignedSequence
    {
        Score = finalScore,
        FirstSequence = new Sequence(_sequence1.Alphabet, firstAlignment.ToArray()) { ID = _sequence1.ID },
        SecondSequence = new Sequence(_sequence2.Alphabet, secondAlignment.ToArray()) { ID = _sequence2.ID },
        Consensus = new Sequence(ConsensusResolver.SequenceAlignment == null ? ConsensusResolver.SequenceAlphabet : ConsensusResolver.SequenceAlphabet, consensus),
    };

    // Offset is start of alignment in input sequence with respect to other sequence.
    // After the walk, i and j are the positions where the alignment begins in the
    // query and in the reference respectively.
    if (i >= j)
    {
        pairwiseAlignedSequence.FirstOffset = i - j;
        pairwiseAlignedSequence.SecondOffset = 0;
    }
    else
    {
        pairwiseAlignedSequence.FirstOffset = 0;
        pairwiseAlignedSequence.SecondOffset = j - i;
    }

    // Add in ISequenceAlignment metadata
    pairwiseAlignedSequence.Metadata["Score"] = pairwiseAlignedSequence.Score;
    pairwiseAlignedSequence.Metadata["FirstOffset"] = pairwiseAlignedSequence.FirstOffset;
    pairwiseAlignedSequence.Metadata["SecondOffset"] = pairwiseAlignedSequence.SecondOffset;
    pairwiseAlignedSequence.Metadata["Consensus"] = pairwiseAlignedSequence.Consensus;
    pairwiseAlignedSequence.Metadata["StartOffsets"] = new List<long> { j, i };
    pairwiseAlignedSequence.Metadata["EndOffsets"] = new List<long> { startingCell.Col - 1, startingCell.Row - 1 };
    pairwiseAlignedSequence.Metadata["Insertions"] = new List<long> { colGaps, rowGaps }; // ref, query insertions
    pairwiseAlignedSequence.Metadata["IdenticalCount"] = identicalCount;
    pairwiseAlignedSequence.Metadata["SimilarityCount"] = similarityCount;

    return pairwiseAlignedSequence;
}
/// <summary>
/// Expands this delta alignment into a pair of gapped sequences.
/// </summary>
/// <returns>
/// Aligned pair with the gapped reference as the first sequence and the gapped
/// query as the second, plus "StartOffsets", "EndOffsets" and "Insertions" metadata.
/// </returns>
public PairwiseAlignedSequence ConvertDeltaToSequences()
{
    PairwiseAlignedSequence result = new PairwiseAlignedSequence();

    List<long> starts = new List<long>(2) { FirstSequenceStart, SecondSequenceStart };
    List<long> ends = new List<long>(2) { FirstSequenceEnd, SecondSequenceEnd };

    // gapCounts[0]: gaps placed in the reference; gapCounts[1]: gaps placed in the query.
    List<long> gapCounts = new List<long>(2) { 0, 0 };

    // Copy the aligned region (inclusive start..end) out of each source sequence.
    List<byte> referenceBytes = new List<byte>();
    long position = this.FirstSequenceStart;
    while (position <= this.FirstSequenceEnd)
    {
        referenceBytes.Add(this.ReferenceSequence[position]);
        position++;
    }

    List<byte> queryBytes = new List<byte>();
    position = this.SecondSequenceStart;
    while (position <= this.SecondSequenceEnd)
    {
        queryBytes.Add(this.QuerySequence[position]);
        position++;
    }

    // Each delta advances a running cursor by its magnitude and a gap character is
    // inserted just before that cursor. The sign picks the sequence receiving the gap:
    // negative -> gap goes into the reference copy, otherwise into the query copy.
    int cursor = 0;
    foreach (int delta in Deltas)
    {
        cursor += Math.Abs(delta);

        bool gapInReference = delta < 0;
        List<byte> target = gapInReference ? referenceBytes : queryBytes;
        target.Insert(cursor - 1, DnaAlphabet.Instance.Gap);
        gapCounts[gapInReference ? 0 : 1]++;
    }

    byte[] refSeq = referenceBytes.ToArray();
    IAlphabet alphabet = Alphabets.AutoDetectAlphabet(refSeq, 0, refSeq.LongLength, null);
    result.FirstSequence = new Sequence(alphabet, refSeq, false);

    byte[] querySeq = queryBytes.ToArray();
    alphabet = Alphabets.AutoDetectAlphabet(querySeq, 0, querySeq.LongLength, QuerySequence.Alphabet);
    result.SecondSequence = new Sequence(alphabet, querySeq, false);

    result.Metadata["StartOffsets"] = starts;
    result.Metadata["EndOffsets"] = ends;
    result.Metadata["Insertions"] = gapCounts;

    return result;
}
/// <summary>
/// Checks that left-aligning throws a <see cref="FormatException"/> for each of two
/// hand-written alignments: one ending in a gap, one with a gap near the start.
/// </summary>
public static void TestExceptionThrownForUnclippedAlignment()
{
    // Each entry is { reference, query }.
    var rejectedPairs = new[]
    {
        new[] { "ACAATATA", "ACAATAT-" },
        new[] { "AAACAATATA", "AA-CAATATA" }
    };

    foreach (var texts in rejectedPairs)
    {
        var reference = new Sequence(DnaAlphabet.Instance, texts[0]);
        var query = new Sequence(DnaAlphabet.Instance, texts[1]);

        var alignment = new PairwiseSequenceAlignment(reference, query);
        var pair = new PairwiseAlignedSequence();
        pair.FirstSequence = reference;
        pair.SecondSequence = query;
        alignment.Add(pair);

        Assert.Throws<FormatException>(() => VariantCaller.LeftAlignIndelsAndCallVariants(alignment, true));
    }
}
/// <summary>
/// Given a pairwise alignment, shift it so that all deletions start as early as
/// possible. For example:
///
/// <code>
/// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
/// TTTTAA--TTTT                     TTTT--AATTTT
/// </code>
///
/// This function takes a IPairwiseSequenceAlignment and assumes that the first sequence is the reference and second
/// sequence is the query. It returns a new Pairwise sequence alignment with all of the indels left aligned as well as a list of variants.
/// </summary>
/// <param name="aln">Aln. Must hold exactly one aligned pair. The second sequence should be of type QualitativeSequence or Sequence</param>
/// <param name="callVariants">callVariants. If true, it will call variants, otherwise the second half of tuple will be null. </param>
/// <returns>
/// The left-aligned alignment and the called variants (null when
/// <paramref name="callVariants"/> is false).
/// </returns>
/// <exception cref="NullReferenceException">
/// <paramref name="aln"/>, or either sequence of its aligned pair, is null.
/// </exception>
/// <exception cref="ArgumentException">
/// The alignment does not hold exactly one aligned pair, or the query is neither a
/// Sequence nor a QualitativeSequence.
/// </exception>
public static Tuple<IPairwiseSequenceAlignment, List<Variant>> LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true)
{
    // The exception types of these guards are left as they were so that existing
    // callers which catch them keep working.
    if (aln == null)
    {
        throw new NullReferenceException("aln");
    }

    if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1)
    {
        throw new ArgumentException("The pairwise aligned sequence should only have one alignment");
    }

    var frstAln = aln.PairwiseAlignedSequences.First();
    var seq1 = frstAln.FirstSequence;
    var seq2 = frstAln.SecondSequence;
    if (seq1 == null)
    {
        throw new NullReferenceException("seq1");
    }
    else if (seq2 == null)
    {
        throw new NullReferenceException("seq2");
    }

    //TODO: Might implement an ambiguity check later.
#if FALSE
    if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity)
    {
        throw new ArgumentException ("Cannot left align sequences with ambiguous symbols.");
    }
#endif

    // Note we have to copy unless we can guarantee the array will not be mutated.
    byte[] refseq = seq1.ToArray();
    ISequence newQuery;
    List<Variant> variants = null;

    if (seq2 is QualitativeSequence)
    {
        // Query with quality values: pair every base with its quality score.
        var qs = seq2 as QualitativeSequence;
        var query = Enumerable.Zip(qs,
                                   qs.GetQualityScores(),
                                   (bp, qv) => new BPandQV(bp, (byte)qv, false)).ToArray();
        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }

        var newQueryQS = new QualitativeSequence(qs.Alphabet,
                                                 qs.FormatType,
                                                 query.Select(z => z.BP).ToArray(),
                                                 query.Select(p => p.QV).ToArray(),
                                                 false);
        newQueryQS.Metadata = seq2.Metadata;
        newQuery = newQueryQS;
    }
    else if (seq2 is Sequence)
    {
        // For a sequence with no QV values: every base gets a quality of 0.
        var qs = seq2 as Sequence;
        var query = qs.Select(v => new BPandQV(v, 0, false)).ToArray();
        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);

        // ISequence does not have a setable metadata
        var newQueryS = new Sequence(qs.Alphabet, query.Select(z => z.BP).ToArray(), false);
        newQueryS.Metadata = seq2.Metadata;
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }

        newQuery = newQueryS;
    }
    else
    {
        throw new ArgumentException("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
    }

    // variants stays null when callVariants is false, so only stamp the reference
    // name when there is a list to walk.
    if (variants != null && aln.FirstSequence != null && aln.FirstSequence.ID != null)
    {
        foreach (var v in variants)
        {
            v.RefName = aln.FirstSequence.ID;
        }
    }

    var newRef = new Sequence(seq1.Alphabet, refseq, false);
    newRef.ID = seq1.ID;
    newRef.Metadata = seq1.Metadata;

    newQuery.ID = seq2.ID;

    var newaln = new PairwiseSequenceAlignment(aln.FirstSequence, aln.SecondSequence);
    var pas = new PairwiseAlignedSequence();
    pas.FirstSequence = newRef;
    pas.SecondSequence = newQuery;
    newaln.Add(pas);
    return new Tuple<IPairwiseSequenceAlignment, List<Variant>>(newaln, variants);
}
/// <summary>
/// Expands this delta alignment into a pair of gapped sequences.
/// </summary>
/// <returns>
/// An aligned pair whose FirstSequence is the gapped reference region and whose
/// SecondSequence is the gapped query region. Its metadata carries the
/// "StartOffsets", "EndOffsets" and "Insertions" lists (reference at index 0,
/// query at index 1).
/// </returns>
internal PairwiseAlignedSequence ConvertDeltaToSequences()
{
    // Editable copy of the aligned reference region.
    Sequence gappedReference = new Sequence(ReferenceSequence.Alphabet);
    gappedReference.IsReadOnly = false;
    int referenceLength = FirstSequenceEnd - FirstSequenceStart + 1;
    gappedReference.InsertRange(
        0,
        ReferenceSequence.Range(FirstSequenceStart, referenceLength).ToString());

    // Editable copy of the aligned query region.
    Sequence gappedQuery = new Sequence(QuerySequence.Alphabet);
    gappedQuery.IsReadOnly = false;
    int queryLength = SecondSequenceEnd - SecondSequenceStart + 1;
    gappedQuery.InsertRange(
        0,
        QuerySequence.Range(SecondSequenceStart, queryLength).ToString());

    // Walk the deltas: each one advances the running position by its magnitude.
    // A negative delta puts the gap into the reference, any other delta puts it
    // into the query.
    int referenceGapCount = 0;
    int queryGapCount = 0;
    int position = 0;
    foreach (int delta in Deltas)
    {
        position += Math.Abs(delta);
        int insertAt = position - 1;

        if (delta >= 0)
        {
            gappedQuery.Insert(insertAt, DnaAlphabet.Instance.Gap.Symbol);
            queryGapCount++;
        }
        else
        {
            gappedReference.Insert(insertAt, DnaAlphabet.Instance.Gap.Symbol);
            referenceGapCount++;
        }
    }

    PairwiseAlignedSequence result = new PairwiseAlignedSequence();
    result.FirstSequence = gappedReference;
    result.SecondSequence = gappedQuery;
    result.Metadata["StartOffsets"] = new List<int>(2) { FirstSequenceStart, SecondSequenceStart };
    result.Metadata["EndOffsets"] = new List<int>(2) { FirstSequenceEnd, SecondSequenceEnd };
    result.Metadata["Insertions"] = new List<int>(2) { referenceGapCount, queryGapCount };

    return result;
}
/// <summary>
/// Aligns two DNA reference sequences against two DNA query sequences with
/// NUCmer (forward strand only) and compares the result with the two
/// expected pairwise alignments.
/// </summary>
public void TestNUCmer3MultipleReferencesAndQueries()
{
    List<ISequence> referenceSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "ATGCGCATCCCC") { ID = "R1" },
        new Sequence(Alphabets.DNA, "TAGCT") { ID = "R11" }
    };

    List<ISequence> searchSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "CCGCGCCCCCTC") { ID = "Q1" },
        new Sequence(Alphabets.DNA, "AGCT") { ID = "Q11" }
    };

    NucmerPairwiseAligner nucmer = new NucmerPairwiseAligner
    {
        FixedSeparation = 0,
        MinimumScore = 2,
        SeparationFactor = -1,
        LengthOfMUM = 3,
        ForwardOnly = true
    };

    IList<IPairwiseSequenceAlignment> result = nucmer
        .Align(referenceSeqs, searchSeqs)
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // Check if output is not null
    Assert.AreNotEqual(null, result);

    // First expected alignment: reference region aligned with a two-base gap in the query.
    IPairwiseSequenceAlignment gappedAlignment = new PairwiseSequenceAlignment();
    gappedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC"),
        Consensus = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        Score = -5,
        FirstOffset = 0,
        SecondOffset = 0
    });

    // Second expected alignment: exact four-base match.
    IPairwiseSequenceAlignment exactAlignment = new PairwiseSequenceAlignment();
    exactAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AGCT"),
        SecondSequence = new Sequence(Alphabets.DNA, "AGCT"),
        Consensus = new Sequence(Alphabets.DNA, "AGCT"),
        Score = 12,
        FirstOffset = 0,
        SecondOffset = 1
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>
    {
        gappedAlignment,
        exactAlignment
    };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Appends an aligned sequence pair to this alignment's list of aligned sequences.
/// </summary>
/// <param name="item">PairwiseAlignedSequence to append.</param>
/// <exception cref="NotSupportedException">Thrown when this alignment is read-only.</exception>
public void Add(PairwiseAlignedSequence item)
{
    if (!IsReadOnly)
    {
        alignedSequences.Add(item);
        return;
    }

    throw new NotSupportedException(Properties.Resource.READ_ONLY_COLLECTION_MESSAGE);
}
/// <summary>
/// Expands this delta alignment into a pair of gapped sequences.
/// </summary>
/// <returns>
/// An aligned pair whose FirstSequence is the gapped reference region and whose
/// SecondSequence is the gapped query region. Both carry the ID and a copy of the
/// metadata of the sequence they came from. The pair's own metadata holds the
/// "StartOffsets", "EndOffsets" and "Insertions" lists (reference at index 0,
/// query at index 1).
/// </returns>
public PairwiseAlignedSequence ConvertDeltaToSequences()
{
    // Copy the aligned region of each sequence into an editable buffer.
    List<byte> referenceBytes = new List<byte>();
    for (long position = this.FirstSequenceStart; position <= this.FirstSequenceEnd; position++)
    {
        referenceBytes.Add(this.ReferenceSequence[position]);
    }

    List<byte> queryBytes = new List<byte>();
    for (long position = this.SecondSequenceStart; position <= this.SecondSequenceEnd; position++)
    {
        queryBytes.Add(this.QuerySequence[position]);
    }

    // Walk the deltas: each one advances the running position by its magnitude.
    // A negative delta puts the gap into the reference, any other delta puts it
    // into the query.
    long referenceGapCount = 0;
    long queryGapCount = 0;
    int cursor = 0;
    foreach (int delta in Deltas)
    {
        cursor += Math.Abs(delta);
        int insertAt = cursor - 1;

        if (delta >= 0)
        {
            queryBytes.Insert(insertAt, DnaAlphabet.Instance.Gap);
            queryGapCount++;
        }
        else
        {
            referenceBytes.Insert(insertAt, DnaAlphabet.Instance.Gap);
            referenceGapCount++;
        }
    }

    PairwiseAlignedSequence result = new PairwiseAlignedSequence();

    // The reference alphabet is auto-detected without an alphabet hint.
    byte[] referenceArray = referenceBytes.ToArray();
    IAlphabet referenceAlphabet = Alphabets.AutoDetectAlphabet(referenceArray, 0, referenceArray.GetLongLength(), null);
    Sequence gappedReference = new Sequence(referenceAlphabet, referenceArray, false);
    gappedReference.ID = ReferenceSequence.ID;
    gappedReference.Metadata = new Dictionary<string, object>(ReferenceSequence.Metadata);
    result.FirstSequence = gappedReference;

    // The query alphabet detection is passed the original query alphabet.
    byte[] queryArray = queryBytes.ToArray();
    IAlphabet queryAlphabet = Alphabets.AutoDetectAlphabet(queryArray, 0, queryArray.GetLongLength(), QuerySequence.Alphabet);
    Sequence gappedQuery = new Sequence(queryAlphabet, queryArray, false);
    gappedQuery.ID = QuerySequence.ID;
    gappedQuery.Metadata = new Dictionary<string, object>(QuerySequence.Metadata);
    result.SecondSequence = gappedQuery;

    result.Metadata["StartOffsets"] = new List<long>(2) { FirstSequenceStart, SecondSequenceStart };
    result.Metadata["EndOffsets"] = new List<long>(2) { FirstSequenceEnd, SecondSequenceEnd };
    result.Metadata["Insertions"] = new List<long>(2) { referenceGapCount, queryGapCount };

    return result;
}
/// <summary>
/// Runs MUMmerAligner.AlignSimple on test data read from the XML configuration and
/// compares the result with the expected sequences and scores from the same node.
/// </summary>
/// <param name="nodeName">XML node that holds the test data.</param>
/// <param name="isFilePath">
/// True to parse the reference and query sequences from the FASTA files named in the
/// node; false to build them from the sequence text and alphabet names in the node.
/// </param>
/// <param name="isAlignList">
/// True to pass the reference and the first query to AlignSimple as one list;
/// false to pass the reference and the query list separately.
/// </param>
/// <param name="addParam">
/// PerformSimilarityMatrixChange switches the aligner to the Blosum50 similarity
/// matrix; the gap open cost is read from the node in every case.
/// </param>
void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath,
    bool isAlignList, AdditionalParameters addParam)
{
    ISequence referenceSeq;
    ISequence firstQuery;
    IList<ISequence> querySeqs;

    if (!isFilePath)
    {
        // Build the reference sequence from the text stored in the configuration file
        string referenceText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
        referenceSeq = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceText);

        // Build the query sequence the same way
        string queryText = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabetName = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
        firstQuery = new Sequence(Utility.GetAlphabet(queryAlphabetName), queryText);

        // The query list stays empty when the sequences are passed as one align list.
        querySeqs = new List<ISequence>();
        if (!isAlignList)
        {
            querySeqs.Add(firstQuery);
        }
    }
    else
    {
        // Gets the reference sequence from the file named in the configuration file
        string filePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(filePath);
        Assert.IsTrue(File.Exists(filePath));

        FastAParser fastaParserObj = new FastAParser();
        referenceSeq = fastaParserObj.Parse(filePath).FirstOrDefault();
        Assert.IsNotNull(referenceSeq);

        // Gets the query sequences from the file named in the configuration file
        string queryFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
        Assert.IsNotNull(queryFilePath);
        Assert.IsTrue(File.Exists(queryFilePath));

        querySeqs = fastaParserObj.Parse(queryFilePath).ToList();
        firstQuery = querySeqs.First();
    }

    List<ISequence> alignList = null;
    if (isAlignList)
    {
        alignList = new List<ISequence> { referenceSeq, firstQuery };
    }

    // Setup the algorithm
    string mumLength = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);
    MUMmerAligner mumAlignObj = new MUMmerAligner
    {
        LengthOfMUM = long.Parse(mumLength, null),
        StoreMUMs = true
    };

    if (addParam == AdditionalParameters.PerformSimilarityMatrixChange)
    {
        mumAlignObj.SimilarityMatrix = new SimilarityMatrix(SimilarityMatrix.StandardSimilarityMatrix.Blosum50);
    }

    mumAlignObj.GapOpenCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), null);

    // Run the alignment through the overload selected by isAlignList.
    IList<IPairwiseSequenceAlignment> align;
    if (isAlignList)
    {
        IEnumerable<ISequence> alignEnumSeqs = alignList;
        align = mumAlignObj.AlignSimple(alignEnumSeqs);
    }
    else
    {
        align = mumAlignObj.AlignSimple(referenceSeq, querySeqs);
    }

    // Validate MUMs Properties
    Assert.IsNotNull(mumAlignObj.MUMs);

    string expectedScore = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    string[] expectedSequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    // One expected alignment for at most one query, otherwise two.
    if (querySeqs.Count <= 1)
    {
        PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
            SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
            Score = Convert.ToInt32(expectedScore, null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };

        IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
        expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);
        expectedOutput.Add(expectedAlignment);
    }
    else
    {
        // Scores for the two alignments are stored comma separated.
        string[] expectedScores = expectedScore.Split(',');

        // First expected alignment
        PairwiseAlignedSequence firstExpectedPair = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]),
            SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]),
            Score = int.Parse(expectedScores[0], null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };
        IPairwiseSequenceAlignment firstExpectedAlignment = new PairwiseSequenceAlignment();
        firstExpectedAlignment.PairwiseAlignedSequences.Add(firstExpectedPair);
        expectedOutput.Add(firstExpectedAlignment);

        // Second expected alignment
        PairwiseAlignedSequence secondExpectedPair = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[2]),
            SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[3]),
            Score = int.Parse(expectedScores[1], null),
            FirstOffset = Int32.MinValue,
            SecondOffset = Int32.MinValue,
        };
        IPairwiseSequenceAlignment secondExpectedAlignment = new PairwiseSequenceAlignment();
        secondExpectedAlignment.PairwiseAlignedSequences.Add(secondExpectedPair);
        expectedOutput.Add(secondExpectedAlignment);
    }

    Assert.IsTrue(CompareAlignment(align, expectedOutput));
}