private void ValidateSmithWatermanAlignment(bool isTextFile, AlignmentParamType alignParam, AlignmentType alignType)
{
    // Validates SmithWatermanAligner against the expectations stored in the test XML.
    //   isTextFile - true: inputs are read from two FastA files; false: inputs are inline XML strings.
    //   alignParam - selects which Align/AlignSimple overload is exercised.
    //   alignType  - AlignmentType.Align runs Align(...) and checks the gap-extension expectations;
    //                any other value runs AlignSimple(...) and checks the simple expectations.
    ISequence aInput, bInput;
    IAlphabet alphabet = Utility.GetAlphabet(
        this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the paths of the two FastA files to align from the XML.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.FilePathNode2);

        // Parse each file and keep its first sequence.
        var parseObjectForFile1 = new FastAParser();
        parseObjectForFile1.Alphabet = alphabet;
        aInput = parseObjectForFile1.Parse(filePath1).First();

        var parseObjectForFile2 = new FastAParser();
        parseObjectForFile2.Alphabet = alphabet;
        bInput = parseObjectForFile2.Parse(filePath2).First();
    }
    else
    {
        // Read the two input sequences directly from the XML.
        string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.SequenceNode1);
        string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.SequenceNode2);
        aInput = new Sequence(alphabet, origSequence1);
        bInput = new Sequence(alphabet, origSequence2);
    }

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.BlosumFilePathNode);

    // Dispose the reader once the matrix is built so the file handle is not leaked.
    // NOTE(review): assumes the SimilarityMatrix constructor reads the whole stream eagerly - confirm.
    SimilarityMatrix sm;
    using (var blosumReader = new StreamReader(blosumFilePath))
    {
        sm = new SimilarityMatrix(blosumReader);
    }

    int gapOpenCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(
        this.utilityObj.xmlUtil.GetTextValue(Constants.SmithWatermanAlignAlgorithmNodeName, Constants.GapExtensionCostNode), null);

    var smithWatermanObj = new SmithWatermanAligner();

    // The AllParam overloads receive the matrix and gap costs as arguments,
    // so the aligner's properties are only configured for the other modes.
    if (AlignmentParamType.AllParam != alignParam)
    {
        smithWatermanObj.SimilarityMatrix = sm;
        smithWatermanObj.GapOpenCost = gapOpenCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;
    switch (alignParam)
    {
        case AlignmentParamType.AlignList:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(new List<ISequence> { aInput, bInput });
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(new List<ISequence> { aInput, bInput });
                    break;
            }
            break;
        case AlignmentParamType.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        case AlignmentParamType.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = smithWatermanObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = smithWatermanObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        default:
            // Any other mode leaves result null; it is passed to CompareAlignment as is.
            break;
    }

    // Read the expected score and expected aligned sequences from the XML.
    string expectedSequence1, expectedSequence2, expectedScore;
    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(
                Constants.SmithWatermanAlignAlgorithmNodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    // Build the single expected alignment that the result is compared against.
    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    var alignedSeq = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(alphabet, expectedSequence1),
        SecondSequence = new Sequence(alphabet, expectedSequence2),
        Score = Convert.ToInt32(expectedScore, null),
        FirstOffset = Int32.MinValue,
        SecondOffset = Int32.MinValue,
    };
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "SmithWatermanAligner BVT : Aligned Second Sequence is '{0}'.", expectedSequence2));

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Aligns two reference sequences against two query sequences with
/// NucmerPairwiseAligner and compares the result with two hand-built
/// expected alignments.
/// </summary>
public void TestNUCmer3MultipleReferencesAndQueries()
{
    var referenceSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "ATGCGCATCCCC") { ID = "R1" },
        new Sequence(Alphabets.DNA, "TAGCT") { ID = "R11" },
    };

    var searchSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "CCGCGCCCCCTC") { ID = "Q1" },
        new Sequence(Alphabets.DNA, "AGCT") { ID = "Q11" },
    };

    var nucmer = new NucmerPairwiseAligner
    {
        FixedSeparation = 0,
        MinimumScore = 2,
        SeparationFactor = -1,
        LengthOfMUM = 3,
    };

    IList<IPairwiseSequenceAlignment> result = nucmer
        .Align(referenceSeqs, searchSeqs)
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // Check if output is not null
    Assert.AreNotEqual(null, result);

    // First expected alignment: gapped match with a negative score.
    IPairwiseSequenceAlignment gappedAlignment = new PairwiseSequenceAlignment();
    gappedAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC"),
        Consensus = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        Score = -5,
        FirstOffset = 0,
        SecondOffset = 0,
    });

    // Second expected alignment: exact four-base match.
    IPairwiseSequenceAlignment exactAlignment = new PairwiseSequenceAlignment();
    exactAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AGCT"),
        SecondSequence = new Sequence(Alphabets.DNA, "AGCT"),
        Consensus = new Sequence(Alphabets.DNA, "AGCT"),
        Score = 12,
        FirstOffset = 0,
        SecondOffset = 1,
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>
    {
        gappedAlignment,
        exactAlignment,
    };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Runs NucmerPairwiseAligner with BreakLength = 2 over three references and
/// three queries, and compares the result with one expected alignment per query.
/// </summary>
public void TestNUCmer3CustomBreakLength()
{
    var referenceSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "CAAAAGGGATTGCAAATGTTGGAGTGAATGCCATTACCTACCGGCTAGGAGGAGT") { ID = "R1" },
        new Sequence(Alphabets.DNA, "CCCCCCCCC") { ID = "R2" },
        new Sequence(Alphabets.DNA, "TTTTT") { ID = "R3" },
    };

    var searchSeqs = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "CATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAA") { ID = "Q1" },
        new Sequence(Alphabets.DNA, "CAAAGTCTCTATCAGAATGCAGATGCAGATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGC") { ID = "Q2" },
        new Sequence(Alphabets.DNA, "AAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGC") { ID = "Q3" },
    };

    var nucmer = new NucmerPairwiseAligner
    {
        MaximumSeparation = 0,
        MinimumScore = 2,
        SeparationFactor = 0.12F,
        LengthOfMUM = 5,
        BreakLength = 2,
    };

    IList<IPairwiseSequenceAlignment> result = nucmer
        .Align(referenceSeqs, searchSeqs)
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // Check if output is not null
    Assert.AreNotEqual(null, result);

    // Expected hits for the first query.
    IPairwiseSequenceAlignment firstQueryAlignment = new PairwiseSequenceAlignment();
    firstQueryAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AAAGGGA"),
        SecondSequence = new Sequence(Alphabets.DNA, "AAAGGGA"),
        Consensus = new Sequence(Alphabets.DNA, "AAAGGGA"),
        Score = 21,
        FirstOffset = 8,
        SecondOffset = 0,
    });
    firstQueryAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CATTA"),
        SecondSequence = new Sequence(Alphabets.DNA, "CATTA"),
        Consensus = new Sequence(Alphabets.DNA, "CATTA"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 31,
    });

    // Expected hits for the second query.
    IPairwiseSequenceAlignment secondQueryAlignment = new PairwiseSequenceAlignment();
    secondQueryAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "ATGTT"),
        SecondSequence = new Sequence(Alphabets.DNA, "ATGTT"),
        Consensus = new Sequence(Alphabets.DNA, "ATGTT"),
        Score = 15,
        FirstOffset = 13,
        SecondOffset = 0,
    });
    secondQueryAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GAATGC"),
        SecondSequence = new Sequence(Alphabets.DNA, "GAATGC"),
        Consensus = new Sequence(Alphabets.DNA, "GAATGC"),
        Score = 18,
        FirstOffset = 0,
        SecondOffset = 11,
    });
    secondQueryAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "TTTTT"),
        SecondSequence = new Sequence(Alphabets.DNA, "TTTTT"),
        Consensus = new Sequence(Alphabets.DNA, "TTTTT"),
        Score = 15,
        FirstOffset = 31,
        SecondOffset = 0,
    });

    // Expected hits for the third query.
    IPairwiseSequenceAlignment thirdQueryAlignment = new PairwiseSequenceAlignment();
    thirdQueryAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "CAAAA"),
        SecondSequence = new Sequence(Alphabets.DNA, "CAAAA"),
        Consensus = new Sequence(Alphabets.DNA, "CAAAA"),
        Score = 15,
        FirstOffset = 3,
        SecondOffset = 0,
    });
    thirdQueryAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GGATT"),
        SecondSequence = new Sequence(Alphabets.DNA, "GGATT"),
        Consensus = new Sequence(Alphabets.DNA, "GGATT"),
        Score = 15,
        FirstOffset = 45,
        SecondOffset = 0,
    });
    thirdQueryAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GCAAA"),
        SecondSequence = new Sequence(Alphabets.DNA, "GCAAA"),
        Consensus = new Sequence(Alphabets.DNA, "GCAAA"),
        Score = 15,
        FirstOffset = 0,
        SecondOffset = 9,
    });
    thirdQueryAlignment.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "TTACC"),
        SecondSequence = new Sequence(Alphabets.DNA, "TTACC"),
        Consensus = new Sequence(Alphabets.DNA, "TTACC"),
        Score = 15,
        FirstOffset = 22,
        SecondOffset = 0,
    });

    List<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>
    {
        firstQueryAlignment,
        secondQueryAlignment,
        thirdQueryAlignment,
    };

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Runs a forward-only NucmerPairwiseAligner with BreakLength = 2 over three
/// references and three queries, and compares the result with a table of
/// expected exact-match hits (one group of hits per query).
/// </summary>
public void TestNUCmer3CustomBreakLength()
{
    List<ISequence> referenceSeqs = new[]
        {
            new { Id = "R1", Bases = "CAAAAGGGATTGCAAATGTTGGAGTGAATGCCATTACCTACCGGCTAGGAGGAGT" },
            new { Id = "R2", Bases = "CCCCCCCCC" },
            new { Id = "R3", Bases = "TTTTT" },
        }
        .Select(r => (ISequence)new Sequence(Alphabets.DNA, r.Bases) { ID = r.Id })
        .ToList();

    List<ISequence> searchSeqs = new[]
        {
            new { Id = "Q1", Bases = "CATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAA" },
            new { Id = "Q2", Bases = "CAAAGTCTCTATCAGAATGCAGATGCAGATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGC" },
            new { Id = "Q3", Bases = "AAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGC" },
        }
        .Select(q => (ISequence)new Sequence(Alphabets.DNA, q.Bases) { ID = q.Id })
        .ToList();

    NucmerPairwiseAligner nucmer = new NucmerPairwiseAligner();
    nucmer.MaximumSeparation = 0;
    nucmer.MinimumScore = 2;
    nucmer.SeparationFactor = 0.12F;
    nucmer.LengthOfMUM = 5;
    nucmer.BreakLength = 2;
    nucmer.ForwardOnly = true;

    var result = nucmer.Align(referenceSeqs, searchSeqs)
        .Select(a => a as IPairwiseSequenceAlignment)
        .ToList();

    // Check if output is not null
    Assert.IsNotNull(result);

    // Every expected hit is an exact match, so first sequence, second sequence
    // and consensus all share the same bases. One inner array per query.
    var expectedHitsPerQuery = new[]
    {
        new[]
        {
            new { Bases = "AAAGGGA", Score = 21, FirstOffset = 8, SecondOffset = 0 },
            new { Bases = "CATTA", Score = 15, FirstOffset = 0, SecondOffset = 31 },
        },
        new[]
        {
            new { Bases = "ATGTT", Score = 15, FirstOffset = 13, SecondOffset = 0 },
            new { Bases = "GAATGC", Score = 18, FirstOffset = 0, SecondOffset = 11 },
            new { Bases = "TTTTT", Score = 15, FirstOffset = 31, SecondOffset = 0 },
        },
        new[]
        {
            new { Bases = "CAAAA", Score = 15, FirstOffset = 3, SecondOffset = 0 },
            new { Bases = "GGATT", Score = 15, FirstOffset = 45, SecondOffset = 0 },
            new { Bases = "GCAAA", Score = 15, FirstOffset = 0, SecondOffset = 9 },
            new { Bases = "TTACC", Score = 15, FirstOffset = 22, SecondOffset = 0 },
        },
    };

    var expectedOutput = new List<IPairwiseSequenceAlignment>();
    foreach (var queryHits in expectedHitsPerQuery)
    {
        IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
        foreach (var hit in queryHits)
        {
            align.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
            {
                FirstSequence = new Sequence(Alphabets.DNA, hit.Bases),
                SecondSequence = new Sequence(Alphabets.DNA, hit.Bases),
                Consensus = new Sequence(Alphabets.DNA, hit.Bases),
                Score = hit.Score,
                FirstOffset = hit.FirstOffset,
                SecondOffset = hit.SecondOffset
            });
        }

        expectedOutput.Add(align);
    }

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Validates PairwiseOverlapAlignment algorithm for the parameters passed.
/// Inputs, scoring parameters and expected results are all read from the
/// PairwiseOverlapAlignAlgorithm node of the test configuration XML.
/// </summary>
/// <param name="isTextFile">Is text file an input.</param>
/// <param name="alignParam">parameter based on which certain validations are done.</param>
/// <param name="alignType">Is the Align type Simple or Align with Gap Extension cost?</param>
private void ValidatePairwiseOverlapAlignment(bool isTextFile, AlignmentParamType alignParam, AlignmentType alignType)
{
    ISequence aInput;
    ISequence bInput;

    IAlphabet alphabet = Utility.GetAlphabet(
        this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.AlphabetNameNode));

    if (isTextFile)
    {
        // Read the paths of the two FastA files to align from the XML.
        string filePath1 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.FilePathNode1);
        string filePath2 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.FilePathNode2);

        // Parse the files and keep the first sequence of each.
        var parser = new FastAParser { Alphabet = alphabet };
        aInput = parser.Parse(filePath1).ElementAt(0);
        bInput = parser.Parse(filePath2).ElementAt(0);
    }
    else
    {
        // Read the two input sequences directly from the XML.
        string origSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.SequenceNode1);
        string origSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.SequenceNode2);
        aInput = new Sequence(alphabet, origSequence1);
        bInput = new Sequence(alphabet, origSequence2);
    }

    var aInputString = aInput.ConvertToString();
    var bInputString = bInput.ConvertToString();

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : First sequence used is '{0}'.", aInputString));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : Second sequence used is '{0}'.", bInputString));

    string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.BlosumFilePathNode);

    // Dispose the reader once the matrix is built so the file handle is not leaked.
    // NOTE(review): assumes the SimilarityMatrix constructor reads the whole stream eagerly - confirm.
    SimilarityMatrix sm;
    using (var blosumReader = new StreamReader(blosumFilePath))
    {
        sm = new SimilarityMatrix(blosumReader);
    }

    int gapOpenCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.GapOpenCostNode), null);
    int gapExtensionCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.GapExtensionCostNode), null);

    var pairwiseOverlapObj = new PairwiseOverlapAligner();

    // The AllParam overloads receive the matrix and gap costs as arguments,
    // so the aligner's properties are only configured for the other modes.
    if (AlignmentParamType.AllParam != alignParam)
    {
        pairwiseOverlapObj.SimilarityMatrix = sm;
        pairwiseOverlapObj.GapOpenCost = gapOpenCost;
    }

    IList<IPairwiseSequenceAlignment> result = null;

    switch (alignParam)
    {
        case AlignmentParamType.AlignList:
            var sequences = new List<ISequence> { aInput, bInput };
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sequences);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sequences);
                    break;
            }
            break;
        case AlignmentParamType.AlignTwo:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(aInput, bInput);
                    break;
            }
            break;
        case AlignmentParamType.AllParam:
            switch (alignType)
            {
                case AlignmentType.Align:
                    result = pairwiseOverlapObj.Align(sm, gapOpenCost, gapExtensionCost, aInput, bInput);
                    break;
                default:
                    result = pairwiseOverlapObj.AlignSimple(sm, gapOpenCost, aInput, bInput);
                    break;
            }
            break;
        default:
            // Any other mode leaves result null; it is passed to CompareAlignment as is.
            break;
    }

    // Read the expected score and expected aligned sequences from the XML.
    string expectedSequence1;
    string expectedSequence2;
    string expectedScore;

    switch (alignType)
    {
        case AlignmentType.Align:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedGapExtensionScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedGapExtensionSequence1Node);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedGapExtensionSequence2Node);
            break;
        default:
            expectedScore = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedScoreNode);
            expectedSequence1 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedSequenceNode1);
            expectedSequence2 = this.utilityObj.xmlUtil.GetTextValue(Constants.PairwiseOverlapAlignAlgorithmNodeName, Constants.ExpectedSequenceNode2);
            break;
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();

    // The expected values are ';'-separated lists; entry i of each list forms one aligned pair.
    var separators = new[] { ';' };
    string[] expectedSequences1 = expectedSequence1.Split(separators);
    string[] expectedSequences2 = expectedSequence2.Split(separators);

    // Every expected pair carries the same score, so parse it once outside the loop.
    int expectedScoreValue = Convert.ToInt32(expectedScore, null);

    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    for (int i = 0; i < expectedSequences1.Length; i++)
    {
        PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence
        {
            FirstSequence = new Sequence(alphabet, expectedSequences1[i]),
            SecondSequence = new Sequence(alphabet, expectedSequences2[i]),
            Score = expectedScoreValue
        };
        align.PairwiseAlignedSequences.Add(alignedSeq);
    }

    expectedOutput.Add(align);
    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));

    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : Final Score '{0}'.", expectedScore));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : Aligned First Sequence is '{0}'.", expectedSequence1));
    ApplicationLog.WriteLine(string.Format(null, "PairwiseOverlapAligner BVT : Aligned Second Sequence is '{0}'.", expectedSequence2));
}
/// <summary>
/// Runs MUMmerAligner (with NeedlemanWunschAligner as the pairwise algorithm)
/// on the reference and query FastA files configured under the given XML node
/// and compares the result with the expected score and sequences from that node.
/// </summary>
/// <param name="nodeName">Name of the XML node holding the file paths, aligner settings and expected values.</param>
void ValidateMUMmerAlignGeneralTestCases(string nodeName)
{
    ISequence referenceSeq = null;
    IEnumerable<ISequence> querySeqs = null;

    // Gets the reference sequence file path from the configuration file.
    string filePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
    Assert.IsNotNull(filePath);
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "MUMmer P2 : Successfully validated the File Path '{0}'.", filePath));

    // Dispose the parser as soon as the first sequence has been read.
    using (FastAParser referenceParser = new FastAParser(filePath))
    {
        referenceSeq = referenceParser.Parse().ElementAt(0);
    }

    // Gets the query sequence file path from the configuration file.
    string queryFilePath = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
    Assert.IsNotNull(queryFilePath);
    ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
        "MUMmer P2 : Successfully validated the Search File Path '{0}'.", queryFilePath));

    // Materialize the query sequences while the parser is still open, so the
    // parser can be disposed before the aligner enumerates them.
    using (FastAParser queryParser = new FastAParser(queryFilePath))
    {
        querySeqs = queryParser.Parse().ToList();
    }

    string mumLength = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    MUMmerAligner mum = new MUMmerAligner();
    mum.LengthOfMUM = long.Parse(mumLength, null);
    mum.StoreMUMs = true;
    mum.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mum.GapOpenCost = int.Parse(
        utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode), (IFormatProvider)null);

    IList<IPairwiseSequenceAlignment> align = mum.Align(referenceSeq, querySeqs);

    // Validate that the MUMs property was populated.
    Assert.IsNotNull(mum.MUMs);

    string expectedScore = utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    string[] expectedSequences = utilityObj.xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    // Two expected sequences (reference side, query side) are indexed below;
    // fail with a clear message instead of an index error if they are missing.
    Assert.IsTrue(expectedSequences != null && expectedSequences.Length >= 2,
        "MUMmer P2 : Two expected sequences are required in the configuration file.");

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment seqAlign = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[0]);
    alignedSeq.SecondSequence = new Sequence(referenceSeq.Alphabet, expectedSequences[1]);
    alignedSeq.Score = Convert.ToInt32(expectedScore, (IFormatProvider)null);
    seqAlign.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(seqAlign);

    Assert.IsTrue(CompareAlignment(align, expectedOutput));

    Console.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
    ApplicationLog.WriteLine("MUMmer P2 : Successfully validated the aligned sequences.");
}
/// <summary>
/// Core alignment routine. For every query sequence it collects the matches
/// (MUMs) against the reference, stores them in the mums dictionary, orders
/// and chains them, and then either processes the gaps between the chained
/// matches or, when no match survives sorting, falls back to the plain
/// pairwise aligner.
/// </summary>
/// <param name="referenceSequence">Reference sequence.</param>
/// <param name="querySequenceList">List of input sequences.</param>
/// <returns>
/// A list of sequence alignments; an empty list when validation fails;
/// null when either argument is null.
/// </returns>
private IList<IPairwiseSequenceAlignment> AlignmentWithAccumulatedMUMs(
    ISequence referenceSequence,
    IEnumerable<ISequence> querySequenceList)
{
    // Start every run with an empty MUM store and an empty result list.
    this.mums = new Dictionary<ISequence, IEnumerable<Match>>();
    IList<IPairwiseSequenceAlignment> alignments = new List<IPairwiseSequenceAlignment>();

    if (!this.Validate(referenceSequence, querySequenceList))
    {
        return alignments;
    }

    // Safety check so that null inputs are handled.
    // NOTE(review): this runs only after Validate has already received both
    // arguments - confirm Validate tolerates nulls.
    if (referenceSequence == null || querySequenceList == null)
    {
        return null;
    }

    Sequence concreteReference = referenceSequence as Sequence;
    if (concreteReference == null)
    {
        throw new ArgumentException(Properties.Resource.OnlySequenceClassSupported);
    }

    MUMmer mummer = new MUMmer(concreteReference)
    {
        LengthOfMUM = this.LengthOfMUM,
        NoAmbiguity = this.AmbigiousMatchesAllowed
    };

    foreach (ISequence query in querySequenceList)
    {
        // The reference is never aligned against itself.
        if (query.Equals(referenceSequence))
        {
            continue;
        }

        IPairwiseSequenceAlignment current = new PairwiseSequenceAlignment(referenceSequence, query);

        // Step 2 : collect the matches for this query sequence.
        IEnumerable<Match> matches;
        if (!this.MaximumMatchEnabled)
        {
            matches = mummer.GetMatchesUniqueInReference(query);
        }
        else
        {
            matches = mummer.GetMatches(query);
        }

        this.mums.Add(query, matches);

        // Step 3(a) : sort the matches.
        LongestIncreasingSubsequence lis = new LongestIncreasingSubsequence();
        IList<Match> sortedMatches = lis.SortMum(GetMumsForLIS(matches));

        if (sortedMatches.Count == 0)
        {
            // Nothing to chain: fall back to the plain pairwise aligner.
            foreach (IPairwiseSequenceAlignment fallback in this.RunPairWise(referenceSequence, query))
            {
                alignments.Add(fallback);
            }

            continue;
        }

        // Step 3(b) : reduce the sorted matches to the longest sequence.
        IList<Match> chainedMatches = lis.GetLongestSequence(sortedMatches);
        if (chainedMatches.Count > 0)
        {
            // Step 4 : process the gaps between the chained matches.
            current.PairwiseAlignedSequences.Add(
                this.ProcessGaps(referenceSequence, query, chainedMatches));
        }

        // The alignment is recorded even when the chained list came back empty.
        alignments.Add(current);
    }

    return alignments;
}
/// <summary>
/// Aligns one query against one reference with MUMmerAligner.AlignSimple,
/// using a DiagonalSimilarityMatrix(3, -2) and NeedlemanWunschAligner as the
/// pairwise algorithm, and compares the result with a hand-built expected alignment.
/// </summary>
public void TestMUMmer3MultipleMumWithCustomMatrix()
{
    string reference = "ATGCGCATCCCCTT";
    string search = "GCGCCCCCTA";

    Sequence referenceSeq = new Sequence(Alphabets.DNA, reference);
    Sequence searchSeq = new Sequence(Alphabets.DNA, search);

    List<ISequence> searchSeqs = new List<ISequence>();
    searchSeqs.Add(searchSeq);

    // The hand-filled 256x256 score array that used to be built here was never
    // passed to the aligner (and indexed a lowercase 'c' by mistake), so it has
    // been removed; the diagonal matrix below is what the test actually uses.
    DiagonalSimilarityMatrix matrix = new DiagonalSimilarityMatrix(3, -2);

    int gapOpenCost = -6;

    MUMmerAligner mummer = new MUMmerAligner();
    mummer.LengthOfMUM = 4;
    mummer.PairWiseAlgorithm = new NeedlemanWunschAligner();
    mummer.SimilarityMatrix = matrix;
    mummer.GapOpenCost = gapOpenCost;
    mummer.GapExtensionCost = -2;

    IList<IPairwiseSequenceAlignment> result = mummer.AlignSimple(referenceSeq, searchSeqs);

    // Check if output is not null
    Assert.AreNotEqual(null, result);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    IPairwiseSequenceAlignment align = new PairwiseSequenceAlignment();
    PairwiseAlignedSequence alignedSeq = new PairwiseAlignedSequence();
    alignedSeq.FirstSequence = new Sequence(Alphabets.DNA, "ATGCGCATCCCCTT");
    alignedSeq.SecondSequence = new Sequence(Alphabets.DNA, "--GCGC--CCCCTA");
    alignedSeq.Consensus = new Sequence(AmbiguousDnaAlphabet.Instance, "ATGCGCATCCCCTW");
    alignedSeq.Score = 1;
    alignedSeq.FirstOffset = 0;
    alignedSeq.SecondOffset = 2;
    align.PairwiseAlignedSequences.Add(alignedSeq);
    expectedOutput.Add(align);

    Assert.IsTrue(CompareAlignment(result, expectedOutput));
}
/// <summary>
/// Given two byte arrays representing a pairwise alignment, shift them so
/// that all deletions start as early as possible. For example:
///
/// <code>
/// TTTTAAAATTTT  -> Converts to ->  TTTTAAAATTTT
/// TTTTAA--TTTT                     TTTT--AATTTT
/// </code>
///
/// This function takes a IPairwiseSequenceAlignment and assumes that the first sequence is the reference and second
/// sequence is the query. It returns a new Pairwise sequence alignment with all of the indels left aligned as well as a list of variants.
/// </summary>
/// <param name="aln">Aln. Must contain exactly one pairwise aligned sequence. The second sequence should be of type QualitativeSequence or Sequence</param>
/// <param name="callVariants">callVariants. If true, it will call variants, otherwise the second half of tuple will be null. </param>
/// <returns>
/// A tuple of the new left-aligned alignment and the called variants
/// (null when <paramref name="callVariants"/> is false).
/// </returns>
/// <exception cref="NullReferenceException">aln, or either of its aligned sequences, is null.</exception>
/// <exception cref="ArgumentException">
/// aln does not hold exactly one aligned pair, or the query is neither a
/// Sequence nor a QualitativeSequence.
/// </exception>
public static Tuple<IPairwiseSequenceAlignment, List<Variant>> LeftAlignIndelsAndCallVariants(IPairwiseSequenceAlignment aln, bool callVariants = true)
{
    // NOTE(review): ArgumentNullException would be the conventional type for the
    // null checks below; NullReferenceException is kept so that existing
    // callers' catch blocks keep working.
    if (aln == null)
    {
        throw new NullReferenceException("aln");
    }

    if (aln.PairwiseAlignedSequences == null || aln.PairwiseAlignedSequences.Count != 1)
    {
        throw new ArgumentException("The pairwise aligned sequence should only have one alignment");
    }

    var frstAln = aln.PairwiseAlignedSequences.First();
    var seq1 = frstAln.FirstSequence;
    var seq2 = frstAln.SecondSequence;
    if (seq1 == null)
    {
        throw new NullReferenceException("seq1");
    }
    else if (seq2 == null)
    {
        throw new NullReferenceException("seq2");
    }

    //TODO: Might implement an ambiguity check later.
#if FALSE
    if (seq1.Alphabet.HasAmbiguity || seq2.Alphabet.HasAmbiguity)
    {
        throw new ArgumentException("Cannot left align sequences with ambiguous symbols.");
    }
#endif

    // Note we have to copy unless we can guarantee the array will not be mutated.
    byte[] refseq = seq1.ToArray();
    ISequence newQuery;
    List<Variant> variants = null;

    var qualitativeQuery = seq2 as QualitativeSequence;
    var plainQuery = seq2 as Sequence;

    if (qualitativeQuery != null)
    {
        // Call variants for a qualitative sequence: pair every base with its quality value.
        var query = Enumerable.Zip(qualitativeQuery, qualitativeQuery.GetQualityScores(), (bp, qv) => new BPandQV(bp, (byte)qv, false)).ToArray();
        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }

        var newQueryQS = new QualitativeSequence(
            qualitativeQuery.Alphabet,
            qualitativeQuery.FormatType,
            query.Select(z => z.BP).ToArray(),
            query.Select(p => p.QV).ToArray(),
            false);
        newQueryQS.Metadata = seq2.Metadata;
        newQuery = newQueryQS;
    }
    else if (plainQuery != null)
    {
        // For a sequence with no QV values every base gets a quality value of 0.
        var query = plainQuery.Select(v => new BPandQV(v, 0, false)).ToArray();
        AlignmentUtils.LeftAlignIndels(refseq, query);
        AlignmentUtils.VerifyNoGapsOnEnds(refseq, query);

        // ISequence does not have a setable metadata
        var newQueryS = new Sequence(plainQuery.Alphabet, query.Select(z => z.BP).ToArray(), false);
        newQueryS.Metadata = seq2.Metadata;
        if (callVariants)
        {
            variants = VariantCaller.CallVariants(refseq, query, seq2.IsMarkedAsReverseComplement());
        }

        newQuery = newQueryS;
    }
    else
    {
        throw new ArgumentException("Can only left align indels if the query sequence is of type Sequence or QualitativeSequence.");
    }

    // variants stays null when callVariants is false, so it must be checked
    // before the reference name is stamped onto each variant.
    if (variants != null && aln.FirstSequence != null && aln.FirstSequence.ID != null)
    {
        foreach (var v in variants)
        {
            v.RefName = aln.FirstSequence.ID;
        }
    }

    var newRef = new Sequence(seq1.Alphabet, refseq, false);
    newRef.ID = seq1.ID;
    newRef.Metadata = seq1.Metadata;

    newQuery.ID = seq2.ID;

    // The new alignment keeps the original unaligned sequences and holds the
    // single left-aligned pair.
    var newaln = new PairwiseSequenceAlignment(aln.FirstSequence, aln.SecondSequence);
    var pas = new PairwiseAlignedSequence();
    pas.FirstSequence = newRef;
    pas.SecondSequence = newQuery;
    newaln.Add(pas);
    return new Tuple<IPairwiseSequenceAlignment, List<Variant>>(newaln, variants);
}
/// <summary>
/// Runs the PairwiseOverlapAligner on two input sequences taken from the test
/// configuration xml and asserts that the result matches the expected alignment
/// stored under the same xml node.
/// </summary>
/// <param name="isTextFile">True to parse the inputs from the FastA files named in the xml; false to build them from the sequence strings in the xml.</param>
/// <param name="alignParam">Selects which aligner overload is exercised (sequence list, two sequences, or all parameters passed explicitly).</param>
/// <param name="alignType">AlignmentType.Align uses the Align overloads and the gap-extension expectations; any other value uses AlignSimple.</param>
/// <param name="IsUseEARTHToFillMatrix">When true, UseEARTHToFillMatrix is switched on for the aligner before aligning.</param>
void ValidatePairwiseOverlapAlignment(bool isTextFile, AlignmentParamType alignParam, AlignmentType alignType, bool IsUseEARTHToFillMatrix)
{
    var algorithmNode = Constants.PairwiseOverlapAlignAlgorithmNodeName;

    IAlphabet alphabet = Utility.GetAlphabet(
        utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.AlphabetNameNode));

    ISequence firstInput;
    ISequence secondInput;
    if (!isTextFile)
    {
        // Inputs are given inline in the xml as sequence strings.
        string rawFirst = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.SequenceNode1);
        string rawSecond = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.SequenceNode2);

        firstInput = new Sequence(alphabet, rawFirst);
        secondInput = new Sequence(alphabet, rawSecond);
    }
    else
    {
        // Inputs are given in the xml as paths to FastA files.
        string firstPath = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.FilePathNode1);
        string secondPath = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.FilePathNode2);

        // Only the first record of each file is used.
        using (FastAParser firstParser = new FastAParser(firstPath))
        {
            firstParser.Alphabet = alphabet;
            firstInput = firstParser.Parse().ElementAt(0);
        }

        using (FastAParser secondParser = new FastAParser(secondPath))
        {
            secondParser.Alphabet = alphabet;
            secondInput = secondParser.Parse().ElementAt(0);
        }
    }

    // Report the inputs to the application log first, then to the console.
    string firstText = new string(firstInput.Select(symbol => (char)symbol).ToArray());
    string secondText = new string(secondInput.Select(symbol => (char)symbol).ToArray());

    string firstUsedMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : First sequence used is '{0}'.", firstText);
    string secondUsedMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Second sequence used is '{0}'.", secondText);

    ApplicationLog.WriteLine(firstUsedMessage);
    ApplicationLog.WriteLine(secondUsedMessage);
    Console.WriteLine(firstUsedMessage);
    Console.WriteLine(secondUsedMessage);

    // Scoring configuration.
    string matrixPath = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.BlosumFilePathNode);
    SimilarityMatrix matrix = new SimilarityMatrix(matrixPath);

    int openCost = int.Parse(
        utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.GapOpenCostNode),
        (IFormatProvider)null);
    int extensionCost = int.Parse(
        utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.GapExtensionCostNode),
        (IFormatProvider)null);

    PairwiseOverlapAligner aligner = new PairwiseOverlapAligner();

    // For AllParam the matrix and gap cost are passed to the call itself instead.
    if (alignParam != AlignmentParamType.AllParam)
    {
        aligner.SimilarityMatrix = matrix;
        aligner.GapOpenCost = openCost;
    }

    if (IsUseEARTHToFillMatrix)
    {
        aligner.UseEARTHToFillMatrix = true;
    }

    bool withGapExtension = alignType == AlignmentType.Align;

    // Stays null when alignParam matches none of the handled values.
    IList<IPairwiseSequenceAlignment> result = null;
    if (alignParam == AlignmentParamType.AlignList)
    {
        List<ISequence> inputs = new List<ISequence> { firstInput, secondInput };
        if (withGapExtension)
        {
            result = aligner.Align(inputs);
        }
        else
        {
            result = aligner.AlignSimple(inputs);
        }
    }
    else if (alignParam == AlignmentParamType.AlignTwo)
    {
        if (withGapExtension)
        {
            result = aligner.Align(firstInput, secondInput);
        }
        else
        {
            result = aligner.AlignSimple(firstInput, secondInput);
        }
    }
    else if (alignParam == AlignmentParamType.AllParam)
    {
        if (withGapExtension)
        {
            result = aligner.Align(matrix, openCost, extensionCost, firstInput, secondInput);
        }
        else
        {
            result = aligner.AlignSimple(matrix, openCost, firstInput, secondInput);
        }
    }

    // The aligner, inputs and matrix are not used past this point; drop the references.
    aligner = null;
    firstInput = null;
    secondInput = null;
    matrix = null;

    // Fetch the expectations that belong to the chosen alignment type.
    string expectedScore;
    string expectedSequence1;
    string expectedSequence2;
    if (withGapExtension)
    {
        expectedScore = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.ExpectedGapExtensionScoreNode);
        expectedSequence1 = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.ExpectedGapExtensionSequence1Node);
        expectedSequence2 = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.ExpectedGapExtensionSequence2Node);
    }
    else
    {
        expectedScore = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.ExpectedScoreNode);
        expectedSequence1 = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.ExpectedSequenceNode1);
        expectedSequence2 = utilityObj.xmlUtil.GetTextValue(algorithmNode, Constants.ExpectedSequenceNode2);
    }

    // Expected sequences are ';'-separated; entry i of each list forms one aligned pair.
    string[] expectedFirsts = expectedSequence1.Split(';');
    string[] expectedSeconds = expectedSequence2.Split(';');

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    for (int index = 0; index < expectedFirsts.Length; index++)
    {
        PairwiseAlignedSequence pair = new PairwiseAlignedSequence();
        pair.FirstSequence = new Sequence(alphabet, expectedFirsts[index]);
        pair.SecondSequence = new Sequence(alphabet, expectedSeconds[index]);
        pair.Score = Convert.ToInt32(expectedScore, (IFormatProvider)null);
        expectedAlignment.PairwiseAlignedSequences.Add(pair);
    }

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    expectedOutput.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(result, expectedOutput));

    // Report the validated expectations to the application log first, then to the console.
    string scoreMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Final Score '{0}'.", expectedScore);
    string alignedFirstMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Aligned First Sequence is '{0}'.", expectedSequence1);
    string alignedSecondMessage = string.Format((IFormatProvider)null,
        "PairwiseOverlapAligner BVT : Aligned Second Sequence is '{0}'.", expectedSequence2);

    ApplicationLog.WriteLine(scoreMessage);
    ApplicationLog.WriteLine(alignedFirstMessage);
    ApplicationLog.WriteLine(alignedSecondMessage);
    Console.WriteLine(scoreMessage);
    Console.WriteLine(alignedFirstMessage);
    Console.WriteLine(alignedSecondMessage);
}
/// <summary>
/// Runs a MUMmer alignment for the test case stored under the given xml node and
/// asserts that both the score and the aligned sequences match the expected values.
/// </summary>
/// <param name="nodeName">Node name to be read from xml</param>
/// <param name="isFilePath">True to parse the reference and query from the files named in the xml; false to build them from the sequence strings in the xml.</param>
/// <param name="isSeqList">Is MUMmer alignment with List of sequences</param>
void ValidateMUMmerAlignGeneralTestCases(string nodeName, bool isFilePath, bool isSeqList)
{
    ISequence reference;
    IList<ISequence> queries;
    string referenceText;
    string queryText;

    if (!isFilePath)
    {
        // Reference and query are given inline, each with its own alphabet name.
        referenceText = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.SequenceNode);
        string referenceAlphabetName = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode);
        reference = new Sequence(Utility.GetAlphabet(referenceAlphabetName), referenceText);

        queryText = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceNode);
        string queryAlphabetName = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceAlphabetNode);
        ISequence query = new Sequence(Utility.GetAlphabet(queryAlphabetName), queryText);

        queries = new List<ISequence>();
        queries.Add(query);
    }
    else
    {
        // Reference file path comes from the configuration file.
        string referencePath = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.FilePathNode);
        Assert.IsNotNull(referencePath);
        ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
            "MUMmer BVT : Successfully validated the File Path '{0}'.", referencePath));

        using (FastaParser parser = new FastaParser())
        {
            // Only the first parsed record serves as the reference.
            IList<ISequence> parsedReferences = parser.Parse(referencePath);
            reference = parsedReferences[0];
            referenceText = reference.ToString();

            // Query file path comes from the configuration file as well.
            string queryPath = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.SearchSequenceFilePathNode);
            Assert.IsNotNull(queryPath);
            ApplicationLog.WriteLine(string.Format((IFormatProvider)null,
                "MUMmer BVT : Successfully validated the Search File Path '{0}'.", queryPath));

            queries = parser.Parse(queryPath);
            queryText = queries[0].ToString();
        }
    }

    // Configure the aligner from the xml node.
    string mumLengthText = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.MUMAlignLengthNode);

    MUMmer aligner = new MUMmer3();
    aligner.LengthOfMUM = long.Parse(mumLengthText, null);
    aligner.PairWiseAlgorithm = new NeedlemanWunschAligner();
    aligner.GapOpenCost = int.Parse(
        _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.GapOpenCostNode),
        (IFormatProvider)null);

    IList<IPairwiseSequenceAlignment> actual;
    if (!isSeqList)
    {
        actual = aligner.AlignSimple(reference, queries);
    }
    else
    {
        // The list overload takes a single collection, so the reference is appended to the queries.
        queries.Add(reference);
        actual = aligner.Align(queries);
    }

    // Validate the score of the first aligned pair.
    string expectedScore = _utilityObj._xmlUtil.GetTextValue(nodeName, Constants.ScoreNodeName);
    Assert.AreEqual(expectedScore,
        actual[0].PairwiseAlignedSequences[0].Score.ToString((IFormatProvider)null));

    string scoreMessage = string.Format((IFormatProvider)null,
        "MUMmer BVT : Successfully validated the score for the sequence '{0}' and '{1}'.",
        referenceText, queryText);
    Console.WriteLine(scoreMessage);
    ApplicationLog.WriteLine(scoreMessage);

    // Build the expected alignment: entry 0 is the aligned reference, entry 1 the aligned query.
    string[] expectedSequences = _utilityObj._xmlUtil.GetTextValues(nodeName, Constants.ExpectedSequencesNode);

    PairwiseAlignedSequence expectedPair = new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(reference.Alphabet, expectedSequences[0]),
        SecondSequence = new Sequence(reference.Alphabet, expectedSequences[1]),
        Score = Convert.ToInt32(expectedScore, (IFormatProvider)null)
    };

    IPairwiseSequenceAlignment expectedAlignment = new PairwiseSequenceAlignment();
    expectedAlignment.PairwiseAlignedSequences.Add(expectedPair);

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>();
    expectedOutput.Add(expectedAlignment);

    Assert.IsTrue(CompareAlignment(actual, expectedOutput));

    Console.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
    ApplicationLog.WriteLine("MUMmer BVT : Successfully validated the aligned sequences.");
}
/// <summary>
/// Aligns two reference sequences against two query sequences with
/// NucmerPairwiseAligner and asserts that the result matches the two
/// expected pairwise alignments.
/// </summary>
public void ValidatePairwiseAlignedSequenceMultipleRefQuery()
{
    var references = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "ATGCGCATCCCC") { ID = "R1" },
        new Sequence(Alphabets.DNA, "TAGCT") { ID = "R2" }
    };
    var queries = new List<ISequence>()
    {
        new Sequence(Alphabets.DNA, "CCGCGCCCCCTC") { ID = "Q1" },
        new Sequence(Alphabets.DNA, "AGCT") { ID = "Q2" }
    };

    var aligner = new NucmerPairwiseAligner
    {
        FixedSeparation = 0,
        MinimumScore = 2,
        SeparationFactor = -1,
        LengthOfMUM = 3,
        ForwardOnly = true,
    };

    var rawAlignments = aligner.Align(references, queries);
    IList<IPairwiseSequenceAlignment> result = rawAlignments
        .Select(item => item as IPairwiseSequenceAlignment)
        .ToList();

    // Check if output is not null
    Assert.AreNotEqual(null, result);

    // First expected alignment.
    IPairwiseSequenceAlignment firstExpected = new PairwiseSequenceAlignment();
    firstExpected.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        SecondSequence = new Sequence(Alphabets.DNA, "GCGC--CCCC"),
        Consensus = new Sequence(Alphabets.DNA, "GCGCATCCCC"),
        Score = -5,
        FirstOffset = 0,
        SecondOffset = 0
    });

    // Second expected alignment.
    IPairwiseSequenceAlignment secondExpected = new PairwiseSequenceAlignment();
    secondExpected.PairwiseAlignedSequences.Add(new PairwiseAlignedSequence
    {
        FirstSequence = new Sequence(Alphabets.DNA, "AGCT"),
        SecondSequence = new Sequence(Alphabets.DNA, "AGCT"),
        Consensus = new Sequence(Alphabets.DNA, "AGCT"),
        Score = 12,
        FirstOffset = 0,
        SecondOffset = 1
    });

    IList<IPairwiseSequenceAlignment> expectedOutput = new List<IPairwiseSequenceAlignment>
    {
        firstExpected,
        secondExpected
    };

    Assert.IsTrue(AlignmentHelpers.CompareAlignment(result, expectedOutput));

    ApplicationLog.WriteLine("PairwiseAlignedSequence P1: Successfully validated Sequence with Multiple Reference.");
}