/// <summary>
/// Assembles three short DNA reads with the pairwise overlap aligner and
/// checks that they collapse into a single contig with the expected consensus.
/// </summary>
public void TestSimpleSequenceAssembler()
{
    // Turn on the assembly-details trace (log dump).
    Trace.Set(Trace.AssemblyDetails);

    // Scoring and threshold settings used by this test.
    const int matchScore = 1;
    const int mismatchScore = -8;
    const int gapCost = -8;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    // Expected layout of the three reads:
    //
    //   TATAAAGCGCCAA
    //            GCCAAAATTTAGGC
    //                      AGGCACCCGCGGTATT   <= reversed
    //
    //   TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
    var reads = new List<ISequence>
    {
        new Sequence(Alphabets.DNA, "GCCAAAATTTAGGC"),
        new Sequence(Alphabets.DNA, "TTATGGCGCCCACGGA"),
        new Sequence(Alphabets.DNA, "TATAAAGCGCCAA")
    };

    var assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner()
    };

    // The scoring properties are set through the pairwise-aligner interface.
    var pairwiseAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
    pairwiseAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    pairwiseAligner.GapOpenCost = gapCost;

    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    var result = (IOverlapDeNovoAssembly)assembler.Assemble(reads);

    // Every read must be merged into exactly one contig.
    Assert.AreEqual(0, result.UnmergedSequences.Count);
    Assert.AreEqual(1, result.Contigs.Count);

    Contig onlyContig = result.Contigs[0];
    Assert.AreEqual("TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT", onlyContig.Consensus.ConvertToString());
    Assert.AreEqual(3, onlyContig.Sequences.Count);
}
/// <summary>
/// Builds an overlap de novo assembly from the three sequences and the
/// scoring parameters stored under the assembly-algorithm XML node.
/// </summary>
/// <param name="additionalParameter">
/// Selects the log prefix only: "consensus" logs as "SimpleConsensusMethod BVT",
/// any other value logs as "SequenceAssembly BVT".
/// </param>
/// <returns>The assembly produced by the overlap de novo assembler.</returns>
private IOverlapDeNovoAssembly GetSequenceAssembly(string additionalParameter)
{
    var xml = this.utilityObj.xmlUtil;
    string node = Constants.AssemblyAlgorithmNodeName;

    // Scoring parameters and thresholds from the XML configuration.
    int matchScore = int.Parse(xml.GetTextValue(node, Constants.MatchScoreNode), null);
    int mismatchScore = int.Parse(xml.GetTextValue(node, Constants.MisMatchScoreNode), null);
    int gapCost = int.Parse(xml.GetTextValue(node, Constants.GapCostNode), null);
    double mergeThreshold = double.Parse(xml.GetTextValue(node, Constants.MergeThresholdNode), null);
    double consensusThreshold = double.Parse(xml.GetTextValue(node, Constants.ConsensusThresholdNode), null);

    // Input reads and their alphabet.
    string sequence1 = xml.GetTextValue(node, Constants.SequenceNode1);
    string sequence2 = xml.GetTextValue(node, Constants.SequenceNode2);
    string sequence3 = xml.GetTextValue(node, Constants.SequenceNode3);
    IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(node, Constants.AlphabetNameNode));

    // Log the reads under the prefix that matches the calling test case.
    if (additionalParameter == "consensus")
    {
        ApplicationLog.WriteLine(string.Format(null,
            "SimpleConsensusMethod BVT : Sequence 1 used is '{0}'.", sequence1));
        ApplicationLog.WriteLine(string.Format(null,
            "SimpleConsensusMethod BVT : Sequence 2 used is '{0}'.", sequence2));
        ApplicationLog.WriteLine(string.Format(null,
            "SimpleConsensusMethod BVT : Sequence 3 used is '{0}'.", sequence3));
    }
    else
    {
        ApplicationLog.WriteLine(string.Format(null,
            "SequenceAssembly BVT : Sequence 1 used is '{0}'.", sequence1));
        ApplicationLog.WriteLine(string.Format(null,
            "SequenceAssembly BVT : Sequence 2 used is '{0}'.", sequence2));
        ApplicationLog.WriteLine(string.Format(null,
            "SequenceAssembly BVT : Sequence 3 used is '{0}'.", sequence3));
    }

    // Layout noted by the original author for the default test data:
    //
    //   TATAAAGCGCCAA
    //            GCCAAAATTTAGGC
    //                      AGGCACCCGCGGTATT   <= reversed
    //
    //   TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
    var inputs = new List<ISequence>
    {
        new Sequence(alphabet, sequence1),
        new Sequence(alphabet, sequence2),
        new Sequence(alphabet, sequence3)
    };

    var assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };

    // Assemble all the sequences.
    return (IOverlapDeNovoAssembly)assembler.Assemble(inputs);
}
/// <summary>
/// Clears the consensus of the first assembled contig, rebuilds it with
/// MakeConsensus and compares the result with the consensus expected by the XML.
/// </summary>
public void SimpleConsensusWithMakeConsensusMethod()
{
    IOverlapDeNovoAssembly assembly = this.GetSequenceAssembly("consensus");

    var xml = this.utilityObj.xmlUtil;
    string node = Constants.AssemblyAlgorithmNodeName;

    string expectedConsensus = xml.GetTextValue(node, Constants.ContigConsensusNode);
    double consensusThreshold = double.Parse(xml.GetTextValue(node, Constants.ConsensusThresholdNode), null);
    IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(node, Constants.AlphabetNameNode));

    // Take the first contig and discard its consensus so it has to be rebuilt.
    Contig contig = assembly.Contigs[0];
    contig.Consensus = null;

    var consensusBuilder = new OverlapDeNovoAssembler
    {
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold)
    };
    consensusBuilder.MakeConsensus(alphabet, contig);

    // Convert the consensus symbols to characters for the string comparison.
    char[] consensusChars = contig.Consensus.Select(symbol => (char)symbol).ToArray();
    Assert.AreEqual(expectedConsensus, new String(consensusChars));

    // Log the required info.
    ApplicationLog.WriteLine(string.Format(null,
        "SimpleConsensusMethod BVT : Consensus read is '{0}'.", contig.Consensus));
}
/// <summary>
/// Validates the Sequence Assembler for all the general test cases.
/// Reads scoring parameters, input sequences and expected results from the
/// given XML node, runs the overlap de novo assembler and asserts on the outcome.
/// </summary>
/// <param name="nodeName">Xml Node Name</param>
/// <param name="additionalParameter">
/// Additional Parameter based on which the validations are done:
/// Consensus rebuilds and checks the consensus of the first contig,
/// SimilarityMatrix loads the similarity matrix from the BLOSUM file named in the XML,
/// every other value uses a diagonal similarity matrix and validates the Assemble() result.
/// </param>
/// <param name="isSeqAssemblyctr">
/// True if Default contructor is validated or else false.
/// This flag is not read by the method body.
/// </param>
private void ValidateSequenceAssemblerGeneral(string nodeName, AssemblyParameters additionalParameter, bool isSeqAssemblyctr)
{
    // Get the parameters from Xml
    int matchScore = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MatchScoreNode), null);
    int mismatchScore = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MisMatchScoreNode), null);
    int gapCost = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.GapCostNode), null);
    double mergeThreshold = double.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.MergeThresholdNode), null);
    double consensusThreshold = double.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ConsensusThresholdNode), null);
    string[] sequences = this.utilityObj.xmlUtil.GetTextValues(nodeName, Constants.SequencesNode);
    IAlphabet alphabet = Utility.GetAlphabet(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.AlphabetNameNode));
    string documentation = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.DocumentaionNode);

    bool isConsensusCase = additionalParameter == AssemblyParameters.Consensus;

    // Only the log prefix differs between the consensus case and the others.
    string sequenceLogFormat = isConsensusCase
        ? "SimpleConsensusMethod P1 : Sequence '{0}' used is '{1}'."
        : "SequenceAssembly P1 : Sequence '{0}' used is '{1}'.";

    var inputs = new List<ISequence>();
    for (int i = 0; i < sequences.Length; i++)
    {
        // Logs the sequences
        ApplicationLog.WriteLine(string.Format(null, sequenceLogFormat, i, sequences[i]));
        inputs.Add(new Sequence(alphabet, sequences[i]));
    }

    // Layout noted by the original author for the default test data:
    //
    //   TATAAAGCGCCAA
    //            GCCAAAATTTAGGC
    //                      AGGCACCCGCGGTATT   <= reversed
    //
    //   TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT
    var assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner()
    };

    if (additionalParameter == AssemblyParameters.SimilarityMatrix)
    {
        string blosumFilePath = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.BlosumFilePathNode);

        // Dispose the reader so the matrix file handle is released.
        // NOTE(review): assumes SimilarityMatrix consumes the reader inside its constructor - confirm.
        using (var matrixReader = new StreamReader(blosumFilePath))
        {
            assembler.OverlapAlgorithm.SimilarityMatrix = new SimilarityMatrix(matrixReader);
        }
    }
    else
    {
        // DiagonalSM and every remaining case share the diagonal matrix.
        assembler.OverlapAlgorithm.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    }

    assembler.OverlapAlgorithm.GapOpenCost = gapCost;
    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = false;

    // Assembles all the sequences.
    IOverlapDeNovoAssembly assembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    // Set Documentation property.
    assembly.Documentation = documentation;

    // Get the parameters from Xml in general
    int contigSequencesCount = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigSequencesCountNode), null);
    string contigConsensus = this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigConsensusNode);

    if (isConsensusCase)
    {
        // Read the contig from Contig method and force its consensus to be rebuilt.
        Contig contigReadForConsensus = assembly.Contigs[0];
        contigReadForConsensus.Consensus = null;

        var simpleSeqAssembler = new OverlapDeNovoAssembler
        {
            ConsensusResolver = new SimpleConsensusResolver(consensusThreshold)
        };
        simpleSeqAssembler.MakeConsensus(alphabet, contigReadForConsensus);

        // Log the required info.
        ApplicationLog.WriteLine(string.Format(null,
            "SimpleConsensusMethod BVT : Consensus read is '{0}'.",
            contigReadForConsensus.Consensus));

        Assert.AreEqual(contigConsensus,
            new String(contigReadForConsensus.Consensus.Select(a => (char)a).ToArray()));
    }
    else
    {
        // Get the parameters from Xml for Assemble() method test cases.
        int unMergedCount = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.UnMergedSequencesCountNode), null);
        int contigsCount = int.Parse(this.utilityObj.xmlUtil.GetTextValue(nodeName, Constants.ContigsCountNode), null);

        Assert.AreEqual(unMergedCount, assembly.UnmergedSequences.Count);
        Assert.AreEqual(contigsCount, assembly.Contigs.Count);
        Assert.AreEqual(documentation, assembly.Documentation);

        Contig contigRead = assembly.Contigs[0];

        // Logs the consensus
        ApplicationLog.WriteLine(string.Format(null,
            "SequenceAssembly BVT : Un Merged Sequences Count is '{0}'.",
            assembly.UnmergedSequences.Count));
        ApplicationLog.WriteLine(string.Format(null,
            "SequenceAssembly BVT : Contigs Count is '{0}'.",
            assembly.Contigs.Count));
        ApplicationLog.WriteLine(string.Format(null,
            "SequenceAssembly BVT : Contig Sequences Count is '{0}'.",
            contigRead.Sequences.Count));
        ApplicationLog.WriteLine(string.Format(null,
            "SequenceAssembly BVT : Consensus read is '{0}'.",
            contigRead.Consensus));

        Assert.AreEqual(contigConsensus,
            new String(contigRead.Consensus.Select(a => (char)a).ToArray()));
        Assert.AreEqual(contigSequencesCount, contigRead.Sequences.Count);
    }
}
/// <summary>
/// Runs the overlap de novo assembler on the sequences carried by
/// <paramref name="input"/>, using the aligner and aligner settings the
/// caller selected.
/// </summary>
/// <param name="input">Input for the assembly process.</param>
/// <param name="worker">
/// Background worker running the assembly; may be null. Its cancellation
/// flag is checked once, after the assembly has completed.
/// </param>
/// <returns>
/// The assembly result, or null when <paramref name="worker"/> reports a pending cancellation.
/// </returns>
private static IDeNovoAssembly RunAssembly(AssemblyInputEventArgs input, BackgroundWorker worker)
{
    double threshold = input.MergeThreshold;
    List<ISequence> reads = input.Sequences.ToList();

    var assembler = new OverlapDeNovoAssembler();
    assembler.OverlapAlgorithm = input.Aligner;

    if (assembler.OverlapAlgorithm is SmithWatermanAligner)
    {
        // Smith-Waterman special case: the caller's threshold is replaced by a fixed 25.
        // Per the original author's note, a threshold below the maximum score
        // produces junk results, and 25 is an approximate value chosen to
        // support long sequences.
        threshold = 25;
    }

    assembler.MergeThreshold = threshold;

    // Copy the scoring settings onto the selected aligner.
    assembler.OverlapAlgorithm.SimilarityMatrix = input.AlignerInput.SimilarityMatrix;
    assembler.OverlapAlgorithm.GapOpenCost = input.AlignerInput.GapCost;
    assembler.OverlapAlgorithm.GapExtensionCost = input.AlignerInput.GapExtensionCost;

    assembler.ConsensusResolver = new SimpleConsensusResolver(input.ConsensusThreshold);
    assembler.AssumeStandardOrientation = false;

    AssignAlignerParameter(assembler.OverlapAlgorithm, input.AlignerInput);

    IDeNovoAssembly result = assembler.Assemble(reads);

    // Drop the result if cancellation was requested while assembling.
    bool cancelled = worker != null && worker.CancellationPending;
    return cancelled ? null : result;
}
/// <summary>
/// Checks the ToString() output of an overlap de novo assembly for two data
/// sets: the two long sequences under the ToString XML node, and the three
/// short sequences under the assembly-algorithm XML node.
/// </summary>
public void ValidateOverlapDenovoAssemblyToString()
{
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    var xml = this.utilityObj.xmlUtil;

    // ---- Part 1: two sequences from the ToString node ----
    ISequence firstLong = new Sequence(Alphabets.DNA,
        xml.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode));
    ISequence secondLong = new Sequence(Alphabets.DNA,
        xml.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode));

    var longAssembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };
    var longInputs = new List<ISequence> { firstLong, secondLong };

    // Needleman-Wunsch run: everything merges into one contig.
    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)longAssembler.Assemble(longInputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    // Smith-Waterman run: compare the string form, ignoring line breaks.
    longAssembler.OverlapAlgorithm = new SmithWatermanAligner
    {
        SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
        GapOpenCost = gapCost
    };
    seqAssembly = (OverlapDeNovoAssembly)longAssembler.Assemble(longInputs);

    const string expectedLongText = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";
    string actualText = seqAssembly.ToString();
    Assert.AreEqual(expectedLongText, actualText.Replace(System.Environment.NewLine, ""));

    // ---- Part 2: three sequences and parameters from the assembly-algorithm node ----
    string node = Constants.AssemblyAlgorithmNodeName;
    int xmlMatchScore = int.Parse(xml.GetTextValue(node, Constants.MatchScoreNode), null);
    int xmlMismatchScore = int.Parse(xml.GetTextValue(node, Constants.MisMatchScoreNode), null);
    int xmlGapCost = int.Parse(xml.GetTextValue(node, Constants.GapCostNode), null);
    double xmlMergeThreshold = double.Parse(xml.GetTextValue(node, Constants.MergeThresholdNode), null);
    double xmlConsensusThreshold = double.Parse(xml.GetTextValue(node, Constants.ConsensusThresholdNode), null);

    string text1 = xml.GetTextValue(node, Constants.SequenceNode1);
    string text2 = xml.GetTextValue(node, Constants.SequenceNode2);
    string text3 = xml.GetTextValue(node, Constants.SequenceNode3);
    IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(node, Constants.AlphabetNameNode));

    var shortInputs = new List<ISequence>
    {
        new Sequence(alphabet, text1),
        new Sequence(alphabet, text2),
        new Sequence(alphabet, text3)
    };

    var shortAssembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = xmlMergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(xmlMatchScore, xmlMismatchScore),
            GapOpenCost = xmlGapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(xmlConsensusThreshold),
        AssumeStandardOrientation = false
    };

    // Pairwise-overlap run: everything merges into one contig.
    seqAssembly = (OverlapDeNovoAssembly)shortAssembler.Assemble(shortInputs);
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    // Smith-Waterman run with the aligner's own default settings.
    shortAssembler.OverlapAlgorithm = new SmithWatermanAligner();
    seqAssembly = (OverlapDeNovoAssembly)shortAssembler.Assemble(shortInputs);

    string expectedShortText = "TYMKWRRGCGCCAAAATTTAGGC" + System.Environment.NewLine;
    actualText = seqAssembly.ToString();
    Assert.AreEqual(expectedShortText, actualText);
}
/// <summary>
/// Checks the ToString() output of the first contig for two data sets:
/// the two sequences under the ToString XML node, and the three sequences
/// under the assembly-algorithm XML node.
/// </summary>
public void ValidateContigToString()
{
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    var xml = this.utilityObj.xmlUtil;

    // ---- Part 1: two sequences from the ToString node ----
    string secondText = xml.GetTextValue(Constants.ToStringNodeName, Constants.Seq2StrNode);
    string firstText = xml.GetTextValue(Constants.ToStringNodeName, Constants.Seq1StrNode);
    ISequence first = new Sequence(Alphabets.DNA, firstText);
    ISequence second = new Sequence(Alphabets.DNA, secondText);

    var nwAssembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };

    var nwAssembly = (IOverlapDeNovoAssembly)nwAssembler.Assemble(new List<ISequence> { first, second });
    string nwContigText = nwAssembly.Contigs[0].ToString();
    string nwExpectedText = xml.GetTextValue(Constants.ToStringNodeName, Constants.OverlapDenovoExpectedNode);

    // The XML value carries escaped "\r\n" text; the actual value carries real CR/LF pairs.
    Assert.AreEqual(nwExpectedText.Replace("\\r\\n", ""), nwContigText.Replace("\r\n", ""));

    // ---- Part 2: three sequences and parameters from the assembly-algorithm node ----
    string node = Constants.AssemblyAlgorithmNodeName;
    int xmlMatchScore = int.Parse(xml.GetTextValue(node, Constants.MatchScoreNode), null);
    int xmlMismatchScore = int.Parse(xml.GetTextValue(node, Constants.MisMatchScoreNode), null);
    int xmlGapCost = int.Parse(xml.GetTextValue(node, Constants.GapCostNode), null);
    double xmlMergeThreshold = double.Parse(xml.GetTextValue(node, Constants.MergeThresholdNode), null);
    double xmlConsensusThreshold = double.Parse(xml.GetTextValue(node, Constants.ConsensusThresholdNode), null);

    string text1 = xml.GetTextValue(node, Constants.SequenceNode1);
    string text2 = xml.GetTextValue(node, Constants.SequenceNode2);
    string text3 = xml.GetTextValue(node, Constants.SequenceNode3);
    IAlphabet alphabet = Utility.GetAlphabet(xml.GetTextValue(node, Constants.AlphabetNameNode));

    ISequence read1 = new Sequence(alphabet, text1);
    ISequence read2 = new Sequence(alphabet, text2);
    ISequence read3 = new Sequence(alphabet, text3);

    var overlapAssembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = xmlMergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(xmlMatchScore, xmlMismatchScore),
            GapOpenCost = xmlGapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(xmlConsensusThreshold),
        AssumeStandardOrientation = false
    };

    // Assemble all the sequences and compare the first contig's text.
    var overlapAssembly = (IOverlapDeNovoAssembly)overlapAssembler.Assemble(
        new List<ISequence> { read1, read2, read3 });
    string overlapContigText = overlapAssembly.Contigs[0].ToString();

    const string overlapExpectedText = "TATAAAGCGCCAAAATTTAGGCACCCGCGGTATT";
    Assert.AreEqual(overlapExpectedText, overlapContigText);
}
/// <summary>
/// Assembles two long swine-flu DNA sequences with four different overlap
/// aligners in turn (Needleman-Wunsch, Smith-Waterman, pairwise overlap and
/// MUMmer) and checks the single resulting contig each time.
/// </summary>
public void TestSimpleSequenceAssemblerWithSwineflu()
{
    // Turn on the assembly-details trace (log dump).
    Trace.Set(Trace.AssemblyDetails);

    // Test parameters.
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    ISequence seq2 = new Sequence(Alphabets.DNA,
        "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTG"
        + "GATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");

    ISequence seq1 = new Sequence(Alphabets.DNA,
        "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };

    var inputs = new List<ISequence> { seq1, seq2 };

    // 1. Needleman-Wunsch, configured in the initializer above.
    string expected =
        "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTRYAGRYWMWYWWKSKAWMRRTTMWMMWGMSAMYRWWKMMACAGWMMYWGWARASAMWGTAMYAGWAAMRMAYKYWRYWRWMMYWCWMKMWGWYAASCWTMWMGRRRAMMWRYRYAAMSKRARASKRKKMRMMCYAWKRSRWKTRGSYMMATKKMAYWTKGSTRRMTGKAWCMTKGSWRRYYSRRWSYKKGRAWMWCYMKMSWSWGMAWSMYYMTSSWCMKMMAKYKYRKRRWCMTMYAKTKYRGAMAMWKSWASKTSWKACMMWGGARMKTKYWWCSMWKRWGAKKWSMTMRRWKAKSARKWGMKMWSAGWGYMATYRWKYKMARKGTYWKMRWTWKWMMSSWWKRMRAKWTYMYSSMMSAMWMRTKMMTSGMMCAAWSRTGWMWCGRMMRMAKGTSYWMMKGCWGSAKSWMMWMRYKYYKRMRMAAAWWKMWTMTRSMWARWTWWAAWAKGRMWWKYWWAMMMARRRMWYWSMWAMYCMWASMTYARYRAWWMMKRSAWWRAWGWYMWMGKGMWAKRRGKCMTYSWSCWWYSRKSYAYTMRYSMTSMMYMWMMWAGTSYYKAYCARMAWRSWSWYKMWYAKRWTKYWGWKGSRWMWKYWWKWKWSRGSWMRWMRWKMWAKMSSRARAWRKYMAWRMSRSMMAWAGYRAKRRRWCMMRAAGKGAGRRWKMAMKAWKRSWGRAYRMWMKWWKASYSGRSASWMRWARWRMCRKKMGAMRMAAYWRSAWWYSWAGYRRYWSSRARWYWWGYRKTMSCRAKRKAWRSAWWYGCWRKRKMWRGWAWTRYYRKWTCWGRTAYWMYMRTYYMMGATWSMMMWRYMMMYKRTYRSAMWMCMAMKKGTSMKAYAMMCAMSRGYSYYMYAWWYMMSARYMTMCMWYYKMWSAMWATWSRWMMRWKYMCAAWWKRWRWAWRWMSMAMAWAWKTRARAMKSRCMAMAKKRWKRMKGRMYRYMSSRTYKAKKMAWKYYMSRKSYMTWYWWKSKRSMRKYSYMKKTKKSRYYRWWGSSGGKTKSAYWGRRRKGGKRKRKRSAKGGWWSGKWKATSRMYRKYAMRRTKAKCASSRKYMARRWKAKSMRGSSKMMSKRWAKRSMRCMSASMWKRMSAKYRMMSAGAWTRCYAWYRAMGWRAWTWCTRWYAWWGWAAAKWYKRWTAYWSARWWSAYRRMWRTASRKWWMRMRKYMRWMSRYMWRGARWWMARMMWMSWGRAWWWAARWAWARARAWTKWWRATRRWWWMSTKGAYRWTKGKWYYYWSRAYRYYKRRMYKTWSRWTSYMKWRSWRWWKGWWMKAWYKKWRRAYKAMMRMRMTTYRRAYKWSMASRAYTYRWATGWRAAGRWMWKAWRYSARWWRRWAARMARYSMSMWRRAAAWYRRWRMCRRSKRMWTTGRAWWYKRCYRCWWWKRMKWTWACMMSWRMWKSGAWARYRYSWRMAWKGRRASTKWYRAMWAYSSRAMWTAYKMMKASSMARMAWAMTY"
        + "ARASRRAGMARAAWTARAYRGRGWARARMTRGAWKSRRYAARGMTKKAMYMRAYWWKGRYKWWCYAKWYWWYKGYSRYCWRTTCAWYKGTMSYSRKWKYMTYSSTRSKGGYARTCWSYYTSKGGRYRWKCWSTWWYKGGWYKYKMYMKWRTRGRWYWYKWMWKTRWAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
    AssertSwinefluContig((IOverlapDeNovoAssembly)assembler.Assemble(inputs), expected, "NeedlemanWunschAligner");

    // 2. Smith-Waterman with the aligner's own default settings.
    assembler.OverlapAlgorithm = new SmithWatermanAligner();
    expected =
        "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAYACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGA"
        + "TTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
    AssertSwinefluContig((IOverlapDeNovoAssembly)assembler.Assemble(inputs), expected, "SmithwatermanAligner");

    // 3. Pairwise overlap aligner with default settings.
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
    expected =
        "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGRGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGWCATCAAGATAYAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATAY"
        + "ACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAARTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCRAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGYTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA";
    AssertSwinefluContig((IOverlapDeNovoAssembly)assembler.Assemble(inputs), expected, "PairwiseOverlapAligner");

    // 4. MUMmer aligner; compared against the same consensus as the pairwise overlap run.
    assembler.OverlapAlgorithm = new MUMmerAligner();
    AssertSwinefluContig((IOverlapDeNovoAssembly)assembler.Assemble(inputs), expected, "MUMmerAligner");
}

/// <summary>
/// Asserts that an assembly has no unmerged sequences and exactly one contig,
/// and that this contig has the expected consensus and is built from two sequences.
/// </summary>
/// <param name="seqAssembly">Assembly result to check.</param>
/// <param name="expectedConsensus">Expected consensus text of the single contig.</param>
/// <param name="alignerLabel">Message attached to the consensus assertion.</param>
private static void AssertSwinefluContig(IOverlapDeNovoAssembly seqAssembly, string expectedConsensus, string alignerLabel)
{
    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    Contig contig0 = seqAssembly.Contigs[0];
    Assert.AreEqual(expectedConsensus, contig0.Consensus.ConvertToString(), alignerLabel);
    Assert.AreEqual(2, contig0.Sequences.Count);
}
/// <summary>
/// Checks that the Name and Description of the overlap de novo assembler
/// match the corresponding resource strings.
/// </summary>
public void TestAssemblerProperties()
{
    IDeNovoAssembler assembler = new OverlapDeNovoAssembler();

    // Expected value goes first so a failure message labels the two values correctly.
    Assert.AreEqual(Properties.Resource.SIMPLE_NAME, assembler.Name);
    Assert.AreEqual(Properties.Resource.SIMPLE_DESCRIPTION, assembler.Description);
}
/// <summary>
/// Builds a pseudo-random master sequence (fixed seed), cuts randomly placed
/// reads out of it, assembles the reads and checks that the consensus of the
/// single resulting contig equals the master sequence.
/// </summary>
public void TestSimpleSequenceAssemblerWithRandomSequence()
{
    // Test parameters.
    //
    // In theory, as long as all positions in the master sequence are
    // covered by at least one read, we should be able to pass this test.
    // But some parameter settings will make the test fail, for
    // various reasons, including:
    // 1. Short reads, caused by the strategy used to ensure full coverage
    //    at the ends, might not score well enough to merge.
    // 2. Uncovered positions are always possible due to the random
    //    generation of reads. (Increasing the number of reads helps with this)
    // 3. The assembler might construct the reverse or complement (or both)
    //    of the master sequence.
    // 4. Too low a merge threshold could cause incorrect merges, which
    //    the algorithm will not repair.
    int matchScore = 1;
    int mismatchScore = -8;
    int gapCost = -8;
    double mergeThreshold = 3;
    double consensusThreshold = 99;
    const int MasterLength = 100;
    const int MinReadLength = 10;
    const int MaxReadLength = 30;
    const int NumReads = 200;
    const bool AssumeOrientedReads = true;

    // if this is uncommented, assembly details appear in log.
    // this is extremely verbose.
    // Trace.Set(Trace.AssemblyDetails);

    // make random master sequence
    // (use seed for repeatability, or omit seed for
    // different test each time)
    // Random randGen = new Random();
    Random randGen = new Random(654321);
    StringBuilder randSeq = new StringBuilder();
    for (int i = 0; i < MasterLength; ++i)
    {
        int randm = randGen.Next(8);
        if (randm < 2)
        {
            randSeq.Append('A');
        }
        else if (randm < 4)
        {
            randSeq.Append('C');
        }
        else if (randm < 6)
        {
            randSeq.Append('G');
        }
        else
        {
            randSeq.Append('T');
        }
    }

    Sequence master = new Sequence(Alphabets.AmbiguousDNA, randSeq.ToString());

    // The master never changes after this point, so convert it to text once
    // instead of once per read.
    string masterString = master.ConvertToString();

    // create the reads
    List<ISequence> inputs = new List<ISequence>();
    for (int i = 0; i < NumReads; ++i)
    {
        // try for uniform coverage clear to the ends (this can lead to short reads, though)
        // rndPos is clamped to zero here, so it needs no further clamping below.
        int rndPos = Math.Max(0, randGen.Next(-MinReadLength, MasterLength - 1));
        int rndLen = Math.Min(MasterLength - rndPos, randGen.Next(MinReadLength, MaxReadLength + 1));
        string data = masterString.Substring(rndPos, rndLen);

        // Both draws are always taken (the random call is the left operand of &&),
        // which keeps the seeded sequence of random numbers stable.
        bool revcomp = randGen.Next(2) > 0;
        bool reverse = randGen.Next(2) > 0 && !AssumeOrientedReads;

        ISequence read;
        if (reverse && revcomp)
        {
            Sequence tmp = new Sequence(Alphabets.DNA, data);
            read = new Sequence(Alphabets.DNA, tmp.GetReversedSequence().ConvertToString());
        }
        else if (revcomp)
        {
            Sequence tmp = new Sequence(Alphabets.DNA, data);
            read = new Sequence(Alphabets.DNA, tmp.GetReverseComplementedSequence().ConvertToString());
        }
        else
        {
            read = new Sequence(Alphabets.DNA, data);
        }

        ApplicationLog.WriteLine("read {0}: {1}", i, read);
        inputs.Add(read);
    }

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    assembler.MergeThreshold = mergeThreshold;
    assembler.OverlapAlgorithm = new PairwiseOverlapAligner();
    ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).SimilarityMatrix =
        new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    ((IPairwiseSequenceAligner)assembler.OverlapAlgorithm).GapOpenCost = gapCost;
    assembler.ConsensusResolver = new SimpleConsensusResolver(consensusThreshold);
    assembler.AssumeStandardOrientation = AssumeOrientedReads;

    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    ApplicationLog.WriteLine(
        "Assembly finished. Contigs: {0}. Unmerged sequences: {1}.",
        seqAssembly.Contigs.Count,
        seqAssembly.UnmergedSequences.Count);

    // Verify the counts before indexing into Contigs, so an empty result is
    // reported as an assertion failure rather than an index exception.
    Assert.AreEqual(2, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    Contig contig0 = seqAssembly.Contigs[0];
    string consensus = contig0.Consensus.ConvertToString();
    ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
    ApplicationLog.WriteLine(masterString);
    ApplicationLog.WriteLine(consensus);

    // note that this is tricky, esp. without oriented reads - consensus
    // could be reversed and/or complemented relative to original
    Assert.AreEqual(masterString, consensus);
}
/// <summary>
/// Builds a short pseudo-random master sequence (fixed seed), takes
/// fixed-length reads at regular offsets with randomly chosen orientation,
/// assembles them and checks that one contig results whose consensus equals
/// the master sequence.
/// </summary>
public void TestSimpleSequenceAssemblerWithSemiRandomSequence()
{
    // Scoring and threshold settings handed to the assembler.
    int matchScore = 1;
    int mismatchScore = -8;
    int gapCost = -8;
    double mergeThreshold = 4;
    double consensusThreshold = 66;

    // Shape of the generated data.
    const int MasterLength = 30;
    const int ReadLength = 10;
    const int NumReads = 5;
    const int ReadStride = 5;
    const bool AssumeOrientedReads = false;
    const string Bases = "ACGT";

    // Assembly details are written to the log by this call.
    // The output is extremely verbose; comment the call out to silence it.
    Trace.Set(Trace.AssemblyDetails);

    // Seeded generator for repeatability; drop the seed to get a
    // different test on every run.
    Random randGen = new Random(654321);

    // Each draw is in 0..7; two consecutive values map onto each base.
    StringBuilder masterBuilder = new StringBuilder();
    for (int n = 0; n < MasterLength; ++n)
    {
        masterBuilder.Append(Bases[randGen.Next(8) / 2]);
    }

    Sequence master = new Sequence(Alphabets.DNA, masterBuilder.ToString());
    string masterText = master.ConvertToString();

    // Cut the reads: one every ReadStride positions, each ReadLength long.
    List<ISequence> inputs = new List<ISequence>();
    for (int start = 0; start < NumReads * ReadStride; start += ReadStride)
    {
        string data = masterText.Substring(start, ReadLength);

        // Two draws per read, always taken in this order.
        bool revcomp = randGen.Next(2) > 0;
        bool reverse = randGen.Next(2) > 0 && !AssumeOrientedReads;

        ISequence read;
        if (!revcomp)
        {
            // Without revcomp the read is used as cut, whatever 'reverse' says.
            read = new Sequence(Alphabets.DNA, data);
        }
        else
        {
            Sequence forward = new Sequence(Alphabets.DNA, data);
            string oriented = reverse
                ? forward.GetReversedSequence().ConvertToString()
                : forward.GetReverseComplementedSequence().ConvertToString();
            read = new Sequence(Alphabets.DNA, oriented);
        }

        inputs.Add(read);
    }

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new PairwiseOverlapAligner(),
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = AssumeOrientedReads
    };

    IPairwiseSequenceAligner overlapAligner = (IPairwiseSequenceAligner)assembler.OverlapAlgorithm;
    overlapAligner.SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore);
    overlapAligner.GapOpenCost = gapCost;

    IOverlapDeNovoAssembly seqAssembly = (IOverlapDeNovoAssembly)assembler.Assemble(inputs);

    Assert.AreEqual(0, seqAssembly.UnmergedSequences.Count);
    Assert.AreEqual(1, seqAssembly.Contigs.Count);

    Contig contig0 = seqAssembly.Contigs[0];
    string consensusText = contig0.Consensus.ConvertToString();

    ApplicationLog.WriteLine("master sequence and contig 0 consensus:");
    ApplicationLog.WriteLine(masterText);
    ApplicationLog.WriteLine(consensusText);

    // Tricky, especially without oriented reads: the consensus could come
    // out reversed and/or complemented relative to the original.
    Assert.AreEqual(masterText, consensusText);
}
/// <summary>
/// Runs an overlap de novo assembly over the supplied sequences.
/// The overlap step uses a NeedlemanWunschAligner with a diagonal
/// similarity matrix; consensus is resolved by a SimpleConsensusResolver.
/// </summary>
/// <param name="sequences">Sequences to be assembled.</param>
/// <returns>The IDeNovoAssembly returned by the assembler.</returns>
public static IDeNovoAssembly DoSimpleSequenceAssemble(List<ISequence> sequences)
{
    // Fixed settings used by this sample.
    const int MatchScore = 5;
    const int MismatchScore = -4;
    const int GapCost = -10;
    const int ConsensusThreshold = 66;

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        OverlapAlgorithm = new NeedlemanWunschAligner(),
        ConsensusResolver = new SimpleConsensusResolver(ConsensusThreshold),
        AssumeStandardOrientation = false
    };

    // Scoring is configured on the aligner through the assembler's property.
    assembler.OverlapAlgorithm.SimilarityMatrix = new DiagonalSimilarityMatrix(MatchScore, MismatchScore);
    assembler.OverlapAlgorithm.GapOpenCost = GapCost;

    return assembler.Assemble(sequences);
}
/// <summary>
/// Assembles two long DNA sequences, first with a NeedlemanWunschAligner
/// (checking the contig and unmerged counts) and then with a
/// SmithWatermanAligner, and compares the ToString() output of the second
/// assembly with a fixed expected string.
/// </summary>
public void TestOverlapDenovoAssemblyToString()
{
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    ISequence seq2 = new Sequence(
        Alphabets.DNA,
        "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
    ISequence seq1 = new Sequence(
        Alphabets.DNA,
        "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    // First pass: Needleman-Wunsch overlap aligner with explicit scoring.
    var needlemanWunsch = new NeedlemanWunschAligner
    {
        SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
        GapOpenCost = gapCost
    };

    IOverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = needlemanWunsch,
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false
    };

    var reads = new List<ISequence> { seq1, seq2 };

    var firstPass = (IOverlapDeNovoAssembly)assembler.Assemble(reads);
    Assert.AreEqual(0, firstPass.UnmergedSequences.Count);
    Assert.AreEqual(1, firstPass.Contigs.Count);

    // Second pass: swap in a Smith-Waterman aligner. No similarity matrix or
    // gap cost is assigned to it here, so it runs with its own defaults.
    assembler.OverlapAlgorithm = new SmithWatermanAligner();
    var secondPass = (IOverlapDeNovoAssembly)assembler.Assemble(reads);

    // The literal ends in "\r\n"; it is rewritten to the platform line ending.
    string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]\r\n".Replace("\r\n", Environment.NewLine);
    string actualString = secondPass.ToString();
    Assert.AreEqual(expectedString, actualString);
}
/// <summary>
/// Assembles two long DNA sequences with a NeedlemanWunschAligner and
/// compares the ToString() output of the first contig with a fixed
/// expected string.
/// </summary>
public void TestContigToString()
{
    // test parameters
    const int matchScore = 5;
    const int mismatchScore = -4;
    const int gapCost = -10;
    const double mergeThreshold = 4;
    const double consensusThreshold = 66;

    Sequence seq2 = new Sequence(Alphabets.DNA, "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGAGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGTCATCAAGATATAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATATACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAGTTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCGAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGCTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAACATTAGGATTTCAGAAGCATGAGAAA");
    Sequence seq1 = new Sequence(Alphabets.DNA, "ATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATGCAGACACATTATGTATAGGTTATCATGCGAACAATTCAACAGACACTGTAGACACAGTACTAGAAAAGAATGTAACAGTAACACACTCTGTTAACCTTCTAGAAGACAAGCATAACGGGAAACTATGCAAACTAAGAGGGGTAGCCCCATTGCATTTGGGTAAATGTAACATTGCTGGCTGGATCCTGGGAAATCCAGAGTGTGAATCACTCTCCACAGCAAGCTCATGGTCCTACATTGTGGAAACATCTAGTTCAGACAATGGAACGTGTTACCCAGGAGATTTCATCGATTATGAGGAGCTAAGAGAGCAATTGAGCTCAGTGTCATCATTTGAAAGGTTTGAGATATTCCCCAAGACAAGTTCATGGCCCAATCATGACTCGAACAAAGGTGTAACGGCAGCATGTCCTCATGCTGGAGCAAAAAGCTTCTACAAAAATTTAATATGGCTAGTTAAAAAAGGAAATTCATACCCAAAGCTCAGCAAATCCTACATTAATGATAAAGGGAAAGAAGTCCTCGTGCTATGGGGCATTCACCATCCATCTACTAGTGCTGACCAACAAAGTCTCTATCAGAATGCAGATGCATATGTTTTTGTGGGGACATCAAGATACAGCAAGAAGTTCAAGCCGGAAATAGCAATAAGACCCAAAGTGAGGGATCAAGAAGGGAGAATGAACTATTACTGGACACTAGTAGAGCCGGGAGACAAAATAACATTCGAAGCAACTGGAAATCTAGTGGTACCGAGATATGCATTCGCAATGGAAAGAAATGCTGGATCTGGTATTATCATTTCAGATACACCAGTCCACGATTGCAATACAACTTGTCAGACACCCAAGGGTGCTATAAACACCAGCCTCCCATTTCAGAATATACATCCGATCACAATTGGAAAATGTCCAAAATATGTAAAAAGCACAAAATTGAGACTGGCCACAGGATTGAGGAATGTCCCGTCTATTCAATCTAGAGGCCTATTTGGGGCCATTGCCGGTTTCATTGAAGGGGGGTGGACAGGGATGGTAGATGGATGGTACGGTTATCACCATCAAAATGAGCAGGGGTCAGGATATGCAGCCGACCTGAAGAGCACACAGAATGCCATTGACGAGATTACTAACAAAGTAAATTCTGTTATTGAAAAGATGAATACACAGTTCACAGCAGTAGGTAAAGAGTTCAACCACCTGGAAAAAAGAATAGAGAATTTAAATAAAAAAATTGATGATGGTTTCCTGGACATTTGGACTTACAATGCCGAACTGTTGGTTCTATTGGAAAATGAAAGAACTTTGGACTACCACGATTCAAATGTGAAGAACTTATATGAAAAGGTAAGAAGCCAGTTAAAAAACAATGCCAAGGAAATTGGAAACGGCTGCTTTGAATTTTACCACAAATGCGATAACACGTGCATGGAAAGTGTCAAAAATGGGACTTATGACTACCCAAAATACTCAGAGGAAGCAAAATTAAACAGAGAAGAAATAGATGGGGTAAAGCTGGAATCAACAAGGATTTACCAGATTTTGGCGATCTATTCAACTGTCGCCAGTTCATTGGTACTGGTAGTCTCCCTGGGGGCAATCAGTTTCTGGATGTGCTCTAATGGGTCTCTACAGTGTAGAATATGTATTTAA");

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler
    {
        MergeThreshold = mergeThreshold,
        OverlapAlgorithm = new NeedlemanWunschAligner
        {
            SimilarityMatrix = new DiagonalSimilarityMatrix(matchScore, mismatchScore),
            GapOpenCost = gapCost
        },
        ConsensusResolver = new SimpleConsensusResolver(consensusThreshold),
        AssumeStandardOrientation = false,
    };

    IOverlapDeNovoAssembly seqAssembly =
        (IOverlapDeNovoAssembly)assembler.Assemble(new List<ISequence> { seq1, seq2 });

    Contig contig0 = seqAssembly.Contigs[0];
    string actualString = contig0.ToString();

    // NOTE(review): the commented-out expectation below uses only A/C/G/T, while
    // the active one contains other symbols (Y, R, M, W, K, S). Which of the two
    // is the intended long-term expectation cannot be told from this test - confirm.
    //string expectedString = "ACAAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCTATATACATTTGCAACCGCAAATG... +[1678]";
    string expectedString = "AYRAARGCAAYAMWARTRRWKSYRMTAYWWRYAKTTSYRMYMKMWAMWKYWGMMACMKYAWRTR... +[1678]";

    // Expected value first, actual second, so a failure message labels
    // the two sides correctly.
    Assert.AreEqual(expectedString, actualString);
}