// Assembles the reads returned by TestInputs.GetDanglingReads() and verifies that
// exactly one contig is produced and that it is the expected sequence.
public void AssemblerTest()
{
    const int KmerSize = 11;
    const int DanglingLimit = 3;
    const int RedundantLimit = 10;

    List<ISequence> reads = TestInputs.GetDanglingReads();

    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.KmerLength = KmerSize;
        assembler.DanglingLinksThreshold = DanglingLimit;
        assembler.RedundantPathLengthThreshold = RedundantLimit;

        IDeNovoAssembly assembly = assembler.Assemble(reads);

        // Exactly one contig is expected.
        Assert.AreEqual(1, assembly.AssembledSequences.Count());

        HashSet<string> expected = new HashSet<string> { "ATCGCTAGCATCGAACGATCATT" };
        foreach (ISequence contig in assembly.AssembledSequences)
        {
            // Turn the sequence symbols into text before looking them up.
            char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
            string contigText = new string(symbols);
            Assert.IsTrue(expected.Contains(contigText));
        }
    }
}
// Runs the assembler with scaffold generation enabled on the reads returned by
// TestInputs.GetReadsForScaffolds() and compares both the contigs and the scaffolds
// against the expected sequence sets.
public void AssemblerTestWithScaffoldBuilder()
{
    const int kmerSize = 6;
    const int danglingLimit = 3;
    const int redundantLimit = 7;

    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.KmerLength = kmerSize;
        assembler.DanglingLinksThreshold = danglingLimit;
        assembler.RedundantPathLengthThreshold = redundantLimit;
        assembler.ScaffoldRedundancy = 0;
        assembler.Depth = 3;

        // Register the clone library before assembling with scaffolds.
        CloneLibrary.Instance.AddLibrary("abc", 5, 20);

        PadenaAssembly assembly =
            (PadenaAssembly)assembler.Assemble(TestInputs.GetReadsForScaffolds(), true);

        HashSet<string> contigsExpected = new HashSet<string>
        {
            "TTTTTT",
            "CGCGCG",
            "TTAGCGCG",
            "CGCGCCGCGC",
            "GCGCGC",
            "TTTTTA",
            "TTTTAA",
            "TTTAAA",
            "TTTTAGC",
            "ATGCCTCCTATCTTAGC"
        };
        AlignmentHelpers.CompareSequenceLists(contigsExpected, assembly.ContigSequences);

        HashSet<string> scaffoldsExpected = new HashSet<string>
        {
            "ATGCCTCCTATCTTAGCGCGC",
            "TTTAAA",
            "TTTTTT",
            "TTTTAGC",
            "TTTTAA",
            "CGCGCCGCGC",
            "TTTTTA",
            "CGCGCG"
        };
        AlignmentHelpers.CompareSequenceLists(scaffoldsExpected, assembly.Scaffolds);
    }
}
// Assembles the scaffold reads, takes the first contig, extracts a range from it with
// Helper.GetSequenceRange(sequence, 2, 3) and checks that the range has three symbols
// and that its text occurs inside the contig's text.
public void ValidateGetSequenceRange()
{
    const int kmerSize = 6;
    const int danglingLimit = 3;
    const int redundantLimit = 7;

    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.KmerLength = kmerSize;
        assembler.DanglingLinksThreshold = danglingLimit;
        assembler.RedundantPathLengthThreshold = redundantLimit;
        assembler.ScaffoldRedundancy = 0;
        assembler.Depth = 3;

        CloneLibrary.Instance.AddLibrary("abc", 5f, 20f);

        PadenaAssembly assembly = (PadenaAssembly)assembler.Assemble(GetReadsForScaffolds(), true);

        ISequence firstContig = assembly.ContigSequences[0];
        ISequence slice = Helper.GetSequenceRange(firstContig, 2, 3);
        Assert.AreEqual(3, slice.Count);

        // Compare as text: the extracted range must be a substring of the contig.
        char[] contigChars = firstContig.Select(symbol => (char)symbol).ToArray();
        char[] sliceChars = slice.Select(symbol => (char)symbol).ToArray();
        string contigText = new string(contigChars);
        string sliceText = new string(sliceChars);
        Assert.IsTrue(contigText.Contains(sliceText));
    }
}
/// <summary>
/// Initializes a new instance of the AssemblerDialog class.
/// Populates the alignment algorithm list, wires up the dialog's event handlers and
/// pre-fills the Padena parameter boxes from an estimated k-mer length.
/// </summary>
/// <param name="algorithms">Algorithms</param>
/// <param name="defaultSM">Default similarity matrix</param>
/// <param name="sequences">Sequences to assemble</param>
public AssemblerDialog(IEnumerable<string> algorithms, string defaultSM, IList<ISequence> sequences)
{
    InitializeComponent();
    this.defaultSM = defaultSM;

    this.InitializeAlignmentAlgorithms(algorithms);
    if (this.cmbAlgorithms.Items.Count > 0)
    {
        // Load the arguments of the currently selected algorithm.
        string selectedAlgorithm = this.cmbAlgorithms.SelectedItem.ToString();
        IAlignmentAttributes attributes = this.GetAlignmentAttribute(selectedAlgorithm);
        this.LoadAlignmentArguments(attributes);
    }

    // The selection handler is attached only after the initial arguments are loaded.
    this.cmbAlgorithms.SelectionChanged += cmbAlgorithms_SelectionChanged;
    this.Owner = Application.Current.MainWindow;

    this.simpleSequenceAssemblerOptionButton.Checked += OnAssemblerSelectionChanged;
    this.padenaOptionButton.Checked += OnAssemblerSelectionChanged;
    this.btnSubmit.Click += this.OnSubmitButtonClicked;
    this.btnCancel.Click += this.OnCancelClicked;

    // Get default values for padena: dangle threshold is k + 1,
    // redundant path threshold is 3 * (k + 1).
    int kmerDefault = ParallelDeNovoAssembler.EstimateKmerLength(sequences);
    int dangleDefault = kmerDefault + 1;
    txtKmerLength.Text = kmerDefault.ToString();
    txtDangleThreshold.Text = dangleDefault.ToString();
    txtRedundantThreshold.Text = (3 * dangleDefault).ToString();

    this.cmbLibraryNames.SelectionChanged += OnLibraryNames_SelectionChanged;
    this.InitializeLibraryNames();

    this.btnSubmit.Focus();
}
/// <summary>
/// Method to do a denovo assembly.
/// This sample uses Padena Denovo assembler.
/// </summary>
/// <param name="sequences">List of sequences to assemble.</param>
/// <returns>PadenaAssembly which contain the assembled result.</returns>
static PadenaAssembly DoDenovoAssembly(List<ISequence> sequences)
{
    // Create a denovo assembler. The assembler is disposable, so it is released
    // as soon as the assembly has been produced.
    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        #region Setting assembler parameters

        // Length of kmer
        assembler.KmerLength = 6;

        // Threshold to be used for error correction step where dangling links are removed.
        // All dangling links that have lengths less than specified length will be removed.
        assembler.DanglingLinksThreshold = 3;

        // Threshold to be used for error correction step where redundant paths are removed.
        // Paths that have same start and end points (redundant paths) and whose lengths are less
        // than specified length will be removed. They will be replaced by a single 'best' path
        assembler.RedundantPathLengthThreshold = 7;

        // Enter the name of the library along with mean distance and standard deviation
        CloneLibrary.Instance.AddLibrary("abc", (float)4, (float)5);

        #endregion

        // Assemble
        return (PadenaAssembly)assembler.Assemble(sequences);
    }
}
/// <summary>
/// Parses the read file, assembles the reads with a ParallelDeNovoAssembler and
/// writes the resulting contigs. Each stage is timed; when Verbose is set the
/// timings are written to the console.
/// </summary>
public virtual void AssembleSequences()
{
    TimeSpan totalElapsed = TimeSpan.Zero;
    Stopwatch stageTimer = new Stopwatch();
    long inputFileSize = new FileInfo(this.Filename).Length;

    // Stage 1: read the input file.
    stageTimer.Restart();
    IEnumerable<ISequence> reads = this.ParseFile();
    stageTimer.Stop();
    totalElapsed += stageTimer.Elapsed;

    if (this.Verbose)
    {
        Console.WriteLine();
        Console.WriteLine(" Processed read file: {0}", Path.GetFullPath(this.Filename));
        Console.WriteLine(" Read/Processing time: {0}", stageTimer.Elapsed);
        Console.WriteLine(" File Size : {0}", inputFileSize);
        Console.WriteLine(" k-mer Length : {0}", this.KmerLength);
    }

    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.AllowErosion = this.AllowErosion;
        assembler.AllowKmerLengthEstimation = this.AllowKmerLengthEstimation;
        assembler.AllowLowCoverageContigRemoval = this.LowCoverageContigRemovalEnabled;
        assembler.ContigCoverageThreshold = this.ContigCoverageThreshold;
        assembler.DanglingLinksThreshold = this.DangleThreshold;
        assembler.ErosionThreshold = this.ErosionThreshold;

        // An explicit k-mer length is applied only when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            assembler.KmerLength = this.KmerLength;
        }

        assembler.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;

        // Stage 2: assemble.
        stageTimer.Restart();
        IDeNovoAssembly assembly = assembler.Assemble(reads);
        stageTimer.Stop();
        totalElapsed += stageTimer.Elapsed;

        if (this.Verbose)
        {
            Console.WriteLine();
            Console.WriteLine(" Compute time: {0}", stageTimer.Elapsed);
        }

        // Stage 3: write the contigs.
        stageTimer.Restart();
        this.WriteContigs(assembly);
        stageTimer.Stop();
        totalElapsed += stageTimer.Elapsed;

        if (this.Verbose)
        {
            Console.WriteLine();
            Console.WriteLine(" Write contigs time: {0}", stageTimer.Elapsed);
            Console.WriteLine(" Total runtime: {0}", totalElapsed);
        }
    }
}
/// <summary>
/// Runs PaDeNA and get the performance numbers.
/// Reads the query file as alternating ID / sequence lines, assembles the reads with
/// scaffold generation enabled, records the elapsed time in PerfTests._watchObj and
/// the change in GC-reported memory in MemoryUsed.
/// </summary>
/// <param name="QueryFilePath">Query file path</param>
/// <returns>Number of sequences in the assembly's AssembledSequences.</returns>
internal int RunPerf(string QueryFilePath)
{
    // The assembler is disposable; release it once the measurements are taken.
    using (ParallelDeNovoAssembler parallel = new ParallelDeNovoAssembler())
    {
        kLength = 20;
        rThreshold = 2 * (kLength + 1);
        dThreshold = kLength;

        parallel.KmerLength = kLength;
        parallel.DanglingLinksThreshold = dThreshold;
        parallel.RedundantPathLengthThreshold = rThreshold;

        List<ISequence> sequences = new List<ISequence>();
        using (StreamReader read = new StreamReader(QueryFilePath))
        {
            // Records are pairs of lines: an ID line followed by a sequence line.
            // Reading stops at the first missing or empty sequence line.
            string id = read.ReadLine();
            string seq = read.ReadLine();
            while (!string.IsNullOrEmpty(seq))
            {
                Sequence sequence = new Sequence(Alphabets.DNA, seq);
                sequence.DisplayID = id;
                sequences.Add(sequence);

                id = read.ReadLine();
                seq = read.ReadLine();
            }
        }

        CloneLibrary.Instance.AddLibrary("abc", (float)1000, (float)500);

        long memoryStart = GC.GetTotalMemory(false);
        PerfTests._watchObj.Reset();
        PerfTests._watchObj.Start();

        IDeNovoAssembly assembly = parallel.Assemble(sequences, true);

        // Stop the watch right after the assembly so the memory query is not timed.
        PerfTests._watchObj.Stop();
        long memoryEnd = GC.GetTotalMemory(false);

        MemoryUsed = (memoryEnd - memoryStart).ToString();

        // The count is read before the assembler is disposed.
        return assembly.AssembledSequences.Count;
    }
}
// Assembles two single-read inputs (a test sequence, and the same sequence with an
// extra leading "T") and checks that each yields exactly one contig equal to the
// first test sequence, either directly or as its reverse complement.
public void PalindromicAssembleTest()
{
    const int KmerLength = 19;

    string baseText = @"TTTTTTCAATTGAAAAAAATCTGTATT";
    string extendedText = "T" + baseText;
    var expectedSequence = new Sequence(DnaAlphabet.Instance, baseText);
    var extendedSequence = new Sequence(DnaAlphabet.Instance, extendedText);

    // Two test sequences that are different but assemble to the same sequence.
    // Only one of these can be done correctly in the current algorithmic setup
    // using simple paths; that must be the first one.
    var inputs = new[] { expectedSequence, extendedSequence };
    foreach (var input in inputs)
    {
        List<ISequence> reads = new List<ISequence> { input };

        using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
        {
            assembler.KmerLength = KmerLength;
            assembler.AllowErosion = false;
            assembler.AllowLowCoverageContigRemoval = false;
            assembler.ContigCoverageThreshold = 0;
            assembler.DanglingLinksThreshold = 0;

            IDeNovoAssembly assembly = assembler.Assemble(reads);

            // Ensure that an additional base is not added to the contig.
            Assert.AreEqual(1, assembly.AssembledSequences.Count);

            // Accept the contig in either orientation.
            bool matches =
                assembly.AssembledSequences[0].SequenceEqual(expectedSequence)
                || assembly.AssembledSequences[0].GetReverseComplementedSequence().SequenceEqual(expectedSequence);
            Assert.IsTrue(matches);
        }
    }
}
// Runs the assembler with scaffold generation enabled and checks that 10 contigs and
// 8 scaffolds are produced, each matching an expected sequence either directly or as
// its reverse complement.
public void AssemblerTestWithScaffoldBuilder()
{
    const int kmerSize = 6;
    const int danglingLimit = 3;
    const int redundantLimit = 7;

    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.KmerLength = kmerSize;
        assembler.DanglingLinksThreshold = danglingLimit;
        assembler.RedundantPathLengthThreshold = redundantLimit;
        assembler.ScaffoldRedundancy = 0;
        assembler.Depth = 3;

        CloneLibrary.Instance.AddLibrary("abc", 5f, 20f);

        PaDeNAAssembly assembly =
            (PaDeNAAssembly)assembler.Assemble(TestInputs.GetReadsForScaffolds(), true);

        Assert.AreEqual(10, assembly.ContigSequences.Count);

        HashSet<string> knownContigs = new HashSet<string>
        {
            "GCGCGC",
            "TTTTTT",
            "TTTTTA",
            "TTTTAA",
            "TTTAAA",
            "ATGCCTCCTATCTTAGC",
            "TTTTAGC",
            "TTAGCGCG",
            "CGCGCCGCGC",
            "CGCGCG"
        };

        foreach (ISequence contig in assembly.ContigSequences)
        {
            string forward = contig.ToString();
            bool known = knownContigs.Contains(forward);
            if (!known)
            {
                // Fall back to the reverse complement only when the forward text is unknown.
                known = knownContigs.Contains(forward.GetReverseComplement(new char[forward.Length]));
            }

            Assert.IsTrue(known);
        }

        Assert.AreEqual(8, assembly.Scaffolds.Count);

        HashSet<string> knownScaffolds = new HashSet<string>
        {
            "ATGCCTCCTATCTTAGCGCGC",
            "TTTTTT",
            "TTTTTA",
            "TTTTAA",
            "TTTAAA",
            "CGCGCCGCGC",
            "TTTTAGC",
            "CGCGCG"
        };

        foreach (ISequence scaffold in assembly.Scaffolds)
        {
            string forward = scaffold.ToString();
            bool known = knownScaffolds.Contains(forward);
            if (!known)
            {
                known = knownScaffolds.Contains(forward.GetReverseComplement(new char[forward.Length]));
            }

            Assert.IsTrue(known);
        }
    }
}
/// <summary>
/// Assembles the sequences with scaffold generation.
/// Registers the clone library when a name is configured, parses the read file,
/// rejects reads whose alphabet has ambiguity, assembles with scaffolds enabled and
/// writes the result. Stage timings are reported when Verbose is set.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown when any read uses an alphabet that has ambiguity.
/// </exception>
public override void AssembleSequences()
{
    TimeSpan algorithmSpan = new TimeSpan();
    Stopwatch runAlgorithm = new Stopwatch();
    FileInfo refFileinfo = new FileInfo(this.Filename);
    long refFileLength = refFileinfo.Length;

    Output.WriteLine(OutputLevel.Information, Resources.AssemblyScaffoldStarting);

    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    runAlgorithm.Restart();
    IEnumerable<ISequence> reads = ParseFile();
    runAlgorithm.Stop();
    algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", runAlgorithm.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", refFileLength);
        Output.WriteLine(OutputLevel.Verbose, " k-mer Length : {0}", this.KmerLength);
    }

    // Validation time is reported separately and is not added to the total runtime.
    runAlgorithm.Restart();
    if (reads.Any(s => s.Alphabet.HasAmbiguity))
    {
        throw new ArgumentException(Resources.AmbiguousReadsNotSupported);
    }

    runAlgorithm.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
    }

    // The assembler is disposable; the using block releases it even if assembling
    // or writing the contigs throws.
    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.StatusChanged += this.AssemblerStatusChanged;
        assembler.AllowErosion = AllowErosion;
        assembler.AllowKmerLengthEstimation = AllowKmerLengthEstimation;

        // A threshold of -1 leaves low-coverage contig removal at its default.
        if (ContigCoverageThreshold != -1)
        {
            assembler.AllowLowCoverageContigRemoval = true;
            assembler.ContigCoverageThreshold = ContigCoverageThreshold;
        }

        assembler.DanglingLinksThreshold = DangleThreshold;
        assembler.ErosionThreshold = ErosionThreshold;

        // An explicit k-mer length is applied only when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            assembler.KmerLength = this.KmerLength;
        }

        assembler.RedundantPathLengthThreshold = RedundantPathLengthThreshold;

        runAlgorithm.Restart();
        IDeNovoAssembly assembly = assembler.Assemble(reads, true);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
        }

        runAlgorithm.Restart();
        WriteContigs(assembly);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
        }
    }
}
//TODO: This is really a PADENA Test, needs to be renamed.
// Assembles the scaffold reads and checks that 10 contigs and 8 scaffolds are produced,
// each matching an expected sequence either directly or as its reverse complement.
public void ValidateGetReverseComplement()
{
    const int kmerSize = 6;
    const int danglingLimit = 3;
    const int redundantLimit = 7;

    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.KmerLength = kmerSize;
        assembler.DanglingLinksThreshold = danglingLimit;
        assembler.RedundantPathLengthThreshold = redundantLimit;
        assembler.ScaffoldRedundancy = 0;
        assembler.Depth = 3;

        CloneLibrary.Instance.AddLibrary("abc", 5, 20);

        PadenaAssembly assembly = (PadenaAssembly)assembler.Assemble(GetReadsForScaffolds(), true);

        var knownContigs = new List<string>
        {
            "TTTTTT",
            "TTAGCGCG",
            "CGCGCCGCGC",
            "CGCGCG",
            "GCGCGC",
            "TTTTTA",
            "TTTTAGC",
            "TTTTAA",
            "TTTAAA",
            "ATGCCTCCTATCTTAGC",
        };

        Assert.AreEqual(10, assembly.ContigSequences.Count());

        foreach (ISequence contig in assembly.ContigSequences)
        {
            string forward = contig.ConvertToString();
            bool known = knownContigs.Contains(forward);
            if (!known)
            {
                // Fall back to the reverse complement only when the forward text is unknown.
                known = knownContigs.Contains(forward.GetReverseComplement(new char[forward.Length]));
            }

            Assert.IsTrue(known, "Found unknown contig " + forward);
        }

        Assert.AreEqual(8, assembly.Scaffolds.Count());

        var knownScaffolds = new List<string>
        {
            "ATGCCTCCTATCTTAGCGCGC",
            "CGCGCG",
            "CGCGCCGCGC",
            "TTTTTA",
            "TTTTTT",
            "TTTTAGC",
            "TTTTAA",
            "TTTAAA",
        };

        foreach (ISequence scaffold in assembly.Scaffolds)
        {
            string forward = scaffold.ConvertToString();
            bool known = knownScaffolds.Contains(forward);
            if (!known)
            {
                known = knownScaffolds.Contains(forward.GetReverseComplement(new char[forward.Length]));
            }

            Assert.IsTrue(known, "Found unknown scaffold " + forward);
        }
    }
}
/// <summary>
/// Assembles the sequences with scaffold generation.
/// Registers the clone library when a name is configured, parses the read file,
/// assembles with scaffolds enabled and writes the result. Stage timings are written
/// to the console when Verbose is set.
/// </summary>
public override void AssembleSequences()
{
    TimeSpan algorithmSpan = new TimeSpan();
    Stopwatch runAlgorithm = new Stopwatch();
    FileInfo refFileinfo = new FileInfo(this.Filename);
    long refFileLength = refFileinfo.Length;

    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    runAlgorithm.Restart();
    IEnumerable<ISequence> reads = ParseFile();
    runAlgorithm.Stop();
    algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

    if (this.Verbose)
    {
        Console.WriteLine();
        Console.WriteLine(" Processed read file: {0}", Path.GetFullPath(this.Filename));
        Console.WriteLine(" Read/Processing time: {0}", runAlgorithm.Elapsed);
        Console.WriteLine(" File Size : {0}", refFileLength);
    }

    // The assembler is disposable; the using block releases it even if assembling
    // or writing the contigs throws.
    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.AllowErosion = AllowErosion;
        assembler.AllowKmerLengthEstimation = AllowKmerLengthEstimation;
        assembler.AllowLowCoverageContigRemoval = LowCoverageContigRemovalEnabled;
        assembler.ContigCoverageThreshold = ContigCoverageThreshold;
        assembler.DanglingLinksThreshold = DangleThreshold;
        assembler.ErosionThreshold = ErosionThreshold;

        // An explicit k-mer length is applied only when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            assembler.KmerLength = this.KmerLength;
        }

        assembler.RedundantPathLengthThreshold = RedundantPathLengthThreshold;

        runAlgorithm.Restart();
        IDeNovoAssembly assembly = assembler.Assemble(reads, true);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Console.WriteLine();
            Console.WriteLine(" Compute time: {0}", runAlgorithm.Elapsed);
        }

        runAlgorithm.Restart();
        WriteContigs(assembly);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Console.WriteLine();
            Console.WriteLine(" Write time: {0}", runAlgorithm.Elapsed);
            Console.WriteLine(" Total runtime: {0}", algorithmSpan);
        }
    }
}
/// <summary>
/// Parses the read file, validates the reads, assembles them with a
/// ParallelDeNovoAssembler and writes the resulting contigs. Each stage is timed;
/// when Verbose is set the timings are written through Output.
/// </summary>
public virtual void AssembleSequences()
{
    TimeSpan totalElapsed = TimeSpan.Zero;
    Stopwatch stageTimer = new Stopwatch();
    long inputFileSize = new FileInfo(this.Filename).Length;

    // Stage 1: read the input file.
    stageTimer.Restart();
    IEnumerable<ISequence> reads = this.ParseFile();
    stageTimer.Stop();
    totalElapsed += stageTimer.Elapsed;

    Output.WriteLine(OutputLevel.Information, Resources.AssemblyStarting);

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", stageTimer.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", inputFileSize);
        Output.WriteLine(OutputLevel.Verbose, " k-mer Length : {0}", this.KmerLength);
    }

    // Stage 2: validate the reads. This time is reported but not added to the total.
    stageTimer.Restart();
    ValidateAmbiguousReads(reads);
    stageTimer.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", stageTimer.Elapsed);
    }

    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.StatusChanged += this.AssemblerStatusChanged;
        assembler.AllowErosion = this.AllowErosion;
        assembler.AllowKmerLengthEstimation = this.AllowKmerLengthEstimation;

        // A threshold of -1 leaves low-coverage contig removal at its default.
        if (ContigCoverageThreshold != -1)
        {
            assembler.AllowLowCoverageContigRemoval = true;
            assembler.ContigCoverageThreshold = ContigCoverageThreshold;
        }

        assembler.DanglingLinksThreshold = this.DangleThreshold;
        assembler.ErosionThreshold = this.ErosionThreshold;

        // An explicit k-mer length is applied only when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            assembler.KmerLength = this.KmerLength;
        }

        assembler.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;

        // Stage 3: assemble.
        stageTimer.Restart();
        IDeNovoAssembly assembly = assembler.Assemble(reads);
        stageTimer.Stop();
        totalElapsed += stageTimer.Elapsed;

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", stageTimer.Elapsed);
        }

        // Stage 4: write the contigs.
        stageTimer.Restart();
        this.WriteContigs(assembly);
        stageTimer.Stop();
        totalElapsed += stageTimer.Elapsed;

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", stageTimer.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", totalElapsed);
        }
    }
}