/// <summary>
/// Assembles the reads returned by <c>TestInputs.GetDanglingReads()</c> and verifies that
/// exactly one contig is produced and that it is the expected one.
/// </summary>
public void AssemblerTest()
{
    const int KmerSize = 11;
    const int DanglingLimit = 3;
    const int RedundantLimit = 10;

    List<ISequence> inputReads = TestInputs.GetDanglingReads();

    using (ParallelDeNovoAssembler padena = new ParallelDeNovoAssembler())
    {
        padena.KmerLength = KmerSize;
        padena.DanglingLinksThreshold = DanglingLimit;
        padena.RedundantPathLengthThreshold = RedundantLimit;

        IDeNovoAssembly assembly = padena.Assemble(inputReads);

        // Exactly one contig is expected.
        Assert.AreEqual(1, assembly.AssembledSequences.Count());

        HashSet<string> expected = new HashSet<string> { "ATCGCTAGCATCGAACGATCATT" };
        foreach (ISequence contig in assembly.AssembledSequences)
        {
            // Turn the contig's symbols into text so it can be looked up in the expected set.
            char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
            string contigText = new string(symbols);
            Assert.IsTrue(expected.Contains(contigText));
        }
    }
}
/// <summary>
/// Parses the input reads, assembles them with <c>ParallelDeNovoAssembler</c> and writes the
/// resulting contigs. When <c>Verbose</c> is set, file details and per-stage timings are
/// printed to the console.
/// </summary>
public virtual void AssembleSequences()
{
    TimeSpan totalElapsed = new TimeSpan();
    Stopwatch stageTimer = new Stopwatch();
    long inputFileSize = new FileInfo(this.Filename).Length;

    // Stage 1: parse the reads.
    stageTimer.Restart();
    IEnumerable<ISequence> reads = this.ParseFile();
    stageTimer.Stop();
    totalElapsed += stageTimer.Elapsed;

    if (this.Verbose)
    {
        Console.WriteLine();
        Console.WriteLine(" Processed read file: {0}", Path.GetFullPath(this.Filename));
        Console.WriteLine(" Read/Processing time: {0}", stageTimer.Elapsed);
        Console.WriteLine(" File Size : {0}", inputFileSize);
        Console.WriteLine(" k-mer Length : {0}", this.KmerLength);
    }

    using (ParallelDeNovoAssembler padena = new ParallelDeNovoAssembler())
    {
        padena.AllowErosion = this.AllowErosion;
        padena.AllowKmerLengthEstimation = this.AllowKmerLengthEstimation;
        padena.AllowLowCoverageContigRemoval = this.LowCoverageContigRemovalEnabled;
        padena.ContigCoverageThreshold = this.ContigCoverageThreshold;
        padena.DanglingLinksThreshold = this.DangleThreshold;
        padena.ErosionThreshold = this.ErosionThreshold;

        // An explicit k-mer length is only supplied when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            padena.KmerLength = this.KmerLength;
        }

        padena.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;

        // Stage 2: assemble.
        stageTimer.Restart();
        IDeNovoAssembly assembly = padena.Assemble(reads);
        stageTimer.Stop();
        totalElapsed += stageTimer.Elapsed;

        if (this.Verbose)
        {
            Console.WriteLine();
            Console.WriteLine(" Compute time: {0}", stageTimer.Elapsed);
        }

        // Stage 3: write the contigs.
        stageTimer.Restart();
        this.WriteContigs(assembly);
        stageTimer.Stop();
        totalElapsed += stageTimer.Elapsed;

        if (this.Verbose)
        {
            Console.WriteLine();
            Console.WriteLine(" Write contigs time: {0}", stageTimer.Elapsed);
            Console.WriteLine(" Total runtime: {0}", totalElapsed);
        }
    }
}
/// <summary>
/// Runs <c>OverlapDeNovoAssembler</c> on <c>Sequences</c> and stores the assembly in
/// <c>Consensus</c> and its contigs in <c>Contigs</c>.
/// </summary>
/// <param name="executionContext">Execution context supplied by the caller; not read by this method.</param>
/// <returns>Always <c>ActivityExecutionStatus.Closed</c>.</returns>
protected override ActivityExecutionStatus Execute(ActivityExecutionContext executionContext)
{
    var overlapAssembler = new OverlapDeNovoAssembler();
    Consensus = overlapAssembler.Assemble(Sequences);

    // The contigs are only exposed on the overlap-specific assembly interface.
    var overlapAssembly = (IOverlapDeNovoAssembly)Consensus;
    Contigs = overlapAssembly.Contigs;

    return ActivityExecutionStatus.Closed;
}
/// <summary>
/// Reads a FASTQ file, assembles its sequences with <c>OverlapDeNovoAssembler</c> and writes
/// the assembled sequences to "assembled_sequences.fasta".
/// </summary>
/// <remarks>
/// The method contains no awaits, so it is a plain synchronous method. It was previously
/// declared <c>async void</c>, which made exceptions unobservable to the caller.
/// </remarks>
/// <param name="fastqFileName">Path of the FASTQ file to read.</param>
public static void AssemblySequences(string fastqFileName)
{
    var parser = new FastQParser();
    List<IQualitativeSequence> sequences;

    // The file is only read, so request read access (FileMode.Open alone asks for read/write).
    using (var fileStream = new FileStream(fastqFileName, FileMode.Open, FileAccess.Read))
    {
        // Materialize while the stream is still open.
        sequences = parser.Parse(fileStream).ToList();
    }

    OverlapDeNovoAssembler assembler = new OverlapDeNovoAssembler();
    IDeNovoAssembly assembly = assembler.Assemble(sequences);

    FastAFormatter outputFormatter = new FastAFormatter();
    outputFormatter.Open("assembled_sequences.fasta");
    try
    {
        outputFormatter.Format(assembly.AssembledSequences);
    }
    finally
    {
        // Close the output even when formatting fails.
        outputFormatter.Close();
    }
}
/// <summary>
/// Runs PaDeNA on the reads in the given file and records performance numbers:
/// elapsed time in <c>PerfTests._watchObj</c> and the managed-memory delta in <c>MemoryUsed</c>.
/// </summary>
/// <param name="QueryFilePath">
/// Query file path. The file is read as alternating lines: an identifier line followed by a
/// sequence line; reading stops at the first missing or empty sequence line.
/// </param>
/// <returns>Number of assembled sequences in the assembly result.</returns>
internal int RunPerf(string QueryFilePath)
{
    // The assembler is disposable; release it once the numbers have been captured.
    using (ParallelDeNovoAssembler parallel = new ParallelDeNovoAssembler())
    {
        kLength = 20;
        rThreshold = 2 * (kLength + 1);
        dThreshold = kLength;

        parallel.KmerLength = kLength;
        parallel.DanglingLinksThreshold = dThreshold;
        parallel.RedundantPathLengthThreshold = rThreshold;

        List<ISequence> sequences = new List<ISequence>();
        using (StreamReader read = new StreamReader(QueryFilePath))
        {
            string Id = read.ReadLine();
            string seq = read.ReadLine();
            while (!string.IsNullOrEmpty(seq))
            {
                Sequence sequence = new Sequence(Alphabets.DNA, seq);
                sequence.DisplayID = Id;
                sequences.Add(sequence);

                Id = read.ReadLine();
                seq = read.ReadLine();
            }
        }

        CloneLibrary.Instance.AddLibrary("abc", (float)1000, (float)500);

        long memoryStart = GC.GetTotalMemory(false);
        PerfTests._watchObj.Reset();
        PerfTests._watchObj.Start();

        IDeNovoAssembly assembly = parallel.Assemble(sequences, true);

        // Stop the clock first so the memory query is not included in the measured time.
        PerfTests._watchObj.Stop();
        long memoryEnd = GC.GetTotalMemory(false);

        MemoryUsed = (memoryEnd - memoryStart).ToString();

        return assembly.AssembledSequences.Count;
    }
}
/// <summary>
/// Writes the assembled contigs in FASTA form, either to <c>OutputFile</c> when it is set
/// or to standard output otherwise. Reports "No sequences assembled." and returns when the
/// assembly is empty.
/// </summary>
/// <param name="assembly">Result of a de novo assembly run whose sequences are to be written.</param>
protected void WriteContigs(IDeNovoAssembly assembly)
{
    if (assembly.AssembledSequences.Count == 0)
    {
        Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
        return;
    }

    EnsureContigNames(assembly.AssembledSequences);

    bool writeToConsole = string.IsNullOrEmpty(this.OutputFile);
    if (writeToConsole)
    {
        Output.WriteLine(
            OutputLevel.Information,
            "Assembled Sequence Results: {0} sequences",
            assembly.AssembledSequences.Count);

        using (FastAFormatter consoleWriter = new FastAFormatter())
        {
            consoleWriter.Open(new StreamWriter(Console.OpenStandardOutput()));

            // Cap line length at 80 symbols, or narrower if the console window is narrower.
            consoleWriter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);
            consoleWriter.AutoFlush = true;

            foreach (ISequence contig in assembly.AssembledSequences)
            {
                consoleWriter.Write(contig);
            }
        }

        return;
    }

    using (FastAFormatter fileWriter = new FastAFormatter(this.OutputFile))
    {
        fileWriter.AutoFlush = true;
        foreach (ISequence contig in assembly.AssembledSequences)
        {
            fileWriter.Write(contig);
        }
    }

    Output.WriteLine(
        OutputLevel.Information,
        "Wrote {0} sequences to {1}",
        assembly.AssembledSequences.Count,
        this.OutputFile);
}
/// <summary>
/// Writes the assembled contigs: in FASTA form to <c>OutputFile</c> when it is set, otherwise
/// as an ID line followed by a sequence line on the console.
/// </summary>
/// <param name="assembly">Result of a de novo assembly run whose sequences are to be written.</param>
protected void WriteContigs(IDeNovoAssembly assembly)
{
    if (string.IsNullOrEmpty(this.OutputFile))
    {
        // No output file: print each contig's ID and its symbols as text.
        foreach (ISequence contig in assembly.AssembledSequences)
        {
            Console.WriteLine(contig.ID);

            char[] symbols = contig.Select(symbol => (char)symbol).ToArray();
            Console.WriteLine(new string(symbols));
        }

        return;
    }

    using (FastAFormatter fileWriter = new FastAFormatter(this.OutputFile))
    {
        fileWriter.AutoFlush = true;
        foreach (ISequence contig in assembly.AssembledSequences)
        {
            fileWriter.Write(contig);
        }
    }
}
/// <summary>
/// Assembles a single read, then the same read prefixed with an extra "T", and checks that each
/// run yields exactly one contig equal to the un-prefixed sequence (directly or as its reverse
/// complement).
/// </summary>
public void PalindromicAssembleTest()
{
    const int KmerSize = 19;

    string baseText = @"TTTTTTCAATTGAAAAAAATCTGTATT";
    string extendedText = "T" + baseText;
    var expectedSequence = new Sequence(DnaAlphabet.Instance, baseText);
    var extendedSequence = new Sequence(DnaAlphabet.Instance, extendedText);

    List<ISequence> reads = new List<ISequence>();

    // Two different inputs that are expected to assemble to the same sequence.
    // Only one of these can be done correctly in the current algorithmic setup
    // using simple paths; that must be the first one.
    foreach (var candidate in new[] { expectedSequence, extendedSequence })
    {
        reads.Clear();
        reads.Add(candidate);

        using (ParallelDeNovoAssembler padena = new ParallelDeNovoAssembler())
        {
            padena.KmerLength = KmerSize;
            padena.AllowErosion = false;
            padena.AllowLowCoverageContigRemoval = false;
            padena.ContigCoverageThreshold = 0;
            padena.DanglingLinksThreshold = 0;

            IDeNovoAssembly assembly = padena.Assemble(reads);

            // A single contig is expected, with no additional base added to it.
            Assert.AreEqual(1, assembly.AssembledSequences.Count);

            // Accept the contig in either orientation.
            bool matchesExpected =
                assembly.AssembledSequences[0].SequenceEqual(expectedSequence)
                || assembly.AssembledSequences[0].GetReverseComplementedSequence().SequenceEqual(expectedSequence);

            Assert.IsTrue(matchesExpected);
        }
    }
}
/// <summary>
/// Displays the output of an assembly process on a new worksheet.
/// For every contig a header row with the consensus is written, followed by one row per
/// sequence assembled into that contig; rows for the unmerged sequences are written last.
/// Each written range is also registered as a name on the sheet.
/// Nothing is done when <paramref name="assemblerResult"/> is not an
/// <c>IOverlapDeNovoAssembly</c>.
/// </summary>
/// <param name="assemblerResult">Result of the assembly process.</param>
private void BuildConsensusView(IDeNovoAssembly assemblerResult)
{
    var overlapAssemblerResult = assemblerResult as IOverlapDeNovoAssembly;
    if (overlapAssemblerResult != null)
    {
        this.ScreenUpdate(false);
        Workbook activeWorkBook = Globals.ThisAddIn.Application.ActiveWorkbook;
        var activesheet = (Worksheet)Globals.ThisAddIn.Application.ActiveSheet;

        // Add the new sheet; the active sheet is passed as the second argument of Worksheets.Add.
        var currentsheet =
            (Worksheet)activeWorkBook.Worksheets.Add(Type.Missing, activesheet, Type.Missing, Type.Missing);
        string[,] rangeData;

        // rowNumber is the 1-based sheet row currently being written.
        int rowNumber = 1;
        int contigNumber = 1;
        int rowCount, rowIndex, columnIndex;

        ((_Worksheet)currentsheet).Activate();
        Globals.ThisAddIn.Application.ActiveWindow.Zoom = ZoomLevel;
        currentsheet.Name =
            this.GetValidFileNames(
                "ConsensusView" + this.currentConsensusSheetNumber.ToString(CultureInfo.CurrentCulture));
        this.currentConsensusSheetNumber++;

        foreach (Contig contig in overlapAssemblerResult.Contigs)
        {
            // Write Header
            Range header = currentsheet.get_Range(
                "A" + rowNumber.ToString(CultureInfo.CurrentCulture),
                Type.Missing);
            WriteRangeValue(header, "Contig" + contigNumber.ToString(CultureInfo.CurrentCulture));

            ISequence contigSequence = contig.Consensus;

            // The consensus data starts in column B of the header row.
            Range currentRange = currentsheet.get_Range(
                "B" + rowNumber.ToString(CultureInfo.CurrentCulture),
                Type.Missing);

            // Number of symbols placed in each cell: 1 unless the consensus is longer than
            // MaxExcelColumns, in which case it is Count / MaxExcelColumns rounded up.
            long numberofCharacters = 1;
            if (contigSequence.Count > MaxExcelColumns)
            {
                if (contigSequence.Count % MaxExcelColumns == 0)
                {
                    numberofCharacters = contigSequence.Count / MaxExcelColumns;
                }
                else
                {
                    numberofCharacters = contigSequence.Count / MaxExcelColumns;
                    numberofCharacters++;
                }
            }

            // columnCount ends up as (number of cells filled) + 1.
            int columnCount = 1;
            rowCount = (int)Math.Ceiling(contigSequence.Count / (decimal)MaxExcelColumns);
            rowIndex = 0;
            columnIndex = 0;
            rangeData = new string[rowCount, contigSequence.Count > MaxExcelColumns ? MaxExcelColumns : contigSequence.Count];

            // Cut the consensus into chunks of numberofCharacters symbols, one chunk per cell.
            // NOTE(review): the last chunk may ask for more symbols than remain; presumably
            // GetSubSequence tolerates that - confirm.
            for (long i = 0; i < contigSequence.Count; i += numberofCharacters)
            {
                if (MaxExcelColumns == columnIndex)
                {
                    columnIndex = 0;
                    rowIndex++;
                }

                ISequence tempSeq = contigSequence.GetSubSequence(i, numberofCharacters);
                string subsequence = tempSeq.ConvertToString();
                rangeData[rowIndex, columnIndex] = subsequence;
                columnIndex++;
                columnCount++;
            }

            var formulaBuilder = new StringBuilder();
            string formula = string.Empty;
            string name = string.Empty;
            if (columnCount > 1)
            {
                // Resize to one row by (cells filled) columns and write the data.
                // NOTE(review): rangeData is allocated with rowCount rows while the target
                // range has a single row - confirm this is intended.
                currentRange = currentRange.get_Resize(1, columnCount - 1);
                currentRange.set_Value(Missing.Value, rangeData);
                this.FillBackGroundColor(currentRange);

                // Build "=<sheet>!$B$<row>:$<lastColumn>$<row>" and register it as a sheet name.
                formulaBuilder.Append("=");
                formulaBuilder.Append(currentsheet.Name);
                formulaBuilder.Append("!");
                formulaBuilder.Append("$B$" + rowNumber);
                formulaBuilder.Append(":$");
                formulaBuilder.Append(
                    GetColumnString(columnCount) + "$" + rowNumber.ToString(CultureInfo.CurrentCulture));
                formula = formulaBuilder.ToString();
                name = Resources.CONTIG + contigNumber.ToString(CultureInfo.CurrentCulture);
                currentsheet.Names.Add(
                    this.GetValidFileNames(name),
                    formula,
                    true,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing);
            }

            rowNumber++;
            int sequenceNumber = 1;

            // One row for each sequence that was assembled into this contig.
            foreach (Contig.AssembledSequence assembled in contig.Sequences)
            {
                int initialRowNumber = rowNumber;
                columnCount = 1;
                ISequence assembledSequence = assembled.Sequence;

                // Write Header
                Range sequenceHeader = currentsheet.get_Range("A" + rowNumber.ToString(CultureInfo.CurrentCulture), Type.Missing);
                WriteRangeValue(sequenceHeader, assembledSequence.ID);

                // Show the sequence in the orientation flagged on the assembled entry and
                // annotate the header cell with a matching comment.
                if (assembled.IsComplemented && assembled.IsReversed)
                {
                    assembledSequence = assembled.Sequence.GetReverseComplementedSequence();
                    sequenceHeader.Cells.AddComment(Resources.SEQUENCE_REVERSECOMPLEMENT);
                }
                else if (assembled.IsReversed)
                {
                    assembledSequence = assembled.Sequence.GetReversedSequence();
                    sequenceHeader.Cells.AddComment(Resources.SEQUENCE_REVERSE);
                }
                else if (assembled.IsComplemented)
                {
                    assembledSequence = assembled.Sequence.GetComplementedSequence();
                    sequenceHeader.Cells.AddComment(Resources.SEQUENCE_COMPLEMENT);
                }

                // 1-based index of the consensus cell containing assembled.Position; the
                // extra +1 passed to GetColumnString below skips the header column A.
                long startingColumn = assembled.Position / numberofCharacters;
                startingColumn++;
                currentRange = currentsheet.get_Range(
                    GetColumnString(startingColumn + 1) + rowNumber.ToString(CultureInfo.CurrentCulture),
                    Type.Missing);

                long startingIndex = 0;
                if (numberofCharacters > 1)
                {
                    // Cells hold several symbols, so the first cell is written separately:
                    // it is padded with spaces up to the sequence's offset inside the cell
                    // and then filled with the symbols that fit in the rest of that cell.
                    long cellStartIndex = (startingColumn - 1) * numberofCharacters;
                    long endingIndex = cellStartIndex + numberofCharacters - 1;
                    long startextractCharacters = endingIndex - assembled.Position + 1;
                    long numberOfSpaces = Math.Abs(assembled.Position - cellStartIndex);

                    string firstcell = assembledSequence.GetSubSequence(0, startextractCharacters).ConvertToString();
                    var sb = new StringBuilder();
                    for (int i = 1; i <= numberOfSpaces; i++)
                    {
                        sb.Append(" ");
                    }

                    sb.Append(firstcell);
                    WriteRangeValue(currentRange, sb.ToString());

                    // Continue after the symbols already written, starting in the next cell.
                    startingIndex = startextractCharacters;
                    currentRange = currentRange.Next;
                }

                rowCount = (int)Math.Ceiling(assembledSequence.Count / (decimal)MaxExcelColumns);
                rowIndex = 0;
                columnIndex = 0;
                rangeData = new string[rowCount, assembledSequence.Count > MaxExcelColumns ? MaxExcelColumns : assembledSequence.Count];

                // Remaining symbols, numberofCharacters per cell.
                for (long i = startingIndex; i < assembledSequence.Count; i += numberofCharacters)
                {
                    if (MaxExcelColumns == columnIndex)
                    {
                        columnIndex = 0;
                        rowIndex++;
                    }

                    ISequence tempSeq = assembledSequence.GetSubSequence(i, numberofCharacters);
                    string derivedSequence = tempSeq.ConvertToString();
                    rangeData[rowIndex, columnIndex] = derivedSequence;
                    columnIndex++;
                    columnCount++;
                }

                if (columnCount > 1)
                {
                    currentRange = currentRange.get_Resize(1, columnCount - 1);
                    currentRange.set_Value(Missing.Value, rangeData);
                    currentRange.Columns.AutoFit();
                    this.FillBackGroundColor(currentRange);

                    // Build "=<sheet>!$<firstColumn>$<row>:$<lastColumn>$<row>" for this sequence.
                    formulaBuilder = new StringBuilder();
                    formulaBuilder.Append("=");
                    formulaBuilder.Append(currentsheet.Name);
                    formulaBuilder.Append("!$");
                    formulaBuilder.Append(GetColumnString(startingColumn + 1) + "$" + initialRowNumber);
                    formulaBuilder.Append(":$");
                    formulaBuilder.Append(
                        GetColumnString(startingColumn + columnCount - 1) + "$" + rowNumber.ToString(CultureInfo.CurrentCulture));
                    string sequenceFormula = formulaBuilder.ToString();
                    name = Resources.CONTIG + contigNumber + "_" + assembledSequence.ID + sequenceNumber.ToString(CultureInfo.CurrentCulture);
                    currentsheet.Names.Add(
                        this.GetValidFileNames(name),
                        sequenceFormula,
                        true,
                        Type.Missing,
                        Type.Missing,
                        Type.Missing,
                        Type.Missing,
                        Type.Missing,
                        Type.Missing,
                        Type.Missing,
                        Type.Missing);
                }

                rowNumber++;
                sequenceNumber++;
            }

            contigNumber++;

            // Leaves one empty row between contigs.
            rowNumber++;
        }

        int unmerged = 1;

        // Rows for the sequences that were not merged into any contig.
        foreach (ISequence sequence in overlapAssemblerResult.UnmergedSequences)
        {
            // Write Header
            Range sequenceHeader = currentsheet.get_Range(
                "A" + rowNumber.ToString(CultureInfo.CurrentCulture),
                Type.Missing);
            WriteRangeValue(sequenceHeader, "Unmerged Sequence_" + sequence.ID);

            // Symbols per cell, computed the same way as for the contig consensus.
            long numberofCharacters = 1;
            if (sequence.Count > MaxExcelColumns)
            {
                if (sequence.Count % MaxExcelColumns == 0)
                {
                    numberofCharacters = sequence.Count / MaxExcelColumns;
                }
                else
                {
                    numberofCharacters = sequence.Count / MaxExcelColumns;
                    numberofCharacters++;
                }
            }

            Range currentRange = currentsheet.get_Range(
                "B" + rowNumber.ToString(CultureInfo.CurrentCulture),
                Type.Missing);

            int columnCount = 1;
            rowCount = (int)Math.Ceiling(sequence.Count / (decimal)MaxExcelColumns);
            rowIndex = 0;
            columnIndex = 0;
            rangeData = new string[rowCount, sequence.Count > MaxExcelColumns ? MaxExcelColumns : sequence.Count];

            for (long i = 0; i < sequence.Count; i += numberofCharacters)
            {
                if (MaxExcelColumns == columnIndex)
                {
                    columnIndex = 0;
                    rowIndex++;
                }

                ISequence tempSeq = sequence.GetSubSequence(i, numberofCharacters);
                string subsequence = tempSeq.ConvertToString();
                rangeData[rowIndex, columnIndex] = subsequence;
                columnIndex++;
                columnCount++;
            }

            if (columnCount > 1)
            {
                currentRange = currentRange.get_Resize(1, columnCount - 1);
                currentRange.set_Value(Missing.Value, rangeData);
                this.FillBackGroundColor(currentRange);

                // Build "=<sheet>!$B$<row>:$<lastColumn>$<row>" and register it as a sheet name.
                var formulaBuilder = new StringBuilder();
                formulaBuilder.Append("=");
                formulaBuilder.Append(currentsheet.Name);
                formulaBuilder.Append("!");
                formulaBuilder.Append("$B$" + rowNumber);
                formulaBuilder.Append(":$");
                formulaBuilder.Append(
                    GetColumnString(columnCount) + "$" + rowNumber.ToString(CultureInfo.CurrentCulture));
                string formula = formulaBuilder.ToString();
                string name = Resources.UNMERGED_SEQUENCE + unmerged.ToString(CultureInfo.CurrentCulture);
                currentsheet.Names.Add(
                    this.GetValidFileNames(name),
                    formula,
                    true,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing,
                    Type.Missing);
            }

            rowNumber++;
            unmerged++;
        }

        currentsheet.Columns.AutoFit();
        this.EnableAllControls();
        this.ScreenUpdate(true);
    }
}
/// <summary>
/// Parses the reads, rejects ambiguous input, assembles the reads with
/// <c>ParallelDeNovoAssembler</c> (passing <c>true</c> as the second argument of
/// <c>Assemble</c>) and writes the resulting sequences. Progress and timings go through
/// <c>Output</c>; timing details are only written when <c>Verbose</c> is set.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown when any read's alphabet reports <c>HasAmbiguity</c>.
/// </exception>
public override void AssembleSequences()
{
    TimeSpan algorithmSpan = new TimeSpan();
    Stopwatch runAlgorithm = new Stopwatch();
    FileInfo refFileinfo = new FileInfo(this.Filename);
    long refFileLength = refFileinfo.Length;

    Output.WriteLine(OutputLevel.Information, Resources.AssemblyScaffoldStarting);

    // Register the clone library only when a library name was supplied.
    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    runAlgorithm.Restart();
    IEnumerable<ISequence> reads = ParseFile();
    runAlgorithm.Stop();
    algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", runAlgorithm.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", refFileLength);
        Output.WriteLine(OutputLevel.Verbose, " k-mer Length : {0}", this.KmerLength);
    }

    // Validation time is reported separately and is not added to the total runtime.
    runAlgorithm.Restart();
    if (reads.Any(s => s.Alphabet.HasAmbiguity))
    {
        throw new ArgumentException(Resources.AmbiguousReadsNotSupported);
    }

    runAlgorithm.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", runAlgorithm.Elapsed);
    }

    // The assembler is disposable; dispose it even when assembly or writing throws.
    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.StatusChanged += this.AssemblerStatusChanged;
        assembler.AllowErosion = AllowErosion;
        assembler.AllowKmerLengthEstimation = AllowKmerLengthEstimation;

        // -1 means no coverage threshold was given, so low-coverage removal stays untouched.
        if (ContigCoverageThreshold != -1)
        {
            assembler.AllowLowCoverageContigRemoval = true;
            assembler.ContigCoverageThreshold = ContigCoverageThreshold;
        }

        assembler.DanglingLinksThreshold = DangleThreshold;
        assembler.ErosionThreshold = ErosionThreshold;

        // An explicit k-mer length is only supplied when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            assembler.KmerLength = this.KmerLength;
        }

        assembler.RedundantPathLengthThreshold = RedundantPathLengthThreshold;

        runAlgorithm.Restart();
        IDeNovoAssembly assembly = assembler.Assemble(reads, true);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
        }

        runAlgorithm.Restart();
        WriteContigs(assembly);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
        }
    }
}
/// <summary>
/// Assembles the sequences with <c>MitoPaintedAssembler</c>, writes the contigs and returns
/// the assembler's report line, which can be placed in a CSV output report. When the assembler
/// has diagnostic output enabled, a header line and the report line are also written to
/// "<c>ReportOutputPrefix</c> + <c>DiagnosticFilePrefix</c> + .csv".
/// </summary>
/// <returns>The value of <c>assembler.GetReportLine()</c> obtained after the contigs are written.</returns>
public string AssembleSequencesReturningString()
{
    string toReturn = "No String Set";
    TimeSpan algorithmSpan = new TimeSpan();
    Stopwatch runAlgorithm = new Stopwatch();
    FileInfo refFileinfo = new FileInfo(this.Filename);
    long refFileLength = refFileinfo.Length;

    runAlgorithm.Restart();
    IEnumerable<ISequence> reads = this.ParseFile();
    runAlgorithm.Stop();
    algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

    Output.WriteLine(OutputLevel.Information, StaticResources.AssemblyStarting);

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", runAlgorithm.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", refFileLength);
        Output.WriteLine(OutputLevel.Verbose, " k-mer Length : {0}", this.KmerLength);
    }

    using (MitoPaintedAssembler assembler = new MitoPaintedAssembler())
    {
        // NOTE(review): this value is overwritten by this.AllowErosion a few lines below;
        // kept in case the setter has side effects - confirm and remove if not.
        assembler.AllowErosion = true;
        //assembler.ReferenceGenomeFile = ReferenceGenome;
        assembler.DiagnosticFileOutputPrefix = DiagnosticFilePrefix;
        Console.WriteLine("Prefix is: " + assembler.DiagnosticFileOutputPrefix);
        Console.WriteLine("Diagnostic Information On: " + assembler.OutputDiagnosticInformation.ToString());

        assembler.StatusChanged += this.AssemblerStatusChanged;
        assembler.AllowErosion = this.AllowErosion;
        assembler.AllowKmerLengthEstimation = this.AllowKmerLengthEstimation;

        // -1 means no coverage threshold was given, so low-coverage removal stays untouched.
        if (ContigCoverageThreshold != -1)
        {
            assembler.AllowLowCoverageContigRemoval = true;
            assembler.ContigCoverageThreshold = ContigCoverageThreshold;
        }

        assembler.DanglingLinksThreshold = this.DangleThreshold;
        assembler.ErosionThreshold = this.ErosionThreshold;

        // An explicit k-mer length is only supplied when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            assembler.KmerLength = this.KmerLength;
        }

        assembler.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;

        runAlgorithm.Restart();
        IDeNovoAssembly assembly = assembler.Assemble(reads);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
        }

        runAlgorithm.Restart();
        this.WriteContigs(assembly);
        runAlgorithm.Stop();

        toReturn = assembler.GetReportLine();
        if (assembler.OutputDiagnosticInformation)
        {
            // The using block closes the report file even when a write throws.
            using (var outFile = new StreamWriter(ReportOutputPrefix + DiagnosticFilePrefix + ".csv"))
            {
                outFile.WriteLine(MitoDataAssembler.AssemblyOutput.CreateHeaderLine());
                outFile.WriteLine(toReturn);
            }
        }

        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
        }
    }

    return toReturn;
}
/// <summary>
/// Parses the reads, assembles them with <c>ParallelDeNovoAssembler</c> (passing <c>true</c>
/// as the second argument of <c>Assemble</c>) and writes the resulting sequences. When
/// <c>Verbose</c> is set, file details and per-stage timings are printed to the console.
/// </summary>
public override void AssembleSequences()
{
    TimeSpan algorithmSpan = new TimeSpan();
    Stopwatch runAlgorithm = new Stopwatch();
    FileInfo refFileinfo = new FileInfo(this.Filename);
    long refFileLength = refFileinfo.Length;

    // Register the clone library only when a library name was supplied.
    if (!string.IsNullOrEmpty(this.CloneLibraryName))
    {
        CloneLibrary.Instance.AddLibrary(
            this.CloneLibraryName,
            (float)this.MeanLengthOfInsert,
            (float)this.StandardDeviationOfInsert);
    }

    runAlgorithm.Restart();
    IEnumerable<ISequence> reads = ParseFile();
    runAlgorithm.Stop();
    algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

    if (this.Verbose)
    {
        Console.WriteLine();
        Console.WriteLine(" Processed read file: {0}", Path.GetFullPath(this.Filename));
        Console.WriteLine(" Read/Processing time: {0}", runAlgorithm.Elapsed);
        Console.WriteLine(" File Size : {0}", refFileLength);
    }

    // The assembler is disposable; dispose it even when assembly or writing throws.
    using (ParallelDeNovoAssembler assembler = new ParallelDeNovoAssembler())
    {
        assembler.AllowErosion = AllowErosion;
        assembler.AllowKmerLengthEstimation = AllowKmerLengthEstimation;
        assembler.AllowLowCoverageContigRemoval = LowCoverageContigRemovalEnabled;
        assembler.ContigCoverageThreshold = ContigCoverageThreshold;
        assembler.DanglingLinksThreshold = DangleThreshold;
        assembler.ErosionThreshold = ErosionThreshold;

        // An explicit k-mer length is only supplied when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            assembler.KmerLength = this.KmerLength;
        }

        assembler.RedundantPathLengthThreshold = RedundantPathLengthThreshold;

        runAlgorithm.Restart();
        IDeNovoAssembly assembly = assembler.Assemble(reads, true);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Console.WriteLine();
            Console.WriteLine(" Compute time: {0}", runAlgorithm.Elapsed);
        }

        runAlgorithm.Restart();
        WriteContigs(assembly);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

        if (this.Verbose)
        {
            Console.WriteLine();
            Console.WriteLine(" Write time: {0}", runAlgorithm.Elapsed);
            Console.WriteLine(" Total runtime: {0}", algorithmSpan);
        }
    }
}
/// <summary>
/// Parses the input reads, validates them with <c>ValidateAmbiguousReads</c>, assembles them
/// with <c>ParallelDeNovoAssembler</c> and writes the resulting contigs. Progress goes through
/// <c>Output</c>; timing details are only written when <c>Verbose</c> is set.
/// </summary>
public virtual void AssembleSequences()
{
    TimeSpan totalElapsed = new TimeSpan();
    Stopwatch stageTimer = new Stopwatch();
    long inputFileSize = new FileInfo(this.Filename).Length;

    // Stage 1: parse the reads.
    stageTimer.Restart();
    IEnumerable<ISequence> reads = this.ParseFile();
    stageTimer.Stop();
    totalElapsed += stageTimer.Elapsed;

    Output.WriteLine(OutputLevel.Information, Resources.AssemblyStarting);

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
        Output.WriteLine(OutputLevel.Verbose, " Read/Processing time: {0}", stageTimer.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, " File Size : {0}", inputFileSize);
        Output.WriteLine(OutputLevel.Verbose, " k-mer Length : {0}", this.KmerLength);
    }

    // Stage 2: validate the reads. This time is reported but not added to the total.
    stageTimer.Restart();
    ValidateAmbiguousReads(reads);
    stageTimer.Stop();

    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Time taken for Validating reads: {0}", stageTimer.Elapsed);
    }

    using (ParallelDeNovoAssembler padena = new ParallelDeNovoAssembler())
    {
        padena.StatusChanged += this.AssemblerStatusChanged;
        padena.AllowErosion = this.AllowErosion;
        padena.AllowKmerLengthEstimation = this.AllowKmerLengthEstimation;

        // -1 means no coverage threshold was given, so low-coverage removal stays untouched.
        bool coverageThresholdGiven = ContigCoverageThreshold != -1;
        if (coverageThresholdGiven)
        {
            padena.AllowLowCoverageContigRemoval = true;
            padena.ContigCoverageThreshold = ContigCoverageThreshold;
        }

        padena.DanglingLinksThreshold = this.DangleThreshold;
        padena.ErosionThreshold = this.ErosionThreshold;

        // An explicit k-mer length is only supplied when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            padena.KmerLength = this.KmerLength;
        }

        padena.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;

        // Stage 3: assemble.
        stageTimer.Restart();
        IDeNovoAssembly assembly = padena.Assemble(reads);
        stageTimer.Stop();
        totalElapsed += stageTimer.Elapsed;

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", stageTimer.Elapsed);
        }

        // Stage 4: write the contigs.
        stageTimer.Restart();
        this.WriteContigs(assembly);
        stageTimer.Stop();
        totalElapsed += stageTimer.Elapsed;

        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", stageTimer.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", totalElapsed);
        }
    }
}
/// <summary>
/// Writes the assembled contigs in FASTA form, either to <c>OutputFile</c> when it is set
/// or to standard output otherwise. Reports "No sequences assembled." and returns when the
/// assembly is empty.
/// </summary>
/// <param name="assembly">Result of a de novo assembly run whose sequences are to be written.</param>
protected void WriteContigs(IDeNovoAssembly assembly)
{
    if (assembly.AssembledSequences.Count == 0)
    {
        Output.WriteLine(OutputLevel.Results, "No sequences assembled.");
        return;
    }

    EnsureContigNames(assembly.AssembledSequences);

    if (string.IsNullOrEmpty(this.OutputFile))
    {
        Output.WriteLine(
            OutputLevel.Information,
            "Assembled Sequence Results: {0} sequences",
            assembly.AssembledSequences.Count);

        FastAFormatter consoleFormatter = new FastAFormatter();
        consoleFormatter.AutoFlush = true;

        // Cap line length at 80 symbols, or narrower if the console window is narrower.
        consoleFormatter.MaxSymbolsAllowedPerLine = Math.Min(80, Console.WindowWidth - 2);

        foreach (ISequence contig in assembly.AssembledSequences)
        {
            // A standard-output stream is requested for each contig, as before.
            consoleFormatter.Format(Console.OpenStandardOutput(), contig);
        }

        return;
    }

    FastAFormatter fileFormatter = new FastAFormatter();
    fileFormatter.AutoFlush = true;

    // Open returns a disposable that is released once all contigs are written.
    using (fileFormatter.Open(this.OutputFile))
    {
        foreach (ISequence contig in assembly.AssembledSequences)
        {
            fileFormatter.Format(contig);
        }
    }

    Output.WriteLine(
        OutputLevel.Information,
        "Wrote {0} sequences to {1}",
        assembly.AssembledSequences.Count,
        this.OutputFile);
}