/// <summary>
/// Builds a report from an assembler, capturing the CSV header line and a comma-separated
/// data line holding one field per entry in <c>outputValues</c>.
/// </summary>
/// <param name="toReportOn">The assembler passed to each output value's <c>GetValue</c>.</param>
/// <param name="result">Result code forwarded to the base constructor; defaults to <c>AlgorithmResult.Success</c>.</param>
public AssemblyReport(MitoPaintedAssembler toReportOn, AlgorithmResult result = AlgorithmResult.Success)
    : base(result)
{
    HeaderLineForCSV = CreateHeaderLine();

    // Convert.ToString returns an empty string for a null value, so an unset value becomes
    // an empty CSV field instead of raising a NullReferenceException from ".ToString()".
    this.DataLineForCSV = String.Join(
        ",",
        outputValues.Select(x => Convert.ToString(x.GetValue(toReportOn))).ToArray());
}
/// <summary>
/// Assembles the reads from <c>Filename</c> with a <c>MitoPaintedAssembler</c>, optionally
/// writes the contigs and the depth-of-coverage output, and returns the assembler's report.
/// </summary>
/// <returns>
/// A default-constructed <c>AssemblyReport</c> when <c>Skip_Assembly_Step</c> is set;
/// otherwise the value of the assembler's <c>GetReport()</c>.
/// </returns>
protected AssemblyReport CreateAssemblyAndDepthOfCoverage()
{
    if (Skip_Assembly_Step)
    {
        return new AssemblyReport();
    }

    // The plotter is only created when the coverage plot was not skipped; it is handed to the
    // sequence producer (presumably so it can observe the reads as they stream -- confirm).
    DepthOfCoverageGraphMaker coveragePlotter = !Skip_DepthOfCoveragePlot ? new DepthOfCoverageGraphMaker() : null;
    IEnumerable<ISequence> reads = this.createSequenceProducer(this.Filename, coveragePlotter, true);

    TimeSpan algorithmSpan = TimeSpan.Zero;
    Stopwatch runAlgorithm = new Stopwatch();

    // Step 1: Initialize assembler.
    Output.WriteLine(OutputLevel.Verbose, "\nAssemblying mtDNA and obtaining depth of coverage (if asked).");

    // StatusChanged is a static event: remove any earlier subscription first so that calling
    // this method more than once does not stack duplicate handlers.
    MitoPaintedAssembler.StatusChanged -= this.StatusChanged;
    MitoPaintedAssembler.StatusChanged += this.StatusChanged;

    MitoPaintedAssembler assembler = new MitoPaintedAssembler()
    {
        DiagnosticFileOutputPrefix = DiagnosticFilePrefix,
        AllowErosion = AllowErosion,
        AlternateMinimumNodeCount = MinimumNodeCount,
        DanglingLinksThreshold = DangleThreshold,
        ErosionThreshold = ErosionThreshold,
        AllowKmerLengthEstimation = AllowKmerLengthEstimation,
        RedundantPathLengthThreshold = RedundantPathLengthThreshold,
        OutputIntermediateGraphSteps = OutputIntermediateGraphSteps,
        NoContigOutput = NoContigOutput,
        ForceSqrtThreshold = ForceSqrtThreshold
    };

    // -1 is treated as "no threshold given"; any other value enables low-coverage contig removal.
    if (ContigCoverageThreshold != -1)
    {
        assembler.AllowLowCoverageContigRemoval = true;
        assembler.ContigCoverageThreshold = ContigCoverageThreshold;
    }

    // An explicit k-mer length is only applied when estimation is switched off.
    if (!this.AllowKmerLengthEstimation)
    {
        assembler.KmerLength = this.KmerLength;
    }

    // Step 2: Assemble
    runAlgorithm.Restart();
    var assembly = assembler.Assemble(reads);
    runAlgorithm.Stop();
    algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "\tCompute time: {0}", runAlgorithm.Elapsed);
    }

    // Step 3: Report
    // The write time is tracked separately. When contig output is disabled the stopwatch still
    // holds the assembly time, which must not be added to the total (or reported) a second time.
    TimeSpan writeSpan = TimeSpan.Zero;
    if (!NoContigOutput)
    {
        runAlgorithm.Restart();
        this.writeContigs(assembly);
        runAlgorithm.Stop();
        writeSpan = runAlgorithm.Elapsed;
    }
    algorithmSpan = algorithmSpan.Add(writeSpan);
    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "\tWrite contigs time: {0}", writeSpan);
        Output.WriteLine(OutputLevel.Verbose, "\tTotal assembly runtime: {0}", algorithmSpan);
    }

    if (coveragePlotter != null)
    {
        coveragePlotter.OutputCoverageGraphAndCSV(DiagnosticFilePrefix);
    }

    return assembler.GetReport();
}
/// <summary>
/// Assembles the sequences and returns the string that can be placed in a CSV output report.
/// </summary>
/// <remarks>
/// Besides returning the report line, this writes the contigs via <c>WriteContigs</c> and, when
/// the assembler's <c>OutputDiagnosticInformation</c> is true, writes a two-line CSV file
/// (header + report line) to <c>ReportOutputPrefix + DiagnosticFilePrefix + ".csv"</c>.
/// </remarks>
/// <returns>The value of the assembler's <c>GetReportLine()</c> after assembly.</returns>
public string AssembleSequencesReturningString()
{
    string toReturn = "No String Set";
    TimeSpan algorithmSpan = TimeSpan.Zero;
    Stopwatch runAlgorithm = new Stopwatch();

    FileInfo refFileinfo = new FileInfo(this.Filename);
    long refFileLength = refFileinfo.Length;

    runAlgorithm.Restart();
    IEnumerable<ISequence> reads = this.ParseFile();
    runAlgorithm.Stop();
    algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

    Output.WriteLine(OutputLevel.Information, StaticResources.AssemblyStarting);
    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
        Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", runAlgorithm.Elapsed);
        Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", refFileLength);
        Output.WriteLine(OutputLevel.Verbose, "   k-mer Length        : {0}", this.KmerLength);
    }

    using (MitoPaintedAssembler assembler = new MitoPaintedAssembler())
    {
        assembler.DiagnosticFileOutputPrefix = DiagnosticFilePrefix;
        Console.WriteLine("Prefix is: " + assembler.DiagnosticFileOutputPrefix);
        Console.WriteLine("Diagnostic Information On: " + assembler.OutputDiagnosticInformation.ToString());

        assembler.StatusChanged += this.AssemblerStatusChanged;
        assembler.AllowErosion = this.AllowErosion;
        assembler.AllowKmerLengthEstimation = this.AllowKmerLengthEstimation;

        // -1 is treated as "no threshold given"; any other value enables low-coverage contig removal.
        if (ContigCoverageThreshold != -1)
        {
            assembler.AllowLowCoverageContigRemoval = true;
            assembler.ContigCoverageThreshold = ContigCoverageThreshold;
        }

        assembler.DanglingLinksThreshold = this.DangleThreshold;
        assembler.ErosionThreshold = this.ErosionThreshold;

        // An explicit k-mer length is only applied when estimation is switched off.
        if (!this.AllowKmerLengthEstimation)
        {
            assembler.KmerLength = this.KmerLength;
        }

        assembler.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;

        runAlgorithm.Restart();
        IDeNovoAssembly assembly = assembler.Assemble(reads);
        runAlgorithm.Stop();
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
        }

        runAlgorithm.Restart();
        this.WriteContigs(assembly);
        runAlgorithm.Stop();

        toReturn = assembler.GetReportLine();
        if (assembler.OutputDiagnosticInformation)
        {
            // "using" guarantees the file handle is released even if a write throws.
            using (var outFile = new StreamWriter(ReportOutputPrefix + DiagnosticFilePrefix + ".csv"))
            {
                outFile.WriteLine(MitoDataAssembler.AssemblyOutput.CreateHeaderLine());
                outFile.WriteLine(toReturn);
            }
        }

        // The stopwatch was stopped right after WriteContigs, so this adds only the contig
        // write time; the CSV write above is not included.
        algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
        if (this.Verbose)
        {
            Output.WriteLine(OutputLevel.Verbose);
            Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
            Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
        }
    }

    return toReturn;
}
/// <summary>
/// Walks the meta-node graph from a chosen start node, always following the best outgoing
/// path, and records whether the walk closes a loop and whether the resulting sequence is
/// long enough to count as a successful assembly.
/// </summary>
private void attemptToCreateAssembly()
{
    // Start node: among meta nodes with a non-zero lowest reference position, take the one
    // maximizing (average k-mer coverage * number of constituent nodes).
    // TODO: This node should always be a good start node, but may be an erroneous one, check for this.
    // NOTE(review): an empty candidate set is not handled here -- confirm it cannot occur.
    // (An earlier, disabled approach selected the start node by matching the forward primer.)
    var startCandidates = gg.MetaNodes.Where(node => node.Lowest_Reference_Position != 0);
    var current = startCandidates.MaxBy(node => (node.AvgKmerCoverage * node.ConstituentNodes.Count));

    _greedyPathAssembly = new PossibleAssembly();

    if (current.CircularLoop)
    {
        // The start node is itself a loop: it is the whole assembly.
        FormsCompleteLoop = true;
        assemblyNodes.Add(current);
        _greedyPathAssembly.AddMetaNode(current);
        MinimumGreedySplit = 1.0;
    }
    else
    {
        MitoPaintedAssembler.RaiseStatusEvent("\tAttempting to find greedy path, frequencies of majority split below");

        // Try to loop back to an already visited node, moving along the best path of each
        // split (per the original notes: the next node with the highest k-mer coverage).
        while (true)
        {
            assemblyNodes.Add(current);
            _greedyPathAssembly.AddMetaNode(current);

            var outgoing = current.GetOutgoingNodes().ToList();
            if (outgoing.Count == 0)
            {
                // Dead end: the path cannot close.
                FormsCompleteLoop = false;
                SuccessfulAssembly = false;
                break;
            }

            SplitData split = new SplitData(outgoing);
            PathSplits.Add(split);

            if (outgoing.Count > 1)
            {
                // Track the weakest majority seen at any real fork.
                if (split.MaxFrequency < MinimumGreedySplit)
                {
                    MinimumGreedySplit = split.MaxFrequency;
                }

                string pathCount = "\tPossible Paths: " + outgoing.Count;
                string frequency = " Frequency: " + split.MaxFrequency.ToString("P1");
                string range = " Range: " + current.Lowest_Reference_Position.ToString()
                               + "-" + current.Highest_Reference_Position.ToString();
                MitoPaintedAssembler.RaiseStatusEvent(pathCount + frequency + range);
            }

            current = split.BestPath.NeighborNode;
            if (assemblyNodes.Contains(current))
            {
                // Reached a node already on the path: the loop is closed.
                FormsCompleteLoop = true;
                break;
            }
        }
    }

    // Now, did we form an assembly? Either the path closed, or its summed node length is
    // within 100 of the current AssemblyLength value.
    int pathLength = assemblyNodes.Sum(node => node.LengthOfNode);
    bool plausibleAssembly = FormsCompleteLoop || Math.Abs(pathLength - AssemblyLength) < 100;
    if (!plausibleAssembly)
    {
        return;
    }

    SuccessfulAssembly = true;
    _greedyPathAssembly.FinalizeAndOrientToReference();
    AssemblyLength = (int)_greedyPathAssembly.Sequence.Count;

    // TODO: More sophisticated criteria than a plain size threshold to validate the assembly.
    bool longEnough = AssemblyLength > StaticResources.SIZE_DIF_BETWEEN_LARGE_AND_SMALL_DELETION;
    SuccessfulAssembly = longEnough;

    string outcome = longEnough
        ? "\tSuccessful assembly of length: "
        : "\tAssembly failed. Only recovered sequence of length: ";
    MitoPaintedAssembler.RaiseStatusEvent(outcome + AssemblyLength.ToString());
}