예제 #1
0
        /// <summary>
        /// Builds a report for the given assembler: stores the CSV header line and a
        /// comma-separated data line holding one value per entry in <c>outputValues</c>.
        /// </summary>
        /// <param name="toReportOn">The assembler whose values are read for the data line.</param>
        /// <param name="result">The algorithm outcome, forwarded to the base constructor.</param>
        public AssemblyReport(MitoPaintedAssembler toReportOn, AlgorithmResult result = AlgorithmResult.Success) : base(result)
        {
            HeaderLineForCSV = CreateHeaderLine();

            // Convert.ToString returns an empty string for a null value, so a member that was
            // never set yields an empty CSV cell instead of throwing a NullReferenceException.
            // Non-null values are formatted exactly as a plain ToString() call would format them.
            this.DataLineForCSV = String.Join(",", outputValues.Select(x => Convert.ToString(x.GetValue(toReportOn))).ToArray());
        }
예제 #2
0
        /// <summary>
        /// Assembles the reads produced from <c>Filename</c> with a <c>MitoPaintedAssembler</c>,
        /// optionally writes the contigs and the depth-of-coverage output, and returns the
        /// assembler's report.
        /// </summary>
        /// <returns>
        /// A default-constructed <c>AssemblyReport</c> when <c>Skip_Assembly_Step</c> is set;
        /// otherwise the value of the assembler's <c>GetReport()</c>.
        /// </returns>
        protected AssemblyReport CreateAssemblyAndDepthOfCoverage()
        {
            if (Skip_Assembly_Step)
            {
                return(new AssemblyReport());
            }

            // The coverage plotter is only created when the coverage plot was not skipped.
            DepthOfCoverageGraphMaker coveragePlotter = !Skip_DepthOfCoveragePlot ?
                                                        new DepthOfCoverageGraphMaker() : null;

            IEnumerable<ISequence> reads = this.createSequenceProducer(this.Filename, coveragePlotter, true);
            TimeSpan  algorithmSpan = new TimeSpan();
            Stopwatch runAlgorithm  = new Stopwatch();

            //Step 1: Initialize assembler.
            Output.WriteLine(OutputLevel.Verbose, "\nAssemblying mtDNA and obtaining depth of coverage (if asked).");

            // StatusChanged is a static event, so every call to this method would otherwise add
            // another copy of the handler. Detaching first (a no-op when it is not attached)
            // keeps exactly one subscription and leaves the handler attached afterwards.
            MitoPaintedAssembler.StatusChanged -= this.StatusChanged;
            MitoPaintedAssembler.StatusChanged += this.StatusChanged;

            MitoPaintedAssembler assembler = new MitoPaintedAssembler()
            {
                DiagnosticFileOutputPrefix = DiagnosticFilePrefix,
                AllowErosion = AllowErosion,
                AlternateMinimumNodeCount    = MinimumNodeCount,
                DanglingLinksThreshold       = DangleThreshold,
                ErosionThreshold             = ErosionThreshold,
                AllowKmerLengthEstimation    = AllowKmerLengthEstimation,
                RedundantPathLengthThreshold = RedundantPathLengthThreshold,
                OutputIntermediateGraphSteps = OutputIntermediateGraphSteps,
                NoContigOutput     = NoContigOutput,
                ForceSqrtThreshold = ForceSqrtThreshold
            };

            // A threshold of -1 means low-coverage contig removal stays at the assembler's default.
            if (ContigCoverageThreshold != -1)
            {
                assembler.AllowLowCoverageContigRemoval = true;
                assembler.ContigCoverageThreshold       = ContigCoverageThreshold;
            }
            // The configured k-mer length is only applied when length estimation is disabled.
            if (!this.AllowKmerLengthEstimation)
            {
                assembler.KmerLength = this.KmerLength;
            }

            //Step 2: Assemble
            runAlgorithm.Restart();
            var assembly = assembler.Assemble(reads);

            runAlgorithm.Stop();
            algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "\tCompute time: {0}", runAlgorithm.Elapsed);
            }

            //Step 3: Report
            // Track the write time separately: when contig output is disabled the stopwatch still
            // holds the assembly time, which must not be added to the total a second time nor be
            // reported as the write time.
            TimeSpan writeContigsTime = TimeSpan.Zero;
            if (!NoContigOutput)
            {
                runAlgorithm.Restart();
                this.writeContigs(assembly);
                runAlgorithm.Stop();
                writeContigsTime = runAlgorithm.Elapsed;
            }
            algorithmSpan = algorithmSpan.Add(writeContigsTime);

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "\tWrite contigs time: {0}", writeContigsTime);
                Output.WriteLine(OutputLevel.Verbose, "\tTotal assembly runtime: {0}", algorithmSpan);
            }

            if (coveragePlotter != null)
            {
                coveragePlotter.OutputCoverageGraphAndCSV(DiagnosticFilePrefix);
            }

            return(assembler.GetReport());
        }
예제 #3
0
        /// <summary>
        /// Parses the read file, assembles the sequences, writes the contigs and returns the
        /// assembler's report line, which can be placed in a CSV output report.
        /// </summary>
        /// <returns>The string produced by the assembler's <c>GetReportLine()</c>.</returns>
        /// <remarks>
        /// When the assembler has diagnostic output enabled, the header line and the report line
        /// are also written to the file <c>ReportOutputPrefix + DiagnosticFilePrefix + ".csv"</c>.
        /// </remarks>
        public string AssembleSequencesReturningString()
        {
            string    toReturn      = "No String Set";
            TimeSpan  algorithmSpan = new TimeSpan();
            Stopwatch runAlgorithm  = new Stopwatch();
            FileInfo  refFileinfo   = new FileInfo(this.Filename);
            long      refFileLength = refFileinfo.Length;

            runAlgorithm.Restart();
            IEnumerable<ISequence> reads = this.ParseFile();

            runAlgorithm.Stop();
            algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);

            Output.WriteLine(OutputLevel.Information, StaticResources.AssemblyStarting);

            if (this.Verbose)
            {
                Output.WriteLine(OutputLevel.Verbose);
                Output.WriteLine(OutputLevel.Verbose, "Processed read file: {0}", Path.GetFullPath(this.Filename));
                Output.WriteLine(OutputLevel.Verbose, "   Read/Processing time: {0}", runAlgorithm.Elapsed);
                Output.WriteLine(OutputLevel.Verbose, "   File Size           : {0}", refFileLength);
                Output.WriteLine(OutputLevel.Verbose, "   k-mer Length        : {0}", this.KmerLength);
            }

            using (MitoPaintedAssembler assembler = new MitoPaintedAssembler())
            {
                // NOTE(review): this initial value is overwritten by this.AllowErosion below;
                // kept in case the property setter has effects not visible from here.
                assembler.AllowErosion = true;
                assembler.DiagnosticFileOutputPrefix = DiagnosticFilePrefix;
                Console.WriteLine("Prefix is: " + assembler.DiagnosticFileOutputPrefix);
                Console.WriteLine("Diagnostic Information On: " + assembler.OutputDiagnosticInformation.ToString());

                assembler.StatusChanged            += this.AssemblerStatusChanged;
                assembler.AllowErosion              = this.AllowErosion;
                assembler.AllowKmerLengthEstimation = this.AllowKmerLengthEstimation;

                // A threshold of -1 leaves low-coverage contig removal at the assembler's default.
                if (ContigCoverageThreshold != -1)
                {
                    assembler.AllowLowCoverageContigRemoval = true;
                    assembler.ContigCoverageThreshold       = ContigCoverageThreshold;
                }
                assembler.DanglingLinksThreshold = this.DangleThreshold;
                assembler.ErosionThreshold       = this.ErosionThreshold;
                // The configured k-mer length is only applied when length estimation is disabled.
                if (!this.AllowKmerLengthEstimation)
                {
                    assembler.KmerLength = this.KmerLength;
                }

                assembler.RedundantPathLengthThreshold = this.RedundantPathLengthThreshold;
                runAlgorithm.Restart();
                IDeNovoAssembly assembly = assembler.Assemble(reads);
                runAlgorithm.Stop();
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Compute time: {0}", runAlgorithm.Elapsed);
                }

                runAlgorithm.Restart();
                this.WriteContigs(assembly);
                runAlgorithm.Stop();
                toReturn = assembler.GetReportLine();
                if (assembler.OutputDiagnosticInformation)
                {
                    // The using block closes the file even when one of the writes throws,
                    // so the handle is never leaked.
                    using (var outFile = new StreamWriter(ReportOutputPrefix + DiagnosticFilePrefix + ".csv"))
                    {
                        outFile.WriteLine(MitoDataAssembler.AssemblyOutput.CreateHeaderLine());
                        outFile.WriteLine(toReturn);
                    }
                }
                // The stopwatch was stopped right after WriteContigs, so only the contig
                // writing time is added here, not the report file output.
                algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
                if (this.Verbose)
                {
                    Output.WriteLine(OutputLevel.Verbose);
                    Output.WriteLine(OutputLevel.Verbose, "Write contigs time: {0}", runAlgorithm.Elapsed);
                    Output.WriteLine(OutputLevel.Verbose, "Total runtime: {0}", algorithmSpan);
                }
            }
            return(toReturn);
        }
        /// <summary>
        /// Tries to build an assembly by walking the meta-node graph greedily: it starts at the
        /// meta node (with a non-zero lowest reference position) that maximizes
        /// <c>AvgKmerCoverage * ConstituentNodes.Count</c> and repeatedly follows the best
        /// outgoing path until an already visited node is reached or no outgoing node remains.
        /// Records the outcome in <c>FormsCompleteLoop</c>, <c>SuccessfulAssembly</c>,
        /// <c>MinimumGreedySplit</c> and <c>AssemblyLength</c>.
        /// </summary>
        private void attemptToCreateAssembly()
        {
            //TODO: This node should always be a good start node, but may be an erroneous one, check for this.
            var startCandidates = gg.MetaNodes.Where(node => node.Lowest_Reference_Position != 0);
            var current = startCandidates.MaxBy(node => (node.AvgKmerCoverage * node.ConstituentNodes.Count));

            _greedyPathAssembly = new PossibleAssembly();
            if (current.CircularLoop)
            {
                // The start node is a loop on its own, so it is the whole path.
                FormsCompleteLoop = true;
                assemblyNodes.Add(current);
                _greedyPathAssembly.AddMetaNode(current);
                MinimumGreedySplit = 1.0;
            }
            else
            {
                MitoPaintedAssembler.RaiseStatusEvent("\tAttempting to find greedy path, frequencies of majority split below");

                // Walk forward, always taking the best path of each split, until the walk
                // returns to a node that is already part of the path or hits a dead end.
                while (true)
                {
                    assemblyNodes.Add(current);
                    _greedyPathAssembly.AddMetaNode(current);

                    var successors = current.GetOutgoingNodes().ToList();
                    if (successors.Count == 0)
                    {
                        // Dead end: the path cannot close into a loop.
                        FormsCompleteLoop  = false;
                        SuccessfulAssembly = false;
                        break;
                    }

                    SplitData split = new SplitData(successors);
                    PathSplits.Add(split);

                    if (successors.Count > 1)
                    {
                        // Remember the weakest majority seen at any real split along the path.
                        if (split.MaxFrequency < MinimumGreedySplit)
                        {
                            MinimumGreedySplit = split.MaxFrequency;
                        }
                        MitoPaintedAssembler.RaiseStatusEvent("\tPossible Paths: " + successors.Count
                                                              + "  Frequency: " + split.MaxFrequency.ToString("P1")
                                                              + "  Range: " + current.Lowest_Reference_Position.ToString() + "-" + current.Highest_Reference_Position.ToString());
                    }

                    current = split.BestPath.NeighborNode;
                    if (assemblyNodes.Contains(current))
                    {
                        FormsCompleteLoop = true;
                        break;
                    }
                }
            }

            int totalNodeLength = assemblyNodes.Sum(node => node.LengthOfNode);

            // Nothing more to do unless the path closed into a loop or its summed node
            // length is within 100 of the current AssemblyLength.
            if (!FormsCompleteLoop && Math.Abs(totalNodeLength - AssemblyLength) >= 100)
            {
                return;
            }

            SuccessfulAssembly = true;
            _greedyPathAssembly.FinalizeAndOrientToReference();
            AssemblyLength = (int)_greedyPathAssembly.Sequence.Count;

            //TODO: More sophisticated criteria than a minimum length to validate the assembly
            bool longEnough = AssemblyLength > StaticResources.SIZE_DIF_BETWEEN_LARGE_AND_SMALL_DELETION;
            SuccessfulAssembly = longEnough;
            MitoPaintedAssembler.RaiseStatusEvent(longEnough
                ? "\tSuccessful assembly of length: " + AssemblyLength.ToString()
                : "\tAssembly failed.  Only recovered sequence of length: " + AssemblyLength.ToString());
        }