/// <summary>
/// Create a sequence enumerator that filters the reads and adds them to the depth of coverage counter
/// if necessary.
/// </summary>
/// <param name="fileName">Filename to load data from</param>
/// <param name="coveragePlotter">Optional coverage counter that is handed to the read filter; may be null.</param>
/// <param name="alsoGetNuclearHits">
/// When true, reads are obtained from BAMNuclearChromosomeReadGenerator; otherwise a BAMSequenceParser
/// (optionally restricted to ChromosomeName) is used.
/// </param>
/// <returns>Enumerable set of CompactSAMSequence elements that passed the read filter</returns>
private IEnumerable<CompactSAMSequence> createSequenceProducer(string fileName, DepthOfCoverageGraphMaker coveragePlotter = null, bool alsoGetNuclearHits = false)
{
    // Coverage plots are disabled (with a message) when the input is not a BAM file.
    if (!Skip_DepthOfCoveragePlot && !Helper.IsBAM(fileName))
    {
        Skip_DepthOfCoveragePlot = true;
        Output.WriteLine(OutputLevel.Error, "Warning: No coverage plots can be made without an input BAM File");
    }

    IEnumerable<CompactSAMSequence> sequences;
    if (alsoGetNuclearHits)
    {
        sequences = BAMNuclearChromosomeReadGenerator.GetNuclearAndMitochondrialReads(fileName);
    }
    else
    {
        // Use the fileName argument (not the Filename member) so every branch of this
        // method reads from the file the caller asked for.
        var parser = new BAMSequenceParser(fileName);
        if (ChromosomeName != string.Empty)
        {
            parser.ChromosomeToGet = ChromosomeName;
        }
        sequences = parser.Parse();
    }

    //Filter by quality
    return ReadFilter.FilterReads(sequences, coveragePlotter);
}
/// <summary>
/// Lazily filters a stream of reads, yielding only those that pass the duplicate,
/// length and mean-quality checks, and optionally feeding each accepted read to a
/// coverage counter before it is yielded.
/// </summary>
/// <param name="preFiltered">Reads to examine; enumerated once, in order.</param>
/// <param name="coverageCounter">
/// Optional counter; when non-null, ProcessCountCoverageFromSequence is called with every
/// accepted read (after any trimming) before the read is yielded.
/// </param>
/// <returns>The accepted reads. Accepted reads may have been trimmed in place via TrimSequence.</returns>
public static IEnumerable<CompactSAMSequence> FilterReads(IEnumerable<CompactSAMSequence> preFiltered, DepthOfCoverageGraphMaker coverageCounter = null)
{
    foreach (var toFilter in preFiltered)
    {
        // Drop reads flagged as duplicates when duplicate filtering is enabled.
        if (FilterDuplicates && ((toFilter.SAMFlags & SAMFlags.Duplicate) == SAMFlags.Duplicate))
        {
            continue;
        }

        int[] vals = toFilter.GetQualityScores();

        // Walk back from the end to the last base whose quality reaches the trim
        // threshold; the result is -1 when no base qualifies.
        int lastAcceptableBase = (int)toFilter.Count - 1;
        while (lastAcceptableBase >= 0)
        {
            if (vals[lastAcceptableBase] >= _trimEndQuality)
            {
                break;
            }
            lastAcceptableBase--;
        }

        // Note: the index of the last acceptable base (not the retained length)
        // is what must exceed _minReadLength.
        if (!(lastAcceptableBase > _minReadLength))
        {
            continue;
        }

        //check mean (over the retained prefix only)
        double mean = vals.Take(lastAcceptableBase + 1).Average();
        if (!(mean > _meanRequiredQuality))
        {
            continue;
        }

        //only trim if necessary.
        if (lastAcceptableBase < (toFilter.Count - 1))
        {
            toFilter.TrimSequence(lastAcceptableBase + 1);
        }

        //Process coverage before returning the read
        if (coverageCounter != null)
        {
            coverageCounter.ProcessCountCoverageFromSequence(toFilter);
        }

        yield return toFilter;
    }
}
/// <summary>
/// Assembles the reads from <c>Filename</c> with a MitoPaintedAssembler, optionally writes the
/// contigs, and optionally emits the depth-of-coverage graph/CSV.
/// </summary>
/// <returns>
/// An empty AssemblyReport when Skip_Assembly_Step is set; otherwise the report produced by the assembler.
/// </returns>
protected AssemblyReport CreateAssemblyAndDepthOfCoverage()
{
    if (Skip_Assembly_Step)
    {
        return new AssemblyReport();
    }

    DepthOfCoverageGraphMaker coveragePlotter = !Skip_DepthOfCoveragePlot ? new DepthOfCoverageGraphMaker() : null;
    IEnumerable<ISequence> reads = this.createSequenceProducer(this.Filename, coveragePlotter, true);
    TimeSpan algorithmSpan = TimeSpan.Zero;
    Stopwatch runAlgorithm = new Stopwatch();

    //Step 1: Initialize assembler.
    Output.WriteLine(OutputLevel.Verbose, "\nAssemblying mtDNA and obtaining depth of coverage (if asked).");
    // NOTE(review): this handler is never removed; if StatusChanged is a static event,
    // repeated calls will accumulate subscriptions - confirm whether that is intended.
    MitoPaintedAssembler.StatusChanged += this.StatusChanged;
    MitoPaintedAssembler assembler = new MitoPaintedAssembler()
    {
        DiagnosticFileOutputPrefix = DiagnosticFilePrefix,
        AllowErosion = AllowErosion,
        AlternateMinimumNodeCount = MinimumNodeCount,
        DanglingLinksThreshold = DangleThreshold,
        ErosionThreshold = ErosionThreshold,
        AllowKmerLengthEstimation = AllowKmerLengthEstimation,
        RedundantPathLengthThreshold = RedundantPathLengthThreshold,
        OutputIntermediateGraphSteps = OutputIntermediateGraphSteps,
        NoContigOutput = NoContigOutput,
        ForceSqrtThreshold = ForceSqrtThreshold
    };
    // -1 means "no contig coverage threshold requested".
    if (ContigCoverageThreshold != -1)
    {
        assembler.AllowLowCoverageContigRemoval = true;
        assembler.ContigCoverageThreshold = ContigCoverageThreshold;
    }
    if (!this.AllowKmerLengthEstimation)
    {
        assembler.KmerLength = this.KmerLength;
    }

    //Step 2: Assemble
    runAlgorithm.Restart();
    var assembly = assembler.Assemble(reads);
    runAlgorithm.Stop();
    algorithmSpan = algorithmSpan.Add(runAlgorithm.Elapsed);
    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "\tCompute time: {0}", runAlgorithm.Elapsed);
    }

    //Step 3: Report
    // Track the write time separately: when contig output is skipped the stopwatch
    // still holds the assembly time, which must not be counted or reported again.
    TimeSpan writeElapsed = TimeSpan.Zero;
    if (!NoContigOutput)
    {
        runAlgorithm.Restart();
        this.writeContigs(assembly);
        runAlgorithm.Stop();
        writeElapsed = runAlgorithm.Elapsed;
    }
    algorithmSpan = algorithmSpan.Add(writeElapsed);
    if (this.Verbose)
    {
        Output.WriteLine(OutputLevel.Verbose);
        Output.WriteLine(OutputLevel.Verbose, "\tWrite contigs time: {0}", writeElapsed);
        Output.WriteLine(OutputLevel.Verbose, "\tTotal assembly runtime: {0}", algorithmSpan);
    }

    if (coveragePlotter != null)
    {
        coveragePlotter.OutputCoverageGraphAndCSV(DiagnosticFilePrefix);
    }
    return assembler.GetReport();
}