/// <summary>
/// Calculates %GC for bins on the chromosome.
/// </summary>
/// <remarks>
/// Loads the sequence for <c>Chromosome</c> from <c>FastaFile</c> and, for every bin in <c>Bins</c>,
/// stores the truncated integer percentage of GC bases found in [Start, Stop) in
/// <c>bin.GenomicBin.GC</c>. Positions holding 'n' or 'N' are left out of both the numerator and
/// the denominator; a bin with no countable bases gets a GC of 0.
/// </remarks>
private void PopulateBinGC()
{
    Console.WriteLine("Calculating %GC for each bin on {0}...", Chromosome);
    string referenceBases = FastaLoader.LoadFastaSequence(FastaFile, Chromosome);

    foreach (SampleGenomicBin bin in Bins)
    {
        long ntCount = 0;
        long gcCount = 0;

        for (int pos = bin.Start; pos < bin.Stop; pos++)
        {
            char nucleotide = referenceBases[pos];

            // Skip unknown bases in either case. Checking only 'n' would let an upper-case 'N'
            // be counted as a non-GC nucleotide and drag the bin's %GC down.
            if (nucleotide == 'n' || nucleotide == 'N')
            {
                continue;
            }

            ntCount++;
            if (Utilities.IsGC(nucleotide))
            {
                gcCount++;
            }
        }

        // Integer division truncates toward zero, matching the original (int) cast of the ratio.
        int gc = ntCount > 0 ? (int)(100 * gcCount / ntCount) : 0;
        bin.GenomicBin.GC = gc;
    }
}
/// <summary>
/// Loads one chromosome from a fasta file and registers its per-base bookkeeping arrays in the
/// three supplied dictionaries, keyed by chromosome name.
/// </summary>
/// <param name="fastaFile">Fasta file containing uniquemer-marked reference genome.</param>
/// <param name="chromosome">Chromosome to load; also the key written into each dictionary.</param>
/// <param name="coverageMode">
/// When equal to <c>CanvasCoverageMode.GCContentWeighted</c> a full-length fragment-length array
/// is allocated; for any other mode an empty array is stored instead.
/// </param>
/// <param name="possibleAlignments">
/// Receives a BitArray with one bit per base; a bit is set where the reference base is upper-case.
/// </param>
/// <param name="observedAlignments">Receives a new HitArray sized to the chromosome length.</param>
/// <param name="fragmentLengths">Receives the Int16 fragment-length array (possibly empty).</param>
static void InitializeAlignmentArrays(string fastaFile, string chromosome, CanvasCoverageMode coverageMode,
    IDictionary<string, BitArray> possibleAlignments,
    IDictionary<string, HitArray> observedAlignments,
    IDictionary<string, Int16[]> fragmentLengths)
{
    string sequence = FastaLoader.LoadFastaSequence(fastaFile, chromosome);
    int length = sequence.Length;

    var uniquePositions = new BitArray(length);
    possibleAlignments[chromosome] = uniquePositions;
    observedAlignments[chromosome] = new HitArray(length);

    // Fragment lengths are only tracked per base in GC-content-weighted mode.
    bool tracksFragmentLengths = coverageMode == CanvasCoverageMode.GCContentWeighted;
    fragmentLengths[chromosome] = new Int16[tracksFragmentLengths ? length : 0];

    // Mark which k-mers in the fasta file are unique. These are indicated by upper-case letters.
    int position = 0;
    foreach (char nucleotide in sequence)
    {
        if (char.IsUpper(nucleotide))
        {
            uniquePositions.Set(position, true);
        }
        position++;
    }
}
/// <summary>
/// Computes accuracy metrics for a set of CNV calls against the truth set and writes one results
/// file per size range into <paramref name="outputPath"/>.
/// </summary>
/// <param name="knownCN">Truth-set intervals keyed by chromosome.</param>
/// <param name="cnvCallsPath">Path of the CNV calls, forwarded to <c>WriteResults</c>.</param>
/// <param name="outputPath">Directory for the result files; created on each pass.</param>
/// <param name="includePassingOnly">
/// Forwarded to <c>CalculateMetrics</c> and <c>WriteResults</c>. When true the output file is
/// created or overwritten; when false results are appended to it.
/// </param>
/// <param name="options">
/// Evaluation options: size splitting, optional k-mer fasta, base file name, de novo threshold
/// and diploid skipping.
/// </param>
/// <param name="calls">CNV calls keyed by chromosome.</param>
public void ComputeAccuracy(Dictionary<string, List<CNInterval>> knownCN, string cnvCallsPath, string outputPath,
    bool includePassingOnly, EvaluateCnvOptions options, Dictionary<string, List<CnvCall>> calls)
{
    bool hasRegionsOfInterest = !_cnvChecker.RegionsOfInterest.Empty();

    // The first counter spans every event size; optional size-stratified counters follow it.
    var counters = new List<BaseCounter>();
    counters.Add(new BaseCounter(MaxCn, 0, Int32.MaxValue, hasRegionsOfInterest));
    if (options.SplitBySize)
    {
        int[] lowerBounds = { 0, 5000, 10000, 100000, 500000 };
        int[] upperBounds = { 4999, 9999, 99999, 499999, int.MaxValue };
        for (int range = 0; range < lowerBounds.Length; range++)
        {
            counters.Add(new BaseCounter(MaxCn, lowerBounds[range], upperBounds[range], hasRegionsOfInterest));
        }
    }

    // Make a note of how many bases in the truth set are not *actually* considered to be known bases,
    // using the "cnaqc" exclusion set. Not parallel here, as parallelism will be attained at the level
    // of the regression workflow.
    _cnvChecker.CountExcludedBasesInTruthSetIntervals(knownCN);

    Dictionary<string, BitArray> uniqueKmerMasks = options.KmerFa != null
        ? LoadUniqueKmerMasksForAccuracy(options.KmerFa, knownCN.Keys)
        : null;

    FileMode outputMode = includePassingOnly ? FileMode.Create : FileMode.Append;

    foreach (BaseCounter counter in counters)
    {
        _cnvChecker.InitializeIntervalMetrics(knownCN);
        var metrics = CalculateMetrics(knownCN, calls, counter, options.SkipDiploid, includePassingOnly, uniqueKmerMasks);

        string fileName = BuildAccuracyFileName(options, counter);

        var targetDirectory = new DirectoryLocation(outputPath);
        targetDirectory.Create();
        var targetFile = targetDirectory.GetFileLocation(fileName);

        using (FileStream stream = new FileStream(targetFile.FullName, outputMode, FileAccess.Write))
        using (StreamWriter writer = new StreamWriter(stream))
        {
            writer.NewLine = "\n";
            WriteResults(cnvCallsPath, writer, counter, includePassingOnly, metrics);
        }
    }
}

/// <summary>
/// Builds one BitArray per chromosome from a k-mer fasta, setting a bit wherever the base is upper-case.
/// </summary>
private static Dictionary<string, BitArray> LoadUniqueKmerMasksForAccuracy(string kmerFasta, IEnumerable<string> chromosomes)
{
    var masks = new Dictionary<string, BitArray>();
    foreach (string chromosome in chromosomes)
    {
        string sequence = FastaLoader.LoadFastaSequence(kmerFasta, chromosome);
        var mask = new BitArray(sequence.Length);

        // Mark which k-mers in the fasta file are unique. These are indicated by upper-case letters.
        int position = 0;
        foreach (char nucleotide in sequence)
        {
            if (char.IsUpper(nucleotide))
            {
                mask.Set(position, true);
            }
            position++;
        }

        masks[chromosome] = mask;
    }
    return masks;
}

/// <summary>
/// Composes the results file name: base name, optional "_denovo" tag, optional size-range tag, ".txt".
/// </summary>
private static string BuildAccuracyFileName(EvaluateCnvOptions options, BaseCounter counter)
{
    string name = $"{options.BaseFileName}";
    if (options.DQscoreThreshold.HasValue)
    {
        name += "_denovo";
    }

    // Only size-restricted counters get a range suffix; the all-sizes counter keeps the plain name.
    bool coversAllSizes = counter.MinSize == 0 && counter.MaxSize == int.MaxValue;
    if (!coversAllSizes)
    {
        string lowerTag = $"_{Math.Round(counter.MinSize / 1000.0)}kb";
        string upperTag;
        if (counter.MaxSize == int.MaxValue)
        {
            upperTag = "+";
        }
        else
        {
            upperTag = $"_{Math.Round(counter.MaxSize / 1000.0)}kb";
        }
        name += lowerTag + upperTag;
    }

    return name + ".txt";
}