예제 #1
0
            /// <summary>
            /// Computes the GC percentage of every bin on the chromosome and stores it on the bin.
            /// </summary>
            /// <remarks>
            /// For each bin, the positions from <c>bin.Start</c> (inclusive) to <c>bin.Stop</c> (exclusive) are
            /// scanned in the loaded reference sequence. Positions holding a lowercase 'n' are ignored; all
            /// other positions are counted, and those for which <c>Utilities.IsGC</c> returns true are counted
            /// as GC. The resulting percentage is truncated to an integer; a bin without any counted
            /// position gets 0.
            /// </remarks>
            private void PopulateBinGC()
            {
                Console.WriteLine("Calculating %GC for each bin on {0}...", Chromosome);
                string sequence = FastaLoader.LoadFastaSequence(FastaFile, Chromosome);

                foreach (SampleGenomicBin bin in Bins)
                {
                    int countedBases = 0;
                    int gcBases = 0;
                    for (int position = bin.Start; position < bin.Stop; position++)
                    {
                        char nucleotide = sequence[position];
                        // Only a lowercase 'n' is excluded from the denominator.
                        if (nucleotide != 'n')
                        {
                            countedBases++;
                            if (Utilities.IsGC(nucleotide))
                            {
                                gcBases++;
                            }
                        }
                    }

                    int percentGC = 0;
                    if (countedBases > 0)
                    {
                        // Floating-point division followed by truncation toward zero.
                        percentGC = (int)(100.0 * gcBases / countedBases);
                    }
                    bin.GenomicBin.GC = percentGC;
                }
            }
예제 #2
0
        /// <summary>
        /// Loads one chromosome from a fasta file and registers its per-base arrays in the supplied
        /// dictionaries, keyed by chromosome name: a BitArray of possible alignments, a HitArray of
        /// observed alignments and an Int16 array of fragment lengths.
        /// </summary>
        /// <param name="fastaFile">Fasta file containing uniquemer-marked reference genome.</param>
        /// <param name="chromosome">Chromosome to load; also the key under which the arrays are stored.</param>
        /// <param name="coverageMode">When GCContentWeighted, a fragment length slot is allocated for every base; otherwise an empty array is stored.</param>
        /// <param name="possibleAlignments">Stores which alignments are possible (perfect and unique).</param>
        /// <param name="observedAlignments">Stores observed alignments from a sample.</param>
        /// <param name="fragmentLengths">Stores fragment length (Int16).</param>
        static void InitializeAlignmentArrays(string fastaFile, string chromosome, CanvasCoverageMode coverageMode, IDictionary <string, BitArray> possibleAlignments, IDictionary <string, HitArray> observedAlignments, IDictionary <string, Int16[]> fragmentLengths)
        {
            string sequence = FastaLoader.LoadFastaSequence(fastaFile, chromosome);
            int baseCount = sequence.Length;

            var uniquePositions = new BitArray(baseCount);
            possibleAlignments[chromosome] = uniquePositions;
            observedAlignments[chromosome] = new HitArray(baseCount);

            // Per-base fragment lengths are only allocated in GC-content-weighted mode.
            fragmentLengths[chromosome] = coverageMode == CanvasCoverageMode.GCContentWeighted
                ? new Int16[baseCount]
                : new Int16[0];

            // Unique k-mers are indicated by upper-case letters in the fasta file; flag those positions.
            int index = 0;
            foreach (char symbol in sequence)
            {
                if (char.IsUpper(symbol))
                {
                    uniquePositions.Set(index, true);
                }
                index++;
            }
        }
예제 #3
0
        /// <summary>
        /// Calculates metrics for the supplied calls against the truth set and writes one results file per
        /// size range into the output directory.
        /// </summary>
        /// <param name="knownCN">Truth-set intervals; the keys are used as chromosome names when loading the k-mer fasta.</param>
        /// <param name="cnvCallsPath">Path of the CNV calls, forwarded to WriteResults.</param>
        /// <param name="outputPath">Directory in which the result files are written; it is created if needed.</param>
        /// <param name="includePassingOnly">Forwarded to CalculateMetrics and WriteResults. When true the result file is created (overwritten); when false results are appended to it.</param>
        /// <param name="options">Evaluation options: SplitBySize, KmerFa, SkipDiploid, BaseFileName and DQscoreThreshold are read here.</param>
        /// <param name="calls">CNV calls, forwarded to CalculateMetrics.</param>
        public void ComputeAccuracy(Dictionary <string, List <CNInterval> > knownCN, string cnvCallsPath, string outputPath, bool includePassingOnly, EvaluateCnvOptions options, Dictionary <string, List <CnvCall> > calls)
        {
            bool regionsOfInterest = !_cnvChecker.RegionsOfInterest.Empty();

            // The first counter covers every size; the optional ones split the results by size range.
            var baseCounters = new List <BaseCounter>();
            baseCounters.Add(new BaseCounter(MaxCn, 0, Int32.MaxValue, regionsOfInterest));
            if (options.SplitBySize)
            {
                baseCounters.AddRange(new[]
                {
                    new BaseCounter(MaxCn, 0, 4999, regionsOfInterest),
                    new BaseCounter(MaxCn, 5000, 9999, regionsOfInterest),
                    new BaseCounter(MaxCn, 10000, 99999, regionsOfInterest),
                    new BaseCounter(MaxCn, 100000, 499999, regionsOfInterest),
                    new BaseCounter(MaxCn, 500000, int.MaxValue, regionsOfInterest)
                });
            }

            // Make a note of how many bases in the truth set are not *actually* considered to be known bases,
            // using the "cnaqc" exclusion set.
            // Not parallel here as parallelism will be attained at the level of regression workflow.
            _cnvChecker.CountExcludedBasesInTruthSetIntervals(knownCN);

            // Stays null when no k-mer fasta was supplied.
            Dictionary <string, BitArray> uniqueKmerMasks = null;
            if (options.KmerFa != null)
            {
                uniqueKmerMasks = new Dictionary <string, BitArray>();
                foreach (string chromosome in knownCN.Keys)
                {
                    string sequence = FastaLoader.LoadFastaSequence(options.KmerFa, chromosome);
                    var mask = new BitArray(sequence.Length);

                    // Unique k-mers are indicated by upper-case letters in the fasta file; flag those positions.
                    int offset = 0;
                    foreach (char symbol in sequence)
                    {
                        if (char.IsUpper(symbol))
                        {
                            mask.Set(offset, true);
                        }
                        offset++;
                    }
                    uniqueKmerMasks[chromosome] = mask;
                }
            }

            foreach (BaseCounter counter in baseCounters)
            {
                _cnvChecker.InitializeIntervalMetrics(knownCN);
                var metrics = CalculateMetrics(knownCN, calls, counter, options.SkipDiploid, includePassingOnly, uniqueKmerMasks);

                // File name: base name, optional "_denovo", optional size range in kb, then ".txt".
                string fileName = $"{options.BaseFileName}";
                if (options.DQscoreThreshold.HasValue)
                {
                    fileName += "_denovo";
                }

                bool coversAllSizes = counter.MinSize == 0 && counter.MaxSize == int.MaxValue;
                if (!coversAllSizes)
                {
                    string lowerBound = $"_{Math.Round(counter.MinSize / 1000.0)}kb";
                    // An open-ended range is marked with "+" instead of an upper bound.
                    string upperBound = counter.MaxSize == int.MaxValue
                        ? "+"
                        : $"_{ Math.Round(counter.MaxSize / 1000.0)}kb";
                    fileName += lowerBound;
                    fileName += upperBound;
                }
                fileName += ".txt";

                var outputDir = new DirectoryLocation(outputPath);
                outputDir.Create();
                var resultsFile = outputDir.GetFileLocation(fileName);

                // Overwrite when reporting passing calls only; otherwise append to the existing file.
                FileMode openMode = includePassingOnly ? FileMode.Create : FileMode.Append;
                using (var stream = new FileStream(resultsFile.FullName, openMode, FileAccess.Write))
                using (var writer = new StreamWriter(stream))
                {
                    writer.NewLine = "\n";
                    WriteResults(cnvCallsPath, writer, counter, includePassingOnly, metrics);
                }
            }
        }