Ejemplo n.º 1
0
        /// <summary>
        /// Merges several binned count files into one by replacing each bin's count with the weighted average
        /// of that bin's counts across all samples. Each sample is weighted by the reciprocal of its on-target
        /// median bin count, and the weights are normalized so that they sum to 1.
        /// When exactly one path is supplied, that file is simply copied to <paramref name="mergedBinnedPath"/>
        /// (nothing is written if the source file does not exist).
        /// </summary>
        /// <param name="binnedPaths">Paths of the per-sample binned files. The first file also supplies every
        /// output column other than the count column (index 3).</param>
        /// <param name="mergedBinnedPath">Path of the merged output file; an existing file is replaced.</param>
        /// <param name="manifest">Optional manifest forwarded to <c>BinCounts</c>.</param>
        private static void GetWeightedAverageBinCount(IEnumerable <string> binnedPaths, string mergedBinnedPath,
                                                       NexteraManifest manifest = null)
        {
            // Materialize once: the sequence is counted, indexed and read from repeatedly below, and
            // re-enumerating a lazy IEnumerable each time is both wasteful and potentially inconsistent.
            List <string> paths       = binnedPaths.ToList();
            int           sampleCount = paths.Count;

            if (sampleCount == 1) // copy file
            {
                string onlyPath = paths[0];
                if (File.Exists(onlyPath))
                {
                    if (File.Exists(mergedBinnedPath))
                    {
                        File.Delete(mergedBinnedPath);
                    }
                    File.Copy(onlyPath, mergedBinnedPath);
                }
                return;
            }

            // merge normal samples
            double[]        weights           = new double[sampleCount];
            List <double>[] binCountsBySample = new List <double> [sampleCount];
            for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
            {
                var binCounts = new BinCounts(paths[sampleIndex], manifest: manifest);
                // If a manifest is available, get the median of bins overlapping the targeted regions only.
                // For small panels, there could be a lot of bins with zero count and the median would be 0 if taken over all the bins, resulting in division by zero.
                double median = binCounts.OnTargetMedianBinCount;
                weights[sampleIndex]           = median > 0 ? 1.0 / median : 0;
                binCountsBySample[sampleIndex] = binCounts.AllCounts;
            }

            // Normalize so the weights sum to 1. If no sample has a positive median, every weight is 0 and
            // dividing by the sum would give 0/0 = NaN for every bin; fall back to equal weights instead.
            double weightSum = weights.Sum();
            for (int i = 0; i < sampleCount; i++)
            {
                weights[i] = weightSum > 0 ? weights[i] / weightSum : 1.0 / sampleCount;
            }

            // Compute the weighted average of bin counts across samples.
            // The first file is used as the template for all columns except the count.
            using (GzipReader reader = new GzipReader(paths.First()))
                using (GzipWriter writer = new GzipWriter(mergedBinnedPath))
                {
                    string line;
                    int    lineIdx = 0;
                    while ((line = reader.ReadLine()) != null)
                    {
                        string[] toks             = line.Split('\t');
                        double   weightedBinCount = 0;
                        for (int i = 0; i < sampleCount; i++)
                        {
                            weightedBinCount += weights[i] * binCountsBySample[i][lineIdx];
                        }
                        // NOTE(review): formatted with the current culture, as before — confirm that readers
                        // of this file parse the count with the same culture.
                        toks[3] = String.Format("{0}", weightedBinCount);
                        writer.WriteLine(String.Join("\t", toks));
                        lineIdx++;
                    }
                }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Copies the best control (normal) binned file to <paramref name="outputFile"/>.
        /// With more than one control file, the best one is the control whose median-normalized on-target bin
        /// counts give the smallest value (Item1) from <c>GetMeanSquaredLogRatios</c> against the sample's
        /// median-normalized on-target bin counts. With a single control file, that file is copied as is.
        /// </summary>
        /// <param name="outputFile">Destination of the copy; an existing file is deleted first.</param>
        /// <exception cref="System.InvalidOperationException">No control sample produced a mean squared
        /// log-ratio below positive infinity, so no best control could be selected.</exception>
        public void Run(IFileLocation outputFile)
        {
            // Materialize once so the control files are not re-enumerated for every Count()/ElementAt() call.
            var controlBinnedFiles    = _controlBinnedFiles.ToList();
            int bestNormalSampleIndex = 0;
            int normalSampleCount     = controlBinnedFiles.Count;

            if (normalSampleCount > 1) // find the best normal
            {
                List <double[]> binCountsByNormalSample = new List <double[]>(normalSampleCount);
                foreach (var controlBinnedFile in controlBinnedFiles)
                {
                    var           binCounts = new BinCounts(controlBinnedFile.FullName, manifest: _manifest);
                    List <double> counts    = binCounts.OnTargetCounts;
                    double        median    = binCounts.OnTargetMedianBinCount;
                    // If a manifest is available, get the median of bins overlapping the targeted regions only.
                    // For small panels, there could be a lot of bins with zero count and the median would be 0 if taken over all the bins, resulting in division by zero.
                    double weight = median > 0 ? 1.0 / median : 0;
                    binCountsByNormalSample.Add(counts.Select(cnt => cnt * weight).ToArray());
                }
                double[] tumorBinCounts;
                {
                    var           binCounts   = new BinCounts(_sampleBinnedFile.FullName, manifest: _manifest);
                    List <double> counts      = binCounts.OnTargetCounts;
                    double        tumorMedian = binCounts.OnTargetMedianBinCount;
                    double        tumorWeight = tumorMedian > 0 ? 1.0 / tumorMedian : 0;
                    tumorBinCounts = counts.Select(cnt => cnt * tumorWeight).ToArray();
                }

                // Find the best normal sample
                bestNormalSampleIndex = -1;
                double minMeanSquaredLogRatios = double.PositiveInfinity;
                for (int normalSampleIndex = 0; normalSampleIndex < normalSampleCount; normalSampleIndex++)
                {
                    // Item1 is the mean of squared log ratios; Item2 (the ignored bin count) is not used yet.
                    double meanSquaredLogRatios =
                        GetMeanSquaredLogRatios(tumorBinCounts, binCountsByNormalSample[normalSampleIndex]).Item1;
                    // TODO: Skip a (bad) normal sample if too many bins were ignored.
                    //       Donavan's script skips a normal sample if more than 100 log ratios is NA.
                    //       The cut-off is likely panel-dependent.
                    if (meanSquaredLogRatios < minMeanSquaredLogRatios)
                    {
                        minMeanSquaredLogRatios = meanSquaredLogRatios;
                        bestNormalSampleIndex   = normalSampleIndex;
                    }
                }

                // A NaN or infinite score never compares below +Infinity, so the index can still be -1 here.
                // Fail with a clear message instead of an opaque out-of-range error from the lookup below.
                if (bestNormalSampleIndex < 0)
                {
                    throw new System.InvalidOperationException(
                              "Unable to select the best normal sample: none of the " + normalSampleCount +
                              " normal samples has a mean squared log-ratio below infinity.");
                }
            }

            // copy file
            var srcBinnedFile = controlBinnedFiles[bestNormalSampleIndex];

            if (outputFile.Exists)
            {
                outputFile.Delete();
            }
            srcBinnedFile.CopyTo(outputFile);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Pick the best normal control that has the smallest mean squared log-ratios (LR2s) and copy its
        /// binned file to <paramref name="bestBinnedPath"/>. With a single normal, that normal is used without
        /// any comparison. Nothing is written if the selected source file does not exist.
        /// </summary>
        /// <param name="tumorBinnedPath">Path of the tumor binned file the normals are compared against.</param>
        /// <param name="normalBinnedPaths">Paths of the candidate normal binned files.</param>
        /// <param name="bestBinnedPath">Destination path for the selected normal; an existing file is replaced.</param>
        /// <param name="manifest">Optional manifest forwarded to <c>BinCounts</c>.</param>
        /// <exception cref="System.InvalidOperationException">No normal sample produced a mean squared
        /// log-ratio below positive infinity, so no best normal could be selected.</exception>
        private static void GetBestLR2BinCount(string tumorBinnedPath, IEnumerable<string> normalBinnedPaths, string bestBinnedPath,
            NexteraManifest manifest = null)
        {
            // Materialize once so the paths are not re-enumerated for every Count()/ElementAt() call.
            List<string> normalPaths = normalBinnedPaths.ToList();
            int bestNormalSampleIndex = 0;
            int normalSampleCount = normalPaths.Count;
            if (normalSampleCount > 1) // find the best normal
            {
                List<double[]> binCountsByNormalSample = new List<double[]>(normalSampleCount);
                foreach (string normalBinnedPath in normalPaths)
                {
                    var binCounts = new BinCounts(normalBinnedPath, manifest: manifest);
                    List<double> counts = binCounts.OnTargetCounts;
                    double median = binCounts.OnTargetMedianBinCount;
                    // If a manifest is available, get the median of bins overlapping the targeted regions only.
                    // For small panels, there could be a lot of bins with zero count and the median would be 0 if taken over all the bins, resulting in division by zero.
                    double weight = median > 0 ? 1.0 / median : 0;
                    binCountsByNormalSample.Add(counts.Select(cnt => cnt * weight).ToArray());
                }
                double[] tumorBinCounts;
                {
                    var binCounts = new BinCounts(tumorBinnedPath, manifest: manifest);
                    List<double> counts = binCounts.OnTargetCounts;
                    double tumorMedian = binCounts.OnTargetMedianBinCount;
                    double tumorWeight = tumorMedian > 0 ? 1.0 / tumorMedian : 0;
                    tumorBinCounts = counts.Select(cnt => cnt * tumorWeight).ToArray();
                }

                // Find the best normal sample
                bestNormalSampleIndex = -1;
                double minMeanSquaredLogRatios = double.PositiveInfinity;
                for (int normalSampleIndex = 0; normalSampleIndex < normalSampleCount; normalSampleIndex++)
                {
                    // Item1 is the mean of squared log ratios; Item2 (the ignored bin count) is not used yet.
                    double meanSquaredLogRatios =
                        GetMeanSquaredLogRatios(tumorBinCounts, binCountsByNormalSample[normalSampleIndex]).Item1;
                    // TODO: Skip a (bad) normal sample if too many bins were ignored.
                    //       Donavan's script skips a normal sample if more than 100 log ratios is NA.
                    //       The cut-off is likely panel-dependent.
                    if (meanSquaredLogRatios < minMeanSquaredLogRatios)
                    {
                        minMeanSquaredLogRatios = meanSquaredLogRatios;
                        bestNormalSampleIndex = normalSampleIndex;
                    }
                }

                // A NaN or infinite score never compares below +Infinity, so the index can still be -1 here.
                // Fail with a clear message instead of an opaque out-of-range error from the lookup below.
                if (bestNormalSampleIndex < 0)
                {
                    throw new System.InvalidOperationException(
                        "Unable to select the best normal sample: none of the " + normalSampleCount +
                        " normal samples has a mean squared log-ratio below infinity.");
                }
            }

            // copy file
            string srcBinnedPath = normalPaths[bestNormalSampleIndex];
            if (File.Exists(srcBinnedPath))
            {
                if (File.Exists(bestBinnedPath)) { File.Delete(bestBinnedPath); }
                File.Copy(srcBinnedPath, bestBinnedPath);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Merges several binned count files into one by replacing each bin's count with the weighted average
        /// of that bin's counts across all samples. Each sample is weighted by the reciprocal of its on-target
        /// median bin count, and the weights are normalized so that they sum to 1.
        /// When exactly one path is supplied, that file is simply copied to <paramref name="mergedBinnedPath"/>
        /// (nothing is written if the source file does not exist).
        /// </summary>
        /// <param name="binnedPaths">Paths of the per-sample binned files. The first file also supplies every
        /// output column other than the count column (index 3).</param>
        /// <param name="mergedBinnedPath">Path of the merged output file; an existing file is replaced.</param>
        /// <param name="manifest">Optional manifest forwarded to <c>BinCounts</c>.</param>
        private static void GetWeightedAverageBinCount(IEnumerable<string> binnedPaths, string mergedBinnedPath,
            NexteraManifest manifest = null)
        {
            // Materialize once: the sequence is counted, indexed and read from repeatedly below, and
            // re-enumerating a lazy IEnumerable each time is both wasteful and potentially inconsistent.
            List<string> paths = binnedPaths.ToList();
            int sampleCount = paths.Count;
            if (sampleCount == 1) // copy file
            {
                string onlyPath = paths[0];
                if (File.Exists(onlyPath))
                {
                    if (File.Exists(mergedBinnedPath)) { File.Delete(mergedBinnedPath); }
                    File.Copy(onlyPath, mergedBinnedPath);
                }
                return;
            }

            // merge normal samples
            double[] weights = new double[sampleCount];
            List<double>[] binCountsBySample = new List<double>[sampleCount];
            for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
            {
                var binCounts = new BinCounts(paths[sampleIndex], manifest: manifest);
                // If a manifest is available, get the median of bins overlapping the targeted regions only.
                // For small panels, there could be a lot of bins with zero count and the median would be 0 if taken over all the bins, resulting in division by zero.
                double median = binCounts.OnTargetMedianBinCount;
                weights[sampleIndex] = median > 0 ? 1.0 / median : 0;
                binCountsBySample[sampleIndex] = binCounts.AllCounts;
            }

            // Normalize so the weights sum to 1. If no sample has a positive median, every weight is 0 and
            // dividing by the sum would give 0/0 = NaN for every bin; fall back to equal weights instead.
            double weightSum = weights.Sum();
            for (int i = 0; i < sampleCount; i++)
            {
                weights[i] = weightSum > 0 ? weights[i] / weightSum : 1.0 / sampleCount;
            }

            // Compute the weighted average of bin counts across samples.
            // The first file is used as the template for all columns except the count.
            using (GzipReader reader = new GzipReader(paths.First()))
            using (GzipWriter writer = new GzipWriter(mergedBinnedPath))
            {
                string line;
                int lineIdx = 0;
                while ((line = reader.ReadLine()) != null)
                {
                    string[] toks = line.Split('\t');
                    double weightedBinCount = 0;
                    for (int i = 0; i < sampleCount; i++) { weightedBinCount += weights[i] * binCountsBySample[i][lineIdx]; }
                    // NOTE(review): formatted with the current culture, as before — confirm that readers
                    // of this file parse the count with the same culture.
                    toks[3] = String.Format("{0}", weightedBinCount);
                    writer.WriteLine(String.Join("\t", toks));
                    lineIdx++;
                }
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds a reference count file for the sample from the model and writes it to
        /// <paramref name="outputFile"/>. The sample's bin counts (floored at 1) are centered on the model's
        /// <c>Mu</c>, passed through <c>CanvasCommon.Utilities.Project</c> with the model's <c>Axes</c>,
        /// un-centered, floored at 1 again, and finally scaled by the on-target median of the ratios that
        /// <c>_ratioCalculator</c> produces between the sample and the unscaled reference.
        /// </summary>
        /// <param name="outputFile">Location the reference bin counts are written to.</param>
        public void Run(IFileLocation outputFile)
        {
            List <SampleGenomicBin> sampleBins = CanvasIO.ReadFromTextFile(_sampleBinnedFile.FullName);

            VerifyBinOrder(sampleBins);

            // set bin count to 1 if less than 1
            foreach (var bin in sampleBins)
            {
                bin.Count = Math.Max(1, bin.Count);
            }

            // center the sample
            var centeredSampleVector = Enumerable.Zip(sampleBins, _model.Mu, (bin, mu) => (double)bin.Count - mu.Count).ToArray();

            // project onto the axes
            var projectedSampleVector = CanvasCommon.Utilities.Project(centeredSampleVector, _model.Axes);

            // undo centering and set bin count to 1 if less than 1
            // Materialized because the vector is consumed twice below (temporary file and final output);
            // leaving it lazy would recompute the whole Zip on each enumeration.
            var referenceVector = Enumerable.Zip(_model.Mu, projectedSampleVector, (bin, count) => Math.Max(1, bin.Count + count)).ToArray();

            // calculate median ratio using a temporary reference count file
            double medianRatio;
            var    tempReferenceFile = new FileLocation(Path.GetTempFileName());
            try
            {
                // write temporary reference count file
                var tempReferenceBins = Enumerable.Zip(sampleBins, referenceVector,
                                                       (bin, count) => new SampleGenomicBin(bin.GenomicBin.Chromosome, bin.Start, bin.Stop, bin.GenomicBin.GC, (float)count));

                CanvasIO.WriteToTextFile(tempReferenceFile.FullName, tempReferenceBins);

                // The median is read inside the try so the temporary file still exists in case the
                // ratios are produced lazily from it.
                var ratios = new BinCounts(_ratioCalculator.Run(_sampleBinnedFile, tempReferenceFile), _manifest);
                medianRatio = ratios.OnTargetMedianBinCount;
            }
            finally
            {
                // delete temporary reference count file, even when writing or the ratio calculation fails
                tempReferenceFile.Delete();
            }

            // multiply reference counts by the median ratio
            var referenceBins = Enumerable.Zip(sampleBins, referenceVector,
                                               (bin, count) => new SampleGenomicBin(bin.GenomicBin.Chromosome, bin.Start, bin.Stop, bin.GenomicBin.GC, (float)(count * medianRatio)));

            // write reference count file
            CanvasIO.WriteToTextFile(outputFile.FullName, referenceBins);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Pick the best normal control that has the smallest mean squared log-ratios (LR2s) and copy its
        /// binned file to <paramref name="bestBinnedPath"/>. With a single normal, that normal is used without
        /// any comparison. Nothing is written if the selected source file does not exist.
        /// </summary>
        /// <param name="tumorBinnedPath">Path of the tumor binned file the normals are compared against.</param>
        /// <param name="normalBinnedPaths">Paths of the candidate normal binned files.</param>
        /// <param name="bestBinnedPath">Destination path for the selected normal; an existing file is replaced.</param>
        /// <param name="manifest">Optional manifest forwarded to <c>BinCounts</c>.</param>
        /// <exception cref="System.InvalidOperationException">No normal sample produced a mean squared
        /// log-ratio below positive infinity, so no best normal could be selected.</exception>
        private static void GetBestLR2BinCount(string tumorBinnedPath, IEnumerable <string> normalBinnedPaths, string bestBinnedPath,
                                               NexteraManifest manifest = null)
        {
            // Materialize once so the paths are not re-enumerated for every Count()/ElementAt() call.
            List <string> normalPaths           = normalBinnedPaths.ToList();
            int           bestNormalSampleIndex = 0;
            int           normalSampleCount     = normalPaths.Count;

            if (normalSampleCount > 1) // find the best normal
            {
                List <double[]> binCountsByNormalSample = new List <double[]>(normalSampleCount);
                foreach (string normalBinnedPath in normalPaths)
                {
                    var           binCounts = new BinCounts(normalBinnedPath, manifest: manifest);
                    List <double> counts    = binCounts.OnTargetCounts;
                    double        median    = binCounts.OnTargetMedianBinCount;
                    // If a manifest is available, get the median of bins overlapping the targeted regions only.
                    // For small panels, there could be a lot of bins with zero count and the median would be 0 if taken over all the bins, resulting in division by zero.
                    double weight = median > 0 ? 1.0 / median : 0;
                    binCountsByNormalSample.Add(counts.Select(cnt => cnt * weight).ToArray());
                }
                double[] tumorBinCounts;
                {
                    var           binCounts   = new BinCounts(tumorBinnedPath, manifest: manifest);
                    List <double> counts      = binCounts.OnTargetCounts;
                    double        tumorMedian = binCounts.OnTargetMedianBinCount;
                    double        tumorWeight = tumorMedian > 0 ? 1.0 / tumorMedian : 0;
                    tumorBinCounts = counts.Select(cnt => cnt * tumorWeight).ToArray();
                }

                // Find the best normal sample
                bestNormalSampleIndex = -1;
                double minMeanSquaredLogRatios = double.PositiveInfinity;
                for (int normalSampleIndex = 0; normalSampleIndex < normalSampleCount; normalSampleIndex++)
                {
                    // Item1 is the mean of squared log ratios; Item2 (the ignored bin count) is not used yet.
                    double meanSquaredLogRatios =
                        GetMeanSquaredLogRatios(tumorBinCounts, binCountsByNormalSample[normalSampleIndex]).Item1;
                    // TODO: Skip a (bad) normal sample if too many bins were ignored.
                    //       Donavan's script skips a normal sample if more than 100 log ratios is NA.
                    //       The cut-off is likely panel-dependent.
                    if (meanSquaredLogRatios < minMeanSquaredLogRatios)
                    {
                        minMeanSquaredLogRatios = meanSquaredLogRatios;
                        bestNormalSampleIndex   = normalSampleIndex;
                    }
                }

                // A NaN or infinite score never compares below +Infinity, so the index can still be -1 here.
                // Fail with a clear message instead of an opaque out-of-range error from the lookup below.
                if (bestNormalSampleIndex < 0)
                {
                    throw new System.InvalidOperationException(
                              "Unable to select the best normal sample: none of the " + normalSampleCount +
                              " normal samples has a mean squared log-ratio below infinity.");
                }
            }

            // copy file
            string srcBinnedPath = normalPaths[bestNormalSampleIndex];

            if (File.Exists(srcBinnedPath))
            {
                if (File.Exists(bestBinnedPath))
                {
                    File.Delete(bestBinnedPath);
                }
                File.Copy(srcBinnedPath, bestBinnedPath);
            }
        }