Пример #1
0
        /// <summary>
        /// Selects the control (normal) binned file that best matches the sample and copies it to
        /// <paramref name="outputFile"/>.
        /// </summary>
        /// <remarks>
        /// With more than one control file, the on-target bin counts of each control and of the sample
        /// are scaled by the reciprocal of their on-target median bin count, and the control with the
        /// smallest value reported by <c>GetMeanSquaredLogRatios</c> is chosen. Otherwise the first
        /// control file is used without any comparison.
        /// </remarks>
        /// <param name="outputFile">Destination of the copy; deleted first if it already exists.</param>
        /// <exception cref="System.InvalidOperationException">
        /// Several control files were given but none produced a comparable score
        /// (every score was NaN or positive infinity).
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">There are no control files at all.</exception>
        public void Run(IFileLocation outputFile)
        {
            // Materialize once: Count()/ElementAt() on the raw sequence would re-enumerate it on every call.
            var controlBinnedFiles = _controlBinnedFiles.ToList();
            int bestNormalSampleIndex = 0;

            if (controlBinnedFiles.Count > 1) // find the best normal
            {
                List<double[]> binCountsByNormalSample = controlBinnedFiles
                    .Select(controlBinnedFile => LoadMedianScaledOnTargetCounts(controlBinnedFile.FullName))
                    .ToList();
                double[] tumorBinCounts = LoadMedianScaledOnTargetCounts(_sampleBinnedFile.FullName);

                // Find the best normal sample
                bestNormalSampleIndex = -1;
                double minMeanSquaredLogRatios = double.PositiveInfinity;
                for (int normalSampleIndex = 0; normalSampleIndex < binCountsByNormalSample.Count; normalSampleIndex++)
                {
                    // Item1 is the mean of squared log ratios; Item2 (the number of ignored bins) is not used yet.
                    // TODO: Skip a (bad) normal sample if too many bins were ignored.
                    //       Donavan's script skips a normal sample if more than 100 log ratios is NA.
                    //       The cut-off is likely panel-dependent.
                    double meanSquaredLogRatios =
                        GetMeanSquaredLogRatios(tumorBinCounts, binCountsByNormalSample[normalSampleIndex]).Item1;
                    if (meanSquaredLogRatios < minMeanSquaredLogRatios)
                    {
                        minMeanSquaredLogRatios = meanSquaredLogRatios;
                        bestNormalSampleIndex   = normalSampleIndex;
                    }
                }

                // A NaN or +infinity score never compares below the initial minimum, so the index can
                // still be -1 here. Fail with a clear message instead of an opaque index error.
                if (bestNormalSampleIndex < 0)
                {
                    throw new System.InvalidOperationException(
                        "Unable to select a best normal sample: no control sample produced a comparable mean squared log ratio.");
                }
            }

            // copy file
            var srcBinnedFile = controlBinnedFiles[bestNormalSampleIndex];

            if (outputFile.Exists)
            {
                outputFile.Delete();
            }
            srcBinnedFile.CopyTo(outputFile);
        }

        /// <summary>
        /// Loads the on-target bin counts of a binned file and scales them by the reciprocal of the
        /// on-target median bin count.
        /// </summary>
        /// <param name="binnedFilePath">Path of the binned file to load.</param>
        /// <returns>The scaled on-target counts; all zeros when the median is not positive.</returns>
        private double[] LoadMedianScaledOnTargetCounts(string binnedFilePath)
        {
            var           binCounts = new BinCounts(binnedFilePath, manifest: _manifest);
            List<double>  counts    = binCounts.OnTargetCounts;
            double        median    = binCounts.OnTargetMedianBinCount;
            // If a manifest is available, the median is taken over bins overlapping the targeted regions only.
            // For small panels, there could be a lot of bins with zero count and the median would be 0 if taken
            // over all the bins; a non-positive median gets weight 0 to avoid division by zero.
            double weight = median > 0 ? 1.0 / median : 0;
            return counts.Select(cnt => cnt * weight).ToArray();
        }
        /// <summary>
        /// Writes a reference binned file to <paramref name="outputFile"/>: a copy of the control file
        /// when there is exactly one, otherwise a weighted average of the bin counts of all control files.
        /// </summary>
        /// <remarks>
        /// Each control sample is weighted by the reciprocal of its on-target median bin count
        /// (weight 0 when the median is not positive), and the weights are then normalized to sum to 1.
        /// The output reuses the lines of the first control file, replacing the fourth tab-separated
        /// field with the weighted count, formatted with the invariant culture.
        /// </remarks>
        /// <param name="outputFile">File to write; in the single-sample case an existing file is deleted first.</param>
        public void Run(IFileLocation outputFile)
        {
            // Materialize once: Count()/ElementAt() on the raw sequence would re-enumerate it on every call.
            var controlBinnedFiles = _controlBinnedFiles.ToList();
            int sampleCount        = controlBinnedFiles.Count;

            if (sampleCount == 1) // copy file
            {
                if (outputFile.Exists)
                {
                    outputFile.Delete();
                }
                controlBinnedFiles[0].CopyTo(outputFile);
                return;
            }

            // merge normal samples
            double[]       weights           = new double[sampleCount];
            List<double>[] binCountsBySample = new List<double>[sampleCount];
            for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
            {
                var binCounts = new BinCounts(controlBinnedFiles[sampleIndex].FullName, manifest: _manifest);
                List<double> counts = binCounts.AllCounts;
                // If a manifest is available, get the median of bins overlapping the targeted regions only.
                // For small panels, there could be a lot of bins with zero count and the median would be 0 if taken over all the bins, resulting in division by zero.
                double median = binCounts.OnTargetMedianBinCount;
                weights[sampleIndex]           = median > 0 ? 1.0 / median : 0;
                binCountsBySample[sampleIndex] = counts;
            }

            // Normalize so the weights sum to 1.
            // NOTE(review): if every sample has a non-positive median, weightSum is 0 and the weights become NaN.
            double weightSum = weights.Sum();
            for (int i = 0; i < sampleCount; i++)
            {
                weights[i] /= weightSum;
            }

            // Compute the weighted average of bin counts across samples, line by line.
            using (GzipReader reader = new GzipReader(controlBinnedFiles.First().FullName))
            {
                using (GzipWriter writer = new GzipWriter(outputFile.FullName))
                {
                    string line;
                    int    lineIdx = 0;
                    while ((line = reader.ReadLine()) != null)
                    {
                        string[] toks = line.Split('\t');
                        double weightedBinCount = 0;
                        for (int i = 0; i < sampleCount; i++)
                        {
                            weightedBinCount += weights[i] * binCountsBySample[i][lineIdx];
                        }
                        // Invariant culture keeps the decimal separator a '.' regardless of the machine's locale.
                        toks[3] = weightedBinCount.ToString(System.Globalization.CultureInfo.InvariantCulture);
                        writer.WriteLine(String.Join("\t", toks));
                        lineIdx++;
                    }
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Moves this file to <paramref name="destination"/>, replacing whatever file is already there.
        /// The destination's directory is created first when the destination reports one.
        /// </summary>
        /// <param name="destination">Where the file should end up.</param>
        /// <returns>
        /// This instance when <paramref name="destination"/> has the same <c>FullName</c>
        /// (nothing is moved); otherwise <paramref name="destination"/>.
        /// </returns>
        public IFileLocation MoveTo(IFileLocation destination)
        {
            // Plain path comparison only -- doesn't handle symlinks...
            bool samePath = FullName.Equals(destination.FullName);
            if (samePath)
            {
                return this;
            }

            var targetDirectory = destination.Directory;
            if (targetDirectory != null)
            {
                targetDirectory.Create();
            }

            // Clear out an existing destination file unless it is considered equal to this location.
            bool sameLocation = Equals(destination);
            if (!sameLocation)
            {
                destination.Delete();
            }

            File.Move(FullName, destination.FullName);
            return destination;
        }
Пример #4
0
        /// <summary>
        /// Copies this file to <paramref name="destination"/>, replacing whatever file is already there.
        /// </summary>
        /// <param name="destination">Where the copy should be written.</param>
        /// <returns>
        /// This instance when <paramref name="destination"/> has the same <c>FullName</c>
        /// (nothing is copied); otherwise <paramref name="destination"/>.
        /// </returns>
        public IFileLocation CopyTo(IFileLocation destination)
        {
            // Plain path comparison only -- doesn't handle symlinks...
            bool differentPath = !FullName.Equals(destination.FullName);
            if (differentPath)
            {
                // Remove the destination first so the copy never collides with an existing file.
                destination.Delete();
                File.Copy(FullName, destination.FullName);
                return destination;
            }

            return this;
        }