/// <summary>
/// Picks the control ("normal") sample whose median-normalized on-target bin counts are closest to
/// those of the tumor sample and copies that control's binned file to <paramref name="outputFile"/>.
/// </summary>
/// <remarks>
/// Scoring only happens when there is more than one control; otherwise the first control is used as is.
/// Closeness is the first item returned by <c>GetMeanSquaredLogRatios</c>; the smallest value wins and
/// ties keep the earliest control.
/// </remarks>
/// <param name="outputFile">Where the selected control file is copied to. An existing file is replaced.</param>
public void Run(IFileLocation outputFile)
{
    // Materialize once: Count() followed by ElementAt(i) in a loop walks a non-list sequence
    // repeatedly and re-evaluates deferred queries on every call.
    var controlBinnedFiles = _controlBinnedFiles.ToList();
    int normalSampleCount = controlBinnedFiles.Count;
    int bestNormalSampleIndex = 0;

    if (normalSampleCount > 1) // find the best normal
    {
        var binCountsByNormalSample = new List<double[]>(normalSampleCount);
        foreach (var controlBinnedFile in controlBinnedFiles)
        {
            binCountsByNormalSample.Add(LoadMedianNormalizedOnTargetCounts(controlBinnedFile.FullName));
        }

        double[] tumorBinCounts = LoadMedianNormalizedOnTargetCounts(_sampleBinnedFile.FullName);

        // Find the best normal sample
        bestNormalSampleIndex = -1;
        double minMeanSquaredLogRatios = double.PositiveInfinity;
        for (int normalSampleIndex = 0; normalSampleIndex < normalSampleCount; normalSampleIndex++)
        {
            // Item1 is the mean of squared log ratios; Item2 is the number of ignored bins.
            // TODO: Skip a (bad) normal sample if too many bins were ignored.
            //   Donavan's script skips a normal sample if more than 100 log ratios is NA.
            //   The cut-off is likely panel-dependent.
            double meanSquaredLogRatios =
                GetMeanSquaredLogRatios(tumorBinCounts, binCountsByNormalSample[normalSampleIndex]).Item1;

            if (meanSquaredLogRatios < minMeanSquaredLogRatios)
            {
                minMeanSquaredLogRatios = meanSquaredLogRatios;
                bestNormalSampleIndex = normalSampleIndex;
            }
        }
    }

    // NOTE: if no control produced a score below +infinity (e.g. every score is NaN), the index is
    // still -1 here and the lookup throws ArgumentOutOfRangeException, as it did before. The same
    // exception is raised when there are no control files at all.
    var srcBinnedFile = controlBinnedFiles[bestNormalSampleIndex];

    // When the output already is the selected control file there is nothing to copy. Deleting it
    // first would lose the data, since CopyTo returns without copying when both paths are identical.
    if (srcBinnedFile.FullName.Equals(outputFile.FullName))
    {
        return;
    }

    // copy file
    if (outputFile.Exists)
    {
        outputFile.Delete();
    }

    srcBinnedFile.CopyTo(outputFile);
}

/// <summary>
/// Reads the binned file at <paramref name="binnedPath"/> and returns its on-target bin counts
/// divided by the on-target median bin count (all zeros when that median is not positive).
/// </summary>
private double[] LoadMedianNormalizedOnTargetCounts(string binnedPath)
{
    var binCounts = new BinCounts(binnedPath, manifest: _manifest);
    List<double> counts = binCounts.OnTargetCounts;

    // If a manifest is available, get the median of bins overlapping the targeted regions only.
    // For small panels, there could be a lot of bins with zero count and the median would be 0
    // if taken over all the bins, resulting in division by zero.
    double median = binCounts.OnTargetMedianBinCount;
    double weight = median > 0 ? 1.0 / median : 0;

    return counts.Select(cnt => cnt * weight).ToArray();
}
/// <summary>
/// Writes a single control file to <paramref name="outputFile"/>: a plain copy when there is exactly
/// one control sample, otherwise a merge of all control samples.
/// </summary>
/// <remarks>
/// For the merge, each sample is weighted by the reciprocal of its on-target median bin count
/// (weight 0 when the median is not positive) and the weights are scaled to sum to 1. The lines of
/// the first control file are then written out with the fourth tab-separated column replaced by the
/// weighted average of the samples' counts for that line.
/// </remarks>
/// <param name="outputFile">Destination of the copied or merged control file.</param>
public void Run(IFileLocation outputFile)
{
    // Materialize once so Count/ElementAt/First do not each re-enumerate the source sequence.
    var controlBinnedFiles = _controlBinnedFiles.ToList();
    int sampleCount = controlBinnedFiles.Count;

    if (sampleCount == 1) // copy file
    {
        var onlyControl = controlBinnedFiles[0];

        // When the output already is the control file there is nothing to copy. Deleting it first
        // would lose the data, since CopyTo returns without copying when both paths are identical.
        if (!onlyControl.FullName.Equals(outputFile.FullName))
        {
            if (outputFile.Exists)
            {
                outputFile.Delete();
            }

            onlyControl.CopyTo(outputFile);
        }

        return;
    }

    // merge normal samples
    double[] weights = new double[sampleCount];
    List<double>[] binCountsBySample = new List<double>[sampleCount];
    for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
    {
        var binCounts = new BinCounts(controlBinnedFiles[sampleIndex].FullName, manifest: _manifest);

        // If a manifest is available, get the median of bins overlapping the targeted regions only.
        // For small panels, there could be a lot of bins with zero count and the median would be 0
        // if taken over all the bins, resulting in division by zero.
        double median = binCounts.OnTargetMedianBinCount;
        weights[sampleIndex] = median > 0 ? 1.0 / median : 0;
        binCountsBySample[sampleIndex] = binCounts.AllCounts;
    }

    // Scale so the weights sum to 1.
    // NOTE(review): if every sample has a non-positive median the sum is 0 and the weights (and
    // therefore every written count) become NaN; this matches the previous behavior.
    double weightSum = weights.Sum();
    for (int i = 0; i < sampleCount; i++)
    {
        weights[i] /= weightSum;
    }

    // Compute the weighted average of bin counts across samples, using the first control file as
    // the template for every column other than the count.
    using (GzipReader reader = new GzipReader(controlBinnedFiles.First().FullName))
    using (GzipWriter writer = new GzipWriter(outputFile.FullName))
    {
        string line;
        int lineIdx = 0;
        while ((line = reader.ReadLine()) != null)
        {
            string[] toks = line.Split('\t');

            double weightedBinCount = 0;
            for (int i = 0; i < sampleCount; i++)
            {
                weightedBinCount += weights[i] * binCountsBySample[i][lineIdx];
            }

            // Format with the invariant culture so the decimal separator is always '.', whatever
            // the locale of the machine running the tool.
            toks[3] = weightedBinCount.ToString(System.Globalization.CultureInfo.InvariantCulture);
            writer.WriteLine(String.Join("\t", toks));
            lineIdx++;
        }
    }
}
/// <summary>
/// Relocates this file to <paramref name="destination"/>, replacing whatever file is already there.
/// If the new location is on a different volume, this is equivalent to CopyTo + Delete.
/// </summary>
/// <remarks>
/// Nothing happens when both locations have the same <c>FullName</c>. Otherwise the destination's
/// directory is created when it is not null, and the destination is deleted before the move unless
/// it compares equal to this location.
/// </remarks>
/// <param name="destination">The location the file should end up at.</param>
/// <returns>
/// This instance when source and destination paths are identical; otherwise <paramref name="destination"/>.
/// </returns>
public IFileLocation MoveTo(IFileLocation destination)
{
    string targetPath = destination.FullName;

    // Plain path comparison only: doesn't handle symlinks...
    if (FullName.Equals(targetPath))
    {
        return this;
    }

    var targetDirectory = destination.Directory;
    if (targetDirectory != null)
    {
        targetDirectory.Create();
    }

    bool isSameLocation = Equals(destination);
    if (!isSameLocation)
    {
        destination.Delete();
    }

    File.Move(FullName, targetPath);
    return destination;
}
/// <summary>
/// Duplicates this file at <paramref name="destination"/>, replacing whatever file is already there.
/// </summary>
/// <remarks>
/// Nothing happens when both locations have the same <c>FullName</c>; otherwise the destination is
/// deleted and the file is copied to it.
/// </remarks>
/// <param name="destination">The location the copy should be written to.</param>
/// <returns>
/// This instance when source and destination paths are identical; otherwise <paramref name="destination"/>.
/// </returns>
public IFileLocation CopyTo(IFileLocation destination)
{
    string targetPath = destination.FullName;

    // Plain path comparison only: doesn't handle symlinks...
    bool needsCopy = !FullName.Equals(targetPath);
    if (needsCopy)
    {
        destination.Delete();
        File.Copy(FullName, targetPath);
        return destination;
    }

    return this;
}