/// <summary>
/// Writes a copy-number data (cnd) file: one CSV row per ratio bin, combining the
/// matching fragment-count bin, reference-count bin and ratio value.
/// </summary>
/// <param name="fragmentCountFile">File whose bins supply the "Fragment Count" column and the bin coordinates.</param>
/// <param name="referenceCountFile">File whose bins supply the "Reference Count" column.</param>
/// <param name="ratios">Ratio bins; their count is written to the "Unsmoothed Log Ratio" column.</param>
/// <param name="outputFile">Destination file; created, or overwritten if it already exists.</param>
/// <remarks>
/// The fragment and reference streams are advanced until they line up with the current
/// ratio bin, so bins absent from <paramref name="ratios"/> are skipped. Writing stops
/// as soon as any of the three streams is exhausted at the start of an iteration.
/// </remarks>
public static void WriteCndFile(IFileLocation fragmentCountFile, IFileLocation referenceCountFile,
    IEnumerable<SampleGenomicBin> ratios, IFileLocation outputFile)
{
    IEnumerable<SampleGenomicBin> fragmentBins = CanvasIO.IterateThroughTextFile(fragmentCountFile.FullName);
    IEnumerable<SampleGenomicBin> referenceBins = CanvasIO.IterateThroughTextFile(referenceCountFile.FullName);

    using (var fragmentCursor = fragmentBins.GetEnumerator())
    using (var referenceCursor = referenceBins.GetEnumerator())
    using (var ratioCursor = ratios.GetEnumerator())
    using (FileStream output = new FileStream(outputFile.FullName, FileMode.Create, FileAccess.Write))
    using (StreamWriter csv = new StreamWriter(output))
    {
        // Header row; column order matches the data rows written below.
        csv.WriteLine(CSVWriter.GetLine("Fragment Count", "Reference Count", "Chromosome",
            "Start", "End", "Unsmoothed Log Ratio"));

        while (fragmentCursor.MoveNext() && referenceCursor.MoveNext() && ratioCursor.MoveNext())
        {
            // Some bins could have been skipped when calculating the ratios,
            // so fast-forward the fragment stream to the current ratio bin.
            while (!fragmentCursor.Current.IsSameBin(ratioCursor.Current))
            {
                if (fragmentCursor.MoveNext())
                {
                    continue;
                }

                // Ran out of fragment bins
                throw new Illumina.Common.IlluminaException("Fragment bins and ratio bins are not in the same order.");
            }

            // Same fast-forward for the reference stream.
            while (!referenceCursor.Current.IsSameBin(ratioCursor.Current))
            {
                if (referenceCursor.MoveNext())
                {
                    continue;
                }

                // Ran out of reference bins
                throw new Illumina.Common.IlluminaException("Reference bins and ratio bins are not in the same order.");
            }

            // Final sanity check: all three cursors must now point at the same bin.
            bool aligned = fragmentCursor.Current.IsSameBin(referenceCursor.Current)
                           && fragmentCursor.Current.IsSameBin(ratioCursor.Current);
            if (!aligned)
            {
                throw new Illumina.Common.IlluminaException("Bins are not in the same order.");
            }

            csv.WriteLine(CSVWriter.GetLine(
                fragmentCursor.Current.Count.ToString(),
                referenceCursor.Current.Count.ToString(),
                fragmentCursor.Current.GenomicBin.Chromosome,
                fragmentCursor.Current.Start.ToString(),
                fragmentCursor.Current.Stop.ToString(),
                ratioCursor.Current.Count.ToString()));
        }
    }
}
/// <summary>
/// Streams per-bin sample/reference count ratios, dropping bins whose reference
/// count lies outside [<c>_minReferenceCount</c>, <c>_maxReferenceCount</c>].
/// </summary>
/// <param name="sampleBedFile">Binned counts for the sample.</param>
/// <param name="referenceBedFile">Binned counts for the reference.</param>
/// <returns>
/// One <see cref="SampleGenomicBin"/> per retained bin, carrying the sample bin's
/// chromosome, start, stop and GC, with the ratio (cast to float) as its count.
/// </returns>
/// <exception cref="FileNotFoundException">Either input file does not exist.</exception>
/// <remarks>
/// This method is an iterator, so nothing (including the file-existence checks) runs
/// until the returned sequence is enumerated. The two files are read in lockstep and
/// bins are paired by position; enumeration ends when either file runs out of bins.
/// NOTE(review): no check is made that paired bins cover the same interval — confirm
/// that both files are produced with identical binning.
/// </remarks>
public IEnumerable<SampleGenomicBin> Run(IFileLocation sampleBedFile, IFileLocation referenceBedFile)
{
    if (!sampleBedFile.Exists)
    {
        throw new FileNotFoundException(sampleBedFile.FullName + " does not exist.");
    }
    if (!referenceBedFile.Exists)
    {
        throw new FileNotFoundException(referenceBedFile.FullName + " does not exist.");
    }

    var sampleBins = CanvasIO.IterateThroughTextFile(sampleBedFile.FullName);
    var referenceBins = CanvasIO.IterateThroughTextFile(referenceBedFile.FullName);

    using (var eSampleBins = sampleBins.GetEnumerator())
    using (var eReferenceBins = referenceBins.GetEnumerator())
    {
        while (eSampleBins.MoveNext() && eReferenceBins.MoveNext())
        {
            var sampleBin = eSampleBins.Current;
            var referenceBin = eReferenceBins.Current;

            // Bins with extreme reference counts introduce large variance into the ratios.
            // It would be better to just drop these bins so we don't introduce too much
            // noise into segmentation and CNV calling.
            if (referenceBin.Count < _minReferenceCount)
            {
                continue; // skip the bin
            }
            if (referenceBin.Count > _maxReferenceCount)
            {
                continue; // skip the bin
            }

            double ratio = sampleBin.Count / referenceBin.Count;
            yield return new SampleGenomicBin(sampleBin.GenomicBin.Chromosome, sampleBin.Start,
                sampleBin.Stop, sampleBin.GenomicBin.GC, (float)ratio);
        }
    }
}
/// <summary>
/// Streams per-bin sample/reference count ratios, scaled by a library-size factor
/// derived from the on-target median bin counts of the two inputs.
/// </summary>
/// <param name="sampleBedFile">Binned counts for the sample.</param>
/// <param name="referenceBedFile">Binned counts for the reference.</param>
/// <returns>
/// One <see cref="SampleGenomicBin"/> per retained bin, carrying the sample bin's
/// chromosome, start, stop and GC, with the scaled ratio (cast to float) as its count.
/// </returns>
/// <exception cref="FileNotFoundException">Either input file does not exist.</exception>
/// <remarks>
/// The scale factor is referenceMedian / sampleMedian when both medians are positive,
/// otherwise 1. Bins are paired by position and enumeration ends when either file
/// runs out of bins.
/// </remarks>
public IEnumerable<SampleGenomicBin> Run(IFileLocation sampleBedFile, IFileLocation referenceBedFile)
{
    // Sample is checked first, then reference.
    foreach (IFileLocation bedFile in new[] { sampleBedFile, referenceBedFile })
    {
        if (!bedFile.Exists)
        {
            throw new FileNotFoundException(bedFile.FullName + " does not exist.");
        }
    }

    var sampleBins = CanvasIO.IterateThroughTextFile(sampleBedFile.FullName);
    var referenceBins = CanvasIO.IterateThroughTextFile(referenceBedFile.FullName);

    double sampleMedian = new BinCounts(sampleBins, manifest: _manifest).OnTargetMedianBinCount;
    double referenceMedian = new BinCounts(referenceBins, manifest: _manifest).OnTargetMedianBinCount;

    // Fall back to no scaling unless both medians are usable.
    double librarySizeFactor = 1;
    if (sampleMedian > 0 && referenceMedian > 0)
    {
        librarySizeFactor = referenceMedian / sampleMedian;
    }

    using (var sampleCursor = sampleBins.GetEnumerator())
    using (var referenceCursor = referenceBins.GetEnumerator())
    {
        while (sampleCursor.MoveNext() && referenceCursor.MoveNext())
        {
            var referenceBin = referenceCursor.Current;

            // The weighted average count of a bin could be less than 1.
            // Using these small counts for coverage normalization creates large ratios.
            // It would be better to just drop these bins so we don't introduce too much
            // noise into segmentation and CNV calling.
            if (referenceBin.Count < 1)
            {
                continue; // skip the bin
            }

            var sampleBin = sampleCursor.Current;
            double ratio = sampleBin.Count / referenceBin.Count * librarySizeFactor;

            yield return new SampleGenomicBin(
                sampleBin.GenomicBin.Chromosome,
                sampleBin.Start,
                sampleBin.Stop,
                sampleBin.GenomicBin.GC,
                (float)ratio);
        }
    }
}
/// <summary>
/// Path-based convenience overload: opens <paramref name="binnedPath"/> with
/// <c>CanvasIO.IterateThroughTextFile</c> and forwards the resulting bins to the
/// bin-sequence overload of <c>LoadBinCounts</c>.
/// </summary>
/// <param name="binnedPath">Path of the binned count file to read.</param>
/// <param name="manifest">Manifest passed through unchanged to the bin-sequence overload.</param>
/// <param name="binCounts">Receives the bin counts produced by the forwarded call.</param>
/// <param name="onTargetIndices">Receives the on-target indices produced by the forwarded call.</param>
private static void LoadBinCounts(string binnedPath, NexteraManifest manifest,
    out List<double> binCounts, out List<int> onTargetIndices)
{
    var bins = CanvasIO.IterateThroughTextFile(binnedPath);
    LoadBinCounts(bins, manifest, out binCounts, out onTargetIndices);
}