Пример #1
0
        /// <summary>
        /// Writes copy-number data (cnd) file.
        /// </summary>
        /// <remarks>
        /// The output is a CSV file: one header row followed by one row per ratio bin. Fragment and
        /// reference bins that precede the matching ratio bin are skipped. Numeric fields are formatted
        /// with the invariant culture so the file content does not depend on the machine's regional settings.
        /// </remarks>
        /// <param name="fragmentCountFile">Text file whose bins supply the "Fragment Count" column and the bin coordinates.</param>
        /// <param name="referenceCountFile">Text file whose bins supply the "Reference Count" column.</param>
        /// <param name="ratios">Bins whose <c>Count</c> is written as the "Unsmoothed Log Ratio" column; they must appear
        /// in the same order as the fragment and reference bins, although some bins may be missing.</param>
        /// <param name="outputFile">Destination file; it is created, or overwritten if it already exists.</param>
        /// <exception cref="Illumina.Common.IlluminaException">
        /// Thrown when a ratio bin cannot be matched against the remaining fragment or reference bins.
        /// </exception>
        public static void WriteCndFile(IFileLocation fragmentCountFile, IFileLocation referenceCountFile,
                                        IEnumerable<SampleGenomicBin> ratios, IFileLocation outputFile)
        {
            // Machine-readable output: never let the current culture pick the decimal separator.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            IEnumerable<SampleGenomicBin> fragmentCounts  = CanvasIO.IterateThroughTextFile(fragmentCountFile.FullName);
            IEnumerable<SampleGenomicBin> referenceCounts = CanvasIO.IterateThroughTextFile(referenceCountFile.FullName);

            using (var eFragment = fragmentCounts.GetEnumerator())
            using (var eReference = referenceCounts.GetEnumerator())
            using (var eRatio = ratios.GetEnumerator())
            using (FileStream stream = new FileStream(outputFile.FullName, FileMode.Create, FileAccess.Write))
            using (StreamWriter writer = new StreamWriter(stream))
            {
                writer.WriteLine(CSVWriter.GetLine("Fragment Count", "Reference Count", "Chromosome",
                                                   "Start", "End", "Unsmoothed Log Ratio"));

                // All three sequences advance together; the loop ends as soon as any one of them is exhausted.
                while (eFragment.MoveNext() && eReference.MoveNext() && eRatio.MoveNext())
                {
                    // Some bins could have been skipped when calculating the ratios,
                    // so fast-forward the fragment bins until they line up with the current ratio bin.
                    while (!eFragment.Current.IsSameBin(eRatio.Current))
                    {
                        if (!eFragment.MoveNext()) // Ran out of fragment bins
                        {
                            throw new Illumina.Common.IlluminaException("Fragment bins and ratio bins are not in the same order.");
                        }
                    }

                    // Same fast-forward for the reference bins.
                    while (!eReference.Current.IsSameBin(eRatio.Current))
                    {
                        if (!eReference.MoveNext()) // Ran out of reference bins
                        {
                            throw new Illumina.Common.IlluminaException("Reference bins and ratio bins are not in the same order.");
                        }
                    }

                    // Final sanity check that all three enumerators point at the same bin.
                    if (!eFragment.Current.IsSameBin(eReference.Current) ||
                        !eFragment.Current.IsSameBin(eRatio.Current))
                    {
                        throw new Illumina.Common.IlluminaException("Bins are not in the same order.");
                    }

                    var fragmentBin = eFragment.Current;
                    writer.WriteLine(CSVWriter.GetLine(fragmentBin.Count.ToString(invariant),
                                                       eReference.Current.Count.ToString(invariant),
                                                       fragmentBin.GenomicBin.Chromosome,
                                                       fragmentBin.Start.ToString(invariant),
                                                       fragmentBin.Stop.ToString(invariant),
                                                       eRatio.Current.Count.ToString(invariant)));
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Produces one ratio bin (sample count divided by reference count) for each pair of sample and
        /// reference bins whose reference count lies within the configured minimum and maximum.
        /// </summary>
        /// <remarks>
        /// The existence checks run immediately when this method is called; the files themselves are only
        /// read while the returned sequence is enumerated.
        /// </remarks>
        /// <param name="sampleBedFile">File containing the sample bins.</param>
        /// <param name="referenceBedFile">File containing the reference bins.</param>
        /// <returns>A lazily evaluated sequence of bins whose count is the sample/reference ratio.</returns>
        /// <exception cref="FileNotFoundException">Thrown when either input file does not exist.</exception>
        public IEnumerable<SampleGenomicBin> Run(IFileLocation sampleBedFile, IFileLocation referenceBedFile)
        {
            // Validate here, outside the iterator, so a missing file is reported at call time
            // instead of on the first MoveNext() of the returned sequence.
            if (!sampleBedFile.Exists)
            {
                throw new FileNotFoundException(sampleBedFile.FullName + " does not exist.");
            }
            if (!referenceBedFile.Exists)
            {
                throw new FileNotFoundException(referenceBedFile.FullName + " does not exist.");
            }

            return EnumerateRatioBins(sampleBedFile, referenceBedFile);
        }

        /// <summary>
        /// Lazily walks the sample and reference bins in lock-step and yields the ratio bins.
        /// </summary>
        private IEnumerable<SampleGenomicBin> EnumerateRatioBins(IFileLocation sampleBedFile, IFileLocation referenceBedFile)
        {
            var sampleBins    = CanvasIO.IterateThroughTextFile(sampleBedFile.FullName);
            var referenceBins = CanvasIO.IterateThroughTextFile(referenceBedFile.FullName);

            using (var eSampleBins = sampleBins.GetEnumerator())
            using (var eReferenceBins = referenceBins.GetEnumerator())
            {
                // Bins are paired purely by position; enumeration stops when either file runs out.
                while (eSampleBins.MoveNext() && eReferenceBins.MoveNext())
                {
                    var sampleBin    = eSampleBins.Current;
                    var referenceBin = eReferenceBins.Current;

                    // Bins with extreme reference counts introduce large variance into the ratios.
                    // It would be better to just drop these bins so we don't introduce too much noise into segmentation and CNV calling.
                    if (referenceBin.Count < _minReferenceCount || referenceBin.Count > _maxReferenceCount)
                    {
                        continue; // skip the bin
                    }

                    double ratio = sampleBin.Count / referenceBin.Count;
                    yield return new SampleGenomicBin(sampleBin.GenomicBin.Chromosome, sampleBin.Start, sampleBin.Stop,
                                                      sampleBin.GenomicBin.GC, (float)ratio);
                }
            }
        }
Пример #3
0
        /// <summary>
        /// Produces one ratio bin for each pair of sample and reference bins whose reference count is at
        /// least 1. The ratio is sample count divided by reference count, scaled by a library size factor.
        /// </summary>
        /// <remarks>
        /// The existence checks run immediately when this method is called; the files themselves are only
        /// read (and the medians computed) while the returned sequence is enumerated.
        /// </remarks>
        /// <param name="sampleBedFile">File containing the sample bins.</param>
        /// <param name="referenceBedFile">File containing the reference bins.</param>
        /// <returns>A lazily evaluated sequence of bins whose count is the normalized sample/reference ratio.</returns>
        /// <exception cref="FileNotFoundException">Thrown when either input file does not exist.</exception>
        public IEnumerable<SampleGenomicBin> Run(IFileLocation sampleBedFile, IFileLocation referenceBedFile)
        {
            // Validate here, outside the iterator, so a missing file is reported at call time
            // instead of on the first MoveNext() of the returned sequence.
            if (!sampleBedFile.Exists)
            {
                throw new FileNotFoundException(sampleBedFile.FullName + " does not exist.");
            }
            if (!referenceBedFile.Exists)
            {
                throw new FileNotFoundException(referenceBedFile.FullName + " does not exist.");
            }

            return EnumerateNormalizedRatioBins(sampleBedFile, referenceBedFile);
        }

        /// <summary>
        /// Lazily computes the library size factor, then walks the sample and reference bins in lock-step
        /// and yields the normalized ratio bins.
        /// </summary>
        private IEnumerable<SampleGenomicBin> EnumerateNormalizedRatioBins(IFileLocation sampleBedFile, IFileLocation referenceBedFile)
        {
            var sampleBins    = CanvasIO.IterateThroughTextFile(sampleBedFile.FullName);
            var referenceBins = CanvasIO.IterateThroughTextFile(referenceBedFile.FullName);

            // Scale the sample to the reference using the on-target median bin counts;
            // fall back to no scaling when either median is not positive.
            double sampleMedian      = (new BinCounts(sampleBins, manifest: _manifest)).OnTargetMedianBinCount;
            double referenceMedian   = (new BinCounts(referenceBins, manifest: _manifest)).OnTargetMedianBinCount;
            double librarySizeFactor = (sampleMedian > 0 && referenceMedian > 0) ? referenceMedian / sampleMedian : 1;

            using (var eSampleBins = sampleBins.GetEnumerator())
            using (var eReferenceBins = referenceBins.GetEnumerator())
            {
                // Bins are paired purely by position; enumeration stops when either file runs out.
                while (eSampleBins.MoveNext() && eReferenceBins.MoveNext())
                {
                    var sampleBin    = eSampleBins.Current;
                    var referenceBin = eReferenceBins.Current;

                    // The weighted average count of a bin could be less than 1.
                    // Using these small counts for coverage normalization creates large ratios.
                    // It would be better to just drop these bins so we don't introduce too much noise into segmentation and CNV calling.
                    if (referenceBin.Count < 1)
                    {
                        continue; // skip the bin
                    }

                    double ratio = sampleBin.Count / referenceBin.Count * librarySizeFactor;
                    yield return new SampleGenomicBin(sampleBin.GenomicBin.Chromosome, sampleBin.Start, sampleBin.Stop,
                                                      sampleBin.GenomicBin.GC, (float)ratio);
                }
            }
        }
Пример #4
0
 /// <summary>
 /// Path-based convenience overload: opens the binned text file through
 /// <c>CanvasIO.IterateThroughTextFile</c> and forwards the resulting bins to the
 /// enumerable-based <c>LoadBinCounts</c> overload.
 /// </summary>
 /// <param name="binnedPath">Path of the binned text file to read.</param>
 /// <param name="manifest">Manifest passed through unchanged to the enumerable-based overload.</param>
 /// <param name="binCounts">Assigned by the enumerable-based overload.</param>
 /// <param name="onTargetIndices">Assigned by the enumerable-based overload.</param>
 private static void LoadBinCounts(string binnedPath, NexteraManifest manifest, out List<double> binCounts,
                                   out List<int> onTargetIndices)
 {
     var binsFromFile = CanvasIO.IterateThroughTextFile(binnedPath);

     LoadBinCounts(binsFromFile, manifest, out binCounts, out onTargetIndices);
 }