Exemple #1
0
        /// <summary>
        /// Dispatches GC normalization of the bins to the routine selected by <paramref name="mode"/>.
        /// </summary>
        /// <param name="bins">Bins whose counts are to be normalized</param>
        /// <param name="manifest">Manifest forwarded unchanged to the selected normalization routine</param>
        /// <param name="mode">GC normalization mode (MedianByGC or LOESS)</param>
        /// <exception cref="ApplicationException">Thrown when <paramref name="mode"/> is any other value</exception>
        static void NormalizeByGC(List <GenomicBin> bins, NexteraManifest manifest, CanvasGCNormalizationMode mode)
        {
            if (mode == CanvasGCNormalizationMode.MedianByGC)
            {
                // Delegate to the overload that takes only the bins and the manifest.
                NormalizeByGC(bins, manifest: manifest);
                return;
            }

            if (mode != CanvasGCNormalizationMode.LOESS)
            {
                throw new ApplicationException("Unsupported Canvas GC normalization mode: " + mode.ToString());
            }

            // The LOESS normalizer is handed a natural-log transform for counts together with
            // the matching exponential inverse (narrowed to float); no robustness iterations are requested.
            var loessNormalizer = new LoessGCNormalizer(
                bins,
                manifest,
                robustnessIter: 0,
                countTransformer: count => Math.Log(count),
                invCountTransformer: transformed => (float)Math.Exp(transformed));
            loessNormalizer.Normalize();
        }
Exemple #2
0
        /// <summary>
        /// Command-line entry point. Reads bins from the input file, applies the requested
        /// cleaning steps in a fixed order (outlier removal, size filter, local-SD estimation,
        /// GC normalization, FFPE local-SD filtering) and writes the resulting bins to the output file.
        /// </summary>
        /// <param name="args">Command-line arguments; the recognized options are listed in the option set below.</param>
        /// <returns>
        /// 0 on success or when help was explicitly requested; 1 when a required argument
        /// (input or output file) is missing or when the input file does not exist.
        /// </returns>
        static int Main(string[] args)
        {
            Utilities.LogCommandLine(args);
            string inFile           = null;
            string outFile          = null;
            bool   doGCnorm         = false;
            bool   doSizeFilter     = false;
            bool   doOutlierRemoval = false;
            string ffpeOutliersFile = null;
            string manifestFile     = null;
            CanvasGCNormalizationMode gcNormalizationMode = CanvasGCNormalizationMode.MedianByGC;
            string modeDescription = String.Format("gc normalization mode. Available modes: {0}. Default: {1}",
                                                   String.Join(", ", Enum.GetValues(typeof(CanvasGCNormalizationMode)).Cast <CanvasGCNormalizationMode>()),
                                                   gcNormalizationMode);
            bool needHelp = false;

            OptionSet p = new OptionSet()
            {
                { "i|infile=", "input file - usually generated by CanvasBin", v => inFile = v },
                { "o|outfile=", "text file to output containing cleaned bins", v => outFile = v },
                { "g|gcnorm", "perform GC normalization", v => doGCnorm = v != null },
                { "s|filtsize", "filter out genomically large bins", v => doSizeFilter = v != null },
                { "r|outliers", "filter outlier points", v => doOutlierRemoval = v != null },
                { "f|ffpeoutliers=", "filter regions of FFPE biases", v => ffpeOutliersFile = v },
                { "t|manifest=", "Nextera manifest file", v => manifestFile = v },
                // Parse with the invariant culture so the accepted number format does not depend on the machine locale.
                { "w|weightedmedian=", "Minimum number of bins per GC required to calculate weighted median", v => minNumberOfBinsPerGCForWeightedMedian = int.Parse(v, System.Globalization.CultureInfo.InvariantCulture) },
                { "m|mode=", modeDescription, v => gcNormalizationMode = Utilities.ParseCanvasGCNormalizationMode(v) },
                { "h|help", "show this message and exit", v => needHelp = v != null },
            };

            // Unrecognized/positional arguments returned by Parse are not used by this tool.
            p.Parse(args);

            if (needHelp)
            {
                ShowHelp(p);
                return(0);
            }

            // Both the input and the output file are mandatory. Report a failure exit code so that
            // calling scripts do not mistake a run that produced no output for a successful one.
            if (inFile == null || outFile == null)
            {
                ShowHelp(p);
                return(1);
            }

            // Does the input file exist?
            if (!File.Exists(inFile))
            {
                Console.WriteLine("CanvasClean.exe: File {0} does not exist! Exiting.", inFile);
                return(1);
            }

            List <SampleGenomicBin> bins = CanvasIO.ReadFromTextFile(inFile);

            if (doOutlierRemoval)
            {
                bins = RemoveOutliers(bins);
            }

            if (doSizeFilter)
            {
                bins = RemoveBigBins(bins);
            }

            // do not run FFPE outlier removal on targeted/low coverage data
            if (ffpeOutliersFile != null && bins.Count < 50000)
            {
                ffpeOutliersFile = null;
            }

            // estimate localSD metric to use in doFFPEOutlierRemoval later and write to a text file
            // (-1.0 is only a placeholder; the value is read solely when ffpeOutliersFile is still set)
            double localSD = -1.0;

            if (ffpeOutliersFile != null)
            {
                localSD = getLocalStandardDeviation(bins);
                CanvasIO.WriteLocalSDToTextFile(ffpeOutliersFile, localSD);
            }

            if (doGCnorm)
            {
                NexteraManifest         manifest     = manifestFile == null ? null : new NexteraManifest(manifestFile, null, Console.WriteLine);
                // Extreme-GC bins are stripped only for the MedianByGC mode; other modes keep all bins.
                List <SampleGenomicBin> strippedBins = gcNormalizationMode == CanvasGCNormalizationMode.MedianByGC
                    ? RemoveBinsWithExtremeGC(bins, defaultMinNumberOfBinsPerGC, manifest: manifest)
                    : bins;
                if (strippedBins.Count == 0)
                {
                    // Keep the unstripped bins and skip GC correction entirely.
                    Console.Error.WriteLine("Warning in CanvasClean: Coverage too low to perform GC correction; proceeding without GC correction");
                }
                else
                {
                    bins = strippedBins;
                    NormalizeByGC(bins, manifest, gcNormalizationMode);
                    // Use variance normalization only on large exome panels and whole genome sequencing
                    // The threshold is set to 10% of an average number of bins on CanvasClean data
                    if (ffpeOutliersFile != null && bins.Count > 500000)
                    {
                        bool isNormalizeVarianceByGC = NormalizeVarianceByGC(bins, manifest: manifest);
                        // If normalization by variance was run (isNormalizeVarianceByGC), perform mean centering by using NormalizeByGC
                        if (isNormalizeVarianceByGC)
                        {
                            NormalizeByGC(bins, manifest, gcNormalizationMode);
                        }
                    }
                }
            }

            if (ffpeOutliersFile != null)
            {
                // threshold 20 is derived to separate FF and noisy FFPE samples (derived from a training set of approx. 40 samples)
                bins = RemoveBinsWithExtremeLocalSD(bins, localSD, 20, outFile);
            }

            CanvasIO.WriteToTextFile(outFile, bins);
            return(0);
        }
Exemple #3
0
 /// <summary>
 /// Dispatches GC normalization of the bins to the routine selected by <paramref name="mode"/>.
 /// </summary>
 /// <param name="bins">Bins whose counts are to be normalized</param>
 /// <param name="manifest">Manifest forwarded unchanged to the selected normalization routine</param>
 /// <param name="mode">GC normalization mode (MedianByGC or LOESS)</param>
 /// <exception cref="ApplicationException">Thrown when <paramref name="mode"/> is any other value</exception>
 static void NormalizeByGC(List<GenomicBin> bins, NexteraManifest manifest, CanvasGCNormalizationMode mode)
 {
     if (mode == CanvasGCNormalizationMode.MedianByGC)
     {
         // Delegate to the overload that takes only the bins and the manifest.
         NormalizeByGC(bins, manifest: manifest);
         return;
     }

     if (mode != CanvasGCNormalizationMode.LOESS)
     {
         throw new ApplicationException("Unsupported Canvas GC normalization mode: " + mode.ToString());
     }

     // The LOESS normalizer is handed a natural-log transform for counts together with
     // the matching exponential inverse (narrowed to float); no robustness iterations are requested.
     var loessNormalizer = new LoessGCNormalizer(
         bins,
         manifest,
         robustnessIter: 0,
         countTransformer: count => Math.Log(count),
         invCountTransformer: transformed => (float)Math.Exp(transformed));
     loessNormalizer.Normalize();
 }