Esempio n. 1
0
        /// <summary>
        /// Command-line entry point. Parses the options, validates the supplied paths and runs the
        /// selected segmentation method (Wavelets by default, CBS or HMM), writing the partitioning
        /// results to the requested output file(s).
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>
        /// 0 when results were written or when the help text was shown (including the case where no
        /// input or output file was given); 1 when argument validation fails.
        /// </returns>
        static int Main(string[] args)
        {
            CanvasCommon.Utilities.LogCommandLine(args);
            List<string> inFiles = new List<string>();
            List<string> outFiles = new List<string>();
            bool needHelp = false;
            bool isGermline = false;
            string bedPath = null;
            string commonCNVsbedPath = null;
            double alpha = CBSRunner.DefaultAlpha;
            double madFactor = WaveletsRunner.DefaultMadFactor;
            SegmentSplitUndo undoMethod = SegmentSplitUndo.None;

            Segmentation.SegmentationMethod partitionMethod = Segmentation.SegmentationMethod.Wavelets;
            int maxInterBinDistInSegment = 1000000;
            OptionSet p = new OptionSet()
            {
                { "i|infile=", "input file - usually generated by CanvasClean", v => inFiles.Add(v) },
                { "o|outfile=", "text file to output", v => outFiles.Add(v) },
                { "h|help", "show this message and exit", v => needHelp = v != null },
                { "m|method=", "segmentation method (Wavelets/CBS). Default: " + partitionMethod, v => partitionMethod = (Segmentation.SegmentationMethod)Enum.Parse(typeof(Segmentation.SegmentationMethod), v) },
                // alpha and madFactor are doubles: parse them as doubles so the value is not first
                // rounded to single precision (e.g. "0.01" would otherwise become 0.00999999977...).
                { "a|alpha=", "alpha parameter to CBS. Default: " + alpha, v => alpha = double.Parse(v) },
                { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod, v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v) },
                { "f|madFactor=", "MAD factor to Wavelets. Default: " + madFactor, v => madFactor = double.Parse(v) },
                { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => bedPath = v },
                { "c|commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)", v => commonCNVsbedPath = v },
                { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
                { "d|maxInterBinDistInSegment=", "the maximum distance between adjacent bins in a segment (negative numbers turn off splitting segments after segmentation). Default: " + maxInterBinDistInSegment, v => maxInterBinDistInSegment = int.Parse(v) },
            };

            // The unrecognised arguments returned by Parse are not used by this method.
            p.Parse(args);

            if (needHelp)
            {
                ShowHelp(p);
                return 0;
            }

            // Both an input and an output file are mandatory; without them only the help text is shown.
            if (!inFiles.Any() || !outFiles.Any())
            {
                ShowHelp(p);
                return 0;
            }

            // Report the paths that are actually missing rather than the list object itself.
            List<string> missingInFiles = inFiles.Where(inFile => !File.Exists(inFile)).ToList();
            if (missingInFiles.Any())
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", string.Join(", ", missingInFiles));
                return 1;
            }

            if (!string.IsNullOrEmpty(bedPath) && !File.Exists(bedPath))
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", bedPath);
                return 1;
            }

            // Only the HMM method writes more than one output file.
            // NOTE(review): the message wording does not match the condition very well - confirm intent.
            if (partitionMethod != Segmentation.SegmentationMethod.HMM && outFiles.Count > 1)
            {
                Console.WriteLine("CanvasPartition.exe: SegmentationMethod.HMM only works for MultiSample SPW worlfow, " +
                                  "please provide multiple -o arguments");
                return 1;
            }

            if (partitionMethod == Segmentation.SegmentationMethod.HMM && inFiles.Count == 1)
            {
                Console.WriteLine("CanvasPartition.exe: method=HMM option only works when more than one input files (-i) are provided");
                return 1;
            }

            // Every input is written to its own output (HMM indexes outFiles by input position, the
            // other methods require exactly one of each), so fail up front instead of throwing after
            // the segmentation has already been computed.
            if (outFiles.Count < inFiles.Count)
            {
                Console.WriteLine("CanvasPartition.exe: {0} input files (-i) were provided but only {1} output files (-o). Exiting.",
                                  inFiles.Count, outFiles.Count);
                return 1;
            }

            List<Segmentation> segmentationEngine = inFiles.Select(inFile => new Segmentation(inFile, bedPath, maxInterBinDistInSegment)).ToList();

            Segmentation.GenomeSegmentationResults segmentationResults;
            switch (partitionMethod)
            {
                default:    // use Wavelets if CBS is not selected
                    Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
                    WaveletsRunner waveletsRunner = new WaveletsRunner(new WaveletsRunner.WaveletsRunnerParams(isGermline, commonCNVsbedPath, madFactor: madFactor, verbose: 2));
                    segmentationResults = new Segmentation.GenomeSegmentationResults(waveletsRunner.Run(segmentationEngine.Single()));
                    segmentationEngine.Single().WriteCanvasPartitionResults(outFiles.Single(), segmentationResults);
                    break;

                case Segmentation.SegmentationMethod.CBS:
                    Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
                    CBSRunner cbsRunner = new CBSRunner(maxInterBinDistInSegment, undoMethod, alpha);
                    segmentationResults = new Segmentation.GenomeSegmentationResults(cbsRunner.Run(segmentationEngine.Single(), verbose: 2));
                    segmentationEngine.Single().WriteCanvasPartitionResults(outFiles.Single(), segmentationResults);
                    break;

                case Segmentation.SegmentationMethod.HMM:
                    Console.WriteLine("{0} Running HMM Partitioning", DateTime.Now);
                    HiddenMarkovModelsRunner hiddenMarkovModelsRunner = new HiddenMarkovModelsRunner(commonCNVsbedPath, inFiles.Count);
                    // One joint segmentation is computed for all inputs and written once per sample.
                    segmentationResults = new Segmentation.GenomeSegmentationResults(hiddenMarkovModelsRunner.Run(segmentationEngine));
                    for (int i = 0; i < segmentationEngine.Count; i++)
                    {
                        segmentationEngine[i].WriteCanvasPartitionResults(outFiles[i], segmentationResults);
                    }
                    break;
            }
            Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
            return 0;
        }
Esempio n. 2
0
        /// <summary>
        /// Command-line entry point. Parses the options, validates the supplied paths, loads the
        /// partition parameters from the JSON configuration file and runs the selected segmentation
        /// method (Wavelets by default, CBS, HMM or PerSampleHMM), post-processing and writing the
        /// results for each sample.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <returns>
        /// 0 when results were written or when the help text was shown (including the case where a
        /// mandatory input, output or reference argument is missing); 1 when argument validation fails.
        /// </returns>
        /// <exception cref="IlluminaException">Thrown when unrecognised arguments are present.</exception>
        static int Main(string[] args)
        {
            CanvasCommon.Utilities.LogCommandLine(args);
            List<string> cleanedFiles = new List<string>();
            List<string> outPartitionedFiles = new List<string>();
            List<string> vafFiles = new List<string>();
            bool needHelp = false;
            bool isGermline = false;
            string filterBedFile = null;
            string referenceFolder = null;
            string commonCNVsbedPath = null;
            string evennessMetricFile = null;
            SegmentSplitUndo undoMethod = SegmentSplitUndo.None;

            SegmentationInput.SegmentationMethod partitionMethod = SegmentationInput.SegmentationMethod.Wavelets;
            string parameterconfigPath = Path.Combine(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CanvasPartition)), "CanvasPartitionParameters.json");
            string ploidyVcfPath = null;

            OptionSet p = new OptionSet()
            {
                { "i|infile=", "input file - usually generated by CanvasClean", v => cleanedFiles.Add(v) },
                { "v|vaffile=", "variant frequencyfiles - usually generated by CanvasSNV", v => vafFiles.Add(v) },
                { "o|outfile=", "text file to output", v => outPartitionedFiles.Add(v) },
                { "m|method=", "segmentation method (Wavelets/CBS). Default: " + partitionMethod, v => partitionMethod = (SegmentationInput.SegmentationMethod)Enum.Parse(typeof(SegmentationInput.SegmentationMethod), v) },
                { "r|reference=", "folder that contains both genome.fa and GenomeSize.xml", v => referenceFolder = v },
                { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod, v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v) },
                { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => filterBedFile = v },
                { "c|commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)", v => commonCNVsbedPath = v },
                { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
                { $"{CommandLineOptions.EvennessMetricFile}=", "output file for evenness metric (optional)", v => evennessMetricFile = v },
                { "p|ploidyVcfFile=", "vcf file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyVcfPath = v },
                // Interpolated so the help text shows the actual default path instead of the
                // literal placeholder text.
                { "config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v },
                { "h|help", "show this message and exit", v => needHelp = v != null }
            };

            List<string> extraArgs = p.Parse(args);

            if (extraArgs.Any())
            {
                throw new IlluminaException($"Unknown arguments: {string.Join(",", extraArgs)}");
            }

            if (needHelp)
            {
                ShowHelp(p);
                return 0;
            }

            // Input, output and reference folder are mandatory; without them only the help text is shown.
            if (!cleanedFiles.Any() || !outPartitionedFiles.Any() || referenceFolder == null)
            {
                ShowHelp(p);
                return 0;
            }

            // Report the paths that are actually missing rather than the list object itself.
            List<string> missingCleanedFiles = cleanedFiles.Where(inFile => !File.Exists(inFile)).ToList();
            if (missingCleanedFiles.Any())
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", string.Join(", ", missingCleanedFiles));
                return 1;
            }

            if (!string.IsNullOrEmpty(filterBedFile) && !File.Exists(filterBedFile))
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", filterBedFile);
                return 1;
            }

            if (!File.Exists(parameterconfigPath))
            {
                Console.WriteLine($"CanvasPartition.exe: File {parameterconfigPath} does not exist! Exiting.");
                return 1;
            }

            if (!string.IsNullOrEmpty(ploidyVcfPath) && !File.Exists(ploidyVcfPath))
            {
                Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", ploidyVcfPath);
                return 1;
            }

            // Every input is written to its own output. With fewer outputs than inputs the
            // multi-sample branches below would either silently skip samples (Zip) or throw when
            // indexing the output list, so fail up front before any segmentation work is done.
            if (outPartitionedFiles.Count < cleanedFiles.Count)
            {
                Console.WriteLine("CanvasPartition.exe: {0} input files (-i) were provided but only {1} output files (-o). Exiting.",
                                  cleanedFiles.Count, outPartitionedFiles.Count);
                return 1;
            }

            var parameterconfigFile = new FileLocation(parameterconfigPath);
            var canvasPartitionParameters = Deserialize<CanvasPartitionParameters>(parameterconfigFile);

            ILogger logger = new Logger(Console.Out.ToEnumerable(), Console.Error.ToEnumerable());
            var processor = new SegmentationResultsProcessor(canvasPartitionParameters.MaxInterBinDistInSegment);

            // VAF files are paired with inputs by position, and only when there is exactly one per
            // input; otherwise every input is built without a VAF file.
            // NOTE(review): a non-empty VAF list of a different length is ignored silently - confirm intent.
            var segmentationInputs = vafFiles.Count > 0 && vafFiles.Count == cleanedFiles.Count ?
                                     cleanedFiles.Zip(vafFiles, (inFile, vafFile) => new SegmentationInput(inFile, vafFile, filterBedFile,
                                                                                                           referenceFolder, evennessMetricFile, logger, processor)).ToList() :
                                     cleanedFiles.Select(inFile => new SegmentationInput(inFile, null, filterBedFile,
                                                                                         referenceFolder, evennessMetricFile, logger, processor)).ToList();
            GenomeSegmentationResults segmentationResults;
            // Same null-or-empty test as the existence check above, so an empty path is never loaded.
            PloidyInfo referencePloidy = !string.IsNullOrEmpty(ploidyVcfPath)
                ? PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath)
                : null;

            switch (partitionMethod)
            {
                default:     // use Wavelets if CBS is not selected
                    Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
                    var waveletsRunner = new WaveletsRunner(new WaveletsRunner.WaveletsRunnerParams(isGermline,
                                                                                                    commonCNVsbedPath, madFactor:
                                                                                                    canvasPartitionParameters.MadFactor,
                                                                                                    thresholdLowerMaf: canvasPartitionParameters.ThresholdLowerMaf,
                                                                                                    evennessScoreThreshold: canvasPartitionParameters.EvennessScoreThreshold, verbose: 2));
                    segmentationResults = new GenomeSegmentationResults(waveletsRunner.Run(segmentationInputs.Single(),
                                                                                           canvasPartitionParameters.EvennessScoreWindow));

                    PostProcessAndWriteResults(segmentationInputs.Single(), outPartitionedFiles.Single(), referencePloidy, segmentationResults);
                    break;

                case SegmentationInput.SegmentationMethod.CBS:
                {
                    Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
                    var cbsRunner = new CBSRunner(canvasPartitionParameters.MaxInterBinDistInSegment, undoMethod,
                                                  canvasPartitionParameters.CBSalpha);
                    // Segment each sample on its own, then combine the per-sample results.
                    var sampleSegmentations = new List<GenomeSegmentationResults>();
                    foreach (var input in segmentationInputs)
                    {
                        var segmentation = new GenomeSegmentationResults(cbsRunner.Run(input, verbose: 2));
                        sampleSegmentations.Add(segmentation);
                    }

                    segmentationResults = GenomeSegmentationResults.SplitOverlappingSegments(sampleSegmentations);
                    foreach (var (segmentationInput, outPartitionedFile) in segmentationInputs.Zip(outPartitionedFiles))
                    {
                        PostProcessAndWriteResults(segmentationInput, outPartitionedFile, referencePloidy, segmentationResults);
                    }
                    break;
                }

                case SegmentationInput.SegmentationMethod.HMM:
                {
                    Console.WriteLine("{0} Running HMM Partitioning", DateTime.Now);
                    var hiddenMarkovModelsRunner = new HiddenMarkovModelsRunner(cleanedFiles.Count);
                    bool isPerSample = false;
                    // One joint segmentation is computed for all inputs and written once per sample.
                    segmentationResults =
                        new GenomeSegmentationResults(hiddenMarkovModelsRunner.Run(segmentationInputs, isPerSample));
                    for (int i = 0; i < segmentationInputs.Count; i++)
                    {
                        PostProcessAndWriteResults(segmentationInputs[i], outPartitionedFiles[i], referencePloidy, segmentationResults);
                    }
                    break;
                }

                case SegmentationInput.SegmentationMethod.PerSampleHMM:
                {
                    Console.WriteLine("{0} Running Per-sample HMM Partitioning", DateTime.Now);
                    var hiddenMarkovModelsRunner = new HiddenMarkovModelsRunner(1);
                    var sampleSegmentations = new List<GenomeSegmentationResults>();
                    bool isPerSample = true;
                    // Run the HMM on each sample as a single-element list, then combine the results.
                    foreach (var input in segmentationInputs)
                    {
                        var segmentation =
                            new GenomeSegmentationResults(
                                hiddenMarkovModelsRunner.Run(input.Yield().ToList(), isPerSample));
                        sampleSegmentations.Add(segmentation);
                    }

                    segmentationResults = GenomeSegmentationResults.SplitOverlappingSegments(sampleSegmentations);
                    foreach (var (segmentationInput, outPartitionedFile) in segmentationInputs.Zip(outPartitionedFiles))
                    {
                        PostProcessAndWriteResults(segmentationInput, outPartitionedFile, referencePloidy,
                                                   segmentationResults);
                    }
                    break;
                }
            }
            Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
            return 0;
        }