/// <summary>
/// Entry point: parses the command line, validates the supplied files and option
/// combinations, runs the selected segmentation method (Wavelets by default, CBS or HMM)
/// and writes the partitioning results to the requested output file(s).
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// 0 when results were written or help was shown; 1 when a validation check fails.
/// </returns>
static int Main(string[] args)
{
    CanvasCommon.Utilities.LogCommandLine(args);

    List<string> inFiles = new List<string>();
    List<string> outFiles = new List<string>();
    bool needHelp = false;
    bool isGermline = false;
    string bedPath = null;
    string commonCNVsbedPath = null;
    double alpha = CBSRunner.DefaultAlpha;
    double madFactor = WaveletsRunner.DefaultMadFactor;
    SegmentSplitUndo undoMethod = SegmentSplitUndo.None;
    Segmentation.SegmentationMethod partitionMethod = Segmentation.SegmentationMethod.Wavelets;
    int maxInterBinDistInSegment = 1000000;

    OptionSet p = new OptionSet()
    {
        { "i|infile=", "input file - usually generated by CanvasClean", v => inFiles.Add(v) },
        { "o|outfile=", "text file to output", v => outFiles.Add(v) },
        { "h|help", "show this message and exit", v => needHelp = v != null },
        { "m|method=", "segmentation method (Wavelets/CBS/HMM). Default: " + partitionMethod,
          v => partitionMethod = (Segmentation.SegmentationMethod)Enum.Parse(typeof(Segmentation.SegmentationMethod), v) },
        // Parse straight to double: going through float.Parse adds float rounding noise
        // to the double-typed parameter (e.g. 0.01 -> 0.00999999977...).
        { "a|alpha=", "alpha parameter to CBS. Default: " + alpha, v => alpha = double.Parse(v) },
        { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod,
          v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v) },
        { "f|madFactor=", "MAD factor to Wavelets. Default: " + madFactor, v => madFactor = double.Parse(v) },
        { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => bedPath = v },
        { "c|commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)",
          v => commonCNVsbedPath = v },
        { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
        { "d|maxInterBinDistInSegment=",
          "the maximum distance between adjacent bins in a segment (negative numbers turn off splitting segments after segmentation). Default: "
          + maxInterBinDistInSegment,
          v => maxInterBinDistInSegment = int.Parse(v) },
    };

    // Unrecognised arguments are returned by Parse and deliberately ignored here.
    p.Parse(args);

    if (needHelp)
    {
        ShowHelp(p);
        return 0;
    }

    if (!inFiles.Any() || !outFiles.Any())
    {
        // NOTE(review): exits with 0 although required arguments are missing;
        // kept as-is so existing callers see the same exit code.
        ShowHelp(p);
        return 0;
    }

    // Report the specific missing file rather than formatting the whole list,
    // which would only print the List<string> type name.
    string missingInFile = inFiles.FirstOrDefault(inFile => !File.Exists(inFile));
    if (missingInFile != null)
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", missingInFile);
        return 1;
    }

    if (!string.IsNullOrEmpty(bedPath) && !File.Exists(bedPath))
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", bedPath);
        return 1;
    }

    bool isHmm = partitionMethod == Segmentation.SegmentationMethod.HMM;

    if (!isHmm && outFiles.Count > 1)
    {
        Console.WriteLine("CanvasPartition.exe: multiple output files (-o) are only supported by the " +
                          "multi-sample workflow, please use method=HMM or provide a single -o argument");
        return 1;
    }

    if (!isHmm && inFiles.Count > 1)
    {
        // Wavelets and CBS operate on exactly one sample (they call Single() below).
        Console.WriteLine("CanvasPartition.exe: multiple input files (-i) are only supported when method=HMM");
        return 1;
    }

    if (isHmm && inFiles.Count == 1)
    {
        Console.WriteLine("CanvasPartition.exe: method=HMM option only works when more than one input files (-i) are provided");
        return 1;
    }

    if (isHmm && outFiles.Count != inFiles.Count)
    {
        // HMM writes one output per input, indexed in parallel.
        Console.WriteLine("CanvasPartition.exe: method=HMM requires one output file (-o) per input file (-i); " +
                          "got {0} input(s) and {1} output(s)", inFiles.Count, outFiles.Count);
        return 1;
    }

    List<Segmentation> segmentationEngine = inFiles
        .Select(inFile => new Segmentation(inFile, bedPath, maxInterBinDistInSegment))
        .ToList();
    Segmentation.GenomeSegmentationResults segmentationResults;

    switch (partitionMethod)
    {
        case Segmentation.SegmentationMethod.CBS:
        {
            Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
            CBSRunner cbsRunner = new CBSRunner(maxInterBinDistInSegment, undoMethod, alpha);
            segmentationResults = new Segmentation.GenomeSegmentationResults(
                cbsRunner.Run(segmentationEngine.Single(), verbose: 2));
            segmentationEngine.Single().WriteCanvasPartitionResults(outFiles.Single(), segmentationResults);
            break;
        }

        case Segmentation.SegmentationMethod.HMM:
        {
            Console.WriteLine("{0} Running HMM Partitioning", DateTime.Now);
            HiddenMarkovModelsRunner hiddenMarkovModelsRunner =
                new HiddenMarkovModelsRunner(commonCNVsbedPath, inFiles.Count);
            segmentationResults = new Segmentation.GenomeSegmentationResults(
                hiddenMarkovModelsRunner.Run(segmentationEngine));
            // The same joint result object is written once per sample.
            for (int i = 0; i < segmentationEngine.Count; i++)
            {
                segmentationEngine[i].WriteCanvasPartitionResults(outFiles[i], segmentationResults);
            }
            break;
        }

        default: // use Wavelets if neither CBS nor HMM is selected
        {
            Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
            WaveletsRunner waveletsRunner = new WaveletsRunner(
                new WaveletsRunner.WaveletsRunnerParams(isGermline, commonCNVsbedPath, madFactor: madFactor, verbose: 2));
            segmentationResults = new Segmentation.GenomeSegmentationResults(
                waveletsRunner.Run(segmentationEngine.Single()));
            segmentationEngine.Single().WriteCanvasPartitionResults(outFiles.Single(), segmentationResults);
            break;
        }
    }

    Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
    return 0;
}
/// <summary>
/// Entry point: parses the command line, validates the supplied files, loads the
/// partition parameter configuration, runs the selected segmentation method
/// (Wavelets by default, CBS, HMM or PerSampleHMM) and post-processes and writes
/// one result file per sample.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>
/// 0 when results were written or help was shown; 1 when a validation check fails.
/// </returns>
/// <exception cref="IlluminaException">Thrown when unrecognised arguments are supplied.</exception>
static int Main(string[] args)
{
    CanvasCommon.Utilities.LogCommandLine(args);

    List<string> cleanedFiles = new List<string>();
    List<string> outPartitionedFiles = new List<string>();
    List<string> vafFiles = new List<string>();
    bool needHelp = false;
    bool isGermline = false;
    string filterBedFile = null;
    string referenceFolder = null;
    string commonCNVsbedPath = null;
    string evennessMetricFile = null;
    SegmentSplitUndo undoMethod = SegmentSplitUndo.None;
    SegmentationInput.SegmentationMethod partitionMethod = SegmentationInput.SegmentationMethod.Wavelets;
    string parameterconfigPath = Path.Combine(
        Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CanvasPartition)),
        "CanvasPartitionParameters.json");
    string ploidyVcfPath = null;

    OptionSet p = new OptionSet()
    {
        { "i|infile=", "input file - usually generated by CanvasClean", v => cleanedFiles.Add(v) },
        { "v|vaffile=", "variant frequencyfiles - usually generated by CanvasSNV", v => vafFiles.Add(v) },
        { "o|outfile=", "text file to output", v => outPartitionedFiles.Add(v) },
        { "m|method=", "segmentation method (Wavelets/CBS/HMM/PerSampleHMM). Default: " + partitionMethod,
          v => partitionMethod = (SegmentationInput.SegmentationMethod)Enum.Parse(typeof(SegmentationInput.SegmentationMethod), v) },
        { "r|reference=", "folder that contains both genome.fa and GenomeSize.xml", v => referenceFolder = v },
        { "s|split=", "CBS split method (None/Prune/SDUndo). Default: " + undoMethod,
          v => undoMethod = (SegmentSplitUndo)Enum.Parse(typeof(SegmentSplitUndo), v) },
        { "b|bedfile=", "bed file to exclude (don't span these intervals)", v => filterBedFile = v },
        { "c|commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)",
          v => commonCNVsbedPath = v },
        { "g|germline", "flag indicating that input file represents germline genome", v => isGermline = v != null },
        { $"{CommandLineOptions.EvennessMetricFile}=", "output file for evenness metric (optional)", v => evennessMetricFile = v },
        { "p|ploidyVcfFile=", "vcf file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyVcfPath = v },
        // Interpolated so the help text shows the actual default path.
        { "config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v },
        { "h|help", "show this message and exit", v => needHelp = v != null }
    };

    List<string> extraArgs = p.Parse(args);
    if (extraArgs.Any())
    {
        throw new IlluminaException($"Unknown arguments: {string.Join(",", extraArgs)}");
    }

    if (needHelp)
    {
        ShowHelp(p);
        return 0;
    }

    if (!cleanedFiles.Any() || !outPartitionedFiles.Any() || referenceFolder == null)
    {
        // NOTE(review): exits with 0 although required arguments are missing;
        // kept as-is so existing callers see the same exit code.
        ShowHelp(p);
        return 0;
    }

    // Report the specific missing file rather than formatting the whole list,
    // which would only print the List<string> type name.
    string missingCleanedFile = cleanedFiles.FirstOrDefault(inFile => !File.Exists(inFile));
    if (missingCleanedFile != null)
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", missingCleanedFile);
        return 1;
    }

    if (!string.IsNullOrEmpty(filterBedFile) && !File.Exists(filterBedFile))
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", filterBedFile);
        return 1;
    }

    if (!File.Exists(parameterconfigPath))
    {
        Console.WriteLine($"CanvasPartition.exe: File {parameterconfigPath} does not exist! Exiting.");
        return 1;
    }

    bool hasPloidyVcf = !string.IsNullOrEmpty(ploidyVcfPath);
    if (hasPloidyVcf && !File.Exists(ploidyVcfPath))
    {
        Console.WriteLine("CanvasPartition.exe: File {0} does not exist! Exiting.", ploidyVcfPath);
        return 1;
    }

    bool isMultiSampleMethod =
        partitionMethod == SegmentationInput.SegmentationMethod.CBS ||
        partitionMethod == SegmentationInput.SegmentationMethod.HMM ||
        partitionMethod == SegmentationInput.SegmentationMethod.PerSampleHMM;

    if (!isMultiSampleMethod && cleanedFiles.Count != 1)
    {
        // The Wavelets branch below calls Single() on the inputs.
        Console.WriteLine("CanvasPartition.exe: Wavelets partitioning supports exactly one input file (-i); " +
                          "got {0}", cleanedFiles.Count);
        return 1;
    }

    if (outPartitionedFiles.Count != cleanedFiles.Count)
    {
        // Outputs are paired with inputs by position; a mismatch would otherwise
        // silently drop samples or fail with an index error while writing.
        Console.WriteLine("CanvasPartition.exe: expected one output file (-o) per input file (-i); " +
                          "got {0} input(s) and {1} output(s)", cleanedFiles.Count, outPartitionedFiles.Count);
        return 1;
    }

    var parameterconfigFile = new FileLocation(parameterconfigPath);
    var canvasPartitionParameters = Deserialize<CanvasPartitionParameters>(parameterconfigFile);
    ILogger logger = new Logger(Console.Out.ToEnumerable(), Console.Error.ToEnumerable());
    var processor = new SegmentationResultsProcessor(canvasPartitionParameters.MaxInterBinDistInSegment);

    // VAF files are only used when there is exactly one per input file.
    bool useVafFiles = vafFiles.Count > 0 && vafFiles.Count == cleanedFiles.Count;
    if (vafFiles.Count > 0 && !useVafFiles)
    {
        Console.Error.WriteLine("CanvasPartition.exe: Warning: {0} variant frequency file(s) (-v) given for " +
                                "{1} input file(s) (-i); variant frequency files will be ignored",
                                vafFiles.Count, cleanedFiles.Count);
    }

    var segmentationInputs = useVafFiles
        ? cleanedFiles.Zip(vafFiles, (inFile, vafFile) =>
              new SegmentationInput(inFile, vafFile, filterBedFile, referenceFolder, evennessMetricFile, logger, processor)).ToList()
        : cleanedFiles.Select(inFile =>
              new SegmentationInput(inFile, null, filterBedFile, referenceFolder, evennessMetricFile, logger, processor)).ToList();

    GenomeSegmentationResults segmentationResults;
    PloidyInfo referencePloidy = hasPloidyVcf ? PloidyInfo.LoadPloidyFromVcfFileNoSampleId(ploidyVcfPath) : null;

    // Writes the given result object once per sample, pairing inputs and outputs by
    // position (the counts were checked to be equal above).
    void WriteResultsForAllSamples(GenomeSegmentationResults results)
    {
        for (int i = 0; i < segmentationInputs.Count; i++)
        {
            PostProcessAndWriteResults(segmentationInputs[i], outPartitionedFiles[i], referencePloidy, results);
        }
    }

    switch (partitionMethod)
    {
        case SegmentationInput.SegmentationMethod.CBS:
        {
            Console.WriteLine("{0} Running CBS Partitioning", DateTime.Now);
            var cbsRunner = new CBSRunner(canvasPartitionParameters.MaxInterBinDistInSegment, undoMethod,
                canvasPartitionParameters.CBSalpha);
            // Segment each sample on its own, then combine via SplitOverlappingSegments.
            var sampleSegmentations = segmentationInputs
                .Select(input => new GenomeSegmentationResults(cbsRunner.Run(input, verbose: 2)))
                .ToList();
            segmentationResults = GenomeSegmentationResults.SplitOverlappingSegments(sampleSegmentations);
            WriteResultsForAllSamples(segmentationResults);
            break;
        }

        case SegmentationInput.SegmentationMethod.HMM:
        {
            Console.WriteLine("{0} Running HMM Partitioning", DateTime.Now);
            var hiddenMarkovModelsRunner = new HiddenMarkovModelsRunner(cleanedFiles.Count);
            bool isPerSample = false;
            segmentationResults = new GenomeSegmentationResults(
                hiddenMarkovModelsRunner.Run(segmentationInputs, isPerSample));
            WriteResultsForAllSamples(segmentationResults);
            break;
        }

        case SegmentationInput.SegmentationMethod.PerSampleHMM:
        {
            Console.WriteLine("{0} Running Per-sample HMM Partitioning", DateTime.Now);
            var hiddenMarkovModelsRunner = new HiddenMarkovModelsRunner(1);
            bool isPerSample = true;
            // Run the HMM on one sample at a time, then combine via SplitOverlappingSegments.
            var sampleSegmentations = segmentationInputs
                .Select(input => new GenomeSegmentationResults(
                    hiddenMarkovModelsRunner.Run(input.Yield().ToList(), isPerSample)))
                .ToList();
            segmentationResults = GenomeSegmentationResults.SplitOverlappingSegments(sampleSegmentations);
            WriteResultsForAllSamples(segmentationResults);
            break;
        }

        default: // use Wavelets if no other method is selected
        {
            Console.WriteLine("{0} Running Wavelet Partitioning", DateTime.Now);
            var waveletsRunner = new WaveletsRunner(new WaveletsRunner.WaveletsRunnerParams(
                isGermline,
                commonCNVsbedPath,
                madFactor: canvasPartitionParameters.MadFactor,
                thresholdLowerMaf: canvasPartitionParameters.ThresholdLowerMaf,
                evennessScoreThreshold: canvasPartitionParameters.EvennessScoreThreshold,
                verbose: 2));
            segmentationResults = new GenomeSegmentationResults(
                waveletsRunner.Run(segmentationInputs.Single(), canvasPartitionParameters.EvennessScoreWindow));
            PostProcessAndWriteResults(segmentationInputs.Single(), outPartitionedFiles.Single(), referencePloidy,
                segmentationResults);
            break;
        }
    }

    Console.WriteLine("{0} CanvasPartition results written out", DateTime.Now);
    return 0;
}