Beispiel #1
0
        public void ParseBam_ReturnsSuccessfulResultWithDefaultValues()
        {
            // Arrange: the same tiny BAM is supplied twice, once with no extra values and once
            // followed by an explicit sample type ("father") and sample name ("sampleID").
            var testDataDirectory = new DirectoryLocation(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(ModeParserTests)))
                                    .GetDirectoryLocation("Data");
            var tinyBam = testDataDirectory.GetFileLocation("Tiny_COLO829BL_S1.bam");

            var commandLine = new string[]
            {
                $"--{SmallPedigreeOptionsParser.Bams.Info.Name}",
                tinyBam.FullName,
                $"--{SmallPedigreeOptionsParser.Bams.Info.Name}",
                tinyBam.FullName,
                "father",
                "sampleID"
            };

            // Act
            var parseResult = SmallPedigreeOptionsParser.Bams.Parse(commandLine);

            // Assert
            Assert.True(parseResult.Success);

            // First entry: only the BAM was given, so type and name are expected to be the defaults.
            var defaultedSample = parseResult.Result.First();
            Assert.Equal(tinyBam, defaultedSample.Bam);
            Assert.Equal(SampleType.Other, defaultedSample.SampleType);
            Assert.Equal("COLO829BL", defaultedSample.SampleName);

            // Second entry: type and name were given explicitly on the command line.
            var explicitSample = parseResult.Result[1];
            Assert.Equal(tinyBam, explicitSample.Bam);
            Assert.Equal(SampleType.Father, explicitSample.SampleType);
            Assert.Equal("sampleID", explicitSample.SampleName);
        }
Beispiel #2
0
        /// <summary>
        /// Enumerates the files directly inside the data directory (the executable path combined with
        /// <c>DATA_DIRECTORY_NAME</c>) whose names match <paramref name="Pattern"/>.
        /// </summary>
        /// <param name="DirectoryLocation">
        /// Not consulted by this method: the data directory is always used.
        /// NOTE(review): confirm whether other locations were meant to be supported.
        /// </param>
        /// <param name="Pattern">Search pattern handed to <c>DirectoryInfo.EnumerateFiles</c>.</param>
        /// <returns>The matching files; subdirectories are not searched.</returns>
        public static IEnumerable <FileInfo> EnumerateFiles(DirectoryLocation DirectoryLocation, string Pattern)
        {
            var dataDirectory = new DirectoryInfo(Path.Combine(IO.GetExecutablePath(), DATA_DIRECTORY_NAME));

            return dataDirectory.EnumerateFiles(Pattern, SearchOption.TopDirectoryOnly);
        }
Beispiel #3
0
 /// <summary>
 /// Deletes a file on a best-effort basis: a failure is reported through the trace listeners
 /// instead of being thrown to the caller.
 /// </summary>
 /// <param name="FileName">File name; resolved through <c>getRootedFileName</c> before deletion.</param>
 /// <param name="DirectoryLocation">Location used by <c>getRootedFileName</c> to resolve <paramref name="FileName"/>.</param>
 public static void DeleteFile(string FileName, DirectoryLocation DirectoryLocation)
 {
     try
     {
         System.IO.File.Delete(getRootedFileName(FileName, DirectoryLocation));
     }
     catch (System.Exception ex)
     {
         // Still swallowed on purpose (callers rely on this never throwing), but no longer silently.
         System.Diagnostics.Trace.TraceWarning("DeleteFile: could not delete '{0}': {1}", FileName, ex.Message);
     }
 }
Beispiel #4
0
        /// <summary>
        /// Builds a <see cref="CanvasRunner"/> wired with the factory's logger, work doer, checkpoint
        /// runner and runtime settings, plus a b-allele bedgraph writer created here.
        /// </summary>
        /// <param name="isSomatic">Forwarded unchanged to the <see cref="CanvasRunner"/> constructor.</param>
        /// <param name="coverageMode">Forwarded unchanged to the <see cref="CanvasRunner"/> constructor.</param>
        /// <param name="countsPerBin">Forwarded unchanged to the <see cref="CanvasRunner"/> constructor.</param>
        /// <param name="customParameters">Forwarded unchanged to the <see cref="CanvasRunner"/> constructor.</param>
        /// <returns>The newly constructed runner.</returns>
        public CanvasRunner Create(bool isSomatic, CanvasCoverageMode coverageMode,
                                   int countsPerBin, Dictionary <string, string> customParameters)
        {
            var isasSettings = IsasConfigurationSettings.GetConfigSettings();

            // The folder of the assembly that contains CanvasRunner is used both by the executable
            // processor and as the folder path handed to the runner.
            string assemblyFolder  = Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CanvasRunner));
            var    canvasDirectory = new DirectoryLocation(assemblyFolder);

            var processor = new ExecutableProcessor(isasSettings, _logger, canvasDirectory);
            var commands  = new CommandManager(processor);
            var tabix     = TabixWrapperFactory.GetTabixWrapper(_logger, _workDoer, commands);

            // Writer chain, innermost first: facade -> rounding (argument 4, presumably decimal
            // places - TODO confirm) -> bgzf writer using tabix -> b-allele writer.
            var roundedWriter    = new RoundingBedGraphWriter(new BedGraphWriterFacade(), 4);
            var compressedWriter = new BgzfBedGraphWriter(roundedWriter, tabix);
            var bAlleleWriter    = new BAlleleBedGraphWriter(compressedWriter);

            return new CanvasRunner(
                _logger,
                _workDoer,
                _checkpointRunner,
                _runtimeExecutable,
                _runtimeCommandPrefix,
                isSomatic,
                coverageMode,
                countsPerBin,
                bAlleleWriter,
                customParameters,
                canvasDirectory.FullName);
        }
Beispiel #5
0
        public void ParsePedigreeSample_ReturnsSuccessfulResultWithDefaultValues()
        {
            // Arrange: the tiny BAM path is used both as the BAM argument and as the value of
            // --sample-b-allele-vcf.
            var testDataDirectory = new DirectoryLocation(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(ModeParserTests)))
                                    .GetDirectoryLocation("Data");
            var tinyBam = testDataDirectory.GetFileLocation("Tiny_COLO829BL_S1.bam");

            var commandLine = new string[]
            {
                $"--{SmallPedigreeOptionsParser.Bams.Info.Name}",
                tinyBam.FullName,
                "--sample-b-allele-vcf",
                tinyBam.FullName
            };

            // Act
            var parser      = new SmallPedigreeOptionsParser();
            var parseResult = parser.Parse(commandLine);

            // Assert
            Assert.True(parseResult.Success);
        }
Beispiel #6
0
        /// <summary>
        /// Turns a relative file name into a path below the executable directory; a rooted file name
        /// is returned unchanged.
        /// </summary>
        /// <param name="FileName">File name or path to resolve.</param>
        /// <param name="DirectoryLocation">
        /// <c>Root</c> resolves against the executable path, <c>Data</c> against the
        /// <c>DATA_DIRECTORY_NAME</c> folder below it; any other value leaves the name as given.
        /// </param>
        /// <returns>The resolved file name.</returns>
        private static string getRootedFileName(string FileName, DirectoryLocation DirectoryLocation)
        {
            if (Path.IsPathRooted(FileName))
            {
                return FileName;
            }

            if (DirectoryLocation == SolarMax.DirectoryLocation.Root)
            {
                return Path.Combine(IO.GetExecutablePath(), FileName);
            }

            if (DirectoryLocation == SolarMax.DirectoryLocation.Data)
            {
                string dataDirectory = Path.Combine(IO.GetExecutablePath(), DATA_DIRECTORY_NAME);
                return Path.Combine(dataDirectory, FileName);
            }

            // No mapping for this location value: hand the name back untouched.
            return FileName;
        }
Beispiel #7
0
        /// <summary>
        /// Writes <paramref name="Contents"/> to the file resolved from <paramref name="FileName"/>
        /// and <paramref name="DirectoryLocation"/>.
        /// </summary>
        /// <remarks>
        /// Failing to open the file still throws to the caller. A failure while writing is not
        /// rethrown (as before), but it is now reported through the trace listeners.
        /// </remarks>
        /// <param name="FileName">File name; resolved through <c>getRootedFileName</c>.</param>
        /// <param name="DirectoryLocation">Location used by <c>getRootedFileName</c> to resolve <paramref name="FileName"/>.</param>
        /// <param name="Contents">Text to write.</param>
        public static void WriteFile(string FileName, DirectoryLocation DirectoryLocation, string Contents)
        {
            FileName = getRootedFileName(FileName, DirectoryLocation);

            // "using" closes the writer on every path, replacing the manual try/finally + Close().
            using (TextWriter writer = new StreamWriter(FileName))
            {
                try
                {
                    writer.Write(Contents);
                }
                catch (System.Exception ex)
                {
                    // Kept best-effort for existing callers, but the failure is no longer invisible.
                    System.Diagnostics.Trace.TraceWarning("WriteFile: could not write '{0}': {1}", FileName, ex.Message);
                }
            }
        }
Beispiel #8
0
 /// <summary>
 /// Initializes a new instance of the <see cref="FileEnumerator" /> class for the given directory location.
 /// </summary>
 /// <param name="location">Directory location; stored as given, no validation is performed here.</param>
 public FileEnumerator(DirectoryLocation location)
 {
     this.location = location;
 }
 /// <summary>
 /// Initializes a new instance of the <see cref="FileHierarchyEnumerator" /> class.
 /// </summary>
 /// <param name="location">Directory location; stored as given.</param>
 /// <param name="baseDirectory">Base directory; stored as given.</param>
 /// <param name="followSymlink">Whether symlinked subdirectories should be enumerated.</param>
 public FileHierarchyEnumerator(DirectoryLocation location, string baseDirectory, bool followSymlink)
 {
     // Plain field initialization; the three assignments are independent of each other.
     this.followSymlink = followSymlink;
     this.baseDirectory = baseDirectory;
     this.location = location;
 }
Beispiel #10
0
 /// <summary>
 /// Initializes a new instance of the <see cref="FileEnumerator" /> class with an explicit symlink policy.
 /// </summary>
 /// <param name="location">Directory location; stored as given.</param>
 /// <param name="followSymlink">Whether symlinked subdirectories should be enumerated.</param>
 public FileEnumerator(DirectoryLocation location, bool followSymlink)
 {
     // Plain field initialization; the two assignments are independent of each other.
     this.followSymlink = followSymlink;
     this.location = location;
 }
Beispiel #11
0
        /// <summary>
        /// Calculates metrics for the given calls against the truth set and writes one results file per
        /// size range into <paramref name="outputPath"/>.
        /// </summary>
        /// <param name="knownCN">Truth-set intervals, keyed by chromosome name.</param>
        /// <param name="cnvCallsPath">Path of the CNV calls file; forwarded to <c>WriteResults</c>.</param>
        /// <param name="outputPath">Output directory; created if needed.</param>
        /// <param name="includePassingOnly">
        /// Forwarded to the metric calculation and result writing. It also selects the file mode:
        /// <c>true</c> creates (overwrites) the results file, <c>false</c> appends to it.
        /// </param>
        /// <param name="options">Evaluation options (size splitting, k-mer fasta, base file name, ...).</param>
        /// <param name="calls">CNV calls, keyed by chromosome name.</param>
        public void ComputeAccuracy(Dictionary <string, List <CNInterval> > knownCN, string cnvCallsPath, string outputPath, bool includePassingOnly, EvaluateCnvOptions options, Dictionary <string, List <CnvCall> > calls)
        {
            bool hasRegionsOfInterest = !_cnvChecker.RegionsOfInterest.Empty();

            // The first counter covers every size; the optional ones split the results by size range.
            var baseCounters = new List <BaseCounter>();
            baseCounters.Add(new BaseCounter(MaxCn, 0, int.MaxValue, hasRegionsOfInterest));
            if (options.SplitBySize)
            {
                baseCounters.AddRange(new[]
                {
                    new BaseCounter(MaxCn, 0, 4999, hasRegionsOfInterest),
                    new BaseCounter(MaxCn, 5000, 9999, hasRegionsOfInterest),
                    new BaseCounter(MaxCn, 10000, 99999, hasRegionsOfInterest),
                    new BaseCounter(MaxCn, 100000, 499999, hasRegionsOfInterest),
                    new BaseCounter(MaxCn, 500000, int.MaxValue, hasRegionsOfInterest)
                });
            }

            // Make a note of how many bases in the truth set are not *actually* considered to be known
            // bases, using the "cnaqc" exclusion set.
            // Not parallel here as parallelism will be attained at the level of regression workflow.
            _cnvChecker.CountExcludedBasesInTruthSetIntervals(knownCN);

            // Optional per-chromosome masks of unique k-mers; stays null when no k-mer fasta is given.
            Dictionary <string, BitArray> uniqueKmerMasks = null;
            if (options.KmerFa != null)
            {
                uniqueKmerMasks = new Dictionary <string, BitArray>();
                foreach (string chromosome in knownCN.Keys)
                {
                    string sequence = FastaLoader.LoadFastaSequence(options.KmerFa, chromosome);
                    var    mask     = new BitArray(sequence.Length);
                    // Unique k-mers are indicated by upper-case letters in the fasta file.
                    for (int position = 0; position < sequence.Length; position++)
                    {
                        mask[position] = char.IsUpper(sequence[position]);
                    }
                    uniqueKmerMasks[chromosome] = mask;
                }
            }

            foreach (var baseCounter in baseCounters)
            {
                _cnvChecker.InitializeIntervalMetrics(knownCN);
                var metrics = CalculateMetrics(knownCN, calls, baseCounter, options.SkipDiploid, includePassingOnly, uniqueKmerMasks);

                // File name: <base>[_denovo][_<min>kb(+|_<max>kb)].txt
                string fileName = $"{options.BaseFileName}";
                if (options.DQscoreThreshold.HasValue)
                {
                    fileName += "_denovo";
                }

                bool coversAllSizes = baseCounter.MinSize == 0 && baseCounter.MaxSize == int.MaxValue;
                if (!coversAllSizes)
                {
                    fileName += $"_{Math.Round(baseCounter.MinSize / 1000.0)}kb";
                    if (baseCounter.MaxSize == int.MaxValue)
                    {
                        fileName += "+";
                    }
                    else
                    {
                        fileName += $"_{ Math.Round(baseCounter.MaxSize / 1000.0)}kb";
                    }
                }
                fileName += ".txt";

                var outputDir = new DirectoryLocation(outputPath);
                outputDir.Create();
                var outputFile = outputDir.GetFileLocation(fileName);

                FileMode outputMode;
                if (includePassingOnly)
                {
                    outputMode = FileMode.Create;
                }
                else
                {
                    outputMode = FileMode.Append;
                }

                using (var stream = new FileStream(outputFile.FullName, outputMode, FileAccess.Write))
                using (var outputWriter = new StreamWriter(stream))
                {
                    outputWriter.NewLine = "\n";
                    WriteResults(cnvCallsPath, outputWriter, baseCounter, includePassingOnly, metrics);
                }
            }
        }
 /// <summary>
 /// Initializes a new instance of the <see cref="FileEnumerator" /> class for the given directory location.
 /// </summary>
 /// <param name="location">Directory location; stored as given, no validation is performed here.</param>
 public FileEnumerator(DirectoryLocation location)
 {
     this.location = location;
 }
Beispiel #13
0
        /// <summary>
        /// Evaluates CNV calls against a truth set and writes the results below <paramref name="outputPath"/>.
        /// </summary>
        /// <remarks>
        /// Truth entries and calls shorter than <c>options.MinEntrySize</c> are dropped first. When the
        /// calls file name contains "vcf" (any casing), accuracy is computed twice: once with the
        /// passing-only flag set and once without it.
        /// </remarks>
        /// <param name="truthSetPath">Path of the truth set, loaded with <c>LoadKnownCn</c>.</param>
        /// <param name="cnvCallsPath">Path of the CNV calls, loaded with <c>GetCnvCallsFromVcf</c>.</param>
        /// <param name="excludedBed">Optional BED file of intervals to exclude; ignored when null or empty.</param>
        /// <param name="outputPath">Output directory.</param>
        /// <param name="options">Evaluation options.</param>
        /// <exception cref="ArgumentException">
        /// Thrown inside the work-doer callback when a DQ score threshold is set but the calls file
        /// name does not contain "vcf".
        /// </exception>
        public static void Evaluate(string truthSetPath, string cnvCallsPath, string excludedBed, string outputPath, EvaluateCnvOptions options)
        {
            double heterogeneityFraction = options.HeterogeneityFraction;
            var    knownCn = LoadKnownCn(truthSetPath, heterogeneityFraction);

            knownCn = knownCn.SelectValues(
                truthEntries => truthEntries.Where(truthEntry => truthEntry.Length >= options.MinEntrySize).ToList());
            var calls = GetCnvCallsFromVcf(cnvCallsPath, options.DQscoreThreshold);

            calls = calls.SelectValues(
                chromosomeCalls => chromosomeCalls.Where(call => call.Length >= options.MinEntrySize).ToList());

            // LoadRegionsOfInterest(options.RoiBed?.FullName);
            var excludeIntervals = new Dictionary <string, List <CNInterval> >();

            if (!string.IsNullOrEmpty(excludedBed))
            {
                var           excludeIntervalsTmp = LoadIntervalsFromBed(excludedBed, false, 1.0);
                List <string> keys = excludeIntervalsTmp.Keys.ToList();
                foreach (string key in keys)
                {
                    // Match the BED chromosome name against the calls: as written, then without
                    // "chr", then with a "chr" prefix.
                    string chr = key;
                    if (!calls.ContainsKey(chr))
                    {
                        chr = key.Replace("chr", "");
                    }
                    if (!calls.ContainsKey(chr))
                    {
                        chr = "chr" + key;
                    }
                    if (!calls.ContainsKey(chr))
                    {
                        // Fix: the two sentences used to run together ("...truth data.Check that...").
                        Console.WriteLine($"Error: Skipping exclude intervals for chromosome {key} with no truth data. " +
                                          $"Check that chromosome names are spelled correctly for exclude intervals");
                        continue;
                    }
                    excludeIntervals[chr] = excludeIntervalsTmp[key];
                }
            }
            Console.WriteLine("TruthSet\t{0}", truthSetPath);
            Console.WriteLine("CNVCalls\t{0}", cnvCallsPath);

            // Computed once, culture-independently; also reused for the --dqscore check below.
            bool includePassingOnly = Path.GetFileName(cnvCallsPath).IndexOf("vcf", StringComparison.OrdinalIgnoreCase) >= 0;
            var  logger             = new Logger(new[] { Console.Out }, new[] { Console.Error });
            var  settings           = IsasConfigurationSettings.GetConfigSettings();
            var  output             = new DirectoryLocation(outputPath);
            var  workerDirectory    = new DirectoryLocation(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CNVChecker)));
            var  commandManager     = new CommandManager(new ExecutableProcessor(settings, logger, workerDirectory));

            WorkDoerFactory.RunWithWorkDoer(logger, settings, output, workDoer =>
            {
                var tabixWrapper    = TabixWrapperFactory.GetTabixWrapper(logger, workDoer, commandManager);
                var ploidyCorrector = new PloidyCorrector(logger, workDoer,
                                                          new PloidyEstimator(logger, workDoer, null, false, commandManager), tabixWrapper, false);
                var checker = new CNVChecker(options.DQscoreThreshold, excludeIntervals, ploidyCorrector);
                if (options.PloidyInfo.SexPloidyInfo != null)
                {
                    Console.WriteLine($">>>Getting reference ploidy from provided ploidy information and PAR bed file '{options.PloidyInfo.ParBed}'");

                    var ploidy          = checker.GetPloidy(options.PloidyInfo, output);
                    var referencePloidy = LoadReferencePloidy(options.PloidyInfo.SexPloidyInfo, options.PloidyInfo.ParBed);
                    knownCn             = GetKnownCopyNumberWithReferencePloidy(referencePloidy, knownCn);
                    calls = GetCallsWithRefPloidy(calls, ploidy);
                }
                var cnvEvaluator = new CnvEvaluator(checker);

                if (checker.DQscoreThreshold.HasValue && !includePassingOnly)
                {
                    throw new ArgumentException("CNV.vcf must be in a vcf format when --dqscore option is used");
                }
                cnvEvaluator.ComputeAccuracy(knownCn, cnvCallsPath, outputPath, includePassingOnly, options, calls);
                if (includePassingOnly)
                {
                    // Second pass with the passing-only flag cleared.
                    cnvEvaluator.ComputeAccuracy(knownCn, cnvCallsPath, outputPath, false, options, calls);
                }
                ComputeCallability(logger, calls, options, output);
                Console.WriteLine(">>>Done - results written to {0}", outputPath);
            });
        }
Beispiel #14
0
        /// <summary>
        /// Parses the command line, validates the inputs and runs the pedigree caller.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        /// <returns>
        /// 0 when the help text is shown (this includes unknown arguments and missing required
        /// arguments), 1 when an input file does not exist, otherwise the value returned by
        /// <c>CallVariants</c> (-1 if the work-doer callback never assigns it).
        /// </returns>
        /// <exception cref="IlluminaException">
        /// A quality or de novo quality threshold is negative or not below <c>MaxQscore</c>.
        /// </exception>
        private static int Run(string[] args)
        {
            Utilities.LogCommandLine(args);
            string outDir                 = null;
            var    segmentFiles           = new List <string>();
            var    variantFrequencyFiles  = new List <string>();
            var    sampleTypesString      = new List <string>();
            string ploidyBedPath          = null;
            string referenceFolder        = null;
            var    sampleNames            = new List <string>();
            bool   needHelp               = false;
            int?   qScoreThresholdOption  = null;
            int?   dqScoreThresholdOption = null;
            string commonCnvsBedPath      = null;
            string parameterconfigPath    = Path.Combine(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(Program)), "PedigreeCallerParameters.json");

            var p = new OptionSet()
            {
                { "i|infile=", "file containing bins, their counts, and assigned segments (obtained from CanvasPartition.exe)", v => segmentFiles.Add(v) },
                { "v|varfile=", "file containing variant frequencies (obtained from CanvasSNV.exe)", v => variantFrequencyFiles.Add(v) },
                { "t|sampleType=", "sample types", v => sampleTypesString.Add(v) },
                { "o|outdir=", "name of output directory", v => outDir = v },
                { "r|reference=", "reference genome folder that contains GenomeSize.xml", v => referenceFolder = v },
                { "n|sampleName=", "sample name for output VCF header (optional)", v => sampleNames.Add(v) },
                { "p|ploidyBed=", "bed file specifying reference ploidy (e.g. for sex chromosomes) (optional)", v => ploidyBedPath = v },
                { "h|help", "show this message and exit", v => needHelp = v != null },
                { "q|qscore=", $"quality filter threshold (default {CanvasPedigreeCaller.DefaultQualityFilterThreshold})", v => qScoreThresholdOption = int.Parse(v) },
                { "commoncnvs=", "bed file with common CNVs (always include these intervals into segmentation results)", v => commonCnvsBedPath = v },
                { "d|dqscore=", $"de novo quality filter threshold (default {CanvasPedigreeCaller.DefaultDeNovoQualityFilterThreshold})", v => dqScoreThresholdOption = int.Parse(v) },
                { "c|config=", $"parameter configuration path (default {parameterconfigPath})", v => parameterconfigPath = v }
            };

            var extraArgs = p.Parse(args);

            if (extraArgs.Count > 0)
            {
                Console.WriteLine("* Error: I don't understand the argument '{0}'", extraArgs[0]);
                needHelp = true;
            }

            // NOTE(review): both help paths below return 0 even when triggered by bad or missing
            // arguments; kept as-is because callers may depend on the exit code.
            if (needHelp)
            {
                ShowHelp(p);
                return(0);
            }

            if (!segmentFiles.Any() || !variantFrequencyFiles.Any() || string.IsNullOrEmpty(referenceFolder) || string.IsNullOrEmpty(outDir))
            {
                ShowHelp(p);
                return(0);
            }

            foreach (string segmentFile in segmentFiles)
            {
                if (File.Exists(segmentFile))
                {
                    continue;
                }
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {segmentFile} does not exist! Exiting.");
                return(1);
            }

            foreach (string variantFrequencyFile in variantFrequencyFiles)
            {
                if (File.Exists(variantFrequencyFile))
                {
                    continue;
                }
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {variantFrequencyFile} does not exist! Exiting.");
                return(1);
            }

            var sampleTypesEnum = sampleTypesString.Select(GetSampleType).ToList();

            if (!File.Exists(Path.Combine(referenceFolder, "GenomeSize.xml")))
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {Path.Combine(referenceFolder, "GenomeSize.xml")} does not exist! Exiting.");
                return(1);
            }

            if (!File.Exists(parameterconfigPath))
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {parameterconfigPath} does not exist! Exiting.");
                return(1);
            }

            // The common-CNVs bed file is optional, but must exist when it is given.
            if (commonCnvsBedPath != null && !File.Exists(commonCnvsBedPath))
            {
                Console.WriteLine($"CanvasPedigreeCaller.exe: File {commonCnvsBedPath} does not exist! Exiting.");
                return(1);
            }

            var parameterconfigFile = new FileLocation(parameterconfigPath);
            var callerParameters    = Deserialize <PedigreeCallerParameters>(parameterconfigFile);

            int qScoreThreshold = CanvasPedigreeCaller.DefaultQualityFilterThreshold;

            if (qScoreThresholdOption != null)
            {
                qScoreThreshold = qScoreThresholdOption.Value;
                Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied quality score threshold {qScoreThresholdOption}.");
            }
            if (qScoreThreshold < 0 || qScoreThreshold >= callerParameters.MaxQscore)
            {
                throw new IlluminaException($"Quality score threshold must be >= 0 and < {callerParameters.MaxQscore}");
            }

            int dqScoreThreshold = CanvasPedigreeCaller.DefaultDeNovoQualityFilterThreshold;

            if (dqScoreThresholdOption != null)
            {
                dqScoreThreshold = dqScoreThresholdOption.Value;
                // Fix: this message used to print the regular quality threshold option instead of the de novo one.
                Console.WriteLine($"CanvasPedigreeCaller.exe: Using user-supplied de novo quality score threshold {dqScoreThresholdOption}.");
            }
            if (dqScoreThreshold < 0 || dqScoreThreshold >= callerParameters.MaxQscore)
            {
                throw new IlluminaException($"De novo quality score threshold must be >= 0 and < {callerParameters.MaxQscore}");
            }

            var logger                      = new Logger(new[] { Console.Out }, new[] { Console.Error });
            var settings                    = IsasConfigurationSettings.GetConfigSettings();
            var outputDirectory             = new DirectoryLocation(outDir);
            var workerDirectory             = new DirectoryLocation(Isas.Framework.Utilities.Utilities.GetAssemblyFolder(typeof(CanvasPedigreeCaller)));
            var commandManager              = new CommandManager(new ExecutableProcessor(settings, logger, workerDirectory));
            var result                      = -1;
            var pedigreeCallerWorkDirectory = outputDirectory.GetDirectoryLocation("CanvasPedigreeCaller");

            WorkDoerFactory.RunWithWorkDoer(logger, settings, pedigreeCallerWorkDirectory, workDoer =>
            {
                var copyNumberLikelihoodCalculator = new CopyNumberLikelihoodCalculator(callerParameters.MaximumCopyNumber);
                var variantCaller = callerParameters.DefaultCaller == CallerType.VariantCaller ?
                                    (IVariantCaller) new VariantCaller(copyNumberLikelihoodCalculator, callerParameters, qScoreThreshold) :
                                    new HaplotypeVariantCaller(copyNumberLikelihoodCalculator, callerParameters, qScoreThreshold);

                var copyNumberModelFactory = new HaplotypeCopyNumberModelFactory();
                var referenceGenome        = new ReferenceGenomeFactory().GetReferenceGenome(new DirectoryLocation(referenceFolder));
                var genomeMetadata         = referenceGenome.GenomeMetadata;

                var coverageBigWigWriterFactory =
                    new CoverageVisualizationWriterFactory(logger, workDoer, commandManager, genomeMetadata);
                var roundingBedGraphWriter        = new RoundingBedGraphWriter(new BedGraphWriterFacade(), 4);
                var coverageBigWigWriter          = coverageBigWigWriterFactory.CreateBinCoverageBigWigWriter(roundingBedGraphWriter);
                var segmentCoverageBedGraphWriter = coverageBigWigWriterFactory.CreateSegmentBedGraphWriter(roundingBedGraphWriter);

                var tabixWrapper             = TabixWrapperFactory.GetTabixWrapper(logger, workDoer, commandManager);
                var bgzfBedGraphWriter       = new BgzfBedGraphWriter(new BedGraphWriterFacade(), tabixWrapper);
                var copyNumberBedGraphWriter = new CopyNumberBedGraphWriter(bgzfBedGraphWriter, new CopyNumberBedGraphCalculator());

                var caller = new CanvasPedigreeCaller(logger, qScoreThreshold, dqScoreThreshold, callerParameters, copyNumberLikelihoodCalculator, variantCaller, coverageBigWigWriter, copyNumberModelFactory, copyNumberBedGraphWriter, segmentCoverageBedGraphWriter);

                var outVcf = outputDirectory.GetFileLocation("CNV.vcf.gz");
                result     = caller.CallVariants(variantFrequencyFiles, segmentFiles, outVcf, ploidyBedPath, referenceFolder, sampleNames, commonCnvsBedPath, sampleTypesEnum);
            });
            return(result);
        }
Beispiel #15
0
 /// <summary>
 /// Resolves <paramref name="FileName"/> through <c>getRootedFileName</c> and passes the resulting
 /// path, together with <paramref name="Delimiter"/>, to <c>readFile</c>.
 /// </summary>
 /// <param name="FileName">File name to resolve.</param>
 /// <param name="DirectoryLocation">Location used to resolve <paramref name="FileName"/>.</param>
 /// <param name="Delimiter">Delimiter handed to <c>readFile</c>; defaults to a comma.</param>
 /// <returns>The two-dimensional string array produced by <c>readFile</c>.</returns>
 public static string[,] ReadFile(string FileName, DirectoryLocation DirectoryLocation, char Delimiter = ',')
 {
     string rootedFileName = getRootedFileName(FileName, DirectoryLocation);

     return readFile(rootedFileName, Delimiter);
 }