// Loads the forced-genotyping alleles from every file listed in
// _options.ForcedAllelesFileNames and records them in _forcedAllelesByChrom,
// keyed by chromosome. Each accepted allele is stored as a
// (chromosome, position, reference allele, alternate allele) tuple.
// Does nothing when no forced-allele files are configured.
// Variants rejected by IsValidAlt are logged and skipped.
private void GetForcedAlleles()
{
    if (_options.ForcedAllelesFileNames == null || _options.ForcedAllelesFileNames.Count == 0)
    {
        return;
    }

    foreach (var fileName in _options.ForcedAllelesFileNames)
    {
        using (var reader = new AlleleReader(fileName, false, false))
        {
            foreach (var variant in reader.GetVariants())
            {
                var chr = variant.Chromosome;
                var pos = variant.ReferencePosition;

                // Normalise case with the invariant culture: these strings are used as
                // set keys, so the result must not depend on the machine's current culture.
                var refAllele = variant.ReferenceAllele.ToUpperInvariant();
                var altAllele = variant.AlternateAllele.ToUpperInvariant();

                // The per-chromosome set is created before validation, so a chromosome
                // whose variants are all rejected still ends up with an (empty) entry.
                if (!_forcedAllelesByChrom.ContainsKey(chr))
                {
                    _forcedAllelesByChrom[chr] = new HashSet<Tuple<string, int, string, string>>();
                }

                if (!IsValidAlt(altAllele, refAllele))
                {
                    Logger.WriteToLog($"Invalid forced genotyping variant: {variant}");
                    continue;
                }

                _forcedAllelesByChrom[chr].Add(new Tuple<string, int, string, string>(chr, pos, refAllele, altAllele));
            }
        }
    }
}
// Reads every variant from VcfTestFile_1 and checks the total count plus the
// reference positions of the first and last records.
public void GetVariantsTests()
{
    // AlleleReader is disposable (it is used in 'using' elsewhere in this file),
    // so release it even when an assertion below fails.
    using (var vr = new AlleleReader(VcfTestFile_1))
    {
        var allVar = vr.GetVariants().ToList();

        Assert.Equal(24, allVar.Count);
        Assert.Equal(10, allVar.First().ReferencePosition);
        Assert.Equal(4000, allVar.Last().ReferencePosition);
    }
}
// Tests two bams staged in different folders.
// Expectations:
// - if outputFolder is not specified, logs are in the directory of the first bam
// - if outputFolder is specified, logs are in the output folder
// - vcf files have a header and both chromosomes; output is where normally expected
//
// numberOfThreads is passed straight to GenomeProcessor.Execute.
// outputFolder is optional; when null or empty, no " -OutFolder" argument is
// added to the simulated command line.
private void ExecuteTest(int numberOfThreads, string outputFolder = null)
{
    var sourcePath = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    var otherTestDirectory = Path.Combine(TestPaths.LocalScratchDirectory, "MultiProcessIn");
    var bamFilePath1 = Stage(sourcePath, "In1", otherTestDirectory + "1");
    var bamFilePath2 = Stage(sourcePath, "In2", otherTestDirectory + "2");

    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

    // NOTE(review): the argument string says "-gVCF false" while OutputGvcfFile is
    // set to true below — confirm which of the two the test is meant to exercise.
    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath1, bamFilePath2 },
        GenomePaths = new[] { genomePath },
        OutputDirectory = outputFolder,
        CommandLineArguments = string.Format(
            "-B {0},{1} -g {2}{3} -gVCF false",
            bamFilePath1,
            bamFilePath2,
            genomePath,
            string.IsNullOrEmpty(outputFolder) ? string.Empty : " -OutFolder " + outputFolder).Split(' '),
        VcfWritingParameters = new VcfWritingParameters()
        {
            OutputGvcfFile = true
        }
    };

    options.SetIODirectories("Pisces");
    var factory = new Factory(options);

    // Remove output left over from a previous run so the assertions below only
    // ever look at files produced by this execution.
    foreach (var workRequest in factory.WorkRequests)
    {
        if (File.Exists(workRequest.OutputFilePath))
        {
            File.Delete(workRequest.OutputFilePath);
        }
    }

    Logger.OpenLog(options.LogFolder, options.LogFileName, true);
    try
    {
        var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(options.GenomePaths[0]), false, true);
        processor.Execute(numberOfThreads);
    }
    finally
    {
        // Close the log even when processing throws, so a failure here does not
        // leave the log open for whatever runs next.
        Logger.CloseLog();
    }

    foreach (var workRequest in factory.WorkRequests)
    {
        using (var reader = new AlleleReader(workRequest.OutputFilePath))
        {
            Assert.True(reader.HeaderLines.Any());

            var variants = reader.GetVariants().ToList();

            Assert.Equal(251, variants.Count);
            Assert.Equal("chr17", variants.First().Chromosome);
            Assert.Equal("chr19", variants.Last().Chromosome);
        }
    }

    Assert.True(Directory.GetFiles(options.LogFolder, options.LogFileNameBase).Any());
}
// Runs the genome processor over two bams with interval files and checks how many
// variants land in each output vcf: first with one populated and one empty interval
// file, then with only the empty interval file configured.
// The throttle flag is forwarded to the GenomeProcessor constructor.
private void ExecuteEmptyIntervalsTest(bool throttle)
{
    // Scenario 1: one bam has intervals, the other has an empty interval file.
    var firstBam = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    var secondBam = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19_removedSQlines.bam");
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");
    var populatedIntervals = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17only.picard");
    var noIntervals = Path.Combine(TestPaths.LocalTestDataDirectory, "empty.picard");
    var mixedOutputFolder = Path.Combine(TestPaths.LocalTestDataDirectory, "EmptyIntervalsTest_Mixed");

    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { firstBam, secondBam },
        IntervalPaths = new[] { populatedIntervals, noIntervals },
        GenomePaths = new[] { genomePath },
        OutputDirectory = mixedOutputFolder,
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true
        }
    };

    var factory = new Factory(options);
    var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle);
    processor.Execute(2);

    // The first vcf should have been processed as usual.
    using (var firstVcf = new AlleleReader(factory.WorkRequests.First().OutputFilePath))
    {
        Assert.Equal(11, firstVcf.GetVariants().Count());
    }

    // The second vcf should contain no variants.
    using (var secondVcf = new AlleleReader(factory.WorkRequests.Last().OutputFilePath))
    {
        Assert.Equal(0, secondVcf.GetVariants().Count());
    }

    // Scenario 2: run again with only the empty interval file configured.
    options.IntervalPaths = new[] { noIntervals };
    options.OutputDirectory = Path.Combine(TestPaths.LocalTestDataDirectory, "EmptyIntervalsTest_All");

    factory = new Factory(options);
    processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle);
    processor.Execute(2);

    // Every output vcf should now be empty.
    foreach (var request in factory.WorkRequests)
    {
        using (var vcf = new AlleleReader(request.OutputFilePath))
        {
            Assert.Equal(0, vcf.GetVariants().Count());
        }
    }
}
// Tests that we get the same results, in the same order, when using multiple samples
// and intervals. First runs two samples together, then runs the two samples
// individually, then runs the two-sample factory again "thread by chr".
// Nothing strange should happen.
// Based on a real bug where a gvcf was found out of order, which only happened for
// multiple-bam runs with different interval files.
public void IntervalTestingWithMultipleSamples()
{
    var bamFile1Path = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam"); // has data from chr17,7572952 and chr19,3118883
    var bamFile2Path = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17again.bam");
    var interval1Path = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17int.picard"); // chr 17 only
    var interval2Path = Path.Combine(TestPaths.LocalTestDataDirectory, "poorlyOrdered.picard"); // disordered, chr 19 first.
    var outDir = Path.Combine(TestPaths.LocalTestDataDirectory, "IntervalTests");
    var vcfFile1Path = Path.Combine(outDir, "Chr17Chr19.genome.vcf"); // only results from chr17
    var vcfFile2Path = Path.Combine(outDir, "Chr17again.genome.vcf"); // show results from chr17 and 19
    var vcfExpectedFile1 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.expected.genome.vcf"); // only results from chr17
    var vcfExpectedFile2 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17again.expected.genome.vcf"); // show results from chr17 and 19
    var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "fourChrs");

    var twoSampleFactory = MakeFactory(new List<string> { bamFile1Path, bamFile2Path }, new List<string> { interval1Path, interval2Path }, outDir);
    var firstSampleFactory = MakeFactory(new List<string> { bamFile1Path }, new List<string> { interval1Path }, outDir);
    var secondSampleFactory = MakeFactory(new List<string> { bamFile2Path }, new List<string> { interval2Path }, outDir);

    // Regular two-sample run mode.
    var genome = twoSampleFactory.GetReferenceGenome(genomeDirectory);
    var genome1 = firstSampleFactory.GetReferenceGenome(genomeDirectory);
    var genome2 = secondSampleFactory.GetReferenceGenome(genomeDirectory);

    var processor = new GenomeProcessor(twoSampleFactory, genome);

    var chrs = genome.ChromosomesToProcess;
    Assert.Equal("chr7", chrs[0]);
    Assert.Equal("chr8", chrs[1]);
    Assert.Equal("chr17", chrs[2]);
    Assert.Equal("chr19", chrs[3]);

    processor.InternalExecute(10);

    // The chromosome order must be the same after execution as before it.
    chrs = genome.ChromosomesToProcess;
    Assert.Equal("chr7", chrs[0]);
    Assert.Equal("chr8", chrs[1]);
    Assert.Equal("chr17", chrs[2]);
    Assert.Equal("chr19", chrs[3]);

    // Just be aware: when we process the samples individually, we use different genome lists.
    Assert.Equal(4, genome.ChromosomesToProcess.Count);
    Assert.Equal(1, genome1.ChromosomesToProcess.Count);
    Assert.Equal(4, genome2.ChromosomesToProcess.Count);
    Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);
    Assert.Equal("chr7", genome2.ChromosomesToProcess[0]);
    Assert.Equal("chr19", genome2.ChromosomesToProcess[3]);

    // The expected-result readers stay open for the rest of the test and are
    // disposed on every exit path, including assertion failures.
    using (var readerExp1 = new AlleleReader(vcfExpectedFile1))
    using (var readerExp2 = new AlleleReader(vcfExpectedFile2))
    {
        // The expected results:
        var contigs1Expected = GetContigs(readerExp1);
        var contigs2Expected = GetContigs(readerExp2);
        var vcf1Expected = readerExp1.GetVariants().ToList();
        var vcf2Expected = readerExp2.GetVariants().ToList();

        // NOTE(review): in all three rounds the observed variant lists are read but
        // never handed to CheckForOrdering — confirm what that helper compares the
        // expected variant lists against.

        // Round 1: both samples processed together.
        using (var reader1 = new AlleleReader(vcfFile1Path))
        using (var reader2 = new AlleleReader(vcfFile2Path))
        {
            var contigs1Results = GetContigs(reader1);
            var contigs2Results = GetContigs(reader2);
            var vcf1Results = reader1.GetVariants().ToList();
            var vcf2Results = reader2.GetVariants().ToList();

            Assert.Equal(4, contigs1Results.Count);
            Assert.Equal(4, contigs2Results.Count);
            Assert.Equal(11, vcf1Results.Count);
            Assert.Equal(71, vcf2Results.Count);

            // Check variants and contigs all come out the same.
            CheckForOrdering(contigs1Results, contigs2Results, contigs1Expected, contigs2Expected, vcf1Expected, vcf2Expected);
        }

        File.Delete(vcfFile1Path);
        File.Delete(vcfFile2Path);

        // Round 2: now check again, processing the samples separately.
        processor = new GenomeProcessor(firstSampleFactory, genome1);
        processor.InternalExecute(10);

        processor = new GenomeProcessor(secondSampleFactory, genome2);
        processor.InternalExecute(10);

        using (var reader1 = new AlleleReader(vcfFile1Path))
        using (var reader2 = new AlleleReader(vcfFile2Path))
        {
            var contigs1Results = GetContigs(reader1);
            var contigs2Results = GetContigs(reader2);

            // Still read the variants so both output files are parsed end to end.
            var vcf1Results = reader1.GetVariants().ToList();
            var vcf2Results = reader2.GetVariants().ToList();

            // Check variants all come out the same (the contigs will be different as shown).
            CheckForOrdering(contigs1Results, contigs2Results, new List<string>() { "chr17" }, contigs2Expected, vcf1Expected, vcf2Expected);
        }

        // Delete BOTH outputs: leaving the second sample's file in place would let
        // stale round-2 output satisfy the round-3 checks if that run failed to
        // rewrite it.
        File.Delete(vcfFile1Path);
        File.Delete(vcfFile2Path);

        // Round 3: now check again, processing them the "thread by chr" way.
        processor = new GenomeProcessor(twoSampleFactory, genome, false);
        processor.InternalExecute(10);

        using (var reader1 = new AlleleReader(vcfFile1Path))
        using (var reader2 = new AlleleReader(vcfFile2Path))
        {
            var contigs1Results = GetContigs(reader1);
            var contigs2Results = GetContigs(reader2);

            // Still read the variants so both output files are parsed end to end.
            var vcf1Results = reader1.GetVariants().ToList();
            var vcf2Results = reader2.GetVariants().ToList();

            // Check variants all come out the same (the contigs will be back to normal).
            CheckForOrdering(contigs1Results, contigs2Results, contigs2Expected, contigs2Expected, vcf1Expected, vcf2Expected);
        }

        File.Delete(vcfFile1Path);
        File.Delete(vcfFile2Path);
    }
}
// Runs a single bam with a chr17-only interval file through the vcf factory and
// compares the produced vcf (filters, contigs and variants) with an expected vcf.
public void IntervalTestingWithVcf()
{
    var bamFile1Path = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam"); // has data from chr17,7572952 and chr19,3118883
    var interval1Path = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17int.picard"); // chr 17 only
    var outDir = Path.Combine(TestPaths.LocalTestDataDirectory, "IntervalTests");
    var vcfObservedFile1Path = Path.Combine(outDir, "Chr17Chr19.vcf"); // only results from chr17
    var vcfExpectedFile1 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.expected.vcf"); // only results from chr17
    var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "fourChrs");

    var factory = MakeVcfFactory(new List<string> { bamFile1Path }, new List<string> { interval1Path }, outDir);
    var genome1 = factory.GetReferenceGenome(genomeDirectory);
    var processor = new GenomeProcessor(factory, genome1);

    var chrs = genome1.ChromosomesToProcess;
    Assert.Equal("chr17", chrs[0]);

    processor.InternalExecute(10);

    Assert.Equal(1, genome1.ChromosomesToProcess.Count);
    Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);

    // Both readers are disposed on every exit path, including assertion failures.
    using (var reader1 = new AlleleReader(vcfObservedFile1Path))
    using (var readerExp1 = new AlleleReader(vcfExpectedFile1))
    {
        var observedFilters1Results = GetFilters(reader1);
        var observedContigs1Results = GetContigs(reader1);
        var observedVcf1Results = reader1.GetVariants().ToList();

        // The expected results:
        var filters1Expected = GetFilters(readerExp1);
        var contigs1Expected = GetContigs(readerExp1);
        var vcf1Expected = readerExp1.GetVariants().ToList();

        /*
        ##FILTER=<ID=q30,Description="Quality score less than 30">
        ##FILTER=<ID=SB,Description="Variant strand bias too high">
        ##FILTER=<ID=R5x9,Description="Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9">
        ##FILTER=<ID=NC,Description="No-call rate is above 0.6">
        */
        Assert.Equal(4, observedFilters1Results.Count);

        // ##contig=<ID=chr17,length=7573100>
        Assert.Equal(1, observedContigs1Results.Count);
        Assert.Equal(1, observedVcf1Results.Count);

        // Check variants and contigs all come out the same.
        // NOTE(review): each loop is bounded by the EXPECTED count, so an expected
        // file with fewer entries than the observed one would still pass — confirm
        // the expected file holds 4 filters, 1 contig and 1 variant.
        for (int i = 0; i < contigs1Expected.Count; i++)
        {
            Assert.Equal(contigs1Expected[i], observedContigs1Results[i]);
        }

        for (int i = 0; i < filters1Expected.Count; i++)
        {
            Assert.Equal(filters1Expected[i].ToString(), observedFilters1Results[i].ToString());
        }

        for (int i = 0; i < vcf1Expected.Count; i++)
        {
            Assert.Equal(vcf1Expected[i].ToString(), observedVcf1Results[i].ToString());
        }
    }

    File.Delete(vcfObservedFile1Path);
}