// Runs the small chr19 BAM through GenomeProcessor twice against a mock
// reference: once with MinimumDepth = 1000 and once without an explicit
// MinimumDepth. After each run the genome VCF is read back from disk.
public void Pisces_LowDepthTest()
{
    var mockReferences = new List<ChrReference>
    {
        new ChrReference
        {
            Name = "chr19",
            Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
        }
    };

    // First pass: explicit minimum depth of 1000.
    // (IntervalPaths = new[] { _intervalsChr17Chr19 } is intentionally left unset.)
    var highDepthOptions = new ApplicationOptions
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        OutputgVCFFiles = true,
        MinimumDepth = 1000,
        OutputFolder = UnitTestPaths.TestDataDirectory
    };

    var genomeVcfPath = Path.ChangeExtension(highDepthOptions.BAMPaths[0], "genome.vcf");

    var highDepthFactory = new Factory(highDepthOptions);
    IGenome mockGenome = new MockGenome(mockReferences, _genomeChr19);

    new GenomeProcessor(highDepthFactory, mockGenome).Execute(1);
    List<VcfVariant> highDepthCalls = VcfReader.GetAllVariantsInFile(genomeVcfPath);

    // Second pass: same inputs, MinimumDepth and MaxSizeMNV left at their defaults.
    // (IntervalPaths = new[] { _intervalsChr17Chr19 } is intentionally left unset.)
    var defaultDepthOptions = new ApplicationOptions
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        OutputgVCFFiles = true,
        OutputFolder = UnitTestPaths.TestDataDirectory
    };

    var defaultDepthFactory = new Factory(defaultDepthOptions);
    new GenomeProcessor(defaultDepthFactory, mockGenome).Execute(1);
    List<VcfVariant> defaultDepthCalls = VcfReader.GetAllVariantsInFile(genomeVcfPath);

    // The count comparisons below are disabled in this test:
    // Assert.NotEqual(coverage1000results.Count, coverage10results.Count);
    // Assert.Equal(coverage1000results.Count, 84);
    // Assert.Equal(coverage10results.Count, 100);
}
// Tests two BAMs staged in different folders.
// Expectations:
//  - if the output folder is not specified, logs are in the directory of the first BAM
//  - if the output folder is specified, logs are in the output folder
//  - VCF files have a header and both chromosomes; output is where normally expected
//
// numberOfThreads: thread count passed to GenomeProcessor.Execute.
// outputFolder:    optional output directory; null leaves OutputDirectory unset.
private void ExecuteTest(int numberOfThreads, string outputFolder = null)
{
    var sourcePath = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    var otherTestDirectory = Path.Combine(TestPaths.LocalScratchDirectory, "MultiProcessIn");
    var bamFilePath1 = Stage(sourcePath, "In1", otherTestDirectory + "1");
    var bamFilePath2 = Stage(sourcePath, "In2", otherTestDirectory + "2");
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

    // Only add the -OutFolder argument when an output folder was requested.
    var outFolderArgument = string.IsNullOrEmpty(outputFolder)
        ? string.Empty
        : " -OutFolder " + outputFolder;

    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath1, bamFilePath2 },
        GenomePaths = new[] { genomePath },
        OutputDirectory = outputFolder,
        CommandLineArguments = string.Format(
            "-B {0},{1} -g {2}{3} -gVCF false",
            bamFilePath1, bamFilePath2, genomePath, outFolderArgument).Split(' '),
        VcfWritingParameters = new VcfWritingParameters() { OutputGvcfFile = true }
    };
    options.SetIODirectories("Pisces");

    var factory = new Factory(options);

    // Remove output left over from earlier runs so the assertions below see fresh files.
    foreach (var workRequest in factory.WorkRequests)
    {
        if (File.Exists(workRequest.OutputFilePath))
        {
            File.Delete(workRequest.OutputFilePath);
        }
    }

    Logger.OpenLog(options.LogFolder, options.LogFileName, true);
    try
    {
        var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(options.GenomePaths[0]), false, true);
        processor.Execute(numberOfThreads);
    }
    finally
    {
        // Close the log even when processing throws, so a failure here
        // does not leave the log open for whatever runs next.
        Logger.CloseLog();
    }

    foreach (var workRequest in factory.WorkRequests)
    {
        using (var reader = new VcfReader(workRequest.OutputFilePath))
        {
            Assert.True(reader.HeaderLines.Any());

            var variants = reader.GetVariants().ToList();
            Assert.Equal(251, variants.Count);
            Assert.Equal("chr17", variants.First().ReferenceName);
            Assert.Equal("chr19", variants.Last().ReferenceName);
        }
    }

    Assert.True(Directory.GetFiles(options.LogFolder, options.LogFileNameBase).Any());
}
// Builds a GenomeProcessor from the given factory and genome and executes it
// with a thread count of 1.
//
// threadByChr is kept for caller compatibility. Both branches of the previous
// if/else constructed and executed the processor identically, so the flag has
// no effect on what runs here.
// NOTE(review): if the two modes were meant to differ, the distinguishing
// constructor argument needs to be restored - confirm against callers.
private static void CreateAndExecuteProcessor(bool threadByChr, MockFactoryWithDefaults factory, Genome genome)
{
    var processor = new GenomeProcessor(factory, genome);
    processor.Execute(1);
}
// Executes the processor over a mock factory created with GetMockFactory(2)
// and verifies how often the writer and the chromosome realigner were invoked.
public void FlowWithMultipleBams()
{
    var mockFactory = GetMockFactory(2);

    new GenomeProcessor(mockFactory, GetGenome().Object).Execute(1);

    var writer = mockFactory.MockWriter;
    writer.Verify(w => w.Initialize(), Times.Exactly(2));
    // flush in between each chr
    writer.Verify(w => w.FlushAllBufferedRecords(), Times.Exactly(6));
    writer.Verify(w => w.FinishAll(), Times.Exactly(2));

    mockFactory.MockChrRealigner.Verify(r => r.Execute(), Times.Exactly(6));
}
// Executes the processor with a "chr2" chromosome filter over a mock factory
// whose alignment extractor always reports no further alignments, then
// verifies the call counts on the writer, realigner and extractor mocks.
public void FlowWithChrFilter()
{
    var mockFactory = GetMockFactory();
    var extractor = mockFactory.MockAlignmentExtractor;
    extractor.Setup(e => e.GetNextAlignment(It.IsAny<Read>())).Returns(false);

    new GenomeProcessor(mockFactory, GetGenome().Object, "chr2").Execute(1);

    var writer = mockFactory.MockWriter;
    writer.Verify(w => w.Initialize(), Times.Exactly(1));
    // flush in between each chr
    writer.Verify(w => w.FlushAllBufferedRecords(), Times.Exactly(3));
    writer.Verify(w => w.FinishAll(), Times.Exactly(1));

    mockFactory.MockChrRealigner.Verify(r => r.Execute(), Times.Exactly(1));
    extractor.Verify(r => r.GetNextAlignment(It.IsAny<Read>()), Times.Exactly(2));
}
// Runs a GenomeProcessor once per distinct genome directory in the options,
// applying the configured chromosome filter and thread count. After each
// genome, logs are concatenated unless running inside a sub-process.
protected override void ProgramExecution()
{
    var factory = new Factory(_options);

    foreach (var genomeDirectory in _options.GenomePaths.Distinct())
    {
        var referenceGenome = factory.GetReferenceGenome(genomeDirectory);
        new GenomeProcessor(factory, referenceGenome, _options.ChromosomeFilter)
            .Execute(_options.MaxNumThreads);

        if (_options.InsideSubProcess)
        {
            continue;
        }

        ConcatenateLogs();
    }
}
// Runs the genome processor over the existing BAM folder twice - once with a
// chromosome filter of "chr19" and once with "chr18" - and checks that an
// output BAM is produced each time and differs in size from the input BAM.
public void ReadGenome()
{
    Directory.CreateDirectory(_outputFolder);

    var options_1 = new HygeaOptions()
    {
        BAMPaths = BamProcessorParsingUtils.UpdateBamPathsWithBamsFromFolder(_existingBamFolder),
        GenomePaths = new[] { _existingGenome },
        OutputDirectory = _outputFolder
    };

    var factory = new Factory(options_1);

    // xUnit's Assert.Equal takes (expected, actual); keep that order so a
    // failure message labels the two values correctly.
    Assert.Equal(_outputFilePath, factory.GetOutputFile(_existingBamPath));

    // Run the genome processor using the filter for chr19, it will run through the IndelRealigner path as usual.
    var genome = new Genome(_existingGenome, new List<string>() { "chr19" });
    var gp1 = new GenomeProcessor(factory, genome, "chr19");
    gp1.Execute(1);

    var outputFilePath = Path.Combine(_outputFolder, Path.GetFileName(_existingBamPath));
    Assert.True(File.Exists(outputFilePath));
    Assert.NotEqual(new FileInfo(_existingBamPath).Length, new FileInfo(outputFilePath).Length);

    File.Delete(outputFilePath);

    // Run the genome processor using the filter for chr18 to follow the path in GenomeProcessor.Process
    // for chromosomes outside the filter.
    var gp2 = new GenomeProcessor(factory, genome, "chr18");
    gp2.Execute(1);

    Assert.True(File.Exists(outputFilePath));
    Assert.NotEqual(new FileInfo(_existingBamPath).Length, new FileInfo(outputFilePath).Length);
}
// Entry point: when ThreadByChr is set, runs a BamProcessor against the genome
// of the first work request; otherwise runs a GenomeProcessor once for every
// distinct genome directory. Both use the configured maximum thread count.
public void Execute()
{
    var factory = new Factory(_options);

    if (_options.ThreadByChr)
    {
        var firstRequest = factory.WorkRequests.First();
        var requestGenome = factory.GetReferenceGenome(firstRequest.GenomeDirectory);
        new BamProcessor(factory, requestGenome).Execute(_options.MaxNumThreads);
        return;
    }

    foreach (var genomeDirectory in _options.GenomePaths.Distinct())
    {
        var referenceGenome = factory.GetReferenceGenome(genomeDirectory);
        new GenomeProcessor(factory, referenceGenome).Execute(_options.MaxNumThreads);
    }
}
// Runs GenomeProcessor over Chr17Chr19.bam with a mocked variant caller that
// sleeps 500 ms per call, then checks that only the combined VCF exists in
// outDir and that no per-chromosome "_chr17"/"_chr19" files are left behind.
//
// numberOfThreads:         thread count passed to GenomeProcessor.Execute.
// expectedNumberOfThreads: currently unused; the log-based thread-count check
//                          that consumed it is disabled (see note at the end).
// outDir:                  output directory for the run.
private void ExecuteChromosomeThreadingTest(int numberOfThreads, int expectedNumberOfThreads, string outDir)
{
    var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    var vcfFilePath = Path.Combine(outDir, "Chr17Chr19.vcf");
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        GenomePaths = new[] { genomePath },
        VcfWritingParameters = new VcfWritingParameters() { OutputGvcfFile = false },
        OutputDirectory = outDir
    };
    options.SetIODirectories("Pisces");

    var logFile = Path.Combine(options.LogFolder, options.LogFileName);
    if (File.Exists(logFile))
    {
        File.Delete(logFile);
    }

    Logger.OpenLog(options.LogFolder, options.LogFileName);
    try
    {
        var factory = new MockFactoryWithDefaults(options);
        factory.MockSomaticVariantCaller = new Mock<ISmallVariantCaller>();
        factory.MockSomaticVariantCaller.Setup(s => s.Execute()).Callback(() =>
        {
            Thread.Sleep(500);
        });

        var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), false);
        processor.Execute(numberOfThreads);

        Assert.False(File.Exists(vcfFilePath + "_chr17"));
        Assert.False(File.Exists(vcfFilePath + "_chr19"));
        Assert.True(File.Exists(vcfFilePath));
    }
    finally
    {
        // Close the log even when processing or an assertion fails, so the
        // failure does not leave the log open for whatever runs next.
        Logger.CloseLog();
    }

    // Disabled ("dont worry about logging"): the log-based check counted lines
    // containing "Start processing chr" that appear before the first line
    // containing "Completed processing chr" and compared that count to
    // expectedNumberOfThreads.
    //
    //var threadsSpawnedBeforeFirstCompleted = 0;
    //using (var reader = new StreamReader(new FileStream(logFile, FileMode.Open, FileAccess.Read)))
    //{
    //    string line;
    //    while ((line = reader.ReadLine()) != null)
    //    {
    //        if (string.IsNullOrEmpty(line)) continue;
    //        if (line.Contains("Completed processing chr")) break;
    //        if (line.Contains("Start processing chr"))
    //            threadsSpawnedBeforeFirstCompleted++;
    //    }
    //}
    //Assert.Equal(expectedNumberOfThreads, threadsSpawnedBeforeFirstCompleted);
}
// Exercises GenomeProcessor with empty interval files in two scenarios:
//  1. two BAMs, the first with a chr17-only interval file and the second with
//     an empty one - the first VCF has 11 records, the second has none;
//  2. both BAMs sharing the empty interval file - every VCF has no records.
// The throttle flag is forwarded to the GenomeProcessor constructor.
private void ExecuteEmptyIntervalsTest(bool throttle)
{
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");
    var firstBam = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    var secondBam = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19_removedSQlines.bam");
    var chr17Intervals = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17only.picard");
    var noIntervals = Path.Combine(TestPaths.LocalTestDataDirectory, "empty.picard");
    var mixedOutputFolder = Path.Combine(TestPaths.LocalTestDataDirectory, "EmptyIntervalsTest_Mixed");

    // ----------------------
    // scenario 1: one bam has intervals and the other is empty
    // ----------------------
    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { firstBam, secondBam },
        IntervalPaths = new[] { chr17Intervals, noIntervals },
        GenomePaths = new[] { genomePath },
        OutputDirectory = mixedOutputFolder,
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true
        }
    };

    var factory = new Factory(options);
    new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle).Execute(2);

    // the first vcf was processed normally
    using (var firstVcf = new AlleleReader(factory.WorkRequests.First().OutputFilePath))
    {
        Assert.Equal(11, firstVcf.GetVariants().Count());
    }

    // the second vcf holds no records
    using (var secondVcf = new AlleleReader(factory.WorkRequests.Last().OutputFilePath))
    {
        Assert.Equal(0, secondVcf.GetVariants().Count());
    }

    // ----------------------
    // scenario 2: both bams use the empty intervals
    // ----------------------
    options.IntervalPaths = new[] { noIntervals };
    options.OutputDirectory = Path.Combine(TestPaths.LocalTestDataDirectory, "EmptyIntervalsTest_All");

    factory = new Factory(options);
    new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle).Execute(2);

    // every vcf holds no records
    foreach (var request in factory.WorkRequests)
    {
        using (var vcf = new AlleleReader(request.OutputFilePath))
        {
            Assert.Equal(0, vcf.GetVariants().Count());
        }
    }
}
// Runs variant calling on a BAM and validates the resulting VCF.
//
// bamFilePath / intervalPath: inputs used to build default options when
//     applicationOptions is null (intervalPath may be null or empty).
// vcfFilePath: VCF to read back; its extension is changed to "genome.vcf"
//     when doCheckReferences is set.
// expectedVariants: variants passed to CheckVariants when doCountsOnly <= 0.
// fakeReferences: when non-null, a MockGenome built from these references is
//     used instead of the genome loaded from GenomeDirectory.
// doCheckVariants: validate variant calls (by count or by content).
// doCheckReferences: also validate reference calls.
// expectedNumCoveredPositions: not used by this method.
// threadByChr: run via BamProcessor instead of GenomeProcessor.
// doCountsOnly: when > 0, only the number of variant calls is asserted.
// doLog / callMnvs: forwarded to DebugMode / CallMNVs in the default options.
// applicationOptions: caller-supplied options; replaces the defaults entirely.
public void Execute(
    string bamFilePath,
    string vcfFilePath,
    string intervalPath,
    List<BaseCalledAllele> expectedVariants,
    List<ChrReference> fakeReferences = null,
    bool doCheckVariants = true,
    bool doCheckReferences = false,
    int expectedNumCoveredPositions = 0,
    bool threadByChr = false,
    int doCountsOnly = 0,
    bool doLog = false,
    bool callMnvs = true,
    ApplicationOptions applicationOptions = null)
{
    if (doCheckReferences)
    {
        vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
    }

    if (applicationOptions == null)
    {
        applicationOptions = new ApplicationOptions
        {
            BAMPaths = new[] { bamFilePath },
            IntervalPaths = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
            GenomePaths = new[] { GenomeDirectory },
            OutputgVCFFiles = doCheckReferences,
            OutputBiasFiles = true,
            DebugMode = doLog,
            MinimumBaseCallQuality = 20,
            CallMNVs = callMnvs
        };
    }

    Logger.TryOpenLog(applicationOptions.LogFolder, ApplicationOptions.LogFileName);

    var factory = GetFactory(applicationOptions);

    IGenome genome;
    if (fakeReferences == null)
    {
        genome = factory.GetReferenceGenome(GenomeDirectory);
    }
    else
    {
        genome = new MockGenome(fakeReferences, GenomeDirectory);
    }

    if (threadByChr)
    {
        var processor = new BamProcessor(factory, genome);
        processor.Execute(1);
    }
    else
    {
        var processor = new GenomeProcessor(factory, genome);
        processor.Execute(1);
    }

    Logger.TryCloseLog();

    using (var reader = new VcfReader(vcfFilePath))
    {
        var alleles = reader.GetVariants().ToList();

        // A first variant allele of "." marks a reference call.
        var variantCalls = alleles.Where(a => a.VariantAlleles[0] != ".").ToList();

        if (doCheckVariants)
        {
            if (doCountsOnly > 0)
            {
                // xUnit's Assert.Equal takes (expected, actual).
                Assert.Equal(doCountsOnly, variantCalls.Count);
            }
            else
            {
                CheckVariants(variantCalls, expectedVariants);
            }
        }

        if (doCheckReferences)
        {
            var referenceAlleles = alleles.Where(a => a.VariantAlleles[0] == ".").ToList();

            // Materialize the variant positions once instead of rebuilding the
            // projection for every allele.
            var variantPositions = variantCalls.Select(v => v.ReferencePosition).ToList();

            // make sure no reference calls at variant positions
            Assert.Equal(
                referenceAlleles.Count,
                alleles.Count(a => !variantPositions.Contains(a.ReferencePosition)));
        }
    }
}
// Runs variant calling on a BAM and validates the resulting VCF.
//
// bamFilePath / intervalPath: inputs used to build default options when
//     applicationOptions is null (intervalPath may be null or empty).
// vcfFilePath: VCF to read back; its extension is changed to "genome.vcf"
//     when doCheckReferences is set.
// expectedVariants: variants passed to CheckVariants when doCountsOnly <= 0.
// fakeReferences: when non-null, a MockGenome built from these references is
//     used instead of the genome loaded from GenomeDirectory.
// doCheckVariants: validate variant calls (by count or by content).
// doCheckReferences: also validate reference calls.
// expectedNumCoveredPositions: not used by this method.
// threadByChr: selects the GenomeProcessor constructor overload that takes an
//     extra 'false' argument.
// doCountsOnly: when > 0, only the number of variant calls is asserted.
// doLog / callMnvs / collapse: forwarded to the default options.
// applicationOptions: caller-supplied options; note that its OutputDirectory
//     is always overwritten with this class's OutputDirectory.
public void Execute(
    string bamFilePath,
    string vcfFilePath,
    string intervalPath,
    List<CalledAllele> expectedVariants,
    List<ChrReference> fakeReferences = null,
    bool doCheckVariants = true,
    bool doCheckReferences = false,
    int expectedNumCoveredPositions = 0,
    bool threadByChr = false,
    int doCountsOnly = 0,
    bool doLog = false,
    bool callMnvs = true,
    PiscesApplicationOptions applicationOptions = null,
    bool collapse = true)
{
    if (doCheckReferences)
    {
        vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
    }

    if (applicationOptions == null)
    {
        applicationOptions = new PiscesApplicationOptions
        {
            BAMPaths = new[] { bamFilePath },
            IntervalPaths = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
            GenomePaths = new[] { GenomeDirectory },
            OutputBiasFiles = true,
            DebugMode = doLog,
            CallMNVs = callMnvs,
            MaxGapBetweenMNV = 10,
            MaxSizeMNV = 15,
            Collapse = collapse,
            BamFilterParameters = new BamFilterParameters() { MinimumBaseCallQuality = 20 },
            VariantCallingParameters = new VariantCallingParameters(),
            VcfWritingParameters = new VcfWritingParameters()
            {
                OutputGvcfFile = doCheckReferences,
            },
            CommandLineArguments = new string[] { "some", "cmds" }
        };
    }

    applicationOptions.OutputDirectory = OutputDirectory;

    var factory = GetFactory(applicationOptions);

    IGenome genome;
    if (fakeReferences == null)
    {
        genome = factory.GetReferenceGenome(GenomeDirectory);
    }
    else
    {
        genome = new MockGenome(fakeReferences, GenomeDirectory);
    }

    if (threadByChr)
    {
        var processor = new GenomeProcessor(factory, genome, false);
        processor.Execute(1);
    }
    else
    {
        var processor = new GenomeProcessor(factory, genome);
        processor.Execute(1);
    }

    var alleles = AlleleReader.GetAllVariantsInFile(vcfFilePath);
    var variantCalls = alleles.Where(a => !a.IsRefType).ToList();

    if (doCheckVariants)
    {
        if (doCountsOnly > 0)
        {
            // xUnit's Assert.Equal takes (expected, actual).
            Assert.Equal(doCountsOnly, variantCalls.Count);
        }
        else
        {
            CheckVariants(variantCalls, expectedVariants);
        }
    }

    if (doCheckReferences)
    {
        var referenceAlleles = alleles.Where(a => a.IsRefType).ToList();

        // Materialize the variant positions once instead of rebuilding the
        // projection for every allele.
        var variantPositions = variantCalls.Select(v => v.ReferencePosition).ToList();

        // make sure no reference calls at variant positions
        Assert.Equal(
            referenceAlleles.Count,
            alleles.Count(a => !variantPositions.Contains(a.ReferencePosition)));
    }
}
// Runs the small chr19 BAM through GenomeProcessor twice against a mock
// reference: first with MinimumCoverage = 1000 and gVCF output enabled, then
// with default variant-calling parameters and gVCF output disabled. After each
// run the "genome.vcf" path derived from the BAM name is read back.
// NOTE(review): the second run disables gVCF output yet reads the same
// genome.vcf path, so it may be re-reading the first run's file - confirm intent.
public void Pisces_LowDepthTest()
{
    var mockReferences = new List<ChrReference>
    {
        new ChrReference
        {
            Name = "chr19",
            Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
        }
    };

    // First pass: explicit minimum coverage of 1000, gVCF output on.
    // (IntervalPaths = new[] { _intervalsChr17Chr19 } is intentionally left unset.)
    var highCoverageOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        OutputDirectory = TestPaths.LocalTestDataDirectory,
        BamFilterParameters = new Domain.Options.BamFilterParameters()
        {
            MinimumBaseCallQuality = 20
        },
        VariantCallingParameters = new Domain.Options.VariantCallingParameters()
        {
            MinimumVariantQScore = 20,
            MinimumCoverage = 1000,
        },
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true,
        }
    };

    var genomeVcfPath = Path.ChangeExtension(highCoverageOptions.BAMPaths[0], "genome.vcf");

    var highCoverageFactory = new Factory(highCoverageOptions);
    IGenome mockGenome = new MockGenome(mockReferences, _genomeChr19);

    new GenomeProcessor(highCoverageFactory, mockGenome).Execute(1);
    var highCoverageCalls = AlleleReader.GetAllVariantsInFile(genomeVcfPath);

    // Second pass: default calling parameters, gVCF output off.
    // (IntervalPaths = new[] { _intervalsChr17Chr19 } is intentionally left unset.)
    var defaultCoverageOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        OutputDirectory = TestPaths.LocalTestDataDirectory,
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = false,
        }
    };

    var defaultCoverageFactory = new Factory(defaultCoverageOptions);
    new GenomeProcessor(defaultCoverageFactory, mockGenome).Execute(1);
    var defaultCoverageCalls = AlleleReader.GetAllVariantsInFile(genomeVcfPath);
}
// Processes Chr17Chr19.bam restricted to a chr17-only interval file and
// compares the filters, contigs and variants of the produced VCF with those of
// the checked-in expected VCF. The produced VCF is deleted on success.
public void IntervalTestingWithVcf()
{
    // has data from chr17,7572952 and chr19,3118883
    var bamFile1Path = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    // chr 17 only
    var interval1Path = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17int.picard");
    var outDir = Path.Combine(TestPaths.LocalTestDataDirectory, "IntervalTests");
    // only results from chr17
    var vcfObservedFile1Path = Path.Combine(outDir, "Chr17Chr19.vcf");
    // only results from chr17
    var vcfExpectedFile1 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.expected.vcf");
    var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "fourChrs");

    var factory = MakeVcfFactory(new List<string> { bamFile1Path }, new List<string> { interval1Path }, outDir);
    var genome1 = factory.GetReferenceGenome(genomeDirectory);
    var processor = new GenomeProcessor(factory, genome1);

    var chrs = genome1.ChromosomesToProcess;
    Assert.Equal("chr17", chrs[0]);

    processor.InternalExecute(10);

    Assert.Equal(1, genome1.ChromosomesToProcess.Count);
    Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);

    // Both readers are scoped with 'using' so their file handles are released
    // even when an assertion fails; previously the expected-file reader was
    // never disposed.
    using (var reader1 = new AlleleReader(vcfObservedFile1Path))
    using (var readerExp1 = new AlleleReader(vcfExpectedFile1))
    {
        var observedFilters1Results = GetFilters(reader1);
        var observedContigs1Results = GetContigs(reader1);
        var observedVcf1Results = reader1.GetVariants().ToList();

        // the expected results:
        var filters1Expected = GetFilters(readerExp1);
        var contigs1Expected = GetContigs(readerExp1);
        var vcf1Expected = readerExp1.GetVariants().ToList();

        /*
        ##FILTER=<ID=q30,Description="Quality score less than 30">
        ##FILTER=<ID=SB,Description="Variant strand bias too high">
        ##FILTER=<ID=R5x9,Description="Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9">
        ##FILTER=<ID=NC,Description="No-call rate is above 0.6">
        */
        Assert.Equal(4, observedFilters1Results.Count);

        //##contig=<ID=chr17,length=7573100>
        Assert.Equal(1, observedContigs1Results.Count);
        Assert.Equal(1, observedVcf1Results.Count);

        // check variants and contigs all come out the same
        for (int i = 0; i < contigs1Expected.Count; i++)
        {
            Assert.Equal(contigs1Expected[i], observedContigs1Results[i]);
        }

        for (int i = 0; i < filters1Expected.Count; i++)
        {
            Assert.Equal(filters1Expected[i].ToString(), observedFilters1Results[i].ToString());
        }

        for (int i = 0; i < vcf1Expected.Count; i++)
        {
            Assert.Equal(vcf1Expected[i].ToString(), observedVcf1Results[i].ToString());
        }
    }

    // Clean up the produced VCF once the readers have been closed.
    File.Delete(vcfObservedFile1Path);
}
// Runs GenomeProcessor over Chr17Chr19.bam with a mocked variant caller that
// sleeps 500 ms per call. Verifies that only the combined VCF remains, then
// scans the log and asserts that the number of "Start processing chr" lines
// seen before the first "Completed processing chr" line equals
// expectedNumberOfThreads.
private void ExecuteChromosomeThreadingTest(int numberOfThreads, int expectedNumberOfThreads)
{
    var genomePath = Path.Combine(UnitTestPaths.TestGenomesDirectory, "chr17chr19");
    var bamFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "Chr17Chr19.bam");
    var vcfFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "Chr17Chr19.vcf");

    var options = new ApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        GenomePaths = new[] { genomePath },
    };

    // Start from a clean log so only this run's lines are counted.
    var logFile = Path.Combine(options.LogFolder, options.LogFileName);
    if (File.Exists(logFile))
    {
        File.Delete(logFile);
    }

    Logger.TryOpenLog(options.LogFolder, options.LogFileName);

    var mockFactory = new MockFactoryWithDefaults(options);
    mockFactory.MockSomaticVariantCaller = new Mock<ISomaticVariantCaller>();
    mockFactory.MockSomaticVariantCaller
        .Setup(s => s.Execute())
        .Callback(() => { Thread.Sleep(500); });

    new GenomeProcessor(mockFactory, mockFactory.GetReferenceGenome(genomePath), false)
        .Execute(numberOfThreads);

    Assert.False(File.Exists(vcfFilePath + "_chr17"));
    Assert.False(File.Exists(vcfFilePath + "_chr19"));
    Assert.True(File.Exists(vcfFilePath));

    Logger.TryCloseLog();

    var startsBeforeFirstCompletion = 0;
    using (var logReader = new StreamReader(logFile))
    {
        for (var logLine = logReader.ReadLine(); logLine != null; logLine = logReader.ReadLine())
        {
            if (string.IsNullOrEmpty(logLine))
            {
                continue;
            }

            if (logLine.Contains("Completed processing chr"))
            {
                break;
            }

            if (logLine.Contains("Start processing chr"))
            {
                startsBeforeFirstCompletion++;
            }
        }
    }

    Assert.Equal(expectedNumberOfThreads, startsBeforeFirstCompletion);
}
// Runs GenomeProcessor over two BAMs with a mocked variant caller that sleeps
// 500 ms per call, checks that both VCFs exist, and then scans the log:
//  - for each of chr17 and chr19, the number of "Start processing chr" lines
//    seen before the matching "Completed processing chr" line must equal
//    expectedNumberOfThreads (the counter resets after each completion);
//  - no "Processing chromosome 'chr17'" line may appear once a
//    "Processing chromosome 'chr19'" line has been seen.
private void ExecuteTest(int numberOfThreads, int expectedNumberOfThreads)
{
    var genomePath = Path.Combine(UnitTestPaths.TestGenomesDirectory, "chr17chr19");

    var bamFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "var123var35.bam");
    var bamFilePath2 = Path.Combine(UnitTestPaths.TestDataDirectory, "var123var35_removedSQlines.bam");
    var vcfFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "var123var35.vcf");
    var vcfFilePath2 = Path.Combine(UnitTestPaths.TestDataDirectory, "var123var35_removedSQlines.vcf");

    var options = new ApplicationOptions
    {
        BAMPaths = new[] { bamFilePath, bamFilePath2 },
        GenomePaths = new[] { genomePath },
    };

    // Start from a clean log so only this run's lines are inspected.
    var logFile = Path.Combine(options.LogFolder, ApplicationOptions.LogFileName);
    if (File.Exists(logFile))
    {
        File.Delete(logFile);
    }

    Logger.TryOpenLog(options.LogFolder, ApplicationOptions.LogFileName);

    var mockFactory = new MockFactoryWithDefaults(options);
    mockFactory.MockSomaticVariantCaller = new Mock<ISomaticVariantCaller>();
    mockFactory.MockSomaticVariantCaller
        .Setup(s => s.Execute())
        .Callback(() => { Thread.Sleep(500); });

    new GenomeProcessor(mockFactory, mockFactory.GetReferenceGenome(genomePath))
        .Execute(numberOfThreads);

    Assert.True(File.Exists(vcfFilePath));
    Assert.True(File.Exists(vcfFilePath2));

    Logger.TryCloseLog();

    // Per chromosome: (number of starts seen, whether a start has been seen).
    var startsByChr = new Dictionary<string, Tuple<int, bool>>
    {
        { "chr17", new Tuple<int, bool>(0, false) },
        { "chr19", new Tuple<int, bool>(0, false) }
    };

    // Only values are replaced below, so the key list can be taken once.
    var trackedChrs = startsByChr.Keys.ToList();
    var chr19Seen = false;

    using (var logReader = new StreamReader(logFile))
    {
        for (var logLine = logReader.ReadLine(); logLine != null; logLine = logReader.ReadLine())
        {
            if (string.IsNullOrEmpty(logLine))
            {
                continue;
            }

            foreach (var chr in trackedChrs)
            {
                if (!logLine.Contains("Start processing chr " + chr))
                {
                    continue;
                }

                var current = startsByChr[chr];
                startsByChr[chr] = new Tuple<int, bool>(current.Item1 + 1, true);
            }

            foreach (var chr in trackedChrs)
            {
                var current = startsByChr[chr];
                if (logLine.Contains("Completed processing chr " + chr) && current.Item2)
                {
                    Assert.Equal(expectedNumberOfThreads, current.Item1);
                    startsByChr[chr] = new Tuple<int, bool>(0, false);
                }
            }

            // make sure chr 17 fully completes before 19 starts
            if (logLine.Contains("Processing chromosome 'chr19'"))
            {
                chr19Seen = true;
            }

            Assert.False(logLine.Contains("Processing chromosome 'chr17'") && chr19Seen);
        }
    }
}
// Tests that we get the same results, in the same order, when using multiple
// samples and intervals. First runs two samples together, then runs the two
// samples individually, then runs them again through the GenomeProcessor
// constructor overload that takes an extra 'false' argument ("thread by chr").
// Nothing strange should happen.
//
// Based on a real bug where a gVCF was found out of order; it only happened
// for multiple-BAM runs with different interval files.
public void IntervalTestingWithMultipleSamples()
{
    // has data from chr17,7572952 and chr19,3118883
    var bamFile1Path = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    var bamFile2Path = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17again.bam");
    // chr 17 only
    var interval1Path = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17int.picard");
    // disordered, chr 19 first.
    var interval2Path = Path.Combine(TestPaths.LocalTestDataDirectory, "poorlyOrdered.picard");
    var outDir = Path.Combine(TestPaths.LocalTestDataDirectory, "IntervalTests");
    // only results from chr17
    var vcfFile1Path = Path.Combine(outDir, "Chr17Chr19.genome.vcf");
    // show results from chr17 and 19
    var vcfFile2Path = Path.Combine(outDir, "Chr17again.genome.vcf");
    // only results from chr17
    var vcfExpectedFile1 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.expected.genome.vcf");
    // show results from chr17 and 19
    var vcfExpectedFile2 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17again.expected.genome.vcf");
    var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "fourChrs");

    var twoSampleFactory = MakeFactory(new List<string> { bamFile1Path, bamFile2Path }, new List<string> { interval1Path, interval2Path }, outDir);
    var firstSampleFactory = MakeFactory(new List<string> { bamFile1Path }, new List<string> { interval1Path }, outDir);
    var secondSampleFactory = MakeFactory(new List<string> { bamFile2Path }, new List<string> { interval2Path }, outDir);

    // regular two-sample run mode.
    var genome = twoSampleFactory.GetReferenceGenome(genomeDirectory);
    var genome1 = firstSampleFactory.GetReferenceGenome(genomeDirectory);
    var genome2 = secondSampleFactory.GetReferenceGenome(genomeDirectory);

    var processor = new GenomeProcessor(twoSampleFactory, genome);

    var chrs = genome.ChromosomesToProcess;
    Assert.Equal("chr7", chrs[0]);
    Assert.Equal("chr8", chrs[1]);
    Assert.Equal("chr17", chrs[2]);
    Assert.Equal("chr19", chrs[3]);

    processor.InternalExecute(10);

    // The chromosome order must be unchanged by processing.
    chrs = genome.ChromosomesToProcess;
    Assert.Equal("chr7", chrs[0]);
    Assert.Equal("chr8", chrs[1]);
    Assert.Equal("chr17", chrs[2]);
    Assert.Equal("chr19", chrs[3]);

    // just be aware, when we process the samples individually, we use different genome lists.
    Assert.Equal(4, genome.ChromosomesToProcess.Count);
    Assert.Equal(1, genome1.ChromosomesToProcess.Count);
    Assert.Equal(4, genome2.ChromosomesToProcess.Count);
    Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);
    Assert.Equal("chr7", genome2.ChromosomesToProcess[0]);
    Assert.Equal("chr19", genome2.ChromosomesToProcess[3]);

    var reader1 = new AlleleReader(vcfFile1Path);
    var reader2 = new AlleleReader(vcfFile2Path);
    var contigs1Results = GetContigs(reader1);
    var contigs2Results = GetContigs(reader2);
    var vcf1Results = reader1.GetVariants().ToList();
    var vcf2Results = reader2.GetVariants().ToList();

    // the expected results:
    var readerExp1 = new AlleleReader(vcfExpectedFile1);
    var readerExp2 = new AlleleReader(vcfExpectedFile2);
    var contigs1Expected = GetContigs(readerExp1);
    var contigs2Expected = GetContigs(readerExp2);
    var vcf1Expected = readerExp1.GetVariants().ToList();
    var vcf2Expected = readerExp2.GetVariants().ToList();

    Assert.Equal(4, contigs1Results.Count);
    Assert.Equal(4, contigs2Results.Count);
    Assert.Equal(11, vcf1Results.Count);
    Assert.Equal(71, vcf2Results.Count);

    // check variants and contigs all come out the same
    // NOTE(review): the observed variant lists (vcf1Results/vcf2Results) are not
    // passed to CheckForOrdering here - confirm the helper's parameters.
    CheckForOrdering(contigs1Results, contigs2Results, contigs1Expected, contigs2Expected, vcf1Expected, vcf2Expected);

    reader1.Dispose();
    reader2.Dispose();
    File.Delete(vcfFile1Path);
    File.Delete(vcfFile2Path);

    // now check again, processing them separately
    processor = new GenomeProcessor(firstSampleFactory, genome1);
    processor.InternalExecute(10);

    processor = new GenomeProcessor(secondSampleFactory, genome2);
    processor.InternalExecute(10);

    reader1 = new AlleleReader(vcfFile1Path);
    reader2 = new AlleleReader(vcfFile2Path);
    contigs1Results = GetContigs(reader1);
    contigs2Results = GetContigs(reader2);
    vcf1Results = reader1.GetVariants().ToList();
    vcf2Results = reader2.GetVariants().ToList();

    // check variants all come out the same (the contigs will be different as shown)
    CheckForOrdering(contigs1Results, contigs2Results, new List<string>() { "chr17" }, contigs2Expected, vcf1Expected, vcf2Expected);

    reader1.Dispose();
    reader2.Dispose();
    File.Delete(vcfFile1Path);

    // now check again, processing them "thread by chr" way
    processor = new GenomeProcessor(twoSampleFactory, genome, false);
    processor.InternalExecute(10);

    reader1 = new AlleleReader(vcfFile1Path);
    reader2 = new AlleleReader(vcfFile2Path);
    contigs1Results = GetContigs(reader1);
    contigs2Results = GetContigs(reader2);
    vcf1Results = reader1.GetVariants().ToList();
    vcf2Results = reader2.GetVariants().ToList();

    // check variants all come out the same (the contigs will be back to normal)
    CheckForOrdering(contigs1Results, contigs2Results, contigs2Expected, contigs2Expected, vcf1Expected, vcf2Expected);

    reader1.Dispose();
    reader2.Dispose();

    // The expected-file readers were previously never disposed; release them
    // now that the last comparison against their data is done.
    readerExp1.Dispose();
    readerExp2.Dispose();

    File.Delete(vcfFile1Path);
    File.Delete(vcfFile2Path);
}
// Processes Chr17Chr19.bam restricted to a chr17-only interval file and
// compares the filters, contigs and variants of the produced VCF with those of
// the checked-in expected VCF. The produced VCF is deleted on success.
public void IntervalTestingWithVcf()
{
    // has data from chr17,7572952 and chr19,3118883
    var bamFile1Path = Path.Combine(UnitTestPaths.TestDataDirectory, "Chr17Chr19.bam");
    // chr 17 only
    var interval1Path = Path.Combine(UnitTestPaths.TestDataDirectory, "chr17int.picard");
    var outDir = Path.Combine(UnitTestPaths.WorkingDirectory, "IntervalTests");
    // only results from chr17
    var vcfFile1Path = Path.Combine(outDir, "Chr17Chr19.vcf");
    // only results from chr17
    var vcfExpectedFile1 = Path.Combine(UnitTestPaths.TestDataDirectory, "Chr17Chr19.expected.vcf");
    var genomeDirectory = Path.Combine(UnitTestPaths.TestGenomesDirectory, "fourChrs");

    var factory = MakeVcfFactory(new List<string> { bamFile1Path }, new List<string> { interval1Path }, outDir);
    var genome1 = factory.GetReferenceGenome(genomeDirectory);
    var processor = new GenomeProcessor(factory, genome1);

    var chrs = genome1.ChromosomesToProcess;
    Assert.Equal("chr17", chrs[0]);

    processor.InternalExecute(10);

    Assert.Equal(1, genome1.ChromosomesToProcess.Count);
    Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);

    // Both readers are scoped with 'using' so their file handles are released
    // even when an assertion fails; previously the expected-file reader was
    // never disposed.
    using (var reader1 = new VcfReader(vcfFile1Path))
    using (var readerExp1 = new VcfReader(vcfExpectedFile1))
    {
        var filters1Results = GetFilters(reader1);
        var contigs1Results = GetContigs(reader1);
        var vcf1Results = reader1.GetVariants().ToList();

        // the expected results:
        var filters1Expected = GetFilters(readerExp1);
        var contigs1Expected = GetContigs(readerExp1);
        var vcf1Expected = readerExp1.GetVariants().ToList();

        Assert.Equal(3, filters1Results.Count);
        Assert.Equal(1, contigs1Results.Count);
        Assert.Equal(1, vcf1Results.Count);

        // check variants and contigs all come out the same
        for (int i = 0; i < contigs1Expected.Count; i++)
        {
            Assert.Equal(contigs1Expected[i], contigs1Results[i]);
        }

        for (int i = 0; i < filters1Expected.Count; i++)
        {
            Assert.Equal(filters1Expected[i].ToString(), filters1Results[i].ToString());
        }

        for (int i = 0; i < vcf1Expected.Count; i++)
        {
            Assert.Equal(vcf1Expected[i].ToString(), vcf1Results[i].ToString());
        }
    }

    // Clean up the produced VCF once the readers have been closed.
    File.Delete(vcfFile1Path);
}