/// <summary>
/// Builds options whose OutputDirectory points at a folder that does not exist and
/// expects <c>ValidateAndSetDerivedValues</c> to throw an <see cref="ArgumentException"/>.
/// </summary>
public void InvalidVcfOutputFolder()
{
    // NOTE(review): presumably chosen because "56:" cannot be a drive specifier - confirm.
    const string bogusFolder = "56:\\Illumina\\OutputFolder";

    // Precondition: the folder really is absent on this machine.
    Assert.False(Directory.Exists(bogusFolder));
    var outputFolder = Path.Combine(bogusFolder);

    var testDataFolder = TestPaths.LocalTestDataDirectory;
    var bamPaths = new[]
    {
        Path.Combine(testDataFolder, "Chr19.bam"),
        Path.Combine(testDataFolder, "Chr17Chr19.bam"),
        Path.Combine(testDataFolder, "Chr17Chr19_removedSQlines.bam")
    };
    var chr19Intervals = Path.Combine(testDataFolder, "Chr19.picard");
    var chr17Intervals = Path.Combine(testDataFolder, "chr17only.picard");
    var chr19Genome = Path.Combine(TestPaths.SharedGenomesDirectory, "chr19");

    var parser = new PiscesOptionsParser()
    {
        Options = new PiscesApplicationOptions
        {
            BAMPaths = bamPaths,
            IntervalPaths = new[] { chr17Intervals, chr19Intervals, null },
            GenomePaths = new[] { chr19Genome },
            OutputDirectory = outputFolder
        }
    };

    Assert.Throws<ArgumentException>(() => parser.ValidateAndSetDerivedValues());
}
/// <summary>
/// Pushes every non-null alignment through a <c>CandidateVariantFinder</c> and a
/// <c>RegionStateManager</c> (configured to expect stitched reads), then reports what was loaded.
/// </summary>
/// <param name="reads">Alignments to load; null entries are skipped.</param>
/// <param name="options">Supplies the base-call quality cutoff and the MNV settings for the finder.</param>
/// <param name="chrRef">Reference whose name and sequence are used for every read.</param>
/// <param name="expectedvariants">Not used by this method.</param>
/// <param name="expectedLoading">Not used by this method.</param>
/// <param name="expectedDirectionString">Not used by this method.</param>
/// <returns>
/// A tuple of (counts read back through <c>GetCountsFromManager</c>, candidates found in the LAST
/// processed read - the list is replaced on each iteration, not accumulated),
/// or <c>null</c> when anything throws while loading.
/// </returns>
public static Tuple<Dictionary<int, List<LoadTestResult>>, List<Domain.Models.Alleles.CandidateAllele>> LoadReads(
    List<BamAlignment> reads,
    PiscesApplicationOptions options,
    ChrReference chrRef,
    bool expectedvariants,
    string expectedLoading,
    string expectedDirectionString)
{
    var stateManager = new RegionStateManager(expectStitchedReads: true);
    var finder = new CandidateVariantFinder(
        options.BamFilterParameters.MinimumBaseCallQuality,
        options.MaxSizeMNV,
        options.MaxGapBetweenMNV,
        options.CallMNVs);
    var latestCandidates = new List<Domain.Models.Alleles.CandidateAllele>();

    try
    {
        foreach (var alignment in reads.Where(a => a != null))
        {
            var read = new Read(chrRef.Name, alignment);

            // Find the candidates for this read, then track them (and the read's allele counts) in the state manager.
            latestCandidates = finder.FindCandidates(read, chrRef.Sequence, chrRef.Name).ToList();
            stateManager.AddCandidates(latestCandidates);
            stateManager.AddAlleleCounts(read);
        }

        Dictionary<int, List<LoadTestResult>> counts = GetCountsFromManager(stateManager);
        return Tuple.Create(counts, latestCandidates);
    }
    catch
    {
        // Any failure is reported to the caller as null rather than as an exception.
        return null;
    }
}
/// <summary>
/// Checks that BAM paths can be discovered from a folder, and that options without
/// any BAM paths are rejected by <c>ValidateAndSetDerivedValues</c>.
/// </summary>
public void PopulateBAMPaths()
{
    var bamFolder = TestPaths.SharedBamDirectory;

    // Happy path: the folder lookup yields at least one BAM.
    var optionsFromFolder = new PiscesApplicationOptions()
    {
        BAMPaths = BamProcessorParsingUtils.UpdateBamPathsWithBamsFromFolder(bamFolder),
        GenomePaths = new[] { _existingGenome },
        IntervalPaths = new[] { _existingInterval }
    };
    Assert.NotNull(optionsFromFolder.BAMPaths);
    Assert.True(optionsFromFolder.BAMPaths.Length > 0);

    // No BAM files supplied: BAMPaths stays null and validation must throw.
    var optionsWithoutBams = new PiscesApplicationOptions()
    {
        GenomePaths = new[] { _existingGenome }
    };
    var parser = new PiscesOptionsParser() { Options = optionsWithoutBams };

    Assert.Null(optionsWithoutBams.BAMPaths);
    Assert.Throws<ArgumentException>(() => parser.ValidateAndSetDerivedValues());
}
/// <summary>
/// Verifies <c>FractionNoCalls</c> and <c>TotalCoverage</c> on called alleles as reads
/// containing 'N' bases are added, first at the SNP position and then at a reference position.
/// </summary>
public void Fraction()
{
    var options = new PiscesApplicationOptions()
    {
        VariantCallingParameters = new Domain.Options.VariantCallingParameters() { MinimumCoverage = 0 },
        VcfWritingParameters = new VcfWritingParameters() { OutputGvcfFile = false }
    };
    var chrReference = new ChrReference()
    {
        Name = _chrName,
        Sequence = "ACTCTACTAAGGGGGGACTATCCCG" // 25 bases
    };

    // Stage 1: no no-calls, one SNP (T>C at the third base) -> 50 + 25 = 75 coverage.
    var reads = new List<Read>();
    AddReads(reads, 50, 1, "ACTCTA", 20, "ATCCCG");
    AddReads(reads, 25, 1, "ACCCTA", 20, "ATCCCG");

    var calls = Call(reads, chrReference, options);
    Assert.Equal(1, calls.Count());
    var snv = calls[0];
    Assert.Equal(0, snv.FractionNoCalls);
    Assert.Equal(75, snv.TotalCoverage);

    // Stage 2: ten more reads with an 'N' at the SNP position.
    // Coverage stays at 75 while the no-call fraction becomes 10 / (75 + 10).
    AddReads(reads, 10, 1, "ACNCTA", 20, "ATCCCG");

    calls = Call(reads, chrReference, options);
    Assert.Equal(1, calls.Count());
    snv = calls[0];
    Assert.Equal(75, snv.TotalCoverage);
    Assert.Equal(10f / 85f, snv.FractionNoCalls);

    // Stage 3: forty reads with an 'N' at reference position 6, with gVCF output switched on
    // so reference calls are emitted as well.
    options.VcfWritingParameters.OutputGvcfFile = true;
    AddReads(reads, 40, 1, "ACTCTN", 20, "ATCCCG");

    calls = Call(reads, chrReference, options);
    Assert.Equal(12, calls.Count());

    var nonReferenceCalls = calls.Where(a => a.Type != Domain.Types.AlleleCategory.Reference).ToList();
    Assert.Equal(1, nonReferenceCalls.Count);
    snv = nonReferenceCalls[0];
    Assert.Equal(115, snv.TotalCoverage);
    Assert.Equal(10f / 125f, snv.FractionNoCalls);

    // Only the reference call at position 6 should carry a no-call fraction.
    foreach (var referenceCall in calls.Where(a => a.Type == Domain.Types.AlleleCategory.Reference))
    {
        float expectedFraction = 0f;
        if (referenceCall.ReferencePosition == 6)
        {
            expectedFraction = 40f / 125f;
        }

        Assert.Equal(expectedFraction, referenceCall.FractionNoCalls);
    }
}
/// <summary>
/// Functional run on the Bacillus cereus sample BAM (be serious. very, very, serious.):
/// hands three expected SNVs on "chr" to the functional test runner with gVCF output enabled.
/// </summary>
public void Pisces_Bcereus()
{
    var sampleBam = Path.Combine(TestPaths.SharedBamDirectory, "Bcereus_S4.bam");

    var runner = new SomaticVariantCallerFunctionalTestSetup
    {
        GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "Bacillus_cereus", "Sequence", "WholeGenomeFasta"),
        OutputDirectory = TestPaths.LocalScratchDirectory
    };

    // VCF records these expectations were taken from:
    //chr 827.A G 100 PASS DP = 37 GT: GQ: AD: DP: VF: NL: SB 0 / 1:100:35,2:37:0.054:1000:-100.0000
    //chr 1480.A T 100 PASS DP = 18 GT: GQ: AD: DP: VF: NL: SB 0 / 1:100:16,2:18:0.111:1000:-100.0000
    //chr 2282.A T 100 PASS DP = 21 GT: GQ: AD: DP: VF: NL: SB 0 / 1:100:19,2:21:0.095:1000:-100.0000
    var expectedAlleles = new List<CalledAllele>
    {
        new CalledAllele(AlleleCategory.Snv) { ReferencePosition = 827, ReferenceAllele = "A", AlternateAllele = "G", Chromosome = "chr" },
        new CalledAllele(AlleleCategory.Snv) { ReferencePosition = 1480, ReferenceAllele = "A", AlternateAllele = "T", Chromosome = "chr" },
        new CalledAllele(AlleleCategory.Snv) { ReferencePosition = 2282, ReferenceAllele = "A", AlternateAllele = "T", Chromosome = "chr" },
    };

    // The nested "{ ... }" initializers (no "new") adjust the parameter objects the options
    // instance already holds, leaving every other default untouched.
    var appOptions = new PiscesApplicationOptions
    {
        VcfWritingParameters = { OutputGvcfFile = true },
        BAMPaths = new string[] { sampleBam },
        GenomePaths = new string[] { runner.GenomeDirectory },
        OutputDirectory = runner.OutputDirectory,
        VariantCallingParameters = { NoiseLevelUsedForQScoring = 1000 }
    };

    // Remove any output left over from a previous run before executing.
    var vcfFilePath = Path.Combine(TestPaths.LocalScratchDirectory, "Bcereus_S4.genome.vcf");
    File.Delete(vcfFilePath);

    runner.Execute(sampleBam, vcfFilePath, null, expectedAlleles, applicationOptions: appOptions);
}
/// <summary>
/// Processes the same BAM staged into two different folders and checks the results.
/// Expectations recorded by the original author:
///  - if the output folder is not specified, logs are in the directory of the first bam
///  - if the output folder is specified, logs are in the output folder
///  - vcf files have a header and both chromosomes, and output is where normally expected
/// </summary>
/// <param name="numberOfThreads">Value passed to <c>GenomeProcessor.Execute</c>.</param>
/// <param name="outputFolder">Optional output directory; null leaves it unspecified.</param>
private void ExecuteTest(int numberOfThreads, string outputFolder = null)
{
    var sourceBam = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    var stagingRoot = Path.Combine(TestPaths.LocalScratchDirectory, "MultiProcessIn");
    var firstBam = Stage(sourceBam, "In1", stagingRoot + "1");
    var secondBam = Stage(sourceBam, "In2", stagingRoot + "2");
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

    // Only mention the output folder on the synthetic command line when one was given.
    var outFolderArgument = string.IsNullOrEmpty(outputFolder)
        ? string.Empty
        : " -OutFolder " + outputFolder;
    var commandLine = string.Format("-B {0},{1} -g {2}{3} -gVCF false", firstBam, secondBam, genomePath, outFolderArgument);

    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { firstBam, secondBam },
        GenomePaths = new[] { genomePath },
        OutputDirectory = outputFolder,
        CommandLineArguments = commandLine.Split(' '),
        VcfWritingParameters = new VcfWritingParameters() { OutputGvcfFile = true }
    };
    options.SetIODirectories("Pisces");

    var factory = new Factory(options);

    // Clear out results from earlier runs so the assertions below only see fresh output.
    var staleOutputs = factory.WorkRequests
        .Select(request => request.OutputFilePath)
        .Where(path => File.Exists(path));
    foreach (var stalePath in staleOutputs)
    {
        File.Delete(stalePath);
    }

    Logger.OpenLog(options.LogFolder, options.LogFileName, true);
    var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(options.GenomePaths[0]), false, true);
    processor.Execute(numberOfThreads);
    Logger.CloseLog();

    // Every output VCF must have a header, 251 records, start on chr17 and end on chr19.
    foreach (var request in factory.WorkRequests)
    {
        using (var reader = new VcfReader(request.OutputFilePath))
        {
            Assert.True(reader.HeaderLines.Any());

            var variants = reader.GetVariants().ToList();
            Assert.Equal(251, variants.Count);
            Assert.Equal("chr17", variants.First().ReferenceName);
            Assert.Equal("chr19", variants.Last().ReferenceName);
        }
    }

    // A log file matching the base name must exist in the log folder.
    var logFiles = Directory.GetFiles(options.LogFolder, options.LogFileNameBase);
    Assert.True(logFiles.Any());
}
/// <summary>
/// Creates the factory for the given options: forwards them to the base constructor,
/// publishes the debug flag globally, then runs the four initialization steps in order.
/// </summary>
/// <param name="options">Application options this factory is built from.</param>
public Factory(PiscesApplicationOptions options)
    : base(options)
{
    // DebugMode is a pervasive setting that we'd want available throughout the application.
    GlobalConstants.DebugMode = options.DebugMode;
    _options = options;

    UpdateWorkRequests();
    UpdateBamIntervals();
    UpdateKnownPriors();
    GetForcedAlleles();
}
/// <summary>
/// Builds options with a single BAM path and a single genome path (both taken from the
/// fixture's fields) and calls <c>SetIODirectories("Pisces")</c> on them.
/// </summary>
/// <returns>The configured options instance.</returns>
private PiscesApplicationOptions GetBasicOptions()
{
    var basicOptions = new PiscesApplicationOptions();
    basicOptions.BAMPaths = new[] { _existingBamPath };
    basicOptions.GenomePaths = new[] { _existingGenome };

    basicOptions.SetIODirectories("Pisces");
    return basicOptions;
}
/// <summary>
/// Writes a non-gVCF file through the factory's writer and checks that it lands next to the
/// first BAM with a ".vcf" extension and that header lines 7-10 are the expected FILTER lines.
/// </summary>
public void DefaultVCFOutput()
{
    var appOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup },
        IntervalPaths = new[] { _intervalsChr17, _intervalsChr19, null },
        GenomePaths = new[] { _genomeChr17Chr19 },
        VariantCallingParameters = new VariantCallingParameters()
        {
            MinimumCoverage = 10,
            LowDepthFilter = 10,
            AmpliconBiasFilterThreshold = 0.01F
        },
        VcfWritingParameters = new VcfWritingParameters()
        {
            OutputGvcfFile = false,
        }
    };

    var factory = new Factory(appOptions);

    var context = new VcfWriterInputContext
    {
        QuotedCommandLineString = "myCommandLine",
        SampleName = "mySample",
        ReferenceName = "myReference",
        ContigsByChr = new List<Tuple<string, long>>
        {
            new Tuple<string, long>("chr1", 10001),
            new Tuple<string, long>("chrX", 500)
        }
    };

    var outputFile = factory.GetOutputFile(appOptions.BAMPaths[0]);

    // The using block guarantees the writer is closed even when a write throws,
    // so a failing run cannot leave the output file locked for later tests.
    using (var writer = factory.CreateVcfWriter(outputFile, context))
    {
        writer.WriteHeader();
        writer.Write(_defaultCandidates);
    }

    Assert.True(File.Exists(outputFile));

    // xUnit's Assert.Equal takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.Equal(Path.ChangeExtension(_bamChr19, ".vcf"), outputFile);

    // The reader is disposable too; release it once the header has been inspected.
    using (var reader = new AlleleReader(outputFile))
    {
        var header = reader.HeaderLines;
        Assert.Equal("##FILTER=<ID=q30,Description=\"Quality score less than 30\">", header[7]);
        Assert.Equal("##FILTER=<ID=AB,Description=\"Amplicon bias - disparate variant frequencies detected by amplicon\">", header[8]);
        Assert.Equal("##FILTER=<ID=SB,Description=\"Variant strand bias too high\">", header[9]);
        Assert.Equal("##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">", header[10]);
    }
}
/// <summary>
/// Checks where <c>SetIODirectories</c> places the log folder in three situations:
/// an explicit output directory, a BAM with a parent folder, and a bare BAM file name.
/// </summary>
public void CheckLogFolderTest()
{
    const string logFolderName = "PiscesLogs";

    var bam = Path.Combine(TestPaths.SharedBamDirectory, "Chr17Chr19.bam");
    var genome = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");
    var explicitOutputFolder = Path.Combine(TestPaths.LocalScratchDirectory, "PiscesApplicationsOptionsTests");

    // Case 1: an output folder is specified -> logs go under it.
    var withOutputFolder = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bam },
        GenomePaths = new[] { genome },
        VcfWritingParameters = new VcfWritingParameters() { OutputGvcfFile = false },
        OutputDirectory = explicitOutputFolder
    };
    withOutputFolder.SetIODirectories("Pisces");
    Assert.Equal(Path.Combine(explicitOutputFolder, logFolderName), withOutputFolder.LogFolder);

    // Case 2: only a BAM is specified -> logs go under the BAM's folder.
    var withBamOnly = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bam },
        GenomePaths = new[] { genome },
        VcfWritingParameters = new VcfWritingParameters() { OutputGvcfFile = false },
    };
    withBamOnly.SetIODirectories("Pisces");
    Assert.Equal(Path.Combine(TestPaths.SharedBamDirectory, logFolderName), withBamOnly.LogFolder);

    // Case 3: the BAM has no parent folder -> the log folder is just the default name.
    var withBareBamName = new PiscesApplicationOptions
    {
        BAMPaths = new[] { "mybam.bam" },
        GenomePaths = new[] { genome },
        VcfWritingParameters = new VcfWritingParameters() { OutputGvcfFile = false },
    };
    withBareBamName.SetIODirectories("Pisces");
    Assert.Equal(logFolderName, withBareBamName.LogFolder);
}
/// <summary>
/// Runs a mock variant caller over the test factory's alignments, writing a VCF and a
/// strand-bias file, and asserts that the VCF exists afterwards.
/// </summary>
/// <param name="vcfOutputPath">Path of the VCF to write; also passed to the strand-bias writer.</param>
/// <param name="options">Application options used to build the factory and the caller.</param>
/// <param name="testFactory">Supplies the chromosome info and the alignment extractor.</param>
public static void CallStrandedVariantsWithMockData(string vcfOutputPath, PiscesApplicationOptions options, AmpliconTestFactory testFactory)
{
    var appFactory = new MockFactoryWithDefaults(options);

    // Both writers are released by the using statements (bias writer first, then the VCF writer).
    // The previous explicit biasWriter.Dispose() inside the block disposed the writer twice.
    using (var vcfWriter = appFactory.CreateVcfWriter(vcfOutputPath, new VcfWriterInputContext()))
    using (var biasWriter = new StrandBiasFileWriter(vcfOutputPath))
    {
        var svc = CreateMockVariantCaller(vcfWriter, options, testFactory.ChrInfo, testFactory.AlignmentExtractor, biasWriter);

        vcfWriter.WriteHeader();
        biasWriter.WriteHeader();
        svc.Execute();
    }

    Assert.True(File.Exists(vcfOutputPath));
}
/// <summary>
/// Creates a <c>MockFactoryWithDefaults</c> whose alignment source replays
/// <paramref name="readSets"/> in order and then returns null.
/// </summary>
/// <param name="options">Options handed to the factory.</param>
/// <param name="readSets">Reads served one per <c>GetNextRead</c> call.</param>
/// <returns>The factory with its <c>MockAlignmentSource</c> set.</returns>
private MockFactoryWithDefaults GetMockFactory(PiscesApplicationOptions options, List<Read> readSets)
{
    var nextReadIndex = 0;
    var mockFactory = new MockFactoryWithDefaults(options);

    // Alignment source: hand out reads sequentially; past the end of the list return null.
    var alignmentSource = new Mock<IAlignmentSource>();
    alignmentSource
        .Setup(source => source.GetNextRead())
        .Returns(() =>
        {
            var index = nextReadIndex++;
            if (index < readSets.Count)
            {
                return readSets[index];
            }

            return null;
        });
    alignmentSource
        .Setup(source => source.LastClearedPosition)
        .Returns(() => 0);

    mockFactory.MockAlignmentSource = alignmentSource;
    return mockFactory;
}
/// <summary>
/// Opens and closes a log in a scratch output directory and checks that both the
/// directory and the log file exist afterwards.
/// </summary>
public void OpenLogTest()
{
    const string logFileName = "testLog.txt";
    var outDir = Path.Combine(TestPaths.LocalScratchDirectory, "PiscesTestsOutDir");
    var options = new PiscesApplicationOptions { OutputDirectory = outDir };

    Logger.OpenLog(options.OutputDirectory, logFileName, true);
    Logger.CloseLog();

    Assert.True(Directory.Exists(outDir));
    Assert.True(File.Exists(Path.Combine(options.OutputDirectory, logFileName)));

    // Cleanup: redirect logging to a safe location, then remove the test folder.
    var safeLogDir = TestPaths.LocalScratchDirectory;
    Logger.OpenLog(safeLogDir, "DefaultLog.txt", true);
    Logger.CloseLog();

    Directory.Delete(outDir, true);
}
/// <summary>
/// Writes a gVCF (header plus the default candidates) through the factory's writer.
/// The header-format validation that used to follow was moved to GvcfWritingTests,
/// so this test now only checks that writing completes without throwing.
/// </summary>
public void GvcfHeaderFormat()
{
    var appOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup },
        IntervalPaths = new[] { _intervalsChr17, _intervalsChr19, null },
        GenomePaths = new[] { _genomeChr17Chr19 },
        VcfWritingParameters = new VcfWritingParameters()
        {
            OutputGvcfFile = true
        }
    };

    var factory = new Factory(appOptions);

    var context = new VcfWriterInputContext
    {
        QuotedCommandLineString = "myCommandLine",
        SampleName = "mySample",
        ReferenceName = "myReference",
        ContigsByChr = new List<Tuple<string, long>>
        {
            new Tuple<string, long>("chr1", 10001),
            new Tuple<string, long>("chrX", 500)
        }
    };

    var outputFile = factory.GetOutputFile(appOptions.BAMPaths[0]);

    // Dispose through using so the file handle is released even if a write throws.
    using (var writer = factory.CreateVcfWriter(outputFile, context))
    {
        writer.WriteHeader();
        writer.Write(_defaultCandidates);
    }
}
/// <summary>
/// Loads a single read via <c>LoadReads</c> and compares the outcome against the scenario's
/// expected loading and candidate direction.
/// </summary>
/// <param name="read">The alignment to load.</param>
/// <param name="options">Options forwarded to <c>LoadReads</c>.</param>
/// <param name="chrInfo">Reference forwarded to <c>LoadReads</c>.</param>
/// <param name="isVariant">
/// When true the scenario's variant loading/direction are expected; otherwise the
/// reference loading and direction "0".
/// </param>
/// <param name="scenario">Scenario providing the expected values.</param>
/// <returns>
/// Two strings (loading check result, direction check result) on success, or a single
/// "total fail ..." message naming the step that returned null.
/// </returns>
public static string[] CheckReadLoading(BamAlignment read, PiscesApplicationOptions options, ChrReference chrInfo, bool isVariant, StitchingScenario scenario)
{
    var expectedLoading = isVariant ? scenario.VarLoading : scenario.RefLoading;
    var expectedDirection = isVariant ? scenario.CandidateDirection : "0";

    var singleRead = new List<BamAlignment>() { read };
    var loaded = LoadReads(singleRead, options, chrInfo, isVariant, expectedLoading, expectedDirection);
    if (loaded == null)
    {
        return new string[] { "total fail to parse variant reads" };
    }

    // Run both checks first (coverage, then direction), then report the first one that failed outright.
    var loadingCheck = CheckLoading(scenario, 1, loaded.Item1, isVariant);
    var directionCheck = CheckCandidateDirection(isVariant, loaded.Item2, expectedDirection);

    if (loadingCheck == null)
    {
        return new string[] { "total fail to check loading" };
    }

    if (directionCheck == null)
    {
        return new string[] { "total fail to check direction" };
    }

    return new string[] { loadingCheck, directionCheck };
}
/// <summary>
/// Writes a gVCF through the factory's writer and checks that the file exists and is
/// named after the first BAM with its extension changed to "genome.vcf".
/// </summary>
public void DefaultGVCFOutput()
{
    var appOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup },
        IntervalPaths = new[] { _intervalsChr17, _intervalsChr19, null },
        GenomePaths = new[] { _genomeChr17Chr19 },
        VcfWritingParameters = new VcfWritingParameters()
        {
            OutputGvcfFile = true
        }
    };

    var factory = new Factory(appOptions);

    var context = new VcfWriterInputContext
    {
        QuotedCommandLineString = "myCommandLine",
        SampleName = "mySample",
        ReferenceName = "myReference",
        ContigsByChr = new List<Tuple<string, long>>
        {
            new Tuple<string, long>("chr1", 10001),
            new Tuple<string, long>("chrX", 500)
        }
    };

    var outputFile = factory.GetOutputFile(appOptions.BAMPaths[0]);

    // Dispose through using so the file handle is released even if a write throws.
    using (var writer = factory.CreateVcfWriter(outputFile, context))
    {
        writer.WriteHeader();
        writer.Write(_defaultCandidates);
    }

    Assert.True(File.Exists(outputFile));

    // xUnit's Assert.Equal takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.Equal(Path.ChangeExtension(_bamChr19, "genome.vcf"), outputFile);
}
/// <summary>
/// Runs the genome processor twice on the small S1 BAM against a mock chr19 reference:
/// once with MinimumCoverage = 1000 and gVCF output, once with default calling parameters
/// and gVCF output off. After each run the variants are read back from the same path.
/// </summary>
/// <remarks>
/// NOTE(review): neither result set is asserted on, so this test only proves that both runs
/// and both reads complete without throwing. The second read also targets the "genome.vcf"
/// path although the second run has gVCF output disabled - presumably it re-reads the first
/// run's file; confirm the intended path and add assertions.
/// </remarks>
public void Pisces_LowDepthTest()
{
    var mockReferences = new List<ChrReference>()
    {
        new ChrReference()
        {
            Name = "chr19",
            Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
        }
    };

    // Run 1: very high minimum coverage, gVCF output.
    var highCoverageOptions = new PiscesApplicationOptions()
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        OutputDirectory = TestPaths.LocalTestDataDirectory,
        BamFilterParameters = new Domain.Options.BamFilterParameters()
        {
            MinimumBaseCallQuality = 20
        },
        VariantCallingParameters = new Domain.Options.VariantCallingParameters()
        {
            MinimumVariantQScore = 20,
            MinimumCoverage = 1000,
        },
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true,
        }
    };

    var vcfFilePath = Path.ChangeExtension(highCoverageOptions.BAMPaths[0], "genome.vcf");

    var factory = new Factory(highCoverageOptions);
    IGenome genomeRef = new MockGenome(mockReferences, _genomeChr19);

    var processor = new GenomeProcessor(factory, genomeRef);
    processor.Execute(1);
    var coverage1000results = AlleleReader.GetAllVariantsInFile(vcfFilePath);

    // Run 2: default calling parameters, gVCF output off.
    var defaultCoverageOptions = new PiscesApplicationOptions()
    {
        BAMPaths = new[] { _bamSmallS1 },
        GenomePaths = new[] { _genomeChr19 },
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        OutputDirectory = TestPaths.LocalTestDataDirectory,
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = false,
        }
    };

    factory = new Factory(defaultCoverageOptions);
    processor = new GenomeProcessor(factory, genomeRef);
    processor.Execute(1);
    var coverage10results = AlleleReader.GetAllVariantsInFile(vcfFilePath);
}
/// <summary>
/// Creates the mock factory by forwarding <paramref name="options"/> to the base
/// constructor; no additional setup happens here.
/// </summary>
/// <param name="options">Application options passed through to the base class.</param>
public MockFactoryWithDefaults(PiscesApplicationOptions options) : base(options) { }
/// <summary>
/// Checks processing when interval files are empty: first with one BAM using valid
/// intervals and the other an empty file, then with an empty interval file for all BAMs.
/// </summary>
/// <param name="throttle">Passed as the third argument to <c>GenomeProcessor</c>.</param>
private void ExecuteEmptyIntervalsTest(bool throttle)
{
    var testData = TestPaths.LocalTestDataDirectory;
    var firstBam = Path.Combine(testData, "Chr17Chr19.bam");
    var secondBam = Path.Combine(testData, "Chr17Chr19_removedSQlines.bam");
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");
    var validIntervals = Path.Combine(testData, "chr17only.picard");
    var emptyIntervals = Path.Combine(testData, "empty.picard");

    // ----------------------
    // Scenario 1: one bam has intervals and the other's interval file is empty.
    // ----------------------
    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { firstBam, secondBam },
        IntervalPaths = new[] { validIntervals, emptyIntervals },
        GenomePaths = new[] { genomePath },
        OutputDirectory = Path.Combine(testData, "EmptyIntervalsTest_Mixed"),
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true
        }
    };

    var factory = new Factory(options);
    var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle);
    processor.Execute(2);

    // The first vcf should have been processed regularly.
    using (var reader = new AlleleReader(factory.WorkRequests.First().OutputFilePath))
    {
        Assert.Equal(11, reader.GetVariants().Count());
    }

    // The second vcf should contain no variants.
    using (var reader = new AlleleReader(factory.WorkRequests.Last().OutputFilePath))
    {
        Assert.Equal(0, reader.GetVariants().Count());
    }

    // ----------------------
    // Scenario 2: both bams use the empty interval file.
    // ----------------------
    options.IntervalPaths = new[] { emptyIntervals };
    options.OutputDirectory = Path.Combine(testData, "EmptyIntervalsTest_All");

    factory = new Factory(options);
    processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle);
    processor.Execute(2);

    // Every vcf should contain no variants.
    foreach (var request in factory.WorkRequests)
    {
        using (var reader = new AlleleReader(request.OutputFilePath))
        {
            Assert.Equal(0, reader.GetVariants().Count());
        }
    }
}
/// <summary>
/// Functional run on the PhiX sample BAM (Phix it and forget it): hands seven expected
/// low-frequency SNVs on "phix" to the functional test runner, with thresholds lowered
/// so that something is called in this small BAM.
/// </summary>
public void Pisces_PhiX()
{
    var phixBam = Path.Combine(TestPaths.SharedBamDirectory, "PhiX_S3.bam");

    var runner = new SomaticVariantCallerFunctionalTestSetup
    {
        GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "PhiX", "WholeGenomeFasta"),
        OutputDirectory = TestPaths.LocalScratchDirectory
    };

    // Small builder to keep the expectation table readable.
    Func<int, string, string, CalledAllele> snv = (position, reference, alternate) =>
        new CalledAllele(AlleleCategory.Snv)
        {
            ReferencePosition = position,
            ReferenceAllele = reference,
            AlternateAllele = alternate,
            Chromosome = "phix"
        };

    // VCF records these expectations were taken from:
    //phix 14.T C 3 q30; LowVariantFreq DP = 236 GT: GQ: AD: DP: VF: NL: SB 0 / 1:3:234,1:236:0.00424:1000:-100.0000
    //phix 14.T G 3 q30; LowVariantFreq DP = 236 GT: GQ: AD: DP: VF: NL: SB 0 / 1:3:234,1:236:0.00424:1000:-100.0000
    //phix 19.G T 3 q30; LowVariantFreq DP = 243 GT: GQ: AD: DP: VF: NL: SB 0 / 1:3:242,1:243:0.00412:1000:-100.0000
    //phix 22.G A 3 q30; LowVariantFreq DP = 225 GT: GQ: AD: DP: VF: NL: SB 0 / 1:3:224,1:225:0.00444:1000:-100.0000
    //phix 25.G T 3 q30; LowVariantFreq DP = 244 GT: GQ: AD: DP: VF: NL: SB 0 / 1:3:243,1:244:0.00410:1000:-100.0000
    //phix 26.A C 3 q30; LowVariantFreq DP = 242 GT: GQ: AD: DP: VF: NL: SB 0 / 1:3:241,1:242:0.00413:1000:-100.0000
    //phix 42.A T 3 q30; LowVariantFreq DP = 199 GT: GQ: AD: DP: VF: NL: SB 0 / 1:3:198,1:199:0.00503:1000:-100.0000
    var expectedAlleles = new List<CalledAllele>
    {
        snv(14, "T", "C"),
        snv(14, "T", "G"),
        snv(19, "G", "T"),
        snv(22, "G", "A"),
        snv(25, "G", "T"),
        snv(26, "A", "C"),
        snv(42, "A", "T")
    };

    // The nested "{ ... }" initializers (no "new") adjust the parameter objects the options
    // instance already holds, leaving every other default untouched.
    var appOptions = new PiscesApplicationOptions
    {
        VcfWritingParameters = { OutputGvcfFile = true },
        BAMPaths = new string[] { phixBam },
        GenomePaths = new string[] { runner.GenomeDirectory },
        OutputDirectory = runner.OutputDirectory,
        VariantCallingParameters =
        {
            NoiseLevelUsedForQScoring = 1000,
            MinimumFrequency = 0.0001f, // make sure we catch something in this little bam
            MinimumVariantQScore = 3    // make sure we catch something in this little bam
        }
    };

    // Remove any output left over from a previous run before executing.
    var vcfFilePath = Path.Combine(TestPaths.LocalScratchDirectory, "PhiX_S3.genome.vcf");
    File.Delete(vcfFilePath);

    runner.Execute(phixBam, vcfFilePath, null, expectedAlleles, applicationOptions: appOptions);
}
/// <summary>
/// Runs the genome processor for one BAM and checks the alleles written to the VCF.
/// </summary>
/// <param name="bamFilePath">BAM to process; only used when <paramref name="applicationOptions"/> is null.</param>
/// <param name="vcfFilePath">
/// VCF to read results from. When <paramref name="doCheckReferences"/> is true its extension
/// is replaced with "genome.vcf" first.
/// </param>
/// <param name="intervalPath">Optional interval file; only used when <paramref name="applicationOptions"/> is null.</param>
/// <param name="expectedVariants">Variants passed to <c>CheckVariants</c> when full variant checking is on.</param>
/// <param name="fakeReferences">When given, a <c>MockGenome</c> built from these replaces the genome loaded from <c>GenomeDirectory</c>.</param>
/// <param name="doCheckVariants">Whether non-reference calls are checked at all.</param>
/// <param name="doCheckReferences">Whether to verify that no reference call sits at a variant position.</param>
/// <param name="expectedNumCoveredPositions">Not used by this method.</param>
/// <param name="threadByChr">When true the processor is built with an explicit <c>false</c> third argument.</param>
/// <param name="doCountsOnly">When positive, only the number of variant calls is compared against this value.</param>
/// <param name="doLog">Becomes <c>DebugMode</c> of the default options.</param>
/// <param name="callMnvs">Becomes <c>CallMNVs</c> of the default options.</param>
/// <param name="applicationOptions">
/// Options to run with; when null a default set is built from the other arguments.
/// Note that the instance's <c>OutputDirectory</c> is overwritten with this runner's <c>OutputDirectory</c>.
/// </param>
/// <param name="collapse">Becomes <c>Collapse</c> of the default options.</param>
public void Execute(
    string bamFilePath,
    string vcfFilePath,
    string intervalPath,
    List<CalledAllele> expectedVariants,
    List<ChrReference> fakeReferences = null,
    bool doCheckVariants = true,
    bool doCheckReferences = false,
    int expectedNumCoveredPositions = 0,
    bool threadByChr = false,
    int doCountsOnly = 0,
    bool doLog = false,
    bool callMnvs = true,
    PiscesApplicationOptions applicationOptions = null,
    bool collapse = true)
{
    if (doCheckReferences)
    {
        vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
    }

    if (applicationOptions == null)
    {
        applicationOptions = new PiscesApplicationOptions
        {
            BAMPaths = new[] { bamFilePath },
            IntervalPaths = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
            GenomePaths = new[] { GenomeDirectory },
            OutputBiasFiles = true,
            DebugMode = doLog,
            CallMNVs = callMnvs,
            MaxGapBetweenMNV = 10,
            MaxSizeMNV = 15,
            Collapse = collapse,
            BamFilterParameters = new BamFilterParameters() { MinimumBaseCallQuality = 20 },
            VariantCallingParameters = new VariantCallingParameters(),
            VcfWritingParameters = new VcfWritingParameters()
            {
                OutputGvcfFile = doCheckReferences,
            },
            CommandLineArguments = new string[] { "some", "cmds" }
        };
    }

    applicationOptions.OutputDirectory = OutputDirectory;

    var factory = GetFactory(applicationOptions);

    IGenome genome;
    if (fakeReferences == null)
    {
        genome = factory.GetReferenceGenome(GenomeDirectory);
    }
    else
    {
        genome = new MockGenome(fakeReferences, GenomeDirectory);
    }

    // The two modes differ only in the constructor overload; execution is identical.
    var processor = threadByChr
        ? new GenomeProcessor(factory, genome, false)
        : new GenomeProcessor(factory, genome);
    processor.Execute(1);

    var alleles = AlleleReader.GetAllVariantsInFile(vcfFilePath);
    var variantCalls = alleles.Where(a => !a.IsRefType).ToList();

    if (doCheckVariants)
    {
        if (doCountsOnly > 0)
        {
            // Assert.Equal takes (expected, actual): the requested count is the expectation.
            Assert.Equal(doCountsOnly, variantCalls.Count);
        }
        else
        {
            CheckVariants(variantCalls, expectedVariants);
        }
    }

    if (doCheckReferences)
    {
        var referenceAlleles = alleles.Where(a => a.IsRefType).ToList();

        // Index the variant positions once (hashed lookup) instead of re-projecting and
        // linearly scanning the variant list for every allele.
        var variantPositions = variantCalls.ToLookup(v => v.ReferencePosition);

        // Make sure there are no reference calls at variant positions.
        Assert.Equal(
            referenceAlleles.Count,
            alleles.Count(a => !variantPositions.Contains(a.ReferencePosition)));
    }
}
/// <summary>
/// Creates a new <c>Factory</c> for the supplied options.
/// </summary>
/// <param name="options">Application options handed to the factory constructor.</param>
/// <returns>The newly constructed factory.</returns>
private Factory GetFactory(PiscesApplicationOptions options)
{
    var factory = new Factory(options);
    return factory;
}
/// <summary>
/// Calls variants on a collapsed, stitched BAM against a mock chr1 reference, expects one
/// SNV (the SNP ground truth from TingTing), then compares the produced gVCF with the truth file.
/// </summary>
public void StitchedCollapsedBamGroundTruth()
{
    var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "collapsed.test.stitched.bam");
    var resultVcfPath = Path.ChangeExtension(bamFilePath, "genome.vcf");

    var runner = new SomaticVariantCallerFunctionalTestSetup
    {
        GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr1")
    };

    var bamFilters = new BamFilterParameters()
    {
        MinimumBaseCallQuality = 20,
        MinimumMapQuality = 1,
        OnlyUseProperPairs = false,
    };
    var callingParameters = new VariantCallingParameters()
    {
        MaximumVariantQScore = 100,
        MinimumVariantQScoreFilter = 30,
        MinimumVariantQScore = 20,
        MinimumCoverage = 10,
        MinimumFrequency = 0.01f,
        FilterOutVariantsPresentOnlyOneStrand = false,
        ForcedNoiseLevel = -1,
        NoiseModel = NoiseModel.Flat,
        StrandBiasModel = StrandBiasModel.Extended,
        AmpliconBiasFilterThreshold = 0.01F
    };
    var writingParameters = new Domain.Options.VcfWritingParameters()
    {
        OutputGvcfFile = true,
        ReportRcCounts = true,
        ReportTsCounts = true
    };

    var appOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        IntervalPaths = null,
        GenomePaths = new[] { Path.Combine(TestPaths.SharedGenomesDirectory, "chr1") },
        OutputBiasFiles = true,
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        MaxGapBetweenMNV = 10,
        NoiseModelHalfWindow = 1,
        BamFilterParameters = bamFilters,
        VariantCallingParameters = callingParameters,
        VcfWritingParameters = writingParameters
    };

    // Fake reference: N-padding up to the region of interest, then positions 9770498 ~ 9770669.
    var mockChrRef = new List<ChrReference>()
    {
        new ChrReference()
        {
            Name = "chr1",
            Sequence = new string('N', 9770498 - 1) + "GAAGTAACAACGCAGGATGCCCCCTGGGGTGGACTGCCCCATGGAATTCTGGACCAAGGAGGAGAATCAGAGCGTTGTGGTTGACTTCCTGCTGCCCACAGGGGTCTACCTGAACTTCCCTGTGTCCCGCAATGCCAACCTCAGCACCATCAAGCAGGTATGGCCTCCATC"
        }
    };

    var expectedAlleles = new List<CalledAllele>
    {
        new CalledAllele(AlleleCategory.Snv)
        {
            ReferencePosition = 9770596,
            ReferenceAllele = "C",
            AlternateAllele = "A",
            Chromosome = "chr1"
        }
    };

    runner.Execute(bamFilePath, resultVcfPath, null, expectedAlleles, mockChrRef, applicationOptions: appOptions);

    // Both files are read through AlleleReader before the file comparison; the parsed
    // results are not inspected further.
    var truthVcfPath = Path.Combine(Path.GetDirectoryName(appOptions.BAMPaths[0]), "test_truth.stitched.genome.vcf");
    var stitchedCollapsedTruth = AlleleReader.GetAllVariantsInFile(truthVcfPath);
    var stitchedCollapsedResults = AlleleReader.GetAllVariantsInFile(resultVcfPath);

    TestUtilities.TestHelper.CompareFiles(truthVcfPath, resultVcfPath);
}
/// <summary>
/// Creates a parser whose <c>Options</c> property starts out as a freshly constructed
/// <c>PiscesApplicationOptions</c> instance.
/// </summary>
public PiscesOptionsParser() { Options = new PiscesApplicationOptions(); }
/// <summary>
/// Expects a single insertion (T -> TGGG at position 110 of a mock chr7) to be called from
/// the edge-insertion BAM.
/// </summary>
/// <remarks>
/// Author's notes carried over: this test was brought forward to test a deletion at the edge
/// from the previous tests, and was listed as failing when stitching was included.
/// Notes from Old SVC: make sure we can accurately call insertions at the edge of the coverage
/// distribution, and not accidentally mark them as SB. This test case was in response to a bug,
/// where originally we called SB here when we should not:
/// chr7 116376907 . ATTT A 100.00 SB DP=750;
/// </remarks>
public void InsertionAtEdgeOfDistribution()
{
    var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "edgeIns_S2.bam");

    var runner = new SomaticVariantCallerFunctionalTestSetup
    {
        GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19")
    };

    var bamFilters = new Domain.Options.BamFilterParameters()
    {
        MinimumBaseCallQuality = 20,
        MinimumMapQuality = 1,
        OnlyUseProperPairs = false,
    };
    var callingParameters = new Domain.Options.VariantCallingParameters()
    {
        MaximumVariantQScore = 100,
        MinimumVariantQScoreFilter = 30,
        MinimumVariantQScore = 20,
        MinimumCoverage = 10,
        MinimumFrequency = 0.01f,
        FilterOutVariantsPresentOnlyOneStrand = false,
        ForcedNoiseLevel = -1,
        NoiseModel = NoiseModel.Flat,
        StrandBiasModel = StrandBiasModel.Extended,
    };
    var writingParameters = new Domain.Options.VcfWritingParameters()
    {
        OutputGvcfFile = true,
    };

    var appOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        IntervalPaths = null,
        GenomePaths = new[] { Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19") },
        OutputBiasFiles = true,
        DebugMode = true,
        CallMNVs = true,
        UseMNVReallocation = false,
        MaxSizeMNV = 100,
        MaxGapBetweenMNV = 10,
        NoiseModelHalfWindow = 1,
        BamFilterParameters = bamFilters,
        VariantCallingParameters = callingParameters,
        VcfWritingParameters = writingParameters
    };

    // Fake reference: N-padding, then the real sequence starting at position 63.
    var mockChrRef = new List<ChrReference>()
    {
        new ChrReference()
        {
            Name = "chr7",
            Sequence = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" +
                       "GTTGGTCTTCTATTTTATGCGAATTCTTCTAAGATTCCCAGGTTATTTATCATAAGAATTACATTTACATGGCAAATTTAGTTCTGTTCCTAGAAATATCTCCATGACAACCAAAAGGAACTCCTAATTTCTGGCACACATTACTTCAGGGGT"
        }
    };

    var expectedAlleles = new List<CalledAllele>
    {
        new CalledAllele(AlleleCategory.Insertion)
        {
            ReferencePosition = 110,
            ReferenceAllele = "T",
            AlternateAllele = "TGGG",
            Chromosome = "chr7"
        }
    };

    var resultVcfPath = Path.ChangeExtension(bamFilePath, "genome.vcf");
    runner.Execute(bamFilePath, resultVcfPath, null, expectedAlleles, mockChrRef, applicationOptions: appOptions);
}
/// <summary>
/// Wires up a <c>SomaticVariantCaller</c> that reads from a mock alignment extractor, using
/// the supplied application options to configure filtering, candidate finding and allele calling.
/// </summary>
/// <param name="vcfWriter">Writer handed to the caller for VCF output.</param>
/// <param name="options">Application options that every component's configuration is read from.</param>
/// <param name="chrRef">Chromosome reference given to both the allele caller config and the caller.</param>
/// <param name="mockAlignmentExtractor">Source of alignments; its <c>SourceIsStitched</c> flag configures the state manager.</param>
/// <param name="biasFileWriter">Optional strand-bias writer passed through to the caller.</param>
/// <param name="intervalFilePath">Accepted for signature compatibility; not read by this method.</param>
/// <returns>The constructed caller.</returns>
public static ISomaticVariantCaller CreateMockVariantCaller(VcfFileWriter vcfWriter, PiscesApplicationOptions options, ChrReference chrRef, MockAlignmentExtractor mockAlignmentExtractor, IStrandBiasFileWriter biasFileWriter = null, string intervalFilePath = null)
{
    var bamFilters = options.BamFilterParameters;
    var callingParameters = options.VariantCallingParameters;

    var sourceConfig = new AlignmentSourceConfig
    {
        MinimumMapQuality = bamFilters.MinimumMapQuality,
        OnlyUseProperPairs = bamFilters.OnlyUseProperPairs,
        SkipDuplicates = bamFilters.RemoveDuplicates
    };

    // No mate finder is used with the mock extractor.
    AlignmentMateFinder noMateFinder = null;
    var alignmentSource = new AlignmentSource(mockAlignmentExtractor, noMateFinder, sourceConfig);

    var variantFinder = new CandidateVariantFinder(
        bamFilters.MinimumBaseCallQuality,
        options.MaxSizeMNV,
        options.MaxGapBetweenMNV,
        options.CallMNVs);

    var coverageCalculator = new CoverageCalculator();

    // The Q-score filter threshold is only set when it is stricter than the minimum Q-score.
    int? qscoreFilterThreshold = null;
    if (callingParameters.MinimumVariantQScoreFilter > callingParameters.MinimumVariantQScore)
    {
        qscoreFilterThreshold = callingParameters.MinimumVariantQScoreFilter;
    }

    var callerConfig = new VariantCallerConfig
    {
        IncludeReferenceCalls = options.VcfWritingParameters.OutputGvcfFile,
        MinVariantQscore = callingParameters.MinimumVariantQScore,
        MaxVariantQscore = callingParameters.MaximumVariantQScore,
        VariantQscoreFilterThreshold = qscoreFilterThreshold,
        MinCoverage = callingParameters.MinimumCoverage,
        MinFrequency = callingParameters.MinimumFrequency,
        NoiseLevelUsedForQScoring = callingParameters.NoiseLevelUsedForQScoring,
        StrandBiasModel = callingParameters.StrandBiasModel,
        StrandBiasFilterThreshold = callingParameters.StrandBiasAcceptanceCriteria,
        FilterSingleStrandVariants = callingParameters.FilterOutVariantsPresentOnlyOneStrand,
        ChrReference = chrRef
    };

    // A collapser is only attached when collapsing is switched on.
    VariantCollapser collapser = options.Collapse
        ? new VariantCollapser(null, true, coverageCalculator)
        : null;

    var alleleCaller = new AlleleCaller(
        callerConfig,
        coverageCalculator: coverageCalculator,
        variantCollapser: collapser);

    var stateManager = new RegionStateManager(
        expectStitchedReads: mockAlignmentExtractor.SourceIsStitched,
        trackOpenEnded: options.Collapse,
        trackReadSummaries: options.CoverageMethod == CoverageMethod.Approximate);

    // The state manager acts as the allele source; a freshly built one must report no counts.
    Assert.Equal(0, stateManager.GetAlleleCount(1, AlleleType.A, DirectionType.Forward));

    return new SomaticVariantCaller(
        alignmentSource,
        variantFinder,
        alleleCaller,
        vcfWriter,
        stateManager,
        chrRef,
        null,
        biasFileWriter);
}
/// <summary>
/// Runs a somatic variant caller built from the mock factory over the given reads and
/// returns the list that the mock writer was created with.
/// </summary>
/// <param name="readSets">Reads handed to the mock factory.</param>
/// <param name="chrReference">Chromosome reference the caller is created for.</param>
/// <param name="options">Application options handed to the mock factory.</param>
/// <returns>The allele list passed to <c>GetMockWriter</c>, after the caller has executed.</returns>
private List<CalledAllele> Call(List<Read> readSets, ChrReference chrReference, PiscesApplicationOptions options)
{
    var results = new List<CalledAllele>();

    var mockFactory = GetMockFactory(options, readSets);
    var mockWriter = GetMockWriter(results);

    // The BAM path is a placeholder string passed straight to the mock factory.
    var variantCaller = mockFactory.CreateSomaticVariantCaller(chrReference, "fakeBamFilePath", mockWriter);
    variantCaller.Execute();

    return results;
}
/// <summary>
/// Runs a <c>GenomeProcessor</c> over the Chr17Chr19 test BAM with a mocked variant caller and
/// checks that only the combined VCF exists afterwards (no per-chromosome "_chr17"/"_chr19" files).
/// </summary>
/// <param name="numberOfThreads">Thread count passed to <c>processor.Execute</c>.</param>
/// <param name="expectedNumberOfThreads">
/// Currently unused: the log-based check that consumed it is disabled (see the note at the end of the method).
/// </param>
/// <param name="outDir">Output directory for the VCF and, via the options, the log.</param>
private void ExecuteChromosomeThreadingTest(int numberOfThreads, int expectedNumberOfThreads, string outDir)
{
    var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    var vcfFilePath = Path.Combine(outDir, "Chr17Chr19.vcf");
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        GenomePaths = new[] { genomePath },
        VcfWritingParameters = new VcfWritingParameters() { OutputGvcfFile = false },
        OutputDirectory = outDir
    };
    options.SetIODirectories("Pisces");

    // Start from a clean log so earlier runs cannot influence this one.
    var logFile = Path.Combine(options.LogFolder, options.LogFileName);
    if (File.Exists(logFile))
    {
        File.Delete(logFile);
    }

    Logger.OpenLog(options.LogFolder, options.LogFileName);
    try
    {
        var factory = new MockFactoryWithDefaults(options);
        factory.MockSomaticVariantCaller = new Mock<ISmallVariantCaller>();

        // Each mocked caller just sleeps, standing in for per-chromosome work.
        factory.MockSomaticVariantCaller.Setup(s => s.Execute()).Callback(() => { Thread.Sleep(500); });

        var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), false);
        processor.Execute(numberOfThreads);

        Assert.False(File.Exists(vcfFilePath + "_chr17"));
        Assert.False(File.Exists(vcfFilePath + "_chr19"));
        Assert.True(File.Exists(vcfFilePath));
    }
    finally
    {
        // Always release the log, even when an assertion above fails; otherwise the
        // open log can block the File.Delete at the top of the next invocation.
        Logger.CloseLog();
    }

    // Disabled ("dont worry about logging"): counting the threads spawned before the first
    // chromosome completed, by scanning the log after it has been closed.
    //
    // var threadsSpawnedBeforeFirstCompleted = 0;
    // using (var reader = new StreamReader(new FileStream(logFile, FileMode.Open, FileAccess.Read)))
    // {
    //     string line;
    //     while ((line = reader.ReadLine()) != null)
    //     {
    //         if (string.IsNullOrEmpty(line)) continue;
    //
    //         if (line.Contains("Completed processing chr")) break;
    //
    //         if (line.Contains("Start processing chr"))
    //             threadsSpawnedBeforeFirstCompleted++;
    //     }
    // }
    // Assert.Equal(expectedNumberOfThreads, threadsSpawnedBeforeFirstCompleted);
}
/// <summary>
/// End-to-end check of strand-bias file output: with <c>OutputBiasFiles</c> on, the bias file must
/// exist, contain exactly one row per variant call in the VCF, and match the expected file;
/// with it off, no bias file may be produced.
/// </summary>
/// <param name="threadByChr">Passed to <c>CreateAndExecuteProcessor</c>; also selects the per-chromosome bias file name.</param>
private void Write_InFlow(bool threadByChr)
{
    var dataDirectory = TestPaths.LocalTestDataDirectory;
    var bamFilePath = Path.Combine(dataDirectory, "SBWriter_Sample_S1.bam");
    var vcfFilePath = Path.Combine(dataDirectory, "SBWriter_Sample_S1.genome.vcf");
    var biasFilePath = Path.Combine(dataDirectory, "SBWriter_Sample_S1.genome.ReadStrandBias.txt");

    if (threadByChr)
    {
        // When threading by chromosome, one bias file is currently written per chromosome.
        // This is not a customer-facing deliverable and is a low-priority feature.
        biasFilePath += "_chr19";
    }

    var expectedBiasResultsPath = Path.Combine(dataDirectory, "Expected_Sample_S1.ReadStrandBias.txt");
    var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr19");

    var applicationOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { bamFilePath },
        IntervalPaths = null,
        GenomePaths = new[] { genomeDirectory },
        OutputBiasFiles = true,
        DebugMode = true,
        VcfWritingParameters = new Domain.Options.VcfWritingParameters() { OutputGvcfFile = true }
    };

    // --- Using GenomeProcessor, OutputBiasFiles = true: one bias file per vcf is expected ---
    var factory = new MockFactoryWithDefaults(applicationOptions);
    var genome = factory.GetReferenceGenome(genomeDirectory);
    CreateAndExecuteProcessor(threadByChr, factory, genome);

    Assert.True(File.Exists(biasFilePath));

    var biasFileContents = File.ReadAllLines(biasFilePath);
    var allAlleles = AlleleReader.GetAllVariantsInFile(vcfFilePath);

    // Every VCF record where ref != alt must appear exactly once in the bias file.
    var variantCalls = allAlleles.Where(a => a.AlternateAllele != ".").ToList();
    foreach (var variantCall in variantCalls)
    {
        Console.WriteLine(variantCall);

        var matchingRows = biasFileContents.Count(row =>
        {
            var columns = row.Split('\t');
            return columns[0] == variantCall.Chromosome
                   && columns[1] == variantCall.ReferencePosition.ToString()
                   && columns[2] == variantCall.ReferenceAllele
                   && columns[3] == variantCall.AlternateAllele;
        });
        Assert.True(matchingRows == 1);
    }

    // Reference calls (alt == ".") must not have exactly one matching row.
    var referenceCalls = allAlleles.Where(a => a.AlternateAllele == ".").ToList();
    foreach (var refCall in referenceCalls)
    {
        var matchingRows = biasFileContents.Count(row =>
        {
            var columns = row.Split('\t');
            return columns[0] == refCall.Chromosome
                   && columns[1] == refCall.ReferencePosition.ToString()
                   && columns[2] == refCall.ReferenceAllele
                   && columns[3] == refCall.AlternateAllele;
        });
        Assert.False(matchingRows == 1);
    }

    // The bias file as a whole must match the checked-in expectation.
    var expectedBiasFileContents = File.ReadAllLines(expectedBiasResultsPath);
    Assert.Equal(expectedBiasFileContents, biasFileContents);

    // --- OutputBiasFiles = false: no bias file may be written ---
    File.Delete(biasFilePath);
    applicationOptions.OutputBiasFiles = false;

    factory = new MockFactoryWithDefaults(applicationOptions);
    genome = factory.GetReferenceGenome(genomeDirectory);
    CreateAndExecuteProcessor(threadByChr, factory, genome);

    Assert.False(File.Exists(biasFilePath));
}