public void BasicIntervalTest()
        {
            // Drives the functional test setup over Chr17Chr19.bam without an interval file
            // (null is passed), with threadByChr switched on, handing it two expected SNVs:
            // one on chr19 and one on chr17.
            var bamPath = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
            var vcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.vcf");

            var runner = new SomaticVariantCallerFunctionalTestSetup
            {
                GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19")
            };

            var chr19Snv = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 3118942,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                Chromosome = "chr19"
            };

            var chr17Snv = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 7572985,
                ReferenceAllele = "T",
                AlternateAllele = "C",
                Chromosome = "chr17"
            };

            var expectedSnvs = new List<CalledAllele> { chr19Snv, chr17Snv };

            // Thread by chromosome.
            runner.Execute(bamPath, vcfPath, null, expectedSnvs, threadByChr: true);
        }
        public void BasicSNVIntervalTesting()
        {
            // Drives the functional test setup over Chr17Chr19.bam together with the
            // chr17only.picard interval file. Of the two candidate SNVs below, only the
            // one whose Chromosome is "chr17" is passed as an expectation.
            var bamPath = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
            var vcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.vcf");
            var chr17IntervalPath = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17only.picard");

            var runner = new SomaticVariantCallerFunctionalTestSetup
            {
                GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19")
            };

            var chr19Snv = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 3118942,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                Chromosome = "chr19"
            };

            var chr17Snv = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 7572985,
                ReferenceAllele = "T",
                AlternateAllele = "C",
                Chromosome = "chr17"
            };

            var candidateSnvs = new List<CalledAllele> { chr19Snv, chr17Snv };

            // Spot an expected allele inside an interval.
            var expectedInsideInterval = candidateSnvs.Where(allele => allele.Chromosome == "chr17").ToList();
            runner.Execute(bamPath, vcfPath, chr17IntervalPath, expectedInsideInterval);

            // Second run. The original comment here read "Ignore indels spotted outside or
            // overlapping the interval."
            // NOTE(review): this call is argument-for-argument the same as the first one and
            // no indel is involved - confirm whether a different input was intended.
            var expectedSecondRun = candidateSnvs.Where(allele => allele.Chromosome == "chr17").ToList();
            runner.Execute(bamPath, vcfPath, chr17IntervalPath, expectedSecondRun);
        }
        public void SomaticVariantCaller_SimpleSnv()
        {
            // Runs the sample BAM (_bam_Sample) through the functional test setup four times,
            // deleting the VCF at the derived path before every run. The single expected call
            // is an A>T SNV at chr19:3118942, except in the last run, which expects no alleles.
            var runner = new SomaticVariantCallerFunctionalTestSetup
            {
                GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19")
            };

            var chr19Snv = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 3118942,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                Chromosome = "chr19"
            };
            var expectedSnvs = new List<CalledAllele> { chr19Snv };

            // Same path as the BAM, with the extension swapped to "vcf".
            var outputVcfPath = Path.ChangeExtension(_bam_Sample, "vcf");

            // Run 1: without reference calls.
            File.Delete(outputVcfPath);
            runner.Execute(_bam_Sample, outputVcfPath, null, expectedSnvs);

            // Run 2: with reference calls.
            // NOTE(review): the trailing integer (102 here, 11 and 10 below) is passed
            // positionally; its meaning is defined by Execute's signature - confirm there.
            File.Delete(outputVcfPath);
            runner.Execute(_bam_Sample, outputVcfPath, null, expectedSnvs, null, true, true, 102);

            // Run 3: with reference calls and intervals.
            File.Delete(outputVcfPath);
            runner.Execute(_bam_Sample, outputVcfPath, _interval_Sample, expectedSnvs, null, true, true, 11);

            // Run 4: with reference calls and intervals that don't overlap the variant,
            // so an empty expectation list is passed.
            File.Delete(outputVcfPath);
            runner.Execute(_bam_Sample, outputVcfPath, _interval_Sample_negative, new List<CalledAllele>(), null, true, true, 10);
        }
        public void Pisces_Bcereus() // be serious. very, very, serious.
        {
            // Runs Bcereus_S4.bam against the Bacillus_cereus WholeGenomeFasta directory with
            // gVCF output enabled and NoiseLevelUsedForQScoring set to 1000, writing into the
            // local scratch directory. Three SNVs on "chr" are passed as the expectation.
            var bcereusBamPath = Path.Combine(TestPaths.SharedBamDirectory, "Bcereus_S4.bam");

            var runner = new SomaticVariantCallerFunctionalTestSetup
            {
                GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "Bacillus_cereus", "Sequence", "WholeGenomeFasta"),
                OutputDirectory = TestPaths.LocalScratchDirectory
            };

            // VCF records noted by the original author for the three expected calls:
            //chr 827.A   G   100 PASS DP = 37   GT: GQ: AD: DP: VF: NL: SB    0 / 1:100:35,2:37:0.054:1000:-100.0000
            //chr 1480.A   T   100 PASS DP = 18   GT: GQ: AD: DP: VF: NL: SB    0 / 1:100:16,2:18:0.111:1000:-100.0000
            //chr 2282.A   T   100 PASS DP = 21   GT: GQ: AD: DP: VF: NL: SB    0 / 1:100:19,2:21:0.095:1000:-100.0000
            var snvAt827 = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 827,
                ReferenceAllele = "A",
                AlternateAllele = "G",
                Chromosome = "chr"
            };

            var snvAt1480 = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 1480,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                Chromosome = "chr"
            };

            var snvAt2282 = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 2282,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                Chromosome = "chr"
            };

            var expectedSnvs = new List<CalledAllele> { snvAt827, snvAt1480, snvAt2282 };

            // Nested initializers ("X = { ... }") set members on the parameter objects the
            // options instance already holds; they do not replace those objects.
            var options = new PiscesApplicationOptions
            {
                VcfWritingParameters = { OutputGvcfFile = true },
                BAMPaths = new[] { bcereusBamPath },
                GenomePaths = new[] { runner.GenomeDirectory },
                OutputDirectory = runner.OutputDirectory,
                VariantCallingParameters = { NoiseLevelUsedForQScoring = 1000 }
            };

            var genomeVcfPath = Path.Combine(TestPaths.LocalScratchDirectory, "Bcereus_S4.genome.vcf");

            // Without reference calls. Remove any VCF left at this path by an earlier run.
            File.Delete(genomeVcfPath);
            runner.Execute(bcereusBamPath, genomeVcfPath, null, expectedSnvs, applicationOptions: options);
        }
        public void BasicMnvTesting()
        {
            // Runs the small S1 BAM (_bamSmallS1) against a single in-memory chr1 reference
            // and passes two MNVs starting at position 27 plus one SNV at position 35 as the
            // expectation. Reference checking and logging are on; collapsing is off.
            var runner = new SomaticVariantCallerFunctionalTestSetup
            {
                GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19")
            };

            // Mock ChrReference for the MockGenome.
            var chr1Reference = new ChrReference
            {
                Name = "chr1",
                Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCG"
            };
            var mockReferences = new List<ChrReference> { chr1Reference };

            var longMnv = new CalledAllele(AlleleCategory.Mnv)
            {
                ReferencePosition = 27,
                ReferenceAllele = "CCTGCTCCG",
                AlternateAllele = "TTTGCTCCA",
                Chromosome = "chr1"
            };

            var shortMnv = new CalledAllele(AlleleCategory.Mnv)
            {
                ReferencePosition = 27,
                ReferenceAllele = "CC",
                AlternateAllele = "TT",
                Chromosome = "chr1"
            };

            var snvAt35 = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 35,
                ReferenceAllele = "G",
                AlternateAllele = "A",
                Chromosome = "chr1"
            };

            var expectedCalls = new List<CalledAllele> { longMnv, shortMnv, snvAt35 };

            // Testing small_S1.bam with a MockGenome; the VCF path is the BAM path with a "vcf" extension.
            var vcfPath = Path.ChangeExtension(_bamSmallS1, "vcf");
            runner.Execute(_bamSmallS1, vcfPath, null, expectedCalls, mockReferences, doCheckReferences: true, doLog: true, collapse: false);
        }
        public void Pisces_PhiX() //Phix it and forget it.
        {
            // Runs PhiX_S3.bam against the PhiX WholeGenomeFasta directory with gVCF output
            // enabled, NoiseLevelUsedForQScoring = 1000 and lowered frequency / Q-score
            // minimums, writing into the local scratch directory. Seven SNVs on "phix" are
            // passed as the expectation.
            var phixBam = Path.Combine(TestPaths.SharedBamDirectory, "PhiX_S3.bam");

            var runner = new SomaticVariantCallerFunctionalTestSetup
            {
                GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "PhiX", "WholeGenomeFasta"),
                OutputDirectory = TestPaths.LocalScratchDirectory
            };

            // VCF records noted by the original author for the seven expected calls:
            //phix    14.T   C   3   q30; LowVariantFreq DP = 236  GT: GQ: AD: DP: VF: NL: SB    0 / 1:3:234,1:236:0.00424:1000:-100.0000
            //phix    14.T   G   3   q30; LowVariantFreq DP = 236  GT: GQ: AD: DP: VF: NL: SB    0 / 1:3:234,1:236:0.00424:1000:-100.0000
            //phix    19.G   T   3   q30; LowVariantFreq DP = 243  GT: GQ: AD: DP: VF: NL: SB    0 / 1:3:242,1:243:0.00412:1000:-100.0000
            //phix    22.G   A   3   q30; LowVariantFreq DP = 225  GT: GQ: AD: DP: VF: NL: SB    0 / 1:3:224,1:225:0.00444:1000:-100.0000
            //phix    25.G   T   3   q30; LowVariantFreq DP = 244  GT: GQ: AD: DP: VF: NL: SB    0 / 1:3:243,1:244:0.00410:1000:-100.0000
            //phix    26.A   C   3   q30; LowVariantFreq DP = 242  GT: GQ: AD: DP: VF: NL: SB    0 / 1:3:241,1:242:0.00413:1000:-100.0000
            //phix    42.A   T   3   q30; LowVariantFreq DP = 199  GT: GQ: AD: DP: VF: NL: SB    0 / 1:3:198,1:199:0.00503:1000:-100.0000
            var snv14ToC = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 14,
                ReferenceAllele = "T",
                AlternateAllele = "C",
                Chromosome = "phix"
            };

            var snv14ToG = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 14,
                ReferenceAllele = "T",
                AlternateAllele = "G",
                Chromosome = "phix"
            };

            var snv19 = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 19,
                ReferenceAllele = "G",
                AlternateAllele = "T",
                Chromosome = "phix"
            };

            var snv22 = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 22,
                ReferenceAllele = "G",
                AlternateAllele = "A",
                Chromosome = "phix"
            };

            var snv25 = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 25,
                ReferenceAllele = "G",
                AlternateAllele = "T",
                Chromosome = "phix"
            };

            var snv26 = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 26,
                ReferenceAllele = "A",
                AlternateAllele = "C",
                Chromosome = "phix"
            };

            var snv42 = new CalledAllele(AlleleCategory.Snv)
            {
                ReferencePosition = 42,
                ReferenceAllele = "A",
                AlternateAllele = "T",
                Chromosome = "phix"
            };

            var expectedSnvs = new List<CalledAllele>
            {
                snv14ToC, snv14ToG, snv19, snv22, snv25, snv26, snv42
            };

            // Nested initializers ("X = { ... }") set members on the parameter objects the
            // options instance already holds; they do not replace those objects.
            var options = new PiscesApplicationOptions
            {
                VcfWritingParameters = { OutputGvcfFile = true },
                BAMPaths = new[] { phixBam },
                GenomePaths = new[] { runner.GenomeDirectory },
                OutputDirectory = runner.OutputDirectory,
                VariantCallingParameters =
                {
                    NoiseLevelUsedForQScoring = 1000,
                    MinimumFrequency = 0.0001f,  // make sure we catch something in this little bam
                    MinimumVariantQScore = 3     // make sure we catch something in this little bam
                }
            };

            var genomeVcfPath = Path.Combine(TestPaths.LocalScratchDirectory, "PhiX_S3.genome.vcf");

            // Without reference calls. Remove any VCF left at this path by an earlier run.
            File.Delete(genomeVcfPath);
            runner.Execute(phixBam, genomeVcfPath, null, expectedSnvs, applicationOptions: options);
        }
        public void StitchedCollapsedBamGroundTruth()
        {
            // Runs collapsed.test.stitched.bam through the functional test setup with an
            // in-memory chr1 reference, expecting a C>A SNV at chr1:9770596, and then compares
            // the produced genome VCF with test_truth.stitched.genome.vcf located next to the BAM.
            //
            // SNP ground truth from TingTing
            var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "collapsed.test.stitched.bam");

            // Computed once: the same directory feeds both the runner and the options, and the
            // same VCF path is both handed to Execute and compared afterwards, so neither pair can drift.
            var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr1");
            var resultFilePath = Path.ChangeExtension(bamFilePath, "genome.vcf");

            var functionalTestRunner = new SomaticVariantCallerFunctionalTestSetup
            {
                GenomeDirectory = genomeDirectory
            };

            var appOptions = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { bamFilePath },
                IntervalPaths        = null,
                GenomePaths          = new[] { genomeDirectory },
                OutputBiasFiles      = true,
                DebugMode            = true,
                CallMNVs             = true,
                UseMNVReallocation   = false,
                MaxSizeMNV           = 100,
                MaxGapBetweenMNV     = 10,
                NoiseModelHalfWindow = 1,
                BamFilterParameters  = new BamFilterParameters()
                {
                    MinimumBaseCallQuality = 20,
                    MinimumMapQuality      = 1,
                    OnlyUseProperPairs     = false,
                },
                VariantCallingParameters = new VariantCallingParameters()
                {
                    MaximumVariantQScore                  = 100,
                    MinimumVariantQScoreFilter            = 30,
                    MinimumVariantQScore                  = 20,
                    MinimumCoverage                       = 10,
                    MinimumFrequency                      = 0.01f,
                    FilterOutVariantsPresentOnlyOneStrand = false,
                    ForcedNoiseLevel                      = -1,
                    NoiseModel                            = NoiseModel.Flat,
                    StrandBiasModel                       = StrandBiasModel.Extended,
                    AmpliconBiasFilterThreshold           = 0.01F
                },
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = true,
                    ReportRcCounts = true,
                    ReportTsCounts = true
                }
            };

            // Time to build the fake sequences for testing.
            var mockChrRef = new List<ChrReference>()
            {
                new ChrReference()
                {
                    // position 9770498 ~ 9770669: N-padding up to 9770497, real bases after it.
                    Name     = "chr1",
                    Sequence = new string('N', 9770498 - 1) + "GAAGTAACAACGCAGGATGCCCCCTGGGGTGGACTGCCCCATGGAATTCTGGACCAAGGAGGAGAATCAGAGCGTTGTGGTTGACTTCCTGCTGCCCACAGGGGTCTACCTGAACTTCCCTGTGTCCCGCAATGCCAACCTCAGCACCATCAAGCAGGTATGGCCTCCATC"
                }
            };

            var expectedAlleles = new List<CalledAllele>
            {
                new CalledAllele(AlleleCategory.Snv)
                {
                    ReferencePosition = 9770596,
                    ReferenceAllele   = "C",
                    AlternateAllele   = "A",
                    Chromosome        = "chr1"
                }
            };

            functionalTestRunner.Execute(bamFilePath, resultFilePath, null, expectedAlleles, mockChrRef, applicationOptions: appOptions);

            var truthvcfFilePath = Path.Combine(Path.GetDirectoryName(appOptions.BAMPaths[0]), "test_truth.stitched.genome.vcf");

            // Both VCFs are still passed through the allele reader, as before, but the returned
            // collections were never inspected, so they are no longer kept in unused locals.
            // NOTE(review): if the reader was only ever meant to feed an allele-level
            // comparison, these two calls can be dropped or turned into real assertions.
            AlleleReader.GetAllVariantsInFile(truthvcfFilePath);
            AlleleReader.GetAllVariantsInFile(resultFilePath);

            TestUtilities.TestHelper.CompareFiles(truthvcfFilePath, resultFilePath);
        }
        public void InsertionAtEdgeOfDistribution()
        {
            // Runs edgeIns_S2.bam against an in-memory chr7 reference and passes a single
            // expected insertion (T > TGGG at chr7:110) to the functional test setup.
            //
            // History, per the original notes: the scenario was brought forward from the old
            // SVC tests (listed as failing once stitching was included) to make sure indels at
            // the edge of the coverage distribution are called accurately and are not
            // accidentally marked as SB; it answers a bug where SB was called when it should not be.
            // NOTE(review): the record quoted in the original notes,
            //   chr7    116376907       .       ATTT    A       100.00  SB      DP=750;
            // is a deletion and looks copied from the deletion variant of this test.
            var edgeInsertionBam = Path.Combine(TestPaths.LocalTestDataDirectory, "edgeIns_S2.bam");
            var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

            var runner = new SomaticVariantCallerFunctionalTestSetup
            {
                GenomeDirectory = genomeDirectory
            };

            var options = new PiscesApplicationOptions
            {
                BAMPaths = new[] { edgeInsertionBam },
                IntervalPaths = null,
                GenomePaths = new[] { genomeDirectory },
                OutputBiasFiles = true,
                DebugMode = true,
                CallMNVs = true,
                UseMNVReallocation = false,
                MaxSizeMNV = 100,
                MaxGapBetweenMNV = 10,
                NoiseModelHalfWindow = 1,
                BamFilterParameters = new Domain.Options.BamFilterParameters
                {
                    MinimumBaseCallQuality = 20,
                    MinimumMapQuality = 1,
                    OnlyUseProperPairs = false
                },
                VariantCallingParameters = new Domain.Options.VariantCallingParameters
                {
                    MaximumVariantQScore = 100,
                    MinimumVariantQScoreFilter = 30,
                    MinimumVariantQScore = 20,
                    MinimumCoverage = 10,
                    MinimumFrequency = 0.01f,
                    FilterOutVariantsPresentOnlyOneStrand = false,
                    ForcedNoiseLevel = -1,
                    NoiseModel = NoiseModel.Flat,
                    StrandBiasModel = StrandBiasModel.Extended
                },
                VcfWritingParameters = new Domain.Options.VcfWritingParameters
                {
                    OutputGvcfFile = true
                }
            };

            // Fake chr7 sequence: a run of N padding followed by the real bases
            // (the original note marks the start of the real bases as "position 63").
            var chr7Reference = new ChrReference
            {
                Name = "chr7",
                Sequence = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN" +
                           "GTTGGTCTTCTATTTTATGCGAATTCTTCTAAGATTCCCAGGTTATTTATCATAAGAATTACATTTACATGGCAAATTTAGTTCTGTTCCTAGAAATATCTCCATGACAACCAAAAGGAACTCCTAATTTCTGGCACACATTACTTCAGGGGT"
            };
            var mockReferences = new List<ChrReference> { chr7Reference };

            var expectedInsertion = new CalledAllele(AlleleCategory.Insertion)
            {
                ReferencePosition = 110,
                ReferenceAllele = "T",
                AlternateAllele = "TGGG",
                Chromosome = "chr7"
            };
            var expectedCalls = new List<CalledAllele> { expectedInsertion };

            // The VCF path is the BAM path with its extension replaced by "genome.vcf".
            var genomeVcfPath = Path.ChangeExtension(edgeInsertionBam, "genome.vcf");
            runner.Execute(edgeInsertionBam, genomeVcfPath, null, expectedCalls, mockReferences, applicationOptions: options);
        }
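        // Example #9 (0) - listing marker, not code. The method below is written against a
        // different API surface (ApplicationOptions, UnitTestPaths, Coordinate/Reference/Alternate)
        // than the methods above.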
        public void DeletionAtEdgeOfDistribution()
        {
            // Runs edgeIndel_S2.bam against an in-memory chr7 reference and passes a single
            // expected deletion (ATTT > A at coordinate 107 on chr7) to the functional test setup.
            //
            // History, per the original notes: brought forward from the old SVC tests (listed
            // as failing once stitching was included) to make sure deletions at the edge of the
            // coverage distribution are called accurately and not accidentally marked as SB.
            // It answers a bug where SB was originally called here when it should not be:
            // chr7    116376907       .       ATTT    A       100.00  SB      DP=750;
            var edgeDeletionBam = Path.Combine(UnitTestPaths.TestDataDirectory, "edgeIndel_S2.bam");
            var genomeDirectory = Path.Combine(UnitTestPaths.TestGenomesDirectory, "chr17chr19");

            var runner = new SomaticVariantCallerFunctionalTestSetup
            {
                GenomeDirectory = genomeDirectory
            };

            var options = new ApplicationOptions
            {
                BAMPaths = new[] { edgeDeletionBam },
                IntervalPaths = null,
                GenomePaths = new[] { genomeDirectory },
                OutputgVCFFiles = true,
                OutputBiasFiles = true,
                DebugMode = true,
                CallMNVs = true,
                UseMNVReallocation = false,
                MaxSizeMNV = 100,
                FilterOutVariantsPresentOnlyOneStrand = false,
                AppliedNoiseLevel = -1,
                MinimumBaseCallQuality = 20,
                MaximumVariantQScore = 100,
                FilteredVariantQScore = 30,
                MinimumVariantQScore = 20,
                MaxGapBetweenMNV = 10,
                MinimumDepth = 10,
                MinimumMapQuality = 1,
                MinimumFrequency = 0.01f,
                StrandBiasAcceptanceCriteria = 0.5f,
                StrandBiasScoreMaximumToWriteToVCF = -100,
                StrandBiasScoreMinimumToWriteToVCF = 0,
                OnlyUseProperPairs = false,
                NoiseModelHalfWindow = 1,
                NoiseModel = NoiseModel.Flat,
                StrandBiasModel = StrandBiasModel.Extended
            };

            // Fake chr7 sequence: N padding followed by the real bases
            // (the original note marks the start of the real bases as "position 63").
            var chr7Reference = new ChrReference
            {
                Name = "chr7",
                Sequence = "NNNNNNNNNNNNNNNNNNNN" + "NNNNNNNNNNNNNNNNNNNN" + "NNNNNNNNNNNNNNNNNNNN" + "NN" +
                           "GTTGGTCTTCTATTTTATGCGAATTCTTCTAAGATTCCCAGGTTATTTATCATAAGAATTACATTTACATGGCAAATTTAGTTCTGTTCCTAGAAATATCTCCATGACAACCAAAAGGAACTCCTAATTTCTGGCACACATTACTTCAGGGGT"
            };
            var mockReferences = new List<ChrReference> { chr7Reference };

            var expectedDeletion = new CalledAllele(AlleleCategory.Deletion)
            {
                Coordinate = 107,
                Reference = "ATTT",
                Alternate = "A",
                Chromosome = "chr7"
            };
            var expectedCalls = new List<CalledAllele> { expectedDeletion };

            // The VCF path is the BAM path with its extension replaced by "genome.vcf".
            var genomeVcfPath = Path.ChangeExtension(edgeDeletionBam, "genome.vcf");
            runner.Execute(edgeDeletionBam, genomeVcfPath, null, expectedCalls, mockReferences, applicationOptions: options);
        }