예제 #1
0
        public void VennVcf_EmptyInputTest()
        {
            var outDir      = TestPaths.LocalTestDataDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfA       = Path.Combine(VcfPathRoot, "Empty_S1.vcf");
            string VcfB       = Path.Combine(VcfPathRoot, "Empty_S2.vcf");
            string OutputPath = Path.Combine(outDir, "EmptyConsensus.vcf");

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = Path.Combine(outDir, "EmptyConsensus.vcf");
            parameters.OutputDirectory   = outDir;
            parameters.DebugMode         = true;

            VennProcessor Venn = new VennProcessor(new string[] { VcfA, VcfB }, parameters);

            Venn.DoPairwiseVenn(false);

            Assert.True(File.Exists(OutputPath));
            var observedVariants = VcfReader.GetAllVariantsInFile(OutputPath);

            Assert.Equal(0, observedVariants.Count);
        }
예제 #2
0
        public void VennVcf_GtTest()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfA         = Path.Combine(VcfPathRoot, "gtTests_S15.vcf");
            string VcfB         = Path.Combine(VcfPathRoot, "gtTests_S18.vcf");
            string OutputPath   = Path.Combine(outDir, "gtConsensusOut.vcf");
            string ExpectedPath = Path.Combine(VcfPathRoot, "gtConsensus.vcf");

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;
            parameters.OutputDirectory   = outDir;

            VennProcessor Venn = new VennProcessor(new string[] { VcfA, VcfB }, parameters);

            Venn.DoPairwiseVenn(false);

            Assert.True(File.Exists(OutputPath));
            var expectedVariants = VcfReader.GetAllVariantsInFile(ExpectedPath);
            var observedVariants = VcfReader.GetAllVariantsInFile(OutputPath);

            Assert.Equal(expectedVariants.Count, observedVariants.Count);

            for (int i = 0; i < expectedVariants.Count; i++)
            {
                var ExpectedVariant = expectedVariants[i];
                var OutputVariant   = observedVariants[i];
                Assert.Equal(ExpectedVariant.ToString(), OutputVariant.ToString());
            }
        }
        public void CallSomaticVariants_LowDepthTest()
        {
            List <ChrReference> chrRef = new List <ChrReference>()
            {
                new ChrReference()
                {
                    Name     = "chr19",
                    Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
                }
            };

            var options = new ApplicationOptions()
            {
                BAMPaths    = new[] { _bamSmallS1 },
                GenomePaths = new[] { _genomeChr19 },
                //IntervalPaths = new[] { _intervalsChr17Chr19 },
                DebugMode          = true,
                CallMNVs           = true,
                UseMNVReallocation = false,
                MaxSizeMNV         = 100,
                OutputgVCFFiles    = true,
                MinimumCoverage    = 1000,
                OutputFolder       = UnitTestPaths.TestDataDirectory
            };

            var vcfFilePath = Path.ChangeExtension(options.BAMPaths[0], "genome.vcf");

            var     factory = new Factory(options);
            IGenome genomeRef;

            genomeRef = new MockGenome(chrRef);

            var bp = new BamProcessor(factory, genomeRef);

            bp.Execute(1);
            List <VcfVariant> coverage1000results = VcfReader.GetAllVariantsInFile(vcfFilePath);

            options = new ApplicationOptions()
            {
                BAMPaths    = new[] { _bamSmallS1 },
                GenomePaths = new[] { _genomeChr19 },
                // IntervalPaths = new[] { _intervalsChr17Chr19 },
                DebugMode          = true,
                CallMNVs           = true,
                UseMNVReallocation = false,
                OutputgVCFFiles    = true,
                OutputFolder       = UnitTestPaths.TestDataDirectory
            };
            factory = new Factory(options);
            bp      = new BamProcessor(factory, genomeRef);
            bp.Execute(1);
            List <VcfVariant> coverage10results = VcfReader.GetAllVariantsInFile(vcfFilePath);

            // Assert.NotEqual(coverage1000results.Count, coverage10results.Count);
            // Assert.Equal(coverage1000results.Count, 84);
            // Assert.Equal(coverage10results.Count, 100);
        }
        private void CompareVariants(string expectedResultsFilePath, string actualResultsFilePath)
        {
            List <VcfVariant> results  = VcfReader.GetAllVariantsInFile(actualResultsFilePath);
            List <VcfVariant> expected = VcfReader.GetAllVariantsInFile(expectedResultsFilePath);

            Assert.Equal(results.Count, expected.Count);

            for (int i = 0; i < results.Count; i++)
            {
                Assert.Equal(expected[i].ToString(), results[i].ToString());
            }
        }
예제 #5
0
        public void VennVcf_CombineTwoPoolVariants_MergeRefCalls()
        {
            //this is  from an issue where there were multiple co-located variants in one pool,
            //and just ref in the other, at chr15	92604460.  The consensus answer should be
            // a single ref call (and not multiple ref calls!).
            var outDir      = TestPaths.LocalScratchDirectory;
            var vcfPathRoot = _TestDataPath;

            string VcfPath_PoolA     = Path.Combine(vcfPathRoot, "C64-Ct-4_S17.genome.vcf");
            string VcfPath_PoolB     = Path.Combine(vcfPathRoot, "C64-Ct-4_S18.genome.vcf");
            string VcfPath_Consensus = Path.Combine(vcfPathRoot, "ExpectedConsensus2.vcf");

            string OutputPath = Path.Combine(outDir, "Consensus2.vcf");

            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.InputFiles        = new string[] { VcfPath_PoolA, VcfPath_PoolB };
            parameters.OutputDirectory   = outDir; //Path.Combine(outDir, "RefMergeOut.vcf");
            parameters.ConsensusFileName = OutputPath;
            VennProcessor venn = new VennProcessor(parameters.InputFiles, parameters);

            venn.DoPairwiseVenn(false);

            Assert.Equal(File.Exists(OutputPath), true);
            List <VcfVariant> CombinedVariants = VcfReader.GetAllVariantsInFile(OutputPath);
            List <VcfVariant> ExpectedVariants = VcfReader.GetAllVariantsInFile(VcfPath_Consensus);

            Assert.Equal(ExpectedVariants.Count, CombinedVariants.Count);

            int NumVariantsAtPos92604460 = 0;

            for (int i = 0; i < ExpectedVariants.Count; i++)
            {
                VcfVariant EVariant = ExpectedVariants[i];
                VcfVariant Variant  = CombinedVariants[i];

                if ((Variant.ReferencePosition == 92604460) &&
                    (Variant.ReferenceName == "chr15"))
                {
                    NumVariantsAtPos92604460++;
                }

                Assert.Equal(EVariant.ToString(), Variant.ToString());
            }

            Assert.Equal(NumVariantsAtPos92604460, 1);
        }
예제 #6
0
        public Dictionary <string, Dictionary <int, List <Mapping> > > IntersectVcfWithPanelVar(string file)
        {
            List <VcfVariant> allVariantsFromVcf = VcfReader.GetAllVariantsInFile(file);
            Dictionary <string, Dictionary <int, List <Mapping> > > foundVariantDataByChrAndPos =
                new Dictionary <string, Dictionary <int, List <Mapping> > >();

            foundVariantDataByChrAndPos["chr12"] = new Dictionary <int, List <Mapping> >();
            foundVariantDataByChrAndPos["chr1"]  = new Dictionary <int, List <Mapping> >();

            foreach (VcfVariant vcfVar in allVariantsFromVcf)
            {
                string myChr = vcfVar.ReferenceName;
                int    myPos = vcfVar.ReferencePosition;

                if ((myChr != "chr12") && (myChr != "chr1"))
                {
                    continue;
                }

                if (!TableDataByChrAndPos.ContainsKey(myChr))
                {
                    continue;
                }

                //we have a Var or ref call at a position we care about
                if (TableDataByChrAndPos[myChr].ContainsKey(vcfVar.ReferencePosition))
                {
                    List <RefPanelEntry> panelEntryAtPOsition = TableDataByChrAndPos[myChr][myPos];

                    if (!foundVariantDataByChrAndPos[myChr].ContainsKey(myPos))
                    {
                        foundVariantDataByChrAndPos[myChr][myPos] = new List <Mapping>();
                    }

                    foreach (RefPanelEntry entry in panelEntryAtPOsition)
                    {
                        if (CheckForMatch(entry, vcfVar))
                        {
                            Mapping mapping = new Mapping();
                            mapping.Var   = vcfVar;
                            mapping.Entry = entry;
                            foundVariantDataByChrAndPos[myChr][myPos].Add(mapping);
                            break;
                        }
                    }
                }
            }
            return(foundVariantDataByChrAndPos);
        }
예제 #7
0
        public void GetNeighborhoodsFromMessyVCF()
        {
            var vcfFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "verymutated.genome.vcf");

            var neighborhoodManager = CreateNbhdBuilder(_sourceVcf0);

            Assert.Equal(0, neighborhoodManager.GetBatchOfNeighborhoods(0).Count());

            List <VcfVariant> LotsOfCoLocatedVariants = VcfReader.GetAllVariantsInFile(vcfFilePath);

            neighborhoodManager = CreateNbhdBuilder(_sourceVcf_Mutated);

            var neighborhoods = neighborhoodManager.GetBatchOfNeighborhoods(0);

            Assert.Equal(1, neighborhoods.Count());
            Assert.Equal(12, neighborhoods.First().VcfVariantSites.Count());
        }
        public void GetNeighborhoodsFromMessyVCF()
        {
            var vcfFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "verymutated.genome.vcf");
            var outFolder   = Path.Combine(UnitTestPaths.TestDataDirectory, "Out");

            var neighborhoodManager = CreateBuilder(new List <VcfVariant>()
            {
            });

            Assert.Equal(0, neighborhoodManager.GetNeighborhoods().Count());

            List <VcfVariant> LotsOfCoLocatedVariants = VcfReader.GetAllVariantsInFile(vcfFilePath);

            neighborhoodManager = CreateBuilder(LotsOfCoLocatedVariants);

            var neighborhoods = neighborhoodManager.GetNeighborhoods();

            Assert.Equal(1, neighborhoods.Count());
            Assert.Equal(12, neighborhoods.First().VcfVariantSites.Count());
        }
예제 #9
0
        public void Pisces_LowDepthTest()
        {
            List <ChrReference> chrRef = new List <ChrReference>()
            {
                new ChrReference()
                {
                    Name     = "chr19",
                    Sequence = "TTGTCAGTGCGCTTTTCCCAACACCACCTGCTCCGACCACCACCAGTTTGTACTCAGTCATTTCACACCAGCAAGAACCTGTTGGAAACCAGTAATCAGGGTTAATTGGCGGCGAAAAAAAAAAAAAAAAAAAAAAAAAA"
                }
            };

            var options = new PiscesApplicationOptions()
            {
                BAMPaths    = new[] { _bamSmallS1 },
                GenomePaths = new[] { _genomeChr19 },
                //IntervalPaths = new[] { _intervalsChr17Chr19 },
                DebugMode           = true,
                CallMNVs            = true,
                UseMNVReallocation  = false,
                MaxSizeMNV          = 100,
                OutputDirectory     = TestPaths.LocalTestDataDirectory,
                BamFilterParameters = new Domain.Options.BamFilterParameters()
                {
                    MinimumBaseCallQuality = 20
                },
                VariantCallingParameters = new Domain.Options.VariantCallingParameters()
                {
                    MinimumVariantQScore = 20,
                    MinimumCoverage      = 1000,
                },
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = true,
                }
            };

            var vcfFilePath = Path.ChangeExtension(options.BAMPaths[0], "genome.vcf");

            var     factory = new Factory(options);
            IGenome genomeRef;

            genomeRef = new MockGenome(chrRef, _genomeChr19);

            var bp = new GenomeProcessor(factory, genomeRef);

            bp.Execute(1);
            List <VcfVariant> coverage1000results = VcfReader.GetAllVariantsInFile(vcfFilePath);

            options = new PiscesApplicationOptions()
            {
                BAMPaths    = new[] { _bamSmallS1 },
                GenomePaths = new[] { _genomeChr19 },
                // IntervalPaths = new[] { _intervalsChr17Chr19 },
                DebugMode            = true,
                CallMNVs             = true,
                UseMNVReallocation   = false,
                OutputDirectory      = TestPaths.LocalTestDataDirectory,
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = false,
                }
            };
            factory = new Factory(options);
            bp      = new GenomeProcessor(factory, genomeRef);
            bp.Execute(1);
            List <VcfVariant> coverage10results = VcfReader.GetAllVariantsInFile(vcfFilePath);

            // Assert.NotEqual(coverage1000results.Count, coverage10results.Count);
            // Assert.Equal(coverage1000results.Count, 84);
            // Assert.Equal(coverage10results.Count, 100);
        }
예제 #10
0
        private void Write_InFlow(bool threadByChr)
        {
            var bamFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "SBWriter_Sample_S1.bam");

            var vcfFilePath  = Path.Combine(UnitTestPaths.TestDataDirectory, "SBWriter_Sample_S1.genome.vcf");
            var biasFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "SBWriter_Sample_S1.genome.ReadStrandBias.txt");

            if (threadByChr)
            {
                biasFilePath = biasFilePath + "_chr19";              //Currently when threading by chrom we are outputting one bias file per chromsome. This is not a customer-facing deliverable and is a low-priority feature.
            }
            var expectedBiasResultsPath = Path.Combine(UnitTestPaths.TestDataDirectory, "Expected_Sample_S1.ReadStrandBias.txt");

            var genomeDirectory = Path.Combine(UnitTestPaths.TestGenomesDirectory, "chr19");

            var applicationOptions = new ApplicationOptions
            {
                BAMPaths        = new[] { bamFilePath },
                IntervalPaths   = null,
                GenomePaths     = new[] { genomeDirectory },
                OutputBiasFiles = true,
                DebugMode       = true,
                OutputgVCFFiles = true,
            };

            // Using GenomeProcessor
            //If OutputBiasFiles is true, should output one bias file per vcf
            var factory = new MockFactoryWithDefaults(applicationOptions);
            var genome  = factory.GetReferenceGenome(genomeDirectory);

            CreateAndExecuteProcessor(threadByChr, factory, genome);

            Assert.True(File.Exists(biasFilePath));

            //All variants that are present in VCF where ref!=alt should be included
            var biasFileContents = File.ReadAllLines(biasFilePath);
            var alleles          = VcfReader.GetAllVariantsInFile(vcfFilePath);
            var variantCalls     = alleles.Where(a => a.VariantAlleles[0] != ".").ToList();

            foreach (var variantCall in variantCalls)
            {
                Console.WriteLine(variantCall);
                Assert.True(biasFileContents.Count(l => l.Split('\t')[0] == variantCall.ReferenceName &&
                                                   l.Split('\t')[1] == variantCall.ReferencePosition.ToString() &&
                                                   l.Split('\t')[2] == variantCall.ReferenceAllele &&
                                                   l.Split('\t')[3] == variantCall.VariantAlleles.First()) == 1);
            }
            foreach (var refCall in alleles.Where(a => a.VariantAlleles[0] == ".").ToList())
            {
                Assert.False(biasFileContents.Count(l => l.Split('\t')[0] == refCall.ReferenceName &&
                                                    l.Split('\t')[1] == refCall.ReferencePosition.ToString() &&
                                                    l.Split('\t')[2] == refCall.ReferenceAllele &&
                                                    l.Split('\t')[3] == refCall.VariantAlleles.First()) == 1);
            }

            //Bias files should have expected contents
            var expectedBiasFileContents = File.ReadAllLines(expectedBiasResultsPath);

            Assert.Equal(expectedBiasFileContents, biasFileContents);

            //If OutputBiasFiles is false, should not output any bias files
            File.Delete(biasFilePath);

            applicationOptions.OutputBiasFiles = false;
            factory = new MockFactoryWithDefaults(applicationOptions);
            genome  = factory.GetReferenceGenome(genomeDirectory);
            CreateAndExecuteProcessor(threadByChr, factory, genome);
            Assert.False(File.Exists(biasFilePath));
        }
예제 #11
0
        public void Execute(
            string bamFilePath,
            string vcfFilePath,
            string intervalPath,
            List <CalledAllele> expectedVariants,
            List <ChrReference> fakeReferences = null,
            bool doCheckVariants            = true,
            bool doCheckReferences          = false,
            int expectedNumCoveredPositions = 0,
            bool threadByChr = false,
            int doCountsOnly = 0,
            bool doLog       = false,
            bool callMnvs    = true,
            ApplicationOptions applicationOptions = null,
            bool collapse = true)
        {
            if (doCheckReferences)
            {
                vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
            }

            if (applicationOptions == null)
            {
                applicationOptions = new ApplicationOptions
                {
                    BAMPaths               = new[] { bamFilePath },
                    IntervalPaths          = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
                    GenomePaths            = new[] { GenomeDirectory },
                    OutputgVCFFiles        = doCheckReferences,
                    OutputBiasFiles        = true,
                    DebugMode              = doLog,
                    MinimumBaseCallQuality = 20,
                    CallMNVs               = callMnvs,
                    MaxGapBetweenMNV       = 10,
                    MaxSizeMNV             = 15,
                    Collapse               = collapse
                };
            }

            Logger.TryOpenLog(applicationOptions.LogFolder, applicationOptions.LogFileName);

            var factory = GetFactory(applicationOptions);

            IGenome genome;

            if (fakeReferences == null)
            {
                genome = factory.GetReferenceGenome(GenomeDirectory);
            }
            else
            {
                genome = new MockGenome(fakeReferences, GenomeDirectory);
            }

            if (threadByChr)
            {
                var processor = new GenomeProcessor(factory, genome, false);

                processor.Execute(1);
            }
            else
            {
                var processor = new GenomeProcessor(factory, genome);

                processor.Execute(1);
            }

            Logger.TryCloseLog();

            var alleles = VcfReader.GetAllVariantsInFile(vcfFilePath);

            var variantCalls = alleles.Where(a => a.VariantAlleles[0] != ".").ToList();

            if (doCheckVariants)
            {
                if (doCountsOnly > 0)
                {
                    Assert.Equal(variantCalls.Count(), doCountsOnly);
                }
                else
                {
                    CheckVariants(variantCalls, expectedVariants);
                }
            }

            if (doCheckReferences)
            {
                var referenceAlleles = alleles.Where(a => a.VariantAlleles[0] == ".").ToList();

                // make sure no reference calls at variant positions
                Assert.Equal(referenceAlleles.Count(),
                             alleles.Count(a => !variantCalls.Select(v => v.ReferencePosition).Contains(a.ReferencePosition)));
            }
        }
예제 #12
0
        /// <summary>
        /// MNV within overlap region's edge
        /// </summary>
        //[Fact]
        //[Trait("Category", "ReadStitching")]
        //public void MNV_PartialOverlap()
        //{
        //    var outputFileName = "Mnv-PartialOverlap_S1.genome.vcf";

        //    var test = new AmpliconMnvTest()
        //    {
        //        StitchPairedReads = true,
        //        RequireXCTagToStitch = true,
        //        ReadLength = 30,
        //        ReferenceSequenceRelative = "GTTGGTCTTC" + "TATTTTATGCGAATTCTTCT" + "AAGATTCCCA",
        //        VariantPositionRelative = 9,
        //        ChangedSequence = "AAG",
        //        VariantDepth = 25,
        //        ReferenceDepth = 25
        //    };

        //    var expectedResults = new AmpliconTestResult()
        //    {
        //        VariantFrequency = 0.5f,
        //        TotalDepth = 50,
        //        VariantDepth = 25,
        //        ReferenceDepth = 25
        //    };

        //    ExecuteTest(test, outputFileName, expectedResults);
        //    test.RequireXCTagToStitch = false;
        //    ExecuteTest(test, outputFileName, expectedResults);

        //    // Note: don't test without stitch reads
        //    // this test would fail because the two reads would detect different variants
        //    // test.StitchPairedReads = false;
        //    // ExecuteTest(test, outputFileName, expectedResults);

        //    test.VariantDepth = 10;
        //    test.ReferenceDepth = 90;
        //    test.RequireXCTagToStitch = true;

        //    expectedResults = new AmpliconTestResult()
        //    {
        //        VariantFrequency = 0.1f,
        //        TotalDepth = 100,
        //        VariantDepth = 10,
        //        ReferenceDepth = 90
        //    };

        //    ExecuteTest(test, outputFileName, expectedResults);
        //    test.RequireXCTagToStitch = false;
        //    ExecuteTest(test, outputFileName, expectedResults);
        //}

        #region Helpers
        private void ExecuteTest(AmpliconTest test, string outputFileName, AmpliconTestResult expectedResult)
        {
            var appOptions = new ApplicationOptions()
            {
                BAMPaths             = new[] { string.Empty },
                GenomePaths          = new[] { _genomeChr19 },
                OutputFolder         = UnitTestPaths.TestDataDirectory,
                OutputgVCFFiles      = true,
                StitchReads          = test.StitchPairedReads,
                RequireXCTagToStitch = test.RequireXCTagToStitch,
                CallMNVs             = true,
                MaxSizeMNV           = 3,
                MaxGapBetweenMNV     = 1
            };

            var vcfOutputPath = Path.Combine(appOptions.OutputFolder, outputFileName);

            File.Delete(vcfOutputPath);

            test.ChrOffset = CHR_OFFSET;

            // test execution
            var factory = new AmpliconTestFactory(test.ReferenceSequenceAbsolute, test.StitchPairedReads);

            factory.ChrOffset = CHR_OFFSET;

            if (test is AmpliconInsertionTest)
            {
                factory.StageInsertion(
                    test.ReadLength,
                    ((AmpliconInsertionTest)test).InsertionSequence,
                    test.VariantPositionAbsolute,
                    test.VariantDepth,
                    test.ReferenceDepth);
            }
            else if (test is AmpliconDeletionTest)
            {
                factory.StageDeletion(
                    test.ReadLength,
                    ((AmpliconDeletionTest)test).NumberDeletedBases,
                    test.VariantPositionAbsolute,
                    test.VariantDepth,
                    test.ReferenceDepth);
            }
            else if (test is AmpliconMnvTest)
            {
                factory.StageMnv(
                    test.ReadLength,
                    ((AmpliconMnvTest)test).ChangedSequence,
                    test.VariantPositionAbsolute,
                    test.VariantDepth,
                    test.ReferenceDepth);
            }

            CallVariantsWithMockData(vcfOutputPath, appOptions, factory);

            var results = GetResults(VcfReader.GetAllVariantsInFile(vcfOutputPath));

            Assert.True(results.Count == 1);

            var result = results[0];

            if (test is AmpliconInsertionTest)
            {
                Assert.True(result.VariantAllele.Substring(1) == ((AmpliconInsertionTest)test).InsertionSequence);
            }
            else if (test is AmpliconDeletionTest)
            {
                Assert.True(result.ReferenceAllele.Length == ((AmpliconDeletionTest)test).NumberDeletedBases + 1);
            }
            else if (test is AmpliconMnvTest)
            {
                Assert.True(result.VariantAllele == ((AmpliconMnvTest)test).ChangedSequence);
            }

            Assert.True(result.Filters == "PASS");
            Assert.True(result.Filters.Split(',').Count() == 1);
            Assert.True(result.Position == test.VariantPositionAbsolute);

            VerifyEqual(result.VariantFrequency, expectedResult.VariantFrequency, 0.1f);
            VerifyEqual(result.TotalDepth, expectedResult.TotalDepth, 1f);
            VerifyEqual(result.VariantDepth, expectedResult.VariantDepth, 1f);
            VerifyEqual(result.ReferenceDepth, expectedResult.ReferenceDepth, 1f);
        }
예제 #13
0
        public void TestForStrandBiasOnStitchingScenarios(StitchingScenario scenario, string resultFile)
        {
            //limit the scope of concern for now.
            if (scenario.ShouldRefStitch != true)
            {
                return;
            }


            //limit the scope of concern for now.
            if (scenario.ShouldStitch != true)
            {
                return;
            }

            var resultsSummary = Path.Combine(Options.OutputFolder, StrandBiasSummaryFileName);

            using (StreamWriter sw = new StreamWriter(resultsSummary, true))
            {
                var sb = new StringBuilder(
                    string.Join(",", DateTime.Today.ToShortDateString(),
                                DateTime.Now.ToLongTimeString(),
                                scenario.Category, scenario.Id));

                try
                {
                    if (!Directory.Exists(Options.OutputFolder))
                    {
                        Directory.CreateDirectory(Options.OutputFolder);
                    }


                    var factory = new AmpliconTestFactory(new string('A', 100), sourceIsStitched: true);

                    byte qualityForAll      = 30;
                    int  numVariantCounts   = 2; // 10;
                    int  numReferenceCounts = 2; // 90;
                    var  varRead            = BuildRead(scenario.OutputRead1, qualityForAll, StageMNVdata(scenario));
                    var  refRead            = BuildRead(scenario.OutputRefRead1, qualityForAll, NoMNVdata(scenario));

                    if (refRead == null)
                    {
                        return;
                    }

                    factory.StageStitchedVariant(
                        varRead, numVariantCounts,
                        refRead, numReferenceCounts);

                    var outputFileName = string.Format("{0}_{1}.vcf", scenario.Category, scenario.Id);
                    var vcfOutputPath  = Path.Combine(Options.OutputFolder, outputFileName);
                    var biasOutputPath = StrandBiasFileWriter.GetBiasFilePath(vcfOutputPath);

                    File.Delete(vcfOutputPath);
                    File.Delete(biasOutputPath);

                    StitchedReadBiasHelper.CallStrandedVariantsWithMockData(vcfOutputPath, Options, factory);
                    var varResults  = StitchedReadBiasHelper.GetResults(VcfReader.GetAllVariantsInFile(vcfOutputPath));
                    var biasResults = StitchedReadBiasHelper.GetStrandResultsFromFile(biasOutputPath);

                    var observedFrequency = (varResults.Count == 0) ? "0": "";
                    var observedSB        = (biasResults.Count == 0) ? "FN": "";

                    for (int i = 0; i < varResults.Count; i++)
                    {
                        var varResult = varResults[i];
                        if (i != 0)
                        {
                            observedFrequency += ";";
                        }
                        observedFrequency += varResult.VariantFrequency;
                    }

                    for (int i = 0; i < biasResults.Count; i++)
                    {
                        var biasResult = biasResults[i];
                        if (i != 0)
                        {
                            observedSB += ";";
                        }
                        observedSB += biasResult.HasStrandBias;

                        //there should be no SB on our current set of stitched scenarios.
                        Assert.True(!biasResult.HasStrandBias);
                    }

                    var expectedValues = new List <string>()
                    {
                        "1", scenario.Frequency, scenario.ShouldBias
                    };

                    var observedValues = new List <string>()
                    {
                        varResults.Count.ToString(), observedFrequency, observedSB
                    };

                    sb.Append(GetResultString(expectedValues, observedValues));

                    sw.WriteLine(sb.ToString());
                }
                catch (Exception ex)
                {
                    sb.Append(",Fail:  " + ex);
                    sw.WriteLine(sb.ToString());
                }
            }
        }
예제 #14
0
        public void UnpackAlleles()
        {
            //two example vcf files that have been "crushed".
            var crushedVcf1 = Path.Combine(TestPaths.LocalTestDataDirectory, "VcfFileWriterTests_Crushed_Padded_expected.vcf");
            var crushedVcf2 = Path.Combine(TestPaths.LocalTestDataDirectory, "crushed.genome.vcf");

            var vcfVariants1 = VcfReader.GetAllVariantsInFile(crushedVcf1);
            var vcfVariants2 = VcfReader.GetAllVariantsInFile(crushedVcf2);

            Assert.Equal(7, vcfVariants1.Count);
            Assert.Equal(90, vcfVariants2.Count);

            // 1/2 variants
            var hetAlt1     = vcfVariants1[5];
            var hetAlt2     = vcfVariants2[3];
            var hetAlt1next = vcfVariants1[6];
            var hetAlt2next = vcfVariants2[4];

            Assert.Equal(1, hetAlt1.Genotypes.Count);
            Assert.Equal(1, hetAlt2.Genotypes.Count);
            Assert.Equal(2, hetAlt1.VariantAlleles.Count());
            Assert.Equal(2, hetAlt2.VariantAlleles.Count());
            Assert.Equal("2387,2000", hetAlt1.Genotypes[0]["AD"]);
            Assert.Equal("0.8133", hetAlt1.Genotypes[0]["VF"]);
            Assert.Equal("254,254", hetAlt2.Genotypes[0]["AD"]);
            Assert.Equal("AA", hetAlt1.ReferenceAllele);
            Assert.Equal("GA", hetAlt1.VariantAlleles[0]);
            Assert.Equal("G", hetAlt1.VariantAlleles[1]);
            Assert.Equal(".", hetAlt1next.VariantAlleles[0]);
            Assert.Equal("0", hetAlt1next.Genotypes[0]["AD"]);
            Assert.Equal("532", hetAlt2next.Genotypes[0]["AD"]);
            Assert.Equal(10, hetAlt1.ReferencePosition);
            Assert.Equal(223906731, hetAlt2.ReferencePosition);
            Assert.Equal(10 + 1, hetAlt1next.ReferencePosition);
            Assert.Equal(223906731 + 1, hetAlt2next.ReferencePosition);

            var unpackedVariants1 = VcfVariantUtilities.UnpackVariants(vcfVariants1);
            var unpackedVariants2 = VcfVariantUtilities.UnpackVariants(vcfVariants2);

            Assert.Equal(8, unpackedVariants1.Count);
            Assert.Equal(91, unpackedVariants2.Count);

            hetAlt1     = unpackedVariants1[5];
            hetAlt2     = unpackedVariants2[3];
            hetAlt1next = unpackedVariants1[6];
            hetAlt2next = unpackedVariants2[4];

            //example one:
            //total depth = 5394, total variant count = 2387 + 2000 = 4387
            //so, ref counts ~1007.

            //example two:
            //total depth = 532, total variant count = 254 + 254 = 508
            //so, ref counts ~24.

            Assert.Equal(1, hetAlt1.Genotypes.Count);
            Assert.Equal(1, hetAlt2.Genotypes.Count);
            Assert.Equal("1007,2387", hetAlt1.Genotypes[0]["AD"]);
            Assert.Equal("24,254", hetAlt2.Genotypes[0]["AD"]);
            Assert.Equal("0.4425", hetAlt1.Genotypes[0]["VF"]);
            Assert.Equal(1, hetAlt1.VariantAlleles.Count());
            Assert.Equal(1, hetAlt2.VariantAlleles.Count());
            Assert.Equal(1, hetAlt1next.VariantAlleles.Count());
            Assert.Equal(1, hetAlt2next.VariantAlleles.Count());
            Assert.Equal("1007,2000", hetAlt1next.Genotypes[0]["AD"]);
            Assert.Equal("24,254", hetAlt2next.Genotypes[0]["AD"]);
            Assert.Equal("AA", hetAlt1.ReferenceAllele);
            Assert.Equal("GA", hetAlt1.VariantAlleles[0]);
            Assert.Equal("G", hetAlt1next.VariantAlleles[0]);
            Assert.Equal("0.3708", hetAlt1next.Genotypes[0]["VF"]);
            Assert.Equal(10, hetAlt1.ReferencePosition);
            Assert.Equal(223906731, hetAlt2.ReferencePosition);
            Assert.Equal(10, hetAlt1next.ReferencePosition);
            Assert.Equal(223906731, hetAlt2next.ReferencePosition);
        }
예제 #15
0
        public void VennVcf_CombineTwoPoolVariants_ProbePoolBias_Tests()
        {
            //this is  from an issue anita had where a variant was in one pool at 1%, the other at 0%, and showed up as 6% in the combined pool.

            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfPath_PoolA = Path.Combine(VcfPathRoot, "small_S14.genome.vcf");
            string VcfPath_PoolB = Path.Combine(VcfPathRoot, "small_S17.genome.vcf");



            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = Path.Combine(outDir, "Consensus.vcf");
            parameters.OutputDirectory   = outDir;
            if (File.Exists(parameters.ConsensusFileName))
            {
                File.Delete(parameters.ConsensusFileName);
            }


            VennProcessor Venn = new VennProcessor(new string[] { VcfPath_PoolA, VcfPath_PoolB }, parameters);

            Venn.DoPairwiseVenn(false);

            Assert.Equal(File.Exists(parameters.ConsensusFileName), true);

            List <VcfVariant> CombinedVariants = VcfReader.GetAllVariantsInFile(parameters.ConsensusFileName);
            List <VcfVariant> AandBVariants    = VcfReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_and_S17.vcf"));
            List <VcfVariant> BandAVariants    = VcfReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_and_S14.vcf"));
            List <VcfVariant> AnotBVariants    = VcfReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S14_not_S17.vcf"));
            List <VcfVariant> BnotAVariants    = VcfReader.GetAllVariantsInFile(Path.Combine(outDir, "small_S17_not_S14.vcf"));

            //poolA
            //chr1	     115258743	.	A	.	100	PASS	DP=35354	GT:GQ:AD:VF:NL:SB	0/0:100:30256:0.1442:20:-100.0000
            //chr1       115258743           .               AC          TT           100         PASS      DP=35354            GT:GQ:AD:VF:NL:SB                0/1:100:30720,4634:0.1311:20:-100.0000
            //chr1       115258744           .               C             .               100         PASS      DP=35253            GT:GQ:AD:VF:NL:SB                0/0:100:30277:0.1412:20:-100.0000
            //chr1       115258745           .               C             .               100         PASS      DP=35160            GT:GQ:AD:VF:NL:SB                0/0:100:35130:0.0009:20:-100.0000


            //poolB
            //chr1       115258743           .               AC          TT           100         PASS      DP=49612            GT:GQ:AD:VF:NL:SB                0/1:100:44202,5410:0.1090:20:-100.0000
            //chr1       115258743           .               A             T              100         PASS      DP=49612            GT:GQ:AD:VF:NL:SB                0/1:100:43362,670:0.0135:20:-46.0807
            //chr1       115258744           .               C             T              24           PASS      DP=49902            GT:GQ:AD:VF:NL:SB                0/1:24:43905,560:0.0112:20:-8.3857


            //when we had bug:
            //chr1       115258743           .               AC          TT           100.00   PASS      DP=84966            GT:GQ:AD:VF:NL:                0/1:100:74922,10044:0.1182:20:-100:-100.0000:100
            //chr1       115258743           .               A             T              100.00   PB;LowVF            DP=49612            GT:GQ:AD:VF:NL:                ./.:100:43362,670:0.0135:20:-46.0807:0.0000:100
            //chr1       115258743           .               A             .               100.00   PASS      DP=35354            GT:GQ:AD:VF:NL:                0/0:100:30256:0.1442:20:-100.0000:-100.0000:100
            //chr1       115258744           .               C             T              100.00   PB           DP=85155            GT:GQ:AD:VF:NL:                0/1:100:74182,5536:0.0650:20:-
            //(issue#1) at 743 we had a A->. in only one pool. It should be marked as BIAS and not PASS.
            //(issue#2) at 744 we had a C->T at 6% when it should be at ~0%, and called as a ref.

            string     VFstring     = "";
            VcfVariant FunnyResult0 = CombinedVariants[3];

            VFstring = FunnyResult0.Genotypes[0]["VF"];
            Assert.Equal(VFstring, "0.144");
            Assert.Equal(FunnyResult0.Filters, "PB");
            Assert.Equal(FunnyResult0.ReferenceAllele, "A");
            Assert.Equal(FunnyResult0.VariantAlleles[0], ".");
            //this used to be a reference as a pass, even though it was only called in one pool.

            VcfVariant FunnyResult = CombinedVariants[6];

            Assert.Equal(FunnyResult.ReferencePosition, 115258744);

            VFstring = FunnyResult.Genotypes[0]["VF"];
            Assert.Equal(VFstring, "0.129");
            Assert.Equal(FunnyResult.Filters, "PASS");
            Assert.Equal(FunnyResult.ReferenceAllele, "C");
            Assert.Equal(FunnyResult.VariantAlleles[0], ".");
            //when we had the bug, this used to get called at 6%.


            //now, check the Venn functionality:
            Assert.Equal(2, AandBVariants.Count());
            Assert.Equal(2, BandAVariants.Count());
            Assert.Equal(2, AnotBVariants.Count());
            Assert.Equal(0, BnotAVariants.Count());

            Assert.Equal(115258743, AandBVariants[0].ReferencePosition);
            Assert.Equal("AC", AandBVariants[0].ReferenceAllele);
            Assert.Equal("TT", AandBVariants[0].VariantAlleles[0]);

            Assert.Equal(115258747, AandBVariants[1].ReferencePosition);
            Assert.Equal("C", AandBVariants[1].ReferenceAllele);
            Assert.Equal("T", AandBVariants[1].VariantAlleles[0]);

            Assert.Equal(115258743, BandAVariants[0].ReferencePosition);
            Assert.Equal("AC", BandAVariants[0].ReferenceAllele);
            Assert.Equal("TT", BandAVariants[0].VariantAlleles[0]);

            Assert.Equal(115258747, BandAVariants[1].ReferencePosition);
            Assert.Equal("C", BandAVariants[1].ReferenceAllele);
            Assert.Equal("T", BandAVariants[1].VariantAlleles[0]);

            Assert.Equal(115258743, AnotBVariants[0].ReferencePosition);
            Assert.Equal("A", AnotBVariants[0].ReferenceAllele);
            Assert.Equal("T", AnotBVariants[0].VariantAlleles[0]);

            Assert.Equal(115258744, AnotBVariants[1].ReferencePosition);
            Assert.Equal("C", AnotBVariants[1].ReferenceAllele);
            Assert.Equal("T", AnotBVariants[1].VariantAlleles[0]);
        }
예제 #16
0
        public void VennVcf_CombineTwoPoolVariants_RulesAthroughD_Tests()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string OutputPath = Path.Combine(outDir, "outEandF.vcf");

            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;

            string VcfPath_PoolA = Path.Combine(VcfPathRoot, "09H-03403-MT1-1_S7.genome.vcf");
            List <CalledAllele> PoolAVariants = VcfVariantUtilities.Convert(VcfReader.GetAllVariantsInFile(VcfPath_PoolA)).ToList();

            string VcfPath_PoolB = Path.Combine(VcfPathRoot, "09H-03403-MT1-1_S8.genome.vcf");
            List <CalledAllele> PoolBVariants = VcfVariantUtilities.Convert(VcfReader.GetAllVariantsInFile(VcfPath_PoolB)).ToList();

            CalledAllele VariantA = PoolAVariants[0];
            CalledAllele VariantB = PoolBVariants[0];

            List <CalledAllele[]> pairs = VennProcessor.SelectPairs(
                new List <CalledAllele>()
            {
                VariantA
            },
                new List <CalledAllele>
            {
                VariantB
            });

            VariantComparisonCase ComparisonCase   = VennProcessor.GetComparisonCase(pairs[0][0], pairs[0][1]);
            ConsensusBuilder      consensusBuilder = new ConsensusBuilder("", parameters);
            CalledAllele          Consensus        = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            //Rule "A" test
            //A	if combined VF<1% and less than 2.6% in each pool, call REF
            //(note, we were Alt in one pool and ref in another)

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(VariantA.Frequency, 0.9979, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantB.Frequency, 0.0173, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.OneReferenceOneAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(Consensus.Frequency, 0.9907, 4);
            Assert.Equal(Consensus.VariantQscore, 100);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
            });                                                                            //<-low VF tag will NOT added by post-processing b/c is ref call

            //B	if combined VF<1% and more than 2.6% in one pool, call NO CALL

            VariantA = PoolAVariants[1];
            VariantB = PoolBVariants[1];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.0776, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(VariantB.Frequency, 0.9989, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.OneReferenceOneAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.AltLikeNoCall);
            Assert.Equal(Consensus.Frequency, 0.0070, 4);
            Assert.Equal(Consensus.VariantQscore, 0);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType>
            {
                Pisces.Domain.Types.FilterType.PoolBias
            });                                          //<-low VF tag will also get added by post-processing

            //Rule "Ca" test
            //C-a	if combined 1%<VF<2.6%
            // and more than 2.6% in one pool and less than 1% in the other, call NO CALL w/PB

            VariantA = PoolAVariants[2];
            VariantB = PoolBVariants[2];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.0367, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HomozygousRef);
            Assert.Equal(VariantB.Frequency, 0.9976, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.OneReferenceOneAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.AltLikeNoCall);
            Assert.Equal(Consensus.Frequency, 0.0117, 4);
            Assert.Equal(Consensus.VariantQscore, 23);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
                Pisces.Domain.Types.FilterType.PoolBias
            });
            //Rule "Cb" test
            //C-a	if combined 1%<VF<2.6%
            // and more than 2.6% in one pool and between 1% and 2.6% in the other, call NO CALL w/ no PB

            VariantA = PoolAVariants[3];
            VariantB = PoolBVariants[3];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.01725, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantB.Frequency, 0.03667, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.AgreedOnAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.AltLikeNoCall);
            Assert.Equal(Consensus.Frequency, 0.02347, 4);
            Assert.Equal(Consensus.VariantQscore, 100);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
            });                                                                            //<-low VF tag will also get added by post-processing

            //Rule "D" test
            //D	if combined VF>=2.6% call VARIANT (PB if only present in one pool, using 1% as the cutoff)

            VariantA = PoolAVariants[4];
            VariantB = PoolBVariants[4];

            ComparisonCase = VennProcessor.GetComparisonCase(VariantA, VariantB);
            Consensus      = consensusBuilder.CombineVariants(
                VariantA, VariantB, ComparisonCase);

            Assert.Equal(VariantA.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantA.Frequency, 0.2509, 4);
            Assert.Equal(VariantA.VariantQscore, 100);
            Assert.Equal(VariantA.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(VariantB.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(VariantB.Frequency, 0.0367, 4);
            Assert.Equal(VariantB.VariantQscore, 100);
            Assert.Equal(VariantB.Filters, new List <Pisces.Domain.Types.FilterType> {
            });

            Assert.Equal(ComparisonCase, VariantComparisonCase.AgreedOnAlternate);
            Assert.Equal(Consensus.Genotype, Pisces.Domain.Types.Genotype.HeterozygousAltRef);
            Assert.Equal(Consensus.Frequency, 0.1716, 4);
            Assert.Equal(Consensus.VariantQscore, 100);
            Assert.Equal(Consensus.Filters, new List <Pisces.Domain.Types.FilterType> {
            });                                                                            //<-low VF tag will also get set by post processor
        }
예제 #17
0
        public void StitchedCollapsedBamGroundTruth()
        {
            // SNP ground truth from TingTing
            var bamFilePath          = Path.Combine(TestPaths.LocalTestDataDirectory, "collapsed.test.stitched.bam");
            var functionalTestRunner = new SomaticVariantCallerFunctionalTestSetup();

            functionalTestRunner.GenomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr1");

            var appOptions = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { bamFilePath },
                IntervalPaths        = null,
                GenomePaths          = new[] { Path.Combine(TestPaths.SharedGenomesDirectory, "chr1") },
                OutputBiasFiles      = true,
                DebugMode            = true,
                CallMNVs             = true,
                UseMNVReallocation   = false,
                MaxSizeMNV           = 100,
                MaxGapBetweenMNV     = 10,
                NoiseModelHalfWindow = 1,
                BamFilterParameters  = new Domain.Options.BamFilterParameters()
                {
                    MinimumBaseCallQuality = 20,
                    MinimumMapQuality      = 1,
                    OnlyUseProperPairs     = false,
                },
                VariantCallingParameters = new Domain.Options.VariantCallingParameters()
                {
                    MaximumVariantQScore                  = 100,
                    MinimumVariantQScoreFilter            = 30,
                    MinimumVariantQScore                  = 20,
                    MinimumCoverage                       = 10,
                    MinimumFrequency                      = 0.01f,
                    FilterOutVariantsPresentOnlyOneStrand = false,
                    ForcedNoiseLevel                      = -1,
                    NoiseModel      = NoiseModel.Flat,
                    StrandBiasModel = StrandBiasModel.Extended,
                },
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = true,
                    ReportRcCounts = true,
                    ReportTsCounts = true
                }
            };

            // Time to build the fake sequences for testing.
            var mockChrRef = new List <ChrReference>()
            {
                new ChrReference()
                {
                    // position 9770498 ~ 9770669
                    Name     = "chr1",
                    Sequence = new string('N', 9770498 - 1) + "GAAGTAACAACGCAGGATGCCCCCTGGGGTGGACTGCCCCATGGAATTCTGGACCAAGGAGGAGAATCAGAGCGTTGTGGTTGACTTCCTGCTGCCCACAGGGGTCTACCTGAACTTCCCTGTGTCCCGCAATGCCAACCTCAGCACCATCAAGCAGGTATGGCCTCCATC"
                }
            };

            var expectedAlleles = new List <CalledAllele>
            {
                new CalledAllele(AlleleCategory.Snv)
                {
                    ReferencePosition = 9770596,
                    ReferenceAllele   = "C",
                    AlternateAllele   = "A",
                    Chromosome        = "chr1"
                }
            };

            functionalTestRunner.Execute(bamFilePath, Path.ChangeExtension(bamFilePath, "genome.vcf"), null, expectedAlleles, mockChrRef, applicationOptions: appOptions);
            var truthvcfFilePath         = Path.Combine(Path.GetDirectoryName(appOptions.BAMPaths[0]), "test_truth.stitched.genome.vcf");
            var stitchedCollapsedTruth   = VcfReader.GetAllVariantsInFile(truthvcfFilePath);
            var stitchedCollapsedResults = VcfReader.GetAllVariantsInFile(Path.ChangeExtension(bamFilePath, "genome.vcf"));

            foreach (var variantTruth in stitchedCollapsedTruth)
            {
                var variantActual = stitchedCollapsedResults.First(x => x.ReferencePosition == variantTruth.ReferencePosition);
                Assert.NotNull(variantActual);
                Assert.Equal(variantTruth.Genotypes[0]["US"], variantActual.Genotypes[0]["US"]);
            }
        }
예제 #18
0
        public void VennVcf_CombineTwoPoolVariants_RulesEandF_Tests()
        {
            //Rule "E" test    (ie an Alt+ref call converges to a REf, and we also had a ref call following it)
            //E	if we end up with multiple REF calls for the same loci, combine those .VCF lines into one ref call.

            //Rule "F" test    (ie various alt calls all ended up as no-call.  we dont want multiple no call lines in the vcf.)
            //F	if we end up with multiple NOCALL calls for the same loci, leave those .VCF lines separate

            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfPath_PoolA = Path.Combine(VcfPathRoot, "RulesEandF_S1.genome.vcf");
            string VcfPath_PoolB = Path.Combine(VcfPathRoot, "RulesEandF_S2.genome.vcf");

            string OutputPath = Path.Combine(outDir, "outEandF.vcf");

            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;
            parameters.OutputDirectory   = outDir;
            VennProcessor VennVcf = new VennProcessor(
                new string[] { VcfPath_PoolA, VcfPath_PoolB }, parameters);

            VennVcf.DoPairwiseVenn(false);

            Assert.Equal(File.Exists(OutputPath), true);

            List <VcfVariant> PoolAVariants    = VcfReader.GetAllVariantsInFile(VcfPath_PoolA);
            List <VcfVariant> PoolBVariants    = VcfReader.GetAllVariantsInFile(VcfPath_PoolB);
            List <VcfVariant> CombinedVariants = VcfReader.GetAllVariantsInFile(OutputPath);

            //Rule "E" test    (ie an Alt+ref call converges to a REf, and we also had a ref call following it)
            //E	if we end up with multiple REF calls for the same loci, combine those .VCF lines into one ref call.

            VcfVariant VariantA_1 = PoolAVariants[0];

            Assert.Equal(VariantA_1.Genotypes[0]["GT"], "0/0");
            Assert.Equal(VariantA_1.Genotypes[0]["VF"], "0.0021");
            Assert.Equal(VariantA_1.Quality, 100);
            Assert.Equal(VariantA_1.Filters, "PASS");
            Assert.Equal(VariantA_1.ReferencePosition, 25378561);

            VcfVariant VariantA_2 = PoolAVariants[1];

            Assert.Equal(VariantA_2.ReferencePosition, 25378562);

            VcfVariant VariantB_1 = PoolBVariants[0];

            Assert.Equal(VariantB_1.Genotypes[0]["GT"], "0/1");
            Assert.Equal(VariantB_1.Genotypes[0]["VF"], "0.0173");
            Assert.Equal(VariantB_1.Quality, 100);
            Assert.Equal(VariantB_1.Filters, "PASS");
            Assert.Equal(VariantB_1.ReferencePosition, 25378561);

            VcfVariant VariantB_2 = PoolBVariants[1];

            Assert.Equal(VariantB_2.Genotypes[0]["GT"], "0/0");
            Assert.Equal(VariantB_2.Genotypes[0]["VF"], "0.0021");
            Assert.Equal(VariantB_2.Quality, 100);
            Assert.Equal(VariantB_2.Filters, "PASS");
            Assert.Equal(VariantB_2.ReferencePosition, 25378561);

            VcfVariant Consensus_1 = CombinedVariants[0];

            Assert.Equal(Consensus_1.Genotypes[0]["GT"], "0/0");
            Assert.Equal(Consensus_1.Genotypes[0]["VF"], "0.009"); //slightly improved from .008
            Assert.Equal(Consensus_1.Quality, 100);
            Assert.Equal(Consensus_1.Filters, "PASS");             //<-low VF tag will NOT added by post-processing b/c is ref call
            Assert.Equal(Consensus_1.ReferencePosition, 25378561);

            VcfVariant Consensus_2 = CombinedVariants[1];

            Assert.Equal(Consensus_2.ReferencePosition, 25378562);

            //Rule "F" test    (ie various alt calls all ended up as no-call.
            //F	if we end up with multiple NOCALL calls for the same loci, leave those .VCF lines separate


            VariantA_1 = PoolAVariants[1];
            Assert.Equal(VariantA_1.Genotypes[0]["GT"], "0/1");
            Assert.Equal(VariantA_1.Genotypes[0]["VF"], "0.0725");
            Assert.Equal(VariantA_1.Quality, 100);
            Assert.Equal(VariantA_1.Filters, "PASS");
            Assert.Equal(VariantA_1.ReferencePosition, 25378562);

            VariantA_2 = PoolAVariants[2];
            Assert.Equal(VariantA_2.Genotypes[0]["GT"], "0/1");
            Assert.Equal(VariantA_2.Genotypes[0]["VF"], "0.0725");
            Assert.Equal(VariantA_2.Quality, 100);
            Assert.Equal(VariantA_2.Filters, "PASS");
            Assert.Equal(VariantA_2.ReferencePosition, 25378562);

            VcfVariant VariantA_3 = PoolAVariants[3];

            Assert.Equal(VariantA_3.Genotypes[0]["GT"], "0/1");
            Assert.Equal(VariantA_3.Genotypes[0]["VF"], "0.0725");
            Assert.Equal(VariantA_3.Quality, 100);
            Assert.Equal(VariantA_3.Filters, "PASS");
            Assert.Equal(VariantA_3.ReferencePosition, 25378562);

            VariantB_1 = PoolBVariants[2];
            Assert.Equal(VariantB_1.Genotypes[0]["GT"], "0/0");
            Assert.Equal(VariantB_1.Genotypes[0]["VF"], "0.0024");
            Assert.Equal(VariantB_1.Quality, 100);
            Assert.Equal(VariantB_1.Filters, "PASS");
            Assert.Equal(VariantB_1.ReferencePosition, 25378562);

            VariantB_2 = PoolBVariants[3];
            Assert.Equal(VariantB_2.ReferencePosition, 25378563);

            Consensus_1 = CombinedVariants[1];
            Assert.Equal(Consensus_1.ReferencePosition, 25378562);
            Assert.Equal(Consensus_1.Genotypes[0]["GT"], "./.");
            Assert.Equal(Consensus_1.Genotypes[0]["VF"], "0.007");
            Assert.Equal(Consensus_1.Quality, 0);
            Assert.Equal(Consensus_1.Filters, "PB"); //<-low VF tag will also get added by post-processing
            Assert.Equal(Consensus_1.ReferenceAllele, "C");
            Assert.Equal(Consensus_1.VariantAlleles[0], "T");

            Consensus_2 = CombinedVariants[2];
            Assert.Equal(Consensus_2.ReferencePosition, 25378562);
            Assert.Equal(Consensus_2.Genotypes[0]["GT"], "./.");
            Assert.Equal(Consensus_2.Genotypes[0]["VF"], "0.007");
            Assert.Equal(Consensus_2.Quality, 0);
            Assert.Equal(Consensus_2.Filters, "PB"); //<-low VF tag will also get added by post-processing
            Assert.Equal(Consensus_2.ReferenceAllele, "C");
            Assert.Equal(Consensus_2.VariantAlleles[0], "TT");

            VcfVariant Consensus_3 = CombinedVariants[3];

            Assert.Equal(Consensus_3.ReferencePosition, 25378562);
            Assert.Equal(Consensus_3.Genotypes[0]["GT"], "./.");
            Assert.Equal(Consensus_3.Genotypes[0]["VF"], "0.007");
            Assert.Equal(Consensus_3.Quality, 0);
            Assert.Equal(Consensus_3.Filters, "PB"); //<-low VF tag will also get added by post-processing
            Assert.Equal(Consensus_3.ReferenceAllele, "CC");
            Assert.Equal(Consensus_3.VariantAlleles[0], "T");

            VcfVariant Consensus_4 = CombinedVariants[4];

            Assert.Equal(Consensus_4.ReferencePosition, 25378563);

            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }
        }