Esempio n. 1
0
        //Test that we get the same results when using multiple samples and intervals, in the same order.
        //First test running two samples together, then test running two samples individually, then test it with threadByChr on.
        //Nothing strange should happen.
        public void IntervalTestingWithMultipleSamples()
        {
            // Based on a real bug where a gvcf was found to be out of order; it only happened for
            // multiple-bam runs with different interval files.
            // The test runs the processor three times (both samples together, each sample on its own,
            // then both samples with the third GenomeProcessor argument set to false) and checks the
            // output against the same expected files each time.
            var bamFile1Path     = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");                 //has data from chr17,7572952 and chr19,3118883
            var bamFile2Path     = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17again.bam");
            var interval1Path    = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17int.picard");                //chr 17 only
            var interval2Path    = Path.Combine(TestPaths.LocalTestDataDirectory, "poorlyOrdered.picard");           //disordered, chr 19 first.
            var outDir           = Path.Combine(TestPaths.LocalTestDataDirectory, "IntervalTests");
            var vcfFile1Path     = Path.Combine(outDir, "Chr17Chr19.genome.vcf");                                    //only results from chr17
            var vcfFile2Path     = Path.Combine(outDir, "Chr17again.genome.vcf");                                    //show results from chr17 and 19
            var vcfExpectedFile1 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.expected.genome.vcf"); //only results from chr17
            var vcfExpectedFile2 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17again.expected.genome.vcf"); //show results from chr17 and 19

            var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "fourChrs");

            // One factory per run mode: both samples together, then each sample on its own.
            var twoSampleFactory = MakeFactory(
                new List<string> { bamFile1Path, bamFile2Path },
                new List<string> { interval1Path, interval2Path },
                outDir);

            var firstSampleFactory = MakeFactory(
                new List<string> { bamFile1Path },
                new List<string> { interval1Path },
                outDir);

            var secondSampleFactory = MakeFactory(
                new List<string> { bamFile2Path },
                new List<string> { interval2Path },
                outDir);

            //regular two-sample run mode.
            var genome  = twoSampleFactory.GetReferenceGenome(genomeDirectory);
            var genome1 = firstSampleFactory.GetReferenceGenome(genomeDirectory);
            var genome2 = secondSampleFactory.GetReferenceGenome(genomeDirectory);

            var processor = new GenomeProcessor(twoSampleFactory, genome);

            var chrs = genome.ChromosomesToProcess;

            Assert.Equal("chr7", chrs[0]);
            Assert.Equal("chr8", chrs[1]);
            Assert.Equal("chr17", chrs[2]);
            Assert.Equal("chr19", chrs[3]);

            processor.InternalExecute(10);

            // Executing the processor must not reorder the chromosome list.
            chrs = genome.ChromosomesToProcess;
            Assert.Equal("chr7", chrs[0]);
            Assert.Equal("chr8", chrs[1]);
            Assert.Equal("chr17", chrs[2]);
            Assert.Equal("chr19", chrs[3]);

            //just be aware, when we process the samples individually, we use different genome lists.
            Assert.Equal(4, genome.ChromosomesToProcess.Count);
            Assert.Equal(1, genome1.ChromosomesToProcess.Count);
            Assert.Equal(4, genome2.ChromosomesToProcess.Count);
            Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);
            Assert.Equal("chr7", genome2.ChromosomesToProcess[0]);
            Assert.Equal("chr19", genome2.ChromosomesToProcess[3]);

            var reader1 = new VcfReader(vcfFile1Path);
            var reader2 = new VcfReader(vcfFile2Path);

            var contigs1Results = GetContigs(reader1);
            var contigs2Results = GetContigs(reader2);
            var vcf1Results     = reader1.GetVariants().ToList();
            var vcf2Results     = reader2.GetVariants().ToList();

            //the expected results:
            var readerExp1 = new VcfReader(vcfExpectedFile1);
            var readerExp2 = new VcfReader(vcfExpectedFile2);

            var contigs1Expected = GetContigs(readerExp1);
            var contigs2Expected = GetContigs(readerExp2);
            var vcf1Expected     = readerExp1.GetVariants().ToList();
            var vcf2Expected     = readerExp2.GetVariants().ToList();

            // The expected variants are fully materialized by ToList(), so the expected-file
            // readers can be released right away instead of being left open.
            readerExp1.Dispose();
            readerExp2.Dispose();

            Assert.Equal(4, contigs1Results.Count);
            Assert.Equal(4, contigs2Results.Count);
            Assert.Equal(11, vcf1Results.Count);
            Assert.Equal(71, vcf2Results.Count);

            //check variants and contigs all come out the same
            // NOTE(review): the vcf result lists are not passed to CheckForOrdering here or below;
            // confirm that the helper really compares actual variants against the expected ones.
            CheckForOrdering(contigs1Results, contigs2Results, contigs1Expected, contigs2Expected, vcf1Expected, vcf2Expected);

            reader1.Dispose();
            reader2.Dispose();
            File.Delete(vcfFile1Path);
            File.Delete(vcfFile2Path);

            //now check again, processing them separately
            processor = new GenomeProcessor(firstSampleFactory, genome1);
            processor.InternalExecute(10);
            processor = new GenomeProcessor(secondSampleFactory, genome2);
            processor.InternalExecute(10);

            reader1 = new VcfReader(vcfFile1Path);
            reader2 = new VcfReader(vcfFile2Path);

            contigs1Results = GetContigs(reader1);
            contigs2Results = GetContigs(reader2);
            vcf1Results     = reader1.GetVariants().ToList();
            vcf2Results     = reader2.GetVariants().ToList();

            //check variants all come out the same (the contigs will be different as shown)
            CheckForOrdering(contigs1Results, contigs2Results,
                             new List<string> { "chr17" },
                             contigs2Expected, vcf1Expected, vcf2Expected);

            reader1.Dispose();
            reader2.Dispose();
            File.Delete(vcfFile1Path);
            // Remove the second output as well, so the next pass cannot succeed by reading a
            // stale file left over from this pass.
            File.Delete(vcfFile2Path);

            //now check again, processing them "thread by chr" way
            // NOTE(review): the meaning of the third constructor argument (false) is not visible
            // from this test; confirm it selects the mode described above.
            processor = new GenomeProcessor(twoSampleFactory, genome, false);
            processor.InternalExecute(10);

            reader1 = new VcfReader(vcfFile1Path);
            reader2 = new VcfReader(vcfFile2Path);

            contigs1Results = GetContigs(reader1);
            contigs2Results = GetContigs(reader2);
            vcf1Results     = reader1.GetVariants().ToList();
            vcf2Results     = reader2.GetVariants().ToList();

            //check variants all come out the same (the contigs will be back to normal)
            CheckForOrdering(contigs1Results, contigs2Results,
                             contigs2Expected, contigs2Expected, vcf1Expected, vcf2Expected);

            reader1.Dispose();
            reader2.Dispose();
            File.Delete(vcfFile1Path);
            File.Delete(vcfFile2Path);
        }
Esempio n. 2
0
 // A contig header line that has an ID but no length entry must produce an empty result.
 public void GetChromAndLengthInfo_ReturnEmptyArray_NoLengthInfo()
 {
     const string contigLineWithoutLength = "##contig=<ID=chr1>";

     var chromAndLength = VcfReader.GetChromAndLengthInfo(contigLineWithoutLength);

     Assert.Empty(chromAndLength);
 }
Esempio n. 3
0
        public void IntervalTestingWithVcf()
        {
            // Runs a single bam with a chr17-only interval file through the processor and compares
            // the filters, contigs and variants of the written vcf with an expected vcf.
            var bamFile1Path     = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");          //has data from chr17,7572952 and chr19,3118883
            var interval1Path    = Path.Combine(TestPaths.LocalTestDataDirectory, "chr17int.picard");         //chr 17 only
            var outDir           = Path.Combine(TestPaths.LocalTestDataDirectory, "IntervalTests");
            var vcfFile1Path     = Path.Combine(outDir, "Chr17Chr19.vcf");                                    //only results from chr17
            var vcfExpectedFile1 = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.expected.vcf"); //only results from chr17

            var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "fourChrs");

            var factory = MakeVcfFactory(
                new List<string> { bamFile1Path },
                new List<string> { interval1Path },
                outDir);

            var genome1 = factory.GetReferenceGenome(genomeDirectory);

            var processor = new GenomeProcessor(factory, genome1);
            var chrs      = genome1.ChromosomesToProcess;

            Assert.Equal("chr17", chrs[0]);

            processor.InternalExecute(10);

            // Executing the processor must leave the chromosome list untouched.
            Assert.Equal(1, genome1.ChromosomesToProcess.Count);
            Assert.Equal("chr17", genome1.ChromosomesToProcess[0]);

            var reader1 = new VcfReader(vcfFile1Path);

            var filters1Results = GetFilters(reader1);
            var contigs1Results = GetContigs(reader1);
            var vcf1Results     = reader1.GetVariants().ToList();

            //the expected results:
            var readerExp1 = new VcfReader(vcfExpectedFile1);

            var filters1Expected = GetFilters(readerExp1);
            var contigs1Expected = GetContigs(readerExp1);
            var vcf1Expected     = readerExp1.GetVariants().ToList();

            // The expected variants are fully materialized by ToList(), so release the
            // expected-file reader now instead of leaving it open.
            readerExp1.Dispose();

            Assert.Equal(4, filters1Results.Count);
            Assert.Equal(1, contigs1Results.Count);
            Assert.Equal(1, vcf1Results.Count);

            //check variants and contigs all come out the same
            for (int i = 0; i < contigs1Expected.Count; i++)
            {
                Assert.Equal(contigs1Expected[i], contigs1Results[i]);
            }

            for (int i = 0; i < filters1Expected.Count; i++)
            {
                Assert.Equal(filters1Expected[i].ToString(), filters1Results[i].ToString());
            }

            for (int i = 0; i < vcf1Expected.Count; i++)
            {
                Assert.Equal(vcf1Expected[i].ToString(), vcf1Results[i].ToString());
            }

            // Close the output reader before deleting the file it was reading.
            reader1.Dispose();
            File.Delete(vcfFile1Path);
        }
Esempio n. 4
0
 // A header line that is not a contig line must produce an empty result.
 public void GetChromAndLengthInfo_ReturnEmptyArray_NoProperPrefix()
 {
     const string nonContigHeaderLine = "##fileformat=VCFv";

     var chromAndLength = VcfReader.GetChromAndLengthInfo(nonContigHeaderLine);

     Assert.Empty(chromAndLength);
 }
Esempio n. 5
0
 // Parameterized check: parsing the header line must yield exactly the supplied info array.
 public void GetChromAndLength_AsExpect(string line, string[] info)
 {
     var parsedInfo = VcfReader.GetChromAndLengthInfo(line);

     Assert.Equal(info, parsedInfo);
 }
Esempio n. 6
0
        public void WriteANbhd()
        {
            // Merges the variants of an input vcf with two hand-built neighborhoods (chr2 and chr7)
            // through VcfMerger, writes the result with PhasedVcfWriter, and compares the output
            // file line by line with the expected file.
            var outputFilePath   = Path.Combine(UnitTestPaths.TestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
            var inputFilePath    = Path.Combine(UnitTestPaths.TestDataDirectory, "MergerInput.vcf");
            var expectedFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "MergerOutput.vcf");

            // Start from a clean slate so a stale output file cannot satisfy the comparison.
            File.Delete(outputFilePath);

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel                         = PloidyModel.Somatic,
                AllowMultipleVcfLinesPerLoci        = true
            };

            // The writer is deliberately given an empty input context, exactly as before; the
            // populated context object that used to be built here was never passed anywhere.
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List<string>(), null);

            // The reader is only needed while merging, so scope it with a using statement
            // to make sure the input file handle is released.
            using (var reader = new VcfReader(inputFilePath, true))
            {
                //set up the original variants
                var originalVcfVariant1 = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
                var originalVcfVariant2 = PhasedVariantTestUtilities.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
                var originalVcfVariant4 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
                var originalVcfVariant5 = PhasedVariantTestUtilities.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

                var vs1 = new VariantSite(originalVcfVariant1);
                var vs2 = new VariantSite(originalVcfVariant2);
                var vs4 = new VariantSite(originalVcfVariant4);
                var vs5 = new VariantSite(originalVcfVariant5);

                //have to replace variants at position 116380048 (we call two new MNVS here)
                var nbhd1 = new VcfNeighborhood(new VariantCallingParameters(), "chr2", vs1, vs2, "");

                nbhd1.SetRangeOfInterest();

                //have to replace variants at position 116380051 and 52  (we call one new MNV at 51)
                var nbhd2 = new VcfNeighborhood(new VariantCallingParameters(), "chr7", vs4, vs5, "");

                nbhd2.SetRangeOfInterest();

                VcfMerger          merger         = new VcfMerger(reader);
                List<CalledAllele> allelesPastNbh = new List<CalledAllele>();

                nbhd1.CalledVariants = new Dictionary<int, List<CalledAllele>>
                {
                    { originalVcfVariant1.Coordinate, new List<CalledAllele> { originalVcfVariant1, originalVcfVariant2 } }
                };
                nbhd2.CalledVariants = new Dictionary<int, List<CalledAllele>>
                {
                    { originalVcfVariant4.Coordinate, new List<CalledAllele> { originalVcfVariant4 } }
                };

                // Walk the input in order: up to each neighborhood's chromosome, through the
                // neighborhood itself, and finally everything that is left.
                allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd1.ReferenceName);

                allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd1, writer, allelesPastNbh);

                allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd2.ReferenceName);

                allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd2, writer, allelesPastNbh);

                merger.WriteRemainingVariants(writer, allelesPastNbh);

                // Dispose the writer before the output file is read back below.
                writer.Dispose();
            }

            var expectedLines = File.ReadLines(expectedFilePath).ToList();
            var outputLines   = File.ReadLines(outputFilePath).ToList();

            Assert.Equal(expectedLines.Count, outputLines.Count);

            for (int i = 0; i < expectedLines.Count; i++)
            {
                Assert.Equal(expectedLines[i], outputLines[i]);
            }
        }
Esempio n. 7
0
        public void UnpackAlleles()
        {
            // Reads two "crushed" vcf files, checks how a 1/2 site and the site after it look
            // before unpacking, then checks the same list indices again after
            // Extensions.UnpackVariants has been applied.
            var paddedCrushedPath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_Crushed_Padded_expected.vcf");
            var genomeCrushedPath = Path.Combine(UnitTestPaths.TestDataDirectory, "crushed.genome.vcf");

            var paddedVariants = VcfReader.GetAllVariantsInFile(paddedCrushedPath);
            var genomeVariants = VcfReader.GetAllVariantsInFile(genomeCrushedPath);

            Assert.Equal(7, paddedVariants.Count);
            Assert.Equal(90, genomeVariants.Count);

            // --- before unpacking: the 1/2 variants and the entries right after them ---
            var paddedHet       = paddedVariants[5];
            var genomeHet       = genomeVariants[3];
            var paddedFollowing = paddedVariants[6];
            var genomeFollowing = genomeVariants[4];

            // a single genotype column, carrying both alt alleles on one line
            Assert.Equal(1, paddedHet.Genotypes.Count);
            Assert.Equal(1, genomeHet.Genotypes.Count);
            Assert.Equal(2, paddedHet.VariantAlleles.Count());
            Assert.Equal(2, genomeHet.VariantAlleles.Count());

            // depth and frequency fields as written in the crushed files
            Assert.Equal("2387,2000", paddedHet.Genotypes[0]["AD"]);
            Assert.Equal("0.8133", paddedHet.Genotypes[0]["VF"]);
            Assert.Equal("254,254", genomeHet.Genotypes[0]["AD"]);

            // alleles
            Assert.Equal("AA", paddedHet.ReferenceAllele);
            Assert.Equal("GA", paddedHet.VariantAlleles[0]);
            Assert.Equal("G", paddedHet.VariantAlleles[1]);
            Assert.Equal(".", paddedFollowing.VariantAlleles[0]);
            Assert.Equal("0", paddedFollowing.Genotypes[0]["AD"]);
            Assert.Equal("532", genomeFollowing.Genotypes[0]["AD"]);

            // the following entries sit one position downstream
            Assert.Equal(10, paddedHet.ReferencePosition);
            Assert.Equal(223906731, genomeHet.ReferencePosition);
            Assert.Equal(11, paddedFollowing.ReferencePosition);
            Assert.Equal(223906732, genomeFollowing.ReferencePosition);

            // --- after unpacking: each list is one entry longer ---
            var paddedUnpacked = Extensions.UnpackVariants(paddedVariants);
            var genomeUnpacked = Extensions.UnpackVariants(genomeVariants);

            Assert.Equal(8, paddedUnpacked.Count);
            Assert.Equal(91, genomeUnpacked.Count);

            paddedHet       = paddedUnpacked[5];
            genomeHet       = genomeUnpacked[3];
            paddedFollowing = paddedUnpacked[6];
            genomeFollowing = genomeUnpacked[4];

            // Where the expected reference counts come from:
            //   example one: total depth = 5394, variant count = 2387 + 2000 = 4387, so ref counts ~1007
            //   example two: total depth = 532,  variant count = 254 + 254 = 508,   so ref counts ~24

            Assert.Equal(1, paddedHet.Genotypes.Count);
            Assert.Equal(1, genomeHet.Genotypes.Count);
            Assert.Equal("1007,2387", paddedHet.Genotypes[0]["AD"]);
            Assert.Equal("24,254", genomeHet.Genotypes[0]["AD"]);
            Assert.Equal("0.4425", paddedHet.Genotypes[0]["VF"]);

            // every entry now carries exactly one alt allele
            Assert.Equal(1, paddedHet.VariantAlleles.Count());
            Assert.Equal(1, genomeHet.VariantAlleles.Count());
            Assert.Equal(1, paddedFollowing.VariantAlleles.Count());
            Assert.Equal(1, genomeFollowing.VariantAlleles.Count());

            Assert.Equal("1007,2000", paddedFollowing.Genotypes[0]["AD"]);
            Assert.Equal("24,254", genomeFollowing.Genotypes[0]["AD"]);
            Assert.Equal("AA", paddedHet.ReferenceAllele);
            Assert.Equal("GA", paddedHet.VariantAlleles[0]);
            Assert.Equal("G", paddedFollowing.VariantAlleles[0]);
            Assert.Equal("0.3708", paddedFollowing.Genotypes[0]["VF"]);

            // both entries of each pair now share the original position
            Assert.Equal(10, paddedHet.ReferencePosition);
            Assert.Equal(223906731, genomeHet.ReferencePosition);
            Assert.Equal(10, paddedFollowing.ReferencePosition);
            Assert.Equal(223906731, genomeFollowing.ReferencePosition);
        }
        // Runs the processor on one bam with OutputBiasFiles enabled and checks the strand-bias
        // file: it must exist, list each variant call from the gvcf exactly once, and match the
        // expected file. Then runs again with OutputBiasFiles disabled and checks that no bias
        // file is written.
        //   threadByChr: forwarded to CreateAndExecuteProcessor; when true the bias file is
        //                expected under a "_chr19"-suffixed name.
        private void Write_InFlow(bool threadByChr)
        {
            var bamFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "SBWriter_Sample_S1.bam");

            var vcfFilePath  = Path.Combine(TestPaths.LocalTestDataDirectory, "SBWriter_Sample_S1.genome.vcf");
            var biasFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "SBWriter_Sample_S1.genome.ReadStrandBias.txt");

            if (threadByChr)
            {
                biasFilePath = biasFilePath + "_chr19";              //Currently when threading by chrom we are outputting one bias file per chromosome. This is not a customer-facing deliverable and is a low-priority feature.
            }
            var expectedBiasResultsPath = Path.Combine(TestPaths.LocalTestDataDirectory, "Expected_Sample_S1.ReadStrandBias.txt");

            var genomeDirectory = Path.Combine(TestPaths.SharedGenomesDirectory, "chr19");

            var applicationOptions = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { bamFilePath },
                IntervalPaths        = null,
                GenomePaths          = new[] { genomeDirectory },
                OutputBiasFiles      = true,
                DebugMode            = true,
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = true
                }
            };

            // Using GenomeProcessor
            //If OutputBiasFiles is true, should output one bias file per vcf
            var factory = new MockFactoryWithDefaults(applicationOptions);
            var genome  = factory.GetReferenceGenome(genomeDirectory);

            CreateAndExecuteProcessor(threadByChr, factory, genome);

            Assert.True(File.Exists(biasFilePath));

            //All variants that are present in VCF where ref!=alt should be included
            var biasFileContents = File.ReadAllLines(biasFilePath);
            var alleles          = VcfReader.GetAllVariantsInFile(vcfFilePath);
            var variantCalls     = alleles.Where(a => a.VariantAlleles[0] != ".").ToList();

            // Split every bias line into its tab-separated columns once, instead of re-splitting
            // the same line for each column comparison of each variant.
            var biasColumns = biasFileContents.Select(l => l.Split('\t')).ToList();

            foreach (var variantCall in variantCalls)
            {
                Console.WriteLine(variantCall);

                var position  = variantCall.ReferencePosition.ToString();
                var altAllele = variantCall.VariantAlleles.First();

                Assert.True(biasColumns.Count(cols => cols[0] == variantCall.ReferenceName &&
                                                      cols[1] == position &&
                                                      cols[2] == variantCall.ReferenceAllele &&
                                                      cols[3] == altAllele) == 1);
            }
            foreach (var refCall in alleles.Where(a => a.VariantAlleles[0] == ".").ToList())
            {
                var position  = refCall.ReferencePosition.ToString();
                var altAllele = refCall.VariantAlleles.First();

                // NOTE(review): this only rejects a match count of exactly one; a count of two or
                // more would also pass. Confirm whether "no matching line at all" was intended.
                Assert.False(biasColumns.Count(cols => cols[0] == refCall.ReferenceName &&
                                                       cols[1] == position &&
                                                       cols[2] == refCall.ReferenceAllele &&
                                                       cols[3] == altAllele) == 1);
            }

            //Bias files should have expected contents
            var expectedBiasFileContents = File.ReadAllLines(expectedBiasResultsPath);

            Assert.Equal(expectedBiasFileContents, biasFileContents);

            //If OutputBiasFiles is false, should not output any bias files
            File.Delete(biasFilePath);

            applicationOptions.OutputBiasFiles = false;
            factory = new MockFactoryWithDefaults(applicationOptions);
            genome  = factory.GetReferenceGenome(genomeDirectory);
            CreateAndExecuteProcessor(threadByChr, factory, genome);
            Assert.False(File.Exists(biasFilePath));
        }