示例#1
0
        /// <summary>
        /// Prepares the shared fixtures: a <c>VcfFormatter</c> built from a fixed writer configuration,
        /// and three variants obtained from <c>TestHelper.CreatePassingVariant(false)</c>.
        /// </summary>
        public void Initialize()
        {
            var formatterConfig = new VcfWriterConfig();

            // Filter thresholds.
            formatterConfig.DepthFilterThreshold          = 500;
            formatterConfig.VariantQualityFilterThreshold = 20;
            formatterConfig.StrandBiasFilterThreshold     = 0.5f;
            formatterConfig.FrequencyFilterThreshold      = 0.007f;
            formatterConfig.MinFrequencyThreshold         = 0.007f;

            // Output switches.
            formatterConfig.ShouldOutputNoCallFraction          = true;
            formatterConfig.ShouldOutputStrandBiasAndNoiseLevel = true;
            formatterConfig.ShouldFilterOnlyOneStrandCoverage   = true;

            formatterConfig.EstimatedBaseCallQuality = _estimatedBaseCallQuality;

            // AllowMultipleVcfLinesPerLoci is not assigned here (the assignment is disabled).
            //formatterConfig.AllowMultipleVcfLinesPerLoci = true;

            _formatter = new VcfFormatter(formatterConfig);

            // Three independently created variants, all requested with the same argument.
            _v1 = TestHelper.CreatePassingVariant(false);
            _v2 = TestHelper.CreatePassingVariant(false);
            _v3 = TestHelper.CreatePassingVariant(false);
        }
示例#2
0
        /// <summary>
        /// Writes a VCF file with <c>VcfFileWriter</c> and then checks its ##FILTER header lines
        /// with <c>VcfHeaderFormatTester</c>.
        /// </summary>
        public void FilterHeader()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-18.vcf");

            // Start from a clean slate; File.Delete is a no-op when the file is absent.
            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            // Variant strand bias too high or coverage on only one strand
            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            var writer = new VcfFileWriter(outputFilePath, config, context);

            // Dispose in a finally block so the writer is always disposed, even when a write throws.
            try
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }
            finally
            {
                writer.Dispose();
            }

            VcfHeaderFormatTester(config, outputFilePath);
        }
示例#3
0
        /// <summary>
        /// Merges two neighborhoods of called variants into the variants read from MergerInput.vcf,
        /// writes the result with <c>PhasedVcfWriter</c>, and asserts that the output equals
        /// MergerOutput.vcf line by line.
        /// </summary>
        public void WriteANbhd()
        {
            var outputFilePath   = Path.Combine(TestPaths.LocalTestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
            var inputFilePath    = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerInput.vcf");
            var expectedFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerOutput.vcf");

            File.Delete(outputFilePath);

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.Somatic,
                AllowMultipleVcfLinesPerLoci = true
            };

            //set up the original variants
            var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
            var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
            var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

            var vs1 = new VariantSite(originalVcfVariant1);
            var vs2 = new VariantSite(originalVcfVariant2);
            var vs4 = new VariantSite(originalVcfVariant4);
            var vs5 = new VariantSite(originalVcfVariant5);

            //have to replace variants at position 116380048 (we call two new MNVS here)
            var nbhd1 = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr2", vs1, vs2, "");

            nbhd1.SetRangeOfInterest();

            //have to replace variants at position 116380051 and 52  (we call one new MNV at 51)
            var nbhd2 = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr7", vs4, vs5, "");

            nbhd2.SetRangeOfInterest();

            nbhd1.CalledVariants = new Dictionary<int, List<CalledAllele>>
            {
                { originalVcfVariant1.ReferencePosition, new List<CalledAllele> { originalVcfVariant1, originalVcfVariant2 } }
            };
            nbhd2.CalledVariants = new Dictionary<int, List<CalledAllele>>
            {
                { originalVcfVariant4.ReferencePosition, new List<CalledAllele> { originalVcfVariant4 } }
            };

            // NOTE(review): the writer receives an empty VcfWriterInputContext and an empty header list;
            // confirm that no sample/contig metadata is expected in the compared output.
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List<string>(), null);

            // The writer must be disposed before the output file is read back below, and both the
            // writer and the reader must be disposed even if merging throws.
            try
            {
                using (var reader = new VcfReader(inputFilePath, true))
                {
                    var merger         = new VcfMerger(reader);
                    var allelesPastNbh = new List<CalledAllele>();

                    // Each call returns the alleles it read past its stopping point; they are fed
                    // into the next call.
                    allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd1.ReferenceName);

                    allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd1, writer, allelesPastNbh);

                    allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd2.ReferenceName);

                    allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd2, writer, allelesPastNbh);

                    merger.WriteRemainingVariants(writer, allelesPastNbh);
                }
            }
            finally
            {
                writer.Dispose();
            }

            var expectedLines = File.ReadLines(expectedFilePath).ToList();
            var outputLines   = File.ReadLines(outputFilePath).ToList();

            Assert.Equal(expectedLines.Count, outputLines.Count);

            for (int i = 0; i < expectedLines.Count; i++)
            {
                Assert.Equal(expectedLines[i], outputLines[i]);
            }
        }
示例#4
0
        /// <summary>
        /// Reads <paramref name="outputFile"/> and asserts that every line containing "##FILTER=" is a
        /// correctly formatted LowDP, SB or q-score filter line, then asserts that the filters
        /// implied by <paramref name="config"/> were actually seen.
        /// </summary>
        /// <param name="config">Writer configuration whose thresholds and flags drive the expectations.</param>
        /// <param name="outputFile">Path of the VCF file to inspect.</param>
        private void VcfHeaderFormatTester(VcfWriterConfig config, string outputFile)
        {
            var sawLowDepth   = false;
            var sawQscore     = false;
            var sawStrandBias = false;

            var qscoreThreshold = config.QscoreFilterThreshold;

            foreach (var line in File.ReadAllLines(outputFile))
            {
                // Only filter declarations are of interest here.
                if (!Regex.IsMatch(line, "##FILTER="))
                {
                    continue;
                }

                // Everything before the first comma identifies the filter.
                var idToken = line.Split(',')[0];

                if (idToken == "##FILTER=<ID=LowDP")
                {
                    Assert.True(Regex.IsMatch(line, "^##FILTER=<ID=LowDP,Description=\"Low coverage \\(DP tag\\), therefore no genotype called\">$"));
                    sawLowDepth = true;
                }
                else if (idToken == "##FILTER=<ID=SB")
                {
                    var hasBiasThreshold  = config.StrandBiasFilterThreshold.HasValue;
                    var filtersOneStrand  = config.ShouldFilterOnlyOneStrandCoverage;

                    // The expected description depends on which strand-related filters are enabled.
                    if (hasBiasThreshold)
                    {
                        var expectedPattern = filtersOneStrand
                            ? "^##FILTER=<ID=SB,Description=\"(Variant strand bias too high or coverage on only one strand)\">$"
                            : "^##FILTER=<ID=SB,Description=\"(Variant strand bias too high)\">$";

                        Assert.True(Regex.IsMatch(line, expectedPattern));
                    }
                    else if (filtersOneStrand)
                    {
                        Assert.True(Regex.IsMatch(line, "^##FILTER=<ID=SB,Description=\"(Variant support on only one strand)\">$"));
                    }
                    else
                    {
                        Assert.True(false, "StrandBias filter header does not match any expected filter.");
                    }

                    sawStrandBias = true;
                }
                else if (Regex.IsMatch(line, string.Format("##FILTER=<ID=q{0}", qscoreThreshold)))
                {
                    Assert.True(Regex.IsMatch(line, string.Format("^##FILTER=<ID=q{0},Description=\"Quality below {0}\">$", qscoreThreshold)));
                    sawQscore = true;
                }
                else
                {
                    Assert.True(false, "A filter is listed which does not match any of the specified filters.");
                }
            }

            // Each enabled setting must have produced its filter line.
            if (qscoreThreshold > 0)
            {
                Assert.True(sawQscore);
            }

            if (config.DepthFilterThreshold > 0)
            {
                Assert.True(sawLowDepth);
            }

            if (config.ShouldOutputStrandBiasAndNoiseLevel)
            {
                Assert.True(sawStrandBias);
            }
        }
示例#5
0
        /// <summary>
        /// Writes a VCF file and checks that the FORMAT column of each data line agrees with the
        /// ##FORMAT header declarations, and that the NL, SB and NC format fields are declared
        /// exactly when the corresponding configuration flags are enabled.
        /// </summary>
        public void DataFormatCheck()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-23.vcf");

            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            var writer = new VcfFileWriter(outputFilePath, config, context);

            // Dispose in a finally block so the writer is always disposed, even when a write throws.
            try
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }
            finally
            {
                writer.Dispose();
            }

            var  testFile = File.ReadAllLines(outputFilePath);
            var  formatList = string.Empty;
            bool caseNL = false, caseSB = false, caseNC = false;

            foreach (var line in testFile)
            {
                if (Regex.IsMatch(line, "^##FORMAT"))
                {
                    // Assumes the line starts with "##FORMAT=<ID=" (13 characters), so the ID
                    // begins at index 13 of the text before the first comma.
                    var formatField = line.Split(',')[0].Substring(13);

                    // Record that the field is declared regardless of the configuration, so the
                    // checks below can also catch a field that appears while its flag is off.
                    switch (formatField)
                    {
                    case "NL":
                        caseNL = true;
                        break;

                    case "SB":
                        caseSB = true;
                        break;

                    case "NC":
                        caseNC = true;
                        break;
                    }

                    // Accumulate the declared IDs as a colon-separated list, in header order.
                    formatList = formatList == string.Empty
                        ? formatField
                        : formatList + ":" + formatField;
                }

                if (Regex.IsMatch(line, "^chr\\d+\t"))
                {
                    // Column index 8 of a data line is compared against the accumulated list.
                    var columns = line.Split('\t');
                    Assert.True(Regex.IsMatch(columns[8], formatList));
                }
            }

            Assert.True(caseNL == config.ShouldOutputStrandBiasAndNoiseLevel,
                        "Incorrect setting for ShouldOutputStrandBiasAndNoiseLevel and NL format");

            Assert.True(caseSB == config.ShouldOutputStrandBiasAndNoiseLevel,
                        "Incorrect setting for ShouldOutputStrandBiasAndNoiseLevel and SB format");

            Assert.True(caseNC == config.ShouldOutputNoCallFraction,
                        "Incorrect setting for NoCall and NC format");
        }
示例#6
0
        /// <summary>
        /// Writes a VCF file and asserts that every ##INFO and ##FORMAT header line is one of the
        /// expected declarations, formatted exactly, and that the NL, SB and NC format lines are
        /// present when the corresponding configuration flags are enabled.
        /// </summary>
        public void InfoFormatHeader()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-17.vcf");

            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };
            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            var writer = new VcfFileWriter(outputFilePath, config, context);

            // Dispose in a finally block so the writer is always disposed, even when a write throws.
            try
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }
            finally
            {
                writer.Dispose();
            }

            // Time to read the header
            var  testFile = File.ReadAllLines(outputFilePath);
            bool formatNL = false, formatSB = false, formatNC = false;

            foreach (var line in testFile)
            {
                if (Regex.IsMatch(line, "##INFO="))
                {
                    // The text before the first comma identifies the INFO field.
                    switch (line.Split(',')[0])
                    {
                    case "##INFO=<ID=DP":
                        Assert.True(Regex.IsMatch(line, "^##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">$"));
                        break;

                    case "##INFO=<ID=TI":
                        Assert.True(Regex.IsMatch(line, "^##INFO=<ID=TI,Number=\\.,Type=String,Description=\"Transcript ID\">$"));
                        break;

                    case "##INFO=<ID=GI":
                        Assert.True(Regex.IsMatch(line, "^##INFO=<ID=GI,Number=\\.,Type=String,Description=\"Gene ID\">$"));
                        break;

                    case "##INFO=<ID=EXON":
                        Assert.True(Regex.IsMatch(line, "^##INFO=<ID=EXON,Number=0,Type=Flag,Description=\"Exon Region\">$"));
                        break;

                    case "##INFO=<ID=FC":
                        Assert.True(Regex.IsMatch(line, "^##INFO=<ID=FC,Number=\\.,Type=String,Description=\"Functional Consequence\">$"));
                        break;

                    default:
                        Assert.True(false, "An info is listed which does not match any from the req.`");
                        break;
                    }
                }
                else if (Regex.IsMatch(line, "##FORMAT="))
                {
                    // The text before the first comma identifies the FORMAT field.
                    switch (line.Split(',')[0])
                    {
                    case "##FORMAT=<ID=GT":
                        Assert.True(Regex.IsMatch(line, "^##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">$"));
                        break;

                    case "##FORMAT=<ID=GQ":
                        Assert.True(Regex.IsMatch(line, "^##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">$"));
                        break;

                    case "##FORMAT=<ID=AD":
                        Assert.True(Regex.IsMatch(line, "^##FORMAT=<ID=AD,Number=\\.,Type=Integer,Description=\"Allele Depth\">$"));
                        break;

                    case "##FORMAT=<ID=VF":
                        Assert.True(Regex.IsMatch(line, "^##FORMAT=<ID=VF,Number=1,Type=Float,Description=\"Variant Frequency\">$"));
                        break;

                    case "##FORMAT=<ID=NL":
                        Assert.True(Regex.IsMatch(line, "^##FORMAT=<ID=NL,Number=1,Type=Integer,Description=\"Applied BaseCall Noise Level\">$"));
                        formatNL = true;
                        break;

                    case "##FORMAT=<ID=SB":
                        Assert.True(Regex.IsMatch(line, "^##FORMAT=<ID=SB,Number=1,Type=Float,Description=\"StrandBias Score\">$"));
                        formatSB = true;
                        break;

                    case "##FORMAT=<ID=NC":
                        Assert.True(Regex.IsMatch(line, "^##FORMAT=<ID=NC,Number=1,Type=Float,Description=\"Fraction of bases which were uncalled or with basecall quality below the minimum threshold\">$"));
                        formatNC = true;
                        break;

                    default:
                        Assert.True(false, "A format is listed which does not match any of those listed for the req.");
                        break;
                    }
                }
            }

            // NL and SB are both governed by the same configuration flag.
            if (config.ShouldOutputStrandBiasAndNoiseLevel)
            {
                Assert.True(formatNL);
                Assert.True(formatSB);
            }

            if (config.ShouldOutputNoCallFraction)
            {
                Assert.True(formatNC);
            }
        }
        /// <summary>
        /// Verifies the header produced by <c>PhasedVcfWriter</c> when it is given an existing VCF
        /// header: once for a diploid configuration whose input header already has FILTER lines, and
        /// once for a somatic configuration whose input header has none. Each written header is
        /// compared with the expected list line by line; the "##VariantPhaser=Scylla" line is
        /// compared by prefix only.
        /// </summary>
        public void FilterHeader()
        {
            var outputFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "PhasedVcfFileWriterTests.vcf");

            File.Delete(outputFilePath);

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.Diploid,
            };

            //note, scylla has no SB or RMxN or R8 filters.

            var variants = new List<CalledAllele>
            {
                TestHelper.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
                TestHelper.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
            };

            var originalHeader = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
                "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
                "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };

            var expectedHeader1 = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##VariantPhaser=Scylla 1.0.0.0",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
                "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
                "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
                "##FILTER=<ID=q30,Description=\"Quality score less than 30, by Scylla\">",
                "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called, by Scylla\">",
                "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070, by Scylla\">",
                "##FILTER=<ID=MultiAllelicSite,Description=\"Variant does not conform to diploid model, by Scylla\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };

            var writtenHeader = WritePhasedVcfAndReadHeader(outputFilePath, config, originalHeader, variants);

            AssertHeaderMatchesIgnoringPhaserVersion(expectedHeader1, writtenHeader);

            // Second scenario: somatic model, different quality threshold, and an input header
            // that carries no FILTER lines. The same output path is written again.
            config = new VcfWriterConfig
            {
                DepthFilterThreshold          = 500,
                VariantQualityFilterThreshold = 22,
                FrequencyFilterThreshold      = 0.007f,
                EstimatedBaseCallQuality      = 23,
                PloidyModel = PloidyModel.Somatic,
            };

            originalHeader = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };

            var expectedHeader2 = new List<string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##VariantPhaser=Scylla 1.0.0.0",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "##FILTER=<ID=q22,Description=\"Quality score less than 22, by Scylla\">",
                "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called, by Scylla\">",
                "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070, by Scylla\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };

            writtenHeader = WritePhasedVcfAndReadHeader(outputFilePath, config, originalHeader, variants);

            AssertHeaderMatchesIgnoringPhaserVersion(expectedHeader2, writtenHeader);
        }

        /// <summary>
        /// Writes the header and <paramref name="variants"/> to <paramref name="outputFilePath"/> with a
        /// <c>PhasedVcfWriter</c> seeded with <paramref name="originalHeader"/>, then reads the file back
        /// and returns its header lines.
        /// </summary>
        private static List<string> WritePhasedVcfAndReadHeader(string outputFilePath, VcfWriterConfig config,
                                                                List<string> originalHeader, List<CalledAllele> variants)
        {
            // NOTE(review): an empty VcfWriterInputContext is passed to the writer; confirm that
            // no sample/contig metadata is expected in the header under test.
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), originalHeader, null);

            // The writer is disposed before the file is reopened for reading, even on failure.
            try
            {
                writer.WriteHeader();
                writer.Write(variants);
            }
            finally
            {
                writer.Dispose();
            }

            using (var reader = new VcfReader(outputFilePath))
            {
                return reader.HeaderLines;
            }
        }

        /// <summary>
        /// Asserts that both headers have the same number of lines and that each line is equal,
        /// except for the "##VariantPhaser=Scylla" line, which only has to share that prefix.
        /// </summary>
        private static void AssertHeaderMatchesIgnoringPhaserVersion(List<string> expectedHeader, List<string> writtenHeader)
        {
            const string phaserPrefix = "##VariantPhaser=Scylla";

            Assert.Equal(expectedHeader.Count, writtenHeader.Count);

            for (int i = 0; i < expectedHeader.Count; i++)
            {
                //let version numbers differ
                if (expectedHeader[i].StartsWith(phaserPrefix, StringComparison.Ordinal))
                {
                    Assert.True(writtenHeader[i].StartsWith(phaserPrefix, StringComparison.Ordinal));
                    continue;
                }

                Assert.Equal(expectedHeader[i], writtenHeader[i]);
            }
        }
示例#8
0
        /// <summary>
        /// Reads the VCF named by <c>settings.InputVcf</c>, groups consecutive co-located alleles,
        /// passes each group through <c>FilterAndStreamOut</c> with a <c>GeometricFilter</c>, and
        /// writes the result to a "*.filtered.vcf" (or "*.filtered.genome.vcf") file in
        /// <c>settings.OutputDirectory</c>. An existing output file is replaced.
        /// </summary>
        /// <param name="settings">Options supplying the input path, output directory, filter parameters and command line.</param>
        public static void DoFiltering(PsaraOptions settings)
        {
            var geometricFilter = new GeometricFilter(settings.GeometricFilterParameters);
            //maybe expand to add other filters..

            var vcfIn   = settings.InputVcf;
            var vcfName = Path.GetFileName(vcfIn);

            var outputFile = Path.Combine(settings.OutputDirectory, vcfName.Replace(".vcf", ".filtered.vcf"));

            // Keep ".genome" directly in front of the extension for genome VCFs.
            outputFile = outputFile.Replace(".genome.filtered.vcf", ".filtered.genome.vcf");

            Logger.WriteToLog("filtering " + vcfIn + "...");

            if (File.Exists(outputFile))
            {
                File.Delete(outputFile);
            }

            List<string>    header  = VcfReader.GetAllHeaderLines(vcfIn);
            string          cmdLine = "##Psara_cmdline=" + settings.QuotedCommandLineArgumentsString;
            VcfWriterConfig config  = GetWriterConfigToMatchInputVcf(vcfIn);

            using (PsaraVcfWriter writer = new PsaraVcfWriter(outputFile, config, new VcfWriterInputContext(), header, cmdLine))
            {
                writer.WriteHeader();

                using (VcfReader reader = new VcfReader(vcfIn, false))
                {
                    // backLogVcfVariant always holds the most recently read, not yet converted variant.
                    var backLogVcfVariant = new VcfVariant();
                    var coLocatedAlleles  = new List<CalledAllele>();
                    var moreVariantsInVcf = reader.GetNextVariant(backLogVcfVariant);
                    var incomingBatch     = new List<CalledAllele>();

                    while (moreVariantsInVcf)
                    {
                        if (incomingBatch.Count == 0)
                        {
                            // Convert the backlog variant into the incoming batch, then read ahead.
                            // ToList() materializes the batch before the same VcfVariant instance is
                            // handed to the reader again (presumably to be overwritten).
                            incomingBatch = VcfVariantUtilities.Convert(
                                new List<VcfVariant> { backLogVcfVariant },
                                config.ShouldOutputRcCounts, config.ShouldOutputTsCounts, false).ToList();

                            moreVariantsInVcf = reader.GetNextVariant(backLogVcfVariant);
                        }

                        if ((coLocatedAlleles.Count == 0) || AreColocated(coLocatedAlleles, incomingBatch))
                        {
                            coLocatedAlleles.AddRange(incomingBatch);
                            incomingBatch.Clear();

                            //colocated alleles are left behind
                        }
                        else
                        {
                            FilterAndStreamOut(coLocatedAlleles, writer, geometricFilter);
                            coLocatedAlleles.Clear();

                            //incomingBatch alleles are left behind
                        }
                    }

                    //if you get here, there is no more unprocessed vcf variants but there could be
                    //colocated or an incoming batch of alleles left over. We need to write them to file before exiting.

                    FilterAndStreamOut(coLocatedAlleles, writer, geometricFilter);

                    FilterAndStreamOut(incomingBatch, writer, geometricFilter);
                }
            }
        }
        /// <summary>
        /// Drives a <c>VcfMerger</c> over TinyDiploid.vcf with a single called neighborhood on chr1,
        /// writes the result through a <c>PhasedVcfWriter</c>, and checks that the output file
        /// matches TinyDiploidOutput.vcf line for line.
        /// </summary>
        public void WriteADiploidNbhd()
        {
            var outputDir        = Path.Combine(TestPaths.LocalScratchDirectory, "MergerWriteADiploidNbhd");
            var outputFilePath   = Path.Combine(outputDir, "TinyDiploid.Phased.vcf");
            var inputFilePath    = Path.Combine(TestPaths.LocalTestDataDirectory, "TinyDiploid.vcf");
            var expectedFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "TinyDiploidOutput.vcf");

            TestHelper.RecreateDirectory(outputDir);

            // NOTE(review): this context is built but never handed to the writer below, which gets a
            // fresh VcfWriterInputContext instead. Passing it would presumably change the written
            // header, so it is left as-is here - confirm which one is intended.
            var context = new VcfWriterInputContext
            {
                QuotedCommandLineString = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chr22", 51304566),
                    new Tuple <string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.DiploidByThresholding,
                AllowMultipleVcfLinesPerLoci = false
            };

            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List <string>(), null);

            // The writer must be disposed before the output file is read back, and it must also be
            // released when a merger call or an assertion throws; otherwise the output file stays open.
            try
            {
                var reader = new AlleleReader(inputFilePath, true);

                //set up the original variants
                var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr1", 1, "A", "G", 1000, 156);
                var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr1", 1, "A", "T", 1000, 156);
                var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr22", 1230237, "GTC", "G", 1000, 156);
                var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr22", 1230237, "GTC", "GTCT", 1000, 156);

                // Only the construction of these sites is relied on below for vs1 and vs2;
                // vs4 and vs5 are built but not placed in any neighborhood.
                var vs1 = new VariantSite((originalVcfVariant1));
                var vs2 = new VariantSite((originalVcfVariant2));
                var vs4 = new VariantSite((originalVcfVariant4));
                var vs5 = new VariantSite((originalVcfVariant5));

                //the single neighborhood under test holds the two chr1 sites
                var nbhd1      = new VcfNeighborhood(0, "chr1", vs1, vs2);
                var calledNbh1 = new CallableNeighborhood(nbhd1, new VariantCallingParameters());

                VcfMerger merger = new VcfMerger(reader);
                List <Tuple <CalledAllele, string> > alleleTuplesPastNbhd = new List <Tuple <CalledAllele, string> >();

                //we will just say, we called the variants that were in the original vcf. Ie, we agree with it.
                calledNbh1.CalledVariants = new Dictionary <int, List <CalledAllele> > {
                    { originalVcfVariant1.ReferencePosition, new List <CalledAllele> {
                          originalVcfVariant1, originalVcfVariant2
                      } }
                };

                //Realizes the first nbhd starts at chr1 . We have to do something with the first lines of the vcf (chr1	1	.	A	G,T)
                //so, alleleTuplesPastNbhd = chr1	1	.	A	G,T
                alleleTuplesPastNbhd = merger.WriteVariantsUptoChr(writer, alleleTuplesPastNbhd, nbhd1.ReferenceName);
                Assert.True(alleleTuplesPastNbhd[0].Item1.IsSameAllele(originalVcfVariant1));
                Assert.True(alleleTuplesPastNbhd[1].Item1.IsSameAllele(originalVcfVariant2));

                //This method writes everything up to the end of nbhd 1,
                //so "(chr1	1	.	A	G,T)" from the vcf and the variants scylla detected "(chr1	1	.	A	G,T)" need to be dealt with.
                //Since these 4 variants are actually the same two, we need to remove the vcf ones and only write the scylla ones.
                //Then we peek into the vcf and see the next line is "chr22	1230237	.	GTC	G,GTCT", clearly outside nbh1.
                //so we write out everything we need for nbhd1, and save the peeked line
                alleleTuplesPastNbhd = merger.WriteVariantsUptoIncludingNbhd(writer, alleleTuplesPastNbhd, calledNbh1);
                Assert.True(alleleTuplesPastNbhd[0].Item1.IsSameAllele(originalVcfVariant4));
                Assert.True(alleleTuplesPastNbhd[1].Item1.IsSameAllele(originalVcfVariant5));

                //now write out
                //chr22   1230237.GTC G,GTCT  50  DP = 1370 GT: GQ: AD: DP: VF: NL: SB: NC: US  1 / 2:100:185,68:364:0.258:20:-100.0000:0.0000:0,0,0,0,0,0,1,1,0,0,0,2
                //chrX    79.CG  GTG,AA  50  DP = 1370 GT: GQ: AD: DP: VF: NL: SB: NC: US  1 / 2:100:185,68:364:0.258:20:-100.0000:0.0000:0,0,0,0,0,0,1,1,0,0,0,2
                merger.WriteRemainingVariants(writer, alleleTuplesPastNbhd);
            }
            finally
            {
                writer.Dispose();
            }

            var expectedLines = File.ReadLines(expectedFilePath).ToList();
            var outputLines   = File.ReadLines(outputFilePath).ToList();

            // Compare sizes first so the per-line loop below cannot index past the end of outputLines.
            Assert.Equal(expectedLines.Count, outputLines.Count);

            for (int i = 0; i < expectedLines.Count; i++)
            {
                Assert.Equal(expectedLines[i], outputLines[i]);
            }
        }
 /// <summary>
 /// Creates a formatter from the given writer configuration.
 /// </summary>
 /// <param name="Config">Writer configuration stored in <c>_config</c>.</param>
 /// <param name="debugMode">Value assigned to <c>DebugMode</c>.</param>
 public VennVcfFormatter(VcfWriterConfig Config, bool debugMode)
 {
     _config = Config;
     // NOTE(review): presumably UpdateFrequencyFormat reads _config, so it has to run after the
     // assignment above - confirm before reordering.
     UpdateFrequencyFormat();
     DebugMode = debugMode;
 }
示例#11
0
        /// <summary>
        /// Reads every variant from <paramref name="inputFile"/> and rewrites it to a sibling file,
        /// with the writer configured to allow multiple vcf lines per locus when uncrushing and
        /// to disallow them when crushing.
        /// </summary>
        /// <param name="inputFile">Path of the vcf file to read.</param>
        /// <param name="crush">
        /// True to write "*.crushed.vcf" with <c>AllowMultipleVcfLinesPerLoci</c> off;
        /// false to write "*.uncrushed.vcf" with it on.
        /// </param>
        /// <remarks>
        /// A failure while writing an allele is reported on the console and ends the run early;
        /// it is not rethrown.
        /// </remarks>
        public static void DoReformating(string inputFile, bool crush)
        {
            var suffix = crush ? ".crushed.vcf" : ".uncrushed.vcf";

            // Plain substring replacement is kept so that output paths are unchanged for every
            // input that already contains ".vcf".
            var outputFile = inputFile.Replace(".vcf", suffix);

            if (outputFile == inputFile)
            {
                // Nothing was replaced (for example ".VCF" or no extension). Without this the output
                // path would equal the input path and the input would be deleted just below.
                outputFile = inputFile + suffix;
            }

            Console.WriteLine((crush ? "crushing " : "uncrushing ") + inputFile + "...");

            if (File.Exists(outputFile))
            {
                File.Delete(outputFile);
            }

            var config = new VcfWriterConfig()
            {
                AllowMultipleVcfLinesPerLoci = !crush
            };

            using (VcfFileWriter writer = new VcfFileWriter(outputFile, config, new VcfWriterInputContext()))
            {
                writer.WriteHeader();

                using (VcfReader reader = new VcfReader(inputFile, false))
                {
                    // The same VcfVariant instance is handed to the reader for every line.
                    var backLogVcfVariant = new VcfVariant();

                    var backLogExists = reader.GetNextVariant(backLogVcfVariant);

                    while (backLogExists)
                    {
                        // Convert before the next read, since the next read reuses backLogVcfVariant.
                        var backLogAlleles = VcfVariantUtilities.Convert(new List <VcfVariant> {
                            backLogVcfVariant
                        }).ToList();

                        foreach (var allele in backLogAlleles)
                        {
                            try
                            {
                                writer.Write(new List <CalledAllele>()
                                {
                                    allele
                                });
                            }
                            catch (Exception ex)
                            {
                                Console.WriteLine("Problem writing " + allele.ToString());
                                Console.WriteLine("Exception: " + ex);
                                return;
                            }
                        }


                        backLogExists = reader.GetNextVariant(backLogVcfVariant);

                        // Guard the index: a line that converted to no alleles has no chromosome to compare.
                        if (backLogAlleles.Count > 0 && backLogAlleles[0].Chromosome != backLogVcfVariant.ReferenceName)
                        {
                            //we have switched to the next chr. flush the buffer.
                            writer.FlushBuffer();
                        }
                    }

                    writer.FlushBuffer();
                }
            }
        }