Esempio n. 1
0
        // Writes a header plus the default candidates with a fully populated VcfWriterConfig into the
        // existing test-data folder and asserts that the output file exists afterwards.
        public void ConfiguredVCFOutput()
        {
            // Paths for the currently existing folder and for the folder that must not pre-exist.
            var existingOutputFolder = Path.Combine(UnitTestPaths.TestDataDirectory);
            var existingOutputFile   = Path.Combine(existingOutputFolder, "VcfFileWriterTests.vcf");
            var newOutputFolder      = Path.Combine(UnitTestPaths.TestDataDirectory, "SDS-14");

            // Test -OutFolder works for pre-existing folders.
            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            var writer = new VcfFileWriter(existingOutputFile, config, context);

            writer.WriteHeader();
            writer.Write(_defaultCandidates);
            writer.Dispose();

            Assert.True(File.Exists(existingOutputFile));

            // Delete the "SDS-14" folder if it exists.
            // NOTE(review): nothing is written into this folder afterwards, so the
            // "entirely new directory" scenario is not exercised by this test - confirm
            // whether that coverage is intended to live here.
            if (Directory.Exists(newOutputFolder))
            {
                Directory.Delete(newOutputFolder, true);
            }
        }
Esempio n. 2
0
        // Header-only scenario: single-strand-coverage filtering is switched on together with a
        // 0.01 frequency filter threshold. The written file path and its "_expected" sibling path
        // are handed to Compare.
        public void Test3()
        {
            var actualVcfPath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_Test3.vcf");

            // Start from a clean slate in case an earlier run left the file behind.
            File.Delete(actualVcfPath);

            var inputContext = new VcfWriterInputContext();
            inputContext.CommandLine   = "myCommandLine";
            inputContext.SampleName    = "mySample";
            inputContext.ReferenceName = "myReference";

            var writerConfig = new VcfWriterConfig();
            writerConfig.ShouldFilterOnlyOneStrandCoverage = true;
            writerConfig.FrequencyFilterThreshold          = 0.01f;

            var vcfWriter = new VcfFileWriter(actualVcfPath, writerConfig, inputContext);
            vcfWriter.WriteHeader();
            vcfWriter.Dispose();

            var expectedVcfPath = actualVcfPath.Replace(".vcf", "_expected.vcf");
            Compare(actualVcfPath, expectedVcfPath);
        }
Esempio n. 3
0
        // Header-only scenario: a strand bias threshold is configured, but filtering on
        // single-strand coverage is left at its default. The written file path and its
        // "_expected" sibling path are handed to Compare.
        public void Test4()
        {
            var actualVcfPath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_Test4.vcf");

            // Start from a clean slate in case an earlier run left the file behind.
            File.Delete(actualVcfPath);

            var inputContext = new VcfWriterInputContext();
            inputContext.CommandLine   = "myCommandLine";
            inputContext.SampleName    = "mySample";
            inputContext.ReferenceName = "myReference";

            var writerConfig = new VcfWriterConfig();
            writerConfig.StrandBiasFilterThreshold = 5;
            writerConfig.FrequencyFilterThreshold  = 0.01f;

            var vcfWriter = new VcfFileWriter(actualVcfPath, writerConfig, inputContext);
            vcfWriter.WriteHeader();
            vcfWriter.Dispose();

            var expectedVcfPath = actualVcfPath.Replace(".vcf", "_expected.vcf");
            Compare(actualVcfPath, expectedVcfPath);
        }
Esempio n. 4
0
 /// <summary>
 /// Builds a <see cref="VcfFileWriter"/> for the given path whose <see cref="VcfWriterConfig"/>
 /// is derived from the current options.
 /// </summary>
 /// <param name="outputVcfPath">Path handed to the writer.</param>
 /// <param name="context">Input context handed to the writer.</param>
 /// <param name="mapper">Not referenced by this method.</param>
 /// <returns>The newly constructed writer.</returns>
 public VcfFileWriter CreateVcfWriter(string outputVcfPath, VcfWriterInputContext context, IRegionMapper mapper = null)
 {
     var writerConfig = new VcfWriterConfig();

     // gVCF output uses the minimum depth; otherwise the low-depth filter applies only
     // when it is stricter than the minimum depth.
     writerConfig.DepthFilterThreshold = _options.OutputgVCFFiles
         ? _options.MinimumDepth
         : ((_options.LowDepthFilter > _options.MinimumDepth) ? _options.LowDepthFilter : (int?)null);

     // Each optional filter stays unset (null) unless its option makes it meaningful.
     writerConfig.IndelRepeatFilterThreshold = _options.IndelRepeatFilter > 0
         ? _options.IndelRepeatFilter
         : (int?)null;
     writerConfig.VariantQualityFilterThreshold = _options.FilteredVariantQScore;
     writerConfig.GenotypeQualityFilterThreshold =
         _options.LowGenotypeQualityFilter.HasValue && _options.LowGenotypeQualityFilter > _options.MinimumVariantQScore
             ? _options.LowGenotypeQualityFilter
             : null;
     writerConfig.StrandBiasFilterThreshold = _options.StrandBiasAcceptanceCriteria < 1
         ? _options.StrandBiasAcceptanceCriteria
         : (float?)null;
     writerConfig.FrequencyFilterThreshold = (_options.FilteredVariantFrequency > _options.MinimumFrequency)
         ? _options.FilteredVariantFrequency
         : (float?)null;

     // The remaining settings are copied across as-is.
     writerConfig.MinFrequencyThreshold               = _options.MinimumFrequency;
     writerConfig.ShouldOutputNoCallFraction          = _options.ReportNoCalls;
     writerConfig.ShouldOutputStrandBiasAndNoiseLevel = _options.OutputNoiseLevelAndStrandBias();
     writerConfig.ShouldFilterOnlyOneStrandCoverage   = _options.FilterOutVariantsPresentOnlyOneStrand;
     writerConfig.EstimatedBaseCallQuality            = GetEstimatedBaseCallQuality();
     writerConfig.ShouldOutputRcCounts                = _options.ReportRcCounts;
     writerConfig.AllowMultipleVcfLinesPerLoci        = _options.AllowMultipleVcfLinesPerLoci;
     writerConfig.PloidyModel                         = _options.PloidyModel;
     writerConfig.RMxNFilterMaxLengthRepeat           = _options.RMxNFilterMaxLengthRepeat;
     writerConfig.RMxNFilterMinRepetitions            = _options.RMxNFilterMinRepetitions;
     writerConfig.RMxNFilterFrequencyLimit            = _options.RMxNFilterFrequencyLimit;
     writerConfig.NoiseModel                          = _options.NoiseModel;

     return new VcfFileWriter(outputVcfPath, writerConfig, context);
 }
Esempio n. 5
0
        // Writes a header and only those default candidates that carry no filters, using a
        // default VcfWriterConfig. The written file path and its "_expected" sibling path
        // are handed to Compare.
        public void Test2()
        {
            var actualVcfPath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_Test2.vcf");

            // Start from a clean slate in case an earlier run left the file behind.
            File.Delete(actualVcfPath);

            var contigs = new List <Tuple <string, long> >();
            contigs.Add(Tuple.Create("chr10", 123L));
            contigs.Add(Tuple.Create("chr9", 5L));

            var inputContext = new VcfWriterInputContext();
            inputContext.CommandLine   = "myCommandLine";
            inputContext.SampleName    = "mySample";
            inputContext.ReferenceName = "myReference";
            inputContext.ContigsByChr  = contigs;

            var vcfWriter = new VcfFileWriter(actualVcfPath, new VcfWriterConfig(), inputContext);

            // Lazily evaluated: candidates with at least one filter are skipped when written.
            var unfilteredCandidates = _defaultCandidates.Where(candidate => !candidate.Filters.Any());

            vcfWriter.WriteHeader();
            vcfWriter.Write(unfilteredCandidates);
            vcfWriter.Dispose();

            var expectedVcfPath = actualVcfPath.Replace(".vcf", "_expected.vcf");
            Compare(actualVcfPath, expectedVcfPath);
        }
Esempio n. 6
0
        // Creates a writer through the Factory using default application options, writes the
        // default candidates, and checks that the output file exists at the first BAM's path
        // with a ".vcf" extension.
        public void DefaultVCFOutput()
        {
            var appOptions = new ApplicationOptions
            {
                BAMPaths      = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup },
                IntervalPaths = new[] { _intervalsChr17, _intervalsChr19, null },
                GenomePaths   = new[] { _genomeChr17Chr19 }
            };

            var factory = new Factory(appOptions);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };
            var outputFile = factory.GetOutputVcfPath(appOptions.BAMPaths[0]);
            var writer     = factory.CreateVcfWriter(outputFile, context);

            var candidates = _defaultCandidates;

            writer.WriteHeader();
            writer.Write(candidates);
            writer.Dispose();

            Assert.True(File.Exists(outputFile));

            // Expected value goes first so a failure message labels the two values correctly.
            Assert.Equal(Path.ChangeExtension(_bamChr19, ".vcf"), outputFile);
        }
Esempio n. 7
0
 /// <summary>
 /// Creates a VQR VCF writer. The output path, config, context and buffer limit are forwarded
 /// to the base constructor; the original header and command line are stored on this instance.
 /// </summary>
 /// <param name="outputFilePath">Forwarded to the base constructor.</param>
 /// <param name="config">Forwarded to the base constructor and used to build the formatter.</param>
 /// <param name="context">Forwarded to the base constructor.</param>
 /// <param name="originalHeader">Header lines that are stored and from which the filter lines are extracted.</param>
 /// <param name="phasingCommandLine">Command line stored as the VQR command line.</param>
 /// <param name="bufferLimit">Forwarded to the base constructor.</param>
 public VQRVcfWriter(
     string outputFilePath,
     VcfWriterConfig config,
     VcfWriterInputContext context,
     List <string> originalHeader,
     string phasingCommandLine,
     int bufferLimit = 2000)
     : base(outputFilePath, config, context, bufferLimit)
 {
     // Plain field captures first.
     _vqrCommandLine = phasingCommandLine;
     _originalHeader = originalHeader;

     // Derived state: filter lines from the header, formatter and multi-line flag from the config.
     _originalFilterLines         = VcfVariantUtilities.GetFilterStringsByType(originalHeader);
     _formatter                   = new VcfFormatter(config);
     AllowMultipleVcfLinesPerLoci = config.AllowMultipleVcfLinesPerLoci;
 }
Esempio n. 8
0
 /// <summary>
 /// Creates a Venn VCF writer. The output path, config, context and buffer limit are forwarded
 /// to the base constructor; the original header and command line are stored on this instance.
 /// </summary>
 /// <param name="outputFilePath">Forwarded to the base constructor.</param>
 /// <param name="config">Forwarded to the base constructor and used to build the formatter.</param>
 /// <param name="context">Forwarded to the base constructor.</param>
 /// <param name="originalHeader">Header lines that are stored and from which the filter lines are extracted.</param>
 /// <param name="vennVcfCommandLine">Command line stored on this writer.</param>
 /// <param name="bufferLimit">Forwarded to the base constructor.</param>
 /// <param name="debugMode">Passed to the <c>VennVcfFormatter</c> constructor.</param>
 public VennVcfWriter(
     string outputFilePath,
     VcfWriterConfig config,
     VcfWriterInputContext context,
     List <string> originalHeader,
     string vennVcfCommandLine,
     int bufferLimit = 2000,
     bool debugMode = false)
     : base(outputFilePath, config, context, bufferLimit)
 {
     // Plain field captures first.
     _vennCommandLine = vennVcfCommandLine;
     _originalHeader  = originalHeader;

     // Derived state: filter lines from the header, formatter and multi-line flag from the config.
     _originalFilterLines         = VcfVariantUtilities.GetFilterStringsByType(originalHeader);
     _formatter                   = new VennVcfFormatter(config, debugMode);
     AllowMultipleVcfLinesPerLoci = config.AllowMultipleVcfLinesPerLoci;
 }
Esempio n. 9
0
        // Creates a writer through the Factory with non-gVCF options, writes the default
        // candidates, and checks the output path plus four FILTER lines of the written header.
        public void DefaultVCFOutput()
        {
            var appOptions = new PiscesApplicationOptions
            {
                BAMPaths                 = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup },
                IntervalPaths            = new[] { _intervalsChr17, _intervalsChr19, null },
                GenomePaths              = new[] { _genomeChr17Chr19 },
                VariantCallingParameters = new VariantCallingParameters()
                {
                    MinimumCoverage             = 10,
                    LowDepthFilter              = 10,
                    AmpliconBiasFilterThreshold = 0.01F
                },
                VcfWritingParameters = new VcfWritingParameters()
                {
                    OutputGvcfFile = false,
                }
            };

            var factory = new Factory(appOptions);

            var context = new VcfWriterInputContext
            {
                QuotedCommandLineString = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };
            var outputFile = factory.GetOutputFile(appOptions.BAMPaths[0]);
            var writer     = factory.CreateVcfWriter(outputFile, context);

            var candidates = _defaultCandidates;

            writer.WriteHeader();
            writer.Write(candidates);
            writer.Dispose();

            Assert.True(File.Exists(outputFile));

            // Expected value goes first so a failure message labels the two values correctly.
            Assert.Equal(Path.ChangeExtension(_bamChr19, ".vcf"), outputFile);

            var reader = new AlleleReader(outputFile);
            var header = reader.HeaderLines;

            // The FILTER lines are checked at fixed indices in the header.
            Assert.Equal("##FILTER=<ID=q30,Description=\"Quality score less than 30\">", header[7]);
            Assert.Equal("##FILTER=<ID=AB,Description=\"Amplicon bias - disparate variant frequencies detected by amplicon\">", header[8]);
            Assert.Equal("##FILTER=<ID=SB,Description=\"Variant strand bias too high\">", header[9]);
            Assert.Equal("##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">", header[10]);
        }
Esempio n. 10
0
        // Writes the default candidates and validates every data line whose first column starts
        // with "chr" followed by digits: each such line must match the expected column layout,
        // and positions must be non-decreasing within a chromosome.
        public void VCFDataSection()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-19.vcf");

            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };

            var writer = new VcfFileWriter(
                outputFilePath,
                new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            },
                context);

            writer.WriteHeader();
            writer.Write(_defaultCandidates);
            writer.Dispose();

            var testFile = File.ReadAllLines(outputFilePath);

            string oldChromosome = null;
            var    oldPosition   = 0;

            foreach (var x in testFile)
            {
                // Split once and reuse the columns for the filter and the position check.
                var fields = x.Split('\t');

                if (!Regex.IsMatch(fields[0], "^chr\\d+"))
                {
                    continue;
                }

                Assert.True(Regex.IsMatch(x, "^chr\\d+\t\\d+\t.+\t.+\t\\d+\t\\S+\tDP=\\d+\t.+\t.+"));

                // At a minimum, lines should be ordered by coordinate. Ordering only applies
                // within one chromosome, so the running position restarts when CHROM changes.
                var position = int.Parse(fields[1]);
                if (fields[0] != oldChromosome)
                {
                    oldChromosome = fields[0];
                    oldPosition   = 0;
                }

                Assert.True(position >= oldPosition);
                oldPosition = position;
            }
        }
Esempio n. 11
0
 /// <summary>
 /// Builds a <see cref="VcfFileWriter"/> for the given path whose <see cref="VcfWriterConfig"/>
 /// is derived from the current options.
 /// </summary>
 /// <param name="outputVcfPath">Path handed to the writer.</param>
 /// <param name="context">Input context handed to the writer.</param>
 /// <returns>The newly constructed writer.</returns>
 public VcfFileWriter CreateVcfWriter(string outputVcfPath, VcfWriterInputContext context)
 {
     var writerConfig = new VcfWriterConfig();

     // Optional thresholds stay unset (null) unless the corresponding option makes them meaningful.
     writerConfig.DepthFilterThreshold = _options.MinimumCoverage > 0
         ? _options.MinimumCoverage
         : (int?)null;
     writerConfig.QscoreFilterThreshold = _options.FilteredVariantQScore > _options.MinimumVariantQScore
         ? _options.FilteredVariantQScore
         : (int?)null;
     writerConfig.StrandBiasFilterThreshold = _options.StrandBiasAcceptanceCriteria < 1
         ? _options.StrandBiasAcceptanceCriteria
         : (float?)null;

     // The remaining settings are copied across as-is.
     writerConfig.FrequencyFilterThreshold            = _options.MinimumFrequency;
     writerConfig.ShouldOutputNoCallFraction          = _options.ReportNoCalls;
     writerConfig.ShouldOutputStrandBiasAndNoiseLevel = _options.OutputNoiseLevelAndStrandBias();
     writerConfig.ShouldFilterOnlyOneStrandCoverage   = _options.FilterOutVariantsPresentOnlyOneStrand;
     writerConfig.EstimatedBaseCallQuality            = GetEstimatedBaseCallQuality();

     return new VcfFileWriter(outputVcfPath, writerConfig, context);
 }
Esempio n. 12
0
        // Registers one VennVcfWriter per Venn section ("AnotB", "AandB", "BnotA", "BandA"),
        // all sharing the same empty context and config, and then writes the header on every
        // writer currently held in _vennDiagramWriters.
        private void OpenVennDiagramStreams(List <string> vcfHeaderLines)
        {
            var emptyContext = new VcfWriterInputContext();
            var sharedConfig = new VcfWriterConfig(
                _parameters.VariantCallingParams,
                _parameters.VcfWritingParams,
                _parameters.BamFilterParams,
                null,
                false,
                false);

            // A not B
            var aNotBPath   = GetVennFileName(_outDir, "not", 0, 1);
            var aNotBWriter = new VennVcfWriter(aNotBPath, sharedConfig, emptyContext, vcfHeaderLines, null, debugMode: _parameters.DebugMode);
            _vennDiagramWriters.Add("AnotB", aNotBWriter);

            // A and B
            var aAndBPath   = GetVennFileName(_outDir, "and", 0, 1);
            var aAndBWriter = new VennVcfWriter(aAndBPath, sharedConfig, emptyContext, vcfHeaderLines, null, debugMode: _parameters.DebugMode);
            _vennDiagramWriters.Add("AandB", aAndBWriter);

            // B not A
            var bNotAPath   = GetVennFileName(_outDir, "not", 1, 0);
            var bNotAWriter = new VennVcfWriter(bNotAPath, sharedConfig, emptyContext, vcfHeaderLines, null, debugMode: _parameters.DebugMode);
            _vennDiagramWriters.Add("BnotA", bNotAWriter);

            // B and A
            var bAndAPath   = GetVennFileName(_outDir, "and", 1, 0);
            var bAndAWriter = new VennVcfWriter(bAndAPath, sharedConfig, emptyContext, vcfHeaderLines, null, debugMode: _parameters.DebugMode);
            _vennDiagramWriters.Add("BandA", bAndAWriter);

            // Headers go out on every registered writer, not only the four added above.
            foreach (VennVcfWriter registeredWriter in _vennDiagramWriters.Values)
            {
                registeredWriter.WriteHeader();
            }
        }
Esempio n. 13
0
        // Writes the default candidates and verifies that the "#CHROM" column-header line is
        // present and lists the standard columns followed by the sample name.
        public void VCFDataHeaderSection()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-20.vcf");

            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };

            var writer = new VcfFileWriter(
                outputFilePath,
                new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            },
                context);

            writer.WriteHeader();
            writer.Write(_defaultCandidates);
            writer.Dispose();

            var testFile = File.ReadAllLines(outputFilePath);

            var columnHeaderLines = testFile
                                    .Where(x => Regex.IsMatch(x.Split('\t')[0], "^#CHROM"))
                                    .ToList();

            // Without this guard the loop below would pass vacuously if no "#CHROM" line was written.
            Assert.NotEmpty(columnHeaderLines);

            foreach (var x in columnHeaderLines)
            {
                Assert.True(Regex.IsMatch(x, "^#CHROM\\sPOS\\sID\\sREF\\sALT\\sQUAL\\sFILTER\\sINFO\\sFORMAT\\smySample"));
            }
        }
Esempio n. 14
0
        // Creates a writer through the Factory with LowDepthFilter and MinimumDepth set to 10,
        // writes the default candidates, and checks the output path plus three FILTER lines of
        // the written header.
        public void DefaultVCFOutput()
        {
            var appOptions = new ApplicationOptions
            {
                BAMPaths       = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup },
                IntervalPaths  = new[] { _intervalsChr17, _intervalsChr19, null },
                GenomePaths    = new[] { _genomeChr17Chr19 },
                LowDepthFilter = 10,
                MinimumDepth   = 10
            };

            var factory = new Factory(appOptions);

            var context = new VcfWriterInputContext
            {
                CommandLine   = new [] { "myCommandLine" },
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };
            var outputFile = factory.GetOutputFile(appOptions.BAMPaths[0]);
            var writer     = factory.CreateVcfWriter(outputFile, context);

            var candidates = _defaultCandidates;

            writer.WriteHeader();
            writer.Write(candidates);
            writer.Dispose();

            Assert.True(File.Exists(outputFile));

            // Expected value goes first so a failure message labels the two values correctly.
            Assert.Equal(Path.ChangeExtension(_bamChr19, ".vcf"), outputFile);

            var           reader = new VcfReader(outputFile);
            List <string> header = reader.HeaderLines;

            // The FILTER lines are checked at fixed indices in the header.
            Assert.Equal("##FILTER=<ID=q30,Description=\"Quality score less than 30\">", header[6]);
            Assert.Equal("##FILTER=<ID=SB,Description=\"Variant strand bias too high\">", header[7]);
            Assert.Equal("##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">", header[8]);
        }
Esempio n. 15
0
        /// <summary>
        /// Creates a phased VCF writer. The output path, incoming config, context and buffer limit
        /// are forwarded to the base constructor. The config used for the formatter may first be
        /// replaced by the results of the RMxN / R8 threshold extraction helpers, depending on
        /// which filter types appear in the original header.
        /// </summary>
        /// <param name="outputFilePath">Forwarded to the base constructor.</param>
        /// <param name="config">Forwarded to the base constructor; possibly adjusted before building the formatter.</param>
        /// <param name="context">Forwarded to the base constructor.</param>
        /// <param name="originalHeader">Header lines that are stored and from which the filter lines are extracted.</param>
        /// <param name="phasingCommandLine">Command line stored on this writer.</param>
        /// <param name="bufferLimit">Forwarded to the base constructor.</param>
        public PhasedVcfWriter(
            string outputFilePath,
            VcfWriterConfig config,
            VcfWriterInputContext context,
            List <string> originalHeader,
            string phasingCommandLine,
            int bufferLimit = 2000)
            : base(outputFilePath, config, context, bufferLimit)
        {
            _originalHeader      = originalHeader;
            _originalFilterLines = Extensions.GetFilterStringsByType(originalHeader);

            // Apply the RMxN extraction first, then the indel-repeat (R8) extraction, each only
            // when the original header declared the corresponding filter.
            config = _originalFilterLines.ContainsKey(FilterType.RMxN)
                ? ExtractRMxNThresholds(config)
                : config;

            config = _originalFilterLines.ContainsKey(FilterType.IndelRepeatLength)
                ? ExtractR8Threshold(config)
                : config;

            // (original note: "-ReportNoCalls True")
            _formatter                   = new VcfFormatter(config);
            AllowMultipleVcfLinesPerLoci = config.AllowMultipleVcfLinesPerLoci;
            _phasingCommandLine          = phasingCommandLine;
        }
Esempio n. 16
0
        // Creates a writer through the Factory with gVCF output enabled and writes the header
        // plus the default candidates. No assertions are made here; per the original note,
        // the header format validation was moved to GvcfWritingTests.
        public void GvcfHeaderFormat()
        {
            var gvcfOptions = new PiscesApplicationOptions();
            gvcfOptions.BAMPaths             = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup };
            gvcfOptions.IntervalPaths        = new[] { _intervalsChr17, _intervalsChr19, null };
            gvcfOptions.GenomePaths          = new[] { _genomeChr17Chr19 };
            gvcfOptions.VcfWritingParameters = new VcfWritingParameters()
            {
                OutputGvcfFile = true
            };

            var writerFactory = new Factory(gvcfOptions);

            var contigs = new List <Tuple <string, long> >();
            contigs.Add(Tuple.Create("chr1", 10001L));
            contigs.Add(Tuple.Create("chrX", 500L));

            var inputContext = new VcfWriterInputContext();
            inputContext.QuotedCommandLineString = "myCommandLine";
            inputContext.SampleName              = "mySample";
            inputContext.ReferenceName           = "myReference";
            inputContext.ContigsByChr            = contigs;

            var gvcfPath   = writerFactory.GetOutputFile(gvcfOptions.BAMPaths[0]);
            var gvcfWriter = writerFactory.CreateVcfWriter(gvcfPath, inputContext);

            gvcfWriter.WriteHeader();
            gvcfWriter.Write(_defaultCandidates);
            gvcfWriter.Dispose();
        }
Esempio n. 17
0
        // Writes the default candidates with a fully populated config and hands the config and
        // the resulting file path to VcfHeaderFormatTester.
        public void FilterHeader()
        {
            var vcfPath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-18.vcf");

            // Start from a clean slate in case an earlier run left the file behind.
            File.Delete(vcfPath);

            var contigs = new List <Tuple <string, long> >();
            contigs.Add(Tuple.Create("chr1", 10001L));
            contigs.Add(Tuple.Create("chrX", 500L));

            var inputContext = new VcfWriterInputContext();
            inputContext.CommandLine   = "myCommandLine";
            inputContext.SampleName    = "mySample";
            inputContext.ReferenceName = "myReference";
            inputContext.ContigsByChr  = contigs;

            // Variant strand bias too high or coverage on only one strand
            var writerConfig = new VcfWriterConfig();
            writerConfig.DepthFilterThreshold                = 500;
            writerConfig.QscoreFilterThreshold               = 20;
            writerConfig.StrandBiasFilterThreshold           = 0.5f;
            writerConfig.FrequencyFilterThreshold            = 0.007f;
            writerConfig.ShouldOutputNoCallFraction          = true;
            writerConfig.ShouldOutputStrandBiasAndNoiseLevel = true;
            writerConfig.ShouldFilterOnlyOneStrandCoverage   = true;
            writerConfig.EstimatedBaseCallQuality            = 23;

            var vcfWriter = new VcfFileWriter(vcfPath, writerConfig, inputContext);
            vcfWriter.WriteHeader();
            vcfWriter.Write(_defaultCandidates);
            vcfWriter.Dispose();

            VcfHeaderFormatTester(writerConfig, vcfPath);
        }
Esempio n. 18
0
        // Writes a header and the default candidates with a fully populated config. The written
        // file path and its "_expected" sibling path are handed to Compare.
        public void Test1()
        {
            var actualVcfPath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_Test1.vcf");

            // Start from a clean slate in case an earlier run left the file behind.
            File.Delete(actualVcfPath);

            var contigs = new List <Tuple <string, long> >();
            contigs.Add(Tuple.Create("chr1", 10001L));
            contigs.Add(Tuple.Create("chrX", 500L));

            var inputContext = new VcfWriterInputContext();
            inputContext.CommandLine   = "myCommandLine";
            inputContext.SampleName    = "mySample";
            inputContext.ReferenceName = "myReference";
            inputContext.ContigsByChr  = contigs;

            var writerConfig = new VcfWriterConfig();
            writerConfig.DepthFilterThreshold                = 500;
            writerConfig.QscoreFilterThreshold               = 20;
            writerConfig.StrandBiasFilterThreshold           = 0.5f;
            writerConfig.FrequencyFilterThreshold            = 0.007f;
            writerConfig.ShouldOutputNoCallFraction          = true;
            writerConfig.ShouldOutputStrandBiasAndNoiseLevel = true;
            writerConfig.ShouldFilterOnlyOneStrandCoverage   = true;
            writerConfig.EstimatedBaseCallQuality            = 23;

            var vcfWriter = new VcfFileWriter(actualVcfPath, writerConfig, inputContext);
            vcfWriter.WriteHeader();
            vcfWriter.Write(_defaultCandidates);
            vcfWriter.Dispose();

            var expectedVcfPath = actualVcfPath.Replace(".vcf", "_expected.vcf");
            Compare(actualVcfPath, expectedVcfPath);
        }
Esempio n. 19
0
        // Enables gVCF output through the "-gVCF true" option, writes a header plus the default
        // candidates via a Factory-created writer, and passes the resulting file to
        // VcfFileWriterTests.VcfFileFormatValidation.
        public void GvcfHeaderFormat()
        {
            var gvcfOptions = new ApplicationOptions();
            gvcfOptions.BAMPaths      = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup };
            gvcfOptions.IntervalPaths = new[] { _intervalsChr17, _intervalsChr19, null };
            gvcfOptions.GenomePaths   = new[] { _genomeChr17Chr19 };

            // Switch gVCF on the same way a command-line argument pair would be supplied.
            gvcfOptions.UpdateOptions(new[] { "-gVCF", "true" });

            var writerFactory = new Factory(gvcfOptions);

            var contigs = new List <Tuple <string, long> >();
            contigs.Add(Tuple.Create("chr1", 10001L));
            contigs.Add(Tuple.Create("chrX", 500L));

            var inputContext = new VcfWriterInputContext();
            inputContext.CommandLine   = new[] { "myCommandLine" };
            inputContext.SampleName    = "mySample";
            inputContext.ReferenceName = "myReference";
            inputContext.ContigsByChr  = contigs;

            var gvcfPath   = writerFactory.GetOutputFile(gvcfOptions.BAMPaths[0]);
            var gvcfWriter = writerFactory.CreateVcfWriter(gvcfPath, inputContext);

            gvcfWriter.WriteHeader();
            gvcfWriter.Write(_defaultCandidates);
            gvcfWriter.Dispose();

            // Time to read the header
            VcfFileWriterTests.VcfFileFormatValidation(gvcfPath, 5);
        }
Esempio n. 20
0
        // Streams the variants of MergerInput.vcf through a VcfMerger into a PhasedVcfWriter,
        // substituting the called variants of two neighborhoods (chr2 and chr7), and compares the
        // written file line by line against MergerOutput.vcf.
        public void WriteANbhd()
        {
            var outputFilePath   = Path.Combine(TestPaths.LocalTestDataDirectory, "PhasedVcfFileNbhdWriterTest.vcf");
            var inputFilePath    = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerInput.vcf");
            var expectedFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "MergerOutput.vcf");

            File.Delete(outputFilePath);

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.Somatic,
                AllowMultipleVcfLinesPerLoci = true
            };

            // The writer is given an empty input context, an empty original header and no
            // phasing command line.
            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), new List <string>(), null);
            var reader = new VcfReader(inputFilePath, true);


            //set up the original variants
            var originalVcfVariant1 = TestHelper.CreateDummyAllele("chr2", 116380048, "A", "New", 1000, 156);
            var originalVcfVariant2 = TestHelper.CreateDummyAllele("chr2", 116380048, "AAA", "New", 1000, 156);
            var originalVcfVariant4 = TestHelper.CreateDummyAllele("chr7", 116380051, "A", "New", 1000, 156);
            var originalVcfVariant5 = TestHelper.CreateDummyAllele("chr7", 116380052, "AC", "New", 1000, 156);

            var vs1 = new VariantSite((originalVcfVariant1));
            var vs2 = new VariantSite((originalVcfVariant2));
            var vs4 = new VariantSite((originalVcfVariant4));
            var vs5 = new VariantSite((originalVcfVariant5));


            //have to replace variants at position 116380048 (we call two new MNVS here)
            var nbhd1 = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr2", vs1, vs2, "");

            nbhd1.SetRangeOfInterest();

            //have to replace variants at position 116380051 and 52  (we call one new MNV at 51)
            var nbhd2 = new VcfNeighborhood(new VariantCallingParameters(), 0, "chr7", vs4, vs5, "");

            nbhd2.SetRangeOfInterest();


            VcfMerger           merger         = new VcfMerger(reader);
            List <CalledAllele> allelesPastNbh = new List <CalledAllele>();

            nbhd1.CalledVariants = new Dictionary <int, List <CalledAllele> > {
                { originalVcfVariant1.ReferencePosition, new List <CalledAllele> {
                      originalVcfVariant1, originalVcfVariant2
                  } }
            };
            nbhd2.CalledVariants = new Dictionary <int, List <CalledAllele> > {
                { originalVcfVariant4.ReferencePosition, new List <CalledAllele> {
                      originalVcfVariant4
                  } }
            };


            // Each merger call returns the alleles to carry into the next call, so the
            // sequence below must stay in this order.
            allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd1.ReferenceName);

            allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd1, writer, allelesPastNbh);

            allelesPastNbh = merger.WriteVariantsUptoChr(writer, allelesPastNbh, nbhd2.ReferenceName);

            allelesPastNbh = merger.WriteVariantsUptoIncludingNbhd(nbhd2, writer, allelesPastNbh);

            merger.WriteRemainingVariants(writer, allelesPastNbh);

            writer.Dispose();

            var expectedLines = File.ReadLines(expectedFilePath).ToList();
            var outputLines   = File.ReadLines(outputFilePath).ToList();

            // Compare counts first so the indexed loop below cannot run past the output.
            Assert.Equal(expectedLines.Count, outputLines.Count);

            for (int i = 0; i < expectedLines.Count; i++)
            {
                Assert.Equal(expectedLines[i], outputLines[i]);
            }
        }
Esempio n. 21
0
        /// <summary>
        /// Runs the <c>VcfMerger</c> over TinyDiploid.vcf with a single called neighborhood on chr1
        /// and verifies that the phased VCF written to the scratch directory is identical, line by
        /// line, to TinyDiploidOutput.vcf.
        /// </summary>
        public void WriteADiploidNbhd()
        {
            var scratchDir      = Path.Combine(TestPaths.LocalScratchDirectory, "MergerWriteADiploidNbhd");
            var phasedVcfPath   = Path.Combine(scratchDir, "TinyDiploid.Phased.vcf");
            var sourceVcfPath   = Path.Combine(TestPaths.LocalTestDataDirectory, "TinyDiploid.vcf");
            var expectedVcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "TinyDiploidOutput.vcf");

            TestHelper.RecreateDirectory(scratchDir);

            // NOTE(review): this populated context is never handed to the writer below, which is
            // given a blank VcfWriterInputContext instead - confirm whether that is intentional.
            var populatedContext = new VcfWriterInputContext
            {
                QuotedCommandLineString = "myCommandLine",
                SampleName              = "mySample",
                ReferenceName           = "myReference",
                ContigsByChr            = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chr22", 51304566),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var writerConfig = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel                         = PloidyModel.DiploidByThresholding,
                AllowMultipleVcfLinesPerLoci        = false
            };

            // The writer gets an empty list of original header lines and a null last argument.
            var emptyHeader  = new List<string>();
            var phasedWriter = new PhasedVcfWriter(phasedVcfPath, writerConfig, new VcfWriterInputContext(), emptyHeader, null);
            var alleleReader = new AlleleReader(sourceVcfPath, true);

            // Alleles mirroring the records of the input VCF: two at chr1:1 and two at chr22:1230237.
            var chr1AltG     = TestHelper.CreateDummyAllele("chr1", 1, "A", "G", 1000, 156);
            var chr1AltT     = TestHelper.CreateDummyAllele("chr1", 1, "A", "T", 1000, 156);
            var chr22Del     = TestHelper.CreateDummyAllele("chr22", 1230237, "GTC", "G", 1000, 156);
            var chr22Ins     = TestHelper.CreateDummyAllele("chr22", 1230237, "GTC", "GTCT", 1000, 156);

            var chr1SiteG = new VariantSite(chr1AltG);
            var chr1SiteT = new VariantSite(chr1AltT);

            // NOTE(review): the two chr22 sites are constructed but not referenced afterwards.
            var chr22SiteDel = new VariantSite(chr22Del);
            var chr22SiteIns = new VariantSite(chr22Ins);

            // The only neighborhood in this test is built from the two chr1 variant sites.
            var chr1Nbhd         = new VcfNeighborhood(0, "chr1", chr1SiteG, chr1SiteT);
            var chr1CallableNbhd = new CallableNeighborhood(chr1Nbhd, new VariantCallingParameters());

            var vcfMerger = new VcfMerger(alleleReader);
            var carriedOver = new List<Tuple<CalledAllele, string>>();

            // Pretend the phaser called exactly what the original VCF already contained at chr1:1,
            // i.e. the neighborhood agrees with the input.
            var calledAtChr1 = new List<CalledAllele> { chr1AltG, chr1AltT };
            chr1CallableNbhd.CalledVariants = new Dictionary<int, List<CalledAllele>>
            {
                { chr1AltG.ReferencePosition, calledAtChr1 }
            };

            // Step 1: advance to the neighborhood's chromosome. The first VCF record (chr1 1 A G,T)
            // already lies on chr1, so it is returned as carry-over rather than written.
            carriedOver = vcfMerger.WriteVariantsUptoChr(phasedWriter, carriedOver, chr1Nbhd.ReferenceName);
            Assert.True(carriedOver[0].Item1.IsSameAllele(chr1AltG));
            Assert.True(carriedOver[1].Item1.IsSameAllele(chr1AltT));

            // Step 2: write everything up to and including the neighborhood. The carried-over VCF
            // alleles duplicate the called ones, so only one copy should be emitted; the merger then
            // peeks the next record (chr22 1230237 GTC G,GTCT), which is outside the neighborhood,
            // and hands it back as the new carry-over.
            carriedOver = vcfMerger.WriteVariantsUptoIncludingNbhd(phasedWriter, carriedOver, chr1CallableNbhd);
            Assert.True(carriedOver[0].Item1.IsSameAllele(chr22Del));
            Assert.True(carriedOver[1].Item1.IsSameAllele(chr22Ins));

            // Step 3: flush the rest - the peeked chr22 record and, per the original author's notes,
            // a trailing chrX record (chrX 79 CG GTG,AA) from the input file.
            vcfMerger.WriteRemainingVariants(phasedWriter, carriedOver);

            phasedWriter.Dispose();

            var wantLines = File.ReadLines(expectedVcfPath).ToList();
            var gotLines  = File.ReadLines(phasedVcfPath).ToList();

            Assert.Equal(wantLines.Count, gotLines.Count);

            for (var lineIndex = 0; lineIndex < wantLines.Count; lineIndex++)
            {
                Assert.Equal(wantLines[lineIndex], gotLines[lineIndex]);
            }
        }
Esempio n. 22
0
 /// <summary>
 /// Builds a <c>VcfFileWriter</c> for <paramref name="outputVcfPath"/>, deriving its
 /// <c>VcfWriterConfig</c> from the current <c>_options</c>.
 /// </summary>
 /// <param name="outputVcfPath">Path of the VCF file the writer will produce.</param>
 /// <param name="context">Input context passed through to the writer.</param>
 /// <param name="mapper">Not used by this implementation.</param>
 /// <returns>A new writer configured from <c>_options</c>.</returns>
 public VcfFileWriter CreateVcfWriter(string outputVcfPath, VcfWriterInputContext context, IRegionMapper mapper = null)
 {
     // The last config argument is true when at least one forced-alleles file name was supplied.
     var writerConfig = new VcfWriterConfig(
         _options.VariantCallingParameters,
         _options.VcfWritingParameters,
         _options.BamFilterParameters,
         null,
         _options.DebugMode,
         _options.OutputBiasFiles,
         _options.ForcedAllelesFileNames.Count > 0);

     return new VcfFileWriter(outputVcfPath, writerConfig, context);
 }
Esempio n. 23
0
        /// <summary>
        /// Writes a single homozygous-alt SNV with a <c>VcfFileWriter</c>, checks that the writer
        /// throws once disposed, and verifies the exact variant line appears in the output file.
        /// </summary>
        public void TestWithVariants()
        {
            var vcfPath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_AdHoc.vcf");

            // Start from a clean slate so a stale file cannot satisfy the final assertion.
            File.Delete(vcfPath);

            var inputContext = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var writerConfig = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            var vcfWriter = new VcfFileWriter(vcfPath, writerConfig, inputContext);

            // One SNV (A>G on chr4) with no filters and no no-calls.
            var snv = new CalledVariant(AlleleCategory.Snv)
            {
                AlleleSupport    = 5387,
                TotalCoverage    = 5394,
                Chromosome       = "chr4",
                Coordinate       = 55141055,
                Reference        = "A",
                Alternate        = "G",
                Filters          = new List<FilterType>(),
                FractionNoCalls  = 0,
                Genotype         = Genotype.HomozygousAlt,
                NumNoCalls       = 0,
                ReferenceSupport = 7
            };
            var alleles = new List<BaseCalledAllele> { snv };

            vcfWriter.WriteHeader();
            vcfWriter.Write(alleles);
            vcfWriter.Dispose();

            // After disposal both write paths are expected to throw, while a second Dispose
            // call must go through without raising.
            Assert.Throws<Exception>(() => vcfWriter.WriteHeader());
            Assert.Throws<Exception>(() => vcfWriter.Write(alleles));
            vcfWriter.Dispose();

            var expectedRecord = @"chr4	55141055	.	A	G	0	PASS	DP=5394	GT:GQ:AD:VF:NL:SB:NC	1/1:0:7,5387:0.9987:23:0.0000:0.0000";
            var writtenLines   = File.ReadAllLines(vcfPath);

            Assert.True(writtenLines.Contains(expectedRecord));
        }
        /// <summary>
        /// Verifies the header a <c>PhasedVcfWriter</c> produces from an existing VCF header:
        /// first for a diploid config whose original header already has FILTER lines, then for a
        /// somatic config whose original header has none. In both cases the written header is read
        /// back with <c>VcfReader</c> and compared line by line with the expected header; only the
        /// "##VariantPhaser=Scylla" line is compared by prefix so the version number may differ.
        /// </summary>
        public void FilterHeader()
        {
            var outputFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "PhasedVcfFileWriterTests.vcf");

            File.Delete(outputFilePath);

            // NOTE(review): this populated context is never passed to a writer; both writers below
            // receive a blank VcfWriterInputContext - confirm whether that is intentional.
            var context = new VcfWriterInputContext
            {
                QuotedCommandLineString = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };

            // First scenario: diploid ploidy model with a variant quality threshold of 30.
            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                VariantQualityFilterThreshold       = 30,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                EstimatedBaseCallQuality            = 23,
                PloidyModel = PloidyModel.Diploid,
            };

            //note, scylla has no SB or RMxN or R8 filters.


            var variants = new List <CalledAllele>
            {
                TestHelper.CreateDummyAllele("chrX", 123, "A", "C", 1000, 156),
                TestHelper.CreateDummyAllele("chr10", 124, "A", "C", 1000, 156),
            };

            // Original header that already carries FILTER lines (q20, SB, R5x9).
            var originalHeader = new List <string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
                "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
                "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };


            var writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), originalHeader, null);

            writer.WriteHeader();
            writer.Write(variants);
            writer.Dispose();

            VcfReader     reader        = new VcfReader(outputFilePath);
            List <string> writtenHeader = reader.HeaderLines;

            reader.Dispose();

            // Expected: the VariantPhaser line follows the Pisces command line, and the Scylla
            // FILTER lines are appended directly after the pre-existing FILTER lines.
            var expectedHeader1 = new List <string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##VariantPhaser=Scylla 1.0.0.0",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FILTER=<ID=q20,Description=\"Quality score less than 20\">",
                "##FILTER=<ID=SB,Description=\"Variant strand bias too high\">",
                "##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">",
                "##FILTER=<ID=q30,Description=\"Quality score less than 30, by Scylla\">",
                "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called, by Scylla\">",
                "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070, by Scylla\">",
                "##FILTER=<ID=MultiAllelicSite,Description=\"Variant does not conform to diploid model, by Scylla\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };


            Assert.Equal(expectedHeader1.Count, writtenHeader.Count);
            for (int i = 0; i < expectedHeader1.Count; i++)
            {
                //let version numbers differ
                if (expectedHeader1[i].StartsWith("##VariantPhaser=Scylla"))
                {
                    Assert.True(writtenHeader[i].StartsWith("##VariantPhaser=Scylla"));
                    continue;
                }
                Assert.Equal(expectedHeader1[i], writtenHeader[i]);
            }

            // Second scenario: somatic ploidy model with a variant quality threshold of 22.
            config = new VcfWriterConfig
            {
                DepthFilterThreshold          = 500,
                VariantQualityFilterThreshold = 22,
                FrequencyFilterThreshold      = 0.007f,
                EstimatedBaseCallQuality      = 23,
                PloidyModel = PloidyModel.Somatic,
            };


            // Original header without any FILTER lines.
            originalHeader = new List <string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };
            writer = new PhasedVcfWriter(outputFilePath, config, new VcfWriterInputContext(), originalHeader, null);


            // Expected: with no pre-existing FILTER lines, the Scylla FILTER lines land just
            // before the #CHROM column line.
            var expectedHeader2 = new List <string>
            {
                "##fileformat=VCFv4.1",
                "##fileDate=20160620",
                "##source=Pisces 1.0.0.0",
                "##Pisces_cmdline=\"-B KRAS_42_S1.bam -g -MinimumFrequency 0.01 -MinBaseCallQuality 21 -MaxVariantQScore 100 -MinCoverage 300 -MaxAcceptableStrandBiasFilter 0.5 -MinVariantQScore 20 -VariantQualityFilter 20 -gVCF true -CallMNVs True -out \\myout",
                "##VariantPhaser=Scylla 1.0.0.0",
                "##reference=WholeGenomeFASTA",
                "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">",
                "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
                "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">",
                "##FILTER=<ID=q22,Description=\"Quality score less than 22, by Scylla\">",
                "##FILTER=<ID=LowDP,Description=\"Low coverage (DP tag), therefore no genotype called, by Scylla\">",
                "##FILTER=<ID=LowVariantFreq,Description=\"Variant frequency less than 0.0070, by Scylla\">",
                "#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HD700n560_miseq1_S7.bam"
            };

            writer.WriteHeader();
            writer.Write(variants);
            writer.Dispose();

            reader        = new VcfReader(outputFilePath);
            writtenHeader = reader.HeaderLines;
            reader.Dispose();

            Assert.Equal(expectedHeader2.Count, writtenHeader.Count);
            for (int i = 0; i < expectedHeader2.Count; i++)
            {
                //let version numbers differ
                // The prefix check must look at expectedHeader2, the list being validated here;
                // consulting expectedHeader1 only worked while both lists happened to hold the
                // VariantPhaser line at the same index.
                if (expectedHeader2[i].StartsWith("##VariantPhaser=Scylla"))
                {
                    Assert.True(writtenHeader[i].StartsWith("##VariantPhaser=Scylla"));
                    continue;
                }
                Assert.Equal(expectedHeader2[i], writtenHeader[i]);
            }
        }
Esempio n. 25
0
        /// <summary>
        /// Writes the default candidates with a <c>VcfFileWriter</c> and checks every ##INFO and
        /// ##FORMAT header line of the output against the exact definition expected for its ID.
        /// Any INFO/FORMAT line with an unrecognised ID fails the test. Finally confirms that the
        /// NL, SB and NC format lines are present when the corresponding config switches are on.
        /// </summary>
        public void InfoFormatHeader()
        {
            var vcfPath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-17.vcf");

            File.Delete(vcfPath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };
            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            var vcfWriter = new VcfFileWriter(vcfPath, config, context);

            vcfWriter.WriteHeader();
            vcfWriter.Write(_defaultCandidates);
            vcfWriter.Dispose();

            // Lookup tables keyed by the text before the first comma of a header line
            // (e.g. "##INFO=<ID=DP"); the value is the full-line pattern that line must match.
            var infoPatterns = new Dictionary<string, string>
            {
                { "##INFO=<ID=DP",   "^##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">$" },
                { "##INFO=<ID=TI",   "^##INFO=<ID=TI,Number=\\.,Type=String,Description=\"Transcript ID\">$" },
                { "##INFO=<ID=GI",   "^##INFO=<ID=GI,Number=\\.,Type=String,Description=\"Gene ID\">$" },
                { "##INFO=<ID=EXON", "^##INFO=<ID=EXON,Number=0,Type=Flag,Description=\"Exon Region\">$" },
                { "##INFO=<ID=FC",   "^##INFO=<ID=FC,Number=\\.,Type=String,Description=\"Functional Consequence\">$" }
            };
            var formatPatterns = new Dictionary<string, string>
            {
                { "##FORMAT=<ID=GT", "^##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">$" },
                { "##FORMAT=<ID=GQ", "^##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">$" },
                { "##FORMAT=<ID=AD", "^##FORMAT=<ID=AD,Number=\\.,Type=Integer,Description=\"Allele Depth\">$" },
                { "##FORMAT=<ID=VF", "^##FORMAT=<ID=VF,Number=1,Type=Float,Description=\"Variant Frequency\">$" },
                { "##FORMAT=<ID=NL", "^##FORMAT=<ID=NL,Number=1,Type=Integer,Description=\"Applied BaseCall Noise Level\">$" },
                { "##FORMAT=<ID=SB", "^##FORMAT=<ID=SB,Number=1,Type=Float,Description=\"StrandBias Score\">$" },
                { "##FORMAT=<ID=NC", "^##FORMAT=<ID=NC,Number=1,Type=Float,Description=\"Fraction of bases which were uncalled or with basecall quality below the minimum threshold\">$" }
            };

            // FORMAT keys whose definition line was found and validated.
            var validatedFormats = new HashSet<string>();

            foreach (var headerLine in File.ReadAllLines(vcfPath))
            {
                var isInfo   = Regex.IsMatch(headerLine, "##INFO=");
                var isFormat = !isInfo && Regex.IsMatch(headerLine, "##FORMAT=");
                if (!isInfo && !isFormat)
                {
                    continue;
                }

                var key = headerLine.Split(',')[0];
                string expectedPattern;

                if (isInfo)
                {
                    if (infoPatterns.TryGetValue(key, out expectedPattern))
                    {
                        Assert.True(Regex.IsMatch(headerLine, expectedPattern));
                    }
                    else
                    {
                        Assert.True(false, "An info is listed which does not match any from the req.`");
                    }
                }
                else
                {
                    if (formatPatterns.TryGetValue(key, out expectedPattern))
                    {
                        Assert.True(Regex.IsMatch(headerLine, expectedPattern));
                        validatedFormats.Add(key);
                    }
                    else
                    {
                        Assert.True(false, "A format is listed which does not match any of those listed for the req.");
                    }
                }
            }

            // Optional FORMAT fields must be present whenever their switch is enabled.
            if (config.ShouldOutputStrandBiasAndNoiseLevel)
            {
                Assert.True(validatedFormats.Contains("##FORMAT=<ID=NL"));
                Assert.True(validatedFormats.Contains("##FORMAT=<ID=SB"));
            }

            if (config.ShouldOutputNoCallFraction)
            {
                Assert.True(validatedFormats.Contains("##FORMAT=<ID=NC"));
            }
        }
Esempio n. 26
0
        /// <summary>
        /// Writes the default candidates with a <c>VcfFileWriter</c> and checks the columns of every
        /// data line whose chromosome is "chr" followed by digits, then asserts that exactly five
        /// such lines were written. The FORMAT column is checked against the colon-joined IDs
        /// collected from the ##FORMAT header lines, in header order.
        /// </summary>
        public void DataAlleleCheck()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-21.vcf");

            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };

            var writer = new VcfFileWriter(
                outputFilePath,
                new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            },
                context);

            writer.WriteHeader();
            writer.Write(_defaultCandidates);
            writer.Dispose();

            var testFile   = File.ReadAllLines(outputFilePath);
            var chromCount = 0;
            var formatList = string.Empty;

            foreach (var x in testFile)
            {
                if (Regex.IsMatch(x, "^##FORMAT"))
                {
                    // "##FORMAT=<ID=" is 13 characters long, so the remainder of the text before
                    // the first comma is the bare format ID (GT, GQ, ...).
                    var formatId = x.Split(',')[0].Substring(13);

                    formatList = formatList == string.Empty
                        ? formatId
                        : formatList + ":" + formatId;
                }
                else if (Regex.IsMatch(x, "^chr\\d+\t"))
                {
                    // Columns: CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, sample.
                    // The patterns are unanchored, so each check only requires a match somewhere
                    // inside the column.
                    var y = x.Split('\t');
                    Assert.True(Regex.IsMatch(y[0], "chr\\d+"));
                    Assert.True(Regex.IsMatch(y[1], "\\d+"));
                    Assert.True(Regex.IsMatch(y[2], "\\."));
                    Assert.True(Regex.IsMatch(y[3], "([ACGT\\.])+"));
                    Assert.True(Regex.IsMatch(y[4], "([ACGT\\.])+"));
                    Assert.True(Regex.IsMatch(y[5], "\\d+"));
                    Assert.True(Regex.IsMatch(y[6], ".+"));
                    Assert.True(Regex.IsMatch(y[7], "DP=\\d+"));
                    Assert.True(Regex.IsMatch(y[8], formatList));
                    Assert.True(Regex.IsMatch(y[9], ".+"));
                    chromCount++;
                }
            }

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes a failure
            // report "Expected: 5" rather than presenting the counted value as the expectation.
            Assert.Equal(5, chromCount);
        }
Esempio n. 27
0
        /// <summary>
        /// Writes the default candidates with a <c>VcfFileWriter</c> and checks that the FORMAT
        /// column of every data line whose chromosome is "chr" followed by digits matches the
        /// colon-joined IDs of the ##FORMAT header lines seen so far. Also verifies that the NL, SB
        /// and NC format definitions are present exactly when the corresponding config switch is on.
        /// </summary>
        public void DataFormatCheck()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-23.vcf");

            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List <Tuple <string, long> >
                {
                    new Tuple <string, long>("chr1", 10001),
                    new Tuple <string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            var writer = new VcfFileWriter(outputFilePath, config, context);

            writer.WriteHeader();
            writer.Write(_defaultCandidates);
            writer.Dispose();

            var  testFile = File.ReadAllLines(outputFilePath);
            var  formatList = string.Empty;
            bool caseNL = false, caseSB = false, caseNC = false;

            foreach (var x in testFile)
            {
                if (Regex.IsMatch(x, "^##FORMAT"))
                {
                    // "##FORMAT=<ID=" is 13 characters long, so this yields the bare format ID.
                    var formatField = x.Split(',')[0].Substring(13);

                    // Record presence regardless of the config switches. Gating these on the
                    // switch (as before) meant a field emitted while its switch is off could
                    // never be detected by the checks at the end of the test.
                    switch (formatField)
                    {
                    case "NL":
                        caseNL = true;
                        break;

                    case "SB":
                        caseSB = true;
                        break;

                    case "NC":
                        caseNC = true;
                        break;
                    }

                    formatList = formatList == string.Empty
                        ? formatField
                        : formatList + ":" + formatField;
                }

                if (Regex.IsMatch(x, "^chr\\d+\t"))
                {
                    // Column 8 (zero-based) is the FORMAT column of a data line.
                    var y = x.Split('\t');
                    Assert.True(Regex.IsMatch(y[8], formatList));
                }
            }

            // Each optional field must be present if and only if its switch is enabled.
            Assert.True(config.ShouldOutputStrandBiasAndNoiseLevel == caseNL,
                        "Incorrect setting for ShouldOutputStrandBiasAndNoiseLevel and NL format");

            Assert.True(config.ShouldOutputStrandBiasAndNoiseLevel == caseSB,
                        "Incorrect setting for ShouldOutputStrandBiasAndNoiseLevel and SB format");

            Assert.True(config.ShouldOutputNoCallFraction == caseNC,
                        "Incorrect setting for NoCall and NC format");
        }