public void Test4()
        {
            // Header-only scenario: a strand bias threshold is configured, but
            // single-strand filtering is not switched on.
            var outputFile = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_Test4.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFile);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference"
            };

            var config = new VcfWriterConfig
            {
                StrandBiasFilterThreshold = 5,
                FrequencyFilterThreshold  = 0.01f,
            };

            // The using block disposes the writer even when WriteHeader throws, so a
            // failing run does not keep the output file open.
            using (var writer = new VcfFileWriter(outputFile, config, context))
            {
                writer.WriteHeader();
            }

            // The writer is disposed at this point; hand the result and its
            // "_expected.vcf" sibling to the Compare helper.
            Compare(outputFile, outputFile.Replace(".vcf", "_expected.vcf"));
        }
        /// <summary>
        /// Writes a header plus the unfiltered default candidates using a default
        /// <c>VcfWriterConfig</c>, then passes the output and its "_expected.vcf"
        /// sibling to <c>Compare</c>.
        /// </summary>
        public void Test2()
        {
            var outputFile = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_Test2.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFile);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                // chr10 is listed before chr9 on purpose in this fixture.
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr10", 123),
                    new Tuple<string, long>("chr9", 5)
                }
            };

            // The using block disposes the writer even when a write throws, so a
            // failing run does not keep the output file open.
            using (var writer = new VcfFileWriter(outputFile, new VcfWriterConfig(), context))
            {
                writer.WriteHeader();

                // Only candidates that carry no filters are written.
                writer.Write(_defaultCandidates.Where(c => !c.Filters.Any()));
            }

            Compare(outputFile, outputFile.Replace(".vcf", "_expected.vcf"));
        }
        /// <summary>
        /// Header-only scenario with <c>ShouldFilterOnlyOneStrandCoverage</c> enabled;
        /// the output and its "_expected.vcf" sibling are passed to <c>Compare</c>.
        /// </summary>
        public void Test3()
        {
            var outputFile = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_Test3.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFile);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference"
            };

            var config = new VcfWriterConfig
            {
                ShouldFilterOnlyOneStrandCoverage = true,
                FrequencyFilterThreshold          = 0.01f,
            };

            // The using block disposes the writer even when WriteHeader throws, so a
            // failing run does not keep the output file open.
            using (var writer = new VcfFileWriter(outputFile, config, context))
            {
                writer.WriteHeader();
            }

            Compare(outputFile, outputFile.Replace(".vcf", "_expected.vcf"));
        }
        /// <summary>
        /// Writes a VCF into the existing test-data folder and asserts that the file
        /// exists afterwards, then removes the "SDS-14" folder when it is present.
        /// </summary>
        public void ConfiguredVCFOutput()
        {
            // Paths for the currently existing folder and the not-yet-existing one.
            var existingOutputFolder = Path.Combine(UnitTestPaths.TestDataDirectory);
            var existingOutputFile   = Path.Combine(existingOutputFolder, "VcfFileWriterTests.vcf");
            var newOutputFolder      = Path.Combine(UnitTestPaths.TestDataDirectory, "SDS-14");

            // Remove leftovers from a previous run; otherwise the existence assertion
            // below could pass without this run having written anything.
            File.Delete(existingOutputFile);

            // Test -OutFolder works for pre-existing folders.
            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            // The using block disposes the writer even when a write throws.
            using (var writer = new VcfFileWriter(existingOutputFile, config, context))
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }

            Assert.True(File.Exists(existingOutputFile));

            // Delete the new folder if it exists.
            // NOTE(review): the "entirely new directory" scenario is only prepared
            // here; nothing is written into the folder afterwards. Confirm whether
            // the rest of that scenario is missing.
            if (Directory.Exists(newOutputFolder))
            {
                Directory.Delete(newOutputFolder, true);
            }
        }
Exemple #5
0
        /// <summary>
        /// Assembles a <c>SomaticVariantCaller</c> around the supplied mock alignment
        /// extractor, wiring the alignment source, candidate finder, allele caller,
        /// state manager and region padder from <paramref name="options"/>.
        /// </summary>
        /// <param name="vcfWriter">Writer handed to the caller as-is.</param>
        /// <param name="options">Options the individual components are configured from.</param>
        /// <param name="chrRef">Reference passed to the region padder and the caller.</param>
        /// <param name="mae">Mock extractor used as the alignment source's input.</param>
        /// <param name="biasFileWriter">Optional strand bias writer handed to the caller.</param>
        /// <param name="intervalFilePath">Not used by this method.</param>
        /// <returns>The configured variant caller.</returns>
        private ISomaticVariantCaller CreateMockVariantCaller(VcfFileWriter vcfWriter, ApplicationOptions options, ChrReference chrRef, MockAlignmentExtractor mae, IStrandBiasFileWriter biasFileWriter = null, string intervalFilePath = null)
        {
            var sourceConfig = new AlignmentSourceConfig
            {
                MinimumMapQuality  = options.MinimumMapQuality,
                OnlyUseProperPairs = options.OnlyUseProperPairs,
            };

            // Stitcher and mate finder exist only when read stitching is requested.
            IAlignmentStitcher readStitcher = null;
            AlignmentMateFinder pairFinder  = null;

            if (options.StitchReads)
            {
                if (!options.UseXCStitcher)
                {
                    readStitcher = new BasicStitcher(options.MinimumBaseCallQuality);
                }
                else
                {
                    readStitcher = new XCStitcher(options.MinimumBaseCallQuality);
                }

                pairFinder = new AlignmentMateFinder(MAX_FRAGMENT_SIZE);
            }

            var padder          = new RegionPadder(chrRef, null);
            var source          = new AlignmentSource(mae, pairFinder, readStitcher, sourceConfig);
            var candidateFinder = new CandidateVariantFinder(
                options.MinimumBaseCallQuality,
                options.MaxSizeMNV,
                options.MaxGapBetweenMNV,
                options.CallMNVs);

            var callerConfig = new VariantCallerConfig
            {
                IncludeReferenceCalls        = options.OutputgVCFFiles,
                MinVariantQscore             = options.MinimumVariantQScore,
                MaxVariantQscore             = options.MaximumVariantQScore,
                // A q-score filter threshold is only set when it exceeds the minimum q-score.
                VariantQscoreFilterThreshold = options.FilteredVariantQScore > options.MinimumVariantQScore ? options.FilteredVariantQScore : (int?)null,
                MinCoverage                  = options.MinimumCoverage,
                MinFrequency                 = options.MinimumFrequency,
                // An applied noise level of -1 falls back to the minimum base call quality.
                EstimatedBaseCallQuality     = options.AppliedNoiseLevel == -1 ? options.MinimumBaseCallQuality : options.AppliedNoiseLevel,
                StrandBiasModel              = options.StrandBiasModel,
                StrandBiasFilterThreshold    = options.StrandBiasAcceptanceCriteria,
                FilterSingleStrandVariants   = options.FilterOutVariantsPresentOnlyOneStrand,
                GenotypeModel                = options.GTModel
            };

            var caller       = new AlleleCaller(callerConfig);
            var regionStates = new RegionStateManager();

            return new SomaticVariantCaller(
                source,
                candidateFinder,
                caller,
                vcfWriter,
                regionStates,
                chrRef,
                padder,
                biasFileWriter);
        }
        /// <summary>
        /// Writes the default candidates and checks that every data line whose first
        /// column matches "^chr\d+" has the expected column layout and that the
        /// positions of those lines never decrease.
        /// </summary>
        public void VCFDataSection()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-19.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            // The using block disposes the writer even when a write throws, so the
            // file is never left open for the read-back below.
            using (var writer = new VcfFileWriter(outputFilePath, config, context))
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }

            var oldPosition = 0;

            foreach (var line in File.ReadAllLines(outputFilePath))
            {
                // Split once per line; column 0 selects data lines, column 1 is the position.
                var columns = line.Split('\t');

                if (!Regex.IsMatch(columns[0], "^chr\\d+"))
                {
                    continue;
                }

                Assert.True(Regex.IsMatch(line, "^chr\\d+\t\\d+\t.+\t.+\t\\d+\t\\S+\tDP=\\d+\t.+\t.+"));

                // at a minimum, should be ordered by coordinate.
                var position = int.Parse(columns[1]);
                Assert.True(position >= oldPosition);
                oldPosition = position;
            }
        }
        /// <summary>
        /// Builds a <c>SomaticVariantCaller</c> on top of the supplied mock alignment
        /// extractor, with no mate finder and no region padder, and asserts that the
        /// freshly created state manager reports an allele count of zero.
        /// </summary>
        /// <param name="vcfWriter">Writer handed to the caller as-is.</param>
        /// <param name="options">Options the individual components are configured from.</param>
        /// <param name="chrRef">Reference passed to the caller config and the caller.</param>
        /// <param name="mockAlignmentExtractor">Mock extractor used as the alignment source's input.</param>
        /// <param name="biasFileWriter">Optional strand bias writer handed to the caller.</param>
        /// <param name="intervalFilePath">Not used by this method.</param>
        /// <returns>The configured variant caller.</returns>
        public static ISomaticVariantCaller CreateMockVariantCaller(VcfFileWriter vcfWriter, ApplicationOptions options, ChrReference chrRef, MockAlignmentExtractor mockAlignmentExtractor, IStrandBiasFileWriter biasFileWriter = null, string intervalFilePath = null)
        {
            var sourceConfig = new AlignmentSourceConfig
            {
                MinimumMapQuality  = options.MinimumMapQuality,
                OnlyUseProperPairs = options.OnlyUseProperPairs,
            };

            // No mate finder is used here; the typed null keeps the constructor
            // argument's static type explicit.
            AlignmentMateFinder noMateFinder = null;
            var source          = new AlignmentSource(mockAlignmentExtractor, noMateFinder, sourceConfig);
            var candidateFinder = new CandidateVariantFinder(
                options.MinimumBaseCallQuality,
                options.MaxSizeMNV,
                options.MaxGapBetweenMNV,
                options.CallMNVs);
            var coverage = new CoverageCalculator();

            var callerConfig = new VariantCallerConfig
            {
                IncludeReferenceCalls        = options.OutputgVCFFiles,
                MinVariantQscore             = options.MinimumVariantQScore,
                MaxVariantQscore             = options.MaximumVariantQScore,
                // A q-score filter threshold is only set when it exceeds the minimum q-score.
                VariantQscoreFilterThreshold = options.FilteredVariantQScore > options.MinimumVariantQScore ? options.FilteredVariantQScore : (int?)null,
                MinCoverage                  = options.MinimumDepth,
                MinFrequency                 = options.MinimumFrequency,
                // An applied noise level of -1 falls back to the minimum base call quality.
                EstimatedBaseCallQuality     = options.AppliedNoiseLevel == -1 ? options.MinimumBaseCallQuality : options.AppliedNoiseLevel,
                StrandBiasModel              = options.StrandBiasModel,
                StrandBiasFilterThreshold    = options.StrandBiasAcceptanceCriteria,
                FilterSingleStrandVariants   = options.FilterOutVariantsPresentOnlyOneStrand,
                ChrReference                 = chrRef
            };

            // A collapser is only supplied when collapsing is enabled.
            var collapser = options.Collapse ? new VariantCollapser(null, coverage) : null;

            var caller = new AlleleCaller(
                callerConfig,
                coverageCalculator: coverage,
                variantCollapser: collapser);

            var regionStates = new RegionStateManager(
                expectStitchedReads: mockAlignmentExtractor.SourceIsStitched,
                trackOpenEnded: options.Collapse,
                trackReadSummaries: options.CoverageMethod == CoverageMethod.Approximate);

            // The state manager is an allele source; a new one reports no alleles.
            Assert.Equal(0, regionStates.GetAlleleCount(1, AlleleType.A, DirectionType.Forward));

            return new SomaticVariantCaller(
                source,
                candidateFinder,
                caller,
                vcfWriter,
                regionStates,
                chrRef,
                null,
                biasFileWriter);
        }
Exemple #8
0
        /// <summary>
        /// Runs <paramref name="alleles"/> through <paramref name="filter"/>, writes the
        /// result with <paramref name="writer"/> and flushes the writer's buffer.
        /// A failed write is logged and the flush is skipped.
        /// </summary>
        /// <param name="alleles">Alleles to filter.</param>
        /// <param name="writer">Writer receiving the filtered alleles.</param>
        /// <param name="filter">Filter applied before writing.</param>
        private static void FilterAndStreamOut(List <CalledAllele> alleles, VcfFileWriter writer, GeometricFilter filter)
        {
            List<CalledAllele> filteredAlleles = filter.DoFiltering(alleles);

            try
            {
                writer.Write(filteredAlleles);
            }
            catch (Exception writeFailure)
            {
                // Best effort: record the problem and leave without flushing.
                Logger.WriteWarningToLog("Problem writing alleles to vcf.");
                Logger.WriteExceptionToLog(writeFailure);
                return;
            }

            // Only reached when the write succeeded.
            writer.FlushBuffer();
        }
        /// <summary>
        /// Writes the default candidates and checks that every line whose first column
        /// starts with "#CHROM" lists the expected column names followed by the sample name.
        /// </summary>
        public void VCFDataHeaderSection()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-20.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            // The using block disposes the writer even when a write throws, so the
            // file is never left open for the read-back below.
            using (var writer = new VcfFileWriter(outputFilePath, config, context))
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }

            var columnHeaderLines = File.ReadAllLines(outputFilePath)
                                        .Where(line => Regex.IsMatch(line.Split('\t')[0], "^#CHROM"));

            foreach (var headerLine in columnHeaderLines)
            {
                Assert.True(Regex.IsMatch(headerLine, "^#CHROM\\sPOS\\sID\\sREF\\sALT\\sQUAL\\sFILTER\\sINFO\\sFORMAT\\smySample"));
            }
        }
        /// <summary>
        /// Writes a header plus all default candidates with every filter threshold and
        /// optional output enabled, then passes the output and its "_expected.vcf"
        /// sibling to <c>Compare</c>.
        /// </summary>
        public void Test1()
        {
            var outputFile = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_Test1.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFile);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            // The using block disposes the writer even when a write throws, so a
            // failing run does not keep the output file open.
            using (var writer = new VcfFileWriter(outputFile, config, context))
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }

            Compare(outputFile, outputFile.Replace(".vcf", "_expected.vcf"));
        }
        /// <summary>
        /// Writes the default candidates with all filters configured and hands the
        /// config and output path to <c>VcfHeaderFormatTester</c>.
        /// </summary>
        public void FilterHeader()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-18.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            // Variant strand bias too high or coverage on only one strand
            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            // The using block disposes the writer even when a write throws, so the
            // file is never left open for the header check below.
            using (var writer = new VcfFileWriter(outputFilePath, config, context))
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }

            VcfHeaderFormatTester(config, outputFilePath);
        }
        /// <summary>
        /// Writes a single SNV, verifies that the writer throws once it has been
        /// disposed, that a second <c>Dispose</c> call is accepted, and that the
        /// expected variant line is present in the output file.
        /// </summary>
        public void TestWithVariants()
        {
            var vcfPath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_AdHoc.vcf");

            File.Delete(vcfPath);

            var inputContext = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var writerConfig = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            var writer = new VcfFileWriter(vcfPath, writerConfig, inputContext);

            // One unfiltered homozygous-alt SNV.
            var snv = new CalledVariant(AlleleCategory.Snv)
            {
                AlleleSupport    = 5387,
                TotalCoverage    = 5394,
                Chromosome       = "chr4",
                Coordinate       = 55141055,
                Reference        = "A",
                Alternate        = "G",
                Filters          = new List<FilterType>(),
                FractionNoCalls  = 0,
                Genotype         = Genotype.HomozygousAlt,
                NumNoCalls       = 0,
                ReferenceSupport = 7
            };
            var candidates = new List<BaseCalledAllele>() { snv };

            writer.WriteHeader();
            writer.Write(candidates);
            writer.Dispose();

            // Every write attempt on the disposed writer has to throw ...
            Assert.Throws<Exception>(() => writer.WriteHeader());
            Assert.Throws<Exception>(() => writer.Write(candidates));

            // ... while disposing a second time must not.
            writer.Dispose();

            var expectedLine = @"chr4	55141055	.	A	G	0	PASS	DP=5394	GT:GQ:AD:VF:NL:SB:NC	1/1:0:7,5387:0.9987:23:0.0000:0.0000";
            var writtenLines = File.ReadAllLines(vcfPath);

            Assert.True(writtenLines.Contains(expectedLine));
        }
        /// <summary>
        /// Writes the default candidates and checks that (a) the FORMAT column of every
        /// "chr&lt;digits&gt;" data line matches the colon-joined IDs of the ##FORMAT
        /// header lines seen so far, and (b) the NL, SB and NC format fields are present
        /// exactly when the corresponding config switch is on.
        /// </summary>
        public void DataFormatCheck()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-23.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            // The using block disposes the writer even when a write throws, so the
            // file is never left open for the read-back below.
            using (var writer = new VcfFileWriter(outputFilePath, config, context))
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }

            var  testFile = File.ReadAllLines(outputFilePath);
            var  formatList = string.Empty;
            bool caseNL = false, caseSB = false, caseNC = false;

            foreach (var x in testFile)
            {
                if (Regex.IsMatch(x, "^##FORMAT"))
                {
                    // "##FORMAT=<ID=" is 13 characters long; the text between it and the
                    // first comma is the field ID.
                    var formatField = x.Split(',')[0].Substring(13);

                    // Record presence unconditionally. Gating these flags on the config
                    // (as before) made it impossible to notice a field that is written
                    // although its switch is off.
                    switch (formatField)
                    {
                        case "NL":
                            caseNL = true;
                            break;

                        case "SB":
                            caseSB = true;
                            break;

                        case "NC":
                            caseNC = true;
                            break;
                    }

                    formatList = formatList == string.Empty
                        ? formatField
                        : formatList + ":" + formatField;
                }

                if (Regex.IsMatch(x, "^chr\\d+\t"))
                {
                    var y = x.Split('\t');
                    Assert.True(Regex.IsMatch(y[8], formatList));
                }
            }

            // Each field must be present if and only if its switch is enabled.
            Assert.True(config.ShouldOutputStrandBiasAndNoiseLevel == caseNL,
                        "Incorrect setting for ShouldOutputStrandBiasAndNoiseLevel and NL format");

            Assert.True(config.ShouldOutputStrandBiasAndNoiseLevel == caseSB,
                        "Incorrect setting for ShouldOutputStrandBiasAndNoiseLevel and SB format");

            Assert.True(config.ShouldOutputNoCallFraction == caseNC,
                        "Incorrect setting for NoCall and NC format");
        }
        /// <summary>
        /// Writes the default candidates, checks each column of every
        /// "chr&lt;digits&gt;" data line against a pattern, and expects exactly five
        /// such lines.
        /// </summary>
        public void DataAlleleCheck()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-21.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };

            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            // The using block disposes the writer even when a write throws, so the
            // file is never left open for the read-back below.
            using (var writer = new VcfFileWriter(outputFilePath, config, context))
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }

            var testFile   = File.ReadAllLines(outputFilePath);
            var chromCount = 0;
            var formatList = string.Empty;

            foreach (var x in testFile)
            {
                if (Regex.IsMatch(x, "^##FORMAT"))
                {
                    // "##FORMAT=<ID=" is 13 characters long; the text between it and the
                    // first comma is the field ID.
                    var formatField = x.Split(',')[0].Substring(13);

                    formatList = formatList == string.Empty
                        ? formatField
                        : formatList + ":" + formatField;
                }
                else if (Regex.IsMatch(x, "^chr\\d+\t"))
                {
                    var y = x.Split('\t');
                    Assert.True(Regex.IsMatch(y[0], "chr\\d+"));
                    Assert.True(Regex.IsMatch(y[1], "\\d+"));
                    Assert.True(Regex.IsMatch(y[2], "\\."));
                    Assert.True(Regex.IsMatch(y[3], "([ACGT\\.])+"));
                    Assert.True(Regex.IsMatch(y[4], "([ACGT\\.])+"));
                    Assert.True(Regex.IsMatch(y[5], "\\d+"));
                    Assert.True(Regex.IsMatch(y[6], ".+"));
                    Assert.True(Regex.IsMatch(y[7], "DP=\\d+"));
                    Assert.True(Regex.IsMatch(y[8], formatList));
                    Assert.True(Regex.IsMatch(y[9], ".+"));
                    chromCount++;
                }
            }

            // Expected value first, actual second, so a failure message reads correctly.
            Assert.Equal(5, chromCount);
        }
        /// <summary>
        /// Writes the default candidates and checks every ##INFO and ##FORMAT header
        /// line against its expected text. Any unlisted ID fails the test, and the
        /// NL, SB and NC format lines must be present when their config switch is on.
        /// </summary>
        public void InfoFormatHeader()
        {
            var outputFilePath = Path.Combine(UnitTestPaths.TestDataDirectory, "VcfFileWriterTests_SDS-17.vcf");

            // Remove any output left behind by a previous run.
            File.Delete(outputFilePath);

            var context = new VcfWriterInputContext
            {
                CommandLine   = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };
            var config = new VcfWriterConfig
            {
                DepthFilterThreshold                = 500,
                QscoreFilterThreshold               = 20,
                StrandBiasFilterThreshold           = 0.5f,
                FrequencyFilterThreshold            = 0.007f,
                ShouldOutputNoCallFraction          = true,
                ShouldOutputStrandBiasAndNoiseLevel = true,
                ShouldFilterOnlyOneStrandCoverage   = true,
                EstimatedBaseCallQuality            = 23
            };

            // The using block disposes the writer even when a write throws, so the
            // file is never left open for the read-back below.
            using (var writer = new VcfFileWriter(outputFilePath, config, context))
            {
                writer.WriteHeader();
                writer.Write(_defaultCandidates);
            }

            // Time to read the header
            var  testFile = File.ReadAllLines(outputFilePath);
            bool formatNL = false, formatSB = false, formatNC = false;

            foreach (var x in testFile)
            {
                if (Regex.IsMatch(x, "##INFO="))
                {
                    // The text before the first comma identifies the INFO entry.
                    switch (x.Split(',')[0])
                    {
                        case "##INFO=<ID=DP":
                            Assert.True(Regex.IsMatch(x, "^##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">$"));
                            break;

                        case "##INFO=<ID=TI":
                            Assert.True(Regex.IsMatch(x, "^##INFO=<ID=TI,Number=\\.,Type=String,Description=\"Transcript ID\">$"));
                            break;

                        case "##INFO=<ID=GI":
                            Assert.True(Regex.IsMatch(x, "^##INFO=<ID=GI,Number=\\.,Type=String,Description=\"Gene ID\">$"));
                            break;

                        case "##INFO=<ID=EXON":
                            Assert.True(Regex.IsMatch(x, "^##INFO=<ID=EXON,Number=0,Type=Flag,Description=\"Exon Region\">$"));
                            break;

                        case "##INFO=<ID=FC":
                            Assert.True(Regex.IsMatch(x, "^##INFO=<ID=FC,Number=\\.,Type=String,Description=\"Functional Consequence\">$"));
                            break;

                        default:
                            Assert.True(false, "An info is listed which does not match any from the req.");
                            break;
                    }
                }
                else if (Regex.IsMatch(x, "##FORMAT="))
                {
                    // The text before the first comma identifies the FORMAT entry.
                    switch (x.Split(',')[0])
                    {
                        case "##FORMAT=<ID=GT":
                            Assert.True(Regex.IsMatch(x, "^##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">$"));
                            break;

                        case "##FORMAT=<ID=GQ":
                            Assert.True(Regex.IsMatch(x, "^##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">$"));
                            break;

                        case "##FORMAT=<ID=AD":
                            Assert.True(Regex.IsMatch(x, "^##FORMAT=<ID=AD,Number=\\.,Type=Integer,Description=\"Allele Depth\">$"));
                            break;

                        case "##FORMAT=<ID=VF":
                            Assert.True(Regex.IsMatch(x, "^##FORMAT=<ID=VF,Number=1,Type=Float,Description=\"Variant Frequency\">$"));
                            break;

                        case "##FORMAT=<ID=NL":
                            Assert.True(Regex.IsMatch(x, "^##FORMAT=<ID=NL,Number=1,Type=Integer,Description=\"Applied BaseCall Noise Level\">$"));
                            formatNL = true;
                            break;

                        case "##FORMAT=<ID=SB":
                            Assert.True(Regex.IsMatch(x, "^##FORMAT=<ID=SB,Number=1,Type=Float,Description=\"StrandBias Score\">$"));
                            formatSB = true;
                            break;

                        case "##FORMAT=<ID=NC":
                            Assert.True(Regex.IsMatch(x, "^##FORMAT=<ID=NC,Number=1,Type=Float,Description=\"Fraction of bases which were uncalled or with basecall quality below the minimum threshold\">$"));
                            formatNC = true;
                            break;

                        default:
                            Assert.True(false, "A format is listed which does not match any of those listed for the req.");
                            break;
                    }
                }
            }

            // NL and SB are both governed by the same switch.
            if (config.ShouldOutputStrandBiasAndNoiseLevel)
            {
                Assert.True(formatNL);
                Assert.True(formatSB);
            }

            if (config.ShouldOutputNoCallFraction)
            {
                Assert.True(formatNC);
            }
        }
Exemple #16
0
        /// <summary>
        /// Reads <paramref name="inputFile"/> record by record and rewrites it to a
        /// sibling file: ".crushed.vcf" when <paramref name="crush"/> is true,
        /// ".uncrushed.vcf" otherwise. An existing output file is replaced.
        /// </summary>
        /// <param name="inputFile">
        /// Path of the VCF to read. The output path is derived by replacing every
        /// ".vcf" occurrence in this path.
        /// </param>
        /// <param name="crush">
        /// Selects the output suffix; the writer's <c>AllowMultipleVcfLinesPerLoci</c>
        /// is set to the negation of this flag.
        /// </param>
        /// <exception cref="ArgumentException">
        /// <paramref name="inputFile"/> contains no ".vcf", so the derived output path
        /// would be the input file itself.
        /// </exception>
        public static void DoReformating(string inputFile, bool crush)
        {
            var outputFile = inputFile.Replace(".vcf", crush ? ".crushed.vcf" : ".uncrushed.vcf");

            // Without a ".vcf" in the path, Replace returns the input unchanged and the
            // cleanup below would delete the very file that is about to be read.
            if (outputFile == inputFile)
            {
                throw new ArgumentException(
                    "Input path must contain \".vcf\" so that a distinct output path can be derived: " + inputFile,
                    "inputFile");
            }

            Console.WriteLine((crush ? "crushing " : "uncrushing ") + inputFile + "...");

            if (File.Exists(outputFile))
            {
                File.Delete(outputFile);
            }

            var config = new VcfWriterConfig()
            {
                AllowMultipleVcfLinesPerLoci = !crush
            };

            using (VcfFileWriter writer = new VcfFileWriter(outputFile, config, new VcfWriterInputContext()))
            {
                writer.WriteHeader();

                using (VcfReader reader = new VcfReader(inputFile, false))
                {
                    // The same VcfVariant instance is passed to every GetNextVariant call.
                    var backLogVcfVariant = new VcfVariant();

                    var backLogExists = reader.GetNextVariant(backLogVcfVariant);

                    while (backLogExists)
                    {
                        var backLogAlleles = VcfVariantUtilities.Convert(new List <VcfVariant> {
                            backLogVcfVariant
                        }).ToList();

                        foreach (var allele in backLogAlleles)
                        {
                            try
                            {
                                writer.Write(new List <CalledAllele>()
                                {
                                    allele
                                });
                            }
                            catch (Exception ex)
                            {
                                // Give up on the first allele that cannot be written.
                                Console.WriteLine("Problem writing " + allele.ToString());
                                Console.WriteLine("Exception: " + ex);
                                return;
                            }
                        }

                        backLogExists = reader.GetNextVariant(backLogVcfVariant);

                        // NOTE(review): assumes Convert yields at least one allele per
                        // record; an empty result would make the index below throw.
                        if (backLogAlleles[0].Chromosome != backLogVcfVariant.ReferenceName)
                        {
                            //we have switched to the next chr. flush the buffer.
                            writer.FlushBuffer();
                        }
                    }

                    writer.FlushBuffer();
                }
            }
        }