/// <summary>
/// Runs a pairwise Venn over two empty input VCFs and checks that the
/// consensus file is still written and contains no variants.
/// </summary>
public void VennVcf_EmptyInputTest()
{
    var outputDirectory = TestPaths.LocalTestDataDirectory;
    var inputRoot = _TestDataPath;

    var inputVcfs = new string[]
    {
        Path.Combine(inputRoot, "Empty_S1.vcf"),
        Path.Combine(inputRoot, "Empty_S2.vcf")
    };
    string consensusPath = Path.Combine(outputDirectory, "EmptyConsensus.vcf");

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = Path.Combine(outputDirectory, "EmptyConsensus.vcf");
    options.OutputDirectory = outputDirectory;
    options.DebugMode = true;

    var processor = new VennProcessor(inputVcfs, options);
    processor.DoPairwiseVenn();

    // Even with nothing to merge, the consensus file must be produced.
    Assert.True(File.Exists(consensusPath));

    var consensusVariants = AlleleReader.GetAllVariantsInFile(consensusPath);
    Assert.Equal(0, consensusVariants.Count);
}
/// <summary>
/// Asserts that two VCF files agree on genotype and alternate allele for the
/// alleles returned by a single <c>GetNextVariants</c> call on each file.
/// Q scores (and every other field) are deliberately not compared.
/// </summary>
/// <param name="file1">Path of the first VCF.</param>
/// <param name="file2">Path of the second VCF.</param>
public static void AssertSameVariants_QScoreAgnostic(string file1, string file2)
{
    var firstAlleles = new List<CalledAllele>();
    var secondAlleles = new List<CalledAllele>();

    using (var firstReader = new AlleleReader(file1))
    {
        firstReader.GetNextVariants(out firstAlleles);

        using (var secondReader = new AlleleReader(file2))
        {
            secondReader.GetNextVariants(out secondAlleles);

            Assert.Equal(firstAlleles.Count, secondAlleles.Count);

            int index = 0;
            while (index < firstAlleles.Count)
            {
                var left = firstAlleles[index];
                var right = secondAlleles[index];

                Assert.Equal(left.Genotype, right.Genotype);
                Assert.Equal(left.AlternateAllele, right.AlternateAllele);

                index++;
            }
        }
    }
}
// The idea is to keep track of the disparity between two pools as a measure of FFPE degradation,
// or of overall oxidation affecting the tissue sample.
// Possible SNP changes:
//
//      *   A   C   G   T
//      A   *   1   2   3
//      C   4   *   5   6
//      G   7   8   *   9
//      T  10  11  12   *
//
/// <summary>
/// Streams every variant of the VCF at <c>options.VcfPath</c> through a basic counter and an
/// edge-issue counter, then writes whichever counts files the options ask for.
/// </summary>
/// <param name="options">Supplies the VCF path, output directory, edge-region extent, optional loci count and the check switches.</param>
/// <returns>The three result paths (basic counts, edge counts, edge variants), regardless of which counts files were written.</returns>
public static SignatureSorterResultFiles StrainVcf(VQROptions options)
{
    List<CalledAllele> allelesAtLocus = new List<CalledAllele>() { };
    var basicCounts = new CountData();
    var edgeCounts = new EdgeIssueCountData(options.ExtentofEdgeRegion);

    string basicCountsPath = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".counts");
    string edgeCountsPath = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".edgecounts");
    string edgeVariantsPath = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".edgevariants");

    using (AlleleReader vcfReader = new AlleleReader(options.VcfPath))
    {
        while (vcfReader.GetNextVariants(out allelesAtLocus))
        {
            foreach (var allele in allelesAtLocus)
            {
                try
                {
                    basicCounts.Add(allele);
                    edgeCounts.Add(allele, edgeVariantsPath);
                }
                catch (Exception ex)
                {
                    // Log where we were, then let the original exception propagate.
                    Logger.WriteToLog(string.Format("Fatal error processing vcf; Check {0}, position {1}. Exception: {2}", allele.Chromosome, allele.ReferencePosition, ex));
                    throw;
                }
            }
        }

        // The edge issue filter trails N variants behind.
        // Feeding it nulls flushes whatever is still sitting in its buffer.
        for (int flushed = 0; flushed < options.ExtentofEdgeRegion; flushed++)
        {
            edgeCounts.Add(null, edgeVariantsPath);
        }

        bool lociCountSupplied = options.LociCount > 0;
        if (lociCountSupplied)
        {
            basicCounts.ForceTotalPossibleMutations(options.LociCount);
            edgeCounts.ForceTotalPossibleMutations(options.LociCount);
        }

        if (options.DoBasicChecks)
        {
            CountsFileWriter.WriteCountsFile(basicCountsPath, basicCounts);
        }

        if (options.DoAmpliconPositionChecks)
        {
            CountsFileWriter.WriteCountsFile(edgeCountsPath, edgeCounts);
        }
    }

    return new SignatureSorterResultFiles(basicCountsPath, edgeCountsPath, edgeVariantsPath);
}
/// <summary>
/// Builds the writer for the phased output VCF. The output name is derived from the input
/// VCF name: <c>X.genome.vcf</c> becomes <c>X.phased.genome.vcf</c> and <c>X.vcf</c> becomes
/// <c>X.phased.vcf</c>; the file is placed in the configured output directory.
/// </summary>
/// <returns>A <c>PhasedVcfWriter</c> seeded with the input VCF's header lines and the Scylla command line.</returns>
/// <exception cref="InvalidDataException">The input file name does not end in <c>.vcf</c>.</exception>
public virtual IVcfFileWriter<CalledAllele> CreatePhasedVcfWriter()
{
    const string genomeVcfSuffix = ".genome.vcf";
    const string vcfSuffix = ".vcf";

    // Write header. We can do this at the beginning, it's just copying from old vcf.
    List<string> header = AlleleReader.GetAllHeaderLines(_options.VcfPath);

    var originalFileName = Path.GetFileName(_options.VcfPath);
    string outputFileName;

    // Suffix checks are ordinal so they agree with the ordinal trimming below and do not
    // depend on the current culture. The more specific ".genome.vcf" is tested first.
    if (originalFileName != null && originalFileName.EndsWith(genomeVcfSuffix, StringComparison.Ordinal))
    {
        outputFileName = originalFileName.Substring(0, originalFileName.Length - genomeVcfSuffix.Length) + ".phased.genome.vcf";
    }
    else if (originalFileName != null && originalFileName.EndsWith(vcfSuffix, StringComparison.Ordinal))
    {
        outputFileName = originalFileName.Substring(0, originalFileName.Length - vcfSuffix.Length) + ".phased.vcf";
    }
    else
    {
        throw new InvalidDataException(string.Format("Input file is not a VCF file: '{0}'", originalFileName));
    }

    var outFile = Path.Combine(_options.OutputDirectory, outputFileName);
    var phasingCommandLine = "##Scylla_cmdline=" + _options.QuotedCommandLineArgumentsString;

    var writerConfig = new VcfWriterConfig(
        _options.VariantCallingParams,
        _options.VcfWritingParams,
        _options.BamFilterParams,
        null,
        _options.Debug,
        false);

    return new PhasedVcfWriter(outFile, writerConfig, new VcfWriterInputContext(), header, phasingCommandLine);
}
/// <summary>
/// Loads insertion and MNV priors from <c>_options.PriorsPath</c> into <c>_knownVariants</c>.
/// Does nothing when no priors path is configured. When <c>_options.TrimMnvPriors</c> is set,
/// each MNV prior has the first base of its reference and alternate alleles removed and its
/// reference position advanced by one.
/// </summary>
private void UpdateKnownPriors()
{
    if (string.IsNullOrEmpty(_options.PriorsPath))
    {
        return;
    }

    using (var priorsReader = new AlleleReader(_options.PriorsPath))
    {
        var wantedCategories = new List<AlleleCategory> { AlleleCategory.Insertion, AlleleCategory.Mnv };
        _knownVariants = priorsReader.GetVariantsByChromosome(true, true, wantedCategories, doSkipCandidate: SkipPrior);

        if (!_options.TrimMnvPriors)
        {
            return;
        }

        foreach (var priorsForChromosome in _knownVariants.Values)
        {
            foreach (var prior in priorsForChromosome)
            {
                if (prior.Type != AlleleCategory.Mnv)
                {
                    continue;
                }

                // Drop the leading base from both alleles and shift the position to match.
                prior.ReferenceAllele = prior.ReferenceAllele.Substring(1);
                prior.AlternateAllele = prior.AlternateAllele.Substring(1);
                prior.ReferencePosition++;
            }
        }
    }
}
/// <summary>
/// Reads every forced-allele VCF listed in <c>_options.ForcedAllelesFileNames</c> and records
/// each valid (chromosome, position, ref, alt) tuple in <c>_forcedAllelesByChrom</c>.
/// Variants rejected by <c>IsValidAlt</c> are logged and skipped.
/// </summary>
private void GetForcedAlleles()
{
    if (_options.ForcedAllelesFileNames == null || _options.ForcedAllelesFileNames.Count == 0)
    {
        return;
    }

    foreach (var fileName in _options.ForcedAllelesFileNames)
    {
        using (var reader = new AlleleReader(fileName, false, false))
        {
            foreach (var variant in reader.GetVariants())
            {
                var chr = variant.Chromosome;
                var pos = variant.ReferencePosition;

                // Allele strings are machine-readable data, so upper-case them with the
                // invariant culture; ToUpper() would vary with the current culture.
                var refAllele = variant.ReferenceAllele.ToUpperInvariant();
                var altAllele = variant.AlternateAllele.ToUpperInvariant();

                // The per-chromosome set is created before validation, so a chromosome
                // that only has invalid entries still gets an (empty) set.
                if (!_forcedAllelesByChrom.ContainsKey(chr))
                {
                    _forcedAllelesByChrom[chr] = new HashSet<Tuple<string, int, string, string>>();
                }

                if (!IsValidAlt(altAllele, refAllele))
                {
                    Logger.WriteToLog($"Invalid forced genotyping variant: {variant}");
                    continue;
                }

                _forcedAllelesByChrom[chr].Add(new Tuple<string, int, string, string>(chr, pos, refAllele, altAllele));
            }
        }
    }
}
/// <summary>
/// Runs a pairwise Venn over the two genotype test VCFs and checks that the consensus
/// output matches the expected consensus file, allele by allele, via <c>ToString()</c>.
/// </summary>
public void VennVcf_GtTest()
{
    var scratchDirectory = TestPaths.LocalScratchDirectory;
    var inputRoot = _TestDataPath;

    string firstInput = Path.Combine(inputRoot, "gtTests_S15.vcf");
    string secondInput = Path.Combine(inputRoot, "gtTests_S18.vcf");
    string consensusPath = Path.Combine(scratchDirectory, "gtConsensusOut.vcf");
    string expectedConsensusPath = Path.Combine(inputRoot, "gtConsensus.vcf");

    var options = new VennVcfOptions();
    options.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    options.VariantCallingParams.MinimumFrequency = 0.01f;
    options.ConsensusFileName = consensusPath;
    options.OutputDirectory = scratchDirectory;

    var processor = new VennProcessor(new string[] { firstInput, secondInput }, options);
    processor.DoPairwiseVenn();

    Assert.True(File.Exists(consensusPath));

    var expected = AlleleReader.GetAllVariantsInFile(expectedConsensusPath);
    var observed = AlleleReader.GetAllVariantsInFile(consensusPath);
    Assert.Equal(expected.Count, observed.Count);

    int index = 0;
    foreach (var expectedAllele in expected)
    {
        Assert.Equal(expectedAllele.ToString(), observed[index].ToString());
        index++;
    }
}
/// <summary>
/// Recovers the Pisces command line from the VCF header and rejects inputs that were not
/// produced with settings this tool supports: diploid ploidy models, crushed VCFs, and
/// non-genome VCFs written with a variant Q-score or frequency cutoff.
/// </summary>
/// <param name="reader">Reader whose <c>HeaderLines</c> are inspected.</param>
/// <exception cref="VariantReaderException">
/// The header has no usable <c>##Pisces_cmdline</c> entry, or the recovered options are unsupported.
/// </exception>
private static void CheckHeader(AlleleReader reader)
{
    // FirstOrDefault yields null when the header has no Pisces command line; report that
    // as an input problem instead of letting a NullReferenceException escape.
    string cmdLineHeader = reader.HeaderLines.FirstOrDefault(str => str.Contains("##Pisces_cmdline"));
    if (cmdLineHeader == null)
    {
        throw new VariantReaderException("Unable to find the ##Pisces_cmdline entry in the VCF header. Please check the input VCF file.");
    }

    // The command line is taken to be the second piece after splitting on the quote delimiter.
    string[] headerPieces = cmdLineHeader.Split("\"\"");
    if (headerPieces.Length < 2)
    {
        throw new VariantReaderException("Unable to parse the ##Pisces_cmdline entry in the VCF header. Please check the input VCF file.");
    }

    string piscesCmd = headerPieces[1];

    var appOptionParser = new PiscesOptionsParser();
    appOptionParser.ParseArgs(piscesCmd.Split(null));

    var callingParameters = appOptionParser.PiscesOptions.VariantCallingParameters;
    var writingParameters = appOptionParser.PiscesOptions.VcfWritingParameters;

    // Check if VCF is diploid
    if (callingParameters.PloidyModel == PloidyModel.DiploidByAdaptiveGT
        || callingParameters.PloidyModel == PloidyModel.DiploidByThresholding)
    {
        throw new VariantReaderException("Adaptive Genotyper should be used with VCFs that are called as somatic " + "VCFs by Pisces.  Please check the input VCF file.");
    }

    // Check if VCF is crushed
    if (writingParameters.ForceCrush == true)
    {
        throw new VariantReaderException("Adaptive Genotyper should be used with uncrushed VCFs.  Please check the input VCF file.");
    }

    // Check if GVCF or --minvq 0
    if (!writingParameters.OutputGvcfFile
        && (callingParameters.MinimumVariantQScore > 0 || callingParameters.MinimumFrequency > 0.02))
    {
        throw new VariantReaderException("Adaptive Genotyper should be used with GVCFs or with option -minvq 0.  Please" + " check in the input VCF file.");
    }
}
/// <summary>
/// Checks that <c>GetVariants</c> enumerates all 24 alleles in the test VCF, from
/// position 10 through position 4000.
/// </summary>
public void GetVariantsTests()
{
    // Dispose the reader so the test does not leave the VCF file handle open.
    using (var vr = new AlleleReader(VcfTestFile_1))
    {
        var allVar = vr.GetVariants().ToList();

        Assert.Equal(24, allVar.Count);
        Assert.Equal(10, allVar.First().ReferencePosition);
        Assert.Equal(4000, allVar.Last().ReferencePosition);
    }
}
/// <summary>
/// Walks the test VCF record by record and checks the allele category assigned to each,
/// covering every genotype form present in the file (1/1, 1/0, 0/0, 0/1, MNVs, and
/// partially or fully missing genotypes).
/// </summary>
public void AssignVariantTypeTests()
{
    // Dispose the reader so the test does not leave the VCF file handle open.
    using (var vr = new AlleleReader(VcfTestFile_1))
    {
        // Testing 1/1
        Assert.True(TestVariant(vr, AlleleCategory.Reference));
        Assert.True(TestVariant(vr, AlleleCategory.Snv));
        Assert.True(TestVariant(vr, AlleleCategory.Insertion));
        Assert.True(TestVariant(vr, AlleleCategory.Deletion));

        // Testing 1/0
        Assert.True(TestVariant(vr, AlleleCategory.Snv));
        Assert.True(TestVariant(vr, AlleleCategory.Insertion));
        Assert.True(TestVariant(vr, AlleleCategory.Deletion));
        Assert.True(TestVariant(vr, AlleleCategory.Snv));

        // Testing 0/0
        //chr1  90   .  A   .   25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/0:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  100  .  A   AT  25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/0:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  110  .  AT  A   25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/0:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  120  .  A   T   25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/0:25:0,0:0.0000:23:0.0000:0.0010
        Assert.True(TestVariant(vr, AlleleCategory.Reference));
        Assert.True(TestVariant(vr, AlleleCategory.Insertion));
        Assert.True(TestVariant(vr, AlleleCategory.Deletion));
        Assert.True(TestVariant(vr, AlleleCategory.Snv));

        // Testing 0/1
        //chr1  130  .  A   .   25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/1:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  140  .  A   AT  25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/1:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  150  .  AT  A   25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/1:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  160  .  A   T   25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/1:25:0,0:0.0000:23:0.0000:0.0010
        Assert.True(TestVariant(vr, AlleleCategory.Reference));
        Assert.True(TestVariant(vr, AlleleCategory.Insertion));
        Assert.True(TestVariant(vr, AlleleCategory.Deletion));
        Assert.True(TestVariant(vr, AlleleCategory.Snv));

        // Testing MNV
        //chr1  600  .  ATCA  TCGC  25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/0:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  700  .  ATCA  TCGC  25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  0/1:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  800  .  ATCA  TCGC  25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  1/0:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  900  .  ATCA  TCGC  25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  1/1:25:0,0:0.0000:23:0.0000:0.0010
        Assert.True(TestVariant(vr, AlleleCategory.Mnv));
        Assert.True(TestVariant(vr, AlleleCategory.Mnv));
        Assert.True(TestVariant(vr, AlleleCategory.Mnv));
        Assert.True(TestVariant(vr, AlleleCategory.Mnv));

        // Testing ./.  .  ./1  1/.
        //chr1  1000  .  A  T  25  PASS  DP=0    GT:GQ:AD:VF:NL:SB:NC  ./.:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  2000  .  A  T  25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  .:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  3000  .  A  T  25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  ./1:25:0,0:0.0000:23:0.0000:0.0010
        //chr1  4000  .  A  T  25  PASS  DP=500  GT:GQ:AD:VF:NL:SB:NC  1/.:25:0,0:0.0000:23:0.0000:0.0010
        Assert.True(TestVariant(vr, AlleleCategory.Snv));
        Assert.True(TestVariant(vr, AlleleCategory.Snv));
        Assert.True(TestVariant(vr, AlleleCategory.Snv));
        Assert.True(TestVariant(vr, AlleleCategory.Snv));
    }
}
/// <summary>
/// Checks that all 23 header lines of the test VCF are returned, starting with the
/// file-format line and ending with the column-header line.
/// </summary>
public void GetHeaderTests()
{
    const string expectedFirstLine = "##fileformat=VCFv4.1";
    const string expectedLastLine = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tmySample";

    var headerLines = AlleleReader.GetAllHeaderLines(VcfTestFile_1);

    Assert.Equal(23, headerLines.Count);
    Assert.Equal(expectedFirstLine, headerLines[0]);
    Assert.Equal(expectedLastLine, headerLines[22]);
}
/// <summary>
/// Advances <paramref name="vr"/> by one <c>GetNextVariants</c> call and reports whether the
/// first allele it produced has the expected category.
/// </summary>
/// <param name="vr">Reader to advance.</param>
/// <param name="type">Category the first allele is expected to have.</param>
/// <returns>
/// <c>true</c> when the first allele read has category <paramref name="type"/>;
/// <c>false</c> when it has another category or the reader produced no alleles.
/// </returns>
private bool TestVariant(AlleleReader vr, AlleleCategory type)
{
    // The out parameter is always overwritten, so no placeholder list is allocated.
    List<CalledAllele> alleles;
    vr.GetNextVariants(out alleles);

    // An exhausted reader should fail the calling assertion cleanly rather than
    // throw an index or null-reference exception here.
    if (alleles == null || alleles.Count == 0)
    {
        return false;
    }

    return alleles[0].Type == type;
}
// Tests two bams in different folders.
// Expectations:
//  - if outputfolder is not specified, logs are in directory of first bam
//  - if outputfolder specified, logs are in output folder
//  - vcf files have header and both chromosomes, output is where normally expected
private void ExecuteTest(int numberOfThreads, string outputFolder = null)
{
    var sourceBam = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
    var stagingRoot = Path.Combine(TestPaths.LocalScratchDirectory, "MultiProcessIn");

    // Stage the same bam twice, each copy in its own directory.
    var firstBam = Stage(sourceBam, "In1", stagingRoot + "1");
    var secondBam = Stage(sourceBam, "In2", stagingRoot + "2");

    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

    string outFolderArgument = string.IsNullOrEmpty(outputFolder)
        ? string.Empty
        : " -OutFolder " + outputFolder;
    string commandLine = string.Format("-B {0},{1} -g {2}{3} -gVCF false", firstBam, secondBam, genomePath, outFolderArgument);

    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { firstBam, secondBam },
        GenomePaths = new[] { genomePath },
        OutputDirectory = outputFolder,
        CommandLineArguments = commandLine.Split(' '),
        VcfWritingParameters = new VcfWritingParameters() { OutputGvcfFile = true }
    };
    options.SetIODirectories("Pisces");

    var factory = new Factory(options);

    // Clear out any output left behind by a previous run.
    foreach (var request in factory.WorkRequests)
    {
        if (File.Exists(request.OutputFilePath))
        {
            File.Delete(request.OutputFilePath);
        }
    }

    Logger.OpenLog(options.LogFolder, options.LogFileName, true);

    var referenceGenome = factory.GetReferenceGenome(options.GenomePaths[0]);
    var processor = new GenomeProcessor(factory, referenceGenome, false, true);
    processor.Execute(numberOfThreads);

    Logger.CloseLog();

    foreach (var request in factory.WorkRequests)
    {
        using (var vcfReader = new AlleleReader(request.OutputFilePath))
        {
            Assert.True(vcfReader.HeaderLines.Any());

            var calledVariants = vcfReader.GetVariants().ToList();
            Assert.Equal(251, calledVariants.Count());
            Assert.Equal("chr17", calledVariants.First().Chromosome);
            Assert.Equal("chr19", calledVariants.Last().Chromosome);
        }
    }

    Assert.True(Directory.GetFiles(options.LogFolder, options.LogFileNameBase).Any());
}
/// <summary>
/// Prepares a filter for the VCF named in <paramref name="settings"/>: captures the input
/// header lines, builds the geometric filter, merges in the options recovered from the
/// original VCF, and derives the output path (<c>*.filtered.vcf</c>, or
/// <c>*.filtered.genome.vcf</c> for genome VCFs).
/// </summary>
/// <param name="settings">Psara options supplying the VCF path, geometric filter parameters and output directory.</param>
public VcfFilter(PsaraOptions settings)
{
    string inputVcfPath = settings.VcfPath;
    string inputVcfName = Path.GetFileName(inputVcfPath);

    _originalHeaderLines = AlleleReader.GetAllHeaderLines(inputVcfPath);
    _geometricFilter = new GeometricFilter(settings.GeometricFilterParameters);
    _psaraOptions = (PsaraOptions)VcfConsumerAppParsingUtils.TryToUpdateWithOriginalOptions(settings, _originalHeaderLines, inputVcfPath);

    // Insert ".filtered" before ".vcf", then move it ahead of ".genome" so genome VCFs
    // keep their ".genome.vcf" ending.
    string filteredName = inputVcfName.Replace(".vcf", ".filtered.vcf");
    _outputFile = Path.Combine(settings.OutputDirectory, filteredName)
        .Replace(".genome.filtered.vcf", ".filtered.genome.vcf");
}
/// <summary>
/// Steps forward with the reader, assembling the list of variants that share the position of
/// <paramref name="CurrentVariant"/>. The first batch found beyond that position is parked in
/// the backlog so that the next call can start from it.
/// </summary>
/// <param name="Reader">Source of further variant batches.</param>
/// <param name="CurrentVariant">Variant whose position is being collected.</param>
/// <param name="alleleOrdering">Comparer used to order the current variant against each batch's first allele.</param>
/// <param name="BackLogExists">On entry, whether <paramref name="TheBackLog"/> holds an unconsumed batch; on exit, whether a batch was parked.</param>
/// <param name="TheBackLog">The parked batch; set to null on exit when nothing is parked.</param>
/// <returns>All variants found at the current position (possibly empty).</returns>
/// <exception cref="InvalidDataException">The comparer returns anything other than 0 or -1 for a batch.</exception>
private static List<CalledAllele> AssembleColocatedList(
    AlleleReader Reader,
    CalledAllele CurrentVariant,
    AlleleCompareByLoci alleleOrdering,
    ref bool BackLogExists,
    ref List<CalledAllele> TheBackLog)
{
    var colocated = new List<CalledAllele>();

    while (true)
    {
        List<CalledAllele> batch;

        if (BackLogExists)
        {
            // Consume the parked batch before reading anything new.
            batch = TheBackLog;
            BackLogExists = false;
        }
        else if (!Reader.GetNextVariants(out batch))
        {
            // Reader exhausted.
            break;
        }

        // -1 if Current comes first, 0 if co-located.
        int ordering = alleleOrdering.OrderAlleles(CurrentVariant, batch.First());

        if (ordering == 0)
        {
            // The batch we just got is at our current position.
            colocated.AddRange(batch);
            continue;
        }

        if (ordering == -1)
        {
            // The batch we just got is after our current position, and needs to go to the backlog.
            TheBackLog = batch;
            BackLogExists = true;
            break;
        }

        throw new InvalidDataException("Vcf needs to be ordered.");
    }

    if (!BackLogExists)
    {
        TheBackLog = null;
    }

    return colocated;
}
/// <summary>
/// Builds a <c>NeighborhoodBuilder</c> over <paramref name="sourceVcf"/> with the given phasing
/// criteria, default variant calling parameters, no genome, and a final argument of 10.
/// </summary>
/// <param name="sourceVcf">Path of the VCF to read variants from.</param>
/// <param name="phasingDistance">Value assigned to <c>PhasingDistance</c>.</param>
/// <param name="passingOnly">Value assigned to <c>PassingVariantsOnly</c>.</param>
/// <param name="minPassingVariantsInNbhd">Value assigned to <c>MinPassingVariantsInNbhd</c>.</param>
/// <returns>The configured builder; the reader it wraps is handed to the builder and not disposed here.</returns>
private NeighborhoodBuilder CreateNbhdBuilder(string sourceVcf, int phasingDistance = 2, bool passingOnly = true, int minPassingVariantsInNbhd = 0)
{
    var vcfReader = new AlleleReader(sourceVcf);

    var criteria = new PhasableVariantCriteria()
    {
        ChrToProcessArray = new string[] { },
        PassingVariantsOnly = passingOnly,
        PhasingDistance = phasingDistance,
        MinPassingVariantsInNbhd = minPassingVariantsInNbhd
    };

    return new NeighborhoodBuilder(criteria, new VariantCallingParameters(), vcfReader, null, 10);
}
/// <summary>
/// Creates the VQR VCF writer for <paramref name="outputFilePath"/>, carrying over the header
/// lines of the input VCF and appending the VQR command line as a header entry.
/// </summary>
/// <param name="options">Supplies the calling, writing and bam-filter parameters, the input VCF path and the quoted command line.</param>
/// <param name="outputFilePath">Path the writer will write to.</param>
/// <returns>The configured <c>VQRVcfWriter</c>.</returns>
public static VQRVcfWriter GetVQRVcfFileWriter(VcfConsumerAppOptions options, string outputFilePath)
{
    var writerConfig = new VcfWriterConfig(
        options.VariantCallingParams,
        options.VcfWritingParams,
        options.BamFilterParams,
        null,
        false,
        false);

    var inputHeaderLines = AlleleReader.GetAllHeaderLines(options.VcfPath);
    var commandLineHeader = "##VQR_cmdline=" + options.QuotedCommandLineArgumentsString;

    return new VQRVcfWriter(outputFilePath, writerConfig, new VcfWriterInputContext(), inputHeaderLines, commandLineHeader);
}
/// <summary>
/// Regression test for an issue where one pool had multiple co-located variants and the
/// other only a reference call, at chr15 92604460. The consensus must contain a single
/// reference call there (and not multiple reference calls).
/// </summary>
public void VennVcf_CombineTwoPoolVariants_MergeRefCalls()
{
    var outDir = TestPaths.LocalScratchDirectory;
    var vcfPathRoot = _TestDataPath;

    string VcfPath_PoolA = Path.Combine(vcfPathRoot, "C64-Ct-4_S17.genome.vcf");
    string VcfPath_PoolB = Path.Combine(vcfPathRoot, "C64-Ct-4_S18.genome.vcf");
    string VcfPath_Consensus = Path.Combine(vcfPathRoot, "ExpectedConsensus2.vcf");
    string OutputPath = Path.Combine(outDir, "Consensus2.vcf");

    // Start from a clean slate so the existence check below proves this run wrote the file.
    if (File.Exists(OutputPath))
    {
        File.Delete(OutputPath);
    }

    VennVcfOptions parameters = new VennVcfOptions();
    parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
    parameters.InputFiles = new string[] { VcfPath_PoolA, VcfPath_PoolB };
    parameters.OutputDirectory = outDir;
    parameters.ConsensusFileName = OutputPath;

    VennProcessor venn = new VennProcessor(parameters.InputFiles, parameters);
    venn.DoPairwiseVenn();

    Assert.True(File.Exists(OutputPath));

    var CombinedVariants = AlleleReader.GetAllVariantsInFile(OutputPath);
    var ExpectedVariants = AlleleReader.GetAllVariantsInFile(VcfPath_Consensus);
    Assert.Equal(ExpectedVariants.Count, CombinedVariants.Count);

    int NumVariantsAtPos92604460 = 0;
    for (int i = 0; i < ExpectedVariants.Count; i++)
    {
        var EVariant = ExpectedVariants[i];
        var Variant = CombinedVariants[i];

        if ((Variant.ReferencePosition == 92604460) && (Variant.Chromosome == "chr15"))
        {
            NumVariantsAtPos92604460++;
        }

        Assert.Equal(EVariant.ToString(), Variant.ToString());
    }

    // Expected value goes first so a failure reports "expected 1, actual N".
    Assert.Equal(1, NumVariantsAtPos92604460);
}
/// <summary>
/// Asserts that two VCF files contain the same alleles in the same order, comparing each
/// pair by its <c>ToString()</c> form.
/// </summary>
/// <param name="expectedResultsFilePath">Path of the VCF holding the expected alleles.</param>
/// <param name="actualResultsFilePath">Path of the VCF produced by the code under test.</param>
private void CompareVariants(string expectedResultsFilePath, string actualResultsFilePath)
{
    List<CalledAllele> results = AlleleReader.GetAllVariantsInFile(actualResultsFilePath);
    List<CalledAllele> expected = AlleleReader.GetAllVariantsInFile(expectedResultsFilePath);

    // Expected value goes first so a failure message labels the two counts correctly.
    Assert.Equal(expected.Count, results.Count);

    for (int i = 0; i < results.Count; i++)
    {
        Assert.Equal(expected[i].ToString(), results[i].ToString());
    }
}
/// <summary>
/// Writes a VCF through the factory-created writer with default (non-genome) output
/// settings and checks the output location and the FILTER header lines at indices 7 to 10.
/// </summary>
public void DefaultVCFOutput()
{
    var appOptions = new PiscesApplicationOptions
    {
        BAMPaths = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup },
        IntervalPaths = new[] { _intervalsChr17, _intervalsChr19, null },
        GenomePaths = new[] { _genomeChr17Chr19 },
        VariantCallingParameters = new VariantCallingParameters()
        {
            MinimumCoverage = 10,
            LowDepthFilter = 10,
            AmpliconBiasFilterThreshold = 0.01F
        },
        VcfWritingParameters = new VcfWritingParameters()
        {
            OutputGvcfFile = false,
        }
    };

    var factory = new Factory(appOptions);

    var context = new VcfWriterInputContext
    {
        QuotedCommandLineString = "myCommandLine",
        SampleName = "mySample",
        ReferenceName = "myReference",
        ContigsByChr = new List<Tuple<string, long>>
        {
            new Tuple<string, long>("chr1", 10001),
            new Tuple<string, long>("chrX", 500)
        }
    };

    var outputFile = factory.GetOutputFile(appOptions.BAMPaths[0]);
    var writer = factory.CreateVcfWriter(outputFile, context);
    var candidates = _defaultCandidates;

    // Release the writer even if writing fails, so the output file is not left locked.
    try
    {
        writer.WriteHeader();
        writer.Write(candidates);
    }
    finally
    {
        writer.Dispose();
    }

    Assert.True(File.Exists(outputFile));
    Assert.Equal(Path.ChangeExtension(_bamChr19, ".vcf"), outputFile);

    // Dispose the reader so the test does not leave the VCF file handle open.
    using (var reader = new AlleleReader(outputFile))
    {
        var header = reader.HeaderLines;

        // Expected value goes first so failure messages label the two sides correctly.
        Assert.Equal("##FILTER=<ID=q30,Description=\"Quality score less than 30\">", header[7]);
        Assert.Equal("##FILTER=<ID=AB,Description=\"Amplicon bias - disparate variant frequencies detected by amplicon\">", header[8]);
        Assert.Equal("##FILTER=<ID=SB,Description=\"Variant strand bias too high\">", header[9]);
        Assert.Equal("##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">", header[10]);
    }
}
/// <summary>
/// Reads two "crushed" VCFs (several alleles on one line) and checks that each
/// heterozygous alt1/alt2 line is unpacked into separate alleles with the expected
/// support counts, frequencies, positions and allele strings.
/// </summary>
public void UnpackAlleles()
{
    // Two example vcf files that have been "crushed".
    var paddedCrushedPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VcfFileWriterTests_Crushed_Padded_expected.vcf");
    var genomeCrushedPath = Path.Combine(TestPaths.LocalTestDataDirectory, "crushed.genome.vcf");

    var paddedAlleles = AlleleReader.GetAllVariantsInFile(paddedCrushedPath);
    var genomeAlleles = AlleleReader.GetAllVariantsInFile(genomeCrushedPath);

    Assert.Equal(8, paddedAlleles.Count);  // 7 lines, but 8 alleles
    Assert.Equal(91, genomeAlleles.Count); // 90 lines, but 91 alleles

    var paddedFirstAlt = paddedAlleles[5];
    var genomeFirstAlt = genomeAlleles[3];
    var paddedSecondAlt = paddedAlleles[6];
    var genomeSecondAlt = genomeAlleles[4];

    // Example one:
    //   total depth = 5394, total variant count = 2387 + 2000 = 4387,
    //   so ref counts ~1007.
    // Example two:
    //   total depth = 532, total variant count = 254 + 254 = 508,
    //   so ref counts ~24.
    Assert.Equal(Genotype.HeterozygousAlt1Alt2, paddedFirstAlt.Genotype);
    Assert.Equal(Genotype.HeterozygousAlt1Alt2, genomeFirstAlt.Genotype);

    Assert.Equal(1007, paddedFirstAlt.ReferenceSupport);
    Assert.Equal(2387, paddedFirstAlt.AlleleSupport);
    Assert.Equal(0.4425, paddedFirstAlt.Frequency, 4);

    Assert.Equal(24, genomeFirstAlt.ReferenceSupport);
    Assert.Equal(254, genomeFirstAlt.AlleleSupport);

    Assert.Equal(10, paddedFirstAlt.ReferencePosition);
    Assert.Equal("AA", paddedFirstAlt.ReferenceAllele);
    Assert.Equal("GA", paddedFirstAlt.AlternateAllele);
    Assert.Equal(223906731, genomeFirstAlt.ReferencePosition);

    Assert.Equal(1007, paddedSecondAlt.ReferenceSupport);
    Assert.Equal(2000, paddedSecondAlt.AlleleSupport);
    Assert.Equal("G", paddedSecondAlt.AlternateAllele);
    Assert.Equal(0.3708, paddedSecondAlt.Frequency, 4);

    Assert.Equal(24, genomeSecondAlt.ReferenceSupport);
    Assert.Equal(254, genomeSecondAlt.AlleleSupport);

    Assert.Equal(223906731, genomeFirstAlt.ReferencePosition);
    Assert.Equal(10, paddedSecondAlt.ReferencePosition);
    Assert.Equal(223906731, genomeSecondAlt.ReferencePosition);
}
/// <summary>
/// Counts the mutations in <paramref name="vcfIn"/> and writes them to a <c>.counts</c> file in
/// <paramref name="outDir"/>. An existing counts file is first preserved as
/// <c>.counts.original</c>, replacing any earlier preserved copy.
/// </summary>
/// <param name="vcfIn">Path of the VCF to count.</param>
/// <param name="outDir">Directory that receives the counts file.</param>
/// <param name="lociCount">When greater than zero, forces the counter's total possible mutations to this value.</param>
/// <returns>Path of the counts file.</returns>
public static string WriteCountsFile(string vcfIn, string outDir, int lociCount)
{
    List<CalledAllele> allelesAtLocus = new List<CalledAllele>();

    var countsPath = Path.Combine(outDir, Path.GetFileName(vcfIn).Replace(".vcf", ".counts"));
    var previousCountsPath = Path.Combine(outDir, Path.GetFileName(vcfIn).Replace(".vcf", ".counts.original"));

    if (File.Exists(countsPath))
    {
        // Keep exactly one preserved copy of the previous counts file.
        if (File.Exists(previousCountsPath))
        {
            File.Delete(previousCountsPath);
        }

        File.Copy(countsPath, previousCountsPath);
        File.Delete(countsPath);
    }

    var mutationCounter = new MutationCounter();

    using (AlleleReader vcfReader = new AlleleReader(vcfIn))
    {
        mutationCounter.StartWriter(countsPath);

        while (vcfReader.GetNextVariants(out allelesAtLocus))
        {
            foreach (var allele in allelesAtLocus)
            {
                try
                {
                    mutationCounter.Add(allele);
                }
                catch (Exception ex)
                {
                    // Log where we were, then let the original exception propagate.
                    Logger.WriteToLog(string.Format("Fatal error processing vcf; Check {0}, position {1}. Exception: {2}", allele.Chromosome, allele.ReferencePosition, ex));
                    throw;
                }
            }
        }

        bool lociCountSupplied = lociCount > 0;
        if (lociCountSupplied)
        {
            mutationCounter.ForceTotalPossibleMutations(lociCount);
        }

        mutationCounter.CloseWriter();
    }

    return countsPath;
}
/// <summary>
/// Feeds a set of deliberately sloppy forced-genotyping VCF lines through
/// <c>VcfLineToAlleles</c>, checking that none throws, that each yields exactly one allele,
/// and that the first and last alleles carry the expected fields.
/// </summary>
public void VcfLineToAllelesTests_SomaticForcedGTExample_PICS_1168()
{
    // Some example crummy input.
    var crummyLines = new List<string>()
    {
        "chr4\t56236582\t1ai\tA\tC\t.\t.\t.\t.\t.\r",
        "chr4\t56236583\t1aii\tA\tAA\t.\t.\t.\t.\t.",
        "chr18\t9888034\t6b\tA\t.\t.\t.\t.\t.\t.blah",
        "chr21\t46644966\t6b\tA\t.\t.\t.\t.\tboo\too",
        "chr21\t33694232\t6b\tA\t.\t.\t.\t.\t.\t.",
        "chr21\t33694239\t6c\tT\t<del>\t.\t.\t.\t.\t.",
        "chr8\t1817367\t6d\tC\tA\t.\t.\t.\t.\t.",
        "chr1\t109465143\tPICS827\tCTGCCATACAGCTTCAACAACAACTT\tATGCCATACAGCTTCAACAACAA\t.\t.\t.\t.\t.",
    };

    var parsedAlleles = new List<CalledAllele>() { };

    foreach (var vcfLine in crummyLines)
    {
        // Make sure nothing throws.
        var allelesOnLine = AlleleReader.VcfLineToAlleles(vcfLine, true);

        // Make sure we only ever read 1 allele per line, since this is somatic input.
        Assert.Equal(1, allelesOnLine.Count());

        parsedAlleles.Add(allelesOnLine[0]);
    }

    // Sanity check results on the first and last lines.
    var firstAllele = parsedAlleles[0];
    Assert.Equal("chr4", firstAllele.Chromosome);
    Assert.Equal(56236582, firstAllele.ReferencePosition);
    Assert.Equal("A", firstAllele.ReferenceAllele);
    Assert.Equal("C", firstAllele.AlternateAllele);

    var lastAllele = parsedAlleles[7];
    Assert.Equal("chr1", lastAllele.Chromosome);
    Assert.Equal(109465143, lastAllele.ReferencePosition);
    Assert.Equal("CTGCCATACAGCTTCAACAACAACTT", lastAllele.ReferenceAllele);
    Assert.Equal("ATGCCATACAGCTTCAACAACAA", lastAllele.AlternateAllele);
}
/// <summary>
/// Reads the next line's alleles from <paramref name="reader"/>, insisting that the line
/// carries at most one allele (i.e. the file is not crushed).
/// </summary>
/// <param name="reader">Reader to advance.</param>
/// <param name="variant">The allele read, or null when the reader reports nothing more to read.</param>
/// <returns>Whatever <c>GetNextVariants</c> returned: true when an allele was read.</returns>
/// <exception cref="ArgumentException">The line held more than one allele.</exception>
public static bool GetNextUncrushedAllele(AlleleReader reader, out CalledAllele variant)
{
    List<CalledAllele> allelesOnLine = new List<CalledAllele>();
    bool gotLine = reader.GetNextVariants(out allelesOnLine);

    variant = null;

    if (!gotLine)
    {
        return false;
    }

    if (allelesOnLine.Count > 1)
    {
        throw new ArgumentException("Input file should not have crushed variants. There should only be one variant per line");
    }

    variant = allelesOnLine[0];
    return true;
}
/// <summary>
/// Seeds the Scylla options with the settings originally used to create the input VCF,
/// re-applies the command line arguments so they take precedence, and saves the resulting
/// options as <c>ScyllaOptions.used.json</c> in the log folder.
/// </summary>
/// <param name="scyllaOptions">Options to update.</param>
private void AdjustOptions(ref ScyllaApplicationOptions scyllaOptions)
{
    List<string> headerLines = AlleleReader.GetAllHeaderLines(scyllaOptions.VcfPath);

    // Where to find the Pisces options used to make the original vcf: prefer a
    // "PiscesLogs" folder next to the vcf, otherwise fall back to the vcf's own folder.
    string vcfDirectory = Path.GetDirectoryName(scyllaOptions.VcfPath);
    string piscesLogDirectory = Path.Combine(vcfDirectory, "PiscesLogs");
    if (!Directory.Exists(piscesLogDirectory))
    {
        piscesLogDirectory = vcfDirectory;
    }

    // Figure out the original settings used, use those as the defaults.
    VcfConsumerAppParsingUtils.TryToUpdateWithOriginalOptions(scyllaOptions, headerLines, piscesLogDirectory);

    // Let anything input on the command line take precedence.
    ApplicationOptionParser.ParseArgs(scyllaOptions.CommandLineArguments);

    // NOTE(review): this saves the _options field rather than the scyllaOptions
    // parameter; presumably they are the same instance. Confirm against the caller.
    string usedOptionsPath = Path.Combine(scyllaOptions.LogFolder, "ScyllaOptions.used.json");
    _options.Save(usedOptionsPath);
}
/// <summary>
/// Test fixture setup: fills <c>RecalCollection</c> with the first allele of each batch read
/// from the depth-reader test VCF, skipping a batch whose position equals that of the most
/// recently added allele.
/// </summary>
public RecalibratedVariantsTests()
{
    RecalCollection = new RecalibratedVariantsCollection();

    var vcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VariantDepthReaderTest.vcf");

    using (var reader = new AlleleReader(vcfPath))
    {
        // The out parameter is always overwritten, so no placeholder list is allocated.
        List<CalledAllele> coLocatedVariantList;

        // Starts as a blank allele so the first position comparison has something to read.
        var lastVariant = new CalledAllele();

        while (reader.GetNextVariants(out coLocatedVariantList))
        {
            var variant = coLocatedVariantList[0];

            if (lastVariant.ReferencePosition == variant.ReferencePosition)
            {
                continue;
            }

            RecalCollection.AddLocus(variant);
            lastVariant = variant;
        }
    }
}
/// <summary>
/// Checks <c>GetVariantsByChromosome</c> on the extensions test VCF: first with only a
/// category filter (expects one MNV and one insertion on chr1), then with an additional
/// skip rule that drops candidates whose reference allele is longer than three bases.
/// </summary>
public void GetVariantsByChromosome()
{
    var vcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VcfReader_Extensions.vcf");
    var categories = new List<AlleleCategory> { AlleleCategory.Insertion, AlleleCategory.Mnv };

    // Simple case. The reader is disposed so the test does not leave the file handle open.
    using (var vcfReader = new AlleleReader(vcfPath))
    {
        var output = vcfReader.GetVariantsByChromosome(true, true, categories);

        Assert.Equal(1, output.Count);
        Assert.True(output.ContainsKey("chr1"));

        var candidateAlleles = new List<CandidateAllele>();
        output.TryGetValue("chr1", out candidateAlleles);

        Assert.Equal(2, candidateAlleles.Count);
        Assert.Equal(AlleleCategory.Mnv, candidateAlleles[0].Type);
        Assert.Equal(AlleleCategory.Insertion, candidateAlleles[1].Type);
    }

    // Custom rule: the predicate marks candidates to skip.
    using (var filteredVcfReader = new AlleleReader(vcfPath))
    {
        var filteredOutput = filteredVcfReader.GetVariantsByChromosome(true, true, categories, candidate => candidate.ReferenceAllele.Length > 3);

        Assert.Equal(1, filteredOutput.Count);
        Assert.True(filteredOutput.ContainsKey("chr1"));

        var filteredCandidateAlleles = new List<CandidateAllele>();
        filteredOutput.TryGetValue("chr1", out filteredCandidateAlleles);

        Assert.Equal(1, filteredCandidateAlleles.Count);
        Assert.False(filteredCandidateAlleles.Any(c => c.ReferenceAllele.Length > 3));
    }
}
/// <summary>
/// Runs the genome processor twice: once where only the second bam has an empty interval
/// file, and once where every bam uses the empty interval file. A bam with empty intervals
/// must produce a VCF with no variants.
/// </summary>
/// <param name="throttle">Passed through as the third <c>GenomeProcessor</c> constructor argument.</param>
private void ExecuteEmptyIntervalsTest(bool throttle)
{
    var testData = TestPaths.LocalTestDataDirectory;

    // ----------------------
    // test when one bam has intervals and the other is empty
    // ----------------------
    var firstBam = Path.Combine(testData, "Chr17Chr19.bam");
    var secondBam = Path.Combine(testData, "Chr17Chr19_removedSQlines.bam");
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");
    var validIntervals = Path.Combine(testData, "chr17only.picard");
    var emptyIntervals = Path.Combine(testData, "empty.picard");
    var mixedOutputFolder = Path.Combine(testData, "EmptyIntervalsTest_Mixed");

    var options = new PiscesApplicationOptions
    {
        BAMPaths = new[] { firstBam, secondBam },
        IntervalPaths = new[] { validIntervals, emptyIntervals },
        GenomePaths = new[] { genomePath },
        OutputDirectory = mixedOutputFolder,
        VcfWritingParameters = new Domain.Options.VcfWritingParameters()
        {
            OutputGvcfFile = true
        }
    };

    var factory = new Factory(options);
    var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle);
    processor.Execute(2);

    // first vcf file should have been processed regularly
    using (var firstVcf = new AlleleReader(factory.WorkRequests.First().OutputFilePath))
    {
        Assert.Equal(11, firstVcf.GetVariants().Count());
    }

    // second vcf file should be empty
    using (var secondVcf = new AlleleReader(factory.WorkRequests.Last().OutputFilePath))
    {
        Assert.Equal(0, secondVcf.GetVariants().Count());
    }

    // ----------------------
    // try again but with both bams using empty intervals
    // ----------------------
    options.IntervalPaths = new[] { emptyIntervals };
    options.OutputDirectory = Path.Combine(testData, "EmptyIntervalsTest_All");

    factory = new Factory(options);
    processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle);
    processor.Execute(2);

    // both vcf files should be empty
    foreach (var request in factory.WorkRequests)
    {
        using (var vcf = new AlleleReader(request.OutputFilePath))
        {
            Assert.Equal(0, vcf.GetVariants().Count());
        }
    }
}
/// <summary>
/// Checks <c>ConvertToCallableNeighborhoods</c> in three situations: no genome, a genome that
/// lacks the neighborhood's chromosome, and a genome that has it. Without usable reference
/// sequence the neighborhood reference substring is "RRR"; with it, the substring is "TAT".
/// </summary>
public void CreateCallableNbhdsTests()
{
    var vcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VeryMutated.genome.vcf");
    var variantReader = new AlleleReader(vcfPath);

    var chr1Neighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(123), new VariantSite(125));
    List<VcfNeighborhood> inputNeighborhoods = new List<VcfNeighborhood>() { chr1Neighborhood };

    // Test 1, genome is NULL
    var builder = new NeighborhoodBuilder(new PhasableVariantCriteria(), new VariantCallingParameters(), variantReader, null, 20);
    var callable = builder.ConvertToCallableNeighborhoods(inputNeighborhoods);

    Assert.Equal(1, callable.Count());
    Assert.Equal(2, callable.First().VcfVariantSites.Count());
    Assert.Equal("chr1", callable[0].ReferenceName);
    Assert.Equal("RRR", callable[0].NbhdReferenceSequenceSubstring);

    // Test 2, genome exists, but doesn't have the right chr
    var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "Bacillus_cereus", "Sequence", "WholeGenomeFasta");
    var chromosomeName = "chr_wrong";
    Genome genome = new Genome(genomePath, new List<string>() { chromosomeName });

    // The returned reference is not used; the call is kept as the original test makes it.
    genome.GetChrReference(chromosomeName);

    builder = new NeighborhoodBuilder(new PhasableVariantCriteria(), new VariantCallingParameters(), variantReader, genome, 20);
    callable = builder.ConvertToCallableNeighborhoods(inputNeighborhoods);

    Assert.Equal(1, callable.Count());
    Assert.Equal(2, callable.First().VcfVariantSites.Count());
    Assert.Equal("chr1", callable[0].ReferenceName);
    Assert.Equal("RRR", callable[0].NbhdReferenceSequenceSubstring);

    // Test 3, genome exists, and DOES have the right chr
    chromosomeName = "chr";
    genome = new Genome(genomePath, new List<string>() { chromosomeName });
    genome.GetChrReference(chromosomeName);

    builder = new NeighborhoodBuilder(new PhasableVariantCriteria(), new VariantCallingParameters(), variantReader, genome, 20);

    var chrNeighborhood = new VcfNeighborhood(0, "chr", new VariantSite(123), new VariantSite(125));
    inputNeighborhoods = new List<VcfNeighborhood>() { chrNeighborhood };
    callable = builder.ConvertToCallableNeighborhoods(inputNeighborhoods);

    Assert.Equal(1, callable.Count());
    Assert.Equal(2, callable.First().VcfVariantSites.Count());
    Assert.Equal("chr", callable[0].ReferenceName);
    Assert.Equal("TAT", callable[0].NbhdReferenceSequenceSubstring);
}
/// <summary>
/// Runs the genome processor over one bam and verifies the alleles found in the resulting VCF.
/// </summary>
/// <param name="bamFilePath">Bam to process (used only when <paramref name="applicationOptions"/> is null).</param>
/// <param name="vcfFilePath">VCF to read back; its extension is switched to <c>genome.vcf</c> when references are checked.</param>
/// <param name="intervalPath">Optional interval file (used only when <paramref name="applicationOptions"/> is null).</param>
/// <param name="expectedVariants">Variants passed to <c>CheckVariants</c> when not in counts-only mode.</param>
/// <param name="fakeReferences">When supplied, a <c>MockGenome</c> built from these replaces the real reference genome.</param>
/// <param name="doCheckVariants">Whether to verify the non-reference calls.</param>
/// <param name="doCheckReferences">Whether to write a genome VCF and verify the reference calls.</param>
/// <param name="expectedNumCoveredPositions">Not used by this method.</param>
/// <param name="threadByChr">Selects the three-argument <c>GenomeProcessor</c> constructor (third argument false).</param>
/// <param name="doCountsOnly">When greater than zero, only the number of variant calls is checked against this value.</param>
/// <param name="doLog">Value for <c>DebugMode</c> in the default options.</param>
/// <param name="callMnvs">Value for <c>CallMNVs</c> in the default options.</param>
/// <param name="applicationOptions">Options to use instead of the defaults built here.</param>
/// <param name="collapse">Value for <c>Collapse</c> in the default options.</param>
public void Execute(
    string bamFilePath,
    string vcfFilePath,
    string intervalPath,
    List<CalledAllele> expectedVariants,
    List<ChrReference> fakeReferences = null,
    bool doCheckVariants = true,
    bool doCheckReferences = false,
    int expectedNumCoveredPositions = 0,
    bool threadByChr = false,
    int doCountsOnly = 0,
    bool doLog = false,
    bool callMnvs = true,
    PiscesApplicationOptions applicationOptions = null,
    bool collapse = true)
{
    if (doCheckReferences)
    {
        vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
    }

    if (applicationOptions == null)
    {
        applicationOptions = new PiscesApplicationOptions
        {
            BAMPaths = new[] { bamFilePath },
            IntervalPaths = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
            GenomePaths = new[] { GenomeDirectory },
            OutputBiasFiles = true,
            DebugMode = doLog,
            CallMNVs = callMnvs,
            MaxGapBetweenMNV = 10,
            MaxSizeMNV = 15,
            Collapse = collapse,
            BamFilterParameters = new BamFilterParameters() { MinimumBaseCallQuality = 20 },
            VariantCallingParameters = new VariantCallingParameters(),
            VcfWritingParameters = new VcfWritingParameters()
            {
                OutputGvcfFile = doCheckReferences,
            },
            CommandLineArguments = new string[] { "some", "cmds" }
        };
    }

    applicationOptions.OutputDirectory = OutputDirectory;

    var factory = GetFactory(applicationOptions);

    IGenome genome;
    if (fakeReferences == null)
    {
        genome = factory.GetReferenceGenome(GenomeDirectory);
    }
    else
    {
        genome = new MockGenome(fakeReferences, GenomeDirectory);
    }

    var processor = threadByChr
        ? new GenomeProcessor(factory, genome, false)
        : new GenomeProcessor(factory, genome);
    processor.Execute(1);

    var alleles = AlleleReader.GetAllVariantsInFile(vcfFilePath);
    var variantCalls = alleles.Where(a => !a.IsRefType).ToList();

    if (doCheckVariants)
    {
        if (doCountsOnly > 0)
        {
            // Expected value goes first so a failure message labels the two counts correctly.
            Assert.Equal(doCountsOnly, variantCalls.Count);
        }
        else
        {
            CheckVariants(variantCalls, expectedVariants);
        }
    }

    if (doCheckReferences)
    {
        var referenceAlleles = alleles.Where(a => a.IsRefType).ToList();

        // Build the variant-position lookup once; rebuilding and scanning it for every
        // allele made this check quadratic in the size of the VCF.
        var variantPositions = new HashSet<int>(variantCalls.Select(v => v.ReferencePosition));

        // make sure no reference calls at variant positions
        Assert.Equal(referenceAlleles.Count, alleles.Count(a => !variantPositions.Contains(a.ReferencePosition)));
    }
}