/// <summary>
/// Single-allele callback that hands the allele back untouched while always reporting
/// <see cref="TypeOfUpdateNeeded.Modify"/>.
/// </summary>
/// <param name="appOptions">Not read by this overload.</param>
/// <param name="recalibrationData">Not read by this overload.</param>
/// <param name="inAllele">The allele to pass through.</param>
/// <param name="outAlleles">Receives a new one-element list containing <paramref name="inAllele"/>.</param>
/// <returns>Always <see cref="TypeOfUpdateNeeded.Modify"/>.</returns>
private static TypeOfUpdateNeeded UpdateAllele(VcfConsumerAppOptions appOptions, bool recalibrationData, CalledAllele inAllele, out List<CalledAllele> outAlleles)
{
    var passThrough = new List<CalledAllele>();
    passThrough.Add(inAllele);

    outAlleles = passThrough;
    return TypeOfUpdateNeeded.Modify;
}
/// <summary>
/// Single-allele callback that re-scores an allele from the recalibration lookup tables.
/// Up to two independent passes run, in this order: the basic pass and the amplicon-edge pass.
/// </summary>
/// <param name="appOptions">Consumer options; cast to <c>VQROptions</c> (an invalid cast throws).</param>
/// <param name="recalibrationData">Lookup tables and the per-chromosome amplicon-edge position list.</param>
/// <param name="inAllele">The allele to re-score; it is updated in place by <c>UpdateVariantQScoreAndRefilter</c>.</param>
/// <param name="outAlleles">Receives a new one-element list containing <paramref name="inAllele"/>.</param>
/// <returns>
/// <see cref="TypeOfUpdateNeeded.Modify"/> if at least one pass ran, otherwise
/// <see cref="TypeOfUpdateNeeded.NoChangeNeeded"/>.
/// </returns>
private static TypeOfUpdateNeeded UpdateAllele(VcfConsumerAppOptions appOptions, QualityRecalibrationData recalibrationData, CalledAllele inAllele, out List<CalledAllele> outAlleles)
{
    outAlleles = new List<CalledAllele> { inAllele };

    var vqrOptions = (VQROptions)appOptions;
    var category = MutationCounter.GetMutationCategory(inAllele);
    var verdict = TypeOfUpdateNeeded.NoChangeNeeded;

    // Pass 1: basic recalibration, only when enabled and the table knows this mutation category.
    bool runBasicPass = vqrOptions.DoBasicChecks
        && recalibrationData.BasicLookupTable.ContainsKey(category);
    if (runBasicPass)
    {
        UpdateVariantQScoreAndRefilter(
            vqrOptions.MaxQScore,
            vqrOptions.VariantCallingParams.MinimumVariantQScoreFilter,
            recalibrationData.BasicLookupTable,
            inAllele,
            category,
            false);
        verdict = TypeOfUpdateNeeded.Modify;
    }

    // Pass 2: amplicon-edge recalibration, only when enabled, the category is known, and this
    // allele's position is listed for its chromosome.
    // NOTE(review): the gate checks AmpliconEdgeVariantsLookupTable but the call passes
    // EdgeRiskLookupTable - confirm the two tables are meant to differ.
    bool runEdgePass = vqrOptions.DoAmpliconPositionChecks
        && recalibrationData.AmpliconEdgeVariantsLookupTable.ContainsKey(category)
        && recalibrationData.AmpliconEdgeVariantsList.ContainsKey(inAllele.Chromosome)
        && recalibrationData.AmpliconEdgeVariantsList[inAllele.Chromosome].Contains(inAllele.ReferencePosition);
    if (runEdgePass)
    {
        UpdateVariantQScoreAndRefilter(
            vqrOptions.MaxQScore,
            vqrOptions.VariantCallingParams.MinimumVariantQScoreFilter,
            recalibrationData.EdgeRiskLookupTable,
            inAllele,
            category,
            true);
        verdict = TypeOfUpdateNeeded.Modify;
    }

    return verdict;
}
/// <summary>
/// Applies the locus-wide callback to a group of co-located alleles, then applies the
/// per-allele callback to each allele the locus-wide callback returned.
/// </summary>
/// <param name="options">Options forwarded to both callbacks.</param>
/// <param name="recalibrationData">Caller-supplied data forwarded to both callbacks.</param>
/// <param name="whatToDoWithAllele">Per-allele callback.</param>
/// <param name="whatToDoWithCoLocatedAlleles">Locus-wide callback; runs first.</param>
/// <param name="variantListFromFile">The co-located alleles to process.</param>
/// <param name="updateNeeded">
/// Overwritten: true when either callback returned anything other than
/// <see cref="TypeOfUpdateNeeded.NoChangeNeeded"/>.
/// </param>
/// <returns>
/// The alleles produced by the per-allele callback, excluding the output of any call that
/// returned <see cref="TypeOfUpdateNeeded.DeleteCompletely"/>.
/// </returns>
private static List<CalledAllele> WhatToDoToAlleles(VcfConsumerAppOptions options, T recalibrationData, UpdateSingleAlleleMethod whatToDoWithAllele, UpdateCoLocatedAllelesMethod whatToDoWithCoLocatedAlleles, List<CalledAllele> variantListFromFile, ref bool updateNeeded)
{
    // Step 1: locus-wide action.
    List<CalledAllele> locusResult;
    TypeOfUpdateNeeded locusVerdict = whatToDoWithCoLocatedAlleles(options, recalibrationData, variantListFromFile, out locusResult);
    updateNeeded = locusVerdict != TypeOfUpdateNeeded.NoChangeNeeded;

    // Step 2: per-allele action on whatever the locus-wide step handed back.
    var survivors = new List<CalledAllele>();
    foreach (var allele in locusResult)
    {
        List<CalledAllele> replacements;
        TypeOfUpdateNeeded alleleVerdict = whatToDoWithAllele(options, recalibrationData, allele, out replacements);

        if (alleleVerdict != TypeOfUpdateNeeded.NoChangeNeeded)
        {
            updateNeeded = true;
        }

        // A deleted allele still counts as an update (flag set above); its replacements are dropped.
        if (alleleVerdict == TypeOfUpdateNeeded.DeleteCompletely)
        {
            continue;
        }

        survivors.AddRange(replacements);
    }

    return survivors;
}
/// <summary>
/// Rewrites a vcf using only a per-allele callback. This forwards to <c>UpdateVcf</c> with
/// <c>NeverUpdateByLoci</c> as the locus-wide callback.
/// </summary>
/// <param name="vcfOut">Output vcf path.</param>
/// <param name="options">Consumer options, forwarded unchanged.</param>
/// <param name="shouldTrimComplexAlleles">Forwarded unchanged.</param>
/// <param name="recalibrationData">Caller-supplied data handed to the callback.</param>
/// <param name="whatToDoWithAllele">Per-allele callback.</param>
/// <param name="canSkipLineWithoutProcessing">Decides whether a group of lines can be skipped, processed or deleted.</param>
/// <param name="getVcfFileWriter">Factory for the output writer.</param>
public static void UpdateVcfAlleleByAllele(string vcfOut, VcfConsumerAppOptions options, bool shouldTrimComplexAlleles, T recalibrationData, UpdateSingleAlleleMethod whatToDoWithAllele, CanSkipVcfLinesMethod canSkipLineWithoutProcessing, GetVcfFileWriter getVcfFileWriter)
{
    UpdateVcf(
        vcfOut,
        options,
        shouldTrimComplexAlleles,
        recalibrationData,
        whatToDoWithAllele,
        NeverUpdateByLoci, // no locus-wide changes in allele-by-allele mode
        canSkipLineWithoutProcessing,
        getVcfFileWriter);
}
/// <summary>
/// Locus-wide callback for adaptive genotyping. Loci whose ploidy is not
/// <c>PloidyModel.DiploidByAdaptiveGT</c> are passed through with
/// <see cref="TypeOfUpdateNeeded.NoChangeNeeded"/>; all other loci are processed as a
/// single-variant or multi-allelic locus and reported as <see cref="TypeOfUpdateNeeded.Modify"/>.
/// </summary>
/// <param name="options">Consumer options; cast to <c>AdaptiveGtOptions</c> (an invalid cast throws).</param>
/// <param name="results">Recalibration results handed to the locus processors.</param>
/// <param name="incomingAlleles">Co-located alleles; must be non-empty because the first one supplies the chromosome.</param>
/// <param name="outGoingAlleles">Set by <c>GetTypeOfUpdate</c>.</param>
/// <returns>Whatever <c>GetTypeOfUpdate</c> returns for the chosen path.</returns>
public static TypeOfUpdateNeeded ProcessLocus(VcfConsumerAppOptions options, RecalibrationResults results, List<CalledAllele> incomingAlleles, out List<CalledAllele> outGoingAlleles)
{
    var callingParams = options.VariantCallingParams;
    var locusPloidy = GenotypeCreator.GetPloidyForThisChr(
        callingParams.PloidyModel,
        callingParams.IsMale,
        incomingAlleles.First().Chromosome);

    // Use somatic call for chrM (original author's note): non-adaptive-GT loci are passed on as-is.
    if (locusPloidy != PloidyModel.DiploidByAdaptiveGT)
    {
        return GetTypeOfUpdate((AdaptiveGtOptions)options, incomingAlleles, TypeOfUpdateNeeded.NoChangeNeeded, out outGoingAlleles);
    }

    var topAlleles = GetTopTwoAlleles(incomingAlleles);

    if (topAlleles.Count == 1)
    {
        // NOTE(review): this uses incomingAlleles[0], not topAlleles[0] - confirm they are always the same allele.
        var singleLocusAlleles = ProcessSingleVariantLocus(incomingAlleles[0], results);
        return GetTypeOfUpdate((AdaptiveGtOptions)options, singleLocusAlleles, TypeOfUpdateNeeded.Modify, out outGoingAlleles);
    }

    var multiLocusAlleles = ProcessMultiAllelicLocus(topAlleles, results);
    return GetTypeOfUpdate((AdaptiveGtOptions)options, multiLocusAlleles, TypeOfUpdateNeeded.Modify, out outGoingAlleles);
}
/// <summary>
/// Per-allele callback that never changes anything.
/// </summary>
/// <param name="appOptions">Not read.</param>
/// <param name="newData">Not read.</param>
/// <param name="inAllele">The allele to pass through.</param>
/// <param name="outAlleles">Receives a new one-element list containing <paramref name="inAllele"/>.</param>
/// <returns>Always <see cref="TypeOfUpdateNeeded.NoChangeNeeded"/>.</returns>
public static TypeOfUpdateNeeded NeverUpdateByAlleleOnly(VcfConsumerAppOptions appOptions, T newData, CalledAllele inAllele, out List<CalledAllele> outAlleles)
{
    var unchanged = new List<CalledAllele>();
    unchanged.Add(inAllele);

    outAlleles = unchanged;
    return TypeOfUpdateNeeded.NoChangeNeeded;
}
/// <summary>
/// Builds a plain <c>VcfFileWriter</c> for <paramref name="outputFilePath"/> from the variant-calling,
/// vcf-writing and bam-filter parameters held in <paramref name="options"/>.
/// </summary>
/// <param name="options">Source of the three parameter sets.</param>
/// <param name="outputFilePath">Path of the vcf to write.</param>
/// <returns>A new writer with a fresh <c>VcfWriterInputContext</c>.</returns>
public static VcfFileWriter GetVcfFileWriter(VcfConsumerAppOptions options, string outputFilePath)
{
    var writerConfig = new VcfWriterConfig(
        options.VariantCallingParams,
        options.VcfWritingParams,
        options.BamFilterParams,
        null,
        false,
        false);

    var inputContext = new VcfWriterInputContext();
    return new VcfFileWriter(outputFilePath, writerConfig, inputContext);
}
/// <summary>
/// Builds a <c>PsaraVcfWriter</c> for <paramref name="outputFilePath"/>. It is given this instance's
/// <c>_originalHeaderLines</c> plus a <c>##Psara_cmdline=</c> header line built from the quoted
/// command-line arguments.
/// </summary>
/// <param name="vcfConsumerOptions">Source of the writer parameters and the quoted command line.</param>
/// <param name="outputFilePath">Path of the vcf to write.</param>
/// <returns>A new writer with a fresh <c>VcfWriterInputContext</c>.</returns>
public PsaraVcfWriter GetPsaraVcfWriter(VcfConsumerAppOptions vcfConsumerOptions, string outputFilePath)
{
    var writerConfig = new VcfWriterConfig(
        vcfConsumerOptions.VariantCallingParams,
        vcfConsumerOptions.VcfWritingParams,
        vcfConsumerOptions.BamFilterParams,
        null,
        false,
        false,
        false);

    var cmdlineHeader = "##Psara_cmdline=" + vcfConsumerOptions.QuotedCommandLineArgumentsString;

    return new PsaraVcfWriter(
        outputFilePath,
        writerConfig,
        new VcfWriterInputContext(),
        _originalHeaderLines,
        cmdlineHeader);
}
/// <summary>
/// Builds a <c>VQRVcfWriter</c> for <paramref name="outputFilePath"/>. The header lines are read from
/// <c>options.VcfPath</c>, and a <c>##VQR_cmdline=</c> header line is built from the quoted
/// command-line arguments.
/// </summary>
/// <param name="options">Source of the writer parameters, the input vcf path and the quoted command line.</param>
/// <param name="outputFilePath">Path of the vcf to write.</param>
/// <returns>A new writer with a fresh <c>VcfWriterInputContext</c>.</returns>
public static VQRVcfWriter GetVQRVcfFileWriter(VcfConsumerAppOptions options, string outputFilePath)
{
    var writerConfig = new VcfWriterConfig(
        options.VariantCallingParams,
        options.VcfWritingParams,
        options.BamFilterParams,
        null,
        false,
        false);

    var inputHeaderLines = AlleleReader.GetAllHeaderLines(options.VcfPath);
    var cmdlineHeader = "##VQR_cmdline=" + options.QuotedCommandLineArgumentsString;

    return new VQRVcfWriter(
        outputFilePath,
        writerConfig,
        new VcfWriterInputContext(),
        inputHeaderLines,
        cmdlineHeader);
}
/// <summary>
/// Test callback: overwrites the allele's chromosome with <c>newData.NewReferenceChr</c> and, when the
/// alternate allele is exactly "T", replaces it with "MadeAChangeHERE".
/// </summary>
/// <param name="appOptions">Not read.</param>
/// <param name="newData">Supplies the replacement chromosome name.</param>
/// <param name="inAllele">The allele to edit in place.</param>
/// <param name="outAlleles">Receives a new one-element list containing the edited <paramref name="inAllele"/>.</param>
/// <returns>Always <see cref="TypeOfUpdateNeeded.Modify"/>.</returns>
private static TypeOfUpdateNeeded UpdateChrToFrog(VcfConsumerAppOptions appOptions, SomeData newData, CalledAllele inAllele, out List<CalledAllele> outAlleles)
{
    inAllele.Chromosome = newData.NewReferenceChr;

    bool hasTAlternate = inAllele.AlternateAllele == "T";
    if (hasTAlternate)
    {
        inAllele.AlternateAllele = "MadeAChangeHERE";
    }

    // The list holds the same (already edited) allele object.
    outAlleles = new List<CalledAllele> { inAllele };
    return TypeOfUpdateNeeded.Modify;
}
/// <summary>
/// Test callback: when a locus carries more than one allele, sets every allele's chromosome to
/// "MultiAllelicSite". Single-allele loci are left untouched.
/// </summary>
/// <param name="appOptions">Not read.</param>
/// <param name="newData">Not read.</param>
/// <param name="inAlleles">The co-located alleles; edited in place.</param>
/// <param name="outAlleles">Receives the same list instance as <paramref name="inAlleles"/>.</param>
/// <returns>Always <see cref="TypeOfUpdateNeeded.Modify"/>, even when nothing was tagged.</returns>
private static TypeOfUpdateNeeded TagMultiAllelicSites(VcfConsumerAppOptions appOptions, SomeData newData, List<CalledAllele> inAlleles, out List<CalledAllele> outAlleles)
{
    if (inAlleles.Count > 1)
    {
        foreach (var allele in inAlleles)
        {
            allele.Chromosome = "MultiAllelicSite";
        }
    }

    outAlleles = inAlleles;
    return TypeOfUpdateNeeded.Modify;
}
/// <summary>
/// We have a number of applications that are vcf-consumers (Psara, Scylla, VennVcf, VQR),
/// and they need to (or should) parse out the original Pisces settings that were used to make the input vcf.
/// When those settings can be recovered, they replace the variant-calling, bam-filter and vcf-writing
/// parameters on <paramref name="optionsToUpdate"/>, which is then re-derived and re-validated.
/// </summary>
/// <param name="optionsToUpdate">The consumer options to update in place.</param>
/// <param name="vcfHeaderLines">Header lines of the input vcf, handed to <c>GetOriginalPiscesOptions</c>.</param>
/// <param name="configFileDir">Config directory, handed to <c>GetOriginalPiscesOptions</c>.</param>
/// <returns>The same <paramref name="optionsToUpdate"/> instance, updated or not.</returns>
public static VcfConsumerAppOptions TryToUpdateWithOriginalOptions(VcfConsumerAppOptions optionsToUpdate, List<string> vcfHeaderLines, string configFileDir)
{
    // Update and revalidate, if required.
    // (The new options parser will automatically revalidate for us.)
    var parser = GetOriginalPiscesOptions(vcfHeaderLines, configFileDir);
    var recoveredOptions = parser.PiscesOptions;

    if (!parser.HadSuccess)
    {
        // Nothing recoverable: leave the caller's options exactly as they were.
        return optionsToUpdate;
    }

    optionsToUpdate.VariantCallingParams = recoveredOptions.VariantCallingParameters;
    optionsToUpdate.BamFilterParams = recoveredOptions.BamFilterParameters;
    optionsToUpdate.VcfWritingParams = recoveredOptions.VcfWritingParameters;

    // Validation is just a subset of the PiscesOptions validation.
    optionsToUpdate.SetDerivedValues();
    optionsToUpdate.Validate();

    return optionsToUpdate;
}
/// <summary>
/// Test callback: when any allele at the locus is a deletion or an insertion, sets every allele's
/// chromosome to "IndelSite". Loci without an indel are left untouched.
/// </summary>
/// <param name="appOptions">Not read.</param>
/// <param name="newData">Not read.</param>
/// <param name="inAlleles">The co-located alleles; edited in place.</param>
/// <param name="outAlleles">Receives the same list instance as <paramref name="inAlleles"/>.</param>
/// <returns>Always <see cref="TypeOfUpdateNeeded.Modify"/>, even when nothing was tagged.</returns>
private static TypeOfUpdateNeeded TagIndelSites(VcfConsumerAppOptions appOptions, SomeData newData, List<CalledAllele> inAlleles, out List<CalledAllele> outAlleles)
{
    // Pass 1: does the locus contain at least one indel?
    bool locusHasIndel = false;
    foreach (var candidate in inAlleles)
    {
        var category = candidate.Type;
        if (category == Domain.Types.AlleleCategory.Deletion || category == Domain.Types.AlleleCategory.Insertion)
        {
            locusHasIndel = true;
            break;
        }
    }

    // Pass 2: tag the whole locus, not only the indel alleles.
    if (locusHasIndel)
    {
        foreach (var allele in inAlleles)
        {
            allele.Chromosome = "IndelSite";
        }
    }

    outAlleles = inAlleles;
    return TypeOfUpdateNeeded.Modify;
}
/// <summary>
/// Runs <c>VcfUpdater.UpdateVcfLociByLoci</c> over "colocated.genome.vcf" with three locus-wide
/// configurations (no edits, tag multi-allelic sites, tag indel sites) and compares each output
/// with its expected vcf.
/// </summary>
public void UpdateVcfTest_TestOnAllLociAlleleAction()
{
    var outDir = Path.Combine(TestPaths.LocalScratchDirectory, "ModifyCoLocated");
    var inputDir = Path.Combine(TestPaths.LocalTestDataDirectory);
    var inputVcfFilePath = Path.Combine(inputDir, "colocated.genome.vcf");

    // Scenario 1: nothing is edited.
    var noChangeOutput = Path.Combine(outDir, "Rewrite_NoChangeToVariants.vcf");
    var noChangeExpected = Path.Combine(inputDir, "VcfReWriter_NoChangeToLoci.vcf");

    // Scenario 2: multi-allelic sites get tagged.
    var multiAllelicOutput = Path.Combine(outDir, "Rewrite_TagMultiAllelicSites.vcf");
    var multiAllelicExpected = Path.Combine(inputDir, "VcfReWriter_TagMultiAllelicSites.vcf");

    // Scenario 3: indel sites get tagged.
    var indelOutput = Path.Combine(outDir, "Rewrite_TagIndelSites.vcf");
    var indelExpected = Path.Combine(inputDir, "VcfReWriter_TagIndelSites.vcf");

    TestUtilities.TestHelper.RecreateDirectory(outDir);

    var myData = new SomeData();
    var options = new VcfConsumerAppOptions { VcfPath = inputVcfFilePath };

    // Turning this off because these tests predate the AB filter.
    // This allows the pre-existing vcf headers to stay the same.
    options.VariantCallingParams.AmpliconBiasFilterThreshold = null;

    // Edit NO lines.
    VcfUpdater<SomeData>.UpdateVcfLociByLoci(
        noChangeOutput, options, true, myData,
        VcfUpdater<SomeData>.NeverUpdateByLoci, CanAlwaysSkipVcfLine, GetVcfFileWriter);

    // TagMultiAllelicSites.
    VcfUpdater<SomeData>.UpdateVcfLociByLoci(
        multiAllelicOutput, options, true, myData,
        TagMultiAllelicSites, CanNeverSkipVcfLine, GetVcfFileWriter);

    // TagIndelSites.
    VcfUpdater<SomeData>.UpdateVcfLociByLoci(
        indelOutput, options, true, myData,
        TagIndelSites, CanNeverSkipVcfLine, GetVcfFileWriter);

    // Check files.
    TestUtilities.TestHelper.CompareFiles(noChangeOutput, noChangeExpected);
    TestUtilities.TestHelper.CompareFiles(multiAllelicOutput, multiAllelicExpected);
    TestUtilities.TestHelper.CompareFiles(indelOutput, indelExpected);
}
/// <summary>
/// Locus-wide callback that never changes anything.
/// </summary>
/// <param name="appOptions">Not read.</param>
/// <param name="newData">Not read.</param>
/// <param name="inAlleles">The co-located alleles.</param>
/// <param name="outAlleles">Receives the same list instance as <paramref name="inAlleles"/> (no copy is made).</param>
/// <returns>Always <see cref="TypeOfUpdateNeeded.NoChangeNeeded"/>.</returns>
public static TypeOfUpdateNeeded NeverUpdateByLoci(
    VcfConsumerAppOptions appOptions,
    T newData,
    List<CalledAllele> inAlleles,
    out List<CalledAllele> outAlleles)
{
    outAlleles = inAlleles;

    return TypeOfUpdateNeeded.NoChangeNeeded;
}
/// <summary>
/// Locus-wide callback that runs the co-located alleles through <c>filter.DoFiltering</c>.
/// </summary>
/// <param name="appOptions">Not read.</param>
/// <param name="filter">The geometric filter to apply.</param>
/// <param name="inAlleles">The co-located alleles to filter.</param>
/// <param name="outAlleles">Receives whatever <c>DoFiltering</c> returns.</param>
/// <returns>Always <see cref="TypeOfUpdateNeeded.Modify"/>.</returns>
public static TypeOfUpdateNeeded UpdateColocatedAlleles(
    VcfConsumerAppOptions appOptions,
    GeometricFilter filter,
    List<CalledAllele> inAlleles,
    out List<CalledAllele> outAlleles)
{
    var filteredAlleles = filter.DoFiltering(inAlleles);
    outAlleles = filteredAlleles;

    return TypeOfUpdateNeeded.Modify;
}
/// <summary>
/// Runs <c>VcfUpdater.UpdateVcfAlleleByAllele</c> over "crushed.genome.vcf" with six different
/// skip/delete/modify configurations, compares each output with its expected vcf, and then
/// spot-checks the most complicated scenario explicitly.
/// </summary>
public void UpdateVcfTest_TestOnSingleAlleleAction()
{
    var outDir = Path.Combine(TestPaths.LocalScratchDirectory, "VcfUpdaterTestsOutDir");
    var inputDir = Path.Combine(TestPaths.LocalTestDataDirectory);
    var inputVcfFilePath = Path.Combine(inputDir, "crushed.genome.vcf");

    // One output/expected pair per scenario.
    var noChangeOutput = Path.Combine(outDir, "RewriteExample1.vcf");
    var noChangeExpected = Path.Combine(inputDir, "VcfReWriter_NoChangeToVariants.vcf");

    var allChangeOutput = Path.Combine(outDir, "RewriteExample2.vcf");
    var allChangeExpected = Path.Combine(inputDir, "VcfReWriter_AllChangeToVariants.vcf");

    var someChangeOutput = Path.Combine(outDir, "RewriteExample3.vcf");
    var someChangeExpected = Path.Combine(inputDir, "VcfReWriter_SomeChangeToVariants.vcf");

    var removeAllOutput = Path.Combine(outDir, "RewriteExample4.vcf");
    var removeAllExpected = Path.Combine(inputDir, "VcfReWriter_RemoveAllVariants.vcf");

    var removeSomeOutput = Path.Combine(outDir, "RewriteExample5.vcf");
    var removeSomeExpected = Path.Combine(inputDir, "VcfReWriter_RemoveSomeVariants.vcf");

    var complexOutput = Path.Combine(outDir, "RewriteExample6.vcf");
    var complexExpected = Path.Combine(inputDir, "VcfReWriter_ComplexChangesVariants.vcf");

    TestUtilities.TestHelper.RecreateDirectory(outDir);

    var myData = new SomeData();
    var options = new VcfConsumerAppOptions { VcfPath = inputVcfFilePath };

    // Turning this off because these tests predate the AB filter.
    // This allows the pre-existing vcf headers to stay the same.
    options.VariantCallingParams.AmpliconBiasFilterThreshold = null;

    // Edit NO lines.
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        noChangeOutput, options, true, myData, UpdateChrToFrog, CanAlwaysSkipVcfLine, GetVcfFileWriter);

    // Edit ALL lines.
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        allChangeOutput, options, true, myData, UpdateChrToFrog, CanNeverSkipVcfLine, GetVcfFileWriter);

    // Do something silly to lines with a "C" allele.
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        someChangeOutput, options, true, myData, UpdateChrToFrog, CanSometimesSkipVcfLine, GetVcfFileWriter);

    // Remove all vcf entries.
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        removeAllOutput, options, true, myData, UpdateChrToFrog, CanAlwaysDeleteVcfLine, GetVcfFileWriter);

    // Remove all vcf entries with a "C" allele.
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        removeSomeOutput, options, true, myData, UpdateChrToFrog, CanSometimesDeleteVcfLine, GetVcfFileWriter);

    // Look at lines with a "C" allele.
    // If lines with a C allele (ref or alt) have T as an alt, make the chr = "MadeAChangeHERE".
    // If lines with a C allele (ref or alt) DO NOT have T as an alt, delete the line entirely.
    // NOTE(review): the assertion at the end of this test expects the chromosome to be "FrogChr",
    // not "MadeAChangeHERE" - confirm which field UpdateChrToFrogOrDelete actually rewrites.
    VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
        complexOutput, options, true, myData, UpdateChrToFrogOrDelete, CanSometimesSkipVcfLine, GetVcfFileWriter);

    // So, this one is left as is:
    //chr1 223906730.G. 100 PASS DP = 532 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:532:532:0.00:20:-100.0000

    // This one, the C->A should get removed, and the C->T should have chr = "MadeAChangeHERE".
    //chr1 223906731.C A,T 100 PASS DP = 532 GT: GQ: AD: DP: VF: NL: SB 1 / 2:100:254,254:532:0.95:20:-100.0000

    // These are also all removed (9 lines; with the C->A allele above that makes the 10 removals asserted below):
    //chr1 223906744.C. 100 PASS DP = 532 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:532:532:0.00:20:-100.0000
    //chr1 228526603.C. 100 PASS DP = 536 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:536:536:0.00:20:-100.0000
    //chr1 228526606.C. 100 PASS DP = 536 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:536:536:0.00:20:-100.0000
    //chr1 247812092.C. 100 PASS DP = 532 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:532:532:0.00:20:-100.0000
    //chr1 247812094.C. 100 PASS DP = 532 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:532:532:0.00:20:-100.0000
    //chr1 247812096.C. 100 PASS DP = 532 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:532:532:0.00:20:-100.0000
    //chr1 247812099.C. 100 PASS DP = 532 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:532:532:0.00:20:-100.0000
    //chr1 247812108.C. 100 PASS DP = 532 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:532:532:0.00:20:-100.0000
    //chr2 55862775.C. 100 PASS DP = 532 GT: GQ: AD: DP: VF: NL: SB 0 / 0:100:532:532:0.00:20:-100.0000

    // Check files.
    TestUtilities.TestHelper.CompareFiles(noChangeOutput, noChangeExpected);
    TestUtilities.TestHelper.CompareFiles(allChangeOutput, allChangeExpected);
    TestUtilities.TestHelper.CompareFiles(someChangeOutput, someChangeExpected);
    TestUtilities.TestHelper.CompareFiles(removeAllOutput, removeAllExpected);
    TestUtilities.TestHelper.CompareFiles(removeSomeOutput, removeSomeExpected);
    TestUtilities.TestHelper.CompareFiles(complexOutput, complexExpected);

    // Explicit checks for the complicated one, so users can see what we are looking for.
    var variantsTest6 = AlleleReader.GetAllVariantsInFile(complexOutput);
    var variantsInput = AlleleReader.GetAllVariantsInFile(inputVcfFilePath);

    Assert.Equal(91, variantsInput.Count());
    Assert.Equal(91 - 10, variantsTest6.Count()); // accounting for removed lines

    // First variant: identical in input and output.
    Assert.Equal(223906728, variantsInput[0].ReferencePosition);
    Assert.Equal("chr1", variantsInput[0].Chromosome);
    Assert.Equal(223906728, variantsTest6[0].ReferencePosition);
    Assert.Equal("chr1", variantsTest6[0].Chromosome);

    // Fourth variant: same position, but the output chromosome has been rewritten.
    Assert.Equal(223906731, variantsInput[3].ReferencePosition);
    Assert.Equal("chr1", variantsInput[3].Chromosome);
    Assert.Equal(223906731, variantsTest6[3].ReferencePosition);
    Assert.Equal("FrogChr", variantsTest6[3].Chromosome);
}
/// <summary>
/// Take in a vcf, do stuff to it, write out a vcf. Streamed line by line, loci by loci, so as not to blow up your computer.
/// For each group of co-located lines, <paramref name="canSkipLinesWithoutProcessing"/> decides what happens:
/// <see cref="TypeOfUpdateNeeded.NoChangeNeeded"/> writes the original lines as they are;
/// <see cref="TypeOfUpdateNeeded.Modify"/> converts the lines to alleles and runs both callbacks, writing the
/// modified alleles if either callback reported a change and the original lines otherwise;
/// any other value (including <see cref="TypeOfUpdateNeeded.DeleteCompletely"/>) writes nothing for the group.
/// </summary>
/// <param name="vcfOut"> the output file name</param>
/// <param name="options"> all the parameters associated with writing out a vcf</param>
/// <param name="shouldTrimComplexAlleles">if ACGT-> ACCT is ok, or if you want it trimmed to G -> C. this might affect position and ordering. Generally turn it OFF for processing vcfs, post scylla. </param>
/// <param name="recalibrationData">the data you need for doing your "stuff" </param>
/// <param name="whatToDoWithSingleAllele">how you want to change each allele</param>
/// <param name="whatToDoWithCoLocatedAlleles">how you want to change each set of alleles, by loci</param>
/// <param name="canSkipLinesWithoutProcessing">when you can skip lines (saves CPU time)</param>
/// <param name="getVcfFileWriter">what your special vcf writer should be, includes special header lines, etc</param>
private static void UpdateVcf(string vcfOut, VcfConsumerAppOptions options, bool shouldTrimComplexAlleles, T recalibrationData, UpdateSingleAlleleMethod whatToDoWithSingleAllele, UpdateCoLocatedAllelesMethod whatToDoWithCoLocatedAlleles, CanSkipVcfLinesMethod canSkipLinesWithoutProcessing, GetVcfFileWriter getVcfFileWriter)
{
    using (AlleleReader reader = new AlleleReader(options.VcfPath, shouldTrimComplexAlleles))
    using (VcfFileWriter writer = getVcfFileWriter(options, vcfOut))
    {
        writer.WriteHeader();
        writer.FlushBuffer();

        // The reader returns one line it read past the current group;
        // that line is handed back to it on the next call.
        string carriedOverLine = null;

        for (;;)
        {
            // Get the next group to process.
            string nextCarriedOverLine;
            var coLocatedLines = reader.CloseColocatedLines(carriedOverLine, out nextCarriedOverLine);
            carriedOverLine = nextCarriedOverLine;

            // An empty group is how we know we are done.
            if (coLocatedLines.Count == 0)
            {
                break;
            }

            TypeOfUpdateNeeded lineVerdict = canSkipLinesWithoutProcessing(coLocatedLines);

            if (lineVerdict == TypeOfUpdateNeeded.NoChangeNeeded)
            {
                writer.Write(coLocatedLines);
            }
            else if (lineVerdict == TypeOfUpdateNeeded.Modify)
            {
                // Then we need to change them into alleles and do stuff to them.
                var allelesFromLines = AlleleReader.VcfLinesToAlleles(coLocatedLines);

                bool locusChanged = false;
                List<CalledAllele> allelesToWrite = WhatToDoToAlleles(
                    options,
                    recalibrationData,
                    whatToDoWithSingleAllele,
                    whatToDoWithCoLocatedAlleles,
                    allelesFromLines,
                    ref locusChanged);

                if (locusChanged)
                {
                    writer.Write(allelesToWrite);
                }
                else
                {
                    // No callback reported a change: write the original text of the lines.
                    writer.Write(coLocatedLines);
                }
            }
            // DeleteCompletely (and any other value): write nothing for this group.
        }
    }
}