예제 #1
0
        /// <summary>
        /// Rewrites the vcf at <c>options.VcfPath</c> in crushed or uncrushed form, writing to a
        /// new path derived from the input path.
        /// </summary>
        /// <remarks>
        /// When <c>options.VcfWritingParams.ForceCrush</c> has a value, that value selects the mode
        /// and <c>AllowMultipleVcfLinesPerLoci</c> is set to its negation; otherwise the mode is "uncrush".
        /// The output path is the input path with ".vcf" replaced by ".crushed.vcf" or ".uncrushed.vcf".
        /// If the input path does not contain ".vcf", the suffix is appended instead so that the output
        /// never aliases the input. Any file already present at the output path is deleted first.
        /// </remarks>
        /// <param name="options">Reformatting options; its <c>VcfWritingParams</c> may be modified by this call.</param>
        public static void DoReformating(ReformatOptions options)
        {
            var inputFile = options.VcfPath;
            var crush     = false;

            if (options.VcfWritingParams.ForceCrush.HasValue)
            {
                crush = options.VcfWritingParams.ForceCrush.Value;
                options.VcfWritingParams.AllowMultipleVcfLinesPerLoci = !crush;
            }

            var outputSuffix = crush ? ".crushed.vcf" : ".uncrushed.vcf";
            var outputFile   = inputFile.Replace(".vcf", outputSuffix);

            // string.Replace hands back the input unchanged when ".vcf" does not occur in it
            // (e.g. an upper-case ".VCF" extension). Without this guard the output path would be
            // the input path, and the delete below would remove the input vcf before it is read.
            if (outputFile == inputFile)
            {
                outputFile = inputFile + outputSuffix;
            }

            Console.WriteLine((crush ? "crushing " : "uncrushing ") + inputFile + "...");

            //Start from a clean output file.
            if (File.Exists(outputFile))
            {
                File.Delete(outputFile);
            }

            //Update Vcf, allele by allele, writing the result to outputFile.
            VcfUpdater<bool>.UpdateVcfAlleleByAllele(outputFile, options, false, true, UpdateAllele, CanSkipNeverVcfLine,
                                                     GetVcfFileWriter);
        }
        /// <summary>
        /// Writes a recalibrated copy of <c>options.VcfPath</c> into <c>options.OutputDirectory</c>,
        /// named after the input file with ".recal" appended.
        /// </summary>
        /// <remarks>
        /// Any existing file at the output path is deleted first. Exceptions raised while loading the
        /// recalibration tables or updating the vcf are logged and not rethrown.
        /// </remarks>
        /// <param name="countsFilePaths">Result files passed through to <c>GetRecalibrationTables</c>.</param>
        /// <param name="options">Supplies the input vcf path and output directory, and is passed to the updater.</param>
        public static void Recalibrate(SignatureSorterResultFiles countsFilePaths, VQROptions options)
        {
            string inputVcfName     = Path.GetFileName(options.VcfPath);
            string recalibratedPath = Path.Combine(options.OutputDirectory, inputVcfName + ".recal");

            //Clear out any output left over from a previous run.
            if (File.Exists(recalibratedPath))
            {
                File.Delete(recalibratedPath);
            }

            try
            {
                //Load the tables built from the counts files.
                var tables = GetRecalibrationTables(countsFilePaths, options);

                //Walk the vcf allele by allele, applying the table data.
                VcfUpdater<QualityRecalibrationData>.UpdateVcfAlleleByAllele(
                    recalibratedPath, options, false, tables, UpdateAllele, CanSkipVcfLines,
                    VQRVcfWriter.GetVQRVcfFileWriter);

                //Only report success when an output file was actually produced.
                if (!File.Exists(recalibratedPath))
                {
                    return;
                }

                Logger.WriteToLog("The following vcf was recalibrated: " + options.VcfPath);
            }
            catch (Exception ex)
            {
                //Failure is reported through the log only; nothing is rethrown to the caller.
                Logger.WriteToLog("Recalibrate failed for " + options.VcfPath);
                Logger.WriteToLog("Exception: " + ex);
            }
        }
예제 #3
0
        /// <summary>
        /// Rewrites <paramref name="vcfIn"/> locus by locus into <paramref name="outDir"/> under a "recal" file name.
        /// </summary>
        /// <remarks>
        /// Output name: if the input file name contains "genome.", every "genome" in it is replaced by
        /// "recal"; otherwise ".vcf" is replaced by ".recal.vcf". An existing file at the output path is
        /// deleted before writing. Every vcf line is marked <c>TypeOfUpdateNeeded.Modify</c>.
        /// </remarks>
        /// <param name="vcfIn">Path of the input vcf; only its file name is used to build the output name.</param>
        /// <param name="outDir">Directory that receives the rewritten vcf.</param>
        /// <param name="options">Options passed through to the updater.</param>
        /// <param name="results">Recalibration results passed through to the updater.</param>
        public static void RewriteVcf(string vcfIn, string outDir, AdaptiveGtOptions options, RecalibrationResults results)
        {
            Logger.WriteToLog("Rewriting VCF.");

            string inputName  = Path.GetFileName(vcfIn);
            string outputName = inputName.Contains("genome.")
                ? inputName.Replace("genome", "recal")
                : inputName.Replace(".vcf", ".recal.vcf");
            string vcfOut     = Path.Combine(outDir, outputName);

            //Remove stale output before writing.
            if (File.Exists(vcfOut))
            {
                File.Delete(vcfOut);
            }

            VcfUpdater<RecalibrationResults>.UpdateVcfLociByLoci(
                vcfOut, options, false, results, LocusProcessor.ProcessLocus,
                (List<string> vcfLine) => TypeOfUpdateNeeded.Modify, GetAdaptiveGtWriter);

            //NOTE(review): this message says "filtering" although the method rewrites a vcf - confirm the wording is intended.
            Logger.WriteToLog("filtering complete.");
        }
예제 #4
0
        /// <summary>
        /// Runs the locus-by-locus vcf update with <c>_geometricFilter</c>, writing to <c>_outputFile</c>.
        /// </summary>
        /// <remarks>
        /// Logs the input path before starting and a completion message afterwards. An existing file at
        /// <c>_outputFile</c> is deleted before the update runs.
        /// </remarks>
        public void DoFiltering()
        {
            var startMessage = "filtering " + _psaraOptions.VcfPath + "...";
            Logger.WriteToLog(startMessage);

            //Remove any previous output so the writer starts from scratch.
            var staleOutputExists = File.Exists(_outputFile);
            if (staleOutputExists)
            {
                File.Delete(_outputFile);
            }

            VcfUpdater<GeometricFilter>.UpdateVcfLociByLoci(
                _outputFile,
                _psaraOptions,
                false,
                _geometricFilter,
                UpdateColocatedAlleles,
                CanNeverSkipVcfLine,
                GetPsaraVcfWriter);

            Logger.WriteToLog("filtering complete.");
        }
예제 #5
0
        /// <summary>
        /// Runs <c>UpdateVcfLociByLoci</c> three times over "colocated.genome.vcf" with different
        /// update/skip callbacks and compares each output with its expected file.
        /// </summary>
        public void UpdateVcfTest_TestOnAllLociAlleleAction()
        {
            //locations
            var outDir   = Path.Combine(TestPaths.LocalScratchDirectory, "ModifyCoLocated");
            var inputDir = Path.Combine(TestPaths.LocalTestDataDirectory);
            var inputVcfFilePath = Path.Combine(inputDir, "colocated.genome.vcf");

            //files written by this test
            var untouchedOut   = Path.Combine(outDir, "Rewrite_NoChangeToVariants.vcf");
            var multiAllelicOut = Path.Combine(outDir, "Rewrite_TagMultiAllelicSites.vcf");
            var indelOut       = Path.Combine(outDir, "Rewrite_TagIndelSites.vcf");

            //checked-in files the outputs are compared against
            var untouchedExpected    = Path.Combine(inputDir, "VcfReWriter_NoChangeToLoci.vcf");
            var multiAllelicExpected = Path.Combine(inputDir, "VcfReWriter_TagMultiAllelicSites.vcf");
            var indelExpected        = Path.Combine(inputDir, "VcfReWriter_TagIndelSites.vcf");

            TestUtilities.TestHelper.RecreateDirectory(outDir);

            var myData  = new SomeData();
            var options = new VcfConsumerAppOptions { VcfPath = inputVcfFilePath };

            //The AB filter is switched off because these tests predate it; this keeps the pre-existing vcf headers unchanged.
            options.VariantCallingParams.AmpliconBiasFilterThreshold = null;

            //case 1: edit no lines
            VcfUpdater<SomeData>.UpdateVcfLociByLoci(
                untouchedOut, options, true, myData,
                VcfUpdater<SomeData>.NeverUpdateByLoci, CanAlwaysSkipVcfLine, GetVcfFileWriter);

            //case 2: TagMultiAllelicSites
            VcfUpdater<SomeData>.UpdateVcfLociByLoci(
                multiAllelicOut, options, true, myData,
                TagMultiAllelicSites, CanNeverSkipVcfLine, GetVcfFileWriter);

            //case 3: TagIndelSites
            VcfUpdater<SomeData>.UpdateVcfLociByLoci(
                indelOut, options, true, myData,
                TagIndelSites, CanNeverSkipVcfLine, GetVcfFileWriter);

            //all three outputs must match their expected files
            TestUtilities.TestHelper.CompareFiles(untouchedOut, untouchedExpected);
            TestUtilities.TestHelper.CompareFiles(multiAllelicOut, multiAllelicExpected);
            TestUtilities.TestHelper.CompareFiles(indelOut, indelExpected);
        }
예제 #6
0
        /// <summary>
        /// Runs <c>UpdateVcfAlleleByAllele</c> six times over "crushed.genome.vcf" with different
        /// update/skip callbacks, compares each output with its expected file, and then spot-checks
        /// the variants of the sixth (most involved) output.
        /// </summary>
        public void UpdateVcfTest_TestOnSingleAlleleAction()
        {
            //locations
            var outDir   = Path.Combine(TestPaths.LocalScratchDirectory, "VcfUpdaterTestsOutDir");
            var inputDir = Path.Combine(TestPaths.LocalTestDataDirectory);
            var inputVcfFilePath = Path.Combine(inputDir, "crushed.genome.vcf");

            //files written by this test
            var noChangeOut      = Path.Combine(outDir, "RewriteExample1.vcf");
            var allChangeOut     = Path.Combine(outDir, "RewriteExample2.vcf");
            var someChangeOut    = Path.Combine(outDir, "RewriteExample3.vcf");
            var removeAllOut     = Path.Combine(outDir, "RewriteExample4.vcf");
            var removeSomeOut    = Path.Combine(outDir, "RewriteExample5.vcf");
            var complexChangeOut = Path.Combine(outDir, "RewriteExample6.vcf");

            //checked-in files the outputs are compared against
            var noChangeExpected      = Path.Combine(inputDir, "VcfReWriter_NoChangeToVariants.vcf");
            var allChangeExpected     = Path.Combine(inputDir, "VcfReWriter_AllChangeToVariants.vcf");
            var someChangeExpected    = Path.Combine(inputDir, "VcfReWriter_SomeChangeToVariants.vcf");
            var removeAllExpected     = Path.Combine(inputDir, "VcfReWriter_RemoveAllVariants.vcf");
            var removeSomeExpected    = Path.Combine(inputDir, "VcfReWriter_RemoveSomeVariants.vcf");
            var complexChangeExpected = Path.Combine(inputDir, "VcfReWriter_ComplexChangesVariants.vcf");

            TestUtilities.TestHelper.RecreateDirectory(outDir);

            var myData  = new SomeData();
            var options = new VcfConsumerAppOptions { VcfPath = inputVcfFilePath };

            //The AB filter is switched off because these tests predate it; this keeps the pre-existing vcf headers unchanged.
            options.VariantCallingParams.AmpliconBiasFilterThreshold = null;

            //case 1: edit NO lines
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                noChangeOut, options, true, myData, UpdateChrToFrog, CanAlwaysSkipVcfLine, GetVcfFileWriter);

            //case 2: edit ALL lines
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                allChangeOut, options, true, myData, UpdateChrToFrog, CanNeverSkipVcfLine, GetVcfFileWriter);

            //case 3: only touch lines with a "C" allele
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                someChangeOut, options, true, myData, UpdateChrToFrog, CanSometimesSkipVcfLine, GetVcfFileWriter);

            //case 4: remove all vcf entries
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                removeAllOut, options, true, myData, UpdateChrToFrog, CanAlwaysDeleteVcfLine, GetVcfFileWriter);

            //case 5: remove all vcf entries with a "C" allele
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                removeSomeOut, options, true, myData, UpdateChrToFrog, CanSometimesDeleteVcfLine, GetVcfFileWriter);

            //case 6: look at lines with a "C" allele (ref or alt).
            //  - If such a line has T as an alt, its chr is changed.
            //  - If such a line does NOT have T as an alt, the line is deleted entirely.
            //NOTE(review): the earlier note here named the new chr "MadeAChangeHERE", but the assertion
            //at the end of this test expects "FrogChr" - confirm against UpdateChrToFrogOrDelete.
            VcfUpdater<SomeData>.UpdateVcfAlleleByAllele(
                complexChangeOut, options, true, myData, UpdateChrToFrogOrDelete, CanSometimesSkipVcfLine, GetVcfFileWriter);

            //Walk-through of case 6 on the input lines.
            //
            //This one is left as is:
            //chr1    223906730.G.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //
            //For this one, the C->A should get removed and the C->T should get the changed chr:
            //chr1    223906731.C   A,T 100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    1 / 2:100:254,254:532:0.95:20:-100.0000
            //
            //These are also all removed:
            //chr1    223906744.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    228526603.C.   100 PASS DP = 536  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:536:536:0.00:20:-100.0000
            //chr1    228526606.C.   100 PASS DP = 536  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:536:536:0.00:20:-100.0000
            //chr1    247812092.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    247812094.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    247812096.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    247812099.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr1    247812108.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000
            //chr2    55862775.C.   100 PASS DP = 532  GT: GQ: AD: DP: VF: NL: SB    0 / 0:100:532:532:0.00:20:-100.0000

            //every output must match its expected file
            TestUtilities.TestHelper.CompareFiles(noChangeOut, noChangeExpected);
            TestUtilities.TestHelper.CompareFiles(allChangeOut, allChangeExpected);
            TestUtilities.TestHelper.CompareFiles(someChangeOut, someChangeExpected);
            TestUtilities.TestHelper.CompareFiles(removeAllOut, removeAllExpected);
            TestUtilities.TestHelper.CompareFiles(removeSomeOut, removeSomeExpected);
            TestUtilities.TestHelper.CompareFiles(complexChangeOut, complexChangeExpected);

            //Explicit checks for the complicated case, so readers can see what we are looking for.
            var rewrittenVariants = AlleleReader.GetAllVariantsInFile(complexChangeOut);
            var originalVariants  = AlleleReader.GetAllVariantsInFile(inputVcfFilePath);

            const int originalCount = 91;
            const int removedCount  = 10;

            Assert.Equal(originalCount, originalVariants.Count());
            Assert.Equal(originalCount - removedCount, rewrittenVariants.Count()); //accounting for removed lines

            //first variant: same position and chr before and after
            Assert.Equal(223906728, originalVariants[0].ReferencePosition);
            Assert.Equal("chr1", originalVariants[0].Chromosome);

            Assert.Equal(223906728, rewrittenVariants[0].ReferencePosition);
            Assert.Equal("chr1", rewrittenVariants[0].Chromosome);

            //fourth variant: same position, chr changed in the rewritten file
            Assert.Equal(223906731, originalVariants[3].ReferencePosition);
            Assert.Equal("chr1", originalVariants[3].Chromosome);

            Assert.Equal(223906731, rewrittenVariants[3].ReferencePosition);
            Assert.Equal("FrogChr", rewrittenVariants[3].Chromosome);
        }