Example #1
0
        /// <summary>
        /// Runs a pairwise Venn over two empty input VCFs and checks that a consensus VCF is
        /// written and that it contains no variants.
        /// </summary>
        public void VennVcf_EmptyInputTest()
        {
            var outDir      = TestPaths.LocalTestDataDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfA       = Path.Combine(VcfPathRoot, "Empty_S1.vcf");
            string VcfB       = Path.Combine(VcfPathRoot, "Empty_S2.vcf");
            string OutputPath = Path.Combine(outDir, "EmptyConsensus.vcf");

            // Remove output left behind by an earlier run; otherwise the File.Exists
            // assertion below would pass even if this run wrote nothing.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;
            parameters.OutputDirectory   = outDir;
            parameters.DebugMode         = true;

            VennProcessor Venn = new VennProcessor(new string[] { VcfA, VcfB }, parameters);

            Venn.DoPairwiseVenn();

            Assert.True(File.Exists(OutputPath));
            var observedVariants = AlleleReader.GetAllVariantsInFile(OutputPath);

            Assert.Empty(observedVariants);
        }
        /// <summary>
        /// Asserts that two VCF files hold the same variants, comparing only the genotype and
        /// alternate allele of each variant (other fields, such as Q-scores, are not compared).
        /// Every batch returned by the readers is compared, not just the first one.
        /// </summary>
        /// <param name="file1">Path to the first VCF file.</param>
        /// <param name="file2">Path to the second VCF file.</param>
        public static void AssertSameVariants_QScoreAgnostic(string file1, string file2)
        {
            using (var reader1 = new AlleleReader(file1))
            using (var reader2 = new AlleleReader(file2))
            {
                List<CalledAllele> variant1List;
                List<CalledAllele> variant2List;

                while (true)
                {
                    // Each call hands back the next batch of alleles; false means the file is exhausted.
                    bool hasMore1 = reader1.GetNextVariants(out variant1List);
                    bool hasMore2 = reader2.GetNextVariants(out variant2List);

                    // Both files must run out of records at the same point.
                    Assert.Equal(hasMore1, hasMore2);

                    if (!hasMore1)
                    {
                        break;
                    }

                    Assert.Equal(variant1List.Count, variant2List.Count);

                    for (int i = 0; i < variant1List.Count; i++)
                    {
                        var variant1 = variant1List[i];
                        var variant2 = variant2List[i];

                        Assert.Equal(variant1.Genotype, variant2.Genotype);
                        Assert.Equal(variant1.AlternateAllele, variant2.AlternateAllele);
                    }
                }
            }
        }
Example #3
0
        // idea is to keep track of the disparity between two pools as a measure of FFPE degradation,
        // or overall oxidation affecting tissue sample.


        //possible SNP changes:
        //
        //
        // *    A   C   G   T
        //  A   *   1   2   3
        //  C   4   *   5   6
        //  G   7   8   *   9
        //  T   10  11  12  *
        //

        /// <summary>
        /// Streams every variant from <c>options.VcfPath</c> into a basic counter and an
        /// edge-region counter, optionally writes the two counts files, and returns the
        /// three output paths obtained from <c>CleanUpOldFiles</c>.
        /// </summary>
        /// <param name="options">VQR options supplying the input vcf, output directory and check switches.</param>
        /// <returns>The basic-counts, edge-counts and edge-variants paths.</returns>
        public static SignatureSorterResultFiles StrainVcf(VQROptions options)
        {
            List<CalledAllele> variantBatch;
            var basicCounts = new CountData();
            var edgeCounts  = new EdgeIssueCountData(options.ExtentofEdgeRegion);

            string countsFile       = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".counts");
            string edgeCountsFile   = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".edgecounts");
            string edgeVariantsFile = CleanUpOldFiles(options.VcfPath, options.OutputDirectory, ".edgevariants");

            using (var vcfReader = new AlleleReader(options.VcfPath))
            {
                while (vcfReader.GetNextVariants(out variantBatch))
                {
                    foreach (var allele in variantBatch)
                    {
                        try
                        {
                            basicCounts.Add(allele);
                            edgeCounts.Add(allele, edgeVariantsFile);
                        }
                        catch (Exception ex)
                        {
                            // Log where processing failed, then let the original exception propagate.
                            var message = string.Format("Fatal error processing vcf; Check {0}, position {1}.  Exception: {2}",
                                                        allele.Chromosome, allele.ReferencePosition, ex);
                            Logger.WriteToLog(message);
                            throw;
                        }
                    }
                }

                // The edge issue filter trails N variants behind the reader, so feed it
                // N nulls to push whatever is still buffered through.
                int pendingFlushes = options.ExtentofEdgeRegion;
                while (pendingFlushes > 0)
                {
                    edgeCounts.Add(null, edgeVariantsFile);
                    pendingFlushes--;
                }

                bool lociCountSupplied = options.LociCount > 0;
                if (lociCountSupplied)
                {
                    basicCounts.ForceTotalPossibleMutations(options.LociCount);
                    edgeCounts.ForceTotalPossibleMutations(options.LociCount);
                }

                if (options.DoBasicChecks)
                {
                    CountsFileWriter.WriteCountsFile(countsFile, basicCounts);
                }

                if (options.DoAmpliconPositionChecks)
                {
                    CountsFileWriter.WriteCountsFile(edgeCountsFile, edgeCounts);
                }
            }

            return new SignatureSorterResultFiles(countsFile, edgeCountsFile, edgeVariantsFile);
        }
Example #4
0
        /// <summary>
        /// Creates the writer for the phased output vcf. The output name is derived from the
        /// input vcf name: "x.genome.vcf" becomes "x.phased.genome.vcf" and "x.vcf" becomes
        /// "x.phased.vcf". The file is placed in <c>_options.OutputDirectory</c>.
        /// </summary>
        /// <returns>A <c>PhasedVcfWriter</c> seeded with the input vcf's header lines.</returns>
        /// <exception cref="InvalidDataException">
        /// The input file name is null or does not end in ".vcf".
        /// </exception>
        public virtual IVcfFileWriter<CalledAllele> CreatePhasedVcfWriter()
        {
            const string genomeVcfSuffix = ".genome.vcf";
            const string vcfSuffix       = ".vcf";

            // The header is copied from the input vcf, so it can be read up front.
            List<string> header = AlleleReader.GetAllHeaderLines(_options.VcfPath);

            var    originalFileName = Path.GetFileName(_options.VcfPath);
            string outputFileName;

            // File-name suffixes are identifiers, not linguistic text: match them ordinally
            // so the result does not depend on the current culture.
            if (originalFileName != null && originalFileName.EndsWith(genomeVcfSuffix, StringComparison.Ordinal))
            {
                outputFileName = originalFileName.Substring(0, originalFileName.Length - genomeVcfSuffix.Length)
                                 + ".phased.genome.vcf";
            }
            else if (originalFileName != null && originalFileName.EndsWith(vcfSuffix, StringComparison.Ordinal))
            {
                outputFileName = originalFileName.Substring(0, originalFileName.Length - vcfSuffix.Length)
                                 + ".phased.vcf";
            }
            else
            {
                throw new InvalidDataException(string.Format("Input file is not a VCF file: '{0}'", originalFileName));
            }

            var outFile = Path.Combine(_options.OutputDirectory, outputFileName);

            var phasingCommandLine = "##Scylla_cmdline=" + _options.QuotedCommandLineArgumentsString;

            return(new PhasedVcfWriter(outFile,
                                       new VcfWriterConfig(_options.VariantCallingParams, _options.VcfWritingParams, _options.BamFilterParams, null, _options.Debug, false),
                                       new VcfWriterInputContext(), header, phasingCommandLine));
        }
Example #5
0
 /// <summary>
 /// Loads known variants from <c>_options.PriorsPath</c> (insertion and MNV categories) into
 /// <c>_knownVariants</c>. When <c>_options.TrimMnvPriors</c> is set, each MNV prior has the
 /// first base of its reference and alternate alleles removed and its position moved up by one.
 /// Does nothing when no priors path is configured.
 /// </summary>
 private void UpdateKnownPriors()
 {
     if (string.IsNullOrEmpty(_options.PriorsPath))
     {
         return;
     }

     using (var priorsReader = new AlleleReader(_options.PriorsPath))
     {
         var wantedCategories = new List<AlleleCategory> { AlleleCategory.Insertion, AlleleCategory.Mnv };

         _knownVariants = priorsReader.GetVariantsByChromosome(true, true, wantedCategories, doSkipCandidate: SkipPrior);

         if (!_options.TrimMnvPriors)
         {
             return;
         }

         foreach (var priorsOnChromosome in _knownVariants.Values)
         {
             foreach (var prior in priorsOnChromosome)
             {
                 if (prior.Type != AlleleCategory.Mnv)
                 {
                     continue;
                 }

                 // Drop the leading base from both alleles and shift the position to match.
                 prior.ReferenceAllele = prior.ReferenceAllele.Substring(1);
                 prior.AlternateAllele = prior.AlternateAllele.Substring(1);
                 prior.ReferencePosition++;
             }
         }
     }
 }
Example #6
0
        /// <summary>
        /// Reads every file in <c>_options.ForcedAllelesFileNames</c> and records each variant
        /// that passes <c>IsValidAlt</c> in <c>_forcedAllelesByChrom</c> as a
        /// (chromosome, position, reference allele, alternate allele) tuple, with the alleles
        /// upper-cased. Invalid variants are logged and skipped. Does nothing when no
        /// forced-allele files are configured.
        /// </summary>
        private void GetForcedAlleles()
        {
            if (_options.ForcedAllelesFileNames == null || _options.ForcedAllelesFileNames.Count == 0)
            {
                return;
            }

            foreach (var fileName in _options.ForcedAllelesFileNames)
            {
                using (var reader = new AlleleReader(fileName, false, false))
                {
                    foreach (var variant in reader.GetVariants())
                    {
                        var chr = variant.Chromosome;
                        var pos = variant.ReferencePosition;

                        // Allele strings are machine data, so upper-case them independently of
                        // the current culture (ToUpper() can map characters differently under
                        // some cultures).
                        var refAllele = variant.ReferenceAllele.ToUpperInvariant();
                        var altAllele = variant.AlternateAllele.ToUpperInvariant();

                        // The per-chromosome set is created before validation, so a chromosome
                        // whose variants are all invalid still gets an (empty) entry.
                        if (!_forcedAllelesByChrom.ContainsKey(chr))
                        {
                            _forcedAllelesByChrom[chr] = new HashSet<Tuple<string, int, string, string>>();
                        }

                        if (!IsValidAlt(altAllele, refAllele))
                        {
                            Logger.WriteToLog($"Invalid forced genotyping variant: {variant}");
                            continue;
                        }

                        _forcedAllelesByChrom[chr].Add(new Tuple<string, int, string, string>(chr, pos, refAllele, altAllele));
                    }
                }
            }
        }
Example #7
0
        /// <summary>
        /// Runs a pairwise Venn over the two gtTests input VCFs and checks that the consensus
        /// output matches the expected consensus VCF, variant by variant.
        /// </summary>
        public void VennVcf_GtTest()
        {
            var outDir      = TestPaths.LocalScratchDirectory;
            var VcfPathRoot = _TestDataPath;

            string VcfA         = Path.Combine(VcfPathRoot, "gtTests_S15.vcf");
            string VcfB         = Path.Combine(VcfPathRoot, "gtTests_S18.vcf");
            string OutputPath   = Path.Combine(outDir, "gtConsensusOut.vcf");
            string ExpectedPath = Path.Combine(VcfPathRoot, "gtConsensus.vcf");

            // Remove output left behind by an earlier run; otherwise the File.Exists
            // assertion below would pass even if this run wrote nothing.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.VariantCallingParams.MinimumFrequency       = 0.01f;
            parameters.ConsensusFileName = OutputPath;
            parameters.OutputDirectory   = outDir;

            VennProcessor Venn = new VennProcessor(new string[] { VcfA, VcfB }, parameters);

            Venn.DoPairwiseVenn();

            Assert.True(File.Exists(OutputPath));
            var expectedVariants = AlleleReader.GetAllVariantsInFile(ExpectedPath);
            var observedVariants = AlleleReader.GetAllVariantsInFile(OutputPath);

            Assert.Equal(expectedVariants.Count, observedVariants.Count);

            for (int i = 0; i < expectedVariants.Count; i++)
            {
                var ExpectedVariant = expectedVariants[i];
                var OutputVariant   = observedVariants[i];
                Assert.Equal(ExpectedVariant.ToString(), OutputVariant.ToString());
            }
        }
Example #8
0
        /// <summary>
        /// Validates that the VCF behind <paramref name="reader"/> is suitable input, based on
        /// the Pisces command line recorded in its "##Pisces_cmdline" header line. The command
        /// line is re-parsed with <c>PiscesOptionsParser</c> and the resulting options are checked.
        /// </summary>
        /// <param name="reader">Reader whose <c>HeaderLines</c> are inspected.</param>
        /// <exception cref="VariantReaderException">
        /// The header has no usable "##Pisces_cmdline" line, the VCF was called with a diploid
        /// ploidy model, the VCF was crushed, or it is neither a GVCF nor called with a
        /// minimum variant Q-score of 0 and a minimum frequency of at most 0.02.
        /// </exception>
        private static void CheckHeader(AlleleReader reader)
        {
            string cmdLineHeader = reader.HeaderLines.FirstOrDefault(str => str.Contains("##Pisces_cmdline"));

            // Fail with a descriptive error rather than a NullReferenceException when the
            // vcf carries no Pisces command line.
            if (cmdLineHeader == null)
            {
                throw new VariantReaderException("Could not find a ##Pisces_cmdline line in the VCF header.  Please check the input VCF file.");
            }

            // The command line is the text between the first and second separator.
            string[] headerParts = cmdLineHeader.Split("\"\"");
            if (headerParts.Length < 2)
            {
                throw new VariantReaderException("Could not parse the ##Pisces_cmdline line in the VCF header.  Please check the input VCF file.");
            }

            string piscesCmd       = headerParts[1];
            var    appOptionParser = new PiscesOptionsParser();

            // Split(null) splits on whitespace.
            appOptionParser.ParseArgs(piscesCmd.Split(null));

            var callingParameters = appOptionParser.PiscesOptions.VariantCallingParameters;
            var writingParameters = appOptionParser.PiscesOptions.VcfWritingParameters;

            // Check if VCF is diploid
            if (callingParameters.PloidyModel == PloidyModel.DiploidByAdaptiveGT ||
                callingParameters.PloidyModel == PloidyModel.DiploidByThresholding)
            {
                throw new VariantReaderException("Adaptive Genotyper should be used with VCFs that are called as somatic " +
                                                 "VCFs by Pisces.  Please check the input VCF file.");
            }

            // Check if VCF is crushed
            if (writingParameters.ForceCrush == true)
            {
                throw new VariantReaderException("Adaptive Genotyper should be used with uncrushed VCFs.  Please check the input VCF file.");
            }

            // Check if GVCF or --minvq 0
            if (!writingParameters.OutputGvcfFile &&
                (callingParameters.MinimumVariantQScore > 0 ||
                 callingParameters.MinimumFrequency > 0.02))
            {
                throw new VariantReaderException("Adaptive Genotyper should be used with GVCFs or with option -minvq 0.  Please" +
                                                 " check in the input VCF file.");
            }
        }
Example #9
0
        /// <summary>
        /// Checks that reading all variants from the test vcf yields 24 alleles, the first at
        /// position 10 and the last at position 4000.
        /// </summary>
        public void GetVariantsTests()
        {
            // Dispose the reader so the underlying file handle is released after the test.
            using (var vr = new AlleleReader(VcfTestFile_1))
            {
                var allVar = vr.GetVariants().ToList();

                Assert.Equal(24, allVar.Count);
                Assert.Equal(10, allVar.First().ReferencePosition);
                Assert.Equal(4000, allVar.Last().ReferencePosition);
            }
        }
Example #10
0
        /// <summary>
        /// Walks the test vcf record by record and checks the allele category assigned to the
        /// first allele of each record, in file order.
        /// </summary>
        public void AssignVariantTypeTests()
        {
            // Dispose the reader so the underlying file handle is released after the test.
            using (var vr = new AlleleReader(VcfTestFile_1))
            {
                // Testing 1/1
                Assert.True(TestVariant(vr, AlleleCategory.Reference));
                Assert.True(TestVariant(vr, AlleleCategory.Snv));
                Assert.True(TestVariant(vr, AlleleCategory.Insertion));
                Assert.True(TestVariant(vr, AlleleCategory.Deletion));

                // Testing 1/0
                Assert.True(TestVariant(vr, AlleleCategory.Snv));
                Assert.True(TestVariant(vr, AlleleCategory.Insertion));
                Assert.True(TestVariant(vr, AlleleCategory.Deletion));
                Assert.True(TestVariant(vr, AlleleCategory.Snv));

                // Testing 0/0
                //chr1    90.A.   25 PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC 0 / 0:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    100.A AT    25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC 0 / 0:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    110.AT A    25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC 0 / 0:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    120.A T 25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC 0 / 0:25:0,0:0.0000:23:0.0000:0.0010
                Assert.True(TestVariant(vr, AlleleCategory.Reference));
                Assert.True(TestVariant(vr, AlleleCategory.Insertion));
                Assert.True(TestVariant(vr, AlleleCategory.Deletion));
                Assert.True(TestVariant(vr, AlleleCategory.Snv));

                // Testing 0/1
                //chr1    130.A.   25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC    0 / 1:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    140.A   AT  25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC    0 / 1:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    150.AT  A   25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC    0 / 1:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    160.A   T   25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC    0 / 1:25:0,0:0.0000:23:0.0000:0.0010
                Assert.True(TestVariant(vr, AlleleCategory.Reference));
                Assert.True(TestVariant(vr, AlleleCategory.Insertion));
                Assert.True(TestVariant(vr, AlleleCategory.Deletion));
                Assert.True(TestVariant(vr, AlleleCategory.Snv));

                // Testing MNV
                //chr1    600.ATCA    TCGC    25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC    0 / 0:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    700.ATCA    TCGC    25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC    0 / 1:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    800.ATCA    TCGC    25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC    1 / 0:25:0,0:0.0000:23:0.0000:0.0010
                // chr1    900.ATCA    TCGC    25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC   1 / 1:25:0,0:0.0000:23:0.0000:0.0010
                Assert.True(TestVariant(vr, AlleleCategory.Mnv));
                Assert.True(TestVariant(vr, AlleleCategory.Mnv));
                Assert.True(TestVariant(vr, AlleleCategory.Mnv));
                Assert.True(TestVariant(vr, AlleleCategory.Mnv));

                // Testing ./. . ./1 1/.
                //chr1    1000.A   T   25  PASS DP = 0    GT: GQ: AD: VF: NL: SB: NC./.:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    2000.A   T   25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC.:25:0,0:0.0000:23:0.0000:0.0010
                //chr1    3000.A   T   25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC./ 1:25:0,0:0.0000:23:0.0000:0.0010
                // chr1    4000.A   T   25  PASS DP = 500  GT: GQ: AD: VF: NL: SB: NC    1 /.:25:0,0:0.0000:23:0.0000:0.0010
                Assert.True(TestVariant(vr, AlleleCategory.Snv));
                Assert.True(TestVariant(vr, AlleleCategory.Snv));
                Assert.True(TestVariant(vr, AlleleCategory.Snv));
                Assert.True(TestVariant(vr, AlleleCategory.Snv));
            }
        }
Example #11
0
        /// <summary>
        /// Checks the number of header lines read from the test vcf, and the content of the
        /// first and last of them.
        /// </summary>
        public void GetHeaderTests()
        {
            const int    expectedLineCount = 23;
            const string expectedFirstLine = "##fileformat=VCFv4.1";
            const string expectedLastLine  = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tmySample";

            var headerLines = AlleleReader.GetAllHeaderLines(VcfTestFile_1);

            Assert.Equal(expectedLineCount, headerLines.Count);
            Assert.Equal(expectedFirstLine, headerLines[0]);
            Assert.Equal(expectedLastLine, headerLines[expectedLineCount - 1]);
        }
Example #12
0
        /// <summary>
        /// Advances <paramref name="vr"/> by one batch and reports whether the first allele of
        /// that batch has the expected category.
        /// </summary>
        /// <param name="vr">Reader to advance.</param>
        /// <param name="type">Category the next allele is expected to have.</param>
        /// <returns>
        /// True if the first allele read has category <paramref name="type"/>; false if it has
        /// a different category or if the reader produced no alleles.
        /// </returns>
        private bool TestVariant(AlleleReader vr, AlleleCategory type)
        {
            // The list is supplied by the reader through the out parameter, so nothing
            // needs to be allocated here.
            List<CalledAllele> testVarList;

            vr.GetNextVariants(out testVarList);

            // Report a plain mismatch instead of throwing when the reader has run dry.
            if (testVarList == null || testVarList.Count == 0)
            {
                return false;
            }

            return testVarList[0].Type == type;
        }
Example #13
0
        // Runs the genome processor over two copies of the same bam staged in different folders.
        // Expectations:
        // - when no output folder is given, logs go to the directory of the first bam
        // - when an output folder is given, logs go to that folder
        // - each vcf has a header and both chromosomes, and is written where normally expected
        private void ExecuteTest(int numberOfThreads, string outputFolder = null)
        {
            var bamSource    = Path.Combine(TestPaths.LocalTestDataDirectory, "Chr17Chr19.bam");
            var stagingRoot  = Path.Combine(TestPaths.LocalScratchDirectory, "MultiProcessIn");
            var bamFilePath1 = Stage(bamSource, "In1", stagingRoot + "1");
            var bamFilePath2 = Stage(bamSource, "In2", stagingRoot + "2");

            var genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");

            // The -OutFolder argument is only present when an output folder was requested.
            var outFolderArgument = string.IsNullOrEmpty(outputFolder) ? string.Empty : " -OutFolder " + outputFolder;
            var commandLine       = string.Format("-B {0},{1} -g {2}{3} -gVCF false", bamFilePath1, bamFilePath2, genomePath, outFolderArgument);

            var options = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { bamFilePath1, bamFilePath2 },
                GenomePaths          = new[] { genomePath },
                OutputDirectory      = outputFolder,
                CommandLineArguments = commandLine.Split(' '),
                VcfWritingParameters = new VcfWritingParameters()
                {
                    OutputGvcfFile = true
                }
            };

            options.SetIODirectories("Pisces");
            var factory = new Factory(options);

            // Clear out any vcfs left over from a previous run.
            foreach (var staleRequest in factory.WorkRequests)
            {
                var stalePath = staleRequest.OutputFilePath;
                if (File.Exists(stalePath))
                {
                    File.Delete(stalePath);
                }
            }

            Logger.OpenLog(options.LogFolder, options.LogFileName, true);

            var referenceGenome = factory.GetReferenceGenome(options.GenomePaths[0]);
            var processor       = new GenomeProcessor(factory, referenceGenome, false, true);

            processor.Execute(numberOfThreads);

            Logger.CloseLog();

            foreach (var finishedRequest in factory.WorkRequests)
            {
                using (var vcfReader = new AlleleReader(finishedRequest.OutputFilePath))
                {
                    Assert.True(vcfReader.HeaderLines.Any());
                    var calledVariants = vcfReader.GetVariants().ToList();

                    Assert.Equal(251, calledVariants.Count);
                    Assert.Equal("chr17", calledVariants.First().Chromosome);
                    Assert.Equal("chr19", calledVariants.Last().Chromosome);
                }
            }

            var logFiles = Directory.GetFiles(options.LogFolder, options.LogFileNameBase);
            Assert.True(logFiles.Any());
        }
Example #14
0
        /// <summary>
        /// Sets up the filter from the supplied options: reads the input vcf's header lines,
        /// builds the geometric filter, reconciles the options with the header via
        /// <c>TryToUpdateWithOriginalOptions</c>, and derives the output path
        /// ("x.vcf" becomes "x.filtered.vcf", "x.genome.vcf" becomes "x.filtered.genome.vcf").
        /// </summary>
        /// <param name="settings">Psara options supplying the input vcf, filter parameters and output directory.</param>
        public VcfFilter(PsaraOptions settings)
        {
            var inputVcfPath = settings.VcfPath;
            var inputVcfName = Path.GetFileName(inputVcfPath);

            _originalHeaderLines = AlleleReader.GetAllHeaderLines(inputVcfPath);
            _geometricFilter     = new GeometricFilter(settings.GeometricFilterParameters);
            _psaraOptions        = (PsaraOptions)VcfConsumerAppParsingUtils.TryToUpdateWithOriginalOptions(settings, _originalHeaderLines, inputVcfPath);

            // First tag the name as filtered, then move the tag in front of ".genome" for genome vcfs.
            var filteredName = inputVcfName.Replace(".vcf", ".filtered.vcf");
            var filteredPath = Path.Combine(settings.OutputDirectory, filteredName);

            _outputFile = filteredPath.Replace(".genome.filtered.vcf", ".filtered.genome.vcf");
        }
Example #15
0
        /// <summary>
        /// Step forward with the reader, assembling a list of variants at your CurrentVariant position.
        /// A pending backlog batch (if any) is consumed first; after that, batches are pulled from
        /// the reader until one orders after CurrentVariant or the reader is exhausted.
        /// </summary>
        /// <param name="Reader">Source of further variant batches, read with GetNextVariants.</param>
        /// <param name="CurrentVariant">The variant whose position each batch is compared against.</param>
        /// <param name="alleleOrdering">Orders CurrentVariant against the first allele of each batch.</param>
        /// <param name="BackLogExists">
        /// On entry, true if TheBackLog holds a batch that must be consumed before reading.
        /// On exit, true if a batch that orders after CurrentVariant was stored in TheBackLog.
        /// </param>
        /// <param name="TheBackLog">
        /// The pending batch, if any. On exit it holds the batch that was read but not used,
        /// or null when BackLogExists is false.
        /// </param>
        /// <returns>All alleles from the batches that ordered as co-located (0) with CurrentVariant.</returns>
        /// <exception cref="InvalidDataException">A batch ordered as neither 0 nor -1 relative to CurrentVariant.</exception>
        private static List <CalledAllele> AssembleColocatedList(
            AlleleReader Reader, CalledAllele CurrentVariant, AlleleCompareByLoci alleleOrdering,
            ref bool BackLogExists, ref List <CalledAllele> TheBackLog)
        {
            List <CalledAllele> CoLocatedVariants = new List <CalledAllele>();
            bool ContinueReadA   = true;
            var  NextVariantList = new List <CalledAllele>();

            while (ContinueReadA)
            {
                // Use the batch carried over from the previous call before touching the reader.
                if (BackLogExists)
                {
                    NextVariantList = TheBackLog;
                    BackLogExists   = false;
                }
                else
                {
                    ContinueReadA = Reader.GetNextVariants(out NextVariantList);

                    // Reader exhausted: return what has been collected so far.
                    if (!ContinueReadA)
                    {
                        break;
                    }
                }

                // VarOrder =  -1 if Current comes first, 0 if co-located.
                // Only the first allele of the batch is used for the comparison.
                int VarOrder = (alleleOrdering.OrderAlleles(CurrentVariant, NextVariantList.First()));

                switch (VarOrder)
                {
                case 0:     //the variant we just got is at our current position
                    CoLocatedVariants.AddRange(NextVariantList);
                    break;

                case -1:                             //the variant we just got is after our current position, and needs to go to the backlog.
                    TheBackLog    = NextVariantList; //NextVariant;
                    ContinueReadA = false;
                    BackLogExists = true;
                    break;

                default:     // any other ordering result is treated as an unordered vcf
                {
                    throw new InvalidDataException("Vcf needs to be ordered.");
                }
                }
            }

            // Do not leave a consumed batch behind for the caller.
            if (!BackLogExists)
            {
                TheBackLog = null;
            }

            return(CoLocatedVariants);
        }
Example #16
0
        /// <summary>
        /// Builds a <c>NeighborhoodBuilder</c> that reads variants from <paramref name="sourceVcf"/>,
        /// using phasing criteria assembled from the arguments and default variant calling parameters.
        /// </summary>
        /// <param name="sourceVcf">Path of the vcf the builder reads from.</param>
        /// <param name="phasingDistance">Value for the criteria's <c>PhasingDistance</c>.</param>
        /// <param name="passingOnly">Value for the criteria's <c>PassingVariantsOnly</c>.</param>
        /// <param name="minPassingVariantsInNbhd">Value for the criteria's <c>MinPassingVariantsInNbhd</c>.</param>
        private NeighborhoodBuilder CreateNbhdBuilder(string sourceVcf, int phasingDistance = 2, bool passingOnly = true, int minPassingVariantsInNbhd = 0)
        {
            var vcfReader = new AlleleReader(sourceVcf);

            var phasingCriteria = new PhasableVariantCriteria()
            {
                ChrToProcessArray        = new string[] { },
                PassingVariantsOnly      = passingOnly,
                PhasingDistance          = phasingDistance,
                MinPassingVariantsInNbhd = minPassingVariantsInNbhd
            };

            var callingParameters = new VariantCallingParameters();

            return new NeighborhoodBuilder(phasingCriteria, callingParameters, vcfReader, null, 10);
        }
        /// <summary>
        /// Creates a <c>VQRVcfWriter</c> for <paramref name="outputFilePath"/>, configured from
        /// the supplied options, carrying over the header lines of the input vcf and adding a
        /// "##VQR_cmdline=" header entry.
        /// </summary>
        /// <param name="options">Options supplying the writer parameters, input vcf path and quoted command line.</param>
        /// <param name="outputFilePath">Path of the vcf to be written.</param>
        public static VQRVcfWriter GetVQRVcfFileWriter(VcfConsumerAppOptions options, string outputFilePath)
        {
            var writerConfig = new VcfWriterConfig(
                options.VariantCallingParams,
                options.VcfWritingParams,
                options.BamFilterParams,
                null,
                false,
                false);

            var inputHeaderLines = AlleleReader.GetAllHeaderLines(options.VcfPath);
            var commandLineEntry = "##VQR_cmdline=" + options.QuotedCommandLineArgumentsString;
            var inputContext     = new VcfWriterInputContext();

            return new VQRVcfWriter(outputFilePath, writerConfig, inputContext, inputHeaderLines, commandLineEntry);
        }
Example #18
0
        /// <summary>
        /// Regression test: one pool had multiple co-located variants at chr15 92604460 and the
        /// other had only a reference call there. The consensus must match the expected vcf and
        /// contain exactly one call at that position.
        /// </summary>
        public void VennVcf_CombineTwoPoolVariants_MergeRefCalls()
        {
            //this is  from an issue where there were multiple co-located variants in one pool,
            //and just ref in the other, at chr15	92604460.  The consensus answer should be
            // a single ref call (and not multiple ref calls!).
            var outDir      = TestPaths.LocalScratchDirectory;
            var vcfPathRoot = _TestDataPath;

            string VcfPath_PoolA     = Path.Combine(vcfPathRoot, "C64-Ct-4_S17.genome.vcf");
            string VcfPath_PoolB     = Path.Combine(vcfPathRoot, "C64-Ct-4_S18.genome.vcf");
            string VcfPath_Consensus = Path.Combine(vcfPathRoot, "ExpectedConsensus2.vcf");

            string OutputPath = Path.Combine(outDir, "Consensus2.vcf");

            // Start from a clean slate so the existence check below reflects this run only.
            if (File.Exists(OutputPath))
            {
                File.Delete(OutputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.InputFiles        = new string[] { VcfPath_PoolA, VcfPath_PoolB };
            parameters.OutputDirectory   = outDir;
            parameters.ConsensusFileName = OutputPath;
            VennProcessor venn = new VennProcessor(parameters.InputFiles, parameters);

            venn.DoPairwiseVenn();

            Assert.True(File.Exists(OutputPath));
            var CombinedVariants = AlleleReader.GetAllVariantsInFile(OutputPath);
            var ExpectedVariants = AlleleReader.GetAllVariantsInFile(VcfPath_Consensus);

            Assert.Equal(ExpectedVariants.Count, CombinedVariants.Count);

            int NumVariantsAtPos92604460 = 0;

            for (int i = 0; i < ExpectedVariants.Count; i++)
            {
                var EVariant = ExpectedVariants[i];
                var Variant  = CombinedVariants[i];

                if ((Variant.ReferencePosition == 92604460) &&
                    (Variant.Chromosome == "chr15"))
                {
                    NumVariantsAtPos92604460++;
                }

                Assert.Equal(EVariant.ToString(), Variant.ToString());
            }

            // Expected value goes first so a failure message reports the two values the right way round.
            Assert.Equal(1, NumVariantsAtPos92604460);
        }
        /// <summary>
        /// Asserts that two vcf files contain the same variants in the same order, comparing
        /// the string form of each variant.
        /// </summary>
        /// <param name="expectedResultsFilePath">Path of the vcf holding the expected variants.</param>
        /// <param name="actualResultsFilePath">Path of the vcf produced by the code under test.</param>
        private void CompareVariants(string expectedResultsFilePath, string actualResultsFilePath)
        {
            List<CalledAllele> results  = AlleleReader.GetAllVariantsInFile(actualResultsFilePath);
            List<CalledAllele> expected = AlleleReader.GetAllVariantsInFile(expectedResultsFilePath);

            // Expected value goes first so a failure message reports the two counts the right way round.
            Assert.Equal(expected.Count, results.Count);

            for (int i = 0; i < expected.Count; i++)
            {
                Assert.Equal(expected[i].ToString(), results[i].ToString());
            }
        }
        /// <summary>
        /// Writes a vcf with the factory-created writer for the first bam and checks that the
        /// file lands next to the bam with a ".vcf" extension and that header lines 7 to 10
        /// are the expected FILTER entries.
        /// </summary>
        public void DefaultVCFOutput()
        {
            var appOptions = new PiscesApplicationOptions
            {
                BAMPaths                 = new[] { _bamChr19, _bamChr17Chr19, _bamChr17Chr19Dup },
                IntervalPaths            = new[] { _intervalsChr17, _intervalsChr19, null },
                GenomePaths              = new[] { _genomeChr17Chr19 },
                VariantCallingParameters = new VariantCallingParameters()
                {
                    MinimumCoverage             = 10,
                    LowDepthFilter              = 10,
                    AmpliconBiasFilterThreshold = 0.01F
                },
                VcfWritingParameters = new VcfWritingParameters()
                {
                    OutputGvcfFile = false,
                }
            };

            var factory = new Factory(appOptions);

            var context = new VcfWriterInputContext
            {
                QuotedCommandLineString = "myCommandLine",
                SampleName    = "mySample",
                ReferenceName = "myReference",
                ContigsByChr  = new List<Tuple<string, long>>
                {
                    new Tuple<string, long>("chr1", 10001),
                    new Tuple<string, long>("chrX", 500)
                }
            };
            var outputFile = factory.GetOutputFile(appOptions.BAMPaths[0]);
            var writer     = factory.CreateVcfWriter(outputFile, context);

            var candidates = _defaultCandidates;

            // Make sure the writer is released even if writing throws.
            try
            {
                writer.WriteHeader();
                writer.Write(candidates);
            }
            finally
            {
                writer.Dispose();
            }

            Assert.True(File.Exists(outputFile));
            Assert.Equal(Path.ChangeExtension(_bamChr19, ".vcf"), outputFile);

            // Dispose the reader so the file handle is released after the test.
            using (var reader = new AlleleReader(outputFile))
            {
                var header = reader.HeaderLines;

                // Expected value goes first so failure messages report the values the right way round.
                Assert.Equal("##FILTER=<ID=q30,Description=\"Quality score less than 30\">", header[7]);
                Assert.Equal("##FILTER=<ID=AB,Description=\"Amplicon bias - disparate variant frequencies detected by amplicon\">", header[8]);
                Assert.Equal("##FILTER=<ID=SB,Description=\"Variant strand bias too high\">", header[9]);
                Assert.Equal("##FILTER=<ID=R5x9,Description=\"Repeats of part or all of the variant allele (max repeat length 5) in the reference greater than or equal to 9\">", header[10]);
            }
        }
        /// <summary>
        /// Reads two "crushed" vcf files and checks that a record carrying two alternate
        /// alleles is unpacked into two alleles with the expected genotype, support counts,
        /// frequencies and positions.
        /// </summary>
        public void UnpackAlleles()
        {
            // Two example vcf files that have been "crushed".
            var paddedVcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VcfFileWriterTests_Crushed_Padded_expected.vcf");
            var genomeVcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "crushed.genome.vcf");

            var paddedAlleles = AlleleReader.GetAllVariantsInFile(paddedVcfPath);
            var genomeAlleles = AlleleReader.GetAllVariantsInFile(genomeVcfPath);

            Assert.Equal(8, paddedAlleles.Count);  // 7 lines, but 8 alleles
            Assert.Equal(91, genomeAlleles.Count); // 90 lines, but 91 alleles

            // Each pair below is one vcf record unpacked into two alleles.
            var paddedFirst  = paddedAlleles[5];
            var genomeFirst  = genomeAlleles[3];
            var paddedSecond = paddedAlleles[6];
            var genomeSecond = genomeAlleles[4];

            Assert.Equal(Genotype.HeterozygousAlt1Alt2, paddedFirst.Genotype);
            Assert.Equal(Genotype.HeterozygousAlt1Alt2, genomeFirst.Genotype);

            // Example one: total depth = 5394, total variant count = 2387 + 2000 = 4387,
            // so ref counts ~1007.
            Assert.Equal(1007, paddedFirst.ReferenceSupport);
            Assert.Equal(2387, paddedFirst.AlleleSupport);
            Assert.Equal(0.4425, paddedFirst.Frequency, 4);
            Assert.Equal(10, paddedFirst.ReferencePosition);
            Assert.Equal("AA", paddedFirst.ReferenceAllele);
            Assert.Equal("GA", paddedFirst.AlternateAllele);

            Assert.Equal(1007, paddedSecond.ReferenceSupport);
            Assert.Equal(2000, paddedSecond.AlleleSupport);
            Assert.Equal("G", paddedSecond.AlternateAllele);
            Assert.Equal(0.3708, paddedSecond.Frequency, 4);
            Assert.Equal(10, paddedSecond.ReferencePosition);

            // Example two: total depth = 532, total variant count = 254 + 254 = 508,
            // so ref counts ~24.
            Assert.Equal(24, genomeFirst.ReferenceSupport);
            Assert.Equal(254, genomeFirst.AlleleSupport);
            Assert.Equal(223906731, genomeFirst.ReferencePosition);

            Assert.Equal(24, genomeSecond.ReferenceSupport);
            Assert.Equal(254, genomeSecond.AlleleSupport);
            Assert.Equal(223906731, genomeSecond.ReferencePosition);
        }
Example #22
0
        /// <summary>
        /// Feeds every allele read from <paramref name="vcfIn"/> to a <c>MutationCounter</c> whose writer
        /// targets a ".counts" file in <paramref name="outDir"/>, and returns the path of that file.
        /// </summary>
        /// <remarks>
        /// The counts file name is the vcf file name with every occurrence of ".vcf" replaced by ".counts".
        /// If a file with that name already exists it is copied to a ".counts.original" sibling
        /// (replacing any earlier copy) and deleted before the new file is written.
        /// </remarks>
        /// <param name="vcfIn">Path of the vcf file to read.</param>
        /// <param name="outDir">Directory in which the counts file is created.</param>
        /// <param name="lociCount">
        /// When greater than zero, handed to <c>ForceTotalPossibleMutations</c> after all variants were added.
        /// </param>
        /// <returns>The path of the counts file.</returns>
        public static string WriteCountsFile(string vcfIn, string outDir, int lociCount)
        {
            var vcfFileName   = Path.GetFileName(vcfIn);
            var countsPath    = Path.Combine(outDir, vcfFileName.Replace(".vcf", ".counts"));
            var countsPathOld = Path.Combine(outDir, vcfFileName.Replace(".vcf", ".counts.original"));

            //keep one backup of a pre-existing counts file before replacing it
            if (File.Exists(countsPath))
            {
                if (File.Exists(countsPathOld))
                {
                    File.Delete(countsPathOld);
                }
                File.Copy(countsPath, countsPathOld);
                File.Delete(countsPath);
            }

            var counter = new MutationCounter();

            using (AlleleReader readerA = new AlleleReader(vcfIn))
            {
                counter.StartWriter(countsPath);

                try
                {
                    //assigned by GetNextVariants, so no need to allocate a list up front
                    List <CalledAllele> variants;

                    while (readerA.GetNextVariants(out variants))
                    {
                        foreach (var variant in variants)
                        {
                            try
                            {
                                counter.Add(variant);
                            }
                            catch (Exception ex)
                            {
                                Logger.WriteToLog(string.Format("Fatal error processing vcf; Check {0}, position {1}.  Exception: {2}",
                                                                variant.Chromosome, variant.ReferencePosition, ex));
                                throw;
                            }
                        }
                    }

                    if (lociCount > 0)
                    {
                        counter.ForceTotalPossibleMutations(lociCount);
                    }
                }
                finally
                {
                    //pair every StartWriter with a CloseWriter, also when a variant failed and the
                    //exception is on its way out; previously the writer was left open in that case
                    counter.CloseWriter();
                }
            }

            return(countsPath);
        }
Example #23
0
        /// <summary>
        /// Pushes a handful of rough, sparsely populated vcf lines through
        /// <c>AlleleReader.VcfLineToAlleles</c>, checks that each line produces exactly one allele,
        /// and spot-checks the alleles parsed from the first and the last line.
        /// </summary>
        public void VcfLineToAllelesTests_SomaticForcedGTExample_PICS_1168()
        {
            //deliberately poor quality input lines
            var rawVcfLines = new List <string>
            {
                "chr4\t56236582\t1ai\tA\tC\t.\t.\t.\t.\t.\r",
                "chr4\t56236583\t1aii\tA\tAA\t.\t.\t.\t.\t.",
                "chr18\t9888034\t6b\tA\t.\t.\t.\t.\t.\t.blah",
                "chr21\t46644966\t6b\tA\t.\t.\t.\t.\tboo\too",
                "chr21\t33694232\t6b\tA\t.\t.\t.\t.\t.\t.",
                "chr21\t33694239\t6c\tT\t<del>\t.\t.\t.\t.\t.",
                "chr8\t1817367\t6d\tC\tA\t.\t.\t.\t.\t.",
                "chr1\t109465143\tPICS827\tCTGCCATACAGCTTCAACAACAACTT\tATGCCATACAGCTTCAACAACAA\t.\t.\t.\t.\t.",
            };

            var parsedAlleles = new List <CalledAllele>();

            for (int lineIndex = 0; lineIndex < rawVcfLines.Count; lineIndex++)
            {
                //parsing must not throw for any of the lines
                var allelesOnLine = AlleleReader.VcfLineToAlleles(rawVcfLines[lineIndex], true);

                //somatic input, so a line may never expand to more than one allele
                Assert.Equal(1, allelesOnLine.Count());

                parsedAlleles.Add(allelesOnLine[0]);
            }

            //sanity check the two ends of the result list
            var firstAllele = parsedAlleles[0];
            var lastAllele  = parsedAlleles[parsedAlleles.Count - 1];

            Assert.Equal("chr4", firstAllele.Chromosome);
            Assert.Equal(56236582, firstAllele.ReferencePosition);
            Assert.Equal("A", firstAllele.ReferenceAllele);
            Assert.Equal("C", firstAllele.AlternateAllele);

            Assert.Equal("chr1", lastAllele.Chromosome);
            Assert.Equal(109465143, lastAllele.ReferencePosition);
            Assert.Equal("CTGCCATACAGCTTCAACAACAACTT", lastAllele.ReferenceAllele);
            Assert.Equal("ATGCCATACAGCTTCAACAACAA", lastAllele.AlternateAllele);
        }
Example #24
0
        /// <summary>
        /// Reads the next batch of variants from <paramref name="reader"/> and hands back its single entry.
        /// </summary>
        /// <param name="reader">Reader the next variants are pulled from.</param>
        /// <param name="variant">
        /// First variant of the batch when the read succeeded; null when it did not.
        /// </param>
        /// <returns>Whatever <c>reader.GetNextVariants</c> returned.</returns>
        /// <exception cref="ArgumentException">
        /// The read succeeded but delivered more than one variant.
        /// </exception>
        public static bool GetNextUncrushedAllele(AlleleReader reader, out CalledAllele variant)
        {
            variant = null;

            List <CalledAllele> alleleBatch;

            if (!reader.GetNextVariants(out alleleBatch))
            {
                return(false);
            }

            if (alleleBatch.Count > 1)
            {
                throw new ArgumentException("Input file should not have crushed variants. There should only be one variant per line");
            }

            variant = alleleBatch[0];
            return(true);
        }
Example #25
0
        /// <summary>
        /// Layers the option sources for a run: first the settings recovered from the original vcf
        /// (header lines plus the Pisces log directory), then the command line arguments, and finally
        /// writes the options in use to "ScyllaOptions.used.json" in the log folder.
        /// </summary>
        /// <param name="scyllaOptions">
        /// Options to update; supplies the vcf path, the command line arguments and the log folder.
        /// </param>
        private void AdjustOptions(ref ScyllaApplicationOptions scyllaOptions)
        {
            var           vcfPath     = scyllaOptions.VcfPath;
            List <string> headerLines = AlleleReader.GetAllHeaderLines(vcfPath);

            //the Pisces options behind the original vcf are expected in a "PiscesLogs" folder next to
            //the vcf; when that folder is missing, fall back to the vcf's own directory
            var vcfDirectory       = Path.GetDirectoryName(vcfPath);
            var candidateDirectory = Path.Combine(vcfDirectory, "PiscesLogs");
            var piscesLogDirectory = Directory.Exists(candidateDirectory) ? candidateDirectory : vcfDirectory;

            //the original settings become the defaults...
            VcfConsumerAppParsingUtils.TryToUpdateWithOriginalOptions(scyllaOptions, headerLines, piscesLogDirectory);

            //...and anything given on the command line wins over them
            ApplicationOptionParser.ParseArgs(scyllaOptions.CommandLineArguments);

            //NOTE(review): the saved object is the _options field, not the scyllaOptions parameter;
            //presumably both refer to the same instance - confirm against the caller
            var usedOptionsPath = Path.Combine(scyllaOptions.LogFolder, "ScyllaOptions.used.json");

            _options.Save(usedOptionsPath);
        }
        /// <summary>
        /// Builds the fixture: reads "VariantDepthReaderTest.vcf" batch by batch and adds the first
        /// allele of each batch to <c>RecalCollection</c>, skipping a batch when its first allele sits
        /// at the same reference position as the allele added last.
        /// </summary>
        public RecalibratedVariantsTests()
        {
            RecalCollection = new RecalibratedVariantsCollection();
            var vcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VariantDepthReaderTest.vcf");

            using (var reader = new AlleleReader(vcfPath))
            {
                //assigned by GetNextVariants on every call, so nothing is allocated up front
                List <CalledAllele> coLocatedVariantList;

                //starts as a blank allele so the first comparison runs against its default position
                var lastVariant = new CalledAllele();

                while (reader.GetNextVariants(out coLocatedVariantList))
                {
                    var variant = coLocatedVariantList[0];

                    //only add a locus when the position moved on since the last added allele
                    if (lastVariant.ReferencePosition != variant.ReferencePosition)
                    {
                        RecalCollection.AddLocus(variant);
                        lastVariant = variant;
                    }
                }
            }
        }
Example #27
0
        /// <summary>
        /// Exercises <c>AlleleReader.GetVariantsByChromosome</c> on "VcfReader_Extensions.vcf", once
        /// with only an allele category list and once with an additional custom candidate rule.
        /// </summary>
        public void GetVariantsByChromosome()
        {
            var vcfPath = Path.Combine(TestPaths.LocalTestDataDirectory, "VcfReader_Extensions.vcf");

            //Simple case
            //(readers are disposable, so each one is scoped to the assertions that need it)
            using (var vcfReader = new AlleleReader(vcfPath))
            {
                var output = vcfReader.GetVariantsByChromosome(true, true,
                                                               new List <AlleleCategory> {
                    AlleleCategory.Insertion, AlleleCategory.Mnv
                });

                Assert.Equal(1, output.Count);
                Assert.True(output.ContainsKey("chr1"));

                List <CandidateAllele> candidateAlleles;

                output.TryGetValue("chr1", out candidateAlleles);
                Assert.Equal(2, candidateAlleles.Count);
                Assert.Equal(AlleleCategory.Mnv, candidateAlleles[0].Type);
                Assert.Equal(AlleleCategory.Insertion, candidateAlleles[1].Type);
            }

            //Custom rule
            using (var filteredVcfReader = new AlleleReader(vcfPath))
            {
                var filteredOutput = filteredVcfReader.GetVariantsByChromosome(true, true,
                                                                               new List <AlleleCategory> {
                    AlleleCategory.Insertion, AlleleCategory.Mnv
                }, candidate => candidate.ReferenceAllele.Length > 3);

                Assert.Equal(1, filteredOutput.Count);
                Assert.True(filteredOutput.ContainsKey("chr1"));

                List <CandidateAllele> filteredCandidateAlleles;

                filteredOutput.TryGetValue("chr1", out filteredCandidateAlleles);
                Assert.Equal(1, filteredCandidateAlleles.Count);

                //nothing matching the custom rule may be left in the result
                Assert.False(filteredCandidateAlleles.Any(c => c.ReferenceAllele.Length > 3));
            }
        }
        /// <summary>
        /// Runs the genome processor over two bams twice: first with one populated and one empty
        /// interval file, then with only the empty interval file, and checks how many variants end up
        /// in each output vcf.
        /// </summary>
        /// <param name="throttle">Passed through as the third argument of the <c>GenomeProcessor</c> constructor.</param>
        private void ExecuteEmptyIntervalsTest(bool throttle)
        {
            var testDataDirectory = TestPaths.LocalTestDataDirectory;

            var firstBam       = Path.Combine(testDataDirectory, "Chr17Chr19.bam");
            var secondBam      = Path.Combine(testDataDirectory, "Chr17Chr19_removedSQlines.bam");
            var genomePath     = Path.Combine(TestPaths.SharedGenomesDirectory, "chr17chr19");
            var chr17Intervals = Path.Combine(testDataDirectory, "chr17only.picard");
            var noIntervals    = Path.Combine(testDataDirectory, "empty.picard");

            // ----------------------
            // round one: one bam has intervals, the other has an empty interval file
            // ----------------------

            var options = new PiscesApplicationOptions
            {
                BAMPaths             = new[] { firstBam, secondBam },
                IntervalPaths        = new [] { chr17Intervals, noIntervals },
                GenomePaths          = new[] { genomePath },
                OutputDirectory      = Path.Combine(testDataDirectory, "EmptyIntervalsTest_Mixed"),
                VcfWritingParameters = new Domain.Options.VcfWritingParameters()
                {
                    OutputGvcfFile = true
                }
            };

            var factory   = new Factory(options);
            var processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle);

            processor.Execute(2);

            // the first vcf is processed as usual, the second one must come out empty
            AssertVariantCountInVcf(factory.WorkRequests.First().OutputFilePath, 11);
            AssertVariantCountInVcf(factory.WorkRequests.Last().OutputFilePath, 0);

            // ----------------------
            // round two: both bams use the empty interval file
            // ----------------------

            options.IntervalPaths   = new[] { noIntervals };
            options.OutputDirectory = Path.Combine(testDataDirectory, "EmptyIntervalsTest_All");

            factory   = new Factory(options);
            processor = new GenomeProcessor(factory, factory.GetReferenceGenome(genomePath), throttle);

            processor.Execute(2);

            // this time every vcf must be empty
            foreach (var workRequest in factory.WorkRequests)
            {
                AssertVariantCountInVcf(workRequest.OutputFilePath, 0);
            }
        }

        /// <summary>
        /// Opens the vcf at <paramref name="vcfPath"/> and asserts that it holds exactly
        /// <paramref name="expectedCount"/> variants.
        /// </summary>
        private static void AssertVariantCountInVcf(string vcfPath, int expectedCount)
        {
            using (var reader = new AlleleReader(vcfPath))
            {
                var variants = reader.GetVariants();
                Assert.Equal(expectedCount, variants.Count());
            }
        }
Example #29
0
        /// <summary>
        /// Checks <c>NeighborhoodBuilder.ConvertToCallableNeighborhoods</c> for three genome setups:
        /// no genome at all, a genome that lacks the neighborhood's chromosome, and a genome that
        /// contains it. The expected reference substring is "RRR" in the first two cases and "TAT" in
        /// the third.
        /// </summary>
        public void CreateCallableNbhdsTests()
        {
            var vcfFilePath = Path.Combine(TestPaths.LocalTestDataDirectory, "VeryMutated.genome.vcf");

            //the reader is disposable; scope it to the test so it is released when the test ends
            using (var variantSource = new AlleleReader(vcfFilePath))
            {
                var vcfNeighborhood = new VcfNeighborhood(0, "chr1", new VariantSite(123), new VariantSite(125));
                List <VcfNeighborhood> VcfNeighborhoods = new List <VcfNeighborhood>()
                {
                    vcfNeighborhood
                };

                //Test 1, genome is NULL

                var neighborhoodBuilder = new NeighborhoodBuilder(new PhasableVariantCriteria(), new VariantCallingParameters(),
                                                                  variantSource, null, 20);

                var neighborhoods = neighborhoodBuilder.ConvertToCallableNeighborhoods(VcfNeighborhoods);

                Assert.Equal(1, neighborhoods.Count());
                Assert.Equal(2, neighborhoods.First().VcfVariantSites.Count());
                Assert.Equal("chr1", neighborhoods[0].ReferenceName);
                Assert.Equal("RRR", neighborhoods[0].NbhdReferenceSequenceSubstring);

                //Test 2, genome exists, but doesn't have the right chr

                var    genomePath = Path.Combine(TestPaths.SharedGenomesDirectory, "Bacillus_cereus", "Sequence", "WholeGenomeFasta");
                var    refName    = "chr_wrong";
                Genome genome     = new Genome(genomePath, new List <string>()
                {
                    refName
                });

                //the returned reference was never read by this test; the call itself is kept so the
                //sequence of operations on the genome stays as it was
                genome.GetChrReference(refName);

                neighborhoodBuilder = new NeighborhoodBuilder(new PhasableVariantCriteria(), new VariantCallingParameters(),
                                                              variantSource, genome, 20);

                neighborhoods = neighborhoodBuilder.ConvertToCallableNeighborhoods(VcfNeighborhoods);
                Assert.Equal(1, neighborhoods.Count());
                Assert.Equal(2, neighborhoods.First().VcfVariantSites.Count());
                Assert.Equal("chr1", neighborhoods[0].ReferenceName);
                Assert.Equal("RRR", neighborhoods[0].NbhdReferenceSequenceSubstring);

                //Test 3, genome exists, and DOES have the right chr

                refName = "chr";
                genome  = new Genome(genomePath, new List <string>()
                {
                    refName
                });
                genome.GetChrReference(refName);

                neighborhoodBuilder = new NeighborhoodBuilder(new PhasableVariantCriteria(), new VariantCallingParameters(),
                                                              variantSource, genome, 20);

                vcfNeighborhood  = new VcfNeighborhood(0, "chr", new VariantSite(123), new VariantSite(125));
                VcfNeighborhoods = new List <VcfNeighborhood>()
                {
                    vcfNeighborhood
                };

                neighborhoods = neighborhoodBuilder.ConvertToCallableNeighborhoods(VcfNeighborhoods);
                Assert.Equal(1, neighborhoods.Count());
                Assert.Equal(2, neighborhoods.First().VcfVariantSites.Count());
                Assert.Equal("chr", neighborhoods[0].ReferenceName);
                Assert.Equal("TAT", neighborhoods[0].NbhdReferenceSequenceSubstring);
            }
        }
        /// <summary>
        /// Runs a <c>GenomeProcessor</c> for the given input and verifies the alleles read back from
        /// the resulting vcf file.
        /// </summary>
        /// <param name="bamFilePath">Bam path for the default options; not used when <paramref name="applicationOptions"/> is supplied.</param>
        /// <param name="vcfFilePath">
        /// Vcf file read back after processing. Its extension is changed to "genome.vcf" when
        /// <paramref name="doCheckReferences"/> is true.
        /// </param>
        /// <param name="intervalPath">Interval file for the default options; null or empty means no interval paths. Not used when <paramref name="applicationOptions"/> is supplied.</param>
        /// <param name="expectedVariants">Passed to <c>CheckVariants</c> together with the non-reference alleles found.</param>
        /// <param name="fakeReferences">When not null, a <c>MockGenome</c> built from these references replaces the factory's reference genome.</param>
        /// <param name="doCheckVariants">Whether the non-reference alleles are verified at all.</param>
        /// <param name="doCheckReferences">Whether a gvcf is requested (default options only) and the reference alleles are verified.</param>
        /// <param name="expectedNumCoveredPositions">Not read by this method.</param>
        /// <param name="threadByChr">When true the processor is constructed with an explicit <c>false</c> third argument, otherwise with the two-argument constructor.</param>
        /// <param name="doCountsOnly">When greater than zero, only the number of non-reference alleles is compared against this value instead of calling <c>CheckVariants</c>.</param>
        /// <param name="doLog">Becomes <c>DebugMode</c> of the default options.</param>
        /// <param name="callMnvs">Becomes <c>CallMNVs</c> of the default options.</param>
        /// <param name="applicationOptions">Options to run with; when null a default set is built from the other arguments. <c>OutputDirectory</c> is overwritten in either case.</param>
        /// <param name="collapse">Becomes <c>Collapse</c> of the default options.</param>
        public void Execute(
            string bamFilePath,
            string vcfFilePath,
            string intervalPath,
            List <CalledAllele> expectedVariants,
            List <ChrReference> fakeReferences = null,
            bool doCheckVariants            = true,
            bool doCheckReferences          = false,
            int expectedNumCoveredPositions = 0,
            bool threadByChr = false,
            int doCountsOnly = 0,
            bool doLog       = false,
            bool callMnvs    = true,
            PiscesApplicationOptions applicationOptions = null,
            bool collapse = true)
        {
            if (doCheckReferences)
            {
                vcfFilePath = Path.ChangeExtension(vcfFilePath, "genome.vcf");
            }

            if (applicationOptions == null)
            {
                applicationOptions = new PiscesApplicationOptions
                {
                    BAMPaths            = new[] { bamFilePath },
                    IntervalPaths       = string.IsNullOrEmpty(intervalPath) ? null : new[] { intervalPath },
                    GenomePaths         = new[] { GenomeDirectory },
                    OutputBiasFiles     = true,
                    DebugMode           = doLog,
                    CallMNVs            = callMnvs,
                    MaxGapBetweenMNV    = 10,
                    MaxSizeMNV          = 15,
                    Collapse            = collapse,
                    BamFilterParameters = new BamFilterParameters()
                    {
                        MinimumBaseCallQuality = 20
                    },
                    VariantCallingParameters = new VariantCallingParameters(),
                    VcfWritingParameters     = new VcfWritingParameters()
                    {
                        OutputGvcfFile = doCheckReferences,
                    },
                    CommandLineArguments = new string[] { "some", "cmds" }
                };
            }

            //the output always goes to this fixture's directory, also for caller supplied options
            applicationOptions.OutputDirectory = OutputDirectory;

            var factory = GetFactory(applicationOptions);

            IGenome genome;

            if (fakeReferences == null)
            {
                genome = factory.GetReferenceGenome(GenomeDirectory);
            }
            else
            {
                genome = new MockGenome(fakeReferences, GenomeDirectory);
            }

            var processor = threadByChr
                ? new GenomeProcessor(factory, genome, false)
                : new GenomeProcessor(factory, genome);

            processor.Execute(1);

            var alleles      = AlleleReader.GetAllVariantsInFile(vcfFilePath);
            var variantCalls = alleles.Where(a => !a.IsRefType).ToList();

            if (doCheckVariants)
            {
                if (doCountsOnly > 0)
                {
                    //expected value goes first so a failure message labels the two numbers correctly
                    Assert.Equal(doCountsOnly, variantCalls.Count);
                }
                else
                {
                    CheckVariants(variantCalls, expectedVariants);
                }
            }

            if (doCheckReferences)
            {
                var referenceAlleles = alleles.Where(a => a.IsRefType).ToList();

                //collected once up front instead of being re-projected for every allele
                var variantPositions = variantCalls.Select(v => v.ReferencePosition).ToList();

                // make sure no reference calls at variant positions
                Assert.Equal(referenceAlleles.Count,
                             alleles.Count(a => !variantPositions.Contains(a.ReferencePosition)));
            }
        }