Esempio n. 1
0
        /// <summary>
        /// Reads forward (from the pending backlog first, then from the reader) and gathers every allele
        /// that is co-located with <paramref name="CurrentVariant"/>.
        /// </summary>
        /// <param name="Reader">Source of further vcf records once the backlog has been consumed.</param>
        /// <param name="CurrentVariant">Allele whose locus defines "co-located".</param>
        /// <param name="mFirst">Passed straight through to <c>AlleleCompareByLoci.OrderAlleles</c>.</param>
        /// <param name="BackLogExists">
        /// On entry: whether <paramref name="TheBackLog"/> holds alleles still to be consumed.
        /// On exit: whether a later allele was read and parked in the backlog.
        /// </param>
        /// <param name="TheBackLog">Alleles read but not yet consumed; set to null on exit when no backlog remains.</param>
        /// <returns>The alleles found at the current position (empty when there are none).</returns>
        /// <exception cref="InvalidDataException">The comparison result was neither 0 nor -1.</exception>
        private static List <CalledAllele> AssembleColocatedList(
            VcfReader Reader, CalledAllele CurrentVariant, bool mFirst,
            ref bool BackLogExists, ref List <CalledAllele> TheBackLog)
        {
            var colocated = new List<CalledAllele>();

            while (true)
            {
                List<CalledAllele> candidates;

                if (BackLogExists)
                {
                    // Consume what the previous call left behind before touching the reader.
                    candidates    = TheBackLog;
                    BackLogExists = false;
                }
                else
                {
                    var record = new VcfVariant();
                    if (!Reader.GetNextVariant(record))
                    {
                        break;
                    }

                    candidates = VcfVariantUtilities.Convert(new List<VcfVariant> { record }).ToList();
                }

                // -1: current comes first, 0: co-located.
                int order = AlleleCompareByLoci.OrderAlleles(CurrentVariant, candidates.First(), mFirst);

                if (order == 0)
                {
                    colocated.AddRange(candidates);
                    continue;
                }

                if (order != -1)
                {
                    throw new InvalidDataException("Vcf needs to be ordered.");
                }

                // The alleles just read lie after the current position: park them for the next call.
                TheBackLog    = candidates;
                BackLogExists = true;
                break;
            }

            if (!BackLogExists)
            {
                TheBackLog = null;
            }

            return colocated;
        }
Esempio n. 2
0
        /// <summary>
        /// Computes a B-allele frequency from the reference and variant counts of a variant.
        /// </summary>
        /// <param name="variant">Supplies the reference allele and the first variant allele.</param>
        /// <param name="referenceCount">Count for the reference allele.</param>
        /// <param name="variantCount">Count for the variant allele.</param>
        /// <returns>
        /// Null when the summed count is below 1 or either allele is "."; otherwise the reference count divided by
        /// the summed count when <c>BAllelePreference</c> of the reference allele is strictly smaller than that of
        /// the variant allele, and the variant count divided by the summed count in every other case.
        /// </returns>
        public static double?GetBAlleleFrequency(VcfVariant variant, int referenceCount, int variantCount)
        {
            double combinedCount = referenceCount + variantCount;
            if (combinedCount < 1)
            {
                return null;
            }

            var referenceAllele = variant.ReferenceAllele;
            if (referenceAllele.Equals("."))
            {
                return null;
            }

            var alternateAllele = variant.VariantAlleles[0];
            if (alternateAllele.Equals("."))
            {
                return null;
            }

            bool useReferenceCount = BAllelePreference(referenceAllele) < BAllelePreference(alternateAllele);
            int  numerator         = useReferenceCount ? referenceCount : variantCount;

            return numerator / combinedCount;
        }
Esempio n. 3
0
 /// <summary>
 /// Tells whether two variants share chromosome name, position, reference allele and first variant allele.
 /// </summary>
 private static bool VariantsMatch(VcfVariant variant1, VcfVariant variant2)
 {
     if (variant1.ReferenceName != variant2.ReferenceName)
     {
         return false;
     }

     if (variant1.ReferencePosition != variant2.ReferencePosition)
     {
         return false;
     }

     if (variant1.ReferenceAllele != variant2.ReferenceAllele)
     {
         return false;
     }

     return variant1.VariantAlleles.First() == variant2.VariantAlleles.First();
 }
        /// <summary>
        /// Checks whether a variant carries both values needed to update its Q score: the depth from the
        /// INFO "DP" field and the call count from the second entry of the first genotype's "AD" field.
        /// </summary>
        /// <param name="originalVar">Variant to inspect.</param>
        /// <param name="depth">Receives the parsed "DP" value, or -1 when the method returns false.</param>
        /// <param name="callCount">Receives the parsed second "AD" entry, or -1 when the method returns false.</param>
        /// <returns>
        /// True only when "DP" is present and numeric and "AD" is present, has exactly two comma-separated
        /// entries, and its second entry is numeric.
        /// </returns>
        public static bool HaveInfoToUpdateQ(VcfVariant originalVar, out int depth, out int callCount)
        {
            depth     = -1;
            callCount = -1;

            if ((originalVar.InfoFields == null) || (originalVar.Genotypes == null) ||
                (originalVar.Genotypes.Count < 1))
            {
                return false;
            }

            string depthText;
            string alleleDepthText;

            if (!originalVar.InfoFields.TryGetValue("DP", out depthText) ||
                !originalVar.Genotypes[0].TryGetValue("AD", out alleleDepthText))
            {
                return false;
            }

            string[] alleleDepths = alleleDepthText.Split(',');

            // Previously a missing or oddly shaped "AD" still reported success with callCount == -1.
            if (alleleDepths.Length != 2)
            {
                return false;
            }

            int parsedDepth;
            int parsedCallCount;

            if (!int.TryParse(depthText, out parsedDepth) ||
                !int.TryParse(alleleDepths[1], out parsedCallCount))
            {
                return false;
            }

            depth     = parsedDepth;
            callCount = parsedCallCount;
            return true;
        }
Esempio n. 5
0
        /// <summary>
        /// Builds one truth and one query variant from vcf lines, runs the overlap step with cross-type
        /// matching on, finalizes both, and checks the resulting decisions against <paramref name="isTp"/>.
        /// </summary>
        public void OverlapWorks_DupDel([NotNull] string truthVar, [NotNull] string queryVar, string type, bool isTp)
        {
            const string sampleName    = "blah";
            const bool   isCrossTypeOn = true;

            var parserSettings = VcfVariantParserSettings.Create(new List<string> { sampleName });
            var truthRecord    = VcfVariant.TryParse(truthVar, parserSettings).GetOrThrow();
            var requestedType  = WittyerType.Parse(type);
            var specsByType    = InputSpec
                                 .GenerateCustomInputSpecs(!isCrossTypeOn, new[] { requestedType }, percentDistance: PercentDistance)
                                 .ToDictionary(spec => spec.VariantType, spec => spec);

            var breakendPairs = new Dictionary<IGeneralBnd, IVcfVariant>();
            var errors        = new List<string>();

            var truthV = (IMutableWittyerSimpleVariant)WittyerVcfReader.CreateVariant(
                truthRecord, truthRecord.Samples.First().Value, true, sampleName,
                specsByType, breakendPairs, errors, isCrossTypeOn);

            var queryRecord = VcfVariant.TryParse(queryVar, parserSettings).GetOrThrow();
            var queryV      = (IMutableWittyerVariant)WittyerVcfReader.CreateVariant(
                queryRecord, queryRecord.Samples.First().Value, false, sampleName,
                specsByType, breakendPairs, errors, isCrossTypeOn);

            var forest = TruthForest.Create(sampleName, VcfHeader.CreateBuilder(VcfVersion.FourPointOne).Build());
            forest.AddTarget(truthV);

            OverlappingUtils.DoOverlapping(forest.VariantTrees, queryV, OverlappingUtils.IsVariantAlleleMatch, isCrossTypeOn, true);
            queryV.Finalize(WitDecision.FalsePositive, EvaluationMode.CrossTypeAndSimpleCounting, null);
            truthV.Finalize(WitDecision.FalseNegative, EvaluationMode.CrossTypeAndSimpleCounting, null);

            var expectedQueryDecision = isTp ? WitDecision.TruePositive : WitDecision.FalsePositive;
            var expectedTruthDecision = isTp ? WitDecision.TruePositive : WitDecision.FalseNegative;

            Assert.Equal(expectedQueryDecision, queryV.Sample.Wit);
            Assert.Equal(expectedTruthDecision, truthV.Sample.Wit);
        }
        /// <summary>
        /// Reads the next variant from the reader and reports whether both of its variant types equal the expected pair.
        /// </summary>
        private bool TestVariant(VcfReader vr, VariantType type1, VariantType type2)
        {
            var nextVariant = new VcfVariant();
            vr.GetNextVariant(nextVariant);

            if (nextVariant.VarType1 == type1)
            {
                return nextVariant.VarType2 == type2;
            }

            return false;
        }
Esempio n. 7
0
 /// <summary>
 /// Creates a comparison entry for a variant, recording which inputs contained it and starting
 /// with an empty set of comparison results.
 /// </summary>
 /// <param name="variant">The variant being compared.</param>
 /// <param name="inBaseline">Stored in <c>InBaseline</c>.</param>
 /// <param name="inTest">Stored in <c>InTest</c>.</param>
 public Comparison(VcfVariant variant, bool inBaseline, bool inTest)
 {
     this.Variant           = variant;
     this.InBaseline        = inBaseline;
     this.InTest            = inTest;
     this.ComparisonResults = new Dictionary<string, ComparisonResult>();
 }
        /// <summary>
        /// Generates vcf output for a result holding a single intra-chromosome breakend pair and checks
        /// how many header lines are emitted.
        /// </summary>
        public void GenerateVcfStrings_IncludeHeaders()
        {
            // currently failing on linux :(
            if (MiscUtils.IsRunningAnyLinux)
            {
                return;
            }

            var parserSettings = VcfVariantParserSettings.Create(ImmutableList.Create(SampleName));
            var breakends      = VcfVariant.TryParse(Bnd1, parserSettings)
                                 .FollowedBy(VcfVariant.TryParse(Bnd2, parserSettings))
                                 .EnumerateSuccesses()
                                 .ToList();

            var firstSample = breakends[0]
                              .ToTryOfGenotypedVcfVariant(VariantNormalizer.TrimCommonBases)
                              .GetOrThrow()
                              .Samples.Values.First();
            var breakendPair = WittyerBndInternal.Create(breakends[0], firstSample,
                                                         WittyerType.IntraChromosomeBreakend, new List<uint>(), uint.MinValue, null, breakends[1]);

            var header         = VcfHeader.CreateBuilder(VcfVersion.FourPointOne).Build();
            var contigs        = breakends.Select(v => v.Contig).Distinct().ToList();
            var noVariants     = new Dictionary<WittyerType, IReadOnlyList<IWittyerVariant>>();
            var breakendsByType = new Dictionary<WittyerType, IReadOnlyList<IWittyerBnd>>
            {
                { WittyerType.IntraChromosomeBreakend, new List<IWittyerBnd> { breakendPair } }
            };
            var result = WittyerResult.Create(header, SampleName, contigs, false,
                                              noVariants, breakendsByType, new List<IVcfVariant>());

            var headerLines = WittyerVcfWriter.GenerateVcfStrings(result, null, null)
                              .TakeWhile(line => line.StartsWith(VcfConstants.Header.Prefix))
                              .ToList();

            // 11 = VcfVersion, WHO, WHAT, WHERE, WHY, WIT, WIN, WOW, date, version, column names
            Assert.Equal(11, headerLines.Count);
        }
Esempio n. 9
0
        /// <summary>
        /// Feeds every variant of a vcf into a <c>MutationCounter</c> that writes to a counts file.
        /// </summary>
        /// <param name="vcfIn">Path of the vcf to read.</param>
        /// <param name="outDir">Directory in which the counts file is created.</param>
        /// <returns>
        /// Path of the counts file: the input file name with ".vcf" replaced by ".counts", combined with
        /// <paramref name="outDir"/>.
        /// </returns>
        /// <remarks>
        /// An exception raised while adding a variant is logged with the variant's chromosome and position
        /// and then rethrown unchanged.
        /// </remarks>
        public static string WriteCountsFile(string vcfIn, string outDir)
        {
            var variant    = new VcfVariant();
            var countsPath = Path.Combine(outDir, Path.GetFileName(vcfIn).Replace(".vcf", ".counts"));
            var counter    = new MutationCounter();

            using (VcfReader readerA = new VcfReader(vcfIn))
            {
                counter.StartWriter(countsPath);

                try
                {
                    while (readerA.GetNextVariant(variant))
                    {
                        try
                        {
                            counter.Add(variant);
                        }
                        catch (Exception ex)
                        {
                            Logger.WriteToLog(string.Format("Fatal error processing vcf; Check {0}, position {1}.  Exception: {2}",
                                                            variant.ReferenceName, variant.ReferencePosition, ex));
                            throw;
                        }
                    }
                }
                finally
                {
                    // Runs on the failure path too, so the writer started above is not left open
                    // when a record throws. NOTE(review): presumably this releases what StartWriter
                    // opened - confirm against MutationCounter.
                    counter.CloseFalseCallsWriter();
                }
            }

            return countsPath;
        }
Esempio n. 10
0
        /// <summary>
        /// Parses a vcf line for the "tumor" sample, builds the wittyer variant for it, and checks the
        /// variant interval and both confidence intervals against the expected coordinates.
        /// </summary>
        public static void WittyerVariantIntervalCorrect([NotNull] string variant, uint start, uint end,
                                                         uint posStart, uint posEnd, uint endStart, uint endEnd)
        {
            const string sampleName = "tumor";

            var parserSettings = VcfVariantParserSettings.Create(
                ImmutableList.Create("normal", sampleName), GenomeAssembly.Hg38);
            var parsedVariant = VcfVariant.TryParse(variant, parserSettings).GetOrThrowDebug();

            var _ = WittyerType.ParseFromVariant(parsedVariant, false, sampleName, out var type);
            if (type == null)
            {
                throw new NotSupportedException("This test does not handle svType null");
            }

            var actual = WittyerVariantInternal.Create(
                parsedVariant, parsedVariant.Samples[sampleName], type, Bins, PercentDistance, BasepairDistance);

            var expectedInterval = ContigAndInterval.Create(parsedVariant.Contig, start, end);
            var expectedCiPos    = BedInterval.Create(posStart, posEnd);
            var expectedCiEnd    = BedInterval.Create(endStart, endEnd);

            MultiAssert.Equal(expectedInterval, actual);
            MultiAssert.Equal(expectedCiPos, actual.CiPosInterval);
            MultiAssert.Equal(expectedCiEnd, actual.CiEndInterval);
            MultiAssert.AssertAll();
        }
Esempio n. 11
0
        /// <summary>
        /// Reads the first three records of the test vcf through each GetNextVariant overload and checks
        /// the raw line, the split fields, the reader position and the parsed variant properties.
        /// </summary>
        public void GetNextVariantTests()
        {
            // Tabs are written as escapes so the expected lines survive editors that rewrite whitespace.
            const string firstLine  = "chr1\t10\t.\tA\t.\t25\tPASS\tDP=500\tGT:GQ:AD:VF:NL:SB:NC\t1/1:25:0,0:0.0000:23:0.0000:0.0010";
            const string secondLine = "chr1\t20\t.\tA\tT\t25\tPASS\tDP=500\tGT:GQ:AD:VF:NL:SB:NC\t1/1:25:0,0:0.0000:23:0.0000:0.0010";

            // The reader is disposed when the test ends, pass or fail.
            using (var vr = new VcfReader(VcfTestFile_1))
            {
                var    resultVariant = new VcfVariant();
                string resultString;

                // xUnit's Assert.Equal takes (expected, actual); the order matters for failure messages.
                vr.GetNextVariant(resultVariant, out resultString);
                Assert.Equal(firstLine, resultString.TrimEnd('\r'));
                Assert.Equal("chr1", resultVariant.ReferenceName);
                Assert.Equal("A", resultVariant.ReferenceAllele);
                Assert.Equal(".", resultVariant.VariantAlleles.First());
                Assert.Equal(1452, vr.Position());

                string[] resultStringArray;

                resultVariant = new VcfVariant();

                vr.GetNextVariant(resultVariant, out resultString, out resultStringArray);
                Assert.Equal(secondLine, resultString.TrimEnd('\r'));
                for (var i = 0; i < resultStringArray.Length; i++)
                {
                    resultStringArray[i] = resultStringArray[i].TrimEnd('\r');
                }
                Assert.Equal(secondLine.Split('\t'), resultStringArray);
                Assert.Equal("chr1", resultVariant.ReferenceName);

                resultVariant = new VcfVariant();

                vr.GetNextVariant(resultVariant);
                Assert.Equal("chr1", resultVariant.ReferenceName);
                Assert.Equal("A", resultVariant.ReferenceAllele);
                Assert.Equal("AT", resultVariant.VariantAlleles.First());
            }
        }
Esempio n. 12
0
        /// <summary>
        /// Returns the value that <c>TryParseInfoDouble</c> produces for the "DP" INFO key of a variant.
        /// The success flag of the parse is ignored, so the result is whatever the parser leaves in
        /// its out argument.
        /// </summary>
        private static double GetDP(VcfVariant variant)
        {
            double depth;
            variant.TryParseInfoDouble("DP", out depth);

            return depth;
        }
Esempio n. 13
0
        /// <summary>
        /// Extracts the copy number and end position of a variant from its "CN" and "END" entries.
        /// The last genotype column is consulted first; INFO entries, when present, overwrite those values.
        /// </summary>
        /// <param name="variant">Variant to inspect.</param>
        /// <param name="end">Receives the "END" value, or -1 when neither source has one.</param>
        /// <returns>The "CN" value, or -1 when neither source has one.</returns>
        protected int GetCopyNumber(VcfVariant variant, out int end)
        {
            int copyNumber = -1;
            end = -1;

            var genotypeColumns = variant.GenotypeColumns;
            if (genotypeColumns != null && genotypeColumns.Count > 0)
            {
                Dictionary<string, string> lastColumn = genotypeColumns[genotypeColumns.Count - 1];

                if (lastColumn.ContainsKey("CN"))
                {
                    copyNumber = int.Parse(lastColumn["CN"]);
                }

                if (lastColumn.ContainsKey("END"))
                {
                    end = int.Parse(lastColumn["END"]);
                }
            }

            // INFO is read last so that it takes precedence over the genotype column.
            var info = variant.InfoFields;

            if (info.ContainsKey("END"))
            {
                end = int.Parse(info["END"]);
            }

            if (info.ContainsKey("CN"))
            {
                copyNumber = int.Parse(info["CN"]);
            }

            return copyNumber;
        }
Esempio n. 14
0
 /// <summary>
 /// Tells whether two variants share position, first variant allele and reference allele.
 /// The chromosome is not compared.
 /// </summary>
 private static bool VariantsMatch(VcfVariant variant1, VcfVariant variant2)
 {
     // Already assume these are from the same chromosome
     if (variant1.ReferencePosition != variant2.ReferencePosition)
     {
         return false;
     }

     if (variant1.VariantAlleles[0] != variant2.VariantAlleles[0])
     {
         return false;
     }

     return variant1.ReferenceAllele == variant2.ReferenceAllele;
 }
Esempio n. 15
0
        /// <summary>
        /// Converts a vcf variant into a <c>CandidateAllele</c>, classifying it from its reference allele
        /// and first alternate allele.
        /// </summary>
        /// <param name="vcfVariant">Variant to convert; only its first alternate allele is used.</param>
        /// <returns>
        /// A candidate whose category is Unsupported when either allele is null or empty, Reference when
        /// the alleles are identical, Snv or Mnv when they differ but have equal length (1 or more bases),
        /// Insertion when the reference is one base long, Deletion when the alternate is one base long,
        /// and Unsupported otherwise.
        /// </returns>
        private static CandidateAllele Map(VcfVariant vcfVariant)
        {
            var alternateAllele = vcfVariant.VariantAlleles[0];
            var referenceAllele = vcfVariant.ReferenceAllele;
            var type            = AlleleCategory.Unsupported;

            if (!String.IsNullOrEmpty(referenceAllele) &&
                !String.IsNullOrEmpty(alternateAllele))
            {
                if (referenceAllele == alternateAllele)
                {
                    type = AlleleCategory.Reference;
                }
                // "else" is required: identical alleles have equal lengths too, so without it the
                // Reference category was always overwritten by Snv/Mnv.
                else if (referenceAllele.Length == alternateAllele.Length)
                {
                    type = alternateAllele.Length == 1 ? AlleleCategory.Snv : AlleleCategory.Mnv;
                }
                else if (referenceAllele.Length == 1)
                {
                    type = AlleleCategory.Insertion;
                }
                else if (alternateAllele.Length == 1)
                {
                    type = AlleleCategory.Deletion;
                }
            }

            return new CandidateAllele(vcfVariant.ReferenceName, vcfVariant.ReferencePosition,
                                       referenceAllele, alternateAllele, type);
        }
Esempio n. 16
0
        /// <summary>
        /// Reads the first three records of the test vcf through each GetNextVariant overload and checks
        /// the raw line, the split fields, the reader position and the parsed variant properties.
        /// </summary>
        public void GetNextVariantTests()
        {
            // Tabs are written as escapes so the expected lines survive editors that rewrite whitespace.
            const string firstLine  = "chr1\t10\t.\tA\t.\t25\tPASS\tDP=500\tGT:GQ:AD:VF:NL:SB:NC\t1/1:25:0,0:0.0000:23:0.0000:0.0010";
            const string secondLine = "chr1\t20\t.\tA\tT\t25\tPASS\tDP=500\tGT:GQ:AD:VF:NL:SB:NC\t1/1:25:0,0:0.0000:23:0.0000:0.0010";

            // The reader is disposed when the test ends, pass or fail.
            using (var vr = new VcfReader(VcfTestFile_1))
            {
                var    resultVariant = new VcfVariant();
                string resultString;

                // xUnit's Assert.Equal takes (expected, actual); the order matters for failure messages.
                vr.GetNextVariant(resultVariant, out resultString);
                Assert.Equal(firstLine, resultString.TrimEnd('\r'));
                Assert.Equal("chr1", resultVariant.ReferenceName);
                Assert.Equal("A", resultVariant.ReferenceAllele);
                Assert.Equal(".", resultVariant.VariantAlleles.First());

                // Note: the position assert below has been seen to fail for specific user configurations.
                // When it fails the error message is:
                //   Assert.Equal() Failure
                //   Expected: 1428
                //   Actual: 1452
                // If this happens to you, check your git attributes config file.
                // You might be handling vcf text file line endings differently, so the white space counts
                // differently in this test. In that case, the failure is purely cosmetic.
                //
                // Try: auto detect text files and perform LF normalization
                // # http://davidlaing.com/2012/09/19/customise-your-gitattributes-to-become-a-git-ninja/
                // *text = auto
                // *.cs     diff = csharp
                // *.bam binary
                // *.vcf text
                // .fa text eol = crlf

                if (vr.Position() == 1428)
                {
                    Console.WriteLine("This isn't critical, but you might want to change your line endings convention. ");
                    Console.WriteLine("This project was developed with \\CR\\LF , not \\LF convention.");
                }
                else
                {
                    Assert.Equal(1452, vr.Position());
                }

                string[] resultStringArray;

                resultVariant = new VcfVariant();

                vr.GetNextVariant(resultVariant, out resultString, out resultStringArray);
                Assert.Equal(secondLine, resultString.TrimEnd('\r'));
                for (var i = 0; i < resultStringArray.Length; i++)
                {
                    resultStringArray[i] = resultStringArray[i].TrimEnd('\r');
                }
                Assert.Equal(secondLine.Split('\t'), resultStringArray);
                Assert.Equal("chr1", resultVariant.ReferenceName);

                resultVariant = new VcfVariant();

                vr.GetNextVariant(resultVariant);
                Assert.Equal("chr1", resultVariant.ReferenceName);
                Assert.Equal("A", resultVariant.ReferenceAllele);
                Assert.Equal("AT", resultVariant.VariantAlleles.First());
            }
        }
Esempio n. 17
0
        /// <summary>
        /// Orders a called allele against a vcf variant by wrapping the allele's coordinate and chromosome
        /// in a temporary <c>VcfVariant</c> and delegating to <c>Extensions.OrderVariants</c>.
        /// </summary>
        /// <param name="a">Allele supplying the position and chromosome of the left-hand side.</param>
        /// <param name="b">Variant on the right-hand side.</param>
        /// <param name="mFirst">Passed through unchanged to <c>Extensions.OrderVariants</c>.</param>
        /// <returns>The result of <c>Extensions.OrderVariants</c>.</returns>
        public static int OrderVariants(CalledAllele a, VcfVariant b, bool mFirst)
        {
            var positionOnlyVariant = new VcfVariant();

            positionOnlyVariant.ReferencePosition = a.Coordinate;
            positionOnlyVariant.ReferenceName     = a.Chromosome;

            return Extensions.OrderVariants(positionOnlyVariant, b, mFirst);
        }
Esempio n. 18
0
        /// <summary>
        /// Parses the unsorted no-value vcf line and checks that the writer renders it as the sorted form.
        /// </summary>
        public void ToStringNoValueWorks()
        {
            var settings = VcfVariantParserSettings.Create(ImmutableList.Create(SampleName));
            var parsed   = VcfVariant.TryParse(NoValueUnsorted, settings).GetOrThrow();

            var rendered = WittyerVcfWriter.ToString(parsed, null);

            Assert.Equal(NoValueSorted, rendered);
        }
Esempio n. 19
0
        /// <summary>
        /// Parses a breakend vcf line and checks that the writer renders it back to the same text.
        /// </summary>
        public void ToStringBnd()
        {
            var settings = VcfVariantParserSettings.Create(ImmutableList.Create(SampleName));
            var parsed   = VcfVariant.TryParse(Bnd1, settings).GetOrThrow();

            var rendered = WittyerVcfWriter.ToString(parsed, null);

            Assert.Equal(Bnd1, rendered);
        }
Esempio n. 20
0
        /// <summary>
        /// Tells whether a reference panel entry and a vcf variant agree on chromosome, position,
        /// reference allele and first alternate allele.
        /// </summary>
        public static bool CheckForMatch(RefPanelEntry entry, VcfVariant var)
        {
            if (entry.Chr != var.ReferenceName)
            {
                return false;
            }

            if (entry.FwdStandFirstPositionOfMutation != var.ReferencePosition)
            {
                return false;
            }

            if (entry.FwdStrandRefAllele != var.ReferenceAllele)
            {
                return false;
            }

            return entry.FwdStrandAltAllele == var.VariantAlleles[0];
        }
Esempio n. 21
0
        /// <summary>
        /// Parses a vcf line with two samples and checks that the "NA12878" sample is typed as a
        /// copy number reference.
        /// </summary>
        public static void ParseReferenceVariantWorks([NotNull] string inputVariant)
        {
            const string sampleName = "NA12878";

            var parserSettings = VcfVariantParserSettings.Create(
                ImmutableList.Create(sampleName, "haha"), GenomeAssembly.Hg19);
            var parsedVariant = VcfVariant.TryParse(inputVariant, parserSettings).GetOrThrowDebug();

            WittyerType.ParseFromVariant(parsedVariant, false, sampleName, out var actualType);

            Assert.Equal(WittyerType.CopyNumberReference, actualType);
        }
Esempio n. 22
0
        /// <summary>
        /// Checks that an insertion of unknown length yields no interval length.
        /// </summary>
        public void GetInsertionIntervalNoLenIns()
        {
            var parserSettings = VcfVariantParserSettings.Create(ImmutableList.Create("blah"));
            var parsedVariant  = VcfVariant.TryParse(UnknownLength, parserSettings).GetOrThrow();

            var insertionInterval = WittyerBndInternal.GetInsertionInterval(parsedVariant);
            var intervalLength    = insertionInterval?.GetLength();

            MultiAssert.Equal(null, intervalLength);
            MultiAssert.AssertAll();
        }
Esempio n. 23
0
        /// <summary>
        ///     Writes the string form of a variant as one line of the output file.
        /// </summary>
        /// <param name="variant">Variant whose <c>ToString()</c> text is written.</param>
        /// <exception cref="ApplicationException">The file is not open.</exception>
        public void WriteVariant(VcfVariant variant)
        {
            if (IsOpen)
            {
                string line = variant.ToString();
                _writer.WriteLine(line);
                return;
            }

            // Refuse to write to a file that was never opened.
            throw new ApplicationException("ERROR: An attempt was made to write a variant to an unopened file.");
        }
Esempio n. 24
0
        /// <summary>
        /// Builds a translocation breakend pair from two vcf lines, both parsed for a single "normal"
        /// sample on GRCh37.
        /// </summary>
        /// <param name="bndLine1">Vcf line of the first breakend; its "normal" sample is used.</param>
        /// <param name="bndLine2">Vcf line of the mate breakend.</param>
        private static IWittyerBnd CreateWittyerBnd([NotNull] string bndLine1, [NotNull] string bndLine2)
        {
            const string sampleName = "normal";

            var firstSettings = VcfVariantParserSettings.Create(ImmutableList.Create(sampleName), GenomeAssembly.Grch37);
            var firstBreakend = VcfVariant.TryParse(bndLine1, firstSettings).GetOrThrowDebug();
            var firstSample   = firstBreakend.Samples[sampleName];

            var mateSettings = VcfVariantParserSettings.Create(ImmutableList.Create(sampleName), GenomeAssembly.Grch37);
            var mateBreakend = VcfVariant.TryParse(bndLine2, mateSettings).GetOrThrowDebug();

            return WittyerBndInternal.Create(firstBreakend, firstSample, WittyerType.TranslocationBreakend, Bins,
                                             BasepairDistance, PercentDistance, mateBreakend);
        }
Esempio n. 25
0
        /// <summary>
        /// Builds a single-allele variant from the given alleles and checks the B-allele frequency that
        /// <c>SNVReviewer.GetBAlleleFrequency</c> reports for the given counts.
        /// </summary>
        public void TestGetBAlleleFrequency(string refAllele, string altAllele, int refCount, int altCount,
                                            double?expectedFreq)
        {
            var variant = new VcfVariant
            {
                ReferenceAllele = refAllele,
                VariantAlleles  = new string[] { altAllele }
            };

            double? actualFreq = SNVReviewer.GetBAlleleFrequency(variant, refCount, altCount);

            Assert.Equal(expectedFreq, actualFreq);
        }
Esempio n. 26
0
        /// <summary>
        /// Runs a pairwise venn on two pool vcfs and checks the consensus output record-by-record against
        /// the expected consensus, including that exactly one record exists at chr15:92604460.
        /// </summary>
        public void VennVcf_CombineTwoPoolVariants_MergeRefCalls()
        {
            // This is from an issue where there were multiple co-located variants in one pool,
            // and just ref in the other, at chr15 92604460. The consensus answer should be
            // a single ref call (and not multiple ref calls!).
            var outDir      = TestPaths.LocalScratchDirectory;
            var vcfPathRoot = _TestDataPath;

            string vcfPathPoolA     = Path.Combine(vcfPathRoot, "C64-Ct-4_S17.genome.vcf");
            string vcfPathPoolB     = Path.Combine(vcfPathRoot, "C64-Ct-4_S18.genome.vcf");
            string vcfPathConsensus = Path.Combine(vcfPathRoot, "ExpectedConsensus2.vcf");

            string outputPath = Path.Combine(outDir, "Consensus2.vcf");

            // Start from a clean slate so a stale file cannot satisfy the existence check below.
            if (File.Exists(outputPath))
            {
                File.Delete(outputPath);
            }

            VennVcfOptions parameters = new VennVcfOptions();

            parameters.VariantCallingParams.MinimumFrequencyFilter = 0.03f;
            parameters.InputFiles        = new string[] { vcfPathPoolA, vcfPathPoolB };
            parameters.OutputDirectory   = outDir;
            parameters.ConsensusFileName = outputPath;
            VennProcessor venn = new VennProcessor(parameters.InputFiles, parameters);

            venn.DoPairwiseVenn(false);

            Assert.True(File.Exists(outputPath));
            List<VcfVariant> combinedVariants = VcfReader.GetAllVariantsInFile(outputPath);
            List<VcfVariant> expectedVariants = VcfReader.GetAllVariantsInFile(vcfPathConsensus);

            Assert.Equal(expectedVariants.Count, combinedVariants.Count);

            int numVariantsAtPos92604460 = 0;

            for (int i = 0; i < expectedVariants.Count; i++)
            {
                VcfVariant expected = expectedVariants[i];
                VcfVariant actual   = combinedVariants[i];

                if ((actual.ReferencePosition == 92604460) &&
                    (actual.ReferenceName == "chr15"))
                {
                    numVariantsAtPos92604460++;
                }

                Assert.Equal(expected.ToString(), actual.ToString());
            }

            // Expected value first, so a failure message reports the right sides.
            Assert.Equal(1, numVariantsAtPos92604460);
        }
Esempio n. 27
0
        /// <summary>
        /// Asserts that a called allele agrees with a baseline vcf variant on alleles, chromosome,
        /// position and genotype. The baseline must carry exactly one variant allele.
        /// </summary>
        /// <param name="baseline">Expected values, read from a vcf.</param>
        /// <param name="test">Allele under test.</param>
        public static void CheckVariantsMatch(VcfVariant baseline, CalledAllele test)
        {
            // Checked first so that a baseline without alleles fails as an assertion rather than
            // as an index error on the lines below.
            Assert.Equal(1, baseline.VariantAlleles.Length);

            Assert.Equal(baseline.ReferenceAllele, test.ReferenceAllele);
            Assert.Equal(baseline.VariantAlleles[0], test.AlternateAllele);
            Assert.Equal(baseline.ReferenceName, test.Chromosome);
            Assert.Equal(baseline.ReferencePosition, test.ReferencePosition);

            // A "." alternate allele counts as zero alternates when mapping the genotype string.
            int numAlts = (baseline.VariantAlleles[0] == ".") ? 0 : baseline.VariantAlleles.Length;

            Assert.Equal(VcfVariantUtilities.MapGTString(baseline.Genotypes[0]["GT"], numAlts), test.Genotype);
        }
Esempio n. 28
0
        /// <summary>
        /// Reads the next vcf record from the variant source and converts it to called alleles.
        /// </summary>
        /// <returns>The converted alleles, or an empty list when the source has no further record.</returns>
        private IEnumerable <CalledAllele> GetNextBlockOfOriginalAllelesFromVcfVar()
        {
            var nextRecord = new VcfVariant();

            if (_variantSource.GetNextVariant(nextRecord))
            {
                var singleRecord = new List<VcfVariant> { nextRecord };
                return VcfVariantUtilities.Convert(singleRecord);
            }

            return new List<CalledAllele>();
        }
Esempio n. 29
0
        /// <summary>
        /// Step 1: Load the normal het SNVs of interest.
        /// Fills <c>Variants</c> with the records of <c>Chromosome</c> that are single-allele SNVs and,
        /// when the vcf carries genotype columns, are PASS, heterozygous ("0/1" or "1/0") in the first
        /// genotype column, and have GQX of at least 30 if a GQX value is present.
        /// </summary>
        /// <param name="vcfPath">Path of the vcf to read.</param>
        protected void LoadVariants(string vcfPath)
        {
            Console.WriteLine("{0} Loading variants of interest from {1}", DateTime.Now, vcfPath);
            this.Variants = new List<VcfVariant>();
            int countThisChromosome = 0;
            using (VcfReader reader = new VcfReader(vcfPath, requireGenotypes: false))
            {
                // Assigned by the reader through the out argument on every call.
                VcfVariant variant;
                while (reader.GetNextVariant(out variant))
                {
                    if (variant.ReferenceName != this.Chromosome)
                    {
                        // Shortcut: If we've seen records for the desired chromosome, then as soon as we hit another chromosome,
                        // we can abort:
                        if (countThisChromosome > 0) break;
                        continue;
                    }
                    countThisChromosome++;

                    // Single-allele SNVs only:
                    if (variant.VariantAlleles.Length != 1 || variant.VariantAlleles[0].Length != 1 || variant.ReferenceAllele.Length != 1) continue;

                    // Genotype columns are not available if we use a dbSNP VCF file; FILTER may not say PASS there either.
                    bool hasGenotypes = variant.GenotypeColumns != null && variant.GenotypeColumns.Any();
                    if (hasGenotypes)
                    {
                        // PF variants only:
                        if (variant.Filters != "PASS") continue;

                        var firstSample = variant.GenotypeColumns[0];

                        string genotype;
                        if (!firstSample.TryGetValue("GT", out genotype)) continue; // no genotype - we don't know if it's a het SNV.
                        if (genotype != "0/1" && genotype != "1/0") continue;

                        // Also require they have a high enough quality score.
                        // Note: Allow no GQX field, in case we want to use another caller (e.g. Pisces) and not crash
                        string gqxText;
                        if (firstSample.TryGetValue("GQX", out gqxText))
                        {
                            // Vcf numbers always use '.' as the decimal separator, whatever the machine's culture.
                            float gqx = float.Parse(gqxText, System.Globalization.CultureInfo.InvariantCulture);
                            if (gqx < 30) continue;
                        }
                    }

                    // Note: Let's NOT require the variant be in dbSNP.  Maybe we didn't do annotation, either because
                    // we chose not to or because we're on a reference without annotation available.
                    // Remember all the variants that pass all our tests:
                    this.Variants.Add(variant);
                }
            }
            Console.WriteLine("Retained {0} variants, out of {1} records for {2}", this.Variants.Count, countThisChromosome, this.Chromosome);
        }
Esempio n. 30
0
        /// <summary>
        /// Copies a vcf to a new file, passing each variant whose mutation category has a calibrated rate
        /// through <c>UpdateVariant</c> before it is written.
        /// </summary>
        /// <param name="vcfIn">Vcf to read.</param>
        /// <param name="vcfOut">Vcf to write; an existing file is deleted first. Lines end with "\n".</param>
        /// <param name="sampleCountsFileName">Counts file used to build the lookup table; nothing is written when it is missing.</param>
        /// <param name="baselineQNoise">Passed to <c>GetPhredScaledCalibratedRates</c>.</param>
        /// <param name="zFactor">Passed to <c>GetPhredScaledCalibratedRates</c>.</param>
        /// <param name="maxQscore">Passed to <c>UpdateVariant</c>.</param>
        /// <param name="filterQScore">Passed to <c>UpdateVariant</c>.</param>
        public static void Recalibrate(string vcfIn, string vcfOut, string sampleCountsFileName,
                                       int baselineQNoise, double zFactor, int maxQscore, int filterQScore)
        {
            if (!File.Exists(sampleCountsFileName))
            {
                Logger.WriteToLog("Cannot recalibrate. Cannot find {0} ", sampleCountsFileName);
                return;
            }

            Logger.WriteToLog("Found counts file: {0} ", sampleCountsFileName);

            var calibratedRates = GetPhredScaledCalibratedRates(baselineQNoise, zFactor, sampleCountsFileName);

            // Nothing to recalibrate with: leave without touching the output.
            if (calibratedRates == null || calibratedRates.Count == 0)
            {
                return;
            }

            if (File.Exists(vcfOut))
            {
                File.Delete(vcfOut);
            }

            using (var reader = new VcfReader(vcfIn))
            using (var writer = new StreamWriter(vcfOut))
            {
                writer.NewLine = "\n";

                foreach (string headerLine in reader.HeaderLines)
                {
                    writer.WriteLine(headerLine);
                }

                var record = new VcfVariant();
                while (reader.GetNextVariant(record))
                {
                    var category = MutationCounter.GetMutationCategory(record);

                    if (calibratedRates.ContainsKey(category))
                    {
                        UpdateVariant(maxQscore, filterQScore, calibratedRates, record, category);
                    }

                    writer.WriteLine(record);
                }
            }
        }
Esempio n. 31
0
        /// <summary>
        /// Classifies a single-allele variant entry into a <see cref="MutationCategory"/>.
        /// </summary>
        /// <param name="consensusVariant">Variant entry carrying at most one variant allele.</param>
        /// <returns>
        /// Reference when there is no variant allele, or when the single-base allele is "." or equals
        /// the reference allele; Deletion when the reference allele is longer than the variant allele;
        /// Insertion when it is shorter; Other for equal lengths greater than one; otherwise the
        /// category whose name matches "{ref}to{alt}" ignoring case, or Other if none matches.
        /// </returns>
        /// <exception cref="ArgumentException">The entry carries more than one variant allele.</exception>
        public static MutationCategory GetMutationCategory(
            VcfVariant consensusVariant)
        {
            if (consensusVariant.VariantAlleles.Length == 0)
            {
                return MutationCategory.Reference;
            }

            if (consensusVariant.VariantAlleles.Length > 1)
            {
                throw new ArgumentException("This method is expecting only one variant allele per variant entry");
            }

            string refAllele = consensusVariant.ReferenceAllele;
            string altAllele = consensusVariant.VariantAlleles[0];

            int refLength = refAllele.Length;
            int altLength = altAllele.Length;

            if (refLength > altLength)
            {
                return MutationCategory.Deletion;
            }

            if (refLength < altLength)
            {
                return MutationCategory.Insertion;
            }

            // Lengths are equal from here on; anything that is not a single base is "Other".
            if ((refLength != 1) || (altLength != 1))
            {
                return MutationCategory.Other;
            }

            if ((altAllele == ".") || (altAllele == refAllele))
            {
                return MutationCategory.Reference;
            }

            string enumString = refAllele + "to" + altAllele;

            foreach (MutationCategory mutation in GetAllMutationCategories())
            {
                // Ordinal, case-insensitive match: the result must not depend on the current
                // culture's casing rules, and no lowered copies are allocated per iteration.
                if (string.Equals(enumString, mutation.ToString(), StringComparison.OrdinalIgnoreCase))
                {
                    return mutation;
                }
            }

            return MutationCategory.Other;
        }
Esempio n. 32
0
        /// <summary>
        /// Parses one tab-separated VCF line and returns (reference index, begin, end), with the
        /// begin/end interval widened by <paramref name="flankingLength"/> on each side.
        /// </summary>
        /// <param name="vcfLine">A single VCF record line.</param>
        /// <param name="renamer">Chromosome renamer handed to the VariantFeature constructor.</param>
        /// <param name="flankingLength">Amount subtracted from the begin and added to the end.</param>
        /// <exception cref="GeneralException">The line has fewer than VcfCommon.MinNumColumns fields.</exception>
        private static Tuple <ushort, int, int> GetTuple(string vcfLine, ChromosomeRenamer renamer, int flankingLength = 0)
        {
            string[] columns = vcfLine.Split('\t');

            bool hasEnoughColumns = columns.Length >= VcfCommon.MinNumColumns;
            if (!hasEnoughColumns)
            {
                throw new GeneralException($"Expected at least {VcfCommon.MinNumColumns} fields in the vcf string: [{vcfLine}]");
            }

            var feature = new VariantFeature(new VcfVariant(columns, vcfLine, false), renamer, new VID());

            ushort referenceIndex = feature.ReferenceIndex;
            int paddedBegin       = feature.VcfReferenceBegin - flankingLength;
            int paddedEnd         = feature.VcfReferenceEnd + flankingLength;

            return new Tuple <ushort, int, int>(referenceIndex, paddedBegin, paddedEnd);
        }
Esempio n. 33
0
        /// <summary>
        /// Lazily enumerates the remaining records, yielding a new VcfVariant for each line read.
        /// Typical use: foreach (VcfVariant variant in reader.GetVariants())
        /// Yields nothing when the file is not open.
        /// </summary>
        /// <exception cref="ApplicationException">
        /// RequireGenotypes is set and a record has no genotype data.
        /// </exception>
        public IEnumerable<VcfVariant> GetVariants()
        {
            // Only read when the file is open; otherwise the sequence is empty.
            if (IsOpen)
            {
                string vcfLine;
                while ((vcfLine = Reader.ReadLine()) != null)
                {
                    // Each record gets its own VcfVariant instance.
                    var parsed = new VcfVariant();
                    string[] columns = vcfLine.Split('\t');
                    ConvertColumnsToVariant(columns, parsed);

                    if (RequireGenotypes)
                    {
                        if (parsed.Genotypes == null || parsed.Genotypes.Count == 0)
                        {
                            throw new ApplicationException("Missing genotype columns in VCF file");
                        }
                    }

                    yield return parsed;
                }
            }
        }
Esempio n. 34
0
        /// <summary>
        /// Sets <c>VarType1</c> and <c>VarType2</c> of <paramref name="variant"/> from the "GT" value of
        /// its first sample. A missing, empty, "." or "./." genotype marks both as Missing. Allele
        /// indices are read as whole tokens separated by '/' or '|', so indices of 10 and above are
        /// handled; when only one of the first two alleles is a number, it is used for both.
        /// </summary>
        /// <param name="variant">Variant whose genotypes have already been populated.</param>
        private static void AssignVariantType(VcfVariant variant)
        {
            string genotype = null;

            // Only the first sample is consulted. Tolerate an absent or empty genotype list and a
            // null entry for the first sample.
            if (variant.Genotypes != null && variant.Genotypes.Count > 0 && variant.Genotypes[0] != null)
            {
                if (!variant.Genotypes[0].TryGetValue("GT", out genotype))
                {
                    genotype = null;
                }
            }

            // sanity check: support missing genotypes (an empty GT string counts as missing too)
            if (string.IsNullOrEmpty(genotype) || genotype == "./." || genotype == ".")
            {
                variant.VarType1 = VariantType.Missing;
                variant.VarType2 = VariantType.Missing;
                return;
            }

            // Handle usual cases like 0/0, 0/1, 1|0, 1/1, multi-digit indices like 10/11, as well as
            // special cases like ./1 and 1/. Splitting on the separators (rather than reading fixed
            // character positions) keeps multi-digit allele indices intact.
            string[] alleleTokens = genotype.Split('/', '|');

            int haplotypeA;
            if (!int.TryParse(alleleTokens[0], System.Globalization.NumberStyles.None,
                              System.Globalization.CultureInfo.InvariantCulture, out haplotypeA))
            {
                haplotypeA = -1;
            }

            int haplotypeB;
            if (alleleTokens.Length < 2 ||
                !int.TryParse(alleleTokens[1], System.Globalization.NumberStyles.None,
                              System.Globalization.CultureInfo.InvariantCulture, out haplotypeB))
            {
                haplotypeB = -1;
            }

            // Treat things like ./1 or 0/. (and haploid calls such as "1") as homozygous:
            if (haplotypeA == -1) haplotypeA = haplotypeB;
            if (haplotypeB == -1) haplotypeB = haplotypeA;

            variant.VarType1 = GetAlleleVariantType(variant, haplotypeA);
            variant.VarType2 = GetAlleleVariantType(variant, haplotypeB);
        }
Esempio n. 35
0
		/// <summary>
		/// Resolves a genotype allele index to a variant type.  Index 0 is the reference, -1 is a
		/// missing call, and any other index n selects the n-th alt allele (1-based), which is then
		/// classified against the reference allele by the (ref, alt) overload.  The documented
		/// classification rules are:
		/// - If ref==alt, type is reference.
		/// - Otherwise, trim off any common prefix and any common suffix.  Let |ref| denote the length of the
		///   reference allele after trimming, and |alt| denote the length of the alt allele after trimming.
		/// - If |ref|=0, it's an insertion
		/// - If |alt|=0, it's a deletion
		/// - If |ref|=|alt|=1, it's a SNV
		/// - If |ref| = |alt| > 1, it's a MNP
		/// - If |ref|>0 and |alt|>0 and |ref| != |alt|, it's a complex event
		/// </summary>
		/// <param name="variant">Variant supplying the reference allele and the alt alleles.</param>
		/// <param name="haplotype">Allele index taken from the GT tag.</param>
		/// <exception cref="Exception">The index is larger than the number of alt alleles.</exception>
		private static VariantType GetAlleleVariantType(VcfVariant variant, int haplotype)
		{
			switch (haplotype)
			{
				case 0:
					return VariantType.Reference;
				case -1:
					return VariantType.Missing;
			}

			int altAlleleCount = variant.VariantAlleles.Length;
			bool alleleExists = haplotype <= altAlleleCount;
			if (!alleleExists)
			{
				string message = string.Format("Error in variant at {0}:{1} - GT tag specifies nonexistent allele",
					variant.ReferenceName, variant.ReferencePosition);
				throw new Exception(message);
			}

			// GT indices are 1-based for alt alleles; the array is 0-based.
			string selectedAlt = variant.VariantAlleles[haplotype - 1];
			return GetAlleleVariantType(variant.ReferenceAllele, selectedAlt);
		}
Esempio n. 36
0
        /// <summary>
        ///     Populates a vcf variant object given an array of vcf columns.
        ///     Numeric columns (POS, QUAL) are parsed with the invariant culture so the result does not
        ///     depend on the machine's regional settings. <c>HasQuality</c> is assigned on every call so
        ///     that a variant object reused across records does not keep the previous record's flag.
        ///     INFO entries are split on the first '=' only, so values containing '=' are kept whole.
        /// </summary>
        /// <param name="cols">The tab-split columns of one VCF record.</param>
        /// <param name="variant">The variant object to fill in.</param>
        protected void ConvertColumnsToVariant(string[] cols, VcfVariant variant)
        {
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

            variant.ReferenceName = cols[VcfCommon.ChromIndex];
            variant.ReferencePosition = int.Parse(cols[VcfCommon.PosIndex], invariantCulture);
            variant.Identifier = cols[VcfCommon.IDIndex];
            variant.ReferenceAllele = cols[VcfCommon.RefIndex];
            variant.Filters = cols[VcfCommon.FilterIndex];

            // A QUAL of "." is the missing value. TryParse leaves Quality at 0 when the text is not a number.
            // NOTE(review): assumes HasQuality is meant to be true for any non-"." QUAL — confirm against VcfVariant's default.
            string qualityColumn = cols[VcfCommon.QualIndex];
            variant.HasQuality = qualityColumn != ".";
            double.TryParse(qualityColumn,
                            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                            invariantCulture, out variant.Quality);

            // parse the variant alleles
            variant.VariantAlleles = cols[VcfCommon.AltIndex].Split(',');

            // parse the info fields
            variant.InfoFields = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
            string InfoData = cols[VcfCommon.InfoIndex];
            if (InfoData == ".") InfoData = ""; // Special case: a "." in the INFO field should be treated like an empty string.
            string[] infoCols = InfoData.Split(InfoSplitChars, StringSplitOptions.RemoveEmptyEntries);

            int numInfoCols = infoCols.Length;

            // Reuse the existing tag-order array when its size already matches.
            if ((variant.InfoTagOrder == null) || (numInfoCols != variant.InfoTagOrder.Length))
            {
                variant.InfoTagOrder = new string[numInfoCols];
            }

            char[] keyValueSeparator = { '=' };
            for (int infoColIndex = 0; infoColIndex < numInfoCols; infoColIndex++)
            {
                string infoField = infoCols[infoColIndex];
                // Split into at most two parts: the key and the complete value (which may itself contain '=').
                string[] infoFieldKvp = infoField.Split(keyValueSeparator, 2);
                variant.InfoTagOrder[infoColIndex] = infoFieldKvp[0];
                // Flag-style entries without '=' are stored with a null value.
                variant.InfoFields[infoFieldKvp[0]] = (infoFieldKvp.Length == 1 ? null : infoFieldKvp[1]);
            }

            if (cols.Length > VcfCommon.GenotypeIndex) // Genotype columns present
            {
                // parse the genotype format field; the split result is cached while the FORMAT string stays the same
                if (cols[VcfCommon.FormatIndex] != GenotypeTagString)
                {
                    GenotypeTagString = cols[VcfCommon.FormatIndex];
                    GenotypeTagOrder = GenotypeTagString.Split(':');
                }
                variant.GenotypeTagOrder = GenotypeTagOrder;

                // parse the genotype data for each sample
                variant.Genotypes = new List<Dictionary<string, string>>();
                for (int sampleIndex = 0; sampleIndex < this.Samples.Count; sampleIndex++)
                {
                    string genotypeColumn = cols[VcfCommon.GenotypeIndex + sampleIndex];
                    if (genotypeColumn == ".")
                    {
                        // A sample column of "." is recorded as a null entry.
                        variant.Genotypes.Add(null);
                    }
                    else
                    {
                        string[] genotypeCols = genotypeColumn.Split(':');
                        variant.Genotypes.Add(ParseGenotype(variant.GenotypeTagOrder, genotypeCols));
                    }
                }

                // specify the variant type:
                AssignVariantType(variant);
            }
        }
Esempio n. 37
0
        /// <summary>
        ///     Reads the copy number ("CN") and end position ("END") of a variant. Values are looked up
        ///     first in the genotype of the last sample and then in the INFO fields; an INFO value, when
        ///     present, overrides the genotype value.
        /// </summary>
        /// <param name="variant">The variant to inspect.</param>
        /// <param name="end">Receives the END value, or -1 when none is present.</param>
        /// <returns>The CN value, or -1 when none is present.</returns>
        protected int GetCopyNumber(VcfVariant variant, out int end)
        {
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;
            int CN = -1;
            end = -1;
            string rawValue;

            if (variant.Genotypes != null && variant.Genotypes.Count > 0)
            {
                Dictionary<string, string> genotype = variant.Genotypes[variant.Genotypes.Count - 1];

                // A genotype entry can be null (a sample column of "." is stored as null), so guard
                // before looking anything up.
                if (genotype != null)
                {
                    if (genotype.TryGetValue("CN", out rawValue))
                    {
                        CN = int.Parse(rawValue, invariantCulture);
                    }
                    if (genotype.TryGetValue("END", out rawValue))
                    {
                        end = int.Parse(rawValue, invariantCulture);
                    }
                }
            }

            // INFO values take precedence over the per-sample values read above.
            if (variant.InfoFields.TryGetValue("END", out rawValue))
            {
                end = int.Parse(rawValue, invariantCulture);
            }
            if (variant.InfoFields.TryGetValue("CN", out rawValue))
            {
                CN = int.Parse(rawValue, invariantCulture);
            }

            return CN;
        }
Esempio n. 38
0
        /// <summary>
        ///     Retrieves the next available variant and returns false if no variants are available.
        ///     The supplied <paramref name="variant"/> object is filled in place.
        /// </summary>
        /// <param name="variant">The variant object that receives the parsed record.</param>
        /// <returns>True when a record was read; false when the file is not open or no line is left.</returns>
        /// <exception cref="ApplicationException">
        ///     RequireGenotypes is set and the record has no genotype data.
        /// </exception>
        public bool GetNextVariant(VcfVariant variant)
        {
            // sanity check: make sure the file is open
            if (!IsOpen) return false;

            // grab the next vcf line
            string line = Reader.ReadLine();
            if (line == null) return false;

            // split the columns and assign them to VcfVariant
            string[] cols = line.Split('\t');

            // convert the columns to a variant
            ConvertColumnsToVariant(cols, variant);

            // Genotypes is only assigned when genotype columns are present, so it may still be null
            // here; check for null as well as empty so the intended exception is raised.
            if (RequireGenotypes && (variant.Genotypes == null || variant.Genotypes.Count == 0))
                throw new ApplicationException("Missing genotype columns in VCF file");

            return true;
        }