/// <summary>
/// returns the allele depths for a sample, trying each supported source in
/// priority order (TAR/TIR, raw allele counts, the AD field, NR/NV) and
/// stopping at the first source that produces a result
/// </summary>
/// <param name="intermediateSampleFields">the parsed sample fields to inspect</param>
/// <returns>the allele depths, or null when no source yields a result</returns>
public static int[] GetAlleleDepths(IntermediateSampleFields intermediateSampleFields)
{
    var fields = intermediateSampleFields;

    // 1) TAR & TIR
    if (fields.TAR != null && fields.TIR != null)
    {
        var fromTarTir = GetAlleleDepthsUsingTarTir(fields);
        if (fromTarTir != null)
        {
            return fromTarTir;
        }
    }

    // 2) raw allele counts
    if (fields.TotalAlleleCount != null)
    {
        var fromAlleleCounts = GetAlleleDepthsUsingAlleleCounts(fields);
        if (fromAlleleCounts != null)
        {
            return fromAlleleCounts;
        }
    }

    // 3) the AD field
    if (fields.FormatIndices.AD != null)
    {
        var fromAd = GetAlleleDepthsUsingAd(fields);
        if (fromAd != null)
        {
            return fromAd;
        }
    }

    // 4) NR & NV; this is the last source, so its result (possibly null) is final
    if (fields.NR != null && fields.NV != null)
    {
        return GetAlleleDepthsUsingNrNv(fields);
    }

    return null;
}
/// <summary>
/// returns the raw allele count that corresponds to the supplied base
/// (despite the method name, the result is a nullable integer, not a string)
/// </summary>
/// <param name="s">the base to look up; only "A", "C", "G" and "T" are recognized</param>
/// <param name="intermediateSampleFields">the sample fields holding the per-base counts</param>
/// <returns>the matching count, or null when the base is not recognized</returns>
private static int? GetAlleleCountString(string s, IntermediateSampleFields intermediateSampleFields)
{
    if (s == "A")
    {
        return intermediateSampleFields.ACount;
    }

    if (s == "C")
    {
        return intermediateSampleFields.CCount;
    }

    if (s == "G")
    {
        return intermediateSampleFields.GCount;
    }

    if (s == "T")
    {
        return intermediateSampleFields.TCount;
    }

    // anything else (including null) has no associated count
    return null;
}
/// <summary>
/// returns the allele depths parsed from the comma-separated AD sample column
/// </summary>
/// <param name="intermediateSampleFields">the sample fields containing the sample columns and format indices</param>
/// <returns>
/// one depth per AD entry, or null when the AD column is absent, starts with ".",
/// or contains an entry that is not an integer
/// </returns>
private static int[] GetAlleleDepthsUsingAd(IntermediateSampleFields intermediateSampleFields)
{
    var adIndex = intermediateSampleFields.FormatIndices.AD;
    if (adIndex == null)
    {
        return null;
    }

    // the sample may have fewer columns than the FORMAT field declares
    var columns = intermediateSampleFields.SampleColumns;
    if (columns.Length <= adIndex.Value)
    {
        return null;
    }

    var tokens = columns[adIndex.Value].Split(',');

    // a leading "." marks the value as missing
    if (tokens[0] == ".")
    {
        return null;
    }

    var depths   = new int[tokens.Length];
    var position = 0;

    foreach (var token in tokens)
    {
        // a single unparsable entry invalidates the whole field
        if (!int.TryParse(token, out var depth))
        {
            return null;
        }

        depths[position++] = depth;
    }

    return depths;
}
/// <summary>
/// returns one variant frequency per alternate allele, computed as the raw
/// count of that allele's base divided by the total allele count
/// </summary>
/// <param name="sampleFields">the sample fields holding the alleles and raw counts</param>
/// <returns>
/// the frequencies (all zero when the total count is zero), or null when the total
/// count is missing, the site is a reference site ("." as the only alternate
/// allele), or any allele is not exactly one base long
/// </returns>
private static double[] GetVariantFrequenciesUsingAlleleCounts(IntermediateSampleFields sampleFields)
{
    var refAllele  = sampleFields.VcfRefAllele;
    var altAlleles = sampleFields.AltAlleles;

    bool hasMultiBaseRef = refAllele.Length != 1;
    bool hasMultiBaseAlt = altAlleles.Any(altAllele => altAllele.Length != 1);
    bool isReference     = altAlleles.Length == 1 && altAlleles[0] == ".";

    // raw counts only map onto alleles when every allele is a single base
    var totalAlleleCount = sampleFields.TotalAlleleCount;
    if (totalAlleleCount == null || isReference || hasMultiBaseRef || hasMultiBaseAlt)
    {
        return null;
    }

    var frequencies = new double[altAlleles.Length];

    // avoid dividing by zero: leave every frequency at its default of 0.0
    if (totalAlleleCount == 0)
    {
        return frequencies;
    }

    var denominator = (double)totalAlleleCount;

    for (int altIndex = 0; altIndex < frequencies.Length; altIndex++)
    {
        frequencies[altIndex] = GetAlleleCount(sampleFields, altIndex) / denominator;
    }

    return frequencies;
}
/// <summary>
/// returns the raw count for the alternate allele at the given index
/// </summary>
/// <param name="sampleFields">the sample fields holding the alternate alleles and per-base counts</param>
/// <param name="alleleIndex">the zero-based index into the alternate alleles</param>
/// <returns>
/// the count for "A", "C", "G" or "T"; zero when that count is missing or the
/// allele is not one of those four bases
/// </returns>
private static int GetAlleleCount(IntermediateSampleFields sampleFields, int alleleIndex)
{
    switch (sampleFields.AltAlleles[alleleIndex])
    {
        case "A":
            return sampleFields.ACount ?? 0;
        case "C":
            return sampleFields.CCount ?? 0;
        case "G":
            return sampleFields.GCount ?? 0;
        case "T":
            return sampleFields.TCount ?? 0;
        default:
            return 0;
    }
}
/// <summary>
/// returns one variant frequency per alternate allele, computed from the AD
/// sample column as allele depth divided by total depth
/// </summary>
/// <param name="sampleFields">the sample fields containing the sample columns and format indices</param>
/// <returns>
/// the frequencies (all zero when the total depth is zero), or null when the AD
/// column is absent, contains invalid values, or does not provide exactly one
/// depth per alternate allele
/// </returns>
private static double[] GetVariantFrequenciesUsingAlleleDepths(IntermediateSampleFields sampleFields)
{
    var adIndex = sampleFields.FormatIndices.AD;
    if (adIndex == null)
    {
        return null;
    }

    // the sample may have fewer columns than the FORMAT field declares
    var columns = sampleFields.SampleColumns;
    if (columns.Length <= adIndex.Value)
    {
        return null;
    }

    int altCount = sampleFields.AltAlleles.Length;

    // NOTE(review): the parsed depths are compared against the number of alternate
    // alleles, so the helper presumably drops the reference depth - confirm
    var (depths, isValid, depthSum) = GetAlleleDepths(columns[adIndex.Value]);

    if (!isValid || altCount != depths.Length)
    {
        return null;
    }

    var frequencies = new double[altCount];

    // avoid dividing by zero (which would yield NaN): keep the default 0.0 entries
    if (depthSum == 0)
    {
        return frequencies;
    }

    var denominator = (double)depthSum;

    for (int i = 0; i < altCount; i++)
    {
        frequencies[i] = depths[i] / denominator;
    }

    return frequencies;
}
/// <summary>
/// returns the allele depths using TAR and TIR
/// </summary>
/// <param name="intermediateSampleFields">the sample fields holding TAR, TIR and the alternate alleles</param>
/// <returns>
/// a two-element array { TAR, TIR }, or null when either value is missing or
/// there is more than one alternate allele
/// </returns>
private static int[] GetAlleleDepthsUsingTarTir(IntermediateSampleFields intermediateSampleFields)
{
    var fields = intermediateSampleFields;

    // TAR/TIR describe a single alternate allele, so multi-allelic sites are rejected
    if (fields.TIR != null && fields.TAR != null && fields.AltAlleles.Length <= 1)
    {
        return new[] { fields.TAR.Value, fields.TIR.Value };
    }

    return null;
}
/// <summary>
/// returns the variant frequency taken directly from the VF value
/// </summary>
/// <param name="sampleFields">the sample fields holding VF and the alternate alleles</param>
/// <returns>
/// a single-element array containing VF, or null when VF is missing or there
/// is more than one alternate allele
/// </returns>
private static double[] GetVariantFrequenciesUsingVf(IntermediateSampleFields sampleFields)
{
    // a lone VF value cannot be attributed to several alternate alleles
    bool isUsable = sampleFields.AltAlleles.Length <= 1 && sampleFields.VF != null;

    return isUsable ? new[] { sampleFields.VF.Value } : null;
}
/// <summary>
/// returns the allele depths using NR and NV from Platypus
/// </summary>
/// <param name="intermediateSampleFields">the sample fields holding NR, NV and the alternate alleles</param>
/// <returns>
/// a two-element array { NR - NV, NV }, or null when there is more than one
/// alternate allele or either value is missing
/// </returns>
private static int[] GetAlleleDepthsUsingNrNv(IntermediateSampleFields intermediateSampleFields)
{
    var fields = intermediateSampleFields;

    bool isUnusable = fields.AltAlleles.Length > 1 || fields.NR == null || fields.NV == null;
    if (isUnusable)
    {
        return null;
    }

    // first entry: NR minus NV, second entry: NV
    var remainingReads = fields.NR.Value - fields.NV.Value;
    var variantReads   = fields.NV.Value;

    return new[] { remainingReads, variantReads };
}
/// <summary>
/// returns the variant frequency computed as NV divided by NR
/// </summary>
/// <param name="sampleFields">the sample fields holding NR, NV and the alternate alleles</param>
/// <returns>
/// a single-element array containing NV / NR, the shared ZeroVf value when NR is
/// zero, or null when either value is missing or there is more than one
/// alternate allele
/// </returns>
private static double[] GetVariantFrequenciesUsingNrNv(IntermediateSampleFields sampleFields)
{
    bool hasBothCounts = sampleFields.NR != null && sampleFields.NV != null;

    // NR and NV: never observed with multiple alternate alleles
    if (!hasBothCounts || sampleFields.AltAlleles.Length > 1)
    {
        return null;
    }

    // avoid dividing by zero
    if (sampleFields.NR == 0)
    {
        return ZeroVf;
    }

    double frequency = (double)sampleFields.NV / (double)sampleFields.NR;
    return new[] { frequency };
}
/// <summary>
/// returns the variant frequency computed as TIR / (TAR + TIR)
/// </summary>
/// <param name="sampleFields">the sample fields holding TAR, TIR and the alternate alleles</param>
/// <returns>
/// a single-element array containing the frequency, the shared ZeroVf value when
/// TIR + TAR is zero, or null when either value is missing or there is more than
/// one alternate allele
/// </returns>
private static double[] GetVariantFrequenciesUsingTarTir(IntermediateSampleFields sampleFields)
{
    bool hasBothCounts = sampleFields.TIR != null && sampleFields.TAR != null;

    // TAR and TIR: never observed with multiple alternate alleles
    if (!hasBothCounts || sampleFields.AltAlleles.Length > 1)
    {
        return null;
    }

    // avoid dividing by zero
    var combinedReads = sampleFields.TIR + sampleFields.TAR;
    if (combinedReads == 0)
    {
        return ZeroVf;
    }

    double tirReads = (double)sampleFields.TIR;
    double tarReads = (double)sampleFields.TAR;

    return new[] { tirReads / (tarReads + tirReads) };
}
/// <summary>
/// builds an ISample from the colon-delimited sample genotype field
/// </summary>
/// <param name="sampleColumn">the raw sample column text</param>
/// <returns>
/// the populated sample, or Sample.EmptySample when there are no format indices,
/// the column is null or empty, or the column consists solely of "."
/// </returns>
private ISample ExtractSample(string sampleColumn)
{
    // nothing can be interpreted without format indices
    if (_formatIndices == null)
    {
        return Sample.EmptySample;
    }

    if (string.IsNullOrEmpty(sampleColumn))
    {
        return Sample.EmptySample;
    }

    var columns = sampleColumn.Split(':');

    // a lone "." marks the entire sample as missing
    bool isMissingSample = columns.Length == 1 && columns[0] == ".";
    if (isMissingSample)
    {
        return Sample.EmptySample;
    }

    var fields = new IntermediateSampleFields(_vcfColumns, _formatIndices, columns);

    // derive each output value from the intermediate fields
    var depthsPerAllele  = AlleleDepths.GetAlleleDepths(fields);
    var filterFailed     = FailedFilter.GetFailedFilter(fields);
    var gt               = Genotype.GetGenotype(fields);
    var gq               = GenotypeQuality.GetGenotypeQuality(fields);
    var depth            = TotalDepth.GetTotalDepth(_infoDepth, fields);
    var frequencies      = VariantFrequency.GetVariantFrequencies(fields);
    var splitReads       = ReadCounts.GetSplitReadCounts(fields);
    var pairedEndReads   = ReadCounts.GetPairEndReadCounts(fields);

    // loss of heterozygosity: the major chromosome count accounts for the entire
    // copy number, and the copy number is greater than one
    var hasLossOfHeterozygosity = false;
    if (fields.MajorChromosomeCount != null && fields.CopyNumber != null)
    {
        var copyNumber = fields.CopyNumber.Value;
        hasLossOfHeterozygosity = copyNumber > 1 && fields.MajorChromosomeCount.Value == copyNumber;
    }

    return new Sample(
        gt,
        gq,
        frequencies,
        depth,
        depthsPerAllele,
        filterFailed,
        fields.CopyNumber,
        hasLossOfHeterozygosity,
        fields.DenovoQuality,
        splitReads,
        pairedEndReads,
        fields.RepeatNumber,
        fields.RepeatNumberSpan,
        fields.MAD,
        fields.SCH,
        fields.PLG,
        fields.PCN,
        fields.DCS,
        fields.DID,
        fields.DST,
        fields.PCH,
        fields.CHC,
        fields.AQ,
        fields.LQ);
}
/// <summary>
/// returns the allele depths using the raw per-base allele counts
/// </summary>
/// <param name="intermediateSampleFields">the sample fields holding the alleles and raw counts</param>
/// <returns>
/// the reference count followed by one count per alternate allele, or null when
/// the total allele count is missing, any allele is not exactly one base long,
/// or a count cannot be found for one of the alleles
/// </returns>
private static int[] GetAlleleDepthsUsingAlleleCounts(IntermediateSampleFields intermediateSampleFields)
{
    var fields = intermediateSampleFields;

    if (fields.TotalAlleleCount == null)
    {
        return null;
    }

    // raw counts are per base, so the reference and every alternate allele must be one base long
    bool areAllSingleBase = fields.VcfRefAllele.Length == 1 &&
                            fields.AltAlleles.All(altAllele => altAllele.Length == 1);
    if (!areAllSingleBase)
    {
        return null;
    }

    var altAlleles = fields.AltAlleles;
    var depths     = new int[altAlleles.Length + 1];

    // slot 0: reference allele
    var refCount = GetAlleleCountString(fields.VcfRefAllele, fields);
    if (refCount == null)
    {
        return null;
    }

    depths[0] = refCount.Value;

    // slots 1..n: alternate alleles, in order
    for (var altIndex = 0; altIndex < altAlleles.Length; altIndex++)
    {
        var altCount = GetAlleleCountString(altAlleles[altIndex], fields);
        if (altCount == null)
        {
            return null;
        }

        depths[altIndex + 1] = altCount.Value;
    }

    return depths;
}
/// <summary>
/// returns the variant frequencies for a sample, trying each supported source in
/// priority order (VF, TAR/TIR, raw allele counts, the AD field, NR/NV) and
/// keeping the result of the first source that produces one
/// </summary>
/// <param name="sampleFields">the parsed sample fields to inspect</param>
/// <returns>the variant frequencies, or null when no source yields a result</returns>
public static double[] GetVariantFrequencies(IntermediateSampleFields sampleFields)
{
    double[] vf = null;

    // use VF
    if (sampleFields.VF != null)
    {
        vf = GetVariantFrequenciesUsingVf(sampleFields);
    }

    // use TAR & TIR
    // the vf == null guard keeps this a fallback like every other source; without
    // it, a frequency already derived from VF would be discarded and recomputed
    if (vf == null && sampleFields.TAR != null && sampleFields.TIR != null)
    {
        vf = GetVariantFrequenciesUsingTarTir(sampleFields);
    }

    // use allele counts
    if (vf == null && sampleFields.TotalAlleleCount != null)
    {
        vf = GetVariantFrequenciesUsingAlleleCounts(sampleFields);
    }

    // use allele depths
    if (vf == null && sampleFields.FormatIndices.AD != null)
    {
        vf = GetVariantFrequenciesUsingAlleleDepths(sampleFields);
    }

    // use NR & NV
    if (vf == null && sampleFields.NR != null && sampleFields.NV != null)
    {
        vf = GetVariantFrequenciesUsingNrNv(sampleFields);
    }

    return vf;
}