/// <summary>
/// Computes one variant frequency per alternate allele from the sample's AD column.
/// </summary>
/// <param name="sampleFields">sample fields; the AD FORMAT index, the sample columns and the alternate alleles are read</param>
/// <returns>
/// one frequency per alternate allele (all zeros when the total depth is 0), or null when
/// the AD column is unavailable, a depth value is invalid, or the number of parsed depths
/// differs from the number of alternate alleles
/// </returns>
private static double[] GetVariantFrequenciesUsingAlleleDepths(IntermediateSampleFields sampleFields)
{
    var adIndex = sampleFields.FormatIndices.AD;

    // the sample column may hold fewer fields than the FORMAT column declares
    if (adIndex == null || sampleFields.SampleColumns.Length <= adIndex.Value)
    {
        return null;
    }

    int altAlleleCount = sampleFields.AltAlleles.Length;
    string rawDepths   = sampleFields.SampleColumns[adIndex.Value];

    // NOTE(review): the length check below implies the parser yields one depth per
    // alternate allele; the parser is defined elsewhere - confirm
    (var depths, bool isValid, int depthSum) = GetAlleleDepths(rawDepths);

    if (!isValid || depths.Length != altAlleleCount)
    {
        return null;
    }

    var frequencies = new double[altAlleleCount];

    // a zero total depth would produce NaNs, so report zero frequencies instead
    if (depthSum == 0)
    {
        return frequencies;
    }

    for (var i = 0; i < altAlleleCount; i++)
    {
        frequencies[i] = depths[i] / (double)depthSum;
    }

    return frequencies;
}
/// <summary>
/// Looks up the raw base count that corresponds to the alternate allele at the given index.
/// </summary>
/// <param name="sampleFields">sample fields providing the alternate alleles and the A/C/G/T counts</param>
/// <param name="alleleIndex">index into the alternate allele array</param>
/// <returns>the matching base count, or 0 when that count is missing or the allele is not A, C, G or T</returns>
private static int GetAlleleCount(IntermediateSampleFields sampleFields, int alleleIndex)
{
    switch (sampleFields.AltAlleles[alleleIndex])
    {
        case "A":
            return sampleFields.ACount ?? 0;
        case "C":
            return sampleFields.CCount ?? 0;
        case "G":
            return sampleFields.GCount ?? 0;
        case "T":
            return sampleFields.TCount ?? 0;
        default:
            // anything other than a single A/C/G/T base has no raw count
            return 0;
    }
}
/// <summary>
/// returns the allele count stored for the supplied base (A, C, G or T)
/// </summary>
/// <param name="s">the base to look up</param>
/// <param name="intermediateSampleFields">sample fields providing the A/C/G/T counts</param>
/// <returns>the stored count for the base, or null when the base is not A, C, G or T</returns>
private static int? GetAlleleCountString(string s, IntermediateSampleFields intermediateSampleFields)
{
    switch (s)
    {
        case "A":
            return intermediateSampleFields.ACount;
        case "C":
            return intermediateSampleFields.CCount;
        case "G":
            return intermediateSampleFields.GCount;
        case "T":
            return intermediateSampleFields.TCount;
        default:
            // unsupported base: no count available
            return null;
    }
}
/// <summary>
/// Returns the variant frequencies for a sample, trying VF, TAR/TIR, the raw allele
/// counts and finally the AD column.
/// </summary>
/// <param name="sampleFields">sample fields to derive the frequencies from</param>
/// <returns>the variant frequencies, or null when no source produced a result</returns>
public static double[] GetVariantFrequencies(IntermediateSampleFields sampleFields)
{
    // start from VF when it is present
    double[] frequencies = sampleFields.VF != null
        ? GetVariantFrequenciesUsingVf(sampleFields)
        : null;

    // NOTE(review): unlike the fallbacks below, this step is not guarded by a null check,
    // so whenever both TAR and TIR are present their result replaces the VF-derived value
    bool hasTarAndTir = sampleFields.TAR != null && sampleFields.TIR != null;
    if (hasTarAndTir)
    {
        frequencies = GetVariantFrequenciesUsingTarTir(sampleFields);
    }

    // fall back to the raw allele counts
    if (frequencies == null && sampleFields.TotalAlleleCount != null)
    {
        frequencies = GetVariantFrequenciesUsingAlleleCounts(sampleFields);
    }

    // last resort: the AD column
    if (frequencies == null && sampleFields.FormatIndices.AD != null)
    {
        frequencies = GetVariantFrequenciesUsingAlleleDepths(sampleFields);
    }

    return frequencies;
}
/// <summary>
/// Computes one variant frequency per alternate allele from the raw A/C/G/T counts.
/// </summary>
/// <param name="sampleFields">sample fields providing the reference allele, alternate alleles and allele counts</param>
/// <returns>
/// one frequency per alternate allele (all zeros when the total allele count is 0), or null
/// when the total count is missing, the only alternate allele is ".", or the reference or
/// any alternate allele is not a single base
/// </returns>
private static double[] GetVariantFrequenciesUsingAlleleCounts(IntermediateSampleFields sampleFields)
{
    bool hasSingleBaseRef  = sampleFields.VcfRefAllele.Length == 1;
    bool hasSingleBaseAlts = sampleFields.AltAlleles.AreAllAltAllelesSingleBase();
    bool isRefOnly         = sampleFields.AltAlleles.Length == 1 && sampleFields.AltAlleles[0] == ".";

    // raw counts only exist per base, so every allele involved must be a single base
    bool canUseCounts = sampleFields.TotalAlleleCount != null
                        && !isRefOnly
                        && hasSingleBaseRef
                        && hasSingleBaseAlts;

    if (!canUseCounts)
    {
        return null;
    }

    int altAlleleCount = sampleFields.AltAlleles.Length;
    var frequencies    = new double[altAlleleCount];

    // avoid dividing by zero: report zero frequencies instead
    if (sampleFields.TotalAlleleCount == 0)
    {
        return frequencies;
    }

    double totalCount = sampleFields.TotalAlleleCount.Value;

    for (var altIndex = 0; altIndex < altAlleleCount; altIndex++)
    {
        frequencies[altIndex] = GetAlleleCount(sampleFields, altIndex) / totalCount;
    }

    return frequencies;
}
/// <summary>
/// returns the allele depths parsed from the comma-separated AD column
/// </summary>
/// <param name="intermediateSampleFields">sample fields providing the AD FORMAT index and the sample columns</param>
/// <returns>
/// the parsed depths in column order, or null when the AD column is unavailable,
/// starts with "." or contains a value that cannot be parsed as an integer
/// </returns>
private static int[] GetAlleleDepthsUsingAd(IntermediateSampleFields intermediateSampleFields)
{
    var adIndex = intermediateSampleFields.FormatIndices.AD;
    var columns = intermediateSampleFields.SampleColumns;

    // the sample column may hold fewer fields than the FORMAT column declares
    if (adIndex == null || columns.Length <= adIndex.Value)
    {
        return null;
    }

    var depthTokens = columns[adIndex.Value].OptimizedSplit(',');

    // a leading "." marks the depths as missing
    if (depthTokens[0] == ".")
    {
        return null;
    }

    var depths = new int[depthTokens.Length];

    for (var tokenIndex = 0; tokenIndex < depthTokens.Length; tokenIndex++)
    {
        (int depth, bool parseFailed) = depthTokens[tokenIndex].OptimizedParseInt32();

        // one bad value invalidates the whole column
        if (parseFailed)
        {
            return null;
        }

        depths[tokenIndex] = depth;
    }

    return depths;
}
/// <summary>
/// Returns the total depth for a sample from the first available source, in this order:
/// TAR + TIR, the total allele count, the DPI column, the DP column, and finally the
/// supplied INFO depth.
/// </summary>
/// <param name="infoDepth">depth taken from the INFO field; used only when no sample-level source is present</param>
/// <param name="intermediateSampleFields">sample fields to derive the depth from</param>
/// <returns>the total depth, or null when the selected source has no usable value</returns>
public static int? GetTotalDepth(int? infoDepth, IntermediateSampleFields intermediateSampleFields)
{
    bool hasTarAndTir = intermediateSampleFields.TAR != null && intermediateSampleFields.TIR != null;

    if (hasTarAndTir)
    {
        return GetTotalDepthUsingTarTir(intermediateSampleFields);
    }
    else if (intermediateSampleFields.TotalAlleleCount != null)
    {
        // base counts
        return GetTotalDepthUsingAlleleCounts(intermediateSampleFields);
    }
    else if (intermediateSampleFields.FormatIndices.DPI != null)
    {
        return GetTotalDepthUsingDpi(intermediateSampleFields);
    }
    else if (intermediateSampleFields.FormatIndices.DP != null)
    {
        return GetTotalDepthUsingDp(intermediateSampleFields);
    }

    // nothing at the sample level: use INFO DP (Pisces)
    return infoDepth;
}
/// <summary>
/// returns the allele depths as a two-element array { TAR, TIR }
/// </summary>
/// <param name="intermediateSampleFields">sample fields providing TAR, TIR and the alternate alleles</param>
/// <returns>{ TAR, TIR }, or null when either value is missing or there is more than one alternate allele</returns>
private static int[] GetAlleleDepthsUsingTarTir(IntermediateSampleFields intermediateSampleFields)
{
    int? tir = intermediateSampleFields.TIR;
    int? tar = intermediateSampleFields.TAR;

    bool isUsable = tir != null
                    && tar != null
                    && intermediateSampleFields.AltAlleles.Length <= 1;

    if (!isUsable)
    {
        return null;
    }

    return new[] { tar.Value, tir.Value };
}
/// <summary>
/// Wraps the sample's VF value in a single-element frequency array.
/// </summary>
/// <param name="sampleFields">sample fields providing VF and the alternate alleles</param>
/// <returns>{ VF }, or null when there is more than one alternate allele or VF is missing</returns>
private static double[] GetVariantFrequenciesUsingVf(IntermediateSampleFields sampleFields)
{
    // a single VF value cannot be attributed to several alternate alleles
    bool hasAtMostOneAlt = sampleFields.AltAlleles.Length <= 1;

    return hasAtMostOneAlt && sampleFields.VF != null
        ? new[] { sampleFields.VF.Value }
        : null;
}
/// <summary>
/// Parses the total depth from the sample's DP column.
/// </summary>
/// <param name="intermediateSampleFields">sample fields providing the DP FORMAT index and the sample columns</param>
/// <returns>the parsed depth, or null when the DP column is unavailable or not a valid integer</returns>
private static int? GetTotalDepthUsingDp(IntermediateSampleFields intermediateSampleFields)
{
    var dpIndex = intermediateSampleFields.FormatIndices.DP;
    var columns = intermediateSampleFields.SampleColumns;

    // the sample column may hold fewer fields than the FORMAT column declares
    if (dpIndex == null || columns.Length <= dpIndex.Value)
    {
        return null;
    }

    (int depth, bool parseFailed) = columns[dpIndex.Value].OptimizedParseInt32();

    if (parseFailed)
    {
        return null;
    }

    return depth;
}
/// <summary>
/// Computes the variant frequency as TIR / (TAR + TIR).
/// </summary>
/// <param name="sampleFields">sample fields providing TAR, TIR and the alternate alleles</param>
/// <returns>
/// a single-element array holding the frequency, the shared ZeroVf array when TIR + TAR is 0,
/// or null when either value is missing or there is more than one alternate allele
/// </returns>
private static double[] GetVariantFrequenciesUsingTarTir(IntermediateSampleFields sampleFields)
{
    int? tir = sampleFields.TIR;
    int? tar = sampleFields.TAR;

    // TAR and TIR: never observed with multiple alternate alleles
    bool isUsable = tir != null && tar != null && sampleFields.AltAlleles.Length <= 1;

    if (!isUsable)
    {
        return null;
    }

    // avoid dividing by zero
    if (tir.Value + tar.Value == 0)
    {
        return ZeroVf;
    }

    // the division is carried out entirely in double precision
    double tirReads = tir.Value;
    double tarReads = tar.Value;

    return new[] { tirReads / (tarReads + tirReads) };
}
/// <summary>
/// Builds a sample from one colon-delimited VCF sample column.
/// </summary>
/// <param name="sampleColumn">the raw sample column text</param>
/// <returns>
/// the populated sample, or Sample.EmptySample when there are no FORMAT indices,
/// the column is null or empty, or the column consists of a single "."
/// </returns>
internal ISample ExtractSample(string sampleColumn)
{
    // nothing can be extracted without FORMAT indices or sample text
    if (_formatIndices == null || string.IsNullOrEmpty(sampleColumn))
    {
        return Sample.EmptySample;
    }

    var columns = sampleColumn.OptimizedSplit(':');

    // a lone "." marks a missing sample
    bool isMissingSample = columns.Length == 1 && columns[0] == ".";
    if (isMissingSample)
    {
        return Sample.EmptySample;
    }

    var fields = new IntermediateSampleFields(_vcfColumns, _formatIndices, columns);

    var     alleleDepths       = AlleleDepths.GetAlleleDepths(fields);
    bool    hasFailedFilter    = FailedFilter.GetFailedFilter(fields);
    string  genotype           = Genotype.GetGenotype(fields);
    var     genotypeQuality    = GenotypeQuality.GetGenotypeQuality(fields);
    var     totalDepth         = TotalDepth.GetTotalDepth(_infoDepth, fields);
    double? denovoQuality      = columns.GetString(_formatIndices.DQ).GetDouble();
    var     variantFrequencies = LegacyVariantFrequency.GetVariantFrequencies(fields);
    var     splitReadCounts    = ReadCounts.GetSplitReadCounts(fields);
    var     pairEndReadCounts  = ReadCounts.GetPairEndReadCounts(fields);

    // flagged when the major chromosome count equals a copy number greater than 1
    bool isLossOfHeterozygosity =
        fields.MajorChromosomeCount != null &&
        fields.CopyNumber != null &&
        fields.MajorChromosomeCount.Value == fields.CopyNumber.Value &&
        fields.CopyNumber.Value > 1;

    // positional arguments: the order must match the Sample constructor exactly
    return new Sample(
        alleleDepths,
        fields.AQ,
        fields.CopyNumber,
        fields.DST,
        hasFailedFilter,
        genotype,
        genotypeQuality,
        false,
        denovoQuality,
        fields.LQ,
        pairEndReadCounts,
        null,
        splitReadCounts,
        totalDepth,
        variantFrequencies,
        null,
        null,
        isLossOfHeterozygosity,
        null,
        null);
}
/// <summary>
/// Parses the paired-end read counts from the comma-separated PR column.
/// </summary>
/// <param name="intermediateSampleFields">sample fields providing the PR FORMAT index and the sample columns</param>
/// <returns>
/// the parsed counts in column order, or null when the PR column is not declared,
/// is absent from this sample, or contains a value that cannot be parsed as an integer
/// </returns>
public static int[] GetPairEndReadCounts(IntermediateSampleFields intermediateSampleFields)
{
    var prIndex = intermediateSampleFields.FormatIndices.PR;

    // A sample column may hold fewer fields than the FORMAT column declares; without this
    // bounds check the indexer below would throw instead of reporting "no data".
    if (prIndex == null || intermediateSampleFields.SampleColumns.Length <= prIndex.Value)
    {
        return null;
    }

    var readCounts        = intermediateSampleFields.SampleColumns[prIndex.Value].OptimizedSplit(',');
    var pairEndReadCounts = new int[readCounts.Length];

    for (var i = 0; i < pairEndReadCounts.Length; i++)
    {
        (int number, bool foundError) = readCounts[i].OptimizedParseInt32();

        // one bad value invalidates the whole column
        if (foundError)
        {
            return null;
        }

        pairEndReadCounts[i] = number;
    }

    return pairEndReadCounts;
}
/// <summary>
/// returns the allele depths (reference first, then each alternate allele) taken from the raw base counts
/// </summary>
/// <param name="intermediateSampleFields">sample fields providing the reference allele, alternate alleles and A/C/G/T counts</param>
/// <returns>
/// an array of length (number of alternate alleles + 1), or null when the total allele count is
/// missing, the reference or any alternate allele is not a single base, or a required count is unavailable
/// </returns>
private static int[] GetAlleleDepthsUsingAlleleCounts(IntermediateSampleFields intermediateSampleFields)
{
    if (intermediateSampleFields.TotalAlleleCount == null)
    {
        return null;
    }

    // raw counts exist per base only, so every allele must be a single base
    bool isSnvOnly = intermediateSampleFields.VcfRefAllele.Length == 1
                     && intermediateSampleFields.AltAlleles.AreAllAltAllelesSingleBase();

    if (!isSnvOnly)
    {
        return null;
    }

    var altAlleles = intermediateSampleFields.AltAlleles;
    var depths     = new int[altAlleles.Length + 1];

    // slot 0 holds the reference allele
    int? refCount = GetAlleleCountString(intermediateSampleFields.VcfRefAllele, intermediateSampleFields);
    if (refCount == null)
    {
        return null;
    }

    depths[0] = refCount.Value;

    // slots 1..n hold the alternate alleles in order
    for (var altIndex = 0; altIndex < altAlleles.Length; altIndex++)
    {
        int? altCount = GetAlleleCountString(altAlleles[altIndex], intermediateSampleFields);
        if (altCount == null)
        {
            return null;
        }

        depths[altIndex + 1] = altCount.Value;
    }

    return depths;
}
/// <summary>
/// Parses the genotype quality from the GQX column when that FORMAT index exists, otherwise from GQ.
/// </summary>
/// <param name="intermediateSampleFields">sample fields providing the GQX/GQ FORMAT indices and the sample columns</param>
/// <returns>
/// the parsed quality, or null when neither index exists, the chosen column is absent
/// from this sample, or its value is not a valid integer
/// </returns>
public static int? GetGenotypeQuality(IntermediateSampleFields intermediateSampleFields)
{
    // GQX takes precedence over GQ
    var preferredIndex = intermediateSampleFields.FormatIndices.GQX
                         ?? intermediateSampleFields.FormatIndices.GQ;

    if (preferredIndex == null)
    {
        return null;
    }

    int gqIndex = preferredIndex.Value;
    var columns = intermediateSampleFields.SampleColumns;

    // the sample column may hold fewer fields than the FORMAT column declares
    if (columns.Length <= gqIndex)
    {
        return null;
    }

    (int quality, bool parseFailed) = columns[gqIndex].OptimizedParseInt32();

    if (parseFailed)
    {
        return null;
    }

    return quality;
}
/// <summary>
/// returns the allele depths from the first source that yields a result:
/// TAR/TIR, then the raw allele counts, then the AD column
/// </summary>
/// <param name="intermediateSampleFields">sample fields to derive the depths from</param>
/// <returns>the allele depths, or null when no source produced a result</returns>
public static int[] GetAlleleDepths(IntermediateSampleFields intermediateSampleFields)
{
    bool hasTarAndTir = intermediateSampleFields.TAR != null && intermediateSampleFields.TIR != null;

    // first choice: TAR & TIR
    int[] depths = hasTarAndTir
        ? GetAlleleDepthsUsingTarTir(intermediateSampleFields)
        : null;

    // second choice: raw allele counts
    if (depths == null && intermediateSampleFields.TotalAlleleCount != null)
    {
        depths = GetAlleleDepthsUsingAlleleCounts(intermediateSampleFields);
    }

    // last choice: the AD column
    if (depths == null && intermediateSampleFields.FormatIndices.AD != null)
    {
        depths = GetAlleleDepthsUsingAd(intermediateSampleFields);
    }

    return depths;
}
/// <summary>
/// Returns the total depth as the sum of TAR and TIR.
/// </summary>
/// <param name="intermediateSampleFields">sample fields providing TAR and TIR</param>
/// <returns>TAR + TIR, or null when either value is missing (nullable addition)</returns>
private static int? GetTotalDepthUsingTarTir(IntermediateSampleFields intermediateSampleFields)
{
    return intermediateSampleFields.TAR + intermediateSampleFields.TIR;
}
/// <summary>
/// Returns the total depth as the sample's total allele count.
/// </summary>
/// <param name="intermediateSampleFields">sample fields providing the total allele count</param>
/// <returns>the total allele count, which may be null</returns>
private static int? GetTotalDepthUsingAlleleCounts(IntermediateSampleFields intermediateSampleFields)
{
    return intermediateSampleFields.TotalAlleleCount;
}