// ReSharper restore InconsistentNaming

/// <summary>
/// Stores the REF/ALT information of a VCF row and the parsed values of one sample's
/// FORMAT fields. Each field is looked up in <paramref name="sampleCols"/> through the
/// matching entry of <paramref name="formatIndices"/> and converted by the helper that
/// corresponds to the destination member.
/// </summary>
/// <param name="vcfColumns">columns of the VCF row; only the REF and ALT columns are read here</param>
/// <param name="formatIndices">per-key indices handed to GetString for every lookup</param>
/// <param name="sampleCols">the values of the sample column that the indices refer to</param>
public IntermediateSampleFields(string[] vcfColumns, FormatIndices formatIndices, string[] sampleCols)
{
    VcfRefAllele = vcfColumns[VcfCommon.RefIndex];

    // the ALT column is needed twice: for the allele list and for the "STR" check further down
    string altColumn = vcfColumns[VcfCommon.AltIndex];
    AltAlleles    = altColumn.Split(',');
    FormatIndices = formatIndices;
    SampleColumns = sampleCols;

    // TAR & TIR are parsed together, each after being passed through GetFirstValue
    var tarFirst = GetFirstValue(GetString(formatIndices.TAR, sampleCols));
    var tirFirst = GetFirstValue(GetString(formatIndices.TIR, sampleCols));
    (TAR, TIR) = GetLinkedIntegers(tarFirst, tirFirst);

    // NR & NV are parsed together from their raw strings
    var nrText = GetString(formatIndices.NR, sampleCols);
    var nvText = GetString(formatIndices.NV, sampleCols);
    (NR, NV) = GetLinkedIntegers(nrText, nvText);

    RepeatNumberSpan     = GetString(formatIndices.CI, sampleCols);
    MajorChromosomeCount = GetInteger(GetString(formatIndices.MCC, sampleCols));
    DenovoQuality        = GetFloat(GetString(formatIndices.DQ, sampleCols));

    MAD = GetIntegers(GetString(formatIndices.MAD, sampleCols));
    SCH = GetString(formatIndices.SCH, sampleCols);
    PLG = GetIntegers(GetString(formatIndices.PLG, sampleCols));
    PCN = GetIntegers(GetString(formatIndices.PCN, sampleCols));
    DCS = GetStrings(GetString(formatIndices.DCS, sampleCols));
    DID = GetStrings(GetString(formatIndices.DID, sampleCols));
    DST = GetStrings(GetString(formatIndices.DST, sampleCols));
    PCH = GetIntegers(GetString(formatIndices.PCH, sampleCols));

    // CHC is converted to a boolean by GetBool using "+" as its second argument
    CHC = GetBool(GetString(formatIndices.CHC, sampleCols), "+");

    AQ = GetFloat(GetString(formatIndices.AQ, sampleCols));
    LQ = GetFloat(GetString(formatIndices.LQ, sampleCols));
    VF = GetDouble(GetString(formatIndices.VF, sampleCols));

    // the CN field is interpreted together with a flag telling whether the ALT column contains "STR"
    var  copyNumberText   = GetString(formatIndices.CN, sampleCols);
    bool altContainsStr   = altColumn.Contains("STR");
    (CopyNumber, RepeatNumber) = GetCopyNumber(copyNumberText, altContainsStr);

    // per-base fields (AU, CU, GU, TU) are converted in one call that also yields the total
    var aText = GetString(formatIndices.AU, sampleCols);
    var cText = GetString(formatIndices.CU, sampleCols);
    var gText = GetString(formatIndices.GU, sampleCols);
    var tText = GetString(formatIndices.TU, sampleCols);
    (ACount, CCount, GCount, TCount, TotalAlleleCount) = GetAlleleCounts(aText, cText, gText, tText);
}
/// <summary>
/// Parses one VCF sample column into an <see cref="ISample"/>.
/// </summary>
/// <param name="sampleColumn">raw text of the sample column (fields separated by ':')</param>
/// <param name="formatIndices">index of each FORMAT key within the split sample column</param>
/// <param name="simplePosition">supplies the chromosome, start and alt alleles used for the
/// variant frequency and heteroplasmy lookups</param>
/// <param name="mitoHeteroplasmyProvider">optional provider of VRF percentiles; when null, no
/// percentiles are attached to the sample</param>
/// <param name="legacyExtractor">when non-null, the raw sample column is handed to this extractor
/// and its result is returned instead of the one built here</param>
/// <param name="enableDq">when false, the DQ field is not read and the de novo quality stays null</param>
/// <returns>
/// <c>Sample.EmptySample</c> when the column is null, empty, or consists of the single value ".";
/// otherwise the legacy extractor's result or a newly constructed <c>Sample</c>.
/// </returns>
internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, ISimplePosition simplePosition,
    IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, LegacySampleFieldExtractor legacyExtractor = null,
    bool enableDq = false)
{
    // sanity check: make sure we have a format column
    if (string.IsNullOrEmpty(sampleColumn))
    {
        return Sample.EmptySample;
    }

    string[] sampleColumns = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);

    // a lone "." is treated as an empty sample
    if (sampleColumns.Length == 1 && sampleColumns[0] == ".")
    {
        return Sample.EmptySample;
    }

    sampleColumns.NormalizeNulls();

    if (legacyExtractor != null)
    {
        return legacyExtractor.ExtractSample(sampleColumn);
    }

    int[]    alleleDepths                 = sampleColumns.GetString(formatIndices.AD).GetIntegers();
    float?   artifactAdjustedQualityScore = sampleColumns.GetString(formatIndices.AQ).GetFloat();
    int?     copyNumber                   = sampleColumns.GetString(formatIndices.CN).GetInteger();
    string[] diseaseAffectedStatuses      = sampleColumns.GetString(formatIndices.DST).GetStrings();
    bool     failedFilter                 = sampleColumns.GetString(formatIndices.FT).GetFailedFilter();
    string   genotype                     = sampleColumns.GetString(formatIndices.GT);
    int?     genotypeQuality              = sampleColumns.GetString(formatIndices.GQ).GetInteger();
    bool     isDeNovo                     = sampleColumns.GetString(formatIndices.DN).IsDeNovo();
    double?  deNovoQuality                = enableDq ? sampleColumns.GetString(formatIndices.DQ).GetDouble() : null;
    float?   likelihoodRatioQualityScore  = sampleColumns.GetString(formatIndices.LQ).GetFloat();
    int[]    pairedEndReadCounts          = sampleColumns.GetString(formatIndices.PR).GetIntegers();
    int[]    repeatUnitCounts             = sampleColumns.GetString(formatIndices.REPCN).GetIntegers('/');
    int[]    splitReadCounts              = sampleColumns.GetString(formatIndices.SR).GetIntegers();
    int?     totalDepth                   = sampleColumns.GetString(formatIndices.DP).GetInteger();
    double?  variantFrequency             = sampleColumns.GetString(formatIndices.VF).GetDouble();
    int?     minorHaplotypeCopyNumber     = sampleColumns.GetString(formatIndices.MCN).GetInteger();
    double?  somaticQuality               = sampleColumns.GetString(formatIndices.SQ).GetDouble();
    int?     binCount                     = sampleColumns.GetString(formatIndices.BC).GetInteger();

    double[] variantFrequencies = VariantFrequency.GetVariantFrequencies(variantFrequency, alleleDepths,
        simplePosition.AltAlleles.Length);

    // Format with the invariant culture: these strings are machine-readable output, so the decimal
    // separator must not depend on the culture of the thread that happens to run the annotation.
    string[] mitoHeteroplasmyPercentiles = mitoHeteroplasmyProvider
        ?.GetVrfPercentiles(simplePosition.Chromosome, simplePosition.Start, simplePosition.AltAlleles,
            variantFrequencies)
        ?.Select(x => x?.ToString("0.##", System.Globalization.CultureInfo.InvariantCulture) ?? "null")
        .ToArray();

    var isLoh = GetLoh(copyNumber, minorHaplotypeCopyNumber, genotype);

    return new Sample(alleleDepths, artifactAdjustedQualityScore, copyNumber, diseaseAffectedStatuses,
        failedFilter, genotype, genotypeQuality, isDeNovo, deNovoQuality, likelihoodRatioQualityScore,
        pairedEndReadCounts, repeatUnitCounts, splitReadCounts, totalDepth, variantFrequencies,
        minorHaplotypeCopyNumber, somaticQuality, isLoh, mitoHeteroplasmyPercentiles, binCount);
}
/// <summary>
/// Reports whether any of the FORMAT indices TAR, TIR, AU, GU, CU, TU, GQX, DPI or MCC is set
/// (non-null). Callers use a true result to decide that a legacy sample field extractor is needed.
/// </summary>
/// <param name="formatIndices">the format indices to inspect</param>
/// <returns>true as soon as one of the inspected indices is non-null; false otherwise</returns>
private static bool IsLegacyVariantCaller(FormatIndices formatIndices)
{
    // TAR / TIR
    if (formatIndices.TAR != null || formatIndices.TIR != null)
    {
        return true;
    }

    // per-base fields
    if (formatIndices.AU != null || formatIndices.GU != null ||
        formatIndices.CU != null || formatIndices.TU != null)
    {
        return true;
    }

    // remaining keys
    return formatIndices.GQX != null || formatIndices.DPI != null || formatIndices.MCC != null;
}
/// <summary>
/// Parses one VCF sample column into an <see cref="ISample"/>, using the number of alternate
/// alleles to derive the per-allele variant frequencies.
/// </summary>
/// <param name="sampleColumn">raw text of the sample column (fields separated by ':')</param>
/// <param name="formatIndices">index of each FORMAT key within the split sample column</param>
/// <param name="numAltAlleles">number of alternate alleles passed on to the variant frequency calculation</param>
/// <param name="legacyExtractor">when non-null, the raw sample column is handed to this extractor
/// and its result is returned instead of the one built here</param>
/// <returns>
/// <c>Sample.EmptySample</c> when the column is null, empty, or consists of the single value ".";
/// otherwise the legacy extractor's result or a newly constructed <c>Sample</c>.
/// </returns>
internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, int numAltAlleles,
    LegacySampleFieldExtractor legacyExtractor = null)
{
    // nothing to parse without a sample column
    if (string.IsNullOrEmpty(sampleColumn))
    {
        return Sample.EmptySample;
    }

    string[] fields = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);

    // a lone "." is treated as an empty sample
    bool isPlaceholder = fields.Length == 1 && fields[0] == ".";
    if (isPlaceholder)
    {
        return Sample.EmptySample;
    }

    fields.NormalizeNulls();

    // legacy callers are handled by their dedicated extractor, which works on the raw column
    if (legacyExtractor != null)
    {
        return legacyExtractor.ExtractSample(sampleColumn);
    }

    int[]    depthsPerAllele  = fields.GetString(formatIndices.AD).GetIntegers();
    float?   aqScore          = fields.GetString(formatIndices.AQ).GetFloat();
    int?     cn               = fields.GetString(formatIndices.CN).GetInteger();
    string[] diseaseStatuses  = fields.GetString(formatIndices.DST).GetStrings();
    bool     filterFailed     = fields.GetString(formatIndices.FT).GetFailedFilter();
    string   gt               = fields.GetString(formatIndices.GT);
    int?     gq               = fields.GetString(formatIndices.GQ).GetInteger();
    bool     deNovo           = fields.GetString(formatIndices.DN).IsDeNovo();
    float?   lqScore          = fields.GetString(formatIndices.LQ).GetFloat();
    int[]    pairedReads      = fields.GetString(formatIndices.PR).GetIntegers();
    int[]    repeatCounts     = fields.GetString(formatIndices.REPCN).GetIntegers('/');
    int[]    splitReads       = fields.GetString(formatIndices.SR).GetIntegers();
    int?     depth            = fields.GetString(formatIndices.DP).GetInteger();
    double?  vf               = fields.GetString(formatIndices.VF).GetDouble();
    int?     minorHaplotypeCn = fields.GetString(formatIndices.MCN).GetInteger();
    double?  sq               = fields.GetString(formatIndices.SQ).GetDouble();

    double[] frequencies = VariantFrequency.GetVariantFrequencies(vf, depthsPerAllele, numAltAlleles);
    var loh = GetLoh(cn, minorHaplotypeCn, gt);

    return new Sample(depthsPerAllele, aqScore, cn, diseaseStatuses, filterFailed, gt, gq, deNovo, lqScore,
        pairedReads, repeatCounts, splitReads, depth, frequencies, minorHaplotypeCn, sq, loh);
}
/// <summary>
/// Converts the sample columns of a VCF row into an array of <see cref="ISample"/> objects.
/// </summary>
/// <param name="vcfColumns">all columns of the VCF row</param>
/// <param name="formatIndices">updated in place from the row's FORMAT column before any sample is parsed</param>
/// <param name="numAltAlleles">number of alternate alleles, forwarded to ExtractSample</param>
/// <param name="isRepeatExpansion">forwarded unchanged to ExtractSample</param>
/// <returns>
/// null when the row has fewer than <c>VcfCommon.MinNumColumnsSampleGenotypes</c> columns;
/// otherwise one entry per column starting at <c>VcfCommon.GenotypeIndex</c>.
/// </returns>
internal static ISample[] ToSamples(this string[] vcfColumns, FormatIndices formatIndices, int numAltAlleles,
    bool isRepeatExpansion)
{
    // rows without genotype columns have no samples
    if (vcfColumns.Length < VcfCommon.MinNumColumnsSampleGenotypes)
    {
        return null;
    }

    var samples = new ISample[vcfColumns.Length - VcfCommon.MinNumColumnsSampleGenotypes + 1];
    formatIndices.Set(vcfColumns[VcfCommon.FormatIndex]);

    // walk the sample columns and the output slots side by side
    for (int column = VcfCommon.GenotypeIndex, slot = 0; column < vcfColumns.Length; column++, slot++)
    {
        samples[slot] = ExtractSample(vcfColumns[column], formatIndices, numAltAlleles, isRepeatExpansion);
    }

    return samples;
}
/// <summary>
/// Converts the sample columns of a VCF row into an array of <see cref="ISample"/> objects,
/// forwarding the position and the mitochondrial heteroplasmy provider to each sample extraction.
/// </summary>
/// <param name="vcfColumns">all columns of the VCF row</param>
/// <param name="formatIndices">updated in place from the row's FORMAT column before any sample is parsed</param>
/// <param name="simplePosition">forwarded unchanged to ExtractSample</param>
/// <param name="mitoHeteroplasmyProvider">forwarded unchanged to ExtractSample</param>
/// <param name="enableDq">forwarded unchanged to ExtractSample</param>
/// <returns>
/// null when the row has fewer than <c>VcfCommon.MinNumColumnsSampleGenotypes</c> columns;
/// otherwise one entry per column starting at <c>VcfCommon.GenotypeIndex</c>.
/// </returns>
internal static ISample[] ToSamples(this string[] vcfColumns, FormatIndices formatIndices,
    ISimplePosition simplePosition, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, bool enableDq = false)
{
    // rows without genotype columns have no samples
    if (vcfColumns.Length < VcfCommon.MinNumColumnsSampleGenotypes)
    {
        return null;
    }

    var samples = new ISample[vcfColumns.Length - VcfCommon.MinNumColumnsSampleGenotypes + 1];
    formatIndices.Set(vcfColumns[VcfCommon.FormatIndex]);

    // a legacy extractor is created once per row, and only when the FORMAT keys call for it
    LegacySampleFieldExtractor legacyExtractor = null;
    if (IsLegacyVariantCaller(formatIndices))
    {
        legacyExtractor = new LegacySampleFieldExtractor(vcfColumns, formatIndices);
    }

    // walk the sample columns and the output slots side by side
    for (int column = VcfCommon.GenotypeIndex, slot = 0; column < vcfColumns.Length; column++, slot++)
    {
        samples[slot] = ExtractSample(vcfColumns[column], formatIndices, simplePosition,
            mitoHeteroplasmyProvider, legacyExtractor, enableDq);
    }

    return samples;
}
/// <summary>
/// Converts the sample columns of a VCF row into an array of <see cref="ISample"/> objects.
/// </summary>
/// <param name="vcfColumns">all columns of the VCF row</param>
/// <param name="formatIndices">updated in place from the row's FORMAT column before any sample is parsed</param>
/// <param name="numAltAlleles">number of alternate alleles, forwarded to ExtractSample</param>
/// <returns>
/// null when the row has fewer than <c>VcfCommon.MinNumColumnsSampleGenotypes</c> columns;
/// otherwise one entry per column starting at <c>VcfCommon.GenotypeIndex</c>.
/// </returns>
internal static ISample[] ToSamples(this string[] vcfColumns, FormatIndices formatIndices, int numAltAlleles)
{
    // rows without genotype columns have no samples
    if (vcfColumns.Length < VcfCommon.MinNumColumnsSampleGenotypes)
    {
        return null;
    }

    var samples = new ISample[vcfColumns.Length - VcfCommon.MinNumColumnsSampleGenotypes + 1];
    formatIndices.Set(vcfColumns[VcfCommon.FormatIndex]);

    // a legacy extractor is created once per row, and only when the FORMAT keys call for it
    LegacySampleFieldExtractor legacyExtractor = null;
    if (IsLegacyVariantCaller(formatIndices))
    {
        legacyExtractor = new LegacySampleFieldExtractor(vcfColumns, formatIndices);
    }

    // walk the sample columns and the output slots side by side
    for (int column = VcfCommon.GenotypeIndex, slot = 0; column < vcfColumns.Length; column++, slot++)
    {
        samples[slot] = ExtractSample(vcfColumns[column], formatIndices, numAltAlleles, legacyExtractor);
    }

    return samples;
}
/// <summary>
/// Extracts the genotype fields of the stored VCF columns and returns one sample per sample column.
/// The format indices are re-extracted from the FORMAT column and stored on this instance first.
/// </summary>
/// <returns>
/// null when there are fewer than <c>VcfCommon.MinNumColumnsSampleGenotypes</c> columns;
/// otherwise one entry per column starting at <c>VcfCommon.GenotypeIndex</c>.
/// </returns>
internal ISample[] ExtractSamples()
{
    // without enough columns there are no samples to extract
    if (_vcfColumns.Length < VcfCommon.MinNumColumnsSampleGenotypes)
    {
        return null;
    }

    var samples = new ISample[_vcfColumns.Length - VcfCommon.MinNumColumnsSampleGenotypes + 1];

    // the per-sample extraction relies on the indices derived from the FORMAT column
    _formatIndices = FormatIndices.Extract(_vcfColumns[VcfCommon.FormatIndex]);

    // walk the sample columns and the output slots side by side
    for (int column = VcfCommon.GenotypeIndex, slot = 0; column < _vcfColumns.Length; column++, slot++)
    {
        samples[slot] = ExtractSample(_vcfColumns[column]);
    }

    return samples;
}