/// <summary>
/// Builds an <see cref="ISample"/> from a single colon-delimited sample column.
/// </summary>
/// <param name="sampleColumn">
/// Raw sample column text. A null or empty value, or a column consisting solely of ".",
/// yields <see cref="Sample.EmptySample"/>.
/// </param>
/// <param name="formatIndices">Supplies the expected column count and the index of each format field.</param>
/// <param name="simplePosition">
/// Supplies the chromosome, start and alternate alleles used for the variant frequency and
/// mitochondrial heteroplasmy lookups.
/// </param>
/// <param name="mitoHeteroplasmyProvider">
/// Optional provider of heteroplasmy percentiles; when null, the percentiles passed to the sample are null.
/// </param>
/// <param name="legacyExtractor">
/// When non-null, extraction is delegated to this extractor using the original, unsplit column text.
/// </param>
/// <param name="enableDq">When false, the de novo quality is left null instead of being read from the DQ field.</param>
/// <returns>The extracted sample, or <see cref="Sample.EmptySample"/> when the column carries no data.</returns>
internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, ISimplePosition simplePosition,
    IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, LegacySampleFieldExtractor legacyExtractor = null,
    bool enableDq = false)
{
    // sanity check: make sure we have a sample column to parse
    if (string.IsNullOrEmpty(sampleColumn))
    {
        return Sample.EmptySample;
    }

    string[] sampleColumns = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);

    // a lone "." marks a missing sample
    if (sampleColumns.Length == 1 && sampleColumns[0] == ".")
    {
        return Sample.EmptySample;
    }

    sampleColumns.NormalizeNulls();

    // the legacy path receives the raw column text, not the split fields
    if (legacyExtractor != null)
    {
        return legacyExtractor.ExtractSample(sampleColumn);
    }

    int[]    alleleDepths                 = sampleColumns.GetString(formatIndices.AD).GetIntegers();
    float?   artifactAdjustedQualityScore = sampleColumns.GetString(formatIndices.AQ).GetFloat();
    int?     copyNumber                   = sampleColumns.GetString(formatIndices.CN).GetInteger();
    string[] diseaseAffectedStatuses      = sampleColumns.GetString(formatIndices.DST).GetStrings();
    bool     failedFilter                 = sampleColumns.GetString(formatIndices.FT).GetFailedFilter();
    string   genotype                     = sampleColumns.GetString(formatIndices.GT);
    int?     genotypeQuality              = sampleColumns.GetString(formatIndices.GQ).GetInteger();
    bool     isDeNovo                     = sampleColumns.GetString(formatIndices.DN).IsDeNovo();
    double?  deNovoQuality                = enableDq ? sampleColumns.GetString(formatIndices.DQ).GetDouble() : null;
    float?   likelihoodRatioQualityScore  = sampleColumns.GetString(formatIndices.LQ).GetFloat();
    int[]    pairedEndReadCounts          = sampleColumns.GetString(formatIndices.PR).GetIntegers();
    // REPCN passes '/' to GetIntegers (presumably the value delimiter - confirm against GetIntegers)
    int[]    repeatUnitCounts             = sampleColumns.GetString(formatIndices.REPCN).GetIntegers('/');
    int[]    splitReadCounts              = sampleColumns.GetString(formatIndices.SR).GetIntegers();
    int?     totalDepth                   = sampleColumns.GetString(formatIndices.DP).GetInteger();
    double?  variantFrequency             = sampleColumns.GetString(formatIndices.VF).GetDouble();
    int?     minorHaplotypeCopyNumber     = sampleColumns.GetString(formatIndices.MCN).GetInteger();
    double?  somaticQuality               = sampleColumns.GetString(formatIndices.SQ).GetDouble();
    int?     binCount                     = sampleColumns.GetString(formatIndices.BC).GetInteger();

    double[] variantFrequencies = VariantFrequency.GetVariantFrequencies(variantFrequency, alleleDepths,
        simplePosition.AltAlleles.Length);

    // Format with the invariant culture so the decimal separator in the emitted strings
    // does not depend on the locale of the machine running the annotation.
    string[] mitoHeteroplasmyPercentiles = mitoHeteroplasmyProvider
        ?.GetVrfPercentiles(simplePosition.Chromosome, simplePosition.Start, simplePosition.AltAlleles, variantFrequencies)
        ?.Select(x => x?.ToString("0.##", System.Globalization.CultureInfo.InvariantCulture) ?? "null")
        .ToArray();

    var isLoh = GetLoh(copyNumber, minorHaplotypeCopyNumber, genotype);

    var sample = new Sample(alleleDepths, artifactAdjustedQualityScore, copyNumber, diseaseAffectedStatuses,
        failedFilter, genotype, genotypeQuality, isDeNovo, deNovoQuality, likelihoodRatioQualityScore,
        pairedEndReadCounts, repeatUnitCounts, splitReadCounts, totalDepth, variantFrequencies,
        minorHaplotypeCopyNumber, somaticQuality, isLoh, mitoHeteroplasmyPercentiles, binCount);

    return sample;
}
/// <summary>
/// Builds an <see cref="ISample"/> from a single colon-delimited sample column, using an
/// explicit alternate allele count for the variant frequency calculation.
/// </summary>
/// <param name="sampleColumn">
/// Raw sample column text. A null or empty value, or a column consisting solely of ".",
/// yields <see cref="Sample.EmptySample"/>.
/// </param>
/// <param name="formatIndices">Supplies the expected column count and the index of each format field.</param>
/// <param name="numAltAlleles">Number of alternate alleles handed to the variant frequency calculation.</param>
/// <param name="legacyExtractor">
/// When non-null, extraction is delegated to this extractor using the original, unsplit column text.
/// </param>
/// <returns>The extracted sample, or <see cref="Sample.EmptySample"/> when the column carries no data.</returns>
internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, int numAltAlleles,
    LegacySampleFieldExtractor legacyExtractor = null)
{
    // nothing to parse
    if (string.IsNullOrEmpty(sampleColumn))
    {
        return Sample.EmptySample;
    }

    string[] fields = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);

    // a lone "." marks a missing sample
    bool isMissingSample = fields.Length == 1 && fields[0] == ".";
    if (isMissingSample)
    {
        return Sample.EmptySample;
    }

    fields.NormalizeNulls();

    // the legacy path receives the raw column text, not the split fields
    if (legacyExtractor != null)
    {
        return legacyExtractor.ExtractSample(sampleColumn);
    }

    int[] depthsPerAllele = fields.GetString(formatIndices.AD).GetIntegers();
    float? aqScore = fields.GetString(formatIndices.AQ).GetFloat();
    int? cn = fields.GetString(formatIndices.CN).GetInteger();
    string[] affectedStatuses = fields.GetString(formatIndices.DST).GetStrings();
    bool hasFailedFilter = fields.GetString(formatIndices.FT).GetFailedFilter();
    string gt = fields.GetString(formatIndices.GT);
    int? gq = fields.GetString(formatIndices.GQ).GetInteger();
    bool deNovo = fields.GetString(formatIndices.DN).IsDeNovo();
    float? lqScore = fields.GetString(formatIndices.LQ).GetFloat();
    int[] pairedEndCounts = fields.GetString(formatIndices.PR).GetIntegers();
    int[] repeatCounts = fields.GetString(formatIndices.REPCN).GetIntegers('/');
    int[] splitCounts = fields.GetString(formatIndices.SR).GetIntegers();
    int? depth = fields.GetString(formatIndices.DP).GetInteger();
    double? vf = fields.GetString(formatIndices.VF).GetDouble();
    int? minorHaplotypeCn = fields.GetString(formatIndices.MCN).GetInteger();
    double? sq = fields.GetString(formatIndices.SQ).GetDouble();

    // derived values
    double[] frequencies = VariantFrequency.GetVariantFrequencies(vf, depthsPerAllele, numAltAlleles);
    var lossOfHeterozygosity = GetLoh(cn, minorHaplotypeCn, gt);

    return new Sample(
        depthsPerAllele,
        aqScore,
        cn,
        affectedStatuses,
        hasFailedFilter,
        gt,
        gq,
        deNovo,
        lqScore,
        pairedEndCounts,
        repeatCounts,
        splitCounts,
        depth,
        frequencies,
        minorHaplotypeCn,
        sq,
        lossOfHeterozygosity);
}
/// <summary>
/// Converts the colon-delimited contents of a sample genotype field into a sample object.
/// Returns <see cref="Sample.EmptySample"/> when no format indices are available, when the
/// column is null or empty, or when the column consists solely of ".".
/// </summary>
private ISample ExtractSample(string sampleColumn)
{
    // without format indices the column cannot be interpreted
    if (_formatIndices == null)
    {
        return Sample.EmptySample;
    }

    if (string.IsNullOrEmpty(sampleColumn))
    {
        return Sample.EmptySample;
    }

    var columns = sampleColumn.Split(':');

    // a lone "." marks a missing sample
    var isMissingSample = columns.Length == 1 && columns[0] == ".";
    if (isMissingSample)
    {
        return Sample.EmptySample;
    }

    var fields = new IntermediateSampleFields(_vcfColumns, _formatIndices, columns);

    var depthsPerAllele = AlleleDepths.GetAlleleDepths(fields);
    var hasFailedFilter = FailedFilter.GetFailedFilter(fields);
    var gt = Genotype.GetGenotype(fields);
    var gq = GenotypeQuality.GetGenotypeQuality(fields);
    var depth = TotalDepth.GetTotalDepth(_infoDepth, fields);
    var frequencies = VariantFrequency.GetVariantFrequencies(fields);
    var splitCounts = ReadCounts.GetSplitReadCounts(fields);
    var pairedEndCounts = ReadCounts.GetPairEndReadCounts(fields);

    // LOH is flagged when both counts are present, the copy number exceeds one,
    // and the major chromosome count equals the copy number
    var majorCount = fields.MajorChromosomeCount;
    var copies = fields.CopyNumber;
    var hasLossOfHeterozygosity = majorCount != null
                                  && copies != null
                                  && majorCount.Value == copies.Value
                                  && copies.Value > 1;

    return new Sample(
        gt,
        gq,
        frequencies,
        depth,
        depthsPerAllele,
        hasFailedFilter,
        copies,
        hasLossOfHeterozygosity,
        fields.DenovoQuality,
        splitCounts,
        pairedEndCounts,
        fields.RepeatNumber,
        fields.RepeatNumberSpan,
        fields.MAD,
        fields.SCH,
        fields.PLG,
        fields.PCN,
        fields.DCS,
        fields.DID,
        fields.DST,
        fields.PCH,
        fields.CHC,
        fields.AQ,
        fields.LQ);
}