Esempio n. 1
0
        /// <summary>
        /// Parses a single colon-delimited sample column into an <see cref="ISample"/>.
        /// </summary>
        /// <param name="sampleColumn">raw text of the sample column; null or empty yields <c>Sample.EmptySample</c></param>
        /// <param name="formatIndices">provides the number of columns to split into and the index of each field (AD, AQ, CN, ...)</param>
        /// <param name="simplePosition">provides the chromosome, start position and alt alleles used for the variant frequencies and the heteroplasmy lookup</param>
        /// <param name="mitoHeteroplasmyProvider">optional; when null, the heteroplasmy percentiles passed to the sample are null</param>
        /// <param name="legacyExtractor">optional; when non-null, non-empty sample columns are handed to this extractor instead</param>
        /// <param name="enableDq">when false, the DQ field is not read and the de novo quality is null</param>
        /// <returns>
        /// <c>Sample.EmptySample</c> if the column is null, empty, or consists of a single ".";
        /// the legacy extractor's result if one was supplied; otherwise a new <c>Sample</c>
        /// populated from the individual fields.
        /// </returns>
        internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, ISimplePosition simplePosition,
                                              IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, LegacySampleFieldExtractor legacyExtractor = null, bool enableDq = false)
        {
            // sanity check: make sure we have a sample column
            if (string.IsNullOrEmpty(sampleColumn))
            {
                return Sample.EmptySample;
            }

            string[] sampleColumns = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);

            // a lone "." means the sample is missing
            if (sampleColumns.Length == 1 && sampleColumns[0] == ".")
            {
                return Sample.EmptySample;
            }

            sampleColumns.NormalizeNulls();

            // the legacy extractor re-parses the raw column text itself
            if (legacyExtractor != null)
            {
                return legacyExtractor.ExtractSample(sampleColumn);
            }

            int[] alleleDepths = sampleColumns.GetString(formatIndices.AD).GetIntegers();
            float? artifactAdjustedQualityScore = sampleColumns.GetString(formatIndices.AQ).GetFloat();
            int? copyNumber = sampleColumns.GetString(formatIndices.CN).GetInteger();

            string[] diseaseAffectedStatuses = sampleColumns.GetString(formatIndices.DST).GetStrings();
            bool failedFilter = sampleColumns.GetString(formatIndices.FT).GetFailedFilter();
            string genotype = sampleColumns.GetString(formatIndices.GT);
            int? genotypeQuality = sampleColumns.GetString(formatIndices.GQ).GetInteger();
            bool isDeNovo = sampleColumns.GetString(formatIndices.DN).IsDeNovo();
            double? deNovoQuality = enableDq ? sampleColumns.GetString(formatIndices.DQ).GetDouble() : null;
            float? likelihoodRatioQualityScore = sampleColumns.GetString(formatIndices.LQ).GetFloat();

            int[] pairedEndReadCounts = sampleColumns.GetString(formatIndices.PR).GetIntegers();
            int[] repeatUnitCounts = sampleColumns.GetString(formatIndices.REPCN).GetIntegers('/');
            int[] splitReadCounts = sampleColumns.GetString(formatIndices.SR).GetIntegers();
            int? totalDepth = sampleColumns.GetString(formatIndices.DP).GetInteger();
            double? variantFrequency = sampleColumns.GetString(formatIndices.VF).GetDouble();
            int? minorHaplotypeCopyNumber = sampleColumns.GetString(formatIndices.MCN).GetInteger();
            double? somaticQuality = sampleColumns.GetString(formatIndices.SQ).GetDouble();
            int? binCount = sampleColumns.GetString(formatIndices.BC).GetInteger();

            double[] variantFrequencies = VariantFrequency.GetVariantFrequencies(variantFrequency, alleleDepths, simplePosition.AltAlleles.Length);

            // Format with the invariant culture so the decimal separator does not depend on
            // the locale of the machine running the extraction. Missing percentiles are
            // emitted as the literal string "null".
            string[] mitoHeteroplasmyPercentiles = mitoHeteroplasmyProvider
                ?.GetVrfPercentiles(simplePosition.Chromosome, simplePosition.Start, simplePosition.AltAlleles, variantFrequencies)
                ?.Select(x => x?.ToString("0.##", System.Globalization.CultureInfo.InvariantCulture) ?? "null")
                .ToArray();

            var isLoh = GetLoh(copyNumber, minorHaplotypeCopyNumber, genotype);

            var sample = new Sample(alleleDepths, artifactAdjustedQualityScore, copyNumber, diseaseAffectedStatuses,
                                    failedFilter, genotype, genotypeQuality, isDeNovo, deNovoQuality, likelihoodRatioQualityScore, pairedEndReadCounts,
                                    repeatUnitCounts, splitReadCounts, totalDepth, variantFrequencies, minorHaplotypeCopyNumber, somaticQuality, isLoh, mitoHeteroplasmyPercentiles, binCount);

            return sample;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds an <see cref="ISample"/> from one colon-delimited sample column.
        /// </summary>
        /// <param name="sampleColumn">raw text of the sample column; null or empty yields <c>Sample.EmptySample</c></param>
        /// <param name="formatIndices">provides the number of columns to split into and the index of each field</param>
        /// <param name="numAltAlleles">number of alt alleles, forwarded to the variant frequency calculation</param>
        /// <param name="legacyExtractor">optional; when non-null, non-empty sample columns are handed to this extractor instead</param>
        /// <returns>
        /// <c>Sample.EmptySample</c> for a null, empty, or single "." column; the legacy
        /// extractor's result if one was supplied; otherwise a freshly constructed <c>Sample</c>.
        /// </returns>
        internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, int numAltAlleles,
                                              LegacySampleFieldExtractor legacyExtractor = null)
        {
            // nothing to parse
            if (string.IsNullOrEmpty(sampleColumn))
            {
                return Sample.EmptySample;
            }

            string[] fields = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);

            // a lone "." marks a missing sample
            bool isMissing = fields.Length == 1 && fields[0] == ".";
            if (isMissing)
            {
                return Sample.EmptySample;
            }

            fields.NormalizeNulls();

            // the legacy path works from the raw column text, not the split fields
            if (legacyExtractor != null)
            {
                return legacyExtractor.ExtractSample(sampleColumn);
            }

            // depth, quality and copy number
            int[] depthsPerAllele = fields.GetString(formatIndices.AD).GetIntegers();
            float? aqScore = fields.GetString(formatIndices.AQ).GetFloat();
            int? cn = fields.GetString(formatIndices.CN).GetInteger();

            // genotype-related fields
            string[] affectedStatuses = fields.GetString(formatIndices.DST).GetStrings();
            bool hasFailedFilter = fields.GetString(formatIndices.FT).GetFailedFilter();
            string gt = fields.GetString(formatIndices.GT);
            int? gq = fields.GetString(formatIndices.GQ).GetInteger();
            bool deNovo = fields.GetString(formatIndices.DN).IsDeNovo();
            float? lqScore = fields.GetString(formatIndices.LQ).GetFloat();

            // read counts and remaining numeric fields
            int[] pairedEndCounts = fields.GetString(formatIndices.PR).GetIntegers();
            int[] repeatCounts = fields.GetString(formatIndices.REPCN).GetIntegers('/');
            int[] splitCounts = fields.GetString(formatIndices.SR).GetIntegers();
            int? depth = fields.GetString(formatIndices.DP).GetInteger();
            double? vf = fields.GetString(formatIndices.VF).GetDouble();
            int? minorHaplotypeCn = fields.GetString(formatIndices.MCN).GetInteger();
            double? sq = fields.GetString(formatIndices.SQ).GetDouble();

            double[] frequencies = VariantFrequency.GetVariantFrequencies(vf, depthsPerAllele, numAltAlleles);
            var lossOfHeterozygosity = GetLoh(cn, minorHaplotypeCn, gt);

            return new Sample(depthsPerAllele, aqScore, cn, affectedStatuses,
                              hasFailedFilter, gt, gq, deNovo, lqScore, pairedEndCounts,
                              repeatCounts, splitCounts, depth, frequencies, minorHaplotypeCn, sq, lossOfHeterozygosity);
        }
Esempio n. 3
0
        /// <summary>
        /// Builds a <c>Sample</c> from the colon-delimited contents of one sample genotype field.
        /// </summary>
        /// <param name="sampleColumn">raw text of the sample column</param>
        /// <returns>
        /// <c>Sample.EmptySample</c> when no format indices are available, when the column is
        /// null or empty, or when it consists of a single "."; otherwise a populated <c>Sample</c>.
        /// </returns>
        private ISample ExtractSample(string sampleColumn)
        {
            // without format indices or column text there is nothing to extract
            if (_formatIndices == null || string.IsNullOrEmpty(sampleColumn))
            {
                return Sample.EmptySample;
            }

            var columns = sampleColumn.Split(':');

            // a lone "." marks a missing sample
            var isMissing = columns.Length == 1 && columns[0] == ".";
            if (isMissing)
            {
                return Sample.EmptySample;
            }

            var fields = new IntermediateSampleFields(_vcfColumns, _formatIndices, columns);

            var depthsPerAllele = AlleleDepths.GetAlleleDepths(fields);
            var hasFailedFilter = FailedFilter.GetFailedFilter(fields);
            var gt              = Genotype.GetGenotype(fields);

            var gq               = GenotypeQuality.GetGenotypeQuality(fields);
            var depth            = TotalDepth.GetTotalDepth(_infoDepth, fields);
            var frequencies      = VariantFrequency.GetVariantFrequencies(fields);
            var splitReads       = ReadCounts.GetSplitReadCounts(fields);
            var pairedEndReads   = ReadCounts.GetPairEndReadCounts(fields);

            // loss of heterozygosity: both counts are present, the major chromosome count
            // equals the copy number, and the copy number is greater than one
            var majorChromosomeCount = fields.MajorChromosomeCount;
            var copyNumber           = fields.CopyNumber;
            var hasLoh = majorChromosomeCount != null && copyNumber != null &&
                         majorChromosomeCount.Value == copyNumber.Value &&
                         copyNumber.Value > 1;

            return new Sample(gt, gq, frequencies, depth, depthsPerAllele, hasFailedFilter,
                              copyNumber, hasLoh, fields.DenovoQuality, splitReads,
                              pairedEndReads, fields.RepeatNumber, fields.RepeatNumberSpan, fields.MAD,
                              fields.SCH, fields.PLG, fields.PCN, fields.DCS, fields.DID,
                              fields.DST, fields.PCH, fields.CHC, fields.AQ, fields.LQ);
        }