Exemplo n.º 1
0
        // ReSharper restore InconsistentNaming

        /// <summary>
        /// Populates the intermediate sample fields from the columns of a VCF row and the
        /// already-split fields of one sample column.
        /// </summary>
        /// <param name="vcfColumns">the VCF row columns; the entries at VcfCommon.RefIndex and VcfCommon.AltIndex are read</param>
        /// <param name="formatIndices">supplies, per field name, the index handed to GetString to pick the value out of <paramref name="sampleCols"/></param>
        /// <param name="sampleCols">the fields of the sample column</param>
        public IntermediateSampleFields(string[] vcfColumns, FormatIndices formatIndices, string[] sampleCols)
        {
            VcfRefAllele = vcfColumns[VcfCommon.RefIndex];

            // the ALT column is needed twice: once split into alleles, once for the "STR" check below
            string altField = vcfColumns[VcfCommon.AltIndex];
            AltAlleles      = altField.Split(',');

            FormatIndices = formatIndices;
            SampleColumns = sampleCols;

            // TAR/TIR: only the first value of each field is passed on, and the pair is parsed together
            var tarText = GetFirstValue(GetString(formatIndices.TAR, sampleCols));
            var tirText = GetFirstValue(GetString(formatIndices.TIR, sampleCols));
            (TAR, TIR)  = GetLinkedIntegers(tarText, tirText);

            // NR/NV: parsed together as well, but from the full field text
            var nrText = GetString(formatIndices.NR, sampleCols);
            var nvText = GetString(formatIndices.NV, sampleCols);
            (NR, NV)   = GetLinkedIntegers(nrText, nvText);

            // fields that map one-to-one onto a property
            RepeatNumberSpan     = GetString(formatIndices.CI, sampleCols);
            MajorChromosomeCount = GetInteger(GetString(formatIndices.MCC, sampleCols));
            DenovoQuality        = GetFloat(GetString(formatIndices.DQ, sampleCols));
            MAD                  = GetIntegers(GetString(formatIndices.MAD, sampleCols));
            SCH                  = GetString(formatIndices.SCH, sampleCols);
            PLG                  = GetIntegers(GetString(formatIndices.PLG, sampleCols));
            PCN                  = GetIntegers(GetString(formatIndices.PCN, sampleCols));
            DCS                  = GetStrings(GetString(formatIndices.DCS, sampleCols));
            DID                  = GetStrings(GetString(formatIndices.DID, sampleCols));
            DST                  = GetStrings(GetString(formatIndices.DST, sampleCols));
            PCH                  = GetIntegers(GetString(formatIndices.PCH, sampleCols));
            CHC                  = GetBool(GetString(formatIndices.CHC, sampleCols), "+");
            AQ                   = GetFloat(GetString(formatIndices.AQ, sampleCols));
            LQ                   = GetFloat(GetString(formatIndices.LQ, sampleCols));
            VF                   = GetDouble(GetString(formatIndices.VF, sampleCols));

            // the CN field is interpreted by GetCopyNumber together with whether the ALT column contains "STR"
            var  copyNumberText  = GetString(formatIndices.CN, sampleCols);
            bool altContainsStr  = altField.Contains("STR");
            (CopyNumber, RepeatNumber) = GetCopyNumber(copyNumberText, altContainsStr);

            // per-base fields, read in A, C, G, T order
            var aText = GetString(formatIndices.AU, sampleCols);
            var cText = GetString(formatIndices.CU, sampleCols);
            var gText = GetString(formatIndices.GU, sampleCols);
            var tText = GetString(formatIndices.TU, sampleCols);
            (ACount, CCount, GCount, TCount, TotalAlleleCount) = GetAlleleCounts(aText, cText, gText, tText);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds an <see cref="ISample"/> from the text of one sample column.
        /// </summary>
        /// <param name="sampleColumn">the raw, colon-delimited sample column</param>
        /// <param name="formatIndices">supplies the expected number of fields and the index of each named field</param>
        /// <param name="simplePosition">provides the chromosome, start and alt alleles used for the variant frequencies and the heteroplasmy lookup</param>
        /// <param name="mitoHeteroplasmyProvider">optional; when null, no heteroplasmy percentiles are produced</param>
        /// <param name="legacyExtractor">optional; when supplied, it parses the sample instead of this method</param>
        /// <param name="enableDq">when false, the de novo quality is left null without reading the DQ field</param>
        /// <returns>
        /// <see cref="Sample.EmptySample"/> when the column is null, empty, or consists of a single ".";
        /// the legacy extractor's result when one is supplied; otherwise a newly constructed <see cref="Sample"/>.
        /// </returns>
        internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, ISimplePosition simplePosition,
                                              IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, LegacySampleFieldExtractor legacyExtractor = null, bool enableDq = false)
        {
            // nothing to parse
            if (string.IsNullOrEmpty(sampleColumn))
            {
                return Sample.EmptySample;
            }

            string[] fields = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);

            // a lone "." is treated the same as a missing column
            bool isLonePlaceholder = fields.Length == 1 && fields[0] == ".";
            if (isLonePlaceholder)
            {
                return Sample.EmptySample;
            }

            fields.NormalizeNulls();

            // the legacy extractor works from the unsplit column text
            if (legacyExtractor != null)
            {
                return legacyExtractor.ExtractSample(sampleColumn);
            }

            int[]  ad = fields.GetString(formatIndices.AD).GetIntegers();
            float? aq = fields.GetString(formatIndices.AQ).GetFloat();
            int?   cn = fields.GetString(formatIndices.CN).GetInteger();

            string[] dst          = fields.GetString(formatIndices.DST).GetStrings();
            bool     hasFailed    = fields.GetString(formatIndices.FT).GetFailedFilter();
            string   gt           = fields.GetString(formatIndices.GT);
            int?     gq           = fields.GetString(formatIndices.GQ).GetInteger();
            bool     deNovoCalled = fields.GetString(formatIndices.DN).IsDeNovo();

            // DQ is only read when explicitly enabled
            double? dq = null;
            if (enableDq)
            {
                dq = fields.GetString(formatIndices.DQ).GetDouble();
            }

            float? lq = fields.GetString(formatIndices.LQ).GetFloat();

            int[]   pr    = fields.GetString(formatIndices.PR).GetIntegers();
            int[]   repcn = fields.GetString(formatIndices.REPCN).GetIntegers('/');
            int[]   sr    = fields.GetString(formatIndices.SR).GetIntegers();
            int?    dp    = fields.GetString(formatIndices.DP).GetInteger();
            double? vf    = fields.GetString(formatIndices.VF).GetDouble();
            int?    mcn   = fields.GetString(formatIndices.MCN).GetInteger();
            double? sq    = fields.GetString(formatIndices.SQ).GetDouble();
            int?    bc    = fields.GetString(formatIndices.BC).GetInteger();

            double[] variantFrequencies = VariantFrequency.GetVariantFrequencies(vf, ad, simplePosition.AltAlleles.Length);

            // percentiles stay null when there is no provider or the provider returns null;
            // individual null entries are rendered as the literal text "null"
            var vrfPercentiles = mitoHeteroplasmyProvider?.GetVrfPercentiles(simplePosition.Chromosome, simplePosition.Start,
                                                                             simplePosition.AltAlleles, variantFrequencies);
            string[] percentileTexts = vrfPercentiles?.Select(percentile => percentile?.ToString("0.##") ?? "null").ToArray();

            var isLoh = GetLoh(cn, mcn, gt);

            return new Sample(ad, aq, cn, dst,
                              hasFailed, gt, gq, deNovoCalled, dq, lq, pr,
                              repcn, sr, dp, variantFrequencies, mcn, sq, isLoh, percentileTexts, bc);
        }
Exemplo n.º 3
0
 /// <summary>
 /// Reports whether any of the FORMAT fields checked below (TAR, TIR, AU, GU, CU, TU, GQX, DPI, MCC)
 /// has a non-null index in <paramref name="formatIndices"/>.
 /// </summary>
 /// <param name="formatIndices">the field indices to inspect</param>
 /// <returns>true as soon as one of the checked indices is non-null; false when all are null</returns>
 private static bool IsLegacyVariantCaller(FormatIndices formatIndices)
 {
     // TAR / TIR
     if (formatIndices.TAR != null || formatIndices.TIR != null)
     {
         return true;
     }

     // per-base fields
     if (formatIndices.AU != null || formatIndices.GU != null || formatIndices.CU != null || formatIndices.TU != null)
     {
         return true;
     }

     // remaining fields
     return formatIndices.GQX != null || formatIndices.DPI != null || formatIndices.MCC != null;
 }
Exemplo n.º 4
0
        /// <summary>
        /// Builds an <see cref="ISample"/> from the text of one sample column.
        /// </summary>
        /// <param name="sampleColumn">the raw, colon-delimited sample column</param>
        /// <param name="formatIndices">supplies the expected number of fields and the index of each named field</param>
        /// <param name="numAltAlleles">the alt allele count passed on to the variant frequency calculation</param>
        /// <param name="legacyExtractor">optional; when supplied, it parses the sample instead of this method</param>
        /// <returns>
        /// <see cref="Sample.EmptySample"/> when the column is null, empty, or consists of a single ".";
        /// the legacy extractor's result when one is supplied; otherwise a newly constructed <see cref="Sample"/>.
        /// </returns>
        internal static ISample ExtractSample(string sampleColumn, FormatIndices formatIndices, int numAltAlleles,
                                              LegacySampleFieldExtractor legacyExtractor = null)
        {
            // nothing to parse
            if (string.IsNullOrEmpty(sampleColumn))
            {
                return Sample.EmptySample;
            }

            string[] fields = sampleColumn.OptimizedSplit(':', formatIndices.NumColumns);

            // a lone "." is treated the same as a missing column
            bool isLonePlaceholder = fields.Length == 1 && fields[0] == ".";
            if (isLonePlaceholder)
            {
                return Sample.EmptySample;
            }

            fields.NormalizeNulls();

            // the legacy extractor works from the unsplit column text
            if (legacyExtractor != null)
            {
                return legacyExtractor.ExtractSample(sampleColumn);
            }

            int[]  ad = fields.GetString(formatIndices.AD).GetIntegers();
            float? aq = fields.GetString(formatIndices.AQ).GetFloat();
            int?   cn = fields.GetString(formatIndices.CN).GetInteger();

            string[] dst          = fields.GetString(formatIndices.DST).GetStrings();
            bool     hasFailed    = fields.GetString(formatIndices.FT).GetFailedFilter();
            string   gt           = fields.GetString(formatIndices.GT);
            int?     gq           = fields.GetString(formatIndices.GQ).GetInteger();
            bool     deNovoCalled = fields.GetString(formatIndices.DN).IsDeNovo();
            float?   lq           = fields.GetString(formatIndices.LQ).GetFloat();

            int[]   pr    = fields.GetString(formatIndices.PR).GetIntegers();
            int[]   repcn = fields.GetString(formatIndices.REPCN).GetIntegers('/');
            int[]   sr    = fields.GetString(formatIndices.SR).GetIntegers();
            int?    dp    = fields.GetString(formatIndices.DP).GetInteger();
            double? vf    = fields.GetString(formatIndices.VF).GetDouble();
            int?    mcn   = fields.GetString(formatIndices.MCN).GetInteger();
            double? sq    = fields.GetString(formatIndices.SQ).GetDouble();

            double[] variantFrequencies = VariantFrequency.GetVariantFrequencies(vf, ad, numAltAlleles);

            var isLoh = GetLoh(cn, mcn, gt);

            return new Sample(ad, aq, cn, dst,
                              hasFailed, gt, gq, deNovoCalled, lq, pr,
                              repcn, sr, dp, variantFrequencies, mcn, sq, isLoh);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Converts the sample columns of a VCF row into samples.
        /// </summary>
        /// <param name="vcfColumns">the columns of the VCF row</param>
        /// <param name="formatIndices">updated in place from the row's FORMAT column before any sample is parsed</param>
        /// <param name="numAltAlleles">forwarded unchanged to ExtractSample</param>
        /// <param name="isRepeatExpansion">forwarded unchanged to ExtractSample</param>
        /// <returns>
        /// null when the row has fewer than VcfCommon.MinNumColumnsSampleGenotypes columns; otherwise an array
        /// filled from the columns starting at VcfCommon.GenotypeIndex.
        /// </returns>
        internal static ISample[] ToSamples(this string[] vcfColumns, FormatIndices formatIndices, int numAltAlleles, bool isRepeatExpansion)
        {
            int columnCount = vcfColumns.Length;
            if (columnCount < VcfCommon.MinNumColumnsSampleGenotypes)
            {
                return null;
            }

            var samples = new ISample[columnCount - VcfCommon.MinNumColumnsSampleGenotypes + 1];

            formatIndices.Set(vcfColumns[VcfCommon.FormatIndex]);

            // 'column' walks the input row while 'slot' tracks the matching output position
            for (int column = VcfCommon.GenotypeIndex, slot = 0; column < columnCount; column++, slot++)
            {
                samples[slot] = ExtractSample(vcfColumns[column], formatIndices, numAltAlleles, isRepeatExpansion);
            }

            return samples;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Converts the sample columns of a VCF row into samples.
        /// </summary>
        /// <param name="vcfColumns">the columns of the VCF row</param>
        /// <param name="formatIndices">updated in place from the row's FORMAT column before any sample is parsed</param>
        /// <param name="simplePosition">forwarded unchanged to ExtractSample</param>
        /// <param name="mitoHeteroplasmyProvider">forwarded unchanged to ExtractSample</param>
        /// <param name="enableDq">forwarded unchanged to ExtractSample</param>
        /// <returns>
        /// null when the row has fewer than VcfCommon.MinNumColumnsSampleGenotypes columns; otherwise an array
        /// filled from the columns starting at VcfCommon.GenotypeIndex.
        /// </returns>
        internal static ISample[] ToSamples(this string[] vcfColumns, FormatIndices formatIndices, ISimplePosition simplePosition, IMitoHeteroplasmyProvider mitoHeteroplasmyProvider, bool enableDq = false)
        {
            int columnCount = vcfColumns.Length;
            if (columnCount < VcfCommon.MinNumColumnsSampleGenotypes)
            {
                return null;
            }

            var samples = new ISample[columnCount - VcfCommon.MinNumColumnsSampleGenotypes + 1];

            formatIndices.Set(vcfColumns[VcfCommon.FormatIndex]);

            // a legacy extractor is only created when the FORMAT column contains legacy fields;
            // it is built once per row and shared by every sample
            LegacySampleFieldExtractor legacySampleExtractor = null;
            if (IsLegacyVariantCaller(formatIndices))
            {
                legacySampleExtractor = new LegacySampleFieldExtractor(vcfColumns, formatIndices);
            }

            // 'column' walks the input row while 'slot' tracks the matching output position
            for (int column = VcfCommon.GenotypeIndex, slot = 0; column < columnCount; column++, slot++)
            {
                samples[slot] = ExtractSample(vcfColumns[column], formatIndices, simplePosition, mitoHeteroplasmyProvider, legacySampleExtractor, enableDq);
            }

            return samples;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Converts the sample columns of a VCF row into samples.
        /// </summary>
        /// <param name="vcfColumns">the columns of the VCF row</param>
        /// <param name="formatIndices">updated in place from the row's FORMAT column before any sample is parsed</param>
        /// <param name="numAltAlleles">forwarded unchanged to ExtractSample</param>
        /// <returns>
        /// null when the row has fewer than VcfCommon.MinNumColumnsSampleGenotypes columns; otherwise an array
        /// filled from the columns starting at VcfCommon.GenotypeIndex.
        /// </returns>
        internal static ISample[] ToSamples(this string[] vcfColumns, FormatIndices formatIndices, int numAltAlleles)
        {
            int columnCount = vcfColumns.Length;
            if (columnCount < VcfCommon.MinNumColumnsSampleGenotypes)
            {
                return null;
            }

            var samples = new ISample[columnCount - VcfCommon.MinNumColumnsSampleGenotypes + 1];

            formatIndices.Set(vcfColumns[VcfCommon.FormatIndex]);

            // a legacy extractor is only created when the FORMAT column contains legacy fields;
            // it is built once per row and shared by every sample
            LegacySampleFieldExtractor legacySampleExtractor = null;
            if (IsLegacyVariantCaller(formatIndices))
            {
                legacySampleExtractor = new LegacySampleFieldExtractor(vcfColumns, formatIndices);
            }

            // 'column' walks the input row while 'slot' tracks the matching output position
            for (int column = VcfCommon.GenotypeIndex, slot = 0; column < columnCount; column++, slot++)
            {
                samples[slot] = ExtractSample(vcfColumns[column], formatIndices, numAltAlleles, legacySampleExtractor);
            }

            return samples;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Parses every sample column of the stored VCF row into an <see cref="ISample"/>.
        /// As a side effect, the stored format indices are replaced by those extracted from the row's FORMAT column.
        /// </summary>
        /// <returns>
        /// null when the row has fewer than VcfCommon.MinNumColumnsSampleGenotypes columns; otherwise an array
        /// filled from the columns starting at VcfCommon.GenotypeIndex.
        /// </returns>
        internal ISample[] ExtractSamples()
        {
            int columnCount = _vcfColumns.Length;

            // without enough columns there are no samples to parse
            if (columnCount < VcfCommon.MinNumColumnsSampleGenotypes)
            {
                return null;
            }

            var samples = new ISample[columnCount - VcfCommon.MinNumColumnsSampleGenotypes + 1];

            // the field layout must be known before any sample column is parsed
            _formatIndices = FormatIndices.Extract(_vcfColumns[VcfCommon.FormatIndex]);

            // 'column' walks the input row while 'slot' tracks the matching output position
            for (int column = VcfCommon.GenotypeIndex, slot = 0; column < columnCount; column++, slot++)
            {
                samples[slot] = ExtractSample(_vcfColumns[column]);
            }

            return samples;
        }