Ejemplo n.º 1
0
        /// <summary>
        /// Computes one variant frequency per alternate allele from the sample's AD column.
        /// </summary>
        /// <param name="sampleFields">the intermediate sample fields (FORMAT indices, sample columns, alt alleles)</param>
        /// <returns>
        /// an array with one entry per alternate allele (all zeros when the total depth is 0), or null
        /// when the AD column is unavailable, contains invalid values, or the number of depths returned
        /// by GetAlleleDepths differs from the number of alternate alleles
        /// </returns>
        private static double[] GetVariantFrequenciesUsingAlleleDepths(IntermediateSampleFields sampleFields)
        {
            var adIndex = sampleFields.FormatIndices.AD;

            // the AD index must exist and must point inside the sample columns
            if (adIndex == null || sampleFields.SampleColumns.Length <= adIndex.Value)
            {
                return null;
            }

            int altAlleleCount = sampleFields.AltAlleles.Length;
            string adColumn    = sampleFields.SampleColumns[adIndex.Value];

            (var depths, bool isValid, int depthSum) = GetAlleleDepths(adColumn);

            bool matchesAltAlleles = isValid && altAlleleCount == depths.Length;
            if (!matchesAltAlleles)
            {
                return null;
            }

            var frequencies = new double[altAlleleCount];

            // a total depth of zero would yield NaN, so report zero frequencies instead
            if (depthSum == 0)
            {
                return frequencies;
            }

            double denominator = depthSum;

            for (var i = 0; i < frequencies.Length; i++)
            {
                frequencies[i] = depths[i] / denominator;
            }

            return frequencies;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Looks up the base count (A, C, G or T) that matches the alternate allele at the given index.
        /// </summary>
        /// <param name="sampleFields">the intermediate sample fields holding the alt alleles and base counts</param>
        /// <param name="alleleIndex">the index into the alternate allele array</param>
        /// <returns>
        /// the matching base count, or 0 when that count is null or the allele is not one of
        /// "A", "C", "G" or "T"
        /// </returns>
        private static int GetAlleleCount(IntermediateSampleFields sampleFields, int alleleIndex)
        {
            switch (sampleFields.AltAlleles[alleleIndex])
            {
                case "A":
                    return sampleFields.ACount ?? 0;

                case "C":
                    return sampleFields.CCount ?? 0;

                case "G":
                    return sampleFields.GCount ?? 0;

                case "T":
                    return sampleFields.TCount ?? 0;

                default:
                    // anything that is not a single A/C/G/T base has no count
                    return 0;
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Returns the nullable base count that corresponds to the supplied base.
        /// </summary>
        /// <param name="s">the base to look up; only "A", "C", "G" and "T" are recognized</param>
        /// <param name="intermediateSampleFields">the intermediate sample fields holding the base counts</param>
        /// <returns>the count for the base (which may itself be null), or null for any other string</returns>
        private static int? GetAlleleCountString(string s, IntermediateSampleFields intermediateSampleFields)
        {
            switch (s)
            {
                case "A":
                    return intermediateSampleFields.ACount;

                case "C":
                    return intermediateSampleFields.CCount;

                case "G":
                    return intermediateSampleFields.GCount;

                case "T":
                    return intermediateSampleFields.TCount;

                default:
                    // unrecognized base: no count available
                    return null;
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Returns the variant frequencies for a sample, trying several sources in turn:
        /// VF, TAR and TIR, raw allele counts, and finally allele depths (AD).
        /// </summary>
        /// <param name="sampleFields">the intermediate sample fields</param>
        /// <returns>the variant frequencies, or null when no source produced a result</returns>
        public static double[] GetVariantFrequencies(IntermediateSampleFields sampleFields)
        {
            // start with VF when it is present
            double[] frequencies = sampleFields.VF != null
                ? GetVariantFrequenciesUsingVf(sampleFields)
                : null;

            // NOTE(review): unlike the later steps, this one is not guarded by a null check on the
            // current result, so when TAR and TIR are both present their result replaces the
            // VF-based one - confirm that this precedence is intended
            if (sampleFields.TAR != null && sampleFields.TIR != null)
            {
                frequencies = GetVariantFrequenciesUsingTarTir(sampleFields);
            }

            if (frequencies != null)
            {
                return frequencies;
            }

            // fall back to the raw allele counts
            if (sampleFields.TotalAlleleCount != null)
            {
                frequencies = GetVariantFrequenciesUsingAlleleCounts(sampleFields);

                if (frequencies != null)
                {
                    return frequencies;
                }
            }

            // last resort: allele depths
            if (sampleFields.FormatIndices.AD != null)
            {
                return GetVariantFrequenciesUsingAlleleDepths(sampleFields);
            }

            return null;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Computes one variant frequency per alternate allele by dividing each allele's base count
        /// by the total allele count.
        /// </summary>
        /// <param name="sampleFields">the intermediate sample fields</param>
        /// <returns>
        /// an array with one entry per alternate allele (all zeros when the total allele count is 0),
        /// or null when the total allele count is missing, the only alternate allele is ".", or the
        /// reference or any alternate allele is not a single base
        /// </returns>
        private static double[] GetVariantFrequenciesUsingAlleleCounts(IntermediateSampleFields sampleFields)
        {
            bool refIsSingleBase   = sampleFields.VcfRefAllele.Length == 1;
            bool altsAreSingleBase = sampleFields.AltAlleles.AreAllAltAllelesSingleBase();
            bool isRefCall         = sampleFields.AltAlleles.Length == 1 && sampleFields.AltAlleles[0] == ".";

            // we need raw allele counts, a real alternate allele and single-base alleles throughout
            bool isUsable = sampleFields.TotalAlleleCount != null && !isRefCall && refIsSingleBase && altsAreSingleBase;
            if (!isUsable)
            {
                return null;
            }

            var frequencies = new double[sampleFields.AltAlleles.Length];

            // avoid dividing by zero: leave every frequency at 0
            if (sampleFields.TotalAlleleCount == 0)
            {
                return frequencies;
            }

            for (var altIndex = 0; altIndex < frequencies.Length; altIndex++)
            {
                frequencies[altIndex] = GetAlleleCount(sampleFields, altIndex) / (double)sampleFields.TotalAlleleCount;
            }

            return frequencies;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Parses the comma-separated AD sample column into an array of allele depths.
        /// </summary>
        /// <param name="intermediateSampleFields">the intermediate sample fields</param>
        /// <returns>
        /// the parsed depths, or null when the AD column is unavailable, its first value is ".",
        /// or any value fails to parse as an integer
        /// </returns>
        private static int[] GetAlleleDepthsUsingAd(IntermediateSampleFields intermediateSampleFields)
        {
            var adIndex = intermediateSampleFields.FormatIndices.AD;

            // the AD index must exist and must point inside the sample columns
            if (adIndex == null || intermediateSampleFields.SampleColumns.Length <= adIndex.Value)
            {
                return null;
            }

            var adValues = intermediateSampleFields.SampleColumns[adIndex.Value].OptimizedSplit(',');

            // a leading "." marks the field as missing
            if (adValues[0] == ".")
            {
                return null;
            }

            var depths = new int[adValues.Length];

            for (var pos = 0; pos < depths.Length; pos++)
            {
                (int depth, bool hasError) = adValues[pos].OptimizedParseInt32();

                if (hasError)
                {
                    return null;
                }

                depths[pos] = depth;
            }

            return depths;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Returns the total depth for a sample using the first available source, in this order:
        /// TAR and TIR, total allele count, DPI, DP, and finally the supplied INFO depth.
        /// </summary>
        /// <param name="infoDepth">the depth to fall back on when no sample-level source is present</param>
        /// <param name="intermediateSampleFields">the intermediate sample fields</param>
        /// <returns>
        /// the result of the first source whose fields are present; that result is returned as-is,
        /// even if it is null, and later sources are not consulted
        /// </returns>
        public static int? GetTotalDepth(int? infoDepth, IntermediateSampleFields intermediateSampleFields)
        {
            int? depth;

            if (intermediateSampleFields.TAR != null && intermediateSampleFields.TIR != null)
            {
                depth = GetTotalDepthUsingTarTir(intermediateSampleFields);
            }
            else if (intermediateSampleFields.TotalAlleleCount != null)
            {
                // base counts
                depth = GetTotalDepthUsingAlleleCounts(intermediateSampleFields);
            }
            else if (intermediateSampleFields.FormatIndices.DPI != null)
            {
                depth = GetTotalDepthUsingDpi(intermediateSampleFields);
            }
            else if (intermediateSampleFields.FormatIndices.DP != null)
            {
                depth = GetTotalDepthUsingDp(intermediateSampleFields);
            }
            else
            {
                // INFO DP (Pisces)
                depth = infoDepth;
            }

            return depth;
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Returns the allele depths as a two-element array built from the TAR and TIR values.
 /// </summary>
 /// <param name="intermediateSampleFields">the intermediate sample fields</param>
 /// <returns>
 /// { TAR, TIR }, or null when either value is missing or there is more than one alternate allele
 /// </returns>
 private static int[] GetAlleleDepthsUsingTarTir(IntermediateSampleFields intermediateSampleFields)
 {
     bool isUsable = intermediateSampleFields.TIR != null &&
                     intermediateSampleFields.TAR != null &&
                     intermediateSampleFields.AltAlleles.Length <= 1;

     if (!isUsable)
     {
         return null;
     }

     int tar = intermediateSampleFields.TAR.Value;
     int tir = intermediateSampleFields.TIR.Value;

     return new[] { tar, tir };
 }
Ejemplo n.º 9
0
 /// <summary>
 /// Wraps the sample's VF value in a single-element array.
 /// </summary>
 /// <param name="sampleFields">the intermediate sample fields</param>
 /// <returns>{ VF }, or null when VF is missing or there is more than one alternate allele</returns>
 private static double[] GetVariantFrequenciesUsingVf(IntermediateSampleFields sampleFields)
 {
     // a single VF value cannot describe several alternate alleles
     if (sampleFields.AltAlleles.Length > 1)
     {
         return null;
     }

     var vf = sampleFields.VF;

     if (vf == null)
     {
         return null;
     }

     return new[] { vf.Value };
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Parses the DP sample column as an integer.
        /// </summary>
        /// <param name="intermediateSampleFields">the intermediate sample fields</param>
        /// <returns>
        /// the parsed depth, or null when the DP column is unavailable or cannot be parsed
        /// </returns>
        private static int? GetTotalDepthUsingDp(IntermediateSampleFields intermediateSampleFields)
        {
            var dpIndex = intermediateSampleFields.FormatIndices.DP;

            // the DP index must exist and must point inside the sample columns
            if (dpIndex == null || intermediateSampleFields.SampleColumns.Length <= dpIndex.Value)
            {
                return null;
            }

            string dpColumn = intermediateSampleFields.SampleColumns[dpIndex.Value];
            (int depth, bool hasError) = dpColumn.OptimizedParseInt32();

            if (hasError)
            {
                return null;
            }

            return depth;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Computes a single variant frequency as TIR / (TAR + TIR).
        /// </summary>
        /// <param name="sampleFields">the intermediate sample fields</param>
        /// <returns>
        /// a single-element array with the frequency, ZeroVf when TAR + TIR is 0, or null when
        /// either value is missing or there is more than one alternate allele
        /// </returns>
        private static double[] GetVariantFrequenciesUsingTarTir(IntermediateSampleFields sampleFields)
        {
            // TAR and TIR: never observed with multiple alternate alleles
            bool isUsable = sampleFields.TIR != null && sampleFields.TAR != null && sampleFields.AltAlleles.Length <= 1;
            if (!isUsable)
            {
                return null;
            }

            int tirCount = sampleFields.TIR.Value;
            int tarCount = sampleFields.TAR.Value;

            // avoid dividing by zero
            if (tirCount + tarCount == 0)
            {
                return ZeroVf;
            }

            double total = (double)tarCount + tirCount;

            return new[] { tirCount / total };
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Builds a sample object from a single colon-delimited VCF sample column.
        /// </summary>
        /// <param name="sampleColumn">the raw sample column text</param>
        /// <returns>
        /// Sample.EmptySample when there are no format indices, the column is null or empty, or the
        /// column consists solely of "."; otherwise a new Sample populated from the parsed fields
        /// </returns>
        internal ISample ExtractSample(string sampleColumn)
        {
            // without a format column or sample text there is nothing to extract
            if (_formatIndices == null || string.IsNullOrEmpty(sampleColumn))
            {
                return Sample.EmptySample;
            }

            var columns = sampleColumn.OptimizedSplit(':');

            // a lone "." denotes a missing sample
            bool isMissingSample = columns.Length == 1 && columns[0] == ".";
            if (isMissingSample)
            {
                return Sample.EmptySample;
            }

            var fields = new IntermediateSampleFields(_vcfColumns, _formatIndices, columns);

            var     depthsPerAllele = AlleleDepths.GetAlleleDepths(fields);
            bool    hasFailedFilter = FailedFilter.GetFailedFilter(fields);
            string  genotypeCall    = Genotype.GetGenotype(fields);
            var     gq              = GenotypeQuality.GetGenotypeQuality(fields);
            var     depth           = TotalDepth.GetTotalDepth(_infoDepth, fields);
            double? dq              = columns.GetString(_formatIndices.DQ).GetDouble();
            var     frequencies     = LegacyVariantFrequency.GetVariantFrequencies(fields);
            var     splitReads      = ReadCounts.GetSplitReadCounts(fields);
            var     pairedEndReads  = ReadCounts.GetPairEndReadCounts(fields);

            // flagged when the major chromosome count equals a copy number greater than one
            var isLoh = false;
            if (fields.MajorChromosomeCount != null && fields.CopyNumber != null)
            {
                isLoh = fields.MajorChromosomeCount.Value == fields.CopyNumber.Value &&
                        fields.CopyNumber.Value > 1;
            }

            // the argument order below is positional and must match the Sample constructor
            return new Sample(
                depthsPerAllele, fields.AQ, fields.CopyNumber, fields.DST,
                hasFailedFilter, genotypeCall, gq, false, dq, fields.LQ, pairedEndReads, null, splitReads,
                depth, frequencies, null, null, isLoh, null, null);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Parses the comma-separated PR sample column into an array of read counts.
        /// </summary>
        /// <param name="intermediateSampleFields">the intermediate sample fields</param>
        /// <returns>
        /// the parsed counts, or null when the PR index is absent, the sample has too few columns
        /// to contain the PR field, or any value fails to parse as an integer
        /// </returns>
        public static int[] GetPairEndReadCounts(IntermediateSampleFields intermediateSampleFields)
        {
            var prIndex = intermediateSampleFields.FormatIndices.PR;

            // A sample column may contain fewer fields than the FORMAT column declares. Guard the
            // index the same way the AD, DP and GQ getters do, so a short sample column yields
            // null instead of an IndexOutOfRangeException.
            if (prIndex == null || intermediateSampleFields.SampleColumns.Length <= prIndex.Value)
            {
                return null;
            }

            var readCounts        = intermediateSampleFields.SampleColumns[prIndex.Value].OptimizedSplit(',');
            var pairEndReadCounts = new int[readCounts.Length];

            for (var i = 0; i < pairEndReadCounts.Length; i++)
            {
                (int number, bool foundError) = readCounts[i].OptimizedParseInt32();

                // one bad value invalidates the whole field
                if (foundError)
                {
                    return null;
                }

                pairEndReadCounts[i] = number;
            }

            return pairEndReadCounts;
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Builds the allele depths from the per-base allele counts: the reference allele's count
        /// comes first, followed by one count per alternate allele.
        /// </summary>
        /// <param name="intermediateSampleFields">the intermediate sample fields</param>
        /// <returns>
        /// the allele depths, or null when the total allele count is missing, the reference or any
        /// alternate allele is not a single base, or any allele has no count
        /// </returns>
        private static int[] GetAlleleDepthsUsingAlleleCounts(IntermediateSampleFields intermediateSampleFields)
        {
            // raw allele counts are required, and they only make sense for single-base alleles
            if (intermediateSampleFields.TotalAlleleCount == null ||
                intermediateSampleFields.VcfRefAllele.Length != 1 ||
                !intermediateSampleFields.AltAlleles.AreAllAltAllelesSingleBase())
            {
                return null;
            }

            var altAlleles = intermediateSampleFields.AltAlleles;
            var depths     = new int[altAlleles.Length + 1];

            // slot 0 holds the reference allele
            int? refCount = GetAlleleCountString(intermediateSampleFields.VcfRefAllele, intermediateSampleFields);
            if (refCount == null)
            {
                return null;
            }

            depths[0] = refCount.Value;

            // slots 1..n hold the alternate alleles in order
            for (var altIndex = 0; altIndex < altAlleles.Length; altIndex++)
            {
                int? altCount = GetAlleleCountString(altAlleles[altIndex], intermediateSampleFields);
                if (altCount == null)
                {
                    return null;
                }

                depths[altIndex + 1] = altCount.Value;
            }

            return depths;
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Parses the genotype quality from the sample columns, preferring the GQX column over GQ
        /// when both indices are present.
        /// </summary>
        /// <param name="intermediateSampleFields">the intermediate sample fields</param>
        /// <returns>
        /// the parsed quality, or null when neither index exists, the chosen index lies beyond the
        /// sample columns, or the value cannot be parsed as an integer
        /// </returns>
        public static int? GetGenotypeQuality(IntermediateSampleFields intermediateSampleFields)
        {
            var gqxIndex = intermediateSampleFields.FormatIndices.GQX;
            var gqIndex  = intermediateSampleFields.FormatIndices.GQ;

            if (gqxIndex == null && gqIndex == null)
            {
                return null;
            }

            // GQX wins when both are available
            int columnIndex = gqxIndex != null ? gqxIndex.Value : gqIndex.Value;

            if (intermediateSampleFields.SampleColumns.Length <= columnIndex)
            {
                return null;
            }

            (int quality, bool hasError) = intermediateSampleFields.SampleColumns[columnIndex].OptimizedParseInt32();

            if (hasError)
            {
                return null;
            }

            return quality;
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Returns the allele depths from the first source that yields a result, trying
        /// TAR and TIR, then the raw allele counts, then the AD column.
        /// </summary>
        /// <param name="intermediateSampleFields">the intermediate sample fields</param>
        /// <returns>the allele depths, or null when no source produced a result</returns>
        public static int[] GetAlleleDepths(IntermediateSampleFields intermediateSampleFields)
        {
            int[] depths;

            // TAR & TIR
            if (intermediateSampleFields.TAR != null && intermediateSampleFields.TIR != null)
            {
                depths = GetAlleleDepthsUsingTarTir(intermediateSampleFields);
                if (depths != null)
                {
                    return depths;
                }
            }

            // allele counts
            if (intermediateSampleFields.TotalAlleleCount != null)
            {
                depths = GetAlleleDepthsUsingAlleleCounts(intermediateSampleFields);
                if (depths != null)
                {
                    return depths;
                }
            }

            // allele depths (AD)
            if (intermediateSampleFields.FormatIndices.AD != null)
            {
                return GetAlleleDepthsUsingAd(intermediateSampleFields);
            }

            return null;
        }
Ejemplo n.º 17
0
 /// <summary>
 /// Returns the total depth as the sum of the TAR and TIR values.
 /// </summary>
 /// <param name="intermediateSampleFields">the intermediate sample fields</param>
 /// <returns>TAR + TIR; the nullable addition yields null when either value is null</returns>
 private static int? GetTotalDepthUsingTarTir(IntermediateSampleFields intermediateSampleFields)
 {
     return intermediateSampleFields.TAR + intermediateSampleFields.TIR;
 }
Ejemplo n.º 18
0
 /// <summary>
 /// Returns the total depth taken directly from the total allele count.
 /// </summary>
 /// <param name="intermediateSampleFields">the intermediate sample fields</param>
 /// <returns>the total allele count, which may be null</returns>
 private static int? GetTotalDepthUsingAlleleCounts(IntermediateSampleFields intermediateSampleFields)
 {
     return intermediateSampleFields.TotalAlleleCount;
 }