Esempio n. 1
0
        /// <summary>
        /// Builds a lookup table that maps a PL index to the pair of allele indices it represents,
        /// for ploidy 2 and <c>1 + altAlleles</c> alleles.
        /// </summary>
        /// <param name="altAlleles"> the highest allele index to include; allele indices run from 0 to this value inclusive </param>
        /// <returns> an array, indexed by the value of calculatePLindex, holding one allele pair per slot </returns>
        /// <exception cref="Exception"> if any slot of the table was left unfilled after enumerating all pairs </exception>
        private static GenotypeLikelihoodsAllelePair[] calculatePLcache(int altAlleles)
        {
            // The table size is whatever numLikelihoods reports for (1 + altAlleles) alleles at ploidy 2.
            GenotypeLikelihoodsAllelePair[] pairsByPLIndex =
                new GenotypeLikelihoodsAllelePair[GenotypeLikelihoods.numLikelihoods(1 + altAlleles, 2)];

            // Visit every unordered pair (first <= second) exactly once.
            for (int first = 0; first <= altAlleles; first++)
            {
                for (int second = first; second <= altAlleles; second++)
                {
                    int plIndex = calculatePLindex(first, second);
                    pairsByPLIndex[plIndex] = new GenotypeLikelihoodsAllelePair(first, second);
                }
            }

            // Every slot must have been written; a hole means the index computation and
            // the table size disagree.
            int slot = 0;
            foreach (GenotypeLikelihoodsAllelePair pair in pairsByPLIndex)
            {
                if (pair == null)
                {
                    throw new Exception("BUG: cache entry " + slot + " is unexpected null");
                }
                slot++;
            }

            return pairsByPLIndex;
        }
Esempio n. 2
0
        /// <summary>
        /// Compares this instance with another object. Two GenotypeLikelihoods are considered
        /// equal when their PL arrays (as returned by AsPLs) have the same length and the same
        /// values in the same order.
        /// </summary>
        /// <param name="aThat"> the object to compare against </param>
        /// <returns> true if <paramref name="aThat"/> is this instance, or is a GenotypeLikelihoods with identical PLs; false otherwise </returns>
        public override bool Equals(object aThat)
        {
            // Same instance: nothing further to compare.
            if (this == aThat)
            {
                return true;
            }

            // Anything that is not a GenotypeLikelihoods (including null) can never be equal.
            GenotypeLikelihoods that = aThat as GenotypeLikelihoods;
            if (object.ReferenceEquals(that, null))
            {
                return false;
            }

            // Equality is defined on the PL representation only.
            int[] mine   = AsPLs;
            int[] theirs = that.AsPLs;
            if (mine.Length != theirs.Length)
            {
                return false;
            }
            return mine.SequenceEqual(theirs);
        }
Esempio n. 3
0
        /// <summary>
        /// Returns how many values this header field is expected to carry for the given variant context.
        /// <list type="bullet">
        /// <item><description>INTEGER: the fixed count stored on this header line.</description></item>
        /// <item><description>UNBOUNDED: -1.</description></item>
        /// <item><description>A: the number of alleles in <paramref name="vc"/> minus one.</description></item>
        /// <item><description>G: the number of genotype likelihoods for the alleles in <paramref name="vc"/> at the
        /// ploidy returned by <c>vc.GetMaxPloidy(2)</c> (the original documentation states that diploid is
        /// assumed when the context carries no GT information).</description></item>
        /// </list>
        /// </summary>
        /// <param name="vc"> the variant context whose allele count and ploidy drive the A and G cases </param>
        /// <returns> the expected number of values, or -1 when the count is unbounded </returns>
        /// <exception cref="VCFParsingError"> if the count type is not one of the four handled kinds </exception>
        public virtual int getCount(VariantContext vc)
        {
            if (countType == Bio.VCF.VCFHeaderLineCount.INTEGER)
            {
                return count;
            }

            if (countType == Bio.VCF.VCFHeaderLineCount.UNBOUNDED)
            {
                return -1;
            }

            if (countType == Bio.VCF.VCFHeaderLineCount.A)
            {
                return vc.NAlleles - 1;
            }

            if (countType == Bio.VCF.VCFHeaderLineCount.G)
            {
                // Ploidy is resolved first, then the allele count is read for the likelihood count.
                int maxPloidy = vc.GetMaxPloidy(2);
                return GenotypeLikelihoods.numLikelihoods(vc.NAlleles, maxPloidy);
            }

            throw new VCFParsingError("Unknown count type: " + countType);
        }
Esempio n. 4
0
        /// <summary>
        /// Parses the genotype portion of a VCF record into one genotype per sample.
        /// The input is split on VCFConstants.FIELD_SEPARATOR_CHAR; the first piece holds the format
        /// keys and each following piece holds the values for one sample, in header order.
        /// </summary>
        /// <param name="str"> the format-key column followed by the per-sample genotype columns </param>
        /// <param name="alleles"> the list of alleles of the record, used to resolve the GT field </param>
        /// <param name="chr"> the contig name; only used to build error messages </param>
        /// <param name="pos"> the position; only used to build error messages </param>
        /// <returns> lazy genotype data built from the parsed genotypes and the header's sample-name tables </returns>
        /// <exception cref="VCFParsingError"> if the columns cannot be split into the expected number of pieces,
        /// or if a genotype cannot be built from the parsed values </exception>
        public LazyGenotypesContext.LazyData CreateGenotypeMap(string str, IList <Allele> alleles, string chr, int pos)
        {
            // The split buffer is allocated once and reused across calls.
            if (genotypeParts == null)
            {
                genotypeParts = new String[header.ColumnCount - NUM_STANDARD_FIELDS];
            }
            try {
                FastStringUtils.Split(str, VCFConstants.FIELD_SEPARATOR_CHAR, genotypeParts);
            } catch (Exception e) {
                throw new VCFParsingError("Could not parse genotypes, was expecting " + (genotypeParts.Length - 1).ToString() + " but found " + str.Split(VCFConstants.FIELD_SEPARATOR_CHAR).Length.ToString(), e);
            }
            List <Genotype> genotypes = new List <Genotype> (genotypeParts.Length);

            // get the format keys
            string[] genotypeKeyArray       = genotypeParts [0].Split(VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
            int      genotypeAlleleLocation = Array.IndexOf(genotypeKeyArray, VCFConstants.GENOTYPE_KEY);

            if (version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1)
            {
                generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
            }
            // clear out our allele mapping
            alleleMap.Clear();
            GenotypeBuilder gb = new GenotypeBuilder();

            // VCF is a machine-readable format, so numeric fields must be parsed independently of the
            // current thread culture (otherwise "99.5" fails or misparses under a comma-decimal culture).
            IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

            // cycle through the genotype strings; piece 0 is the key list, so samples start at 1
            for (int genotypeOffset = 1; genotypeOffset < genotypeParts.Length; genotypeOffset++)
            {
                Genotype curGenotype;
                string   sampleName  = header.GenotypeSampleNames [genotypeOffset - 1];
                var      currentGeno = genotypeParts [genotypeOffset];
                // shortcut for null alleles: build a missing genotype without any field parsing
                if (currentGeno == "./.")
                {
                    curGenotype = GenotypeBuilder.CreateMissing(sampleName, 2);
                }
                else if (currentGeno == ".")
                {
                    curGenotype = GenotypeBuilder.CreateMissing(sampleName, 1);
                }
                else
                {
                    gb.Reset(false);
                    gb.SampleName = sampleName;
                    string[] GTValueArray = FastStringUtils.Split(currentGeno, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR, int.MaxValue, StringSplitOptions.None);
                    // check to see if the value list is longer than the key list, which is a problem
                    if (genotypeKeyArray.Length < GTValueArray.Length)
                    {
                        generateException("There are too many keys for the sample " + sampleName + ", line is: keys = " + genotypeParts [0] + ", values = " + genotypeParts [genotypeOffset]);
                    }
                    // Deliberately checked here rather than before the loop: records whose samples are
                    // all missing never reach this point and are therefore not rejected.
                    if (genotypeAlleleLocation > 0)
                    {
                        generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
                    }

                    // Loop over all fields in the key list and decode them, adding each as information
                    // to the genotype builder, which then makes the genotype.
                    // NOTE(review): the GT key itself is not special-cased in this loop, so it appears to
                    // reach the generic AddAttribute branch — confirm that this is intended.
                    if (genotypeKeyArray.Length > 0)
                    {
                        gb.MaxAttributes(genotypeKeyArray.Length - 1);
                        for (int i = 0; i < genotypeKeyArray.Length; i++)
                        {
                            string gtKey = genotypeKeyArray [i];
                            // trailing keys without a value for this sample are skipped
                            if (i >= GTValueArray.Length)
                            {
                                break;
                            }
                            string gtValue = GTValueArray [i];
                            // todo -- all of these on the fly parsing of the missing value should be static constants
                            if (gtKey == VCFConstants.GENOTYPE_FILTER_KEY)
                            {
                                // the filter field is handed to parseFilters even when it holds the missing value
                                IList <string> filters = parseFilters(GetCachedString(gtValue));
                                if (filters != null)
                                {
                                    gb.SetFilters(filters.ToList());
                                }
                            }
                            else if (gtValue == VCFConstants.MISSING_VALUE_v4)
                            {
                                // don't add missing values to the map
                            }
                            else if (gtKey == VCFConstants.GENOTYPE_QUALITY_KEY)
                            {
                                if (gtValue == VCFConstants.MISSING_GENOTYPE_QUALITY_v3)
                                {
                                    gb.noGQ();
                                }
                                else
                                {
                                    // GQ may be written as a decimal; it is rounded to the nearest integer
                                    gb.GQ = ((int)Math.Round(Convert.ToDouble(gtValue, invariant)));
                                }
                            }
                            else if (gtKey == VCFConstants.GENOTYPE_ALLELE_DEPTHS)
                            {
                                gb.AD = (decodeInts(gtValue));
                            }
                            else if (gtKey == VCFConstants.GENOTYPE_PL_KEY)
                            {
                                gb.PL = (decodeInts(gtValue));
                            }
                            else if (gtKey == VCFConstants.GENOTYPE_LIKELIHOODS_KEY)
                            {
                                // GL values are converted to PLs before being stored
                                gb.PL = (GenotypeLikelihoods.fromGLField(gtValue).AsPLs);
                            }
                            else if (gtKey == VCFConstants.DEPTH_KEY)
                            {
                                gb.DP = (Convert.ToInt32(gtValue, invariant));
                            }
                            else
                            {
                                gb.AddAttribute(gtKey, gtValue);
                            }
                        }
                    }

                    List <Allele> GTalleles;
                    if (genotypeAlleleLocation == -1)
                    {
                        GTalleles = new List <Allele> (0);
                    }
                    else
                    {
                        GTalleles = parseGenotypeAlleles(GTValueArray [genotypeAlleleLocation], alleles, alleleMap);
                    }
                    gb.Alleles = GTalleles;
                    // the genotype is phased when its GT string contains the phased separator character
                    gb.Phased  = genotypeAlleleLocation != -1 && GTValueArray [genotypeAlleleLocation].IndexOf(VCFConstants.PHASED_AS_CHAR) != -1;

                    // add it to the list
                    try {
                        curGenotype = gb.Make();
                    } catch (Exception e) {
                        // keep the original exception as the inner exception so its stack trace is not lost
                        throw new VCFParsingError(e.Message + ", at position " + chr + ":" + pos, e);
                    }
                }
                genotypes.Add(curGenotype);
            }
            return(new LazyGenotypesContext.LazyData(genotypes, header.SampleNamesInOrder, header.SampleNameToOffset));
        }
Esempio n. 5
0
 /// <summary>
 /// Sets this genotype's PL values from an array of doubles. The array is passed to
 /// GenotypeLikelihoods.fromLog10Likelihoods and the AsPLs view of the result is stored.
 /// The original documentation marks this conversion as slow.
 /// </summary>
 /// <param name="GLs"> the likelihood values to convert </param>
 public void setPL(double[] GLs)
 {
     var converted = GenotypeLikelihoods.fromLog10Likelihoods(GLs);
     this.PL = converted.AsPLs;
 }