/// <summary>
/// Build the lookup table that maps a diploid PL index back to the pair of allele indices it represents.
/// </summary>
/// <param name="altAlleles">number of alternate alleles; allele indices run from 0 to this value inclusive</param>
/// <returns>an array with one entry per likelihood index, none of them null</returns>
private static GenotypeLikelihoodsAllelePair[] calculatePLcache(int altAlleles)
{
    // Table size is the likelihood count for (1 + altAlleles) alleles at ploidy 2.
    int tableSize = GenotypeLikelihoods.numLikelihoods(1 + altAlleles, 2);
    GenotypeLikelihoodsAllelePair[] pairs = new GenotypeLikelihoodsAllelePair[tableSize];

    // Enumerate every unordered pair (first <= second) and store it at its PL index.
    for (int first = 0; first <= altAlleles; first++)
    {
        for (int second = first; second <= altAlleles; second++)
        {
            int slot = calculatePLindex(first, second);
            pairs[slot] = new GenotypeLikelihoodsAllelePair(first, second);
        }
    }

    // Sanity check: every slot must have been filled by the enumeration above.
    int firstEmpty = Array.FindIndex(pairs, entry => entry == null);
    if (firstEmpty >= 0)
    {
        throw new Exception("BUG: cache entry " + firstEmpty + " is unexpected null");
    }

    return pairs;
}
/// <summary>
/// Two GenotypeLikelihoods are equal when their PL arrays have the same length and the same values in order.
/// </summary>
/// <param name="aThat">the object to compare against</param>
/// <returns>true if <paramref name="aThat"/> is this instance, or a GenotypeLikelihoods with identical PLs</returns>
public override bool Equals(object aThat)
{
    // Same instance: trivially equal.
    if (ReferenceEquals(this, aThat))
    {
        return true;
    }

    // Null or any other type can never be equal.
    GenotypeLikelihoods that = aThat as GenotypeLikelihoods;
    if (ReferenceEquals(that, null))
    {
        return false;
    }

    // Compare by PL representation.
    int[] mine = AsPLs;
    int[] theirs = that.AsPLs;
    if (mine.Length != theirs.Length)
    {
        return false;
    }

    return mine.SequenceEqual(theirs);
}
/// <summary>
/// Get the number of values expected for this header field in the given variant context.
///
/// INTEGER: the fixed count declared for this field.
/// UNBOUNDED: -1.
/// A: one value per alternate allele, i.e. vc.NAlleles - 1.
/// G: the number of genotype likelihoods for vc.NAlleles alleles at the max ploidy reported by
/// vc.GetMaxPloidy(2); the argument 2 is the ploidy to fall back on (diploid) when the context
/// has no GT information at all.
/// </summary>
/// <param name="vc">the variant context supplying the allele count and ploidy</param>
/// <returns>the expected number of values, or -1 when the count is unbounded</returns>
/// <exception cref="VCFParsingError">if the count type is not one of the values above</exception>
public virtual int getCount(VariantContext vc)
{
    var kind = countType;

    if (kind == Bio.VCF.VCFHeaderLineCount.INTEGER)
    {
        return count;
    }

    if (kind == Bio.VCF.VCFHeaderLineCount.UNBOUNDED)
    {
        return -1;
    }

    if (kind == Bio.VCF.VCFHeaderLineCount.A)
    {
        return vc.NAlleles - 1;
    }

    if (kind == Bio.VCF.VCFHeaderLineCount.G)
    {
        int maxPloidy = vc.GetMaxPloidy(2);
        return GenotypeLikelihoods.numLikelihoods(vc.NAlleles, maxPloidy);
    }

    throw new VCFParsingError("Unknown count type: " + kind);
}
/// <summary>
/// Parse the genotype columns of a VCF record into one genotype per sample.
/// </summary>
/// <param name="str"> the genotype portion of the record: the format-key column followed by one column per sample </param>
/// <param name="alleles"> the list of alleles for the record, passed on to the GT allele parser </param>
/// <param name="chr"> contig name, used only when reporting errors </param>
/// <param name="pos"> position, used only when reporting errors </param>
/// <returns> lazy genotype data built from the parsed genotypes and the header's sample ordering/offsets </returns>
/// <exception cref="VCFParsingError"> if the columns cannot be split, or a genotype cannot be built </exception>
public LazyGenotypesContext.LazyData CreateGenotypeMap(string str, IList<Allele> alleles, string chr, int pos)
{
    // Reusable split buffer: one slot for the format keys plus one per sample column.
    if (genotypeParts == null)
    {
        genotypeParts = new String[header.ColumnCount - NUM_STANDARD_FIELDS];
    }

    try
    {
        FastStringUtils.Split(str, VCFConstants.FIELD_SEPARATOR_CHAR, genotypeParts);
    }
    catch (Exception e)
    {
        throw new VCFParsingError("Could not parse genotypes, was expecting " + (genotypeParts.Length - 1).ToString() + " but found " + str.Split(VCFConstants.FIELD_SEPARATOR_CHAR).Length.ToString(), e);
    }

    List<Genotype> genotypes = new List<Genotype>(genotypeParts.Length);

    // get the format keys (first column) and locate GT among them
    string[] genotypeKeyArray = genotypeParts[0].Split(VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
    int genotypeAlleleLocation = Array.IndexOf(genotypeKeyArray, VCFConstants.GENOTYPE_KEY);
    if (version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1)
    {
        generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
    }

    // clear out our allele mapping
    alleleMap.Clear();
    GenotypeBuilder gb = new GenotypeBuilder();

    // cycle through the genotype strings; column 0 is the key list, so samples start at 1
    for (int genotypeOffset = 1; genotypeOffset < genotypeParts.Length; genotypeOffset++)
    {
        Genotype curGenotype;
        string sampleName = header.GenotypeSampleNames[genotypeOffset - 1];
        var currentGeno = genotypeParts[genotypeOffset];

        // shortcut for null alleles: skip the field-by-field parse entirely
        if (currentGeno == "./.")
        {
            curGenotype = GenotypeBuilder.CreateMissing(sampleName, 2);
        }
        else if (currentGeno == ".")
        {
            curGenotype = GenotypeBuilder.CreateMissing(sampleName, 1);
        }
        else
        {
            gb.Reset(false);
            gb.SampleName = sampleName;
            string[] GTValueArray = FastStringUtils.Split(currentGeno, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR, int.MaxValue, StringSplitOptions.None);

            // check to see if the value list is longer than the key list, which is a problem
            if (genotypeKeyArray.Length < GTValueArray.Length)
            {
                generateException("There are too many keys for the sample " + sampleName + ", line is: keys = " + genotypeParts[0] + ", values = " + genotypeParts[genotypeOffset]);
            }

            if (genotypeAlleleLocation > 0)
            {
                generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
            }

            // Walk the keys and decode each value into the genotype builder.
            // Trailing keys with no corresponding value are simply skipped.
            if (genotypeKeyArray.Length > 0)
            {
                gb.MaxAttributes(genotypeKeyArray.Length - 1);
                for (int i = 0; i < genotypeKeyArray.Length; i++)
                {
                    string gtKey = genotypeKeyArray[i];
                    if (i >= GTValueArray.Length)
                    {
                        break;
                    }

                    // todo -- all of these on the fly parsing of the missing value should be static constants
                    if (gtKey == VCFConstants.GENOTYPE_FILTER_KEY)
                    {
                        IList<string> filters = parseFilters(GetCachedString(GTValueArray[i]));
                        if (filters != null)
                        {
                            gb.SetFilters(filters.ToList());
                        }
                    }
                    else if (GTValueArray[i] == VCFConstants.MISSING_VALUE_v4)
                    {
                        // don't add missing values to the map
                    }
                    else
                    {
                        if (gtKey == VCFConstants.GENOTYPE_QUALITY_KEY)
                        {
                            if (GTValueArray[i] == VCFConstants.MISSING_GENOTYPE_QUALITY_v3)
                            {
                                gb.noGQ();
                            }
                            else
                            {
                                // VCF numbers always use '.' as the decimal separator, so parse with the
                                // invariant culture rather than whatever the current thread culture is.
                                gb.GQ = (int)Math.Round(Convert.ToDouble(GTValueArray[i], System.Globalization.CultureInfo.InvariantCulture));
                            }
                        }
                        else if (gtKey == VCFConstants.GENOTYPE_ALLELE_DEPTHS)
                        {
                            gb.AD = decodeInts(GTValueArray[i]);
                        }
                        else if (gtKey == VCFConstants.GENOTYPE_PL_KEY)
                        {
                            gb.PL = decodeInts(GTValueArray[i]);
                        }
                        else if (gtKey == VCFConstants.GENOTYPE_LIKELIHOODS_KEY)
                        {
                            gb.PL = GenotypeLikelihoods.fromGLField(GTValueArray[i]).AsPLs;
                        }
                        else if (gtKey.Equals(VCFConstants.DEPTH_KEY))
                        {
                            // Culture-independent parse, for the same reason as GQ above.
                            gb.DP = Convert.ToInt32(GTValueArray[i], System.Globalization.CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            gb.AddAttribute(gtKey, GTValueArray[i]);
                        }
                    }
                }
            }

            // Resolve the called alleles; with no GT key the genotype carries an empty allele list.
            List<Allele> GTalleles;
            if (genotypeAlleleLocation == -1)
            {
                GTalleles = new List<Allele>(0);
            }
            else
            {
                GTalleles = parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap);
            }
            gb.Alleles = GTalleles;

            // Phased when the GT value contains the phased separator character.
            gb.Phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].IndexOf(VCFConstants.PHASED_AS_CHAR) != -1;

            // add it to the list
            try
            {
                curGenotype = gb.Make();
            }
            catch (Exception e)
            {
                // Keep the original exception as the inner exception so its stack trace is not lost.
                throw new VCFParsingError(e.Message + ", at position " + chr + ":" + pos, e);
            }
        }

        genotypes.Add(curGenotype);
    }

    return new LazyGenotypesContext.LazyData(genotypes, header.SampleNamesInOrder, header.SampleNameToOffset);
}
/// <summary>
/// Set this genotype's PL values from an array of log10 likelihoods.
/// The values are converted via GenotypeLikelihoods on every call, which is SLOW.
/// </summary>
/// <param name="GLs">the log10 genotype likelihoods to convert</param>
public void setPL(double[] GLs)
{
    var likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(GLs);
    PL = likelihoods.AsPLs;
}