/// <summary>
/// Reports how many values this header field is expected to hold for the supplied variant context.
///
/// The answer depends on the count type of this header line:
///   INTEGER   - the fixed count stored on this header line (e.g. a field declared with size 1 yields 1)
///   UNBOUNDED - always -1
///   A         - vc.NAlleles - 1
///   G         - GenotypeLikelihoods.numLikelihoods(vc.NAlleles, ploidy), where ploidy is vc.GetMaxPloidy(2);
///               the argument 2 is the ploidy to fall back on (diploid) when the context has no GT information
/// </summary>
/// <param name="vc"> the variant context whose allele count and max ploidy drive the A and G cases </param>
/// <returns> the expected number of values, or -1 when the count is unbounded </returns>
/// <exception cref="VCFParsingError"> when the count type is none of the four handled kinds </exception>
public virtual int getCount(VariantContext vc)
{
    if (countType == Bio.VCF.VCFHeaderLineCount.INTEGER)
    {
        return count;
    }

    if (countType == Bio.VCF.VCFHeaderLineCount.UNBOUNDED)
    {
        return -1;
    }

    if (countType == Bio.VCF.VCFHeaderLineCount.A)
    {
        return vc.NAlleles - 1;
    }

    if (countType == Bio.VCF.VCFHeaderLineCount.G)
    {
        // Ploidy is resolved before the allele count is read, same order as before.
        int maxPloidy = vc.GetMaxPloidy(2);
        return GenotypeLikelihoods.numLikelihoods(vc.NAlleles, maxPloidy);
    }

    throw new VCFParsingError("Unknown count type: " + countType);
}
/// <summary>
/// Builds the text of the per-sample genotype columns for one record.
///
/// For every sample name listed in the header a field separator is written, followed by that
/// sample's value for each key in <paramref name="genotypeFormatKeys"/>:
///   - the GT key is written straight to the output as allele texts joined by the phased/unphased marker;
///   - the genotype-filter key yields the genotype's filters, or the "passes filters" constant when unfiltered;
///   - keys with an integer accessor yield a comma-joined list of ints, or the missing value when the accessor returns null;
///   - any other key yields the formatted extended attribute, or the missing value (repeated to the
///     count declared by the FORMAT header line when that count is greater than 1).
/// Trailing missing values are dropped before the remaining values are appended, each preceded by the
/// genotype field separator (the first one only when the GT key is part of the format keys).
/// </summary>
/// <param name="vc"> the variant context </param>
/// <param name="alleleMap"> alleles for this context; forwarded to getAlleleText </param>
/// <param name="genotypeFormatKeys"> the genotype format keys, in output order </param>
/// <returns> the genotype text for all samples of the header, each sample prefixed by the field separator </returns>
/// <exception cref="InvalidOperationException"> when the GT key is requested but a sample's genotype has no GT available </exception>
private string getGenotypeDataText(VariantContext vc, IDictionary<Allele, string> alleleMap, IList<string> genotypeFormatKeys)
{
    StringBuilder sbn = new StringBuilder();
    int ploidy = vc.GetMaxPloidy(2);

    // Loop-invariant: whether GT is among the keys does not change between samples, so look it up once.
    // The null guard keeps the "no samples" case from touching the key list, exactly as before.
    bool hasGenotypeKey = genotypeFormatKeys != null && genotypeFormatKeys.Contains(VCFConstants.GENOTYPE_KEY);

    foreach (string sample in mHeader.GenotypeSampleNames)
    {
        sbn.Append(VCFConstants.FIELD_SEPARATOR);

        // Samples named in the header but absent from the context get a placeholder genotype.
        Genotype g = vc.GetGenotype(sample);
        if (g == null)
        {
            g = GenotypeBuilder.CreateMissing(sample, ploidy);
        }

        List<string> attrs = new List<string>(genotypeFormatKeys.Count);
        foreach (string field in genotypeFormatKeys)
        {
            if (field.Equals(VCFConstants.GENOTYPE_KEY))
            {
                if (!g.Available)
                {
                    throw new InvalidOperationException("GTs cannot be missing for some samples if they are available for others in the record");
                }

                // GT goes directly to the output (not into attrs), so it is never stripped as a trailing missing value.
                sbn.Append(getAlleleText(g.getAllele(0), alleleMap));
                for (int i = 1; i < g.Ploidy; i++)
                {
                    sbn.Append(g.Phased ? VCFConstants.PHASED : VCFConstants.UNPHASED);
                    sbn.Append(getAlleleText(g.getAllele(i), alleleMap));
                }
                continue;
            }

            string outputValue;
            if (field.Equals(VCFConstants.GENOTYPE_FILTER_KEY))
            {
                outputValue = g.Filtered ? g.Filters : VCFConstants.PASSES_FILTERS_v4;
            }
            else
            {
                IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.GetAccessor(field);
                if (accessor != null)
                {
                    int[] intValues = accessor.getValues(g);
                    if (intValues == null)
                    {
                        outputValue = VCFConstants.MISSING_VALUE_v4;
                    }
                    else if (intValues.Length == 1)
                    {
                        // fast path
                        outputValue = Convert.ToString(intValues[0]);
                    }
                    else
                    {
                        StringBuilder sb = new StringBuilder();
                        sb.Append(intValues[0]);
                        for (int i = 1; i < intValues.Length; i++)
                        {
                            sb.Append(",");
                            sb.Append(intValues[i]);
                        }
                        outputValue = sb.ToString();
                    }
                }
                else
                {
                    object val = g.HasExtendedAttribute(field) ? g.GetExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;

                    VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
                    if (metaData != null)
                    {
                        int numInFormatField = metaData.getCount(vc);
                        if (numInFormatField > 1 && val.Equals(VCFConstants.MISSING_VALUE_v4))
                        {
                            // If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
                            // For example, if Number=2, the string has to be ".,."
                            StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
                            for (int i = 1; i < numInFormatField; i++)
                            {
                                sb.Append(",");
                                sb.Append(VCFConstants.MISSING_VALUE_v4);
                            }
                            val = sb.ToString();
                        }
                    }

                    // assume that if key is absent, then the given string encoding suffices
                    outputValue = formatVCFField(val);
                }
            }

            if (outputValue != null)
            {
                attrs.Add(outputValue);
            }
        }

        // strip off trailing missing values
        for (int i = attrs.Count - 1; i >= 0; i--)
        {
            if (isMissingValue(attrs[i]))
            {
                attrs.RemoveAt(i);
            }
            else
            {
                break;
            }
        }

        for (int i = 0; i < attrs.Count; i++)
        {
            // The first attribute needs a leading separator only when GT text was written ahead of it.
            if (i > 0 || hasGenotypeKey)
            {
                sbn.Append(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
            }
            sbn.Append(attrs[i]);
        }
    }

    return sbn.ToString();
}
/// <summary>
/// Determines the number of values expected for this header field in the context of the given variant.
///
/// Fixed (INTEGER) counts are returned as stored, e.g. a field declared with size 1 gives 1.
/// Type A gives vc.NAlleles - 1.
/// Type G gives GenotypeLikelihoods.numLikelihoods for the allele count of the context and the ploidy
/// obtained from vc.GetMaxPloidy(2); the 2 is the ploidy assumed (diploid) when there is no GT information at all.
/// UNBOUNDED gives -1.
/// </summary>
/// <param name="vc"> the variant context supplying the allele count and max ploidy </param>
/// <returns> the expected value count, or -1 for an unbounded field </returns>
/// <exception cref="VCFParsingError"> if the count type is not one of the kinds listed above </exception>
public virtual int getCount(VariantContext vc)
{
    int expected;

    switch (countType)
    {
        case Bio.VCF.VCFHeaderLineCount.INTEGER:
            expected = count;
            break;

        case Bio.VCF.VCFHeaderLineCount.UNBOUNDED:
            expected = -1;
            break;

        case Bio.VCF.VCFHeaderLineCount.A:
            expected = vc.NAlleles - 1;
            break;

        case Bio.VCF.VCFHeaderLineCount.G:
        {
            // Resolve ploidy first, then read the allele count, as the call order was originally.
            int samplePloidy = vc.GetMaxPloidy(2);
            expected = GenotypeLikelihoods.numLikelihoods(vc.NAlleles, samplePloidy);
            break;
        }

        default:
            throw new VCFParsingError("Unknown count type: " + countType);
    }

    return expected;
}
/// <summary>
/// Builds the text of the per-sample genotype columns for one record.
///
/// For every sample name listed in the header a field separator is written, followed by that
/// sample's value for each key in <paramref name="genotypeFormatKeys"/>:
///   - the GT key is written straight to the output as allele texts joined by the phased/unphased marker;
///   - the genotype-filter key yields the genotype's filters, or the "passes filters" constant when unfiltered;
///   - keys with an integer accessor yield a comma-joined list of ints, or the missing value when the accessor returns null;
///   - any other key yields the formatted extended attribute, or the missing value (repeated to the
///     count declared by the FORMAT header line when that count is greater than 1).
/// Trailing missing values are dropped before the remaining values are appended, each preceded by the
/// genotype field separator (the first one only when the GT key is part of the format keys).
/// </summary>
/// <param name="vc"> the variant context </param>
/// <param name="alleleMap"> alleles for this context; forwarded to getAlleleText </param>
/// <param name="genotypeFormatKeys"> the genotype format keys, in output order </param>
/// <returns> the genotype text for all samples of the header, each sample prefixed by the field separator </returns>
/// <exception cref="InvalidOperationException"> when the GT key is requested but a sample's genotype has no GT available </exception>
private string getGenotypeDataText(VariantContext vc, IDictionary<Allele, string> alleleMap, IList<string> genotypeFormatKeys)
{
    StringBuilder sbn = new StringBuilder();
    int ploidy = vc.GetMaxPloidy(2);

    // Loop-invariant: whether GT is among the keys does not change between samples, so look it up once.
    // The null guard keeps the "no samples" case from touching the key list, exactly as before.
    bool hasGenotypeKey = genotypeFormatKeys != null && genotypeFormatKeys.Contains(VCFConstants.GENOTYPE_KEY);

    foreach (string sample in mHeader.GenotypeSampleNames)
    {
        sbn.Append(VCFConstants.FIELD_SEPARATOR);

        // Samples named in the header but absent from the context get a placeholder genotype.
        Genotype g = vc.GetGenotype(sample);
        if (g == null)
        {
            g = GenotypeBuilder.CreateMissing(sample, ploidy);
        }

        List<string> attrs = new List<string>(genotypeFormatKeys.Count);
        foreach (string field in genotypeFormatKeys)
        {
            if (field.Equals(VCFConstants.GENOTYPE_KEY))
            {
                if (!g.Available)
                {
                    throw new InvalidOperationException("GTs cannot be missing for some samples if they are available for others in the record");
                }

                // GT goes directly to the output (not into attrs), so it is never stripped as a trailing missing value.
                sbn.Append(getAlleleText(g.getAllele(0), alleleMap));
                for (int i = 1; i < g.Ploidy; i++)
                {
                    sbn.Append(g.Phased ? VCFConstants.PHASED : VCFConstants.UNPHASED);
                    sbn.Append(getAlleleText(g.getAllele(i), alleleMap));
                }
                continue;
            }

            string outputValue;
            if (field.Equals(VCFConstants.GENOTYPE_FILTER_KEY))
            {
                outputValue = g.Filtered ? g.Filters : VCFConstants.PASSES_FILTERS_v4;
            }
            else
            {
                IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.GetAccessor(field);
                if (accessor != null)
                {
                    int[] intValues = accessor.getValues(g);
                    if (intValues == null)
                    {
                        outputValue = VCFConstants.MISSING_VALUE_v4;
                    }
                    else if (intValues.Length == 1)
                    {
                        // fast path
                        outputValue = Convert.ToString(intValues[0]);
                    }
                    else
                    {
                        StringBuilder sb = new StringBuilder();
                        sb.Append(intValues[0]);
                        for (int i = 1; i < intValues.Length; i++)
                        {
                            sb.Append(",");
                            sb.Append(intValues[i]);
                        }
                        outputValue = sb.ToString();
                    }
                }
                else
                {
                    object val = g.HasExtendedAttribute(field) ? g.GetExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;

                    VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
                    if (metaData != null)
                    {
                        int numInFormatField = metaData.getCount(vc);
                        if (numInFormatField > 1 && val.Equals(VCFConstants.MISSING_VALUE_v4))
                        {
                            // If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
                            // For example, if Number=2, the string has to be ".,."
                            StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
                            for (int i = 1; i < numInFormatField; i++)
                            {
                                sb.Append(",");
                                sb.Append(VCFConstants.MISSING_VALUE_v4);
                            }
                            val = sb.ToString();
                        }
                    }

                    // assume that if key is absent, then the given string encoding suffices
                    outputValue = formatVCFField(val);
                }
            }

            if (outputValue != null)
            {
                attrs.Add(outputValue);
            }
        }

        // strip off trailing missing values
        for (int i = attrs.Count - 1; i >= 0; i--)
        {
            if (isMissingValue(attrs[i]))
            {
                attrs.RemoveAt(i);
            }
            else
            {
                break;
            }
        }

        for (int i = 0; i < attrs.Count; i++)
        {
            // The first attribute needs a leading separator only when GT text was written ahead of it.
            if (i > 0 || hasGenotypeKey)
            {
                sbn.Append(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
            }
            sbn.Append(attrs[i]);
        }
    }

    return sbn.ToString();
}