Exemple #1
0
        /// <summary>
        /// Returns the number of values this header field is expected to carry for the given variant context.
        ///
        /// The answer depends on this line's count type:
        ///   INTEGER   - the fixed count stored on this header line
        ///   UNBOUNDED - -1
        ///   A         - vc.NAlleles - 1
        ///   G         - GenotypeLikelihoods.numLikelihoods for vc.NAlleles and vc.GetMaxPloidy(2)
        ///               (the 2 is presumably the fallback ploidy, i.e. diploid, used when the
        ///               context carries no genotype information - confirm against GetMaxPloidy)
        /// </summary>
        /// <param name="vc">the variant context whose allele count and ploidy are consulted</param>
        /// <returns>the expected number of values, or -1 when the count is unbounded</returns>
        /// <exception cref="VCFParsingError">if the count type is none of the four handled kinds</exception>
        public virtual int getCount(VariantContext vc)
        {
            var kind = countType;

            if (kind == Bio.VCF.VCFHeaderLineCount.INTEGER)
            {
                return count;
            }

            if (kind == Bio.VCF.VCFHeaderLineCount.UNBOUNDED)
            {
                return -1;
            }

            if (kind == Bio.VCF.VCFHeaderLineCount.A)
            {
                return vc.NAlleles - 1;
            }

            if (kind == Bio.VCF.VCFHeaderLineCount.G)
            {
                int maxPloidy = vc.GetMaxPloidy(2);
                return GenotypeLikelihoods.numLikelihoods(vc.NAlleles, maxPloidy);
            }

            throw new VCFParsingError("Unknown count type: " + kind);
        }
Exemple #2
0
        /// <summary>
        /// Builds the text of the per-sample genotype columns for one record.
        ///
        /// For every sample name in the header a field separator is written, followed by that
        /// sample's values for the keys in <paramref name="genotypeFormatKeys"/>. The GT value is
        /// appended to the output as soon as it is encountered; every other value is collected first
        /// so that trailing missing values can be dropped before the rest are written.
        /// </summary>
        /// <param name="vc">the variant context supplying the genotypes</param>
        /// <param name="alleleMap">alleles for this context; passed through to getAlleleText</param>
        /// <param name="genotypeFormatKeys">the genotype format keys to write for each sample</param>
        /// <returns>the concatenated genotype text for all samples named in the header</returns>
        /// <exception cref="InvalidOperationException">
        /// if the GT key is requested but a sample's genotype is not available
        /// </exception>
        private string getGenotypeDataText(VariantContext vc, IDictionary <Allele, string> alleleMap, IList <string> genotypeFormatKeys)
        {
            StringBuilder sbn    = new StringBuilder();
            int           ploidy = vc.GetMaxPloidy(2);

            // Loop-invariant, so look it up once instead of re-scanning the key list for every sample.
            // The null guard keeps the "no samples, no keys" call working exactly as before.
            bool hasGenotypeKey = genotypeFormatKeys != null && genotypeFormatKeys.Contains(VCFConstants.GENOTYPE_KEY);

            foreach (string sample in mHeader.GenotypeSampleNames)
            {
                sbn.Append(VCFConstants.FIELD_SEPARATOR);

                // Samples absent from the context are written as a missing genotype.
                Genotype g = vc.GetGenotype(sample);
                if (g == null)
                {
                    g = GenotypeBuilder.CreateMissing(sample, ploidy);
                }

                IList <string> attrs = new List <string>(genotypeFormatKeys.Count);
                foreach (string field in genotypeFormatKeys)
                {
                    if (field.Equals(VCFConstants.GENOTYPE_KEY))
                    {
                        if (!g.Available)
                        {
                            throw new InvalidOperationException("GTs cannot be missing for some samples if they are available for others in the record");
                        }

                        // GT goes straight into the output (not into attrs): alleles joined by the phasing separator.
                        sbn.Append(getAlleleText(g.getAllele(0), alleleMap));
                        for (int i = 1; i < g.Ploidy; i++)
                        {
                            sbn.Append(g.Phased ? VCFConstants.PHASED : VCFConstants.UNPHASED);
                            sbn.Append(getAlleleText(g.getAllele(i), alleleMap));
                        }
                        continue;
                    }

                    string outputValue;
                    if (field.Equals(VCFConstants.GENOTYPE_FILTER_KEY))
                    {
                        outputValue = g.Filtered ? g.Filters : VCFConstants.PASSES_FILTERS_v4;
                    }
                    else
                    {
                        IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.GetAccessor(field);
                        if (accessor != null)
                        {
                            int[] intValues = accessor.getValues(g);
                            if (intValues == null || intValues.Length == 0)
                            {
                                // An empty array carries no values; write it as missing instead of
                                // failing on intValues[0] below.
                                outputValue = VCFConstants.MISSING_VALUE_v4;
                            }
                            else if (intValues.Length == 1)                             // fast path
                            {
                                outputValue = Convert.ToString(intValues[0]);
                            }
                            else
                            {
                                StringBuilder sb = new StringBuilder();
                                sb.Append(intValues[0]);
                                for (int i = 1; i < intValues.Length; i++)
                                {
                                    sb.Append(",");
                                    sb.Append(intValues[i]);
                                }
                                outputValue = sb.ToString();
                            }
                        }
                        else
                        {
                            object val = g.HasExtendedAttribute(field) ? g.GetExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;

                            VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
                            if (metaData != null)
                            {
                                int numInFormatField = metaData.getCount(vc);
                                if (numInFormatField > 1 && val.Equals(VCFConstants.MISSING_VALUE_v4))
                                {
                                    // If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
                                    // For example, if Number=2, the string has to be ".,."
                                    StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
                                    for (int i = 1; i < numInFormatField; i++)
                                    {
                                        sb.Append(",");
                                        sb.Append(VCFConstants.MISSING_VALUE_v4);
                                    }
                                    val = sb.ToString();
                                }
                            }

                            // assume that if key is absent, then the given string encoding suffices
                            outputValue = formatVCFField(val);
                        }
                    }

                    if (outputValue != null)
                    {
                        attrs.Add(outputValue);
                    }
                }

                // strip off trailing missing values
                for (int i = attrs.Count - 1; i >= 0; i--)
                {
                    if (isMissingValue(attrs[i]))
                    {
                        attrs.RemoveAt(i);
                    }
                    else
                    {
                        break;
                    }
                }

                // The first attribute needs a leading separator only when GT text was written before it.
                for (int i = 0; i < attrs.Count; i++)
                {
                    if (i > 0 || hasGenotypeKey)
                    {
                        sbn.Append(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
                    }
                    sbn.Append(attrs[i]);
                }
            }
            return sbn.ToString();
        }
		/// <summary>
		/// Works out how many values are expected for this header field in the given variant context.
		///
		/// A fixed (INTEGER) count is returned as stored on this header line.
		/// A count of type A yields vc.NAlleles - 1.
		/// A count of type G yields GenotypeLikelihoods.numLikelihoods for vc.NAlleles and
		///   vc.GetMaxPloidy(2); the 2 is presumably the ploidy assumed (diploid) when the context
		///   has no genotype information - confirm against GetMaxPloidy.
		/// An UNBOUNDED count yields -1.
		/// </summary>
		/// <param name="vc">the variant context whose allele count and ploidy are consulted</param>
		/// <returns>the expected number of values, or -1 when the count is unbounded</returns>
		/// <exception cref="VCFParsingError">if the count type is not one of the handled kinds</exception>
		public virtual int getCount(VariantContext vc)
		{
			int expected;

			switch (countType)
			{
				case Bio.VCF.VCFHeaderLineCount.INTEGER:
					expected = count;
					break;

				case Bio.VCF.VCFHeaderLineCount.UNBOUNDED:
					expected = -1;
					break;

				case Bio.VCF.VCFHeaderLineCount.A:
					expected = vc.NAlleles - 1;
					break;

				case Bio.VCF.VCFHeaderLineCount.G:
				{
					int maxPloidy = vc.GetMaxPloidy(2);
					expected = GenotypeLikelihoods.numLikelihoods(vc.NAlleles, maxPloidy);
					break;
				}

				default:
					throw new VCFParsingError("Unknown count type: " + countType);
			}

			return expected;
		}
Exemple #4
0
		/// <summary>
		/// Builds the text of the per-sample genotype columns for one record.
		///
		/// For every sample name in the header a field separator is written, followed by that
		/// sample's values for the keys in <paramref name="genotypeFormatKeys"/>. The GT value is
		/// appended to the output as soon as it is encountered; every other value is collected first
		/// so that trailing missing values can be dropped before the rest are written.
		/// </summary>
		/// <param name="vc">the variant context supplying the genotypes</param>
		/// <param name="alleleMap">alleles for this context; passed through to getAlleleText</param>
		/// <param name="genotypeFormatKeys">the genotype format keys to write for each sample</param>
		/// <returns>the concatenated genotype text for all samples named in the header</returns>
		/// <exception cref="InvalidOperationException">
		/// if the GT key is requested but a sample's genotype is not available
		/// </exception>
		private string getGenotypeDataText(VariantContext vc, IDictionary<Allele, string> alleleMap, IList<string> genotypeFormatKeys)
		{
			StringBuilder sbn = new StringBuilder();
			int ploidy = vc.GetMaxPloidy(2);

			// Loop-invariant, so look it up once instead of re-scanning the key list for every sample.
			// The null guard keeps the "no samples, no keys" call working exactly as before.
			bool hasGenotypeKey = genotypeFormatKeys != null && genotypeFormatKeys.Contains(VCFConstants.GENOTYPE_KEY);

			foreach (string sample in mHeader.GenotypeSampleNames)
			{
				sbn.Append(VCFConstants.FIELD_SEPARATOR);

				// Samples absent from the context are written as a missing genotype.
				Genotype g = vc.GetGenotype(sample);
				if (g == null)
				{
					g = GenotypeBuilder.CreateMissing(sample, ploidy);
				}

				IList<string> attrs = new List<string>(genotypeFormatKeys.Count);
				foreach (string field in genotypeFormatKeys)
				{
					if (field.Equals(VCFConstants.GENOTYPE_KEY))
					{
						if (!g.Available)
						{
							throw new InvalidOperationException("GTs cannot be missing for some samples if they are available for others in the record");
						}

						// GT goes straight into the output (not into attrs): alleles joined by the phasing separator.
						sbn.Append(getAlleleText(g.getAllele(0), alleleMap));
						for (int i = 1; i < g.Ploidy; i++)
						{
							sbn.Append(g.Phased ? VCFConstants.PHASED : VCFConstants.UNPHASED);
							sbn.Append(getAlleleText(g.getAllele(i), alleleMap));
						}
						continue;
					}

					string outputValue;
					if (field.Equals(VCFConstants.GENOTYPE_FILTER_KEY))
					{
						outputValue = g.Filtered ? g.Filters : VCFConstants.PASSES_FILTERS_v4;
					}
					else
					{
						IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.GetAccessor(field);
						if (accessor != null)
						{
							int[] intValues = accessor.getValues(g);
							if (intValues == null || intValues.Length == 0)
							{
								// An empty array carries no values; write it as missing instead of
								// failing on intValues[0] below.
								outputValue = VCFConstants.MISSING_VALUE_v4;
							}
							else if (intValues.Length == 1) // fast path
							{
								outputValue = Convert.ToString(intValues[0]);
							}
							else
							{
								StringBuilder sb = new StringBuilder();
								sb.Append(intValues[0]);
								for (int i = 1; i < intValues.Length; i++)
								{
									sb.Append(",");
									sb.Append(intValues[i]);
								}
								outputValue = sb.ToString();
							}
						}
						else
						{
							object val = g.HasExtendedAttribute(field) ? g.GetExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;

							VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
							if (metaData != null)
							{
								int numInFormatField = metaData.getCount(vc);
								if (numInFormatField > 1 && val.Equals(VCFConstants.MISSING_VALUE_v4))
								{
									// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
									// For example, if Number=2, the string has to be ".,."
									StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
									for (int i = 1; i < numInFormatField; i++)
									{
										sb.Append(",");
										sb.Append(VCFConstants.MISSING_VALUE_v4);
									}
									val = sb.ToString();
								}
							}

							// assume that if key is absent, then the given string encoding suffices
							outputValue = formatVCFField(val);
						}
					}

					if (outputValue != null)
					{
						attrs.Add(outputValue);
					}
				}

				// strip off trailing missing values
				for (int i = attrs.Count - 1; i >= 0; i--)
				{
					if (isMissingValue(attrs[i]))
					{
						attrs.RemoveAt(i);
					}
					else
					{
						break;
					}
				}

				// The first attribute needs a leading separator only when GT text was written before it.
				for (int i = 0; i < attrs.Count; i++)
				{
					if (i > 0 || hasGenotypeKey)
					{
						sbn.Append(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
					}
					sbn.Append(attrs[i]);
				}
			}
			return sbn.ToString();
		}