示例#1
0
        /// <summary>
        /// Builds the text of the per-sample genotype columns for one record.
        /// For every sample name in the header a field separator is appended, followed by the
        /// sample's value for each key in <paramref name="genotypeFormatKeys"/>; trailing
        /// missing values are stripped. Samples that have no genotype in
        /// <paramref name="vc"/> are written as missing genotypes of the record's max ploidy.
        /// </summary>
        /// <param name="vc">the variant context whose genotypes are written</param>
        /// <param name="alleleMap">alleles for this context; handed to getAlleleText to encode each allele</param>
        /// <param name="genotypeFormatKeys">the genotype format keys, in output order</param>
        /// <returns>the concatenated genotype columns, each one preceded by the field separator</returns>
        /// <exception cref="InvalidOperationException">
        /// the GT key is requested but the genotype of some sample is not available
        /// </exception>
        private string getGenotypeDataText(VariantContext vc, IDictionary <Allele, string> alleleMap, IList <string> genotypeFormatKeys)
        {
            StringBuilder sbn    = new StringBuilder();
            int           ploidy = vc.GetMaxPloidy(2);

            // The output is a machine-readable file: integers must not pick up culture-specific
            // formatting (e.g. a non-ASCII negative sign) from the current thread culture.
            IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

            foreach (string sample in mHeader.GenotypeSampleNames)
            {
                sbn.Append(VCFConstants.FIELD_SEPARATOR);

                Genotype g = vc.GetGenotype(sample);
                if (g == null)
                {
                    g = GenotypeBuilder.CreateMissing(sample, ploidy);
                }

                // Set once the GT key has been written straight into sbn; it replaces a linear
                // Contains() scan of the key list for every sample when joining the attributes.
                bool           wroteGenotype = false;
                IList <string> attrs         = new List <string>(genotypeFormatKeys.Count);
                foreach (string field in genotypeFormatKeys)
                {
                    if (field.Equals(VCFConstants.GENOTYPE_KEY))
                    {
                        if (!g.Available)
                        {
                            throw new InvalidOperationException("GTs cannot be missing for some samples if they are available for others in the record");
                        }

                        // Alleles are written directly (not via attrs) so they are never
                        // affected by the trailing-missing-value stripping below.
                        sbn.Append(getAlleleText(g.getAllele(0), alleleMap));
                        for (int i = 1; i < g.Ploidy; i++)
                        {
                            sbn.Append(g.Phased ? VCFConstants.PHASED : VCFConstants.UNPHASED);
                            sbn.Append(getAlleleText(g.getAllele(i), alleleMap));
                        }
                        wroteGenotype = true;
                        continue;
                    }

                    string outputValue;
                    if (field.Equals(VCFConstants.GENOTYPE_FILTER_KEY))
                    {
                        outputValue = g.Filtered ? g.Filters : VCFConstants.PASSES_FILTERS_v4;
                    }
                    else
                    {
                        IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.GetAccessor(field);
                        if (accessor != null)
                        {
                            int[] intValues = accessor.getValues(g);
                            if (intValues == null || intValues.Length == 0)
                            {
                                // No values at all: encode as missing instead of indexing
                                // into an empty array.
                                outputValue = VCFConstants.MISSING_VALUE_v4;
                            }
                            else if (intValues.Length == 1)                                 // fast path
                            {
                                outputValue = intValues[0].ToString(invariant);
                            }
                            else
                            {
                                StringBuilder sb = new StringBuilder();
                                sb.Append(intValues[0].ToString(invariant));
                                for (int i = 1; i < intValues.Length; i++)
                                {
                                    sb.Append(",");
                                    sb.Append(intValues[i].ToString(invariant));
                                }
                                outputValue = sb.ToString();
                            }
                        }
                        else
                        {
                            object val = g.HasExtendedAttribute(field) ? g.GetExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;

                            VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
                            if (metaData != null)
                            {
                                int numInFormatField = metaData.getCount(vc);
                                if (numInFormatField > 1 && val.Equals(VCFConstants.MISSING_VALUE_v4))
                                {
                                    // If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
                                    // For example, if Number=2, the string has to be ".,."
                                    StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
                                    for (int i = 1; i < numInFormatField; i++)
                                    {
                                        sb.Append(",");
                                        sb.Append(VCFConstants.MISSING_VALUE_v4);
                                    }
                                    val = sb.ToString();
                                }
                            }

                            // assume that if key is absent, then the given string encoding suffices
                            outputValue = formatVCFField(val);
                        }
                    }

                    if (outputValue != null)
                    {
                        attrs.Add(outputValue);
                    }
                }

                // strip off trailing missing values
                for (int i = attrs.Count - 1; i >= 0; i--)
                {
                    if (isMissingValue(attrs[i]))
                    {
                        attrs.RemoveAt(i);
                    }
                    else
                    {
                        break;
                    }
                }

                // Join the remaining attributes; the first one needs a leading separator only
                // when the GT text was already written for this sample.
                for (int i = 0; i < attrs.Count; i++)
                {
                    if (i > 0 || wroteGenotype)
                    {
                        sbn.Append(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
                    }
                    sbn.Append(attrs[i]);
                }
            }
            return(sbn.ToString());
        }
示例#2
0
        /// <summary>
        /// Parses the genotype portion of a record: the first field of <paramref name="str"/>
        /// holds the format keys and every following field holds the values of one sample,
        /// in the order of the header's genotype sample names.
        /// </summary>
        /// <param name="str"> the genotype text, fields separated by the VCF field separator </param>
        /// <param name="alleles"> the list of alleles used to resolve the GT field </param>
        /// <param name="chr"> contig name, used only in error messages </param>
        /// <param name="pos"> position on the contig, used only in error messages </param>
        /// <returns> lazy genotype data built from the parsed genotypes and the header's sample ordering/offsets </returns>
        /// <exception cref="VCFParsingError"> the text cannot be split into the expected fields, or a genotype cannot be built </exception>
        public LazyGenotypesContext.LazyData CreateGenotypeMap(string str, IList <Allele> alleles, string chr, int pos)
        {
            if (genotypeParts == null)
            {
                genotypeParts = new String[header.ColumnCount - NUM_STANDARD_FIELDS];
            }
            try {
                FastStringUtils.Split(str, VCFConstants.FIELD_SEPARATOR_CHAR, genotypeParts);
            } catch (Exception e) {
                throw new VCFParsingError("Could not parse genotypes, was expecting " + (genotypeParts.Length - 1).ToString() + " but found " + str.Split(VCFConstants.FIELD_SEPARATOR_CHAR).Length.ToString(), e);
            }
            List <Genotype> genotypes = new List <Genotype> (genotypeParts.Length);

            // get the format keys
            string[] genotypeKeyArray       = genotypeParts [0].Split(VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
            int      genotypeAlleleLocation = Array.IndexOf(genotypeKeyArray, VCFConstants.GENOTYPE_KEY);

            if (version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1)
            {
                generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
            }
            // clear out our allele mapping
            alleleMap.Clear();
            GenotypeBuilder gb = new GenotypeBuilder();

            // cycle through the genotype strings; index 0 is the format column, so samples start at 1
            for (int genotypeOffset = 1; genotypeOffset < genotypeParts.Length; genotypeOffset++)
            {
                Genotype curGenotype;
                string   sampleName  = header.GenotypeSampleNames [genotypeOffset - 1];
                var      currentGeno = genotypeParts [genotypeOffset];
                //shortcut for null alleles
                if (currentGeno == "./.")
                {
                    curGenotype = GenotypeBuilder.CreateMissing(sampleName, 2);
                }
                else if (currentGeno == ".")
                {
                    curGenotype = GenotypeBuilder.CreateMissing(sampleName, 1);
                }
                else
                {
                    gb.Reset(false);
                    gb.SampleName = sampleName;
                    string[] GTValueArray = FastStringUtils.Split(currentGeno, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR, int.MaxValue, StringSplitOptions.None);
                    // check to see if the value list is longer than the key list, which is a problem
                    if (genotypeKeyArray.Length < GTValueArray.Length)
                    {
                        generateException("There are too many keys for the sample " + sampleName + ", line is: keys = " + genotypeParts [0] + ", values = " + genotypeParts [genotypeOffset]);
                    }
                    if (genotypeAlleleLocation > 0)
                    {
                        generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
                    }

                    // Loop over all format keys and decode the matching value into the genotype builder.
                    if (genotypeKeyArray.Length > 0)
                    {
                        gb.MaxAttributes(genotypeKeyArray.Length - 1);
                        for (int i = 0; i < genotypeKeyArray.Length; i++)
                        {
                            string gtKey = genotypeKeyArray [i];
                            // a sample may carry fewer values than there are keys; the rest are absent
                            if (i >= GTValueArray.Length)
                            {
                                break;
                            }
                            // todo -- all of these on the fly parsing of the missing value should be static constants
                            if (gtKey == VCFConstants.GENOTYPE_FILTER_KEY)
                            {
                                IList <string> filters = parseFilters(GetCachedString(GTValueArray [i]));
                                if (filters != null)
                                {
                                    gb.SetFilters(filters.ToList());
                                }
                            }
                            else if (GTValueArray [i] == VCFConstants.MISSING_VALUE_v4)
                            {
                                // don't add missing values to the map
                            }
                            else
                            {
                                if (gtKey == VCFConstants.GENOTYPE_QUALITY_KEY)
                                {
                                    if (GTValueArray [i] == VCFConstants.MISSING_GENOTYPE_QUALITY_v3)
                                    {
                                        gb.noGQ();
                                    }
                                    else
                                    {
                                        // Parse with the invariant culture: the file always uses '.' as
                                        // the decimal separator, whatever the current thread culture is.
                                        gb.GQ = ((int)Math.Round(Convert.ToDouble(GTValueArray [i], System.Globalization.CultureInfo.InvariantCulture)));
                                    }
                                }
                                else if (gtKey == VCFConstants.GENOTYPE_ALLELE_DEPTHS)
                                {
                                    gb.AD = (decodeInts(GTValueArray [i]));
                                }
                                else if (gtKey == VCFConstants.GENOTYPE_PL_KEY)
                                {
                                    gb.PL = (decodeInts(GTValueArray [i]));
                                }
                                else if (gtKey == VCFConstants.GENOTYPE_LIKELIHOODS_KEY)
                                {
                                    gb.PL = (GenotypeLikelihoods.fromGLField(GTValueArray [i]).AsPLs);
                                }
                                else if (gtKey.Equals(VCFConstants.DEPTH_KEY))
                                {
                                    // culture-independent for the same reason as GQ above
                                    gb.DP = (Convert.ToInt32(GTValueArray [i], System.Globalization.CultureInfo.InvariantCulture));
                                }
                                else
                                {
                                    gb.AddAttribute(gtKey, GTValueArray [i]);
                                }
                            }
                        }
                    }

                    List <Allele> GTalleles;
                    if (genotypeAlleleLocation == -1)
                    {
                        GTalleles = new List <Allele> (0);
                    }
                    else
                    {
                        GTalleles = parseGenotypeAlleles(GTValueArray [genotypeAlleleLocation], alleles, alleleMap);
                    }
                    gb.Alleles = GTalleles;
                    gb.Phased  = genotypeAlleleLocation != -1 && GTValueArray [genotypeAlleleLocation].IndexOf(VCFConstants.PHASED_AS_CHAR) != -1;

                    // add it to the list
                    try {
                        curGenotype = gb.Make();
                    } catch (Exception e) {
                        // keep the original exception as the inner exception so its stack trace is not lost
                        throw new VCFParsingError(e.Message + ", at position " + chr + ":" + pos, e);
                    }
                }
                genotypes.Add(curGenotype);
            }
            return(new LazyGenotypesContext.LazyData(genotypes, header.SampleNamesInOrder, header.SampleNameToOffset));
        }