Example #1
0
        /// <summary>
        /// Distributes every header line held in <c>mMetaData</c> into the lookup structure that
        /// corresponds to its concrete type (INFO, FORMAT, FILTER, contig, or everything else),
        /// so the lines can later be found quickly by key.
        /// </summary>
        private void loadMetaDataMaps()
        {
            foreach (VCFHeaderLine headerLine in mMetaData)
            {
                // The type tests run in a fixed order and the first match wins.
                if (headerLine is VCFInfoHeaderLine)
                {
                    addMetaDataMapBinding(mInfoMetaData, (VCFInfoHeaderLine)headerLine);
                    continue;
                }

                if (headerLine is VCFFormatHeaderLine)
                {
                    addMetaDataMapBinding(mFormatMetaData, (VCFFormatHeaderLine)headerLine);
                    continue;
                }

                if (headerLine is VCFFilterHeaderLine)
                {
                    // Filters are indexed by their ID; a later line with the same ID replaces the earlier one.
                    VCFFilterHeaderLine filter = (VCFFilterHeaderLine)headerLine;
                    mFilterMetaData[filter.ID] = filter;
                    continue;
                }

                if (headerLine is VCFContigHeaderLine)
                {
                    contigMetaData.Add((VCFContigHeaderLine)headerLine);
                    continue;
                }

                // Anything that is not one of the typed lines above is stored under its key.
                mOtherMetaData[headerLine.Key] = headerLine;
            }

            // A header declaring GL but not PL only produces a console warning here; no exception is thrown.
            bool likelihoodsWithoutPl = hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) &&
                                        !hasFormatLine(VCFConstants.GENOTYPE_PL_KEY);
            if (likelihoodsWithoutPl)
            {
                Console.WriteLine("Warning now we want PL fields, not just GL fields");
            }
        }
Example #2
0
        /// <summary>
        /// Builds the text of the genotype (per-sample) columns for one variant context.
        /// </summary>
        /// <remarks>
        /// For every sample name in the header a field separator is written, followed by the
        /// sample's values for each key in <paramref name="genotypeFormatKeys"/>. The GT text is
        /// appended directly to the output; all other values are collected first so that trailing
        /// missing values can be stripped before they are written. Samples for which the variant
        /// context returns no genotype are written as a missing genotype of the record's maximum ploidy.
        /// Integer values are formatted with the invariant culture so the output does not depend
        /// on the current thread culture.
        /// </remarks>
        /// <param name="vc">                  the variant context </param>
        /// <param name="alleleMap">           alleles for this context </param>
        /// <param name="genotypeFormatKeys">  Genotype formatting string </param>
        /// <returns> the concatenated genotype columns, each one preceded by the field separator </returns>
        /// <exception cref="InvalidOperationException">
        /// the GT key is requested but the genotype of a sample is not available
        /// </exception>
        private string getGenotypeDataText(VariantContext vc, IDictionary <Allele, string> alleleMap, IList <string> genotypeFormatKeys)
        {
            StringBuilder sbn    = new StringBuilder();
            int           ploidy = vc.GetMaxPloidy(2);

            // Loop-invariant: decides whether the first collected attribute needs a leading
            // separator (because GT text was already written in front of it). The null guard keeps
            // the no-sample case from dereferencing the key list, exactly as before.
            bool hasGenotypeKey = genotypeFormatKeys != null && genotypeFormatKeys.Contains(VCFConstants.GENOTYPE_KEY);

            foreach (string sample in mHeader.GenotypeSampleNames)
            {
                sbn.Append(VCFConstants.FIELD_SEPARATOR);

                Genotype g = vc.GetGenotype(sample);
                if (g == null)
                {
                    g = GenotypeBuilder.CreateMissing(sample, ploidy);
                }
                IList <string> attrs = new List <string>(genotypeFormatKeys.Count);
                foreach (string field in genotypeFormatKeys)
                {
                    if (field.Equals(VCFConstants.GENOTYPE_KEY))
                    {
                        if (!g.Available)
                        {
                            throw new InvalidOperationException("GTs cannot be missing for some samples if they are available for others in the record");
                        }

                        // GT is written straight to the output, alleles joined by the phasing separator.
                        sbn.Append(getAlleleText(g.getAllele(0), alleleMap));
                        for (int i = 1; i < g.Ploidy; i++)
                        {
                            sbn.Append(g.Phased ? VCFConstants.PHASED : VCFConstants.UNPHASED);
                            sbn.Append(getAlleleText(g.getAllele(i), alleleMap));
                        }
                        continue;
                    }

                    string outputValue;
                    if (field.Equals(VCFConstants.GENOTYPE_FILTER_KEY))
                    {
                        outputValue = g.Filtered ? g.Filters : VCFConstants.PASSES_FILTERS_v4;
                    }
                    else
                    {
                        IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.GetAccessor(field);
                        if (accessor != null)
                        {
                            int[] intValues = accessor.getValues(g);
                            if (intValues == null)
                            {
                                outputValue = VCFConstants.MISSING_VALUE_v4;
                            }
                            else if (intValues.Length == 1)                                 // fast path
                            {
                                outputValue = intValues[0].ToString(System.Globalization.CultureInfo.InvariantCulture);
                            }
                            else
                            {
                                // Comma-separated list; invariant culture keeps the sign/digits ASCII.
                                StringBuilder sb = new StringBuilder();
                                sb.Append(intValues[0].ToString(System.Globalization.CultureInfo.InvariantCulture));
                                for (int i = 1; i < intValues.Length; i++)
                                {
                                    sb.Append(",");
                                    sb.Append(intValues[i].ToString(System.Globalization.CultureInfo.InvariantCulture));
                                }
                                outputValue = sb.ToString();
                            }
                        }
                        else
                        {
                            object val = g.HasExtendedAttribute(field) ? g.GetExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;

                            VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
                            if (metaData != null)
                            {
                                int numInFormatField = metaData.getCount(vc);
                                // Compare from the constant's side so a null attribute value cannot throw here.
                                if (numInFormatField > 1 && VCFConstants.MISSING_VALUE_v4.Equals(val))
                                {
                                    // If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
                                    // For example, if Number=2, the string has to be ".,."
                                    StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
                                    for (int i = 1; i < numInFormatField; i++)
                                    {
                                        sb.Append(",");
                                        sb.Append(VCFConstants.MISSING_VALUE_v4);
                                    }
                                    val = sb.ToString();
                                }
                            }

                            // assume that if key is absent, then the given string encoding suffices
                            outputValue = formatVCFField(val);
                        }
                    }

                    if (outputValue != null)
                    {
                        attrs.Add(outputValue);
                    }
                }

                // strip off trailing missing values
                for (int i = attrs.Count - 1; i >= 0; i--)
                {
                    if (isMissingValue(attrs[i]))
                    {
                        attrs.RemoveAt(i);
                    }
                    else
                    {
                        break;
                    }
                }

                for (int i = 0; i < attrs.Count; i++)
                {
                    if (i > 0 || hasGenotypeKey)
                    {
                        sbn.Append(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
                    }
                    sbn.Append(attrs[i]);
                }
            }
            return(sbn.ToString());
        }
Example #3
0
		/// <summary>
		/// create a VCF header from a set of header record lines
		/// </summary>
		/// <remarks>
		/// Lines that start with the metadata indicator are turned into typed header lines
		/// (INFO, FILTER, FORMAT, contig, ALT) or, failing that, into a generic key=value line;
		/// metadata lines without an '=' are ignored. Any other line is treated as the column
		/// header line and is validated against <c>VCFHeader.HEADER_FIELDS</c>, an optional FORMAT
		/// column, and the sample names that follow it. The parsed header and the version are also
		/// stored on this instance. All prefix checks are ordinal (culture-independent).
		/// </remarks>
		/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
		/// <param name="version"> the VCF version; stored on this instance and passed to the typed header line constructors </param>
		/// <returns> a VCFHeader object </returns>
		/// <exception cref="VCFParsingError">
		/// the column header line has too few columns, an unexpected column name, or a FORMAT column without samples
		/// </exception>
		protected internal virtual VCFHeader parseHeaderFromLines (IList<string> headerStrings, VCFHeaderVersion version)
		{
			this.version = version;
			ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine> ();
			ISet<string> sampleNames = new LinkedHashSet<string> ();
			int contigCounter = 0;
			// iterate over all the passed in strings
			foreach (string str in headerStrings) {
				if (!str.StartsWith (VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal)) {//presumably the #CHROM POS ID REF ALT QUAL FILTER INFO   etc. line
					string[] strings = str.Substring (1).Split (VCFConstants.FIELD_SEPARATOR_CHAR);
					//check for null last string, grrr...
					if (String.IsNullOrEmpty (strings.Last ())) {
						strings = strings.Take (strings.Length - 1).ToArray ();
					}
					if (strings.Length < VCFHeader.HEADER_FIELDS.Length) {
						throw new VCFParsingError ("There are not enough columns present in the header line: " + str);
					}
					//Verify the fixed columns, reporting the first one that does not match
					for (int col = 0; col < VCFHeader.HEADER_FIELDS.Length; col++) {
						if (VCFHeader.HEADER_FIELDS [col] != strings [col]) {
							throw new VCFParsingError ("We were not expecting column name '" + strings [col] + "' in that position");
						}
					}
					int arrayIndex = VCFHeader.HEADER_FIELDS.Length;//start after verified columns
					bool sawFormatTag = false;
					if (arrayIndex < strings.Length) {
						if (!strings [arrayIndex].Equals ("FORMAT")) {
							throw new VCFParsingError ("we were expecting column name 'FORMAT' but we saw '" + strings [arrayIndex] + "'");
						}
						sawFormatTag = true;
						arrayIndex++;
					}
					// everything after FORMAT is a sample name
					while (arrayIndex < strings.Length) {
						sampleNames.Add (strings [arrayIndex++]);
					}
					if (sawFormatTag && sampleNames.Count == 0) {
						throw new VCFParsingError ("The FORMAT field was provided but there is no genotype/sample data");
					}

				} else {
					// The Substring offsets below skip the "##KEY=" prefix of each line type.
					if (str.StartsWith (VCFConstants.INFO_HEADER_START, StringComparison.Ordinal)) {
						VCFInfoHeaderLine info = new VCFInfoHeaderLine (str.Substring (7), version);
						metaData.Add (info);
					} else if (str.StartsWith (VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal)) {
						VCFFilterHeaderLine filter = new VCFFilterHeaderLine (str.Substring (9), version);
						metaData.Add (filter);
					} else if (str.StartsWith (VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal)) {
						VCFFormatHeaderLine format = new VCFFormatHeaderLine (str.Substring (9), version);
						metaData.Add (format);
					} else if (str.StartsWith (VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal)) {
						// contigCounter records the order in which contig lines appeared
						VCFContigHeaderLine contig = new VCFContigHeaderLine (str.Substring (9), version, VCFConstants.CONTIG_HEADER_START.Substring (2), contigCounter++);
						metaData.Add (contig);
					} else if (str.StartsWith (VCFConstants.ALT_HEADER_START, StringComparison.Ordinal)) {
						//TODO: Consider giving Alt header lines their own class
						VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine (str.Substring (6), version, VCFConstants.ALT_HEADER_START.Substring (2), "ID", "Description");
						metaData.Add (alt);
					} else {
						// generic "##key=value" line; lines without '=' are dropped
						int equals = str.IndexOf ('=');
						if (equals != -1) {
							metaData.Add (new VCFHeaderLine (str.Substring (2, equals - 2), str.Substring (equals + 1)));
						}
					}
				}
			}
			this.header = new VCFHeader (metaData, sampleNames);
			if (doOnTheFlyModifications) {
				this.header = VCFStandardHeaderLines.repairStandardHeaderLines (this.header);
			}
			return this.header;
		}
Example #4
0
        /// <summary>
        /// create a VCF header from a set of header record lines
        /// </summary>
        /// <remarks>
        /// Lines that start with the metadata indicator are turned into typed header lines
        /// (INFO, FILTER, FORMAT, contig, ALT) or, failing that, into a generic key=value line;
        /// metadata lines without an '=' are ignored. Any other line is treated as the column
        /// header line and is validated against <c>VCFHeader.HEADER_FIELDS</c>, an optional FORMAT
        /// column, and the sample names that follow it. The parsed header and the version are also
        /// stored on this instance. All prefix checks are ordinal (culture-independent).
        /// </remarks>
        /// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
        /// <param name="version"> the VCF version; stored on this instance and passed to the typed header line constructors </param>
        /// <returns> a VCFHeader object </returns>
        /// <exception cref="VCFParsingError">
        /// the column header line has too few columns, an unexpected column name, or a FORMAT column without samples
        /// </exception>
        protected internal virtual VCFHeader parseHeaderFromLines(IList <string> headerStrings, VCFHeaderVersion version)
        {
            this.version = version;
            ISet <VCFHeaderLine> metaData    = new LinkedHashSet <VCFHeaderLine> ();
            ISet <string>        sampleNames = new LinkedHashSet <string> ();
            int contigCounter = 0;

            // iterate over all the passed in strings
            foreach (string str in headerStrings)
            {
                if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))                   //presumably the #CHROM POS ID REF ALT QUAL FILTER INFO   etc. line
                {
                    string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);
                    //check for null last string, grrr...
                    if (String.IsNullOrEmpty(strings.Last()))
                    {
                        strings = strings.Take(strings.Length - 1).ToArray();
                    }
                    if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
                    {
                        throw new VCFParsingError("There are not enough columns present in the header line: " + str);
                    }
                    //Verify the fixed columns, reporting the first one that does not match
                    for (int col = 0; col < VCFHeader.HEADER_FIELDS.Length; col++)
                    {
                        if (VCFHeader.HEADER_FIELDS [col] != strings [col])
                        {
                            throw new VCFParsingError("We were not expecting column name '" + strings [col] + "' in that position");
                        }
                    }
                    int  arrayIndex   = VCFHeader.HEADER_FIELDS.Length;                 //start after verified columns
                    bool sawFormatTag = false;
                    if (arrayIndex < strings.Length)
                    {
                        if (!strings [arrayIndex].Equals("FORMAT"))
                        {
                            throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings [arrayIndex] + "'");
                        }
                        sawFormatTag = true;
                        arrayIndex++;
                    }
                    // everything after FORMAT is a sample name
                    while (arrayIndex < strings.Length)
                    {
                        sampleNames.Add(strings [arrayIndex++]);
                    }
                    if (sawFormatTag && sampleNames.Count == 0)
                    {
                        throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
                    }
                }
                else
                {
                    // The Substring offsets below skip the "##KEY=" prefix of each line type.
                    if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
                    {
                        VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.Substring(7), version);
                        metaData.Add(info);
                    }
                    else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
                    {
                        VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.Substring(9), version);
                        metaData.Add(filter);
                    }
                    else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
                    {
                        VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.Substring(9), version);
                        metaData.Add(format);
                    }
                    else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
                    {
                        // contigCounter records the order in which contig lines appeared
                        VCFContigHeaderLine contig = new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++);
                        metaData.Add(contig);
                    }
                    else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
                    {
                        //TODO: Consider giving Alt header lines their own class
                        VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description");
                        metaData.Add(alt);
                    }
                    else
                    {
                        // generic "##key=value" line; lines without '=' are dropped
                        int equals = str.IndexOf('=');
                        if (equals != -1)
                        {
                            metaData.Add(new VCFHeaderLine(str.Substring(2, equals - 2), str.Substring(equals + 1)));
                        }
                    }
                }
            }
            this.header = new VCFHeader(metaData, sampleNames);
            if (doOnTheFlyModifications)
            {
                this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
            }
            return(this.header);
        }
Example #5
0
 /// <summary>
 /// Registers a FORMAT header line by adding it to <c>formatStandards</c>.
 /// </summary>
 /// <param name="line"> the FORMAT header line to add </param>
 private static void registerStandard(VCFFormatHeaderLine line)
 {
     formatStandards.add(line);
 }