/// <summary>
/// Sorts every header line in mMetaData into its category-specific lookup:
/// INFO and FORMAT lines are bound through addMetaDataMapBinding, FILTER lines are
/// keyed by ID, contig lines are appended to contigMetaData, and any other line is
/// keyed by its Key in mOtherMetaData.
/// </summary>
private void loadMetaDataMaps()
{
    foreach (VCFHeaderLine headerLine in mMetaData)
    {
        if (headerLine is VCFInfoHeaderLine)
        {
            addMetaDataMapBinding(mInfoMetaData, (VCFInfoHeaderLine)headerLine);
            continue;
        }

        if (headerLine is VCFFormatHeaderLine)
        {
            addMetaDataMapBinding(mFormatMetaData, (VCFFormatHeaderLine)headerLine);
            continue;
        }

        if (headerLine is VCFFilterHeaderLine)
        {
            VCFFilterHeaderLine asFilter = (VCFFilterHeaderLine)headerLine;
            mFilterMetaData[asFilter.ID] = asFilter;
            continue;
        }

        if (headerLine is VCFContigHeaderLine)
        {
            contigMetaData.Add((VCFContigHeaderLine)headerLine);
            continue;
        }

        // Anything that is not INFO/FORMAT/FILTER/contig is stored under its key.
        mOtherMetaData[headerLine.Key] = headerLine;
    }

    // Nothing to report unless a GL format line is declared without a PL one.
    if (!hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) || hasFormatLine(VCFConstants.GENOTYPE_PL_KEY))
    {
        return;
    }

    // This situation is only reported on the console; it does not throw.
    Console.WriteLine("Warning now we want PL fields, not just GL fields");
}
/// <summary>
/// Builds the genotype portion of a VCF record: for every sample name in the header,
/// a field separator followed by that sample's values for each requested format key.
/// </summary>
/// <param name="vc"> the variant context </param>
/// <param name="alleleMap"> alleles for this context </param>
/// <param name="genotypeFormatKeys"> Genotype formatting string </param>
/// <returns> the concatenated text for all samples </returns>
/// <exception cref="InvalidOperationException">
/// the genotype key is requested but a sample's genotype is not available
/// </exception>
private string getGenotypeDataText(VariantContext vc, IDictionary<Allele, string> alleleMap, IList<string> genotypeFormatKeys)
{
    StringBuilder text = new StringBuilder();
    int ploidy = vc.GetMaxPloidy(2);

    foreach (string sample in mHeader.GenotypeSampleNames)
    {
        text.Append(VCFConstants.FIELD_SEPARATOR);

        Genotype g = vc.GetGenotype(sample);
        if (g == null)
        {
            g = GenotypeBuilder.CreateMissing(sample, ploidy);
        }

        // Set when the genotype key is encountered; replaces a repeated
        // genotypeFormatKeys.Contains(...) scan in the output loop below.
        bool wroteGenotype = false;
        IList<string> attrs = new List<string>(genotypeFormatKeys.Count);

        foreach (string field in genotypeFormatKeys)
        {
            if (field.Equals(VCFConstants.GENOTYPE_KEY))
            {
                if (!g.Available)
                {
                    throw new InvalidOperationException("GTs cannot be missing for some samples if they are available for others in the record");
                }

                // The genotype itself is written straight to the output, not queued in attrs.
                text.Append(getAlleleText(g.getAllele(0), alleleMap));
                for (int i = 1; i < g.Ploidy; i++)
                {
                    text.Append(g.Phased ? VCFConstants.PHASED : VCFConstants.UNPHASED);
                    text.Append(getAlleleText(g.getAllele(i), alleleMap));
                }

                wroteGenotype = true;
                continue;
            }

            string outputValue = getGenotypeFieldText(vc, g, field);
            if (outputValue != null)
            {
                attrs.Add(outputValue);
            }
        }

        // strip off trailing missing values
        for (int i = attrs.Count - 1; i >= 0 && isMissingValue(attrs[i]); i--)
        {
            attrs.RemoveAt(i);
        }

        for (int i = 0; i < attrs.Count; i++)
        {
            // The first attribute needs a leading separator only when a genotype precedes it.
            if (i > 0 || wroteGenotype)
            {
                text.Append(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
            }
            text.Append(attrs[i]);
        }
    }

    return text.ToString();
}

/// <summary>
/// Produces the text for one non-genotype format field of a single sample.
/// </summary>
/// <param name="vc"> the variant context, used to resolve the expected value count </param>
/// <param name="g"> the sample's genotype </param>
/// <param name="field"> the format key being written </param>
/// <returns> the encoded value, or whatever formatVCFField returns for extended attributes </returns>
private string getGenotypeFieldText(VariantContext vc, Genotype g, string field)
{
    if (field.Equals(VCFConstants.GENOTYPE_FILTER_KEY))
    {
        return g.Filtered ? g.Filters : VCFConstants.PASSES_FILTERS_v4;
    }

    IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.GetAccessor(field);
    if (accessor != null)
    {
        int[] intValues = accessor.getValues(g);

        // A null or empty array carries no values; emit the missing marker rather than indexing into it.
        if (intValues == null || intValues.Length == 0)
        {
            return VCFConstants.MISSING_VALUE_v4;
        }

        // Invariant culture keeps the digits and sign independent of the machine's locale.
        string[] parts = Array.ConvertAll(
            intValues,
            v => v.ToString(System.Globalization.CultureInfo.InvariantCulture));
        return string.Join(",", parts);
    }

    object val = g.HasExtendedAttribute(field) ? g.GetExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;

    VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
    if (metaData != null)
    {
        int numInFormatField = metaData.getCount(vc);
        if (numInFormatField > 1 && val != null && val.Equals(VCFConstants.MISSING_VALUE_v4))
        {
            // If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
            // For example, if Number=2, the string has to be ".,."
            StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
            for (int i = 1; i < numInFormatField; i++)
            {
                sb.Append(",");
                sb.Append(VCFConstants.MISSING_VALUE_v4);
            }
            val = sb.ToString();
        }
    }

    // assume that if key is absent, then the given string encoding suffices
    return formatVCFField(val);
}
/// <summary>
/// Creates a VCF header from a set of header record lines. Lines that start with
/// VCFHeader.METADATA_INDICATOR become meta-data header lines; any other line is
/// treated as the column-header line, from which the sample names are read.
/// </summary>
/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
/// <param name="version"> the header version; stored on this instance and handed to each typed header-line constructor </param>
/// <returns> a VCFHeader object (also stored in this.header) </returns>
/// <exception cref="VCFParsingError">
/// a line is null or empty, the column-header line has too few or unexpected columns,
/// the column after the fixed ones is not FORMAT, or FORMAT is present without samples
/// </exception>
protected internal virtual VCFHeader parseHeaderFromLines (IList<string> headerStrings, VCFHeaderVersion version)
{
    this.version = version;

    ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine> ();
    ISet<string> sampleNames = new LinkedHashSet<string> ();
    int contigCounter = 0;

    // iterate over all the passed in strings
    foreach (string str in headerStrings) {
        // A null/empty line has no columns at all; report it as a parse error instead of
        // letting StartsWith/Substring fail with an unrelated runtime exception.
        if (String.IsNullOrEmpty (str)) {
            throw new VCFParsingError ("There are not enough columns present in the header line: " + str);
        }

        // Ordinal comparison: header prefixes are fixed tokens, not linguistic text.
        if (!str.StartsWith (VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal)) {
            // presumably the #CHROM POS ID REF ALT QUAL FILTER INFO etc. line
            string[] strings = str.Substring (1).Split (VCFConstants.FIELD_SEPARATOR_CHAR);

            // a trailing separator leaves an empty last element; drop it
            if (String.IsNullOrEmpty (strings.Last ())) {
                strings = strings.Take (strings.Length - 1).ToArray ();
            }
            if (strings.Length < VCFHeader.HEADER_FIELDS.Length) {
                throw new VCFParsingError ("There are not enough columns present in the header line: " + str);
            }

            // verify the fixed columns, failing on the first one that differs
            for (int column = 0; column < VCFHeader.HEADER_FIELDS.Length; column++) {
                if (VCFHeader.HEADER_FIELDS [column] != strings [column]) {
                    throw new VCFParsingError ("We were not expecting column name '" + strings [column] + "' in that position");
                }
            }

            int arrayIndex = VCFHeader.HEADER_FIELDS.Length; // start after verified columns
            bool sawFormatTag = false;
            if (arrayIndex < strings.Length) {
                if (!strings [arrayIndex].Equals ("FORMAT")) {
                    throw new VCFParsingError ("we were expecting column name 'FORMAT' but we saw '" + strings [arrayIndex] + "'");
                }
                sawFormatTag = true;
                arrayIndex++;
            }

            // everything after FORMAT is a sample name
            while (arrayIndex < strings.Length) {
                sampleNames.Add (strings [arrayIndex++]);
            }

            if (sawFormatTag && sampleNames.Count == 0) {
                throw new VCFParsingError ("The FORMAT field was provided but there is no genotype/sample data");
            }
        } else {
            // NOTE(review): the numeric Substring offsets below presumably skip the matched
            // prefix up to and including '='; confirm against the VCFConstants values.
            if (str.StartsWith (VCFConstants.INFO_HEADER_START, StringComparison.Ordinal)) {
                VCFInfoHeaderLine info = new VCFInfoHeaderLine (str.Substring (7), version);
                metaData.Add (info);
            } else if (str.StartsWith (VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal)) {
                VCFFilterHeaderLine filter = new VCFFilterHeaderLine (str.Substring (9), version);
                metaData.Add (filter);
            } else if (str.StartsWith (VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal)) {
                VCFFormatHeaderLine format = new VCFFormatHeaderLine (str.Substring (9), version);
                metaData.Add (format);
            } else if (str.StartsWith (VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal)) {
                VCFContigHeaderLine contig = new VCFContigHeaderLine (str.Substring (9), version, VCFConstants.CONTIG_HEADER_START.Substring (2), contigCounter++);
                metaData.Add (contig);
            } else if (str.StartsWith (VCFConstants.ALT_HEADER_START, StringComparison.Ordinal)) {
                //TODO: Consider giving Alt header lines their own class
                VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine (str.Substring (6), version, VCFConstants.ALT_HEADER_START.Substring (2), "ID", "Description");
                metaData.Add (alt);
            } else {
                // generic "##key=value" line; lines without '=' are ignored
                int equals = str.IndexOf ('=');
                if (equals != -1) {
                    metaData.Add (new VCFHeaderLine (str.Substring (2, equals - 2), str.Substring (equals + 1)));
                }
            }
        }
    }

    this.header = new VCFHeader (metaData, sampleNames);
    if (doOnTheFlyModifications) {
        this.header = VCFStandardHeaderLines.repairStandardHeaderLines (this.header);
    }
    return this.header;
}
/// <summary>
/// Creates a VCF header from a set of header record lines. Lines that start with
/// VCFHeader.METADATA_INDICATOR become meta-data header lines; any other line is
/// treated as the column-header line, from which the sample names are read.
/// </summary>
/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
/// <param name="version"> the header version; stored on this instance and handed to each typed header-line constructor </param>
/// <returns> a VCFHeader object (also stored in this.header) </returns>
/// <exception cref="VCFParsingError">
/// a line is null or empty, the column-header line has too few or unexpected columns,
/// the column after the fixed ones is not FORMAT, or FORMAT is present without samples
/// </exception>
protected internal virtual VCFHeader parseHeaderFromLines(IList<string> headerStrings, VCFHeaderVersion version)
{
    this.version = version;

    ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
    ISet<string> sampleNames = new LinkedHashSet<string>();
    int contigCounter = 0;

    // iterate over all the passed in strings
    foreach (string str in headerStrings)
    {
        // A null/empty line has no columns at all; report it as a parse error instead of
        // letting StartsWith/Substring fail with an unrelated runtime exception.
        if (String.IsNullOrEmpty(str))
        {
            throw new VCFParsingError("There are not enough columns present in the header line: " + str);
        }

        // Ordinal comparison: header prefixes are fixed tokens, not linguistic text.
        if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
        {
            // presumably the #CHROM POS ID REF ALT QUAL FILTER INFO etc. line
            string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);

            // a trailing separator leaves an empty last element; drop it
            if (String.IsNullOrEmpty(strings.Last()))
            {
                strings = strings.Take(strings.Length - 1).ToArray();
            }

            if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
            {
                throw new VCFParsingError("There are not enough columns present in the header line: " + str);
            }

            // verify the fixed columns, failing on the first one that differs
            for (int column = 0; column < VCFHeader.HEADER_FIELDS.Length; column++)
            {
                if (VCFHeader.HEADER_FIELDS[column] != strings[column])
                {
                    throw new VCFParsingError("We were not expecting column name '" + strings[column] + "' in that position");
                }
            }

            int arrayIndex = VCFHeader.HEADER_FIELDS.Length; // start after verified columns
            bool sawFormatTag = false;
            if (arrayIndex < strings.Length)
            {
                if (!strings[arrayIndex].Equals("FORMAT"))
                {
                    throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
                }
                sawFormatTag = true;
                arrayIndex++;
            }

            // everything after FORMAT is a sample name
            while (arrayIndex < strings.Length)
            {
                sampleNames.Add(strings[arrayIndex++]);
            }

            if (sawFormatTag && sampleNames.Count == 0)
            {
                throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
            }
        }
        else
        {
            // NOTE(review): the numeric Substring offsets below presumably skip the matched
            // prefix up to and including '='; confirm against the VCFConstants values.
            if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
            {
                VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.Substring(7), version);
                metaData.Add(info);
            }
            else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
            {
                VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.Substring(9), version);
                metaData.Add(filter);
            }
            else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
            {
                VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.Substring(9), version);
                metaData.Add(format);
            }
            else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
            {
                VCFContigHeaderLine contig = new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++);
                metaData.Add(contig);
            }
            else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
            {
                //TODO: Consider giving Alt header lines their own class
                VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description");
                metaData.Add(alt);
            }
            else
            {
                // generic "##key=value" line; lines without '=' are ignored
                int equals = str.IndexOf('=');
                if (equals != -1)
                {
                    metaData.Add(new VCFHeaderLine(str.Substring(2, equals - 2), str.Substring(equals + 1)));
                }
            }
        }
    }

    this.header = new VCFHeader(metaData, sampleNames);
    if (doOnTheFlyModifications)
    {
        this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
    }

    return this.header;
}
/// <summary>
/// Adds the given FORMAT header line to the formatStandards collection.
/// </summary>
/// <param name="line"> the FORMAT header line to register </param>
private static void registerStandard(VCFFormatHeaderLine line)
{
    formatStandards.add(line);
}