/// <summary>
/// Sorts every header line held in mMetaData into the lookup structure that matches its
/// concrete type, so that info/format/filter/contig/other lines can be found quickly by key.
/// Afterwards, prints a console warning when a GL format line is declared without a PL one.
/// </summary>
private void loadMetaDataMaps()
{
    foreach (VCFHeaderLine headerLine in mMetaData)
    {
        // The type checks run in a fixed order; the first match wins.
        if (headerLine is VCFInfoHeaderLine)
        {
            addMetaDataMapBinding(mInfoMetaData, (VCFInfoHeaderLine)headerLine);
            continue;
        }

        if (headerLine is VCFFormatHeaderLine)
        {
            addMetaDataMapBinding(mFormatMetaData, (VCFFormatHeaderLine)headerLine);
            continue;
        }

        if (headerLine is VCFFilterHeaderLine)
        {
            VCFFilterHeaderLine filter = (VCFFilterHeaderLine)headerLine;
            mFilterMetaData[filter.ID] = filter;
            continue;
        }

        if (headerLine is VCFContigHeaderLine)
        {
            contigMetaData.Add((VCFContigHeaderLine)headerLine);
            continue;
        }

        // Anything that is not one of the structured kinds above is indexed by its key.
        mOtherMetaData[headerLine.Key] = headerLine;
    }

    if (!hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY))
    {
        return;
    }

    if (hasFormatLine(VCFConstants.GENOTYPE_PL_KEY))
    {
        return;
    }

    // GL is declared without PL. This is deliberately only a warning: the stricter
    // behaviour (throwing a VCFParsingError) is disabled.
    Console.WriteLine("Warning now we want PL fields, not just GL fields");
}
/// <summary>
/// create a VCF header from a set of header record lines
/// </summary>
/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
/// <param name="version"> the header version; it is stored on this instance and handed to every structured header line that gets constructed </param>
/// <returns> a VCFHeader object (the same instance is also stored in this.header) </returns>
/// <exception cref="ArgumentNullException"> headerStrings is null </exception>
/// <exception cref="VCFParsingError"> an entry is null/empty, or the column header line has too few columns, an unexpected column name, or a FORMAT column without any sample names </exception>
protected internal virtual VCFHeader parseHeaderFromLines(IList<string> headerStrings, VCFHeaderVersion version)
{
    if (headerStrings == null)
    {
        throw new ArgumentNullException("headerStrings");
    }

    this.version = version;

    ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
    ISet<string> sampleNames = new LinkedHashSet<string>();
    int contigCounter = 0;

    // iterate over all the passed in strings
    foreach (string str in headerStrings)
    {
        // A null or empty entry cannot be a header line; report it as a parse error
        // instead of letting StartsWith/Substring fail with an unrelated exception.
        if (String.IsNullOrEmpty(str))
        {
            throw new VCFParsingError("There are not enough columns present in the header line: " + str);
        }

        // Header prefixes are machine-readable tokens, so every comparison below is ordinal.
        if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
        {
            // presumably the #CHROM POS ID REF ALT QUAL FILTER INFO etc. line;
            // the first character is dropped before splitting into columns
            string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);

            // a trailing separator leaves an empty last entry; discard it
            if (String.IsNullOrEmpty(strings.Last()))
            {
                strings = strings.Take(strings.Length - 1).ToArray();
            }

            if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
            {
                throw new VCFParsingError("There are not enough columns present in the header line: " + str);
            }

            // verify the fixed columns, failing on the first one that differs
            for (int column = 0; column < VCFHeader.HEADER_FIELDS.Length; column++)
            {
                if (VCFHeader.HEADER_FIELDS[column] != strings[column])
                {
                    throw new VCFParsingError("We were not expecting column name '" + strings[column] + "' in that position");
                }
            }

            int arrayIndex = VCFHeader.HEADER_FIELDS.Length; // start after verified columns
            bool sawFormatTag = false;
            if (arrayIndex < strings.Length)
            {
                if (!strings[arrayIndex].Equals("FORMAT"))
                {
                    throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
                }
                sawFormatTag = true;
                arrayIndex++;
            }

            // everything after FORMAT is a sample name
            while (arrayIndex < strings.Length)
            {
                sampleNames.Add(strings[arrayIndex++]);
            }

            if (sawFormatTag && sampleNames.Count == 0)
            {
                throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
            }
        }
        // NOTE(review): the Substring offsets below presumably skip the matched prefix
        // plus its '=' sign; confirm them against the VCFConstants *_HEADER_START values.
        else if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
        {
            VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.Substring(7), version);
            metaData.Add(info);
        }
        else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
        {
            VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.Substring(9), version);
            metaData.Add(filter);
        }
        else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
        {
            VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.Substring(9), version);
            metaData.Add(format);
        }
        else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
        {
            // contigCounter numbers the contig lines in the order they are encountered
            VCFContigHeaderLine contig = new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++);
            metaData.Add(contig);
        }
        else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
        {
            //TODO: Consider giving Alt header lines their own class
            VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description");
            metaData.Add(alt);
        }
        else
        {
            // generic "##key=value" line; a line without '=' is silently skipped
            int equals = str.IndexOf('=');
            if (equals != -1)
            {
                metaData.Add(new VCFHeaderLine(str.Substring(2, equals - 2), str.Substring(equals + 1)));
            }
        }
    }

    this.header = new VCFHeader(metaData, sampleNames);
    if (doOnTheFlyModifications)
    {
        this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
    }
    return this.header;
}
/// <summary>
/// create a VCF header from a set of header record lines
/// </summary>
/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
/// <param name="version"> the header version; it is stored on this instance and handed to every structured header line that gets constructed </param>
/// <returns> a VCFHeader object (the same instance is also stored in this.header) </returns>
/// <exception cref="ArgumentNullException"> headerStrings is null </exception>
/// <exception cref="VCFParsingError"> an entry is null/empty, or the column header line has too few columns, an unexpected column name, or a FORMAT column without any sample names </exception>
protected internal virtual VCFHeader parseHeaderFromLines (IList<string> headerStrings, VCFHeaderVersion version)
{
    if (headerStrings == null) {
        throw new ArgumentNullException ("headerStrings");
    }

    this.version = version;

    ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine> ();
    ISet<string> sampleNames = new LinkedHashSet<string> ();
    int contigCounter = 0;

    // iterate over all the passed in strings
    foreach (string str in headerStrings) {
        // A null or empty entry cannot be a header line; report it as a parse error
        // instead of letting StartsWith/Substring fail with an unrelated exception.
        if (String.IsNullOrEmpty (str)) {
            throw new VCFParsingError ("There are not enough columns present in the header line: " + str);
        }

        // Header prefixes are machine-readable tokens, so every comparison below is ordinal.
        if (!str.StartsWith (VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal)) {
            // presumably the #CHROM POS ID REF ALT QUAL FILTER INFO etc. line;
            // the first character is dropped before splitting into columns
            string[] strings = str.Substring (1).Split (VCFConstants.FIELD_SEPARATOR_CHAR);

            // a trailing separator leaves an empty last entry; discard it
            if (String.IsNullOrEmpty (strings.Last ())) {
                strings = strings.Take (strings.Length - 1).ToArray ();
            }

            if (strings.Length < VCFHeader.HEADER_FIELDS.Length) {
                throw new VCFParsingError ("There are not enough columns present in the header line: " + str);
            }

            // verify the fixed columns, failing on the first one that differs
            for (int column = 0; column < VCFHeader.HEADER_FIELDS.Length; column++) {
                if (VCFHeader.HEADER_FIELDS [column] != strings [column]) {
                    throw new VCFParsingError ("We were not expecting column name '" + strings [column] + "' in that position");
                }
            }

            int arrayIndex = VCFHeader.HEADER_FIELDS.Length; // start after verified columns
            bool sawFormatTag = false;
            if (arrayIndex < strings.Length) {
                if (!strings [arrayIndex].Equals ("FORMAT")) {
                    throw new VCFParsingError ("we were expecting column name 'FORMAT' but we saw '" + strings [arrayIndex] + "'");
                }
                sawFormatTag = true;
                arrayIndex++;
            }

            // everything after FORMAT is a sample name
            while (arrayIndex < strings.Length) {
                sampleNames.Add (strings [arrayIndex++]);
            }

            if (sawFormatTag && sampleNames.Count == 0) {
                throw new VCFParsingError ("The FORMAT field was provided but there is no genotype/sample data");
            }
        // NOTE(review): the Substring offsets below presumably skip the matched prefix
        // plus its '=' sign; confirm them against the VCFConstants *_HEADER_START values.
        } else if (str.StartsWith (VCFConstants.INFO_HEADER_START, StringComparison.Ordinal)) {
            VCFInfoHeaderLine info = new VCFInfoHeaderLine (str.Substring (7), version);
            metaData.Add (info);
        } else if (str.StartsWith (VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal)) {
            VCFFilterHeaderLine filter = new VCFFilterHeaderLine (str.Substring (9), version);
            metaData.Add (filter);
        } else if (str.StartsWith (VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal)) {
            VCFFormatHeaderLine format = new VCFFormatHeaderLine (str.Substring (9), version);
            metaData.Add (format);
        } else if (str.StartsWith (VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal)) {
            // contigCounter numbers the contig lines in the order they are encountered
            VCFContigHeaderLine contig = new VCFContigHeaderLine (str.Substring (9), version, VCFConstants.CONTIG_HEADER_START.Substring (2), contigCounter++);
            metaData.Add (contig);
        } else if (str.StartsWith (VCFConstants.ALT_HEADER_START, StringComparison.Ordinal)) {
            //TODO: Consider giving Alt header lines their own class
            VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine (str.Substring (6), version, VCFConstants.ALT_HEADER_START.Substring (2), "ID", "Description");
            metaData.Add (alt);
        } else {
            // generic "##key=value" line; a line without '=' is silently skipped
            int equals = str.IndexOf ('=');
            if (equals != -1) {
                metaData.Add (new VCFHeaderLine (str.Substring (2, equals - 2), str.Substring (equals + 1)));
            }
        }
    }

    this.header = new VCFHeader (metaData, sampleNames);
    if (doOnTheFlyModifications) {
        this.header = VCFStandardHeaderLines.repairStandardHeaderLines (this.header);
    }
    return this.header;
}