/// <summary>
/// Create the info string from a map of info fields; assumes that no values are null.
/// </summary>
/// <remarks>
/// Entries are separated by <c>VCFConstants.INFO_FIELD_SEPARATOR</c>. A key is followed by
/// "=value" only when the value is non-empty and the header does not declare that key with
/// CountType INTEGER and Count 0.
/// </remarks>
/// <param name="infoFields"> a map of info fields </param>
/// <returns>
/// the assembled info string, or <c>VCFConstants.EMPTY_INFO_FIELD</c> when the map is empty
/// </returns>
private string getInfoString(IDictionary<string, string> infoFields)
{
    if (infoFields.Count == 0)
    {
        return VCFConstants.EMPTY_INFO_FIELD;
    }

    // Accumulate into a builder: repeated string concatenation re-copies everything
    // written so far on each append, which is quadratic in the length of the result.
    System.Text.StringBuilder builder = new System.Text.StringBuilder();
    bool isFirst = true;

    foreach (KeyValuePair<string, string> entry in infoFields)
    {
        // The separator goes between entries, never before the first one.
        if (isFirst)
        {
            isFirst = false;
        }
        else
        {
            builder.Append(VCFConstants.INFO_FIELD_SEPARATOR);
        }

        string key = entry.Key;
        builder.Append(key);

        if (entry.Value.Length != 0)
        {
            // Keys the header declares with a fixed integer count of zero are written
            // as a bare key; everything else (including unknown keys) gets "=value".
            VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
            if (metaData == null || metaData.CountType != VCFHeaderLineCount.INTEGER || metaData.Count != 0)
            {
                builder.Append("=");
                builder.Append(entry.Value);
            }
        }
    }

    return builder.ToString();
}
/// <summary>
/// Sorts every header line in mMetaData into the lookup structure for its kind
/// (info, format, filter, contig, or "other") so lines can be found quickly by key name.
/// </summary>
private void loadMetaDataMaps()
{
    foreach (VCFHeaderLine headerLine in mMetaData)
    {
        // The checks run in the same order as before; the first matching kind wins.
        if (headerLine is VCFInfoHeaderLine)
        {
            addMetaDataMapBinding(mInfoMetaData, (VCFInfoHeaderLine)headerLine);
            continue;
        }

        if (headerLine is VCFFormatHeaderLine)
        {
            addMetaDataMapBinding(mFormatMetaData, (VCFFormatHeaderLine)headerLine);
            continue;
        }

        if (headerLine is VCFFilterHeaderLine)
        {
            VCFFilterHeaderLine filter = (VCFFilterHeaderLine)headerLine;
            mFilterMetaData[filter.ID] = filter;
            continue;
        }

        if (headerLine is VCFContigHeaderLine)
        {
            contigMetaData.Add((VCFContigHeaderLine)headerLine);
            continue;
        }

        // Anything that is none of the above is indexed by its own key.
        mOtherMetaData[headerLine.Key] = headerLine;
    }

    // A GL format line without a matching PL format line only produces a console warning;
    // the stricter variant that threw a VCFParsingError here is intentionally left disabled.
    if (hasFormatLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !hasFormatLine(VCFConstants.GENOTYPE_PL_KEY))
    {
        Console.WriteLine("Warning now we want PL fields, not just GL fields");
    }
}
/// <summary>
/// Parse out the info fields into a dictionary of key/values.
/// </summary>
/// <remarks>
/// Each entry of the INFO column is handled as follows:
/// "key=value" maps the key to the value string, or to a string[] when the value splits into
/// more than one element on <c>VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR</c>;
/// "key=0" is skipped when the header declares the key as a Flag;
/// "key=" maps the key to <c>VCFConstants.MISSING_VALUE_v4</c>;
/// a bare "key" maps to <c>true</c>.
/// An info field equal to <c>VCFConstants.EMPTY_INFO_FIELD</c> yields an empty dictionary.
/// Empty input and input containing a tab or space are reported through generateException.
/// </remarks>
/// <param name="infoField"> the fields </param>
/// <returns> a mapping of keys to objects </returns>
/// <exception cref="VCFParsingError">
/// a key appears without "=value" although the header declares it with a non-Flag type
/// </exception>
private IDictionary<string, object> parseInfo(string infoField)
{
    IDictionary<string, object> attributes = new Dictionary<string, object>();

    if (infoField.Length == 0)
    {
        generateException("The VCF specification requires a valid info field");
    }

    if (infoField.Equals(VCFConstants.EMPTY_INFO_FIELD))
    {
        return attributes;
    }

    // The char overloads of IndexOf compare ordinally. The string overloads are
    // culture-sensitive, which is the wrong tool for scanning structural delimiters.
    if (infoField.IndexOf('\t') != -1 || infoField.IndexOf(' ') != -1)
    {
        generateException("The VCF specification does not allow for whitespace in the INFO field");
    }

    string[] infoFieldArray = infoField.Split(VCFConstants.INFO_FIELD_SEPARATOR_CHAR_AS_ARRAY);
    foreach (string field in infoFieldArray)
    {
        string key;
        object value;

        int eqI = field.IndexOf('=');
        if (eqI != -1)
        {
            key = field.Substring(0, eqI);
            string valueString = field.Substring(eqI + 1);

            // split the value on the array separator; a single element stays a plain string
            string[] infoValueArray = valueString.Split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR);
            if (infoValueArray.Length == 1)
            {
                value = infoValueArray[0];
                VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
                if (headerLine != null && headerLine.Type == VCFHeaderLineType.Flag && value.Equals("0"))
                {
                    // deal with the case where a flag field has =0, such as DB=0, by skipping the add
                    continue;
                }
            }
            else
            {
                value = infoValueArray;
            }
        }
        else
        {
            key = field;
            VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key);
            if (headerLine != null && headerLine.Type != VCFHeaderLineType.Flag)
            {
                // Note: this was originally a warning that was only emitted once, and the
                // value was set to VCFConstants.MISSING_VALUE_v4 instead of failing.
                throw new VCFParsingError("Found info key " + key + " without a = value, but the header says the field is of type " + headerLine.Type + " but this construct is only value for FLAG type fields");
            }

            value = true;
        }

        // this ensures that key/value pairs that look like key=; are parsed correctly as MISSING
        if ("".Equals(value))
        {
            value = VCFConstants.MISSING_VALUE_v4;
        }

        // TODO (inherited; the original note is cut off):
        // "We should have enough information in the header here to ..."
        attributes[key] = value;
    }

    return attributes;
}
/// <summary>
/// Create a VCF header from a set of header record lines and store it on this instance.
/// </summary>
/// <remarks>
/// Lines that start with <c>VCFHeader.METADATA_INDICATOR</c> are turned into header-line
/// objects (info, filter, format, contig, alt, or a generic key=value line). Any other line
/// is treated as the column header line: its leading columns must match
/// <c>VCFHeader.HEADER_FIELDS</c>, an optional FORMAT column may follow, and all remaining
/// columns are taken as sample names.
/// </remarks>
/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
/// <param name="version"> the header version; recorded on this instance and passed to the header-line constructors </param>
/// <returns> a VCFHeader object </returns>
/// <exception cref="VCFParsingError">
/// a line is empty, the column header line has too few columns or an unexpected column
/// name, or FORMAT is present without any sample columns
/// </exception>
protected internal virtual VCFHeader parseHeaderFromLines(IList<string> headerStrings, VCFHeaderVersion version)
{
    this.version = version;

    ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
    ISet<string> sampleNames = new LinkedHashSet<string>();
    int contigCounter = 0;

    // iterate over all the passed in strings
    foreach (string str in headerStrings)
    {
        // An empty line has no columns at all. Report it as a parse error instead of
        // letting Substring(1) below fail with an ArgumentOutOfRangeException.
        if (String.IsNullOrEmpty(str))
        {
            throw new VCFParsingError("There are not enough columns present in the header line: " + str);
        }

        // Prefix checks are ordinal: these are fixed file-format tokens, not linguistic text.
        if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
        {
            // presumably the #CHROM POS ID REF ALT QUAL FILTER INFO etc. line
            string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);

            // a trailing separator leaves an empty last element; drop it
            if (String.IsNullOrEmpty(strings.Last()))
            {
                strings = strings.Take(strings.Length - 1).ToArray();
            }

            if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
            {
                throw new VCFParsingError("There are not enough columns present in the header line: " + str);
            }

            // verify the fixed columns, failing on the first one that does not match
            for (int column = 0; column < VCFHeader.HEADER_FIELDS.Length; column++)
            {
                if (VCFHeader.HEADER_FIELDS[column] != strings[column])
                {
                    throw new VCFParsingError("We were not expecting column name '" + strings[column] + "' in that position");
                }
            }

            int arrayIndex = VCFHeader.HEADER_FIELDS.Length; // start after verified columns
            bool sawFormatTag = false;
            if (arrayIndex < strings.Length)
            {
                if (!strings[arrayIndex].Equals("FORMAT"))
                {
                    throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
                }

                sawFormatTag = true;
                arrayIndex++;
            }

            // everything after FORMAT is a sample name
            while (arrayIndex < strings.Length)
            {
                sampleNames.Add(strings[arrayIndex++]);
            }

            if (sawFormatTag && sampleNames.Count == 0)
            {
                throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
            }
        }
        // NOTE(review): the Substring offsets below (7, 9, 9, 9, 6) presumably skip the
        // matched prefix including its '='; confirm against the VCFConstants definitions.
        else if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
        {
            metaData.Add(new VCFInfoHeaderLine(str.Substring(7), version));
        }
        else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
        {
            metaData.Add(new VCFFilterHeaderLine(str.Substring(9), version));
        }
        else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
        {
            metaData.Add(new VCFFormatHeaderLine(str.Substring(9), version));
        }
        else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
        {
            // contigCounter is handed to each contig line and then incremented
            metaData.Add(new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++));
        }
        else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
        {
            // TODO: Consider giving Alt header lines their own class
            metaData.Add(new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description"));
        }
        else
        {
            // generic "##key=value" line; lines without '=' are ignored
            int equalsIndex = str.IndexOf('=');
            if (equalsIndex != -1)
            {
                metaData.Add(new VCFHeaderLine(str.Substring(2, equalsIndex - 2), str.Substring(equalsIndex + 1)));
            }
        }
    }

    this.header = new VCFHeader(metaData, sampleNames);
    if (doOnTheFlyModifications)
    {
        this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
    }

    return this.header;
}
/// <summary>
/// Create a VCF header from a set of header record lines and store it on this instance.
/// </summary>
/// <remarks>
/// Lines that start with <c>VCFHeader.METADATA_INDICATOR</c> are turned into header-line
/// objects (info, filter, format, contig, alt, or a generic key=value line). Any other line
/// is treated as the column header line: its leading columns must match
/// <c>VCFHeader.HEADER_FIELDS</c>, an optional FORMAT column may follow, and all remaining
/// columns are taken as sample names.
/// </remarks>
/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
/// <param name="version"> the header version; recorded on this instance and passed to the header-line constructors </param>
/// <returns> a VCFHeader object </returns>
/// <exception cref="VCFParsingError">
/// a line is empty, the column header line has too few columns or an unexpected column
/// name, or FORMAT is present without any sample columns
/// </exception>
protected internal virtual VCFHeader parseHeaderFromLines(IList<string> headerStrings, VCFHeaderVersion version)
{
    this.version = version;

    ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
    ISet<string> sampleNames = new LinkedHashSet<string>();
    int contigCounter = 0;

    // iterate over all the passed in strings
    foreach (string str in headerStrings)
    {
        // An empty line has no columns at all. Report it as a parse error instead of
        // letting Substring(1) below fail with an ArgumentOutOfRangeException.
        if (String.IsNullOrEmpty(str))
        {
            throw new VCFParsingError("There are not enough columns present in the header line: " + str);
        }

        // Prefix checks are ordinal: these are fixed file-format tokens, not linguistic text.
        if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
        {
            // presumably the #CHROM POS ID REF ALT QUAL FILTER INFO etc. line
            string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);

            // a trailing separator leaves an empty last element; drop it
            if (String.IsNullOrEmpty(strings.Last()))
            {
                strings = strings.Take(strings.Length - 1).ToArray();
            }

            if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
            {
                throw new VCFParsingError("There are not enough columns present in the header line: " + str);
            }

            // verify the fixed columns, failing on the first one that does not match
            for (int column = 0; column < VCFHeader.HEADER_FIELDS.Length; column++)
            {
                if (VCFHeader.HEADER_FIELDS[column] != strings[column])
                {
                    throw new VCFParsingError("We were not expecting column name '" + strings[column] + "' in that position");
                }
            }

            int arrayIndex = VCFHeader.HEADER_FIELDS.Length; // start after verified columns
            bool sawFormatTag = false;
            if (arrayIndex < strings.Length)
            {
                if (!strings[arrayIndex].Equals("FORMAT"))
                {
                    throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
                }

                sawFormatTag = true;
                arrayIndex++;
            }

            // everything after FORMAT is a sample name
            while (arrayIndex < strings.Length)
            {
                sampleNames.Add(strings[arrayIndex++]);
            }

            if (sawFormatTag && sampleNames.Count == 0)
            {
                throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
            }
        }
        // NOTE(review): the Substring offsets below (7, 9, 9, 9, 6) presumably skip the
        // matched prefix including its '='; confirm against the VCFConstants definitions.
        else if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
        {
            metaData.Add(new VCFInfoHeaderLine(str.Substring(7), version));
        }
        else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
        {
            metaData.Add(new VCFFilterHeaderLine(str.Substring(9), version));
        }
        else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
        {
            metaData.Add(new VCFFormatHeaderLine(str.Substring(9), version));
        }
        else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
        {
            // contigCounter is handed to each contig line and then incremented
            metaData.Add(new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++));
        }
        else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
        {
            // TODO: Consider giving Alt header lines their own class
            metaData.Add(new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description"));
        }
        else
        {
            // generic "##key=value" line; lines without '=' are ignored
            int equalsIndex = str.IndexOf('=');
            if (equalsIndex != -1)
            {
                metaData.Add(new VCFHeaderLine(str.Substring(2, equalsIndex - 2), str.Substring(equalsIndex + 1)));
            }
        }
    }

    this.header = new VCFHeader(metaData, sampleNames);
    if (doOnTheFlyModifications)
    {
        this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
    }

    return this.header;
}
/// <summary>
/// Registers an INFO header line by passing it to <c>infoStandards.add</c>.
/// </summary>
/// <param name="line"> the INFO header line to register </param>
private static void registerStandard(VCFInfoHeaderLine line)
{
    // NOTE(review): the lowercase "add" suggests infoStandards is a project-defined
    // container rather than a BCL collection; confirm against its declaration.
    infoStandards.add(line);
}