/// <summary>
/// Create a compound VCF header line by parsing its textual form.
/// The tags ID, Number, Type and Description are read from the parsed line.
/// </summary>
/// <param name="line"> the header line </param>
/// <param name="version"> the VCF header version </param>
/// <param name="lineType"> the header line type </param>
/// <exception cref="VCFParsingError"> if a fixed count is negative, or the Type tag does not name a VCFHeaderLineType member </exception>
/// <exception cref="ArgumentException"> if the type is Flag and allowFlagValues() returns false </exception>
protected internal VCFCompoundHeaderLine(string line, VCFHeaderVersion version, SupportedHeaderLineType lineType)
    : base(lineType.ToString(), "")
{
    IDictionary<string, string> mapping = VCFHeaderLineTranslator.parseLine(version, line, "ID", "Number", "Type", "Description");
    name = mapping["ID"];

    // count stays at -1 unless the Number tag turns out to be a fixed integer
    count = -1;
    string numberStr = mapping["Number"];
    if (numberStr.Equals(VCFConstants.PER_ALTERNATE_ALLELE_COUNT))
    {
        countType = VCFHeaderLineCount.A;
    }
    else if (numberStr.Equals(VCFConstants.PER_ALLELE_COUNT))
    {
        countType = VCFHeaderLineCount.R;
    }
    else if (numberStr.Equals(VCFConstants.PER_GENOTYPE_COUNT))
    {
        countType = VCFHeaderLineCount.G;
    }
    else if (((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1 || version == VCFHeaderVersion.VCF4_2)
                && numberStr.Equals(VCFConstants.UNBOUNDED_ENCODING_v4))
             || ((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3)
                && numberStr.Equals(VCFConstants.UNBOUNDED_ENCODING_v3)))
    {
        // the "unbounded" marker differs between the v3 and v4 families
        countType = VCFHeaderLineCount.UNBOUNDED;
    }
    else
    {
        countType = VCFHeaderLineCount.INTEGER;
        // VCF is a machine-readable format, so parse independently of the current culture
        count = Convert.ToInt32(numberStr, System.Globalization.CultureInfo.InvariantCulture);
    }

    if (count < 0 && countType == VCFHeaderLineCount.INTEGER)
    {
        throw new VCFParsingError("Count < 0 for fixed size VCF header field " + name);
    }

    try
    {
        type = (VCFHeaderLineType)Enum.Parse(typeof(VCFHeaderLineType), mapping["Type"]);
    }
    catch (Exception)
    {
        throw new VCFParsingError(mapping["Type"] + " is not a valid type in the VCF specification (note that types are case-sensitive)");
    }

    if (type == VCFHeaderLineType.Flag && !allowFlagValues())
    {
        throw new ArgumentException("Flag is an unsupported type for this kind of field");
    }

    // The IDictionary indexer is documented to throw when the key is absent, which would make the
    // null check below unreachable; TryGetValue leaves the local null in that case.
    string parsedDescription;
    mapping.TryGetValue("Description", out parsedDescription);
    description = parsedDescription;
    if (description == null && ALLOW_UNBOUND_DESCRIPTIONS)
    {
        // handle the case where there's no description provided
        description = UNBOUND_DESCRIPTION;
    }

    this.lineType = lineType;
    validate();
}
/// <summary>
/// Write the VCF header to the writer: the version line first, then every metadata line,
/// then the column line.
/// </summary>
/// <param name="header">
/// the header to write; when doNotWriteGenotypes is set, a new VCFHeader built only from
/// its sorted metadata is written in its place
/// </param>
/// <exception cref="Exception"> wraps any IOException raised while writing </exception>
void writeHeader(VCFHeader header)
{
    if (doNotWriteGenotypes)
    {
        header = new VCFHeader(header.MetaDataInSortedOrder);
    }

    try
    {
        // the file format field needs to be written first
        writer.Write(VERSION_LINE + "\n");

        foreach (VCFHeaderLine metaLine in header.MetaDataInSortedOrder)
        {
            // lines whose key is a format string are skipped; VERSION_LINE was written above
            if (!VCFHeaderVersion.IsFormatString(metaLine.Key))
            {
                writer.Write(VCFHeader.METADATA_INDICATOR);
                writer.Write(metaLine.ToString());
                writer.Write("\n");
            }
        }

        // write out the column line
        writer.Write(VCFHeader.HEADER_INDICATOR);
        bool needSeparator = false;
        foreach (string column in VCFHeader.HEADER_FIELDS)
        {
            // no separator in front of the very first column
            if (needSeparator)
            {
                writer.Write(VCFConstants.FIELD_SEPARATOR);
            }
            needSeparator = true;
            writer.Write(column);
        }

        if (header.hasGenotypingData())
        {
            writer.Write(VCFConstants.FIELD_SEPARATOR);
            writer.Write("FORMAT");
            foreach (string sampleName in header.GenotypeSampleNames)
            {
                writer.Write(VCFConstants.FIELD_SEPARATOR);
                writer.Write(sampleName);
            }
        }

        writer.Write("\n");
    }
    catch (IOException ioFailure)
    {
        throw new Exception("IOException writing the VCF header.", ioFailure);
    }
}
/// <summary>
/// Remove every VCF version (file format) line from the metadata.
/// Each entry of mMetaData whose key satisfies VCFHeaderVersion.IsFormatString is collected
/// and then removed; this method itself performs no version validation.
/// TODO: Should only be one format line
/// </summary>
public virtual void loadVCFVersion()
{
    // Collect first and remove afterwards, so the collection is not changed while it is enumerated.
    IList<VCFHeaderLine> formatLines = new List<VCFHeaderLine>();
    foreach (VCFHeaderLine candidate in mMetaData)
    {
        if (!VCFHeaderVersion.IsFormatString(candidate.Key))
        {
            continue;
        }
        formatLines.Add(candidate);
    }

    // remove old header lines for now
    mMetaData.RemoveRange(formatLines);
}
/// <summary>
/// Read header lines from the reader up to and including the column line, then parse them.
/// Lines starting with VCFHeader.METADATA_INDICATOR are collected; the first line starting
/// with VCFHeader.HEADER_INDICATOR ends the header and triggers parsing.
/// </summary>
/// <param name="reader"> the line reader to take header lines from </param>
/// <returns> the VCFHeader produced by parseHeaderFromLines from the collected lines </returns>
/// <exception cref="VCFParsingError">
/// if the version is unsupported or not acceptable to this codec, if the column line arrives
/// before any version line, if a line starts with neither indicator, or if the input ends
/// before the column line
/// </exception>
/// <exception cref="Exception"> wraps any IOException raised while reading </exception>
public virtual VCFHeader readHeader(StreamReader reader)
{
    IList<string> headerStrings = new List<string>();
    string line;
    try
    {
        bool foundHeaderVersion = false;
        while ((line = reader.ReadLine()) != null)
        {
            lineNo++;
            // Ordinal comparison: these prefixes are file-format markers, not linguistic text.
            if (line.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
            {
                string[] lineFields = line.Substring(2).Split('=');
                if (lineFields.Length == 2 && VCFHeaderVersion.IsFormatString(lineFields[0]))
                {
                    if (!VCFHeaderVersion.IsVersionString(lineFields[1]))
                    {
                        throw new VCFParsingError("Header: " + lineFields[1] + " is not a supported version");
                    }
                    foundHeaderVersion = true;
                    version = VCFHeaderVersion.ToHeaderVersion(lineFields[1]);
                    if (!this.AcceptableVersions.Contains(version))
                    {
                        throw new VCFParsingError("This codec is strictly for " + Name + "; please use a different codec for " + lineFields[1]);
                    }
                }
                // the version line itself is kept among the header strings as well
                headerStrings.Add(line);
            }
            else if (line.StartsWith(VCFHeader.HEADER_INDICATOR, StringComparison.Ordinal))
            {
                // should be only one such line
                if (!foundHeaderVersion)
                {
                    throw new VCFParsingError("We never saw a header line specifying VCF version");
                }
                headerStrings.Add(line);
                return parseHeaderFromLines(headerStrings, version);
            }
            else
            {
                // a line with neither prefix appeared before the column line
                throw new VCFParsingError("We never saw the required CHROM header line (starting with one #) for the input VCF file");
            }
        }
    }
    catch (IOException e)
    {
        throw new Exception("IO Exception ", e);
    }

    // input ended without a column line
    throw new VCFParsingError("We never saw the required CHROM header line (starting with one #) for the input VCF file");
}
/// <summary>
/// Parse a VCF version line into a VCFHeaderVersion.
/// The line is passed through clean(), split on '=', and must consist of exactly two parts:
/// a key (after a two-character prefix) accepted by IsFormatString, and a version string.
/// </summary>
/// <param name="versionLine"> the version line to parse </param>
/// <returns> the header version returned by ToHeaderVersion for the text after '=' </returns>
/// <exception cref="VCFParsingError"> if the line is malformed or the version is not supported </exception>
public static VCFHeaderVersion GetHeaderVersion(String versionLine)
{
    versionLine = clean(versionLine);
    String[] lineFields = versionLine.Split('=');

    // The key is preceded by a two-character prefix. A left-hand side shorter than that cannot be
    // valid; checking the length first reports it as a parsing error instead of letting
    // Substring(2) throw ArgumentOutOfRangeException.
    if (lineFields.Length != 2 || lineFields[0].Length < 2 || !IsFormatString(lineFields[0].Substring(2)))
    {
        throw new VCFParsingError(versionLine + " is not a valid VCF version line");
    }

    VCFHeaderVersion vcfHV = ToHeaderVersion(lineFields[1]);
    if (vcfHV == null)
    {
        throw new VCFParsingError(lineFields[1] + " is not a supported version");
    }
    return vcfHV;
}
/// <summary>
/// Parse a header value line with the parser registered in mapping for the given version.
/// </summary>
/// <param name="version"> key used to look up the parser in mapping </param>
/// <param name="valueLine"> the text handed to that parser </param>
/// <param name="expectedTagOrder"> tag names forwarded to the parser unchanged </param>
/// <returns> the dictionary returned by the version-specific parser </returns>
public static IDictionary<string, string> parseLine(VCFHeaderVersion version, string valueLine, params string[] expectedTagOrder)
{
    var lineParser = mapping[version];
    return lineParser.parseLine(valueLine, expectedTagOrder);
}
/// <summary>
/// Create a VCF header from a set of header record lines.
/// Lines starting with VCFHeader.METADATA_INDICATOR become metadata entries; any other line is
/// treated as the column line, from which the sample names are taken.
/// </summary>
/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
/// <param name="version"> the VCF version; stored on this instance and passed to the header-line constructors </param>
/// <returns> a VCFHeader object, which is also stored in this.header </returns>
/// <exception cref="VCFParsingError">
/// if the column line has too few columns, a fixed column has an unexpected name, the column
/// after the fixed ones is not FORMAT, or FORMAT is present without any sample name
/// </exception>
protected internal virtual VCFHeader parseHeaderFromLines(IList<string> headerStrings, VCFHeaderVersion version)
{
    this.version = version;
    ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
    ISet<string> sampleNames = new LinkedHashSet<string>();
    int contigCounter = 0;

    // iterate over all the passed in strings
    foreach (string str in headerStrings)
    {
        // Ordinal comparisons throughout: the prefixes are file-format markers, not linguistic text.
        if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
        {
            // presumably the #CHROM POS ID REF ALT QUAL FILTER INFO etc. line
            string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);

            // a trailing separator leaves one empty last element; drop it
            if (String.IsNullOrEmpty(strings.Last()))
            {
                strings = strings.Take(strings.Length - 1).ToArray();
            }
            if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
            {
                throw new VCFParsingError("There are not enough columns present in the header line: " + str);
            }

            // every fixed column must carry the expected name at the expected position
            var misMatchedColumns = Enumerable.Range(0, VCFHeader.HEADER_FIELDS.Length)
                                              .Where(x => VCFHeader.HEADER_FIELDS[x] != strings[x])
                                              .Select(x => strings[x])
                                              .ToArray();
            if (misMatchedColumns.Length > 0)
            {
                throw new VCFParsingError("We were not expecting column name '" + misMatchedColumns[0] + "' in that position");
            }

            int arrayIndex = VCFHeader.HEADER_FIELDS.Length; // start after verified columns
            bool sawFormatTag = false;
            if (arrayIndex < strings.Length)
            {
                if (!strings[arrayIndex].Equals("FORMAT"))
                {
                    throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
                }
                sawFormatTag = true;
                arrayIndex++;
            }

            // everything after FORMAT is a sample name
            while (arrayIndex < strings.Length)
            {
                sampleNames.Add(strings[arrayIndex++]);
            }

            if (sawFormatTag && sampleNames.Count == 0)
            {
                throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
            }
        }
        else
        {
            // NOTE(review): the Substring offsets below are fixed numbers that skip the line prefix;
            // they presumably track the lengths of the VCFConstants prefixes plus '=' - confirm.
            if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
            {
                VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.Substring(7), version);
                metaData.Add(info);
            }
            else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
            {
                VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.Substring(9), version);
                metaData.Add(filter);
            }
            else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
            {
                VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.Substring(9), version);
                metaData.Add(format);
            }
            else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
            {
                // contigs are numbered in the order in which they appear
                VCFContigHeaderLine contig = new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++);
                metaData.Add(contig);
            }
            else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
            {
                //TODO: Consider giving Alt header lines their own class
                VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description");
                metaData.Add(alt);
            }
            else
            {
                // generic key=value line; a line without '=' is silently ignored
                int equals = str.IndexOf('=');
                if (equals != -1)
                {
                    metaData.Add(new VCFHeaderLine(str.Substring(2, equals - 2), str.Substring(equals + 1)));
                }
            }
        }
    }

    this.header = new VCFHeader(metaData, sampleNames);
    if (doOnTheFlyModifications)
    {
        this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
    }
    return this.header;
}
/// <summary>
/// Create a compound VCF header line by parsing its textual form.
/// The tags ID, Number, Type and Description are read from the parsed line.
/// </summary>
/// <param name="line"> the header line </param>
/// <param name="version"> the VCF header version </param>
/// <param name="lineType"> the header line type </param>
/// <exception cref="VCFParsingError"> if a fixed count is negative, or the Type tag does not name a VCFHeaderLineType member </exception>
/// <exception cref="ArgumentException"> if the type is Flag and allowFlagValues() returns false </exception>
protected internal VCFCompoundHeaderLine(string line, VCFHeaderVersion version, SupportedHeaderLineType lineType)
    : base(lineType.ToString(), "")
{
    IDictionary<string, string> mapping = VCFHeaderLineTranslator.parseLine(version, line, "ID", "Number", "Type", "Description");
    name = mapping["ID"];

    // count stays at -1 unless the Number tag turns out to be a fixed integer
    count = -1;
    string numberStr = mapping["Number"];
    if (numberStr.Equals(VCFConstants.PER_ALTERNATE_ALLELE_COUNT))
    {
        // one value per alternate allele
        countType = VCFHeaderLineCount.A;
    }
    else if (numberStr.Equals(VCFConstants.PER_ALLELE_COUNT))
    {
        // one value per allele; this must not be folded into the per-alternate-allele case
        countType = VCFHeaderLineCount.R;
    }
    else if (numberStr.Equals(VCFConstants.PER_GENOTYPE_COUNT))
    {
        countType = VCFHeaderLineCount.G;
    }
    else if (((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1 || version == VCFHeaderVersion.VCF4_2)
                && numberStr.Equals(VCFConstants.UNBOUNDED_ENCODING_v4))
             || ((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3)
                && numberStr.Equals(VCFConstants.UNBOUNDED_ENCODING_v3)))
    {
        // the "unbounded" marker differs between the v3 and v4 families
        countType = VCFHeaderLineCount.UNBOUNDED;
    }
    else
    {
        countType = VCFHeaderLineCount.INTEGER;
        // VCF is a machine-readable format, so parse independently of the current culture
        count = Convert.ToInt32(numberStr, System.Globalization.CultureInfo.InvariantCulture);
    }

    if (count < 0 && countType == VCFHeaderLineCount.INTEGER)
    {
        throw new VCFParsingError("Count < 0 for fixed size VCF header field " + name);
    }

    try
    {
        type = (VCFHeaderLineType)Enum.Parse(typeof(VCFHeaderLineType), mapping["Type"]);
    }
    catch (Exception)
    {
        throw new VCFParsingError(mapping["Type"] + " is not a valid type in the VCF specification (note that types are case-sensitive)");
    }

    if (type == VCFHeaderLineType.Flag && !allowFlagValues())
    {
        throw new System.ArgumentException("Flag is an unsupported type for this kind of field");
    }

    // The IDictionary indexer is documented to throw when the key is absent, which would make the
    // null check below unreachable; TryGetValue leaves the local null in that case.
    string parsedDescription;
    mapping.TryGetValue("Description", out parsedDescription);
    description = parsedDescription;
    if (description == null && ALLOW_UNBOUND_DESCRIPTIONS)
    {
        // handle the case where there's no description provided
        description = UNBOUND_DESCRIPTION;
    }

    this.lineType = lineType;
    validate();
}
/// <summary>
/// Create a VCF filter header line from its textual form.
/// </summary>
/// <param name="line"> the header line </param>
/// <param name="version"> the vcf header version </param>
public VCFFilterHeaderLine(string line, VCFHeaderVersion version)
    : base(line, version, "FILTER", "ID", "Description")
{
    // Nothing to do here: all work is delegated to the base constructor, which receives the
    // key "FILTER" and the expected tags "ID" and "Description".
}
/// <summary>
/// Read header lines from the reader up to and including the column line, then parse them.
/// Lines starting with VCFHeader.METADATA_INDICATOR are collected; the first line starting
/// with VCFHeader.HEADER_INDICATOR ends the header and triggers parsing.
/// </summary>
/// <param name="reader"> the line reader to take header lines from </param>
/// <returns> the VCFHeader produced by parseHeaderFromLines from the collected lines </returns>
/// <exception cref="VCFParsingError">
/// if the version is unsupported or not acceptable to this codec, if the column line arrives
/// before any version line, if a line starts with neither indicator, or if the input ends
/// before the column line
/// </exception>
/// <exception cref="Exception"> wraps any IOException raised while reading </exception>
public virtual VCFHeader readHeader(StreamReader reader)
{
    IList<string> headerStrings = new List<string>();
    string line;
    try
    {
        bool foundHeaderVersion = false;
        while ((line = reader.ReadLine()) != null)
        {
            lineNo++;
            // Ordinal comparison: these prefixes are file-format markers, not linguistic text.
            if (line.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
            {
                string[] lineFields = line.Substring(2).Split('=');
                if (lineFields.Length == 2 && VCFHeaderVersion.IsFormatString(lineFields[0]))
                {
                    if (!VCFHeaderVersion.IsVersionString(lineFields[1]))
                    {
                        throw new VCFParsingError("Header: " + lineFields[1] + " is not a supported version");
                    }
                    foundHeaderVersion = true;
                    version = VCFHeaderVersion.ToHeaderVersion(lineFields[1]);
                    if (!this.AcceptableVersions.Contains(version))
                    {
                        throw new VCFParsingError("This codec is strictly for " + Name + "; please use a different codec for " + lineFields[1]);
                    }
                }
                // the version line itself is kept among the header strings as well
                headerStrings.Add(line);
            }
            else if (line.StartsWith(VCFHeader.HEADER_INDICATOR, StringComparison.Ordinal))
            {
                // should be only one such line
                if (!foundHeaderVersion)
                {
                    throw new VCFParsingError("We never saw a header line specifying VCF version");
                }
                headerStrings.Add(line);
                return parseHeaderFromLines(headerStrings, version);
            }
            else
            {
                // a line with neither prefix appeared before the column line
                throw new VCFParsingError("We never saw the required CHROM header line (starting with one #) for the input VCF file");
            }
        }
    }
    catch (IOException e)
    {
        throw new Exception("IO Exception ", e);
    }

    // input ended without a column line
    throw new VCFParsingError("We never saw the required CHROM header line (starting with one #) for the input VCF file");
}
/// <summary>
/// Create a VCF header from a set of header record lines.
/// Lines starting with VCFHeader.METADATA_INDICATOR become metadata entries; any other line is
/// treated as the column line, from which the sample names are taken.
/// </summary>
/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
/// <param name="version"> the VCF version; stored on this instance and passed to the header-line constructors </param>
/// <returns> a VCFHeader object, which is also stored in this.header </returns>
/// <exception cref="VCFParsingError">
/// if the column line has too few columns, a fixed column has an unexpected name, the column
/// after the fixed ones is not FORMAT, or FORMAT is present without any sample name
/// </exception>
protected internal virtual VCFHeader parseHeaderFromLines(IList<string> headerStrings, VCFHeaderVersion version)
{
    this.version = version;
    ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
    ISet<string> sampleNames = new LinkedHashSet<string>();
    int contigCounter = 0;

    // iterate over all the passed in strings
    foreach (string str in headerStrings)
    {
        // Ordinal comparisons throughout: the prefixes are file-format markers, not linguistic text.
        if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
        {
            // presumably the #CHROM POS ID REF ALT QUAL FILTER INFO etc. line
            string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);

            // a trailing separator leaves one empty last element; drop it
            if (String.IsNullOrEmpty(strings.Last()))
            {
                strings = strings.Take(strings.Length - 1).ToArray();
            }
            if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
            {
                throw new VCFParsingError("There are not enough columns present in the header line: " + str);
            }

            // every fixed column must carry the expected name at the expected position
            var misMatchedColumns = Enumerable.Range(0, VCFHeader.HEADER_FIELDS.Length)
                                              .Where(x => VCFHeader.HEADER_FIELDS[x] != strings[x])
                                              .Select(x => strings[x])
                                              .ToArray();
            if (misMatchedColumns.Length > 0)
            {
                throw new VCFParsingError("We were not expecting column name '" + misMatchedColumns[0] + "' in that position");
            }

            int arrayIndex = VCFHeader.HEADER_FIELDS.Length; // start after verified columns
            bool sawFormatTag = false;
            if (arrayIndex < strings.Length)
            {
                if (!strings[arrayIndex].Equals("FORMAT"))
                {
                    throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
                }
                sawFormatTag = true;
                arrayIndex++;
            }

            // everything after FORMAT is a sample name
            while (arrayIndex < strings.Length)
            {
                sampleNames.Add(strings[arrayIndex++]);
            }

            if (sawFormatTag && sampleNames.Count == 0)
            {
                throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
            }
        }
        else
        {
            // NOTE(review): the Substring offsets below are fixed numbers that skip the line prefix;
            // they presumably track the lengths of the VCFConstants prefixes plus '=' - confirm.
            if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
            {
                VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.Substring(7), version);
                metaData.Add(info);
            }
            else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
            {
                VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.Substring(9), version);
                metaData.Add(filter);
            }
            else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
            {
                VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.Substring(9), version);
                metaData.Add(format);
            }
            else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
            {
                // contigs are numbered in the order in which they appear
                VCFContigHeaderLine contig = new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++);
                metaData.Add(contig);
            }
            else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
            {
                //TODO: Consider giving Alt header lines their own class
                VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description");
                metaData.Add(alt);
            }
            else
            {
                // generic key=value line; a line without '=' is silently ignored
                int equals = str.IndexOf('=');
                if (equals != -1)
                {
                    metaData.Add(new VCFHeaderLine(str.Substring(2, equals - 2), str.Substring(equals + 1)));
                }
            }
        }
    }

    this.header = new VCFHeader(metaData, sampleNames);
    if (doOnTheFlyModifications)
    {
        this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
    }
    return this.header;
}
/// <summary>
/// Create a VCF format header line from its textual form.
/// All work is delegated to the base constructor with SupportedHeaderLineType.FORMAT.
/// </summary>
/// <param name="line"> the header line </param>
/// <param name="version"> the VCF header version </param>
public VCFFormatHeaderLine(string line, VCFHeaderVersion version) : base(line, version, SupportedHeaderLineType.FORMAT) { }
/// <summary>
/// Create a simple VCF header line from its textual form.
/// The line is parsed with VCFHeaderLineTranslator.parseLine and the result is handed,
/// together with the key and the tag ordering, to the other constructor of this class.
/// </summary>
/// <param name="line"> the header line </param>
/// <param name="version"> the vcf header version </param>
/// <param name="key"> the key for this header line </param>
/// <param name="expectedTagOrdering"> the tag ordering expected for this header line </param>
public VCFSimpleHeaderLine(string line, VCFHeaderVersion version, string key, params string[] expectedTagOrdering)
    : this(key,
           VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering),
           expectedTagOrdering)
{
}
/// <summary>
/// Create a VCF contig header line from its textual form.
/// </summary>
/// <param name="line"> the header line </param>
/// <param name="version"> the vcf header version </param>
/// <param name="key"> the key for this header line </param>
/// <param name="contigIndex"> the index stored in this.contigIndex </param>
public VCFContigHeaderLine(string line, VCFHeaderVersion version, string key, int contigIndex)
    : base(line, version, key, null) // null: no expected tag ordering is passed to the base constructor
{
    this.contigIndex = contigIndex;
}
/// <summary>
/// Parse a header value line with the parser registered in mapping for the given version.
/// </summary>
/// <param name="version"> key used to look up the parser in mapping </param>
/// <param name="valueLine"> the text handed to that parser </param>
/// <param name="expectedTagOrder"> tag names forwarded to the parser unchanged </param>
/// <returns> the dictionary returned by the version-specific parser </returns>
public static IDictionary<string, string> parseLine(VCFHeaderVersion version, string valueLine, params string[] expectedTagOrder)
{
    var versionParser = mapping[version];
    return versionParser.parseLine(valueLine, expectedTagOrder);
}