/// <summary>
/// Writes the VCF header to <c>writer</c>: the version line, every metadata line
/// (in sorted order) other than format-string lines, and finally the column header line.
/// </summary>
/// <param name="header">
/// Header to write. When <c>doNotWriteGenotypes</c> is set, a new header constructed
/// from only this header's sorted metadata lines is written in its place.
/// </param>
/// <exception cref="Exception">Wraps any IOException raised while writing.</exception>
void writeHeader(VCFHeader header)
{
    if (doNotWriteGenotypes)
    {
        // Rebuild the header from its metadata lines alone (sample names are not passed on).
        header = new VCFHeader(header.MetaDataInSortedOrder);
    }

    try
    {
        // The file format field needs to be written first.
        writer.Write(VERSION_LINE + "\n");

        foreach (VCFHeaderLine metaLine in header.MetaDataInSortedOrder)
        {
            // Format-string lines are skipped: the version line was already emitted above.
            if (!VCFHeaderVersion.IsFormatString(metaLine.Key))
            {
                writer.Write(VCFHeader.METADATA_INDICATOR);
                writer.Write(metaLine.ToString());
                writer.Write("\n");
            }
        }

        // Write out the column line.
        writer.Write(VCFHeader.HEADER_INDICATOR);
        bool needSeparator = false;
        foreach (string column in VCFHeader.HEADER_FIELDS)
        {
            // Separators go between fields only, never before the first one.
            if (needSeparator)
            {
                writer.Write(VCFConstants.FIELD_SEPARATOR);
            }
            needSeparator = true;
            writer.Write(column.ToString());
        }

        if (header.hasGenotypingData())
        {
            writer.Write(VCFConstants.FIELD_SEPARATOR);
            writer.Write("FORMAT");
            foreach (string sampleName in header.GenotypeSampleNames)
            {
                writer.Write(VCFConstants.FIELD_SEPARATOR);
                writer.Write(sampleName);
            }
        }

        writer.Write("\n");
    }
    catch (IOException ioError)
    {
        throw new Exception("IOException writing the VCF header.", ioError);
    }
}
/// <summary>
/// Looks up the compound header line describing <paramref name="field"/>, trying the
/// FORMAT header lines first and falling back to the INFO header lines.
/// </summary>
/// <param name="header">Header to search.</param>
/// <param name="field">Name of the field whose header line is wanted.</param>
/// <returns>The FORMAT line for the field if one exists, otherwise its INFO line.</returns>
/// <exception cref="VCFParsingError">Neither lookup returned a header line.</exception>
public static VCFCompoundHeaderLine GetMetaDataForField(VCFHeader header, string field)
{
    VCFCompoundHeaderLine formatLine = header.getFormatHeaderLine(field);
    if (formatLine != null)
    {
        return formatLine;
    }

    VCFCompoundHeaderLine infoLine = header.getInfoHeaderLine(field);
    if (infoLine != null)
    {
        return infoLine;
    }

    throw new VCFParsingError("Fully decoding VariantContext requires header line for all fields, but none was found for " + field);
}
// TODO: Add a c'tor that reads intervals.
/// <summary>
/// Opens a VCF file and reads its header.
/// Files whose extension is exactly ".gz" are read through a GZip decompression stream;
/// all other files are read as ASCII (with byte-order-mark detection) using a large buffer.
/// </summary>
/// <param name="vcfFile">The VCF file to open.</param>
/// <exception cref="ArgumentException">The codec did not return a header for the file.</exception>
public VCFParser(FileInfo vcfFile)
{
    fileName = vcfFile.FullName;
    if (vcfFile.Extension == ".gz")
    {
        FileStream fs = vcfFile.OpenRead();
        GZipStream gz = new GZipStream(fs, CompressionMode.Decompress);
        this.reader = new StreamReader(gz);
    }
    else
    {
        this.reader = new StreamReader(vcfFile.OpenRead(), System.Text.Encoding.ASCII, true, 4000000);
    }

    try
    {
        VCFHeader header = vcfCodec.readHeader(reader);
        if (!(header is VCFHeader))
        {
            throw new ArgumentException("The file " + vcfFile.FullName + " did not have a VCF header");
        }
        this.Header = header;
    }
    catch
    {
        // Construction failed, so the caller never receives an instance it could dispose:
        // release the underlying file handle here before propagating the error.
        this.reader.Dispose();
        throw;
    }
}
/// <summary>
/// Walks over the VCF header and repairs the standard VCF header lines in it, returning a freshly
/// allocated VCFHeader with standard VCF header lines repaired as necessary.
/// </summary>
/// <param name="header">Header whose metadata lines (in input order) are examined.</param>
/// <returns>
/// A new VCFHeader built from the possibly repaired lines and the genotype sample names
/// of <paramref name="header"/>.
/// </returns>
public static VCFHeader repairStandardHeaderLines(VCFHeader header)
{
    ISet<VCFHeaderLine> repairedLines = new LinkedHashSet<VCFHeaderLine>();

    foreach (VCFHeaderLine original in header.MetaDataInInputOrder)
    {
        // FORMAT lines are checked before INFO lines; any other line is copied unchanged.
        if (original is VCFFormatHeaderLine)
        {
            VCFHeaderLine repairedFormat = formatStandards.repair((VCFFormatHeaderLine)original);
            repairedLines.Add(repairedFormat);
            continue;
        }

        if (original is VCFInfoHeaderLine)
        {
            VCFHeaderLine repairedInfo = infoStandards.repair((VCFInfoHeaderLine)original);
            repairedLines.Add(repairedInfo);
            continue;
        }

        repairedLines.Add(original);
    }

    return new VCFHeader(repairedLines, header.GenotypeSampleNames);
}
/// <summary>
/// Writes a header followed by one line per variant to a new file, replacing any existing
/// file at <paramref name="outFileName"/>. The text encoding is resolved from
/// <c>encodingName</c>.
/// </summary>
/// <param name="outFileName">Path of the file to create or overwrite.</param>
/// <param name="variants">Variants to write, in enumeration order.</param>
/// <param name="header">Header passed to <c>writeHeader</c> before any variant is written.</param>
public void WriteVariants(string outFileName, IEnumerable<VariantContext> variants, VCFHeader header)
{
    System.Text.Encoding enc = System.Text.Encoding.GetEncoding(encodingName);
    writer = new StreamWriter(outFileName, false, enc);
    try
    {
        writeHeader(header);
        foreach (var vc in variants)
        {
            writer.Write(getVariantLinetoWrite(vc));
        }
    }
    finally
    {
        // Always release the file handle, even when the header or a variant fails to write.
        writer.Close();
    }
}
/// <summary>
/// Creates a shallow copy of the meta data in VCF header toCopy.
/// Delegates to the constructor overload that accepts a metadata collection, passing
/// <c>toCopy.mMetaData</c>; no other member of <paramref name="toCopy"/> is forwarded.
/// </summary>
/// <remarks>
/// NOTE(review): whether the metadata collection is copied or shared depends on the
/// delegated constructor, which is not shown here - confirm before mutating either header.
/// </remarks>
/// <param name="toCopy">
/// Header whose metadata is used. Must not be null: its <c>mMetaData</c> member is
/// dereferenced in the constructor initializer.
/// </param>
public VCFHeader(VCFHeader toCopy) : this(toCopy.mMetaData) { }
/// <summary>
/// Create a VCF header from a set of header record lines.
/// Lines beginning with the metadata indicator are turned into header line objects;
/// any other line is treated as the column header line and is validated against
/// <c>VCFHeader.HEADER_FIELDS</c>, with trailing columns after FORMAT collected as sample names.
/// </summary>
/// <param name="headerStrings">a list of strings that represent all the ## and # entries</param>
/// <param name="version">
/// header version; stored on this instance and passed to the typed header line constructors
/// </param>
/// <returns>
/// a VCFHeader object, also stored in <c>this.header</c>; when <c>doOnTheFlyModifications</c>
/// is set it is the result of repairing the standard header lines
/// </returns>
/// <exception cref="VCFParsingError">
/// The column line has too few columns, a fixed column name does not match, the column
/// following the fixed columns is not FORMAT, or FORMAT is present without any sample names.
/// </exception>
protected internal virtual VCFHeader parseHeaderFromLines(IList<string> headerStrings, VCFHeaderVersion version)
{
    this.version = version;

    ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine>();
    ISet<string> sampleNames = new LinkedHashSet<string>();
    int contigCounter = 0;

    // iterate over all the passed in strings
    foreach (string str in headerStrings)
    {
        // Header prefixes are fixed tokens, so they are matched ordinally rather than
        // with the culture-sensitive default comparison.
        if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
        {
            // presumably the #CHROM POS ID REF ALT QUAL FILTER INFO etc. line
            string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);

            // A trailing separator yields an empty last element; drop it.
            if (String.IsNullOrEmpty(strings.Last()))
            {
                strings = strings.Take(strings.Length - 1).ToArray();
            }

            if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
            {
                throw new VCFParsingError("There are not enough columns present in the header line: " + str);
            }

            // Verify the fixed columns, reporting the first one that does not match.
            for (int i = 0; i < VCFHeader.HEADER_FIELDS.Length; i++)
            {
                if (VCFHeader.HEADER_FIELDS[i] != strings[i])
                {
                    throw new VCFParsingError("We were not expecting column name '" + strings[i] + "' in that position");
                }
            }

            int arrayIndex = VCFHeader.HEADER_FIELDS.Length; // start after verified columns
            bool sawFormatTag = false;
            if (arrayIndex < strings.Length)
            {
                if (!strings[arrayIndex].Equals("FORMAT"))
                {
                    throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
                }
                sawFormatTag = true;
                arrayIndex++;
            }

            // Everything after FORMAT is a sample name.
            while (arrayIndex < strings.Length)
            {
                sampleNames.Add(strings[arrayIndex++]);
            }

            if (sawFormatTag && sampleNames.Count == 0)
            {
                throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
            }
        }
        else
        {
            // The numeric Substring offsets below are kept exactly as they were; they are
            // not derived from the *_HEADER_START constants, whose values are not visible here.
            if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
            {
                VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.Substring(7), version);
                metaData.Add(info);
            }
            else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
            {
                VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.Substring(9), version);
                metaData.Add(filter);
            }
            else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
            {
                VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.Substring(9), version);
                metaData.Add(format);
            }
            else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
            {
                // contigCounter is passed to each contig line and incremented in order of appearance.
                VCFContigHeaderLine contig = new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++);
                metaData.Add(contig);
            }
            else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
            {
                //TODO: Consider giving Alt header lines their own class
                VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description");
                metaData.Add(alt);
            }
            else
            {
                // Generic "##key=value" line; lines without '=' are silently ignored.
                int equals = str.IndexOf('=');
                if (equals != -1)
                {
                    metaData.Add(new VCFHeaderLine(str.Substring(2, equals - 2), str.Substring(equals + 1)));
                }
            }
        }
    }

    this.header = new VCFHeader(metaData, sampleNames);
    if (doOnTheFlyModifications)
    {
        this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
    }
    return this.header;
}