Ejemplo n.º 1
0
        /// <summary>
        /// Writes the textual VCF header to <c>writer</c>: the version line, one line per
        /// metadata entry (skipping any entry whose key is a file-format string), and finally
        /// the column header line.
        /// </summary>
        /// <param name="header">
        /// Header to serialize. When <c>doNotWriteGenotypes</c> is set, a new header built only
        /// from <c>header.MetaDataInSortedOrder</c> is written instead of this one.
        /// </param>
        /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while writing.</exception>
        void writeHeader(VCFHeader header)
        {
            if (doNotWriteGenotypes)
            {
                // Rebuild from the metadata lines alone; no sample names are handed to the new header.
                header = new VCFHeader(header.MetaDataInSortedOrder);
            }

            try
            {
                // The file format line always goes out first, which is why format-string
                // entries are filtered out of the metadata loop below.
                writer.Write(VERSION_LINE + "\n");

                foreach (VCFHeaderLine metaLine in header.MetaDataInSortedOrder)
                {
                    if (!VCFHeaderVersion.IsFormatString(metaLine.Key))
                    {
                        writer.Write(VCFHeader.METADATA_INDICATOR);
                        writer.Write(metaLine.ToString());
                        writer.Write("\n");
                    }
                }

                // Column line: the fixed fields joined by the field separator.
                writer.Write(VCFHeader.HEADER_INDICATOR);
                bool needsSeparator = false;
                foreach (string column in VCFHeader.HEADER_FIELDS)
                {
                    if (needsSeparator)
                    {
                        writer.Write(VCFConstants.FIELD_SEPARATOR);
                    }
                    writer.Write(column.ToString());
                    needsSeparator = true;          // every column after the first is preceded by a separator
                }

                // The FORMAT column and the sample columns only appear when the header reports genotyping data.
                if (header.hasGenotypingData())
                {
                    writer.Write(VCFConstants.FIELD_SEPARATOR);
                    writer.Write("FORMAT");
                    foreach (string sampleName in header.GenotypeSampleNames)
                    {
                        writer.Write(VCFConstants.FIELD_SEPARATOR);
                        writer.Write(sampleName);
                    }
                }

                writer.Write("\n");
            }
            catch (IOException ioError)
            {
                throw new Exception("IOException writing the VCF header.", ioError);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Finds the compound header line that describes <paramref name="field"/>.
        /// <c>header.getFormatHeaderLine</c> is consulted first; <c>header.getInfoHeaderLine</c>
        /// is used only when the first lookup returns null.
        /// </summary>
        /// <param name="header">Header to search.</param>
        /// <param name="field">Name of the field whose header line is wanted.</param>
        /// <returns>The first non-null header line found for <paramref name="field"/>.</returns>
        /// <exception cref="VCFParsingError">Neither lookup produced a header line.</exception>
        public static VCFCompoundHeaderLine GetMetaDataForField(VCFHeader header, string field)
        {
            VCFCompoundHeaderLine formatLine = header.getFormatHeaderLine(field);
            if (formatLine != null)
            {
                return formatLine;
            }

            VCFCompoundHeaderLine infoLine = header.getInfoHeaderLine(field);
            if (infoLine != null)
            {
                return infoLine;
            }

            throw new VCFParsingError("Fully decoding VariantContext requires header line for all fields, but none was found for " + field);
        }
Ejemplo n.º 3
0
        // TODO: Add a c'tor that reads intervals.
        /// <summary>
        /// Opens <paramref name="vcfFile"/> for reading and parses its VCF header.
        /// Files whose extension is <c>.gz</c> (compared case-insensitively) are read through a
        /// <see cref="GZipStream"/>; all other files are read as ASCII text with a large buffer.
        /// </summary>
        /// <param name="vcfFile">The VCF file to read.</param>
        /// <exception cref="ArgumentException">No VCF header could be read from the file.</exception>
        public VCFParser(FileInfo vcfFile)
        {
            fileName = vcfFile.FullName;

            // Ordinal, case-insensitive so that "sample.vcf.GZ" is decompressed as well.
            if (string.Equals(vcfFile.Extension, ".gz", StringComparison.OrdinalIgnoreCase))
            {
                FileStream fs = vcfFile.OpenRead();
                GZipStream gz = new GZipStream(fs, CompressionMode.Decompress);
                this.reader = new StreamReader(gz);
            }
            else
            {
                this.reader = new StreamReader(vcfFile.OpenRead(), System.Text.Encoding.ASCII, true, 4000000);
            }

            try
            {
                VCFHeader header = vcfCodec.readHeader(reader);

                // With a statically typed VCFHeader local this test only fails for null.
                if (!(header is VCFHeader))
                {
                    throw new ArgumentException("The file " + vcfFile.FullName + " did not have a VCF header");
                }
                this.Header = header;
            }
            catch
            {
                // Construction failed, so no caller will ever get the chance to dispose this
                // parser: release the underlying file handle here before propagating.
                this.reader.Dispose();
                throw;
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Walks over the lines of a VCF header and passes every FORMAT and INFO line through the
        /// matching standards repair routine, returning a freshly allocated VCFHeader.
        /// </summary>
        /// <param name="header">Header whose lines (<c>MetaDataInInputOrder</c>) are examined.</param>
        /// <returns>
        /// A new VCFHeader built from the resulting lines and <c>header.GenotypeSampleNames</c>.
        /// </returns>
        public static VCFHeader repairStandardHeaderLines(VCFHeader header)
        {
            ISet <VCFHeaderLine> repairedLines = new LinkedHashSet <VCFHeaderLine>();

            foreach (VCFHeaderLine original in header.MetaDataInInputOrder)
            {
                // FORMAT lines are handed to the FORMAT standards.
                if (original is VCFFormatHeaderLine)
                {
                    repairedLines.Add(formatStandards.repair((VCFFormatHeaderLine)original));
                    continue;
                }

                // INFO lines are handed to the INFO standards.
                if (original is VCFInfoHeaderLine)
                {
                    repairedLines.Add(infoStandards.repair((VCFInfoHeaderLine)original));
                    continue;
                }

                // Every other kind of line is carried over untouched.
                repairedLines.Add(original);
            }

            return new VCFHeader(repairedLines, header.GenotypeSampleNames);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Writes a VCF header followed by one entry per variant to <paramref name="outFileName"/>.
 /// The file is opened in non-append mode using the encoding named by <c>encodingName</c>,
 /// and the writer is closed when the method exits, whether or not writing succeeded.
 /// </summary>
 /// <param name="outFileName">Path of the file to write.</param>
 /// <param name="variants">Variants to write, in enumeration order.</param>
 /// <param name="header">Header passed to <c>writeHeader</c> before any variant is written.</param>
 public void WriteVariants(string outFileName, IEnumerable <VariantContext> variants, VCFHeader header)
 {
     System.Text.Encoding enc = System.Text.Encoding.GetEncoding(encodingName);
     writer = new StreamWriter(outFileName, false, enc);
     try
     {
         writeHeader(header);
         foreach (var vc in variants)
         {
             writer.Write(getVariantLinetoWrite(vc));
         }
     }
     finally
     {
         // Close on every path: without this a failure while writing the header or a variant
         // would leak the file handle and leave the output file locked.
         writer.Close();
     }
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Creates a shallow copy of the meta data in VCF header toCopy by chaining to the
 /// constructor overload that accepts <c>toCopy.mMetaData</c>. Only <c>mMetaData</c> is
 /// forwarded; no other member of <paramref name="toCopy"/> is passed along here.
 /// </summary>
 /// <param name="toCopy"> the header whose meta data is handed to the chained constructor; must not be null because it is dereferenced </param>
 public VCFHeader(VCFHeader toCopy) : this(toCopy.mMetaData)
 {
 }
Ejemplo n.º 7
0
        /// <summary>
        /// create a VCF header from a set of header record lines
        /// </summary>
        /// <remarks>
        /// Lines that start with <c>VCFHeader.METADATA_INDICATOR</c> are parsed as metadata lines; any
        /// other line is treated as the column header line, validated against
        /// <c>VCFHeader.HEADER_FIELDS</c>, and mined for sample names. The resulting header is stored in
        /// <c>this.header</c> (after standard-line repair when <c>doOnTheFlyModifications</c> is set)
        /// and <c>this.version</c> is set to <paramref name="version"/>.
        /// </remarks>
        /// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
        /// <param name="version"> the header version; stored on this instance and passed to each structured header line that is created </param>
        /// <returns> a VCFHeader object </returns>
        /// <exception cref="VCFParsingError">
        /// the column header line has too few columns, an unexpected column name, a column other than
        /// FORMAT after the fixed columns, or a FORMAT column without any sample names
        /// </exception>
        protected internal virtual VCFHeader parseHeaderFromLines(IList <string> headerStrings, VCFHeaderVersion version)
        {
            this.version = version;
            ISet <VCFHeaderLine> metaData    = new LinkedHashSet <VCFHeaderLine> ();
            ISet <string>        sampleNames = new LinkedHashSet <string> ();
            int contigCounter = 0;

            // iterate over all the passed in strings
            // NOTE: all prefix tests use ordinal comparison; the prefixes are fixed file-format
            // tokens and must not be subject to culture-specific matching rules.
            foreach (string str in headerStrings)
            {
                if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))   //presumably the #CHROM POS ID REF ALT QUAL FILTER INFO   etc. line
                {
                    // drop the first character of the line before splitting it into column names
                    string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);
                    //check for null last string, grrr...
                    if (String.IsNullOrEmpty(strings.Last()))
                    {
                        strings = strings.Take(strings.Length - 1).ToArray();
                    }
                    if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
                    {
                        throw new VCFParsingError("There are not enough columns present in the header line: " + str);
                    }
                    //Verify Arrays
                    var misMatchedColumns = Enumerable.Range(0, VCFHeader.HEADER_FIELDS.Length).Where(x => VCFHeader.HEADER_FIELDS [x] != strings [x]).Select(x => strings [x]).ToArray();
                    if (misMatchedColumns.Length > 0)
                    {
                        throw new VCFParsingError("We were not expecting column name '" + misMatchedColumns [0] + "' in that position");
                    }
                    int  arrayIndex   = VCFHeader.HEADER_FIELDS.Length;                 //start after verified columns
                    bool sawFormatTag = false;
                    if (arrayIndex < strings.Length)
                    {
                        if (!strings [arrayIndex].Equals("FORMAT"))
                        {
                            throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings [arrayIndex] + "'");
                        }
                        sawFormatTag = true;
                        arrayIndex++;
                    }
                    // everything after FORMAT is a sample name
                    while (arrayIndex < strings.Length)
                    {
                        sampleNames.Add(strings [arrayIndex++]);
                    }
                    if (sawFormatTag && sampleNames.Count == 0)
                    {
                        throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
                    }
                }
                else
                {
                    // NOTE(review): the Substring offsets below presumably skip the matched prefix
                    // plus the '=' that follows it - confirm against the VCFConstants values.
                    if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
                    {
                        VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.Substring(7), version);
                        metaData.Add(info);
                    }
                    else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
                    {
                        VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.Substring(9), version);
                        metaData.Add(filter);
                    }
                    else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
                    {
                        VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.Substring(9), version);
                        metaData.Add(format);
                    }
                    else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
                    {
                        // contigCounter numbers the contig lines in the order they are encountered
                        VCFContigHeaderLine contig = new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++);
                        metaData.Add(contig);
                    }
                    else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
                    {
                        //TODO: Consider giving Alt header lines their own class
                        VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description");
                        metaData.Add(alt);
                    }
                    else
                    {
                        // Generic "##key=value" line; a metadata line without '=' is silently ignored.
                        int equals = str.IndexOf('=');
                        if (equals != -1)
                        {
                            metaData.Add(new VCFHeaderLine(str.Substring(2, equals - 2), str.Substring(equals + 1)));
                        }
                    }
                }
            }
            this.header = new VCFHeader(metaData, sampleNames);
            if (doOnTheFlyModifications)
            {
                this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
            }
            return(this.header);
        }