Пример #1
0
        /// <summary>
        /// Creates a compound VCF header line by parsing the ID, Number, Type and Description tags
        /// out of a raw header line.
        /// </summary>
        /// <param name="line">   the header line </param>
        /// <param name="version">      the VCF header version; selects the line parser and which "unbounded" Number encoding is accepted </param>
        /// <param name="lineType">     the header line type; its string form is the first argument passed to the base constructor </param>
        /// <exception cref="VCFParsingError"> the Number tag is neither a recognised count symbol nor a non-negative integer, or the Type tag does not parse as a VCFHeaderLineType </exception>
        /// <exception cref="ArgumentException"> the type is Flag but allowFlagValues() returns false </exception>
        protected internal VCFCompoundHeaderLine(string line, VCFHeaderVersion version, SupportedHeaderLineType lineType) : base(lineType.ToString(), "")
        {
            IDictionary<string, string> mapping = VCFHeaderLineTranslator.parseLine(version, line, "ID", "Number", "Type", "Description");

            name  = mapping["ID"];
            count = -1; // stays -1 for every non-INTEGER count type
            string numberStr = mapping["Number"];

            if (numberStr.Equals(VCFConstants.PER_ALTERNATE_ALLELE_COUNT))
            {
                countType = VCFHeaderLineCount.A;
            }
            else if (numberStr.Equals(VCFConstants.PER_ALLELE_COUNT))
            {
                countType = VCFHeaderLineCount.R;
            }
            else if (numberStr.Equals(VCFConstants.PER_GENOTYPE_COUNT))
            {
                countType = VCFHeaderLineCount.G;
            }
            else if (((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1 || version == VCFHeaderVersion.VCF4_2) && numberStr.Equals(VCFConstants.UNBOUNDED_ENCODING_v4)) || ((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) && numberStr.Equals(VCFConstants.UNBOUNDED_ENCODING_v3)))
            {
                countType = VCFHeaderLineCount.UNBOUNDED;
            }
            else
            {
                countType = VCFHeaderLineCount.INTEGER;

                // Parse with the invariant culture so the result does not depend on the machine's locale,
                // and report malformed numbers as a parsing error rather than a raw FormatException/OverflowException.
                int parsedCount;
                if (!int.TryParse(numberStr, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out parsedCount))
                {
                    throw new VCFParsingError("Number '" + numberStr + "' is not a valid count for VCF header field " + name);
                }
                count = parsedCount;
            }
            if (count < 0 && countType == VCFHeaderLineCount.INTEGER)
            {
                throw new VCFParsingError("Count < 0 for fixed size VCF header field " + name);
            }
            try
            {
                type = (VCFHeaderLineType)Enum.Parse(typeof(VCFHeaderLineType), mapping["Type"]);
            }
            catch (Exception)
            {
                throw new VCFParsingError(mapping["Type"] + " is not a valid type in the VCF specification (note that types are case-sensitive)");
            }
            if (type == VCFHeaderLineType.Flag && !allowFlagValues())
            {
                throw new ArgumentException("Flag is an unsupported type for this kind of field");
            }

            // The dictionary indexer throws on a missing key for standard dictionaries, which would make the
            // "no description" fallback below unreachable; TryGetValue yields null instead.
            string parsedDescription;
            description = mapping.TryGetValue("Description", out parsedDescription) ? parsedDescription : null;
            if (description == null && ALLOW_UNBOUND_DESCRIPTIONS) // handle the case where there's no description provided
            {
                description = UNBOUND_DESCRIPTION;
            }

            this.lineType = lineType;
            validate();
        }
Пример #2
0
        /// <summary>
        /// Writes the VCF header to <c>writer</c>: the version line first, then every metadata line whose key
        /// is not recognised by VCFHeaderVersion.IsFormatString, and finally the column header line
        /// (followed by FORMAT and the sample names when the header has genotyping data).
        /// </summary>
        /// <param name="header"> the header to write; when doNotWriteGenotypes is set, a new VCFHeader built
        /// from its sorted metadata alone is written instead </param>
        /// <exception cref="Exception"> wraps any IOException raised while writing </exception>
        void writeHeader(VCFHeader header)
        {
            if (doNotWriteGenotypes)
            {
                header = new VCFHeader(header.MetaDataInSortedOrder);
            }

            try
            {
                // the file format field needs to be written first
                writer.Write(VERSION_LINE + "\n");

                foreach (VCFHeaderLine metaLine in header.MetaDataInSortedOrder)
                {
                    // format/version lines were already covered by VERSION_LINE above
                    if (!VCFHeaderVersion.IsFormatString(metaLine.Key))
                    {
                        writer.Write(VCFHeader.METADATA_INDICATOR);
                        writer.Write(metaLine.ToString());
                        writer.Write("\n");
                    }
                }

                // write out the column line; a separator goes before every field except the first
                writer.Write(VCFHeader.HEADER_INDICATOR);
                bool needsSeparator = false;
                foreach (string column in VCFHeader.HEADER_FIELDS)
                {
                    if (needsSeparator)
                    {
                        writer.Write(VCFConstants.FIELD_SEPARATOR);
                    }
                    needsSeparator = true;
                    writer.Write(column.ToString());
                }

                if (header.hasGenotypingData())
                {
                    writer.Write(VCFConstants.FIELD_SEPARATOR);
                    writer.Write("FORMAT");
                    foreach (string sampleName in header.GenotypeSampleNames)
                    {
                        writer.Write(VCFConstants.FIELD_SEPARATOR);
                        writer.Write(sampleName);
                    }
                }
                writer.Write("\n");
            }
            catch (IOException ioFailure)
            {
                throw new Exception("IOException writing the VCF header.", ioFailure);
            }
        }
Пример #3
0
        /// <summary>
        /// Collects every metadata line whose key is recognised by VCFHeaderVersion.IsFormatString
        /// and removes those lines from the metadata. No version validation is performed here.
        /// TODO: Should only be one format line
        /// </summary>
        public virtual void loadVCFVersion()
        {
            IList <VCFHeaderLine> formatLines = new List <VCFHeaderLine>();

            foreach (VCFHeaderLine candidate in mMetaData)
            {
                if (!VCFHeaderVersion.IsFormatString(candidate.Key))
                {
                    continue;
                }
                formatLines.Add(candidate);
            }

            // removal is deferred until after the loop so the collection is not modified while enumerating
            mMetaData.RemoveRange(formatLines);
        }
Пример #4
0
        /// <summary>
        /// Reads header lines from the reader up to and including the column header line,
        /// then builds the header from them via parseHeaderFromLines.
        /// </summary>
        /// <param name="reader"> the line reader to take header lines from </param>
        /// <returns> the VCFHeader parsed from the collected header lines </returns>
        /// <exception cref="VCFParsingError"> the version line is missing, unsupported or not accepted by this codec,
        /// or a non-header line / end of input is reached before the column header line </exception>
        /// <exception cref="Exception"> wraps any IOException raised while reading </exception>
        public virtual VCFHeader readHeader(StreamReader reader)
        {
            IList <string> headerStrings = new List <string>();
            string         line;

            try
            {
                bool foundHeaderVersion = false;
                while ((line = reader.ReadLine()) != null)
                {
                    lineNo++;
                    // Ordinal comparison: the indicators are fixed markers, not linguistic text
                    if (line.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))
                    {
                        // Substring(2) drops the two leading characters before splitting into key and value
                        string[] lineFields = line.Substring(2).Split('=');
                        if (lineFields.Length == 2 && VCFHeaderVersion.IsFormatString(lineFields[0]))
                        {
                            if (!VCFHeaderVersion.IsVersionString(lineFields[1]))
                            {
                                throw new VCFParsingError("Header: " + lineFields[1] + " is not a supported version");
                            }
                            foundHeaderVersion = true;
                            version            = VCFHeaderVersion.ToHeaderVersion(lineFields[1]);
                            if (!this.AcceptableVersions.Contains(version))
                            {
                                throw new VCFParsingError("This codec is strictly for " + Name + "; please use a different codec for " + lineFields[1]);
                            }
                        }
                        headerStrings.Add(line);
                    }
                    else if (line.StartsWith(VCFHeader.HEADER_INDICATOR, StringComparison.Ordinal))
                    {//should be only one such line
                        if (!foundHeaderVersion)
                        {
                            throw new VCFParsingError("We never saw a header line specifying VCF version");
                        }
                        headerStrings.Add(line);
                        return(parseHeaderFromLines(headerStrings, version));
                    }
                    else
                    {
                        // a non-header line showed up before the column header line
                        throw new VCFParsingError("We never saw the required CHROM header line (starting with one #) for the input VCF file");
                    }
                }
            }
            catch (IOException e)
            {
                throw new Exception("IO Exception ", e);
            }
            // input ended without a column header line
            throw new VCFParsingError("We never saw the required CHROM header line (starting with one #) for the input VCF file");
        }
Пример #5
0
        /// <summary>
        /// Extracts the VCF header version from a version line of the form <c>key=value</c>.
        /// </summary>
        /// <param name="versionLine"> the raw version line; it is passed through clean() before parsing </param>
        /// <returns> the header version that ToHeaderVersion maps the value part to </returns>
        /// <exception cref="VCFParsingError"> the line is not a single key=value pair, the key (minus its first two
        /// characters) is not a format string, or the value is not a supported version </exception>
        public static VCFHeaderVersion GetHeaderVersion(String versionLine)
        {
            versionLine = clean(versionLine);
            String[] lineFields = versionLine.Split('=');

            // The key must be at least two characters long, otherwise Substring(2) would throw
            // ArgumentOutOfRangeException instead of the intended parsing error.
            if (lineFields.Length != 2 || lineFields[0].Length < 2 || !IsFormatString(lineFields[0].Substring(2)))
            {
                throw new VCFParsingError(versionLine + " is not a valid VCF version line");
            }
            VCFHeaderVersion vcfHV = ToHeaderVersion(lineFields[1]);

            if (vcfHV == null)
            {
                throw new VCFParsingError(lineFields[1] + " is not a supported version");
            }
            return(vcfHV);
        }
Пример #6
0
		/// <summary>
		/// Looks up the line parser registered for <paramref name="version"/> in <c>mapping</c>
		/// and delegates the parsing of the header line to it.
		/// </summary>
		/// <param name="version"> the VCF header version used to select the parser </param>
		/// <param name="valueLine"> the header line text to parse </param>
		/// <param name="expectedTagOrder"> the tags forwarded to the parser as the expected order </param>
		/// <returns> the tag/value dictionary produced by the selected parser </returns>
		public static IDictionary<string, string> parseLine(VCFHeaderVersion version, string valueLine, params string[] expectedTagOrder)
		{
			var lineParser = mapping[version];
			return lineParser.parseLine(valueLine, expectedTagOrder);
		}
Пример #7
0
        /// <summary>
        /// create a VCF header from a set of header record lines
        /// </summary>
        /// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
        /// <param name="version"> the VCF header version; stored on this instance and passed to every header line constructor </param>
        /// <returns> a VCFHeader object (also stored in <c>this.header</c>); when doOnTheFlyModifications is set it is
        /// first passed through VCFStandardHeaderLines.repairStandardHeaderLines </returns>
        /// <exception cref="VCFParsingError"> the column line has too few columns, an unexpected column name,
        /// a non-FORMAT column after the fixed columns, or a FORMAT column without any sample names </exception>
        protected internal virtual VCFHeader parseHeaderFromLines(IList <string> headerStrings, VCFHeaderVersion version)
        {
            this.version = version;
            ISet <VCFHeaderLine> metaData    = new LinkedHashSet <VCFHeaderLine> ();
            ISet <string>        sampleNames = new LinkedHashSet <string> ();
            int contigCounter = 0;

            // iterate over all the passed in strings
            foreach (string str in headerStrings)
            {
                // Ordinal comparisons throughout: the prefixes are fixed markers, not linguistic text
                if (!str.StartsWith(VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal))                   //presumably the #CHROM POS ID REF ALT QUAL FILTER INFO   etc. line
                {
                    // drop the first character, then split into column names
                    string[] strings = str.Substring(1).Split(VCFConstants.FIELD_SEPARATOR_CHAR);
                    // a trailing separator leaves an empty last element; discard it
                    if (String.IsNullOrEmpty(strings.Last()))
                    {
                        strings = strings.Take(strings.Length - 1).ToArray();
                    }
                    if (strings.Length < VCFHeader.HEADER_FIELDS.Length)
                    {
                        throw new VCFParsingError("There are not enough columns present in the header line: " + str);
                    }
                    // verify the fixed columns in order and report the first one that does not match
                    for (int i = 0; i < VCFHeader.HEADER_FIELDS.Length; i++)
                    {
                        if (VCFHeader.HEADER_FIELDS[i] != strings[i])
                        {
                            throw new VCFParsingError("We were not expecting column name '" + strings[i] + "' in that position");
                        }
                    }
                    int  arrayIndex   = VCFHeader.HEADER_FIELDS.Length;                 //start after verified columns
                    bool sawFormatTag = false;
                    if (arrayIndex < strings.Length)
                    {
                        if (!strings[arrayIndex].Equals("FORMAT"))
                        {
                            throw new VCFParsingError("we were expecting column name 'FORMAT' but we saw '" + strings[arrayIndex] + "'");
                        }
                        sawFormatTag = true;
                        arrayIndex++;
                    }
                    // everything after FORMAT is a sample name
                    while (arrayIndex < strings.Length)
                    {
                        sampleNames.Add(strings[arrayIndex++]);
                    }
                    if (sawFormatTag && sampleNames.Count == 0)
                    {
                        throw new VCFParsingError("The FORMAT field was provided but there is no genotype/sample data");
                    }
                }
                else
                {
                    // NOTE(review): the Substring offsets below presumably equal the length of the matching
                    // *_HEADER_START constant plus one for '=' — confirm against VCFConstants.
                    if (str.StartsWith(VCFConstants.INFO_HEADER_START, StringComparison.Ordinal))
                    {
                        VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.Substring(7), version);
                        metaData.Add(info);
                    }
                    else if (str.StartsWith(VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal))
                    {
                        VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.Substring(9), version);
                        metaData.Add(filter);
                    }
                    else if (str.StartsWith(VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal))
                    {
                        VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.Substring(9), version);
                        metaData.Add(format);
                    }
                    else if (str.StartsWith(VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal))
                    {
                        // contigCounter gives each contig line its position among the contig lines seen so far
                        VCFContigHeaderLine contig = new VCFContigHeaderLine(str.Substring(9), version, VCFConstants.CONTIG_HEADER_START.Substring(2), contigCounter++);
                        metaData.Add(contig);
                    }
                    else if (str.StartsWith(VCFConstants.ALT_HEADER_START, StringComparison.Ordinal))
                    {
                        //TODO: Consider giving Alt header lines their own class
                        VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.Substring(6), version, VCFConstants.ALT_HEADER_START.Substring(2), "ID", "Description");
                        metaData.Add(alt);
                    }
                    else
                    {
                        // generic key=value line; lines without '=' are silently skipped
                        int equals = str.IndexOf('=');
                        if (equals != -1)
                        {
                            metaData.Add(new VCFHeaderLine(str.Substring(2, equals - 2), str.Substring(equals + 1)));
                        }
                    }
                }
            }
            this.header = new VCFHeader(metaData, sampleNames);
            if (doOnTheFlyModifications)
            {
                this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header);
            }
            return(this.header);
        }
Пример #8
0
		/// <summary>
		/// Creates a compound VCF header line by parsing the ID, Number, Type and Description tags
		/// out of a raw header line.
		/// </summary>
		/// <param name="line">   the header line </param>
		/// <param name="version">      the VCF header version; selects the line parser and which "unbounded" Number encoding is accepted </param>
		/// <param name="lineType">     the header line type; its string form is the first argument passed to the base constructor </param>
		/// <exception cref="VCFParsingError"> the Number tag is neither a recognised count symbol nor a non-negative integer, or the Type tag does not parse as a VCFHeaderLineType </exception>
		/// <exception cref="System.ArgumentException"> the type is Flag but allowFlagValues() returns false </exception>
		protected internal VCFCompoundHeaderLine(string line, VCFHeaderVersion version, SupportedHeaderLineType lineType) : base(lineType.ToString(), "")
		{
			IDictionary<string, string> mapping = VCFHeaderLineTranslator.parseLine(version,line, "ID","Number","Type","Description");
			name = mapping["ID"];
			count = -1; // stays -1 for every non-INTEGER count type
			string numberStr = mapping["Number"];
			if (numberStr.Equals(VCFConstants.PER_ALLELE_COUNT))
			{
				countType = VCFHeaderLineCount.A;
			}
			else if (numberStr.Equals(VCFConstants.PER_GENOTYPE_COUNT))
			{
				countType = VCFHeaderLineCount.G;
			}
			else if (((version == VCFHeaderVersion.VCF4_0 || version == VCFHeaderVersion.VCF4_1 || version == VCFHeaderVersion.VCF4_2) && numberStr.Equals(VCFConstants.UNBOUNDED_ENCODING_v4)) || ((version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) && numberStr.Equals(VCFConstants.UNBOUNDED_ENCODING_v3)))
			{
				countType = VCFHeaderLineCount.UNBOUNDED;
			}
			else
			{
				countType = VCFHeaderLineCount.INTEGER;

				// Parse with the invariant culture so the result does not depend on the machine's locale,
				// and report malformed numbers as a parsing error rather than a raw FormatException/OverflowException.
				int parsedCount;
				if (!int.TryParse(numberStr, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out parsedCount))
				{
					throw new VCFParsingError("Number '" + numberStr + "' is not a valid count for VCF header field " + name);
				}
				count = parsedCount;
			}
			if (count < 0 && countType == VCFHeaderLineCount.INTEGER)
			{
				throw new VCFParsingError("Count < 0 for fixed size VCF header field " + name);
			}
			try
			{
				type = (VCFHeaderLineType) Enum.Parse(typeof(VCFHeaderLineType), mapping["Type"]);
			}
			catch (Exception)
			{
				throw new VCFParsingError(mapping["Type"] + " is not a valid type in the VCF specification (note that types are case-sensitive)");
			}
			if (type == VCFHeaderLineType.Flag && !allowFlagValues())
			{
				throw new System.ArgumentException("Flag is an unsupported type for this kind of field");
			}

			// The dictionary indexer throws on a missing key for standard dictionaries, which would make the
			// "no description" fallback below unreachable; TryGetValue yields null instead.
			string parsedDescription;
			description = mapping.TryGetValue("Description", out parsedDescription) ? parsedDescription : null;
			if (description == null && ALLOW_UNBOUND_DESCRIPTIONS) // handle the case where there's no description provided
			{
				description = UNBOUND_DESCRIPTION;
			}

			this.lineType = lineType;
			validate();
		}
Пример #9
0
 /// <summary>
 /// Creates a VCF FILTER header line from a raw header line.
 /// Forwards to the base constructor with the key "FILTER" and the tags "ID" and "Description".
 /// </summary>
 /// <param name="line">      the header line </param>
 /// <param name="version">   the vcf header version </param>
 public VCFFilterHeaderLine(string line, VCFHeaderVersion version) : base(line, version, "FILTER", "ID", "Description")
 {
 }
Пример #10
0
		/// <summary>
		/// Reads header lines from the reader up to and including the column header line,
		/// then builds the header from them via parseHeaderFromLines.
		/// </summary>
		/// <param name="reader"> the line reader to take header lines from </param>
		/// <returns> the VCFHeader parsed from the collected header lines </returns>
		/// <exception cref="VCFParsingError"> the version line is missing, unsupported or not accepted by this codec,
		/// or a non-header line / end of input is reached before the column header line </exception>
		/// <exception cref="Exception"> wraps any IOException raised while reading </exception>
		public virtual VCFHeader readHeader (StreamReader reader)
		{
			IList<string> headerStrings = new List<string> ();
			string line;
			try {
				bool foundHeaderVersion = false;
				while ((line = reader.ReadLine ()) != null) {
					lineNo++;
					// Ordinal comparison: the indicators are fixed markers, not linguistic text
					if (line.StartsWith (VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal)) {
						// Substring(2) drops the two leading characters before splitting into key and value
						string[] lineFields = line.Substring (2).Split ('=');
						if (lineFields.Length == 2 && VCFHeaderVersion.IsFormatString (lineFields [0])) {
							if (!VCFHeaderVersion.IsVersionString (lineFields [1])) {
								throw new VCFParsingError ("Header: " + lineFields [1] + " is not a supported version");
							}
							foundHeaderVersion = true;
							version = VCFHeaderVersion.ToHeaderVersion (lineFields [1]);
							if (!this.AcceptableVersions.Contains (version)) {
								throw new VCFParsingError ("This codec is strictly for " + Name + "; please use a different codec for " + lineFields [1]);
							}
						}
						headerStrings.Add (line);
					} else if (line.StartsWith (VCFHeader.HEADER_INDICATOR, StringComparison.Ordinal)) {//should be only one such line
						if (!foundHeaderVersion) {
							throw new VCFParsingError ("We never saw a header line specifying VCF version");
						}
						headerStrings.Add (line);
						return parseHeaderFromLines (headerStrings, version);
					} else {
						// a non-header line showed up before the column header line
						throw new VCFParsingError ("We never saw the required CHROM header line (starting with one #) for the input VCF file");
					}
				}
			} catch (IOException e) {
				throw new Exception ("IO Exception ", e);
			}
			// input ended without a column header line
			throw new VCFParsingError ("We never saw the required CHROM header line (starting with one #) for the input VCF file");
		}
Пример #11
0
		/// <summary>
		/// create a VCF header from a set of header record lines
		/// </summary>
		/// <param name="headerStrings"> a list of strings that represent all the ## and # entries </param>
		/// <param name="version"> the VCF header version; stored on this instance and passed to every header line constructor </param>
		/// <returns> a VCFHeader object (also stored in <c>this.header</c>); when doOnTheFlyModifications is set it is
		/// first passed through VCFStandardHeaderLines.repairStandardHeaderLines </returns>
		/// <exception cref="VCFParsingError"> the column line has too few columns, an unexpected column name,
		/// a non-FORMAT column after the fixed columns, or a FORMAT column without any sample names </exception>
		protected internal virtual VCFHeader parseHeaderFromLines (IList<string> headerStrings, VCFHeaderVersion version)
		{
			this.version = version;
			ISet<VCFHeaderLine> metaData = new LinkedHashSet<VCFHeaderLine> ();
			ISet<string> sampleNames = new LinkedHashSet<string> ();
			int contigCounter = 0;
			// iterate over all the passed in strings
			foreach (string str in headerStrings) {
				// Ordinal comparisons throughout: the prefixes are fixed markers, not linguistic text
				if (!str.StartsWith (VCFHeader.METADATA_INDICATOR, StringComparison.Ordinal)) {//presumably the #CHROM POS ID REF ALT QUAL FILTER INFO   etc. line
					// drop the first character, then split into column names
					string[] strings = str.Substring (1).Split (VCFConstants.FIELD_SEPARATOR_CHAR);
					// a trailing separator leaves an empty last element; discard it
					if (String.IsNullOrEmpty (strings.Last ())) {
						strings = strings.Take (strings.Length - 1).ToArray ();
					}
					if (strings.Length < VCFHeader.HEADER_FIELDS.Length) {
						throw new VCFParsingError ("There are not enough columns present in the header line: " + str);
					}
					// verify the fixed columns in order and report the first one that does not match
					for (int i = 0; i < VCFHeader.HEADER_FIELDS.Length; i++) {
						if (VCFHeader.HEADER_FIELDS [i] != strings [i]) {
							throw new VCFParsingError ("We were not expecting column name '" + strings [i] + "' in that position");
						}
					}
					int arrayIndex = VCFHeader.HEADER_FIELDS.Length;//start after verified columns
					bool sawFormatTag = false;
					if (arrayIndex < strings.Length) {
						if (!strings [arrayIndex].Equals ("FORMAT")) {
							throw new VCFParsingError ("we were expecting column name 'FORMAT' but we saw '" + strings [arrayIndex] + "'");
						}
						sawFormatTag = true;
						arrayIndex++;
					}
					// everything after FORMAT is a sample name
					while (arrayIndex < strings.Length) {
						sampleNames.Add (strings [arrayIndex++]);
					}
					if (sawFormatTag && sampleNames.Count == 0) {
						throw new VCFParsingError ("The FORMAT field was provided but there is no genotype/sample data");
					}

				} else {
					// NOTE(review): the Substring offsets below presumably equal the length of the matching
					// *_HEADER_START constant plus one for '=' — confirm against VCFConstants.
					if (str.StartsWith (VCFConstants.INFO_HEADER_START, StringComparison.Ordinal)) {
						VCFInfoHeaderLine info = new VCFInfoHeaderLine (str.Substring (7), version);
						metaData.Add (info);
					} else if (str.StartsWith (VCFConstants.FILTER_HEADER_START, StringComparison.Ordinal)) {
						VCFFilterHeaderLine filter = new VCFFilterHeaderLine (str.Substring (9), version);
						metaData.Add (filter);
					} else if (str.StartsWith (VCFConstants.FORMAT_HEADER_START, StringComparison.Ordinal)) {
						VCFFormatHeaderLine format = new VCFFormatHeaderLine (str.Substring (9), version);
						metaData.Add (format);
					} else if (str.StartsWith (VCFConstants.CONTIG_HEADER_START, StringComparison.Ordinal)) {
						// contigCounter gives each contig line its position among the contig lines seen so far
						VCFContigHeaderLine contig = new VCFContigHeaderLine (str.Substring (9), version, VCFConstants.CONTIG_HEADER_START.Substring (2), contigCounter++);
						metaData.Add (contig);
					} else if (str.StartsWith (VCFConstants.ALT_HEADER_START, StringComparison.Ordinal)) {
						//TODO: Consider giving Alt header lines their own class
						VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine (str.Substring (6), version, VCFConstants.ALT_HEADER_START.Substring (2), "ID", "Description");
						metaData.Add (alt);
					} else {
						// generic key=value line; lines without '=' are silently skipped
						int equals = str.IndexOf ('=');
						if (equals != -1) {
							metaData.Add (new VCFHeaderLine (str.Substring (2, equals - 2), str.Substring (equals + 1)));
						}
					}
				}
			}
			this.header = new VCFHeader (metaData, sampleNames);
			if (doOnTheFlyModifications) {
				this.header = VCFStandardHeaderLines.repairStandardHeaderLines (this.header);
			}
			return this.header;
		}
Пример #12
0
 /// <summary>
 /// Creates a VCF FORMAT header line from a raw header line.
 /// Forwards to the base constructor with SupportedHeaderLineType.FORMAT as the line type.
 /// </summary>
 /// <param name="line">      the header line </param>
 /// <param name="version">   the vcf header version </param>
 public VCFFormatHeaderLine(string line, VCFHeaderVersion version)
     : base(line, version, SupportedHeaderLineType.FORMAT)
 {
 }
Пример #13
0
		/// <summary>
		/// Creates a simple VCF header line from a raw header line.
		/// The line is parsed with VCFHeaderLineTranslator.parseLine and the resulting tag/value mapping
		/// is handed to the (key, mapping, expectedTagOrdering) constructor of this class.
		/// </summary>
		/// <param name="line">      the header line </param>
		/// <param name="version">   the vcf header version </param>
		/// <param name="key">            the key for this header line </param>
		/// <param name="expectedTagOrdering"> the tag ordering expected for this header line </param>
		public VCFSimpleHeaderLine(string line, VCFHeaderVersion version, string key, params string[] expectedTagOrdering) 
            : this(key, VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering), expectedTagOrdering)
		{

		}
Пример #14
0
		/// <summary>
		/// create a VCF contig header line
		/// </summary>
		/// <param name="line">      the header line </param>
		/// <param name="version">   the vcf header version </param>
		/// <param name="key">            the key for this header line </param>
		/// <param name="contigIndex"> the index stored on this line as <c>contigIndex</c> </param>
		// null is passed as the base constructor's last argument.
		// NOTE(review): presumably this means no tag ordering is enforced for contig lines — confirm against the base class.
		public VCFContigHeaderLine(string line, VCFHeaderVersion version, string key, int contigIndex) : base(line, version, key, null)
		{
			this.contigIndex = contigIndex;
		}
Пример #15
0
		/// <summary>
		/// Creates a VCF FILTER header line from a raw header line.
		/// Forwards to the base constructor with the key "FILTER" and the tags "ID" and "Description".
		/// </summary>
		/// <param name="line">      the header line </param>
		/// <param name="version">   the vcf header version </param>
		public VCFFilterHeaderLine(string line, VCFHeaderVersion version) : base(line, version, "FILTER", "ID", "Description")
		{
		}
Пример #16
0
 /// <summary>
 /// Creates a simple VCF header line from a raw header line.
 /// The line is parsed with VCFHeaderLineTranslator.parseLine and the resulting tag/value mapping
 /// is handed to the (key, mapping, expectedTagOrdering) constructor of this class.
 /// </summary>
 /// <param name="line">      the header line </param>
 /// <param name="version">   the vcf header version </param>
 /// <param name="key">            the key for this header line </param>
 /// <param name="expectedTagOrdering"> the tag ordering expected for this header line </param>
 public VCFSimpleHeaderLine(string line, VCFHeaderVersion version, string key, params string[] expectedTagOrdering)
     : this(key, VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering), expectedTagOrdering)
 {
 }
Пример #17
0
 /// <summary>
 /// create a VCF contig header line
 /// </summary>
 /// <param name="line">      the header line </param>
 /// <param name="version">   the vcf header version </param>
 /// <param name="key">            the key for this header line </param>
 /// <param name="contigIndex"> the index stored on this line as <c>contigIndex</c> </param>
 // null is passed as the base constructor's last argument.
 // NOTE(review): presumably this means no tag ordering is enforced for contig lines — confirm against the base class.
 public VCFContigHeaderLine(string line, VCFHeaderVersion version, string key, int contigIndex) : base(line, version, key, null)
 {
     this.contigIndex = contigIndex;
 }
Пример #18
0
 /// <summary>
 /// Looks up the line parser registered for <paramref name="version"/> in <c>mapping</c>
 /// and delegates the parsing of the header line to it.
 /// </summary>
 /// <param name="version"> the VCF header version used to select the parser </param>
 /// <param name="valueLine"> the header line text to parse </param>
 /// <param name="expectedTagOrder"> the tags forwarded to the parser as the expected order </param>
 /// <returns> the tag/value dictionary produced by the selected parser </returns>
 public static IDictionary <string, string> parseLine(VCFHeaderVersion version, string valueLine, params string[] expectedTagOrder)
 {
     var lineParser = mapping[version];
     return lineParser.parseLine(valueLine, expectedTagOrder);
 }