/// <summary>
/// Parses one already-split line of a VCF file into a <see cref="VariantContext"/>.
/// </summary>
/// <remarks>
/// Increments <c>lineNo</c> on every call. Problems found in the line are reported through
/// <c>generateException</c>; that helper is defined outside this block and is presumably
/// throwing (TODO confirm). Numbers are parsed, and the REF/ALT columns are upper-cased,
/// with the invariant culture so the result does not depend on the current thread culture.
/// </remarks>
/// <param name="parts">
/// The fields of the line. Indices 0-7 are read as contig, start position, ID, reference
/// allele, alternate alleles, quality, filter and INFO; index 8, if present, holds the
/// unsplit genotype data.
/// </param>
/// <param name="includeGenotypes">Whether or not to also parse the genotype data.</param>
/// <returns>
/// The context produced by <c>builder.make()</c>, or <c>null</c> if building failed and
/// <c>generateException</c> returned instead of throwing.
/// </returns>
private VariantContext parseVCFLine(string[] parts, bool includeGenotypes)
{
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    VariantContextBuilder builder = new VariantContextBuilder();
    builder.Source = Name;

    // increment the line count
    lineNo++;

    // parse out the required fields
    string chr = GetCachedString(parts[0]);
    builder.Contig = chr;

    // TryParse reports malformed AND out-of-range positions the same way; the previous
    // Convert.ToInt32 call let an OverflowException escape unreported.
    int pos;
    if (!int.TryParse(parts[1], System.Globalization.NumberStyles.Integer, invariant, out pos))
    {
        pos = -1; // keep the historical sentinel should generateException return
        generateException(parts[1] + " is not a valid start position in the VCF format");
    }
    builder.Start = pos;

    if (parts[2].Length == 0)
    {
        generateException("The VCF specification requires a valid ID field");
    }
    else if (parts[2].Equals(VCFConstants.EMPTY_ID_FIELD))
    {
        builder.ID = VCFConstants.EMPTY_ID_FIELD;
    }
    else
    {
        builder.ID = parts[2];
    }

    // Invariant upper-casing: culture-sensitive ToUpper() would map 'i' to a dotted
    // capital I under Turkish cultures.
    string refe = GetCachedString(parts[3].ToUpperInvariant());
    string alts = GetCachedString(parts[4].ToUpperInvariant());
    builder.Log10PError = parseQual(parts[5]);

    string filterStr = GetCachedString(parts[6]);
    // NOTE(review): if filterHash is a plain Dictionary this indexer throws for unknown
    // keys rather than returning null - confirm its type.
    var filters = filterHash[filterStr];
    if (filters != null) // means filter data present
    {
        builder.SetFilters(filters.Hash);
    }

    IDictionary<string, object> attrs = parseInfo(parts[7]);
    builder.Attributes = attrs;

    object endValue;
    if (attrs.TryGetValue(VCFConstants.END_KEY, out endValue))
    {
        // update stop with the end key if provided
        try
        {
            // A null value becomes "" here and is rejected by ToInt32 below.
            builder.Stop = Convert.ToInt32(Convert.ToString(endValue, invariant), invariant);
        }
        catch (Exception)
        {
            generateException("the END value in the INFO field is not valid");
        }
    }
    else
    {
        builder.Stop = pos + refe.Length - 1;
    }

    // get our alleles, filters, and setup an attribute map
    IList<Allele> alleles = parseAlleles(refe, alts, lineNo);
    builder.SetAlleles(alleles);

    // do we have genotyping data
    if (parts.Length > NUM_STANDARD_FIELDS && includeGenotypes)
    {
        int nGenotypes = header.NGenotypeSamples;
        LazyGenotypesContext lazy = new LazyGenotypesContext(this, alleles, chr, pos, parts[8], nGenotypes);

        // did we resort the sample names? If so, we need to load the genotype data
        if (!header.SamplesWereAlreadySorted)
        {
            lazy.Decode();
        }
        builder.SetGenotypes(lazy, false);
    }

    VariantContext vc = null;
    try
    {
        vc = builder.make();
    }
    catch (Exception e)
    {
        generateException(e.Message);
    }
    return vc;
}
/// <summary>
/// Builds a <see cref="VariantContext"/> from the pre-split fields of a single VCF line.
/// </summary>
/// <remarks>
/// Each call bumps <c>lineNo</c>. Invalid input is reported via <c>generateException</c>,
/// which is declared outside this block and is presumably throwing (TODO confirm).
/// Integer fields are read, and REF/ALT are upper-cased, using the invariant culture so
/// that parsing is independent of the current thread culture.
/// </remarks>
/// <param name="parts">
/// Line fields: [0] contig, [1] start position, [2] ID, [3] reference allele,
/// [4] alternate alleles, [5] quality, [6] filter, [7] INFO and, when present,
/// [8] the unsplit genotype data.
/// </param>
/// <param name="includeGenotypes">Whether or not to also parse the genotype data.</param>
/// <returns>
/// The result of <c>builder.make ()</c>; <c>null</c> only if that call failed and
/// <c>generateException</c> returned rather than throwing.
/// </returns>
private VariantContext parseVCFLine (string[] parts, bool includeGenotypes)
{
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    VariantContextBuilder builder = new VariantContextBuilder ();
    builder.Source = Name;

    // increment the line count
    lineNo++;

    // parse out the required fields
    string chr = GetCachedString (parts [0]);
    builder.Contig = chr;

    // int.TryParse covers both non-numeric and overflowing values; the earlier
    // Convert.ToInt32 + catch (FormatException) missed OverflowException.
    int pos;
    if (!int.TryParse (parts [1], System.Globalization.NumberStyles.Integer, invariant, out pos)) {
        pos = -1; // same fallback value as before if generateException returns
        generateException (parts [1] + " is not a valid start position in the VCF format");
    }
    builder.Start = pos;

    if (parts [2].Length == 0) {
        generateException ("The VCF specification requires a valid ID field");
    } else if (parts [2].Equals (VCFConstants.EMPTY_ID_FIELD)) {
        builder.ID = VCFConstants.EMPTY_ID_FIELD;
    } else {
        builder.ID = parts [2];
    }

    // Culture-insensitive casing: ToUpper () is culture-sensitive and, for example,
    // turns 'i' into a dotted capital I under Turkish cultures.
    string refe = GetCachedString (parts [3].ToUpperInvariant ());
    string alts = GetCachedString (parts [4].ToUpperInvariant ());
    builder.Log10PError = parseQual (parts [5]);

    string filterStr = GetCachedString (parts [6]);
    // NOTE(review): a plain Dictionary indexer would throw on an unknown key instead of
    // yielding null - verify what type filterHash is.
    var filters = filterHash [filterStr];
    if (filters != null) { // means filter data present
        builder.SetFilters (filters.Hash);
    }

    IDictionary<string, object> attrs = parseInfo (parts [7]);
    builder.Attributes = attrs;

    object endValue;
    if (attrs.TryGetValue (VCFConstants.END_KEY, out endValue)) {
        // update stop with the end key if provided
        try {
            // Convert.ToString maps a null value to "", which ToInt32 then rejects.
            builder.Stop = Convert.ToInt32 (Convert.ToString (endValue, invariant), invariant);
        } catch (Exception) {
            generateException ("the END value in the INFO field is not valid");
        }
    } else {
        builder.Stop = pos + refe.Length - 1;
    }

    // get our alleles, filters, and setup an attribute map
    IList<Allele> alleles = parseAlleles (refe, alts, lineNo);
    builder.SetAlleles (alleles);

    // do we have genotyping data
    if (parts.Length > NUM_STANDARD_FIELDS && includeGenotypes) {
        int nGenotypes = header.NGenotypeSamples;
        LazyGenotypesContext lazy = new LazyGenotypesContext (this, alleles, chr, pos, parts [8], nGenotypes);

        // did we resort the sample names? If so, we need to load the genotype data
        if (!header.SamplesWereAlreadySorted) {
            lazy.Decode ();
        }
        builder.SetGenotypes (lazy, false);
    }

    VariantContext vc = null;
    try {
        vc = builder.make ();
    } catch (Exception e) {
        generateException (e.Message);
    }
    return vc;
}