/// <summary>
/// Creates a builder initialised from the state of another builder.
///
/// The backing fields (alleles, genotypes, start, stop) are assigned directly from
/// <paramref name="parent"/>; attributes and filters are routed through the
/// Attributes setter and SetFilters respectively.
/// </summary>
/// <param name="parent"> the builder to copy state from; must not be null </param>
/// <exception cref="System.ArgumentException"> thrown when <paramref name="parent"/> is null </exception>
public VariantContextBuilder(VariantContextBuilder parent)
    : this()
{
    if (parent == null)
    {
        throw new System.ArgumentException("BUG: VariantContext parent argument cannot be null in VariantContextBuilder");
    }

    // NOTE(review): the assignment order is deliberately left untouched -- the Attributes
    // setter and SetFilters run last and may depend on attributesCanBeModified already
    // being false; confirm before reordering.
    alleles_Renamed = parent.alleles_Renamed;
    attributesCanBeModified = false;
    Contig = parent.Contig;
    genotypes_Renamed = parent.genotypes_Renamed;
    ID = parent.ID;
    Log10PError = parent.Log10PError;
    Source = parent.Source;
    start_Renamed = parent.start_Renamed;
    stop_Renamed = parent.stop_Renamed;
    FullyDecoded = parent.FullyDecoded;

    Attributes = parent.attributes_Renamed;
    SetFilters(parent.filters_Renamed);
}
/// <summary>
/// Produces a VariantContext restricted to the requested samples.
///
/// When rederiveAllelesFromGenotypes is true the allele list of the result is
/// recomputed from the genotypes of the retained samples; otherwise the full
/// allele list of this VC is carried over unchanged, even if some of those
/// alleles are not present in the retained samples.
///
/// WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING
/// </summary>
/// <param name="sampleNames"> the sample names to keep </param>
/// <param name="rederiveAllelesFromGenotypes"> if true, restrict the alleles to those in use by the kept samples; true should be default </param>
/// <returns> this instance when there is nothing to do, otherwise a new VariantContext subset to the given samples </returns>
public VariantContext SubContextFromSamples(ISet<string> sampleNames, bool rederiveAllelesFromGenotypes)
{
    // Fast path: the requested samples are exactly the ones we already have and
    // the alleles are not being rederived, so there is no work to do.
    if (sampleNames.SetEquals(SampleNames) && !rederiveAllelesFromGenotypes)
    {
        return this;
    }

    VariantContextBuilder subsetBuilder = new VariantContextBuilder(this);
    GenotypesContext subsetGenotypes = genotypes.subsetToSamples(sampleNames);

    if (rederiveAllelesFromGenotypes)
    {
        subsetBuilder.SetAlleles(allelesOfGenotypes(subsetGenotypes));
    }
    else
    {
        subsetBuilder.SetAlleles(alleles);
    }

    subsetBuilder.SetGenotypes(subsetGenotypes);
    return subsetBuilder.make();
}
/// <summary>
/// Subsets this VariantContext down to a set of samples.
///
/// If rederiveAllelesFromGenotypes is true, the alleles of the returned context
/// are limited to those in use by the selected samples. Otherwise the complete
/// allele list of this VC is reused, even when some alleles do not occur in the
/// selected samples.
///
/// WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING
/// </summary>
/// <param name="sampleNames"> the sample names </param>
/// <param name="rederiveAllelesFromGenotypes"> if true, returns the alleles to just those in use by the samples, true should be default </param>
/// <returns> new VariantContext subsetting to just the given samples (or this instance if nothing changes) </returns>
public VariantContext SubContextFromSamples(ISet<string> sampleNames, bool rederiveAllelesFromGenotypes)
{
    bool sameSamples = sampleNames.SetEquals(SampleNames);
    if (sameSamples && !rederiveAllelesFromGenotypes)
    {
        // fast path when you don't have any work to do
        return this;
    }

    VariantContextBuilder result = new VariantContextBuilder(this);
    GenotypesContext keptGenotypes = genotypes.subsetToSamples(sampleNames);

    if (!rederiveAllelesFromGenotypes)
    {
        result.SetAlleles(alleles);
    }
    else
    {
        result.SetAlleles(allelesOfGenotypes(keptGenotypes));
    }

    result.SetGenotypes(keptGenotypes);
    return result.make();
}
/// <summary>
/// Decodes every genotype of this VC against the header and stores the decoded
/// genotypes on the builder.
/// </summary>
/// <param name="builder"> the builder that receives the decoded genotypes </param>
/// <param name="header"> header passed to the per-genotype decoder </param>
private void fullyDecodeGenotypes(VariantContextBuilder builder, VCFHeader header)
{
    GenotypesContext decodedGenotypes = new GenotypesContext();

    foreach (Genotype genotype in Genotypes)
    {
        var decoded = fullyDecodeGenotypes(genotype, header);
        decodedGenotypes.Add(decoded);
    }

    // NOTE(review): the meaning of the 'false' flag is defined by SetGenotypes, which is not visible here.
    builder.SetGenotypes(decodedGenotypes, false);
}
/// <summary>
/// Decodes this VC's attributes against the header and assigns the result to the
/// builder's Attributes.
/// </summary>
/// <param name="builder"> the builder whose Attributes are replaced </param>
/// <param name="header"> header passed to the attribute decoder </param>
/// <param name="lenientDecoding"> forwarded unchanged to fullyDecodeAttributes </param>
private void fullyDecodeInfo(VariantContextBuilder builder, VCFHeader header, bool lenientDecoding)
{
    var decodedAttributes = fullyDecodeAttributes(Attributes, header, lenientDecoding);
    builder.Attributes = decodedAttributes;
}
// ---------------------------------------------------------------------------------------------------------
//
// Fully decode
//
// ---------------------------------------------------------------------------------------------------------

/// <summary>
/// Return a VC equivalent to this one but where all fields are fully decoded.
///
/// See VariantContext document about fully decoded
/// </summary>
/// <param name="header"> containing types about all fields in this VC </param>
/// <param name="lenientDecoding"> forwarded to the INFO attribute decoding step </param>
/// <returns> this instance if it is already fully decoded, otherwise a fully decoded version of this VC </returns>
public VariantContext FullyDecode(VCFHeader header, bool lenientDecoding)
{
    // Already decoded: nothing to do.
    if (FullyDecoded)
    {
        return this;
    }

    // TODO -- warning this is potentially very expensive as it creates copies over and over
    VariantContextBuilder decodingBuilder = new VariantContextBuilder(this);
    fullyDecodeInfo(decodingBuilder, header, lenientDecoding);
    fullyDecodeGenotypes(decodingBuilder, header);
    decodingBuilder.FullyDecoded = true;

    return decodingBuilder.make();
}
/// <summary>
/// Recomputes the chromosome-based VCF tags for the VC currently produced by
/// builder.make() and stores the resulting attribute map back on the builder.
/// </summary>
/// <param name="builder"> the VariantContextBuilder we are updating </param>
/// <param name="removeStaleValues"> should we remove stale values from the mapping? </param>
/// <param name="founderIds"> Set of founders to take into account. AF and FC will be calculated over the founders only.
/// If empty or null, counts are generated for all samples as unrelated individuals </param>
public static void CalculateChromosomeCounts(VariantContextBuilder builder, bool removeStaleValues, ISet<string> founderIds)
{
    VariantContext current = builder.make();

    // Work on a copy of the attributes so the map held by 'current' is not the one handed to the calculation.
    Dictionary<string, object> attributeCopy = new Dictionary<string, object>(current.Attributes);

    builder.Attributes = CalculateChromosomeCounts(current, attributeCopy, removeStaleValues, founderIds);
}
/// <summary>
/// Update the attributes map in the VariantContextBuilder so that it reflects the proper
/// chromosome-based VCF tags for the VC that builder.make() currently produces.
/// </summary>
/// <param name="builder"> the VariantContextBuilder we are updating </param>
/// <param name="removeStaleValues"> should we remove stale values from the mapping? </param>
/// <param name="founderIds"> Set of founders to take into account. AF and FC will be calculated over the founders only.
/// If empty or null, counts are generated for all samples as unrelated individuals </param>
public static void CalculateChromosomeCounts(VariantContextBuilder builder, bool removeStaleValues, ISet<string> founderIds)
{
    VariantContext builtContext = builder.make();
    Dictionary<string, object> workingAttributes = new Dictionary<string, object>(builtContext.Attributes);

    // The overload taking a VC and an attribute map does the actual counting; its result replaces the builder's attributes.
    builder.Attributes = CalculateChromosomeCounts(builtContext, workingAttributes, removeStaleValues, founderIds);
}
/// <summary>
/// Parses a line from a VCF File
/// </summary>
/// <param name="parts">An array of length >8 where the 9th element contains unsplit genotype data (if present)</param>
/// <param name="includeGenotypes"> Whether or not to also parse the genotype data </param>
/// <returns> The VariantContext produced by the builder from the line's fields </returns>
private VariantContext parseVCFLine(string[] parts, bool includeGenotypes)
{
    VariantContextBuilder builder = new VariantContextBuilder();
    builder.Source = Name;

    // increment the line count
    lineNo++;

    // parse out the required fields
    string chr = GetCachedString(parts[0]);
    builder.Contig = chr;

    int pos = -1;
    try
    {
        pos = Convert.ToInt32(parts[1]);
    }
    catch (FormatException)
    {
        generateException(parts[1] + " is not a valid start position in the VCF format");
    }
    catch (OverflowException)
    {
        // A POS outside the Int32 range is as invalid as a non-numeric one; report it
        // through the same channel instead of letting the raw OverflowException escape.
        generateException(parts[1] + " is not a valid start position in the VCF format");
    }
    builder.Start = pos;

    if (parts[2].Length == 0)
    {
        generateException("The VCF specification requires a valid ID field");
    }
    else if (parts[2].Equals(VCFConstants.EMPTY_ID_FIELD))
    {
        builder.ID = VCFConstants.EMPTY_ID_FIELD;
    }
    else
    {
        builder.ID = parts[2];
    }

    // Upper-case with the invariant culture: the culture-sensitive ToUpper() changes
    // its result with the current locale (e.g. 'i' becomes a dotted capital I under Turkish cultures).
    string refe = GetCachedString(parts[3].ToUpperInvariant());
    string alts = GetCachedString(parts[4].ToUpperInvariant());
    builder.Log10PError = parseQual(parts[5]);

    string filterStr = GetCachedString(parts[6]);
    var filters = filterHash[filterStr];
    if (filters != null) // means filter data present
    {
        builder.SetFilters(filters.Hash);
    }

    IDictionary<string, object> attrs = parseInfo(parts[7]);
    builder.Attributes = attrs;

    object endValue;
    if (attrs.TryGetValue(VCFConstants.END_KEY, out endValue))
    {
        // update stop with the end key if provided
        try
        {
            builder.Stop = Convert.ToInt32(endValue.ToString());
        }
        catch (Exception)
        {
            generateException("the END value in the INFO field is not valid");
        }
    }
    else
    {
        builder.Stop = (pos + refe.Length - 1);
    }

    // get our alleles, filters, and setup an attribute map
    IList<Allele> alleles = parseAlleles(refe, alts, lineNo);
    builder.SetAlleles(alleles);

    // do we have genotyping data
    if (parts.Length > NUM_STANDARD_FIELDS && includeGenotypes)
    {
        int nGenotypes = header.NGenotypeSamples;
        LazyGenotypesContext lazy = new LazyGenotypesContext(this, alleles, chr, pos, parts[8], nGenotypes);

        // did we resort the sample names?  If so, we need to load the genotype data
        if (!header.SamplesWereAlreadySorted)
        {
            lazy.Decode();
        }
        builder.SetGenotypes(lazy, false);
    }

    VariantContext vc = null;
    try
    {
        vc = builder.make();
    }
    catch (Exception e)
    {
        // Any failure while building is reported via generateException with the original message.
        generateException(e.Message);
    }
    return vc;
}
/// <summary>
/// Parses a line from a VCF File
/// </summary>
/// <param name="parts">An array of length >8 where the 9th element contains unsplit genotype data (if present)</param>
/// <param name="includeGenotypes"> Whether or not to also parse the genotype data </param>
/// <returns> The VariantContext produced by the builder from the line's fields </returns>
private VariantContext parseVCFLine(string[] parts, bool includeGenotypes)
{
    VariantContextBuilder builder = new VariantContextBuilder();
    builder.Source = Name;

    // increment the line count
    lineNo++;

    // parse out the required fields
    string chr = GetCachedString(parts[0]);
    builder.Contig = chr;

    int pos = -1;
    try
    {
        pos = Convert.ToInt32(parts[1]);
    }
    catch (FormatException)
    {
        generateException(parts[1] + " is not a valid start position in the VCF format");
    }
    catch (OverflowException)
    {
        // Convert.ToInt32 throws OverflowException for numeric text outside the Int32 range;
        // treat it as the same invalid-position error rather than letting it propagate raw.
        generateException(parts[1] + " is not a valid start position in the VCF format");
    }
    builder.Start = pos;

    if (parts[2].Length == 0)
    {
        generateException("The VCF specification requires a valid ID field");
    }
    else if (parts[2].Equals(VCFConstants.EMPTY_ID_FIELD))
    {
        builder.ID = VCFConstants.EMPTY_ID_FIELD;
    }
    else
    {
        builder.ID = parts[2];
    }

    // Invariant-culture upper-casing keeps REF/ALT text independent of the machine's locale
    // (the culture-sensitive ToUpper() maps 'i' differently under Turkish cultures).
    string refe = GetCachedString(parts[3].ToUpperInvariant());
    string alts = GetCachedString(parts[4].ToUpperInvariant());
    builder.Log10PError = parseQual(parts[5]);

    string filterStr = GetCachedString(parts[6]);
    var filters = filterHash[filterStr];
    if (filters != null) // means filter data present
    {
        builder.SetFilters(filters.Hash);
    }

    IDictionary<string, object> attrs = parseInfo(parts[7]);
    builder.Attributes = attrs;

    // Single lookup instead of ContainsKey followed by the indexer.
    object endValue;
    if (attrs.TryGetValue(VCFConstants.END_KEY, out endValue))
    {
        // update stop with the end key if provided
        try
        {
            builder.Stop = Convert.ToInt32(endValue.ToString());
        }
        catch (Exception)
        {
            generateException("the END value in the INFO field is not valid");
        }
    }
    else
    {
        builder.Stop = (pos + refe.Length - 1);
    }

    // get our alleles, filters, and setup an attribute map
    IList<Allele> alleles = parseAlleles(refe, alts, lineNo);
    builder.SetAlleles(alleles);

    // do we have genotyping data
    if (parts.Length > NUM_STANDARD_FIELDS && includeGenotypes)
    {
        int nGenotypes = header.NGenotypeSamples;
        LazyGenotypesContext lazy = new LazyGenotypesContext(this, alleles, chr, pos, parts[8], nGenotypes);

        // did we resort the sample names?  If so, we need to load the genotype data
        if (!header.SamplesWereAlreadySorted)
        {
            lazy.Decode();
        }
        builder.SetGenotypes(lazy, false);
    }

    VariantContext vc = null;
    try
    {
        vc = builder.make();
    }
    catch (Exception e)
    {
        // Any failure while building is reported via generateException with the original message.
        generateException(e.Message);
    }
    return vc;
}
/// <summary>
/// Copy-style constructor: initialises this builder from another builder's state.
///
/// Alleles, genotypes, start and stop are assigned straight to the backing fields;
/// attributes go through the Attributes setter and filters through SetFilters.
/// </summary>
/// <param name="parent"> the builder whose state is copied; must not be null </param>
/// <exception cref="System.ArgumentException"> thrown when <paramref name="parent"/> is null </exception>
public VariantContextBuilder(VariantContextBuilder parent)
    : this()
{
    if (parent == null)
    {
        throw new System.ArgumentException("BUG: VariantContext parent argument cannot be null in VariantContextBuilder");
    }

    // NOTE(review): statement order is preserved on purpose -- attributesCanBeModified is
    // cleared before the Attributes setter runs, and that setter may depend on it; confirm
    // before reordering.
    alleles_Renamed = parent.alleles_Renamed;
    attributesCanBeModified = false;
    Contig = parent.Contig;
    genotypes_Renamed = parent.genotypes_Renamed;
    ID = parent.ID;
    Log10PError = parent.Log10PError;
    Source = parent.Source;
    start_Renamed = parent.start_Renamed;
    stop_Renamed = parent.stop_Renamed;
    FullyDecoded = parent.FullyDecoded;

    Attributes = parent.attributes_Renamed;
    SetFilters(parent.filters_Renamed);
}