/// <summary>
/// Builds a genotype for one sample from its alleles and a likelihood array.
/// </summary>
/// <param name="sampleName"> name handed to the <c>GenotypeBuilder</c> constructor </param>
/// <param name="alleles"> alleles handed to the <c>GenotypeBuilder</c> constructor </param>
/// <param name="gls"> values forwarded unchanged to the builder's <c>setPL</c> </param>
/// <returns> the genotype produced by the builder's <c>Make</c> </returns>
protected internal static Genotype Create(string sampleName, List<Allele> alleles, double[] gls)
{
    GenotypeBuilder builder = new GenotypeBuilder(sampleName, alleles);
    builder.setPL(gls);
    return builder.Make();
}
/// <summary>
/// Builds a genotype for one sample from its alleles and a set of attributes.
/// </summary>
/// <param name="sampleName"> name handed to the <c>GenotypeBuilder</c> constructor </param>
/// <param name="alleles"> alleles handed to the <c>GenotypeBuilder</c> constructor </param>
/// <param name="attributes"> key/value pairs forwarded to the builder's <c>AddAttributes</c> </param>
/// <returns> the genotype produced by the builder's <c>Make</c> </returns>
public static Genotype Create(string sampleName, List<Allele> alleles, Dictionary<string, object> attributes)
{
    GenotypeBuilder builder = new GenotypeBuilder(sampleName, alleles);
    builder.AddAttributes(attributes);
    return builder.Make();
}
/// <summary>
/// Returns a new genotype built from <paramref name="g"/> with the result of
/// <c>fullyDecodeAttributes</c> (run over its extended attributes) added on top.
/// </summary>
/// <param name="g"> genotype used to seed the builder and to supply the extended attributes </param>
/// <param name="header"> header passed through to <c>fullyDecodeAttributes</c> </param>
/// <returns> the genotype produced by the builder's <c>Make</c> </returns>
private Genotype fullyDecodeGenotypes (Genotype g, VCFHeader header)
{
    // Decode first, then construct the builder, so any decode failure surfaces before a builder exists.
    IDictionary<string, object> decodedAttributes = fullyDecodeAttributes (g.ExtendedAttributes, header, true);

    GenotypeBuilder builder = new GenotypeBuilder (g);
    builder.AddAttributes (decodedAttributes);
    return builder.Make ();
}
/// <summary>
/// Creates a genotype for the named sample, supplying the likelihood array to the builder via <c>setPL</c>.
/// </summary>
/// <param name="sampleName"> sample name given to the builder </param>
/// <param name="alleles"> alleles given to the builder </param>
/// <param name="gls"> array passed as-is to <c>setPL</c> </param>
/// <returns> the result of the builder's <c>Make</c> </returns>
protected internal static Genotype Create(string sampleName, List<Allele> alleles, double[] gls)
{
    GenotypeBuilder genotypeBuilder = new GenotypeBuilder(sampleName, alleles);
    genotypeBuilder.setPL(gls);

    Genotype built = genotypeBuilder.Make();
    return built;
}
/// <summary>
/// Creates a genotype for the named sample, attaching the supplied attributes via <c>AddAttributes</c>.
/// </summary>
/// <param name="sampleName"> sample name given to the builder </param>
/// <param name="alleles"> alleles given to the builder </param>
/// <param name="attributes"> attribute dictionary passed as-is to <c>AddAttributes</c> </param>
/// <returns> the result of the builder's <c>Make</c> </returns>
public static Genotype Create(string sampleName, List<Allele> alleles, Dictionary<string, object> attributes)
{
    GenotypeBuilder genotypeBuilder = new GenotypeBuilder(sampleName, alleles);
    genotypeBuilder.AddAttributes(attributes);

    Genotype built = genotypeBuilder.Make();
    return built;
}
/// <summary>
/// Create a genotype map: splits the genotype portion of a record into the format-key
/// field followed by one field per sample, and builds a <c>Genotype</c> for every sample.
/// </summary>
/// <param name="str"> the string to split on <c>VCFConstants.FIELD_SEPARATOR_CHAR</c>; element 0 is the format key list, the remaining elements are the per-sample values </param>
/// <param name="alleles"> the list of alleles </param>
/// <param name="chr"> contig name; only used in the error message when a genotype cannot be built </param>
/// <param name="pos"> position; only used in the error message when a genotype cannot be built </param>
/// <returns> a mapping of sample name to genotype object </returns>
/// <exception cref="VCFParsingError"> when the string cannot be split into the expected fields, or when the builder fails to make a genotype </exception>
public LazyGenotypesContext.LazyData CreateGenotypeMap(string str, IList<Allele> alleles, string chr, int pos)
{
    // The split buffer is allocated once and reused on later calls.
    if (genotypeParts == null)
    {
        genotypeParts = new String[header.ColumnCount - NUM_STANDARD_FIELDS];
    }

    try
    {
        FastStringUtils.Split(str, VCFConstants.FIELD_SEPARATOR_CHAR, genotypeParts);
    }
    catch (Exception e)
    {
        throw new VCFParsingError("Could not parse genotypes, was expecting " + (genotypeParts.Length - 1).ToString() + " but found " + str.Split(VCFConstants.FIELD_SEPARATOR_CHAR).Length.ToString(), e);
    }

    List<Genotype> genotypes = new List<Genotype>(genotypeParts.Length);

    // get the format keys
    string[] genotypeKeyArray = genotypeParts[0].Split(VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
    int genotypeAlleleLocation = Array.IndexOf(genotypeKeyArray, VCFConstants.GENOTYPE_KEY);
    if (version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1)
    {
        generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
    }

    // clear out our allele mapping
    alleleMap.Clear();

    // A single builder is reset and reused for every non-missing sample.
    GenotypeBuilder gb = new GenotypeBuilder();

    // cycle through the genotype strings (index 0 holds the keys, samples start at 1)
    for (int genotypeOffset = 1; genotypeOffset < genotypeParts.Length; genotypeOffset++)
    {
        Genotype curGenotype;
        string sampleName = header.GenotypeSampleNames[genotypeOffset - 1];
        var currentGeno = genotypeParts[genotypeOffset];

        // shortcut for null alleles
        if (currentGeno == "./.")
        {
            curGenotype = GenotypeBuilder.CreateMissing(sampleName, 2);
        }
        else if (currentGeno == ".")
        {
            curGenotype = GenotypeBuilder.CreateMissing(sampleName, 1);
        }
        else
        {
            gb.Reset(false);
            gb.SampleName = sampleName;
            string[] GTValueArray = FastStringUtils.Split(currentGeno, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR, int.MaxValue, StringSplitOptions.None);

            // check to see if the value list is longer than the key list, which is a problem
            if (genotypeKeyArray.Length < GTValueArray.Length)
            {
                generateException("There are too many keys for the sample " + sampleName + ", line is: keys = " + genotypeParts[0] + ", values = " + genotypeParts[genotypeOffset]);
            }
            if (genotypeAlleleLocation > 0)
            {
                generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
            }

            // Loop over all fields in the key list and decode them into the genotype builder.
            if (genotypeKeyArray.Length > 0)
            {
                gb.MaxAttributes(genotypeKeyArray.Length - 1);
                for (int i = 0; i < genotypeKeyArray.Length; i++)
                {
                    string gtKey = genotypeKeyArray[i];

                    // Fewer values than keys is tolerated: the trailing keys are simply left unset.
                    if (i >= GTValueArray.Length)
                    {
                        break;
                    }

                    // todo -- all of these on the fly parsing of the missing value should be static constants
                    if (gtKey == VCFConstants.GENOTYPE_FILTER_KEY)
                    {
                        IList<string> filters = parseFilters(GetCachedString(GTValueArray[i]));
                        if (filters != null)
                        {
                            gb.SetFilters(filters.ToList());
                        }
                    }
                    else if (GTValueArray[i] == VCFConstants.MISSING_VALUE_v4)
                    {
                        // don't add missing values to the map
                    }
                    else
                    {
                        if (gtKey == VCFConstants.GENOTYPE_QUALITY_KEY)
                        {
                            if (GTValueArray[i] == VCFConstants.MISSING_GENOTYPE_QUALITY_v3)
                            {
                                gb.noGQ();
                            }
                            else
                            {
                                // Parse with the invariant culture: the file's numeric format must not
                                // depend on the machine's regional settings (e.g. ',' as decimal separator).
                                gb.GQ = ((int)Math.Round(Convert.ToDouble(GTValueArray[i], System.Globalization.CultureInfo.InvariantCulture)));
                            }
                        }
                        else if (gtKey == VCFConstants.GENOTYPE_ALLELE_DEPTHS)
                        {
                            gb.AD = (decodeInts(GTValueArray[i]));
                        }
                        else if (gtKey == VCFConstants.GENOTYPE_PL_KEY)
                        {
                            gb.PL = (decodeInts(GTValueArray[i]));
                        }
                        else if (gtKey == VCFConstants.GENOTYPE_LIKELIHOODS_KEY)
                        {
                            gb.PL = (GenotypeLikelihoods.fromGLField(GTValueArray[i]).AsPLs);
                        }
                        else if (gtKey.Equals(VCFConstants.DEPTH_KEY))
                        {
                            // Same culture-independence requirement as for GQ above.
                            gb.DP = (Convert.ToInt32(GTValueArray[i], System.Globalization.CultureInfo.InvariantCulture));
                        }
                        else
                        {
                            gb.AddAttribute(gtKey, GTValueArray[i]);
                        }
                    }
                }
            }

            List<Allele> GTalleles;
            if (genotypeAlleleLocation == -1)
            {
                GTalleles = new List<Allele>(0);
            }
            else
            {
                GTalleles = parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap);
            }
            gb.Alleles = GTalleles;
            gb.Phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].IndexOf(VCFConstants.PHASED_AS_CHAR) != -1;

            // add it to the list
            try
            {
                curGenotype = gb.Make();
            }
            catch (Exception e)
            {
                // Keep the original exception as the inner exception so its stack trace is not lost.
                throw new VCFParsingError(e.Message + ", at position " + chr + ":" + pos, e);
            }
        }
        genotypes.Add(curGenotype);
    }

    return new LazyGenotypesContext.LazyData(genotypes, header.SampleNamesInOrder, header.SampleNameToOffset);
}
/// <summary>
/// Create a genotype map: splits the genotype portion of a record into the format-key
/// field followed by one field per sample, and builds a <c>Genotype</c> for every sample.
/// </summary>
/// <param name="str"> the string to split on <c>VCFConstants.FIELD_SEPARATOR_CHAR</c>; element 0 is the format key list, the remaining elements are the per-sample values </param>
/// <param name="alleles"> the list of alleles </param>
/// <param name="chr"> contig name; only used in the error message when a genotype cannot be built </param>
/// <param name="pos"> position; only used in the error message when a genotype cannot be built </param>
/// <returns> a mapping of sample name to genotype object </returns>
/// <exception cref="VCFParsingError"> when the string cannot be split into the expected fields, or when the builder fails to make a genotype </exception>
public LazyGenotypesContext.LazyData CreateGenotypeMap (string str, IList<Allele> alleles, string chr, int pos)
{
    // The split buffer is allocated once and reused on later calls.
    if (genotypeParts == null) {
        genotypeParts = new String[header.ColumnCount - NUM_STANDARD_FIELDS];
    }

    try {
        FastStringUtils.Split (str, VCFConstants.FIELD_SEPARATOR_CHAR, genotypeParts);
    } catch (Exception e) {
        throw new VCFParsingError ("Could not parse genotypes, was expecting " + (genotypeParts.Length - 1).ToString () + " but found " + str.Split (VCFConstants.FIELD_SEPARATOR_CHAR).Length.ToString (), e);
    }

    List<Genotype> genotypes = new List<Genotype> (genotypeParts.Length);

    // get the format keys
    string[] genotypeKeyArray = genotypeParts [0].Split (VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
    int genotypeAlleleLocation = Array.IndexOf (genotypeKeyArray, VCFConstants.GENOTYPE_KEY);
    if (version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1) {
        generateException ("Unable to find the GT field for the record; the GT field is required in VCF4.0");
    }

    // clear out our allele mapping
    alleleMap.Clear ();

    // A single builder is reset and reused for every non-missing sample.
    GenotypeBuilder gb = new GenotypeBuilder ();

    // cycle through the genotype strings (index 0 holds the keys, samples start at 1)
    for (int genotypeOffset = 1; genotypeOffset < genotypeParts.Length; genotypeOffset++) {
        Genotype curGenotype;
        string sampleName = header.GenotypeSampleNames [genotypeOffset - 1];
        var currentGeno = genotypeParts [genotypeOffset];

        // shortcut for null alleles
        if (currentGeno == "./.") {
            curGenotype = GenotypeBuilder.CreateMissing (sampleName, 2);
        } else if (currentGeno == ".") {
            curGenotype = GenotypeBuilder.CreateMissing (sampleName, 1);
        } else {
            gb.Reset (false);
            gb.SampleName = sampleName;
            string[] GTValueArray = FastStringUtils.Split (currentGeno, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR, int.MaxValue, StringSplitOptions.None);

            // check to see if the value list is longer than the key list, which is a problem
            if (genotypeKeyArray.Length < GTValueArray.Length) {
                generateException ("There are too many keys for the sample " + sampleName + ", line is: keys = " + genotypeParts [0] + ", values = " + genotypeParts [genotypeOffset]);
            }
            if (genotypeAlleleLocation > 0) {
                generateException ("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
            }

            // Loop over all fields in the key list and decode them into the genotype builder.
            if (genotypeKeyArray.Length > 0) {
                gb.MaxAttributes (genotypeKeyArray.Length - 1);
                for (int i = 0; i < genotypeKeyArray.Length; i++) {
                    string gtKey = genotypeKeyArray [i];

                    // Fewer values than keys is tolerated: the trailing keys are simply left unset.
                    if (i >= GTValueArray.Length) {
                        break;
                    }

                    // todo -- all of these on the fly parsing of the missing value should be static constants
                    if (gtKey == VCFConstants.GENOTYPE_FILTER_KEY) {
                        IList<string> filters = parseFilters (GetCachedString (GTValueArray [i]));
                        if (filters != null) {
                            gb.SetFilters (filters.ToList ());
                        }
                    } else if (GTValueArray [i] == VCFConstants.MISSING_VALUE_v4) {
                        // don't add missing values to the map
                    } else {
                        if (gtKey == VCFConstants.GENOTYPE_QUALITY_KEY) {
                            if (GTValueArray [i] == VCFConstants.MISSING_GENOTYPE_QUALITY_v3) {
                                gb.noGQ ();
                            } else {
                                // Parse with the invariant culture: the file's numeric format must not
                                // depend on the machine's regional settings (e.g. ',' as decimal separator).
                                gb.GQ = ((int)Math.Round (Convert.ToDouble (GTValueArray [i], System.Globalization.CultureInfo.InvariantCulture)));
                            }
                        } else if (gtKey == VCFConstants.GENOTYPE_ALLELE_DEPTHS) {
                            gb.AD = (decodeInts (GTValueArray [i]));
                        } else if (gtKey == VCFConstants.GENOTYPE_PL_KEY) {
                            gb.PL = (decodeInts (GTValueArray [i]));
                        } else if (gtKey == VCFConstants.GENOTYPE_LIKELIHOODS_KEY) {
                            gb.PL = (GenotypeLikelihoods.fromGLField (GTValueArray [i]).AsPLs);
                        } else if (gtKey.Equals (VCFConstants.DEPTH_KEY)) {
                            // Same culture-independence requirement as for GQ above.
                            gb.DP = (Convert.ToInt32 (GTValueArray [i], System.Globalization.CultureInfo.InvariantCulture));
                        } else {
                            gb.AddAttribute (gtKey, GTValueArray [i]);
                        }
                    }
                }
            }

            List<Allele> GTalleles;
            if (genotypeAlleleLocation == -1) {
                GTalleles = new List<Allele> (0);
            } else {
                GTalleles = parseGenotypeAlleles (GTValueArray [genotypeAlleleLocation], alleles, alleleMap);
            }
            gb.Alleles = GTalleles;
            gb.Phased = genotypeAlleleLocation != -1 && GTValueArray [genotypeAlleleLocation].IndexOf (VCFConstants.PHASED_AS_CHAR) != -1;

            // add it to the list
            try {
                curGenotype = gb.Make ();
            } catch (Exception e) {
                // Keep the original exception as the inner exception so its stack trace is not lost.
                throw new VCFParsingError (e.Message + ", at position " + chr + ":" + pos, e);
            }
        }
        genotypes.Add (curGenotype);
    }

    return new LazyGenotypesContext.LazyData (genotypes, header.SampleNamesInOrder, header.SampleNameToOffset);
}