Beispiel #1
0
        /// <summary>
        /// Builds a genotype for one sample from its alleles and a likelihood array.
        /// </summary>
        /// <param name="sampleName"> name of the sample, handed to the builder </param>
        /// <param name="alleles"> alleles handed to the builder </param>
        /// <param name="gls"> values passed to the builder's <c>setPL</c> </param>
        /// <returns> the genotype produced by the builder </returns>
        protected internal static Genotype Create(string sampleName, List <Allele> alleles, double[] gls)
        {
            GenotypeBuilder builder = new GenotypeBuilder(sampleName, alleles);
            builder.setPL(gls);

            Genotype built = builder.Make();
            return built;
        }
Beispiel #2
0
        /// <summary>
        /// Builds a genotype for one sample from its alleles and a set of attributes.
        /// </summary>
        /// <param name="sampleName"> name of the sample, handed to the builder </param>
        /// <param name="alleles"> alleles handed to the builder </param>
        /// <param name="attributes"> attributes added to the builder before the genotype is made </param>
        /// <returns> the genotype produced by the builder </returns>
        public static Genotype Create(string sampleName, List <Allele> alleles, Dictionary <string, object> attributes)
        {
            GenotypeBuilder builder = new GenotypeBuilder(sampleName, alleles);
            builder.AddAttributes(attributes);

            Genotype built = builder.Make();
            return built;
        }
Beispiel #3
0
		/// <summary>
		/// Returns a copy of <paramref name="g"/> with the result of decoding its
		/// extended attributes against <paramref name="header"/> added to it.
		/// </summary>
		/// <param name="g"> genotype whose extended attributes are decoded </param>
		/// <param name="header"> header passed through to <c>fullyDecodeAttributes</c> </param>
		/// <returns> the genotype made by a builder seeded from <paramref name="g"/> plus the decoded attributes </returns>
		private Genotype fullyDecodeGenotypes (Genotype g, VCFHeader header)
		{
			// Decoding happens before the builder is created, as in the original ordering.
			IDictionary<string, object> decodedAttributes = fullyDecodeAttributes (g.ExtendedAttributes, header, true);

			GenotypeBuilder builder = new GenotypeBuilder (g);
			builder.AddAttributes (decodedAttributes);

			Genotype decoded = builder.Make ();
			return decoded;
		}
Beispiel #4
0
 /// <summary>
 /// Makes a genotype for the named sample, feeding <paramref name="gls"/> to the builder's <c>setPL</c>.
 /// </summary>
 /// <param name="sampleName"> sample name given to the builder </param>
 /// <param name="alleles"> alleles given to the builder </param>
 /// <param name="gls"> values passed to <c>setPL</c> </param>
 /// <returns> the genotype the builder makes </returns>
 protected internal static Genotype Create(string sampleName, List<Allele> alleles, double[] gls)
 {
     GenotypeBuilder genotypeBuilder = new GenotypeBuilder(sampleName, alleles);
     genotypeBuilder.setPL(gls);

     Genotype genotype = genotypeBuilder.Make();
     return genotype;
 }
Beispiel #5
0
 /// <summary>
 /// Makes a genotype for the named sample, adding <paramref name="attributes"/> to the builder first.
 /// </summary>
 /// <param name="sampleName"> sample name given to the builder </param>
 /// <param name="alleles"> alleles given to the builder </param>
 /// <param name="attributes"> attributes passed to <c>AddAttributes</c> </param>
 /// <returns> the genotype the builder makes </returns>
 public static Genotype Create(string sampleName, List<Allele> alleles, Dictionary<string, object> attributes)
 {
     GenotypeBuilder genotypeBuilder = new GenotypeBuilder(sampleName, alleles);
     genotypeBuilder.AddAttributes(attributes);

     Genotype genotype = genotypeBuilder.Make();
     return genotype;
 }
Beispiel #6
0
        /// <summary>
        /// Create a genotype map: splits the genotype portion of a record into the format
        /// keys (first field) and one field per sample, and decodes each sample field into
        /// a genotype.
        /// </summary>
        /// <param name="str"> the string holding the format keys followed by the per-sample fields </param>
        /// <param name="alleles"> the list of alleles </param>
        /// <param name="chr"> contig name; only used in the error message when a genotype cannot be built </param>
        /// <param name="pos"> position; only used in the error message when a genotype cannot be built </param>
        /// <returns> lazy data built from the decoded genotypes, the header's ordered sample names and its sample-name-to-offset lookup </returns>
        /// <exception cref="VCFParsingError"> when the string cannot be split into the expected fields, or when building a genotype fails </exception>
        public LazyGenotypesContext.LazyData CreateGenotypeMap(string str, IList <Allele> alleles, string chr, int pos)
        {
            // Numeric genotype fields are machine-readable, so they must not be parsed with
            // the current thread culture (a comma-decimal culture would misread "99.5").
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // The split buffer is created on first use and reused on later calls.
            if (genotypeParts == null)
            {
                genotypeParts = new String[header.ColumnCount - NUM_STANDARD_FIELDS];
            }
            try {
                FastStringUtils.Split(str, VCFConstants.FIELD_SEPARATOR_CHAR, genotypeParts);
            } catch (Exception e) {
                throw new VCFParsingError("Could not parse genotypes, was expecting " + (genotypeParts.Length - 1).ToString() + " but found " + str.Split(VCFConstants.FIELD_SEPARATOR_CHAR).Length.ToString(), e);
            }
            List <Genotype> genotypes = new List <Genotype> (genotypeParts.Length);

            // get the format keys (element 0 of the split holds the key list)
            string[] genotypeKeyArray       = genotypeParts [0].Split(VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
            int      genotypeAlleleLocation = Array.IndexOf(genotypeKeyArray, VCFConstants.GENOTYPE_KEY);

            if (version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1)
            {
                generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
            }
            // clear out our allele mapping
            alleleMap.Clear();
            // a single builder is reset and reused for every non-missing sample
            GenotypeBuilder gb = new GenotypeBuilder();

            // cycle through the genotype strings (offset 0 is the key list, samples start at 1)
            for (int genotypeOffset = 1; genotypeOffset < genotypeParts.Length; genotypeOffset++)
            {
                Genotype curGenotype;
                string   sampleName  = header.GenotypeSampleNames [genotypeOffset - 1];
                var      currentGeno = genotypeParts [genotypeOffset];
                //shortcut for null alleles
                if (currentGeno == "./.")
                {
                    curGenotype = GenotypeBuilder.CreateMissing(sampleName, 2);
                }
                else if (currentGeno == ".")
                {
                    curGenotype = GenotypeBuilder.CreateMissing(sampleName, 1);
                }
                else
                {
                    gb.Reset(false);
                    gb.SampleName = sampleName;
                    string[] GTValueArray = FastStringUtils.Split(currentGeno, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR, int.MaxValue, StringSplitOptions.None);
                    // check to see if the value list is longer than the key list, which is a problem
                    if (genotypeKeyArray.Length < GTValueArray.Length)
                    {
                        generateException("There are too many keys for the sample " + sampleName + ", line is: keys = " + genotypeParts [0] + ", values = " + genotypeParts [genotypeOffset]);
                    }
                    if (genotypeAlleleLocation > 0)
                    {
                        generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
                    }

                    // Loop over all format keys and decode the matching value into the builder.
                    if (genotypeKeyArray.Length > 0)
                    {
                        gb.MaxAttributes(genotypeKeyArray.Length - 1);
                        for (int i = 0; i < genotypeKeyArray.Length; i++)
                        {
                            string gtKey = genotypeKeyArray [i];
                            // a sample may carry fewer values than there are keys; stop at the last value
                            if (i >= GTValueArray.Length)
                            {
                                break;
                            }
                            string gtValue = GTValueArray [i];
                            // todo -- all of these on the fly parsing of the missing value should be static constants
                            // The filter key is tested before the missing-value check, so a missing
                            // filter value is still handed to parseFilters.
                            if (gtKey == VCFConstants.GENOTYPE_FILTER_KEY)
                            {
                                IList <string> filters = parseFilters(GetCachedString(gtValue));
                                if (filters != null)
                                {
                                    gb.SetFilters(filters.ToList());
                                }
                            }
                            else if (gtValue == VCFConstants.MISSING_VALUE_v4)
                            {
                                // don't add missing values to the map
                            }
                            else
                            {
                                if (gtKey == VCFConstants.GENOTYPE_QUALITY_KEY)
                                {
                                    if (gtValue == VCFConstants.MISSING_GENOTYPE_QUALITY_v3)
                                    {
                                        gb.noGQ();
                                    }
                                    else
                                    {
                                        // the quality may be written as a decimal; it is rounded to an int
                                        gb.GQ = ((int)Math.Round(Convert.ToDouble(gtValue, invariant)));
                                    }
                                }
                                else if (gtKey == VCFConstants.GENOTYPE_ALLELE_DEPTHS)
                                {
                                    gb.AD = (decodeInts(gtValue));
                                }
                                else if (gtKey == VCFConstants.GENOTYPE_PL_KEY)
                                {
                                    gb.PL = (decodeInts(gtValue));
                                }
                                else if (gtKey == VCFConstants.GENOTYPE_LIKELIHOODS_KEY)
                                {
                                    gb.PL = (GenotypeLikelihoods.fromGLField(gtValue).AsPLs);
                                }
                                else if (gtKey.Equals(VCFConstants.DEPTH_KEY))
                                {
                                    gb.DP = (Convert.ToInt32(gtValue, invariant));
                                }
                                else
                                {
                                    gb.AddAttribute(gtKey, gtValue);
                                }
                            }
                        }
                    }

                    List <Allele> GTalleles;
                    if (genotypeAlleleLocation == -1)
                    {
                        GTalleles = new List <Allele> (0);
                    }
                    else
                    {
                        GTalleles = parseGenotypeAlleles(GTValueArray [genotypeAlleleLocation], alleles, alleleMap);
                    }
                    gb.Alleles = GTalleles;
                    gb.Phased  = genotypeAlleleLocation != -1 && GTValueArray [genotypeAlleleLocation].IndexOf(VCFConstants.PHASED_AS_CHAR) != -1;

                    // add it to the list
                    try {
                        curGenotype = gb.Make();
                    } catch (Exception e) {
                        // keep the original exception as the inner exception so its stack trace is not lost
                        throw new VCFParsingError(e.Message + ", at position " + chr + ":" + pos, e);
                    }
                }
                genotypes.Add(curGenotype);
            }
            return(new LazyGenotypesContext.LazyData(genotypes, header.SampleNamesInOrder, header.SampleNameToOffset));
        }
Beispiel #7
0
		/// <summary>
		/// Create a genotype map: splits the genotype portion of a record into the format
		/// keys (first field) and one field per sample, and decodes each sample field into
		/// a genotype.
		/// </summary>
		/// <param name="str"> the string holding the format keys followed by the per-sample fields </param>
		/// <param name="alleles"> the list of alleles </param>
		/// <param name="chr"> contig name; only used in the error message when a genotype cannot be built </param>
		/// <param name="pos"> position; only used in the error message when a genotype cannot be built </param>
		/// <returns> lazy data built from the decoded genotypes, the header's ordered sample names and its sample-name-to-offset lookup </returns>
		/// <exception cref="VCFParsingError"> when the string cannot be split into the expected fields, or when building a genotype fails </exception>
		public LazyGenotypesContext.LazyData CreateGenotypeMap (string str, IList<Allele> alleles, string chr, int pos)
		{
			// Numeric genotype fields are machine-readable, so they must not be parsed with
			// the current thread culture (a comma-decimal culture would misread "99.5").
			System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

			// The split buffer is created on first use and reused on later calls.
			if (genotypeParts == null) {
				genotypeParts = new String[header.ColumnCount - NUM_STANDARD_FIELDS];
			}
			try {
				FastStringUtils.Split (str, VCFConstants.FIELD_SEPARATOR_CHAR, genotypeParts);
			} catch (Exception e) {
				throw new VCFParsingError ("Could not parse genotypes, was expecting " + (genotypeParts.Length - 1).ToString () + " but found " + str.Split (VCFConstants.FIELD_SEPARATOR_CHAR).Length.ToString (), e);
			}
			List<Genotype> genotypes = new List<Genotype> (genotypeParts.Length);
			// get the format keys (element 0 of the split holds the key list)
			string[] genotypeKeyArray = genotypeParts [0].Split (VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
			int genotypeAlleleLocation = Array.IndexOf (genotypeKeyArray, VCFConstants.GENOTYPE_KEY);
			if (version != VCFHeaderVersion.VCF4_1 && genotypeAlleleLocation == -1) {
				generateException ("Unable to find the GT field for the record; the GT field is required in VCF4.0");
			}
			// clear out our allele mapping
			alleleMap.Clear ();
			// a single builder is reset and reused for every non-missing sample
			GenotypeBuilder gb = new GenotypeBuilder ();

			// cycle through the genotype strings (offset 0 is the key list, samples start at 1)
			for (int genotypeOffset = 1; genotypeOffset < genotypeParts.Length; genotypeOffset++) {
				Genotype curGenotype;
				string sampleName = header.GenotypeSampleNames [genotypeOffset - 1];
				var currentGeno = genotypeParts [genotypeOffset];
				//shortcut for null alleles
				if (currentGeno == "./.") {
					curGenotype = GenotypeBuilder.CreateMissing (sampleName, 2);
				} else if (currentGeno == ".") {
					curGenotype = GenotypeBuilder.CreateMissing (sampleName, 1);
				} else {
					gb.Reset (false);
					gb.SampleName = sampleName;
					string[] GTValueArray = FastStringUtils.Split (currentGeno, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR, int.MaxValue, StringSplitOptions.None);
					// check to see if the value list is longer than the key list, which is a problem
					if (genotypeKeyArray.Length < GTValueArray.Length) {
						generateException ("There are too many keys for the sample " + sampleName + ", line is: keys = " + genotypeParts [0] + ", values = " + genotypeParts [genotypeOffset]);
					}
					if (genotypeAlleleLocation > 0) {
						generateException ("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
					}

					// Loop over all format keys and decode the matching value into the builder.
					if (genotypeKeyArray.Length > 0) {
						gb.MaxAttributes (genotypeKeyArray.Length - 1);
						for (int i = 0; i < genotypeKeyArray.Length; i++) {
							string gtKey = genotypeKeyArray [i];
							// a sample may carry fewer values than there are keys; stop at the last value
							if (i >= GTValueArray.Length) {
								break;
							}
							string gtValue = GTValueArray [i];
							// todo -- all of these on the fly parsing of the missing value should be static constants
							// The filter key is tested before the missing-value check, so a missing
							// filter value is still handed to parseFilters.
							if (gtKey == VCFConstants.GENOTYPE_FILTER_KEY) {
								IList<string> filters = parseFilters (GetCachedString (gtValue));
								if (filters != null) {
									gb.SetFilters (filters.ToList ());
								}
							} else if (gtValue == VCFConstants.MISSING_VALUE_v4) {
								// don't add missing values to the map
							} else {
								if (gtKey == VCFConstants.GENOTYPE_QUALITY_KEY) {
									if (gtValue == VCFConstants.MISSING_GENOTYPE_QUALITY_v3) {
										gb.noGQ ();
									} else {
										// the quality may be written as a decimal; it is rounded to an int
										gb.GQ = ((int)Math.Round (Convert.ToDouble (gtValue, invariant)));
									}
								} else if (gtKey == VCFConstants.GENOTYPE_ALLELE_DEPTHS) {
									gb.AD = (decodeInts (gtValue));
								} else if (gtKey == VCFConstants.GENOTYPE_PL_KEY) {
									gb.PL = (decodeInts (gtValue));
								} else if (gtKey == VCFConstants.GENOTYPE_LIKELIHOODS_KEY) {
									gb.PL = (GenotypeLikelihoods.fromGLField (gtValue).AsPLs);
								} else if (gtKey.Equals (VCFConstants.DEPTH_KEY)) {
									gb.DP = (Convert.ToInt32 (gtValue, invariant));
								} else {
									gb.AddAttribute (gtKey, gtValue);
								}
							}
						}
					}

					List<Allele> GTalleles;
					if (genotypeAlleleLocation == -1) {
						GTalleles = new List<Allele> (0);
					} else {
						GTalleles = parseGenotypeAlleles (GTValueArray [genotypeAlleleLocation], alleles, alleleMap);
					}
					gb.Alleles = GTalleles;
					gb.Phased = genotypeAlleleLocation != -1 && GTValueArray [genotypeAlleleLocation].IndexOf (VCFConstants.PHASED_AS_CHAR) != -1;

					// add it to the list
					try {
						curGenotype = gb.Make ();
					} catch (Exception e) {
						// keep the original exception as the inner exception so its stack trace is not lost
						throw new VCFParsingError (e.Message + ", at position " + chr + ":" + pos, e);
					}
				}
				genotypes.Add (curGenotype);
			}
			return new LazyGenotypesContext.LazyData (genotypes, header.SampleNamesInOrder, header.SampleNameToOffset);
		}