Example #1
0
        /// <summary>
        /// Produces a VariantContext restricted to the given samples.
        ///
        /// When <paramref name="rederiveAllelesFromGenotypes"/> is true, the alleles of the result
        /// are taken from the genotypes that remain after subsetting. When it is false, the result
        /// keeps the full allele list of this context, even if some of those alleles are not
        /// present in the remaining samples.
        ///
        /// WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING
        /// </summary>
        /// <param name="sampleNames">Names of the samples to keep.</param>
        /// <param name="rederiveAllelesFromGenotypes">If true, the result's alleles are recomputed from the subset genotypes; if false, this context's alleles are reused unchanged.</param>
        /// <returns>This instance when the requested samples equal the current ones and no allele rederivation is requested; otherwise a newly built VariantContext.</returns>
        public VariantContext SubContextFromSamples(ISet <string> sampleNames, bool rederiveAllelesFromGenotypes)
        {
            // Fast path: same sample set and alleles left alone means there is nothing to rebuild.
            bool sameSampleSet = sampleNames.SetEquals(SampleNames);
            if (sameSampleSet && !rederiveAllelesFromGenotypes)
            {
                return this;
            }

            // Start from a copy of this context, then swap in the subset genotypes and alleles.
            var subsetBuilder = new VariantContextBuilder(this);
            GenotypesContext keptGenotypes = genotypes.subsetToSamples(sampleNames);

            if (rederiveAllelesFromGenotypes)
            {
                subsetBuilder.SetAlleles(allelesOfGenotypes(keptGenotypes));
            }
            else
            {
                subsetBuilder.SetAlleles(alleles);
            }

            subsetBuilder.SetGenotypes(keptGenotypes);
            return subsetBuilder.make();
        }
        /// <summary>
        /// Subsets this VariantContext down to a set of samples.
        ///
        /// If <paramref name="rederiveAllelesFromGenotypes"/> is true, the returned context only
        /// carries the alleles derived from the genotypes of the selected samples. Otherwise the
        /// full set of alleles in this VC is carried over, even if some of those alleles aren't
        /// in the samples.
        ///
        /// WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING
        /// </summary>
        /// <param name="sampleNames">The sample names to subset to.</param>
        /// <param name="rederiveAllelesFromGenotypes">True to recompute the alleles from the subset genotypes; false to keep this context's alleles as they are.</param>
        /// <returns>This same object when no work is needed (identical sample set and no rederivation); otherwise a new VariantContext for just the given samples.</returns>
        public VariantContext SubContextFromSamples(ISet<string> sampleNames, bool rederiveAllelesFromGenotypes)
        {
            // fast path when you don't have any work to do
            bool nothingToDo = sampleNames.SetEquals(SampleNames) && !rederiveAllelesFromGenotypes;
            if (nothingToDo)
            {
                return this;
            }

            VariantContextBuilder result = new VariantContextBuilder(this);
            GenotypesContext subset = genotypes.subsetToSamples(sampleNames);

            if (!rederiveAllelesFromGenotypes)
            {
                // keep every allele of this context, used by the subset or not
                result.SetAlleles(alleles);
            }
            else
            {
                // only the alleles reported for the subset genotypes
                result.SetAlleles(allelesOfGenotypes(subset));
            }

            result.SetGenotypes(subset);
            return result.make();
        }
Example #3
0
		/// <summary>
		/// Passes every genotype in Genotypes through the per-genotype fullyDecodeGenotypes
		/// overload, collects the results in a fresh GenotypesContext and sets that context
		/// on the builder.
		/// </summary>
		/// <param name="builder">Builder that receives the newly collected genotypes.</param>
		/// <param name="header">Header forwarded to each per-genotype decode call.</param>
		private void fullyDecodeGenotypes (VariantContextBuilder builder, VCFHeader header)
		{
			var decoded = new GenotypesContext ();
			foreach (Genotype current in Genotypes) {
				decoded.Add (fullyDecodeGenotypes (current, header));
			}

			// NOTE(review): the meaning of the `false` argument is defined by SetGenotypes, which is not visible here - confirm.
			builder.SetGenotypes (decoded, false);
		}
Example #4
0
        /// <summary>
        /// Parses a line from a VCF File
        /// </summary>
        /// <param name="parts">
        /// The already-split columns of the line. Indices 0-7 (CHROM, POS, ID, REF, ALT, QUAL,
        /// FILTER, INFO) are read unconditionally; index 8, when present, holds the unsplit
        /// genotype data.
        /// </param>
        /// <param name="includeGenotypes"> Whether or not to also parse the genotype data </param>
        /// <returns>
        /// The VariantContext produced by the builder for this line.
        /// NOTE(review): every failure is routed through generateException, which presumably
        /// throws; if it ever returns normally, null can be returned here - confirm.
        /// </returns>
        private VariantContext parseVCFLine(string[] parts, bool includeGenotypes)
        {
            VariantContextBuilder builder = new VariantContextBuilder();

            builder.Source = Name;
            // increment the line count
            lineNo++;

            // parse out the required fields
            string chr = GetCachedString(parts [0]);
            builder.Contig = chr;

            // VCF is machine-readable, so the position is parsed with the invariant culture.
            // TryParse also reports out-of-range values as a failure, so they get the same
            // VCF error as malformed ones instead of escaping as an OverflowException.
            int pos;
            if (!int.TryParse(parts [1], System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out pos))
            {
                pos = -1;
                generateException(parts [1] + " is not a valid start position in the VCF format");
            }
            builder.Start = pos;

            if (parts [2].Length == 0)
            {
                generateException("The VCF specification requires a valid ID field");
            }
            else if (parts [2].Equals(VCFConstants.EMPTY_ID_FIELD))
            {
                builder.ID = VCFConstants.EMPTY_ID_FIELD;
            }
            else
            {
                builder.ID = parts [2];
            }

            // Culture-invariant upper-casing: ToUpper() would mangle letters such as 'i'
            // under e.g. a Turkish current culture.
            string refe = GetCachedString(parts [3].ToUpperInvariant());
            string alts = GetCachedString(parts [4].ToUpperInvariant());

            builder.Log10PError = parseQual(parts [5]);

            string filterStr = GetCachedString(parts [6]);
            var filters = filterHash [filterStr];

            if (filters != null)              //means filter data present
            {
                builder.SetFilters(filters.Hash);
            }

            IDictionary <string, object> attrs = parseInfo(parts [7]);

            builder.Attributes = attrs;

            object endValue;
            if (attrs.TryGetValue(VCFConstants.END_KEY, out endValue))
            {
                // update stop with the end key if provided
                try {
                    builder.Stop = Convert.ToInt32(endValue.ToString(), System.Globalization.CultureInfo.InvariantCulture);
                } catch (Exception) {
                    // any failure here (null value, bad format, overflow, rejected Stop) is reported as an invalid END
                    generateException("the END value in the INFO field is not valid");
                }
            }
            else
            {
                // no END attribute: the stop is derived from the start and the reference allele length
                builder.Stop = (pos + refe.Length - 1);
            }

            // get our alleles, filters, and setup an attribute map
            IList <Allele> alleles = parseAlleles(refe, alts, lineNo);

            builder.SetAlleles(alleles);

            // do we have genotyping data
            if (parts.Length > NUM_STANDARD_FIELDS && includeGenotypes)
            {
                int nGenotypes = header.NGenotypeSamples;
                LazyGenotypesContext lazy = new LazyGenotypesContext(this, alleles, chr, pos, parts [8], nGenotypes);
                // did we resort the sample names?  If so, we need to load the genotype data
                if (!header.SamplesWereAlreadySorted)
                {
                    lazy.Decode();
                }
                builder.SetGenotypes(lazy, false);
            }

            VariantContext vc = null;

            try {
                vc = builder.make();
            } catch (Exception e) {
                // re-report builder failures through the parser's own error path
                generateException(e.Message);
            }
            return vc;
        }
Example #5
0
		/// <summary>
		/// Parses a line from a VCF File
		/// </summary>
		/// <param name="parts">
		/// The already-split columns of the line. Indices 0-7 (CHROM, POS, ID, REF, ALT, QUAL,
		/// FILTER, INFO) are read unconditionally; index 8, when present, holds the unsplit
		/// genotype data.
		/// </param>
		/// <param name="includeGenotypes"> Whether or not to also parse the genotype data </param>
		/// <returns>
		/// The VariantContext produced by the builder for this line.
		/// NOTE(review): every failure is routed through generateException, which presumably
		/// throws; if it ever returns normally, null can be returned here - confirm.
		/// </returns>
		private VariantContext parseVCFLine (string[] parts, bool includeGenotypes)
		{
			VariantContextBuilder builder = new VariantContextBuilder ();
			builder.Source = Name;
			// increment the line count
			lineNo++;
			// parse out the required fields
			string chr = GetCachedString (parts [0]);
			builder.Contig = chr;

			// VCF is machine-readable, so the position is parsed with the invariant culture.
			// TryParse also reports out-of-range values as a failure, so they get the same
			// VCF error as malformed ones instead of escaping as an OverflowException.
			int pos;
			if (!int.TryParse (parts [1], System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out pos)) {
				pos = -1;
				generateException (parts [1] + " is not a valid start position in the VCF format");
			}
			builder.Start = pos;

			if (parts [2].Length == 0) {
				generateException ("The VCF specification requires a valid ID field");
			} else if (parts [2].Equals (VCFConstants.EMPTY_ID_FIELD)) {
				builder.ID = VCFConstants.EMPTY_ID_FIELD;
			} else {
				builder.ID = parts [2];
			}

			// Culture-invariant upper-casing: ToUpper() would mangle letters such as 'i'
			// under e.g. a Turkish current culture.
			string refe = GetCachedString (parts [3].ToUpperInvariant ());
			string alts = GetCachedString (parts [4].ToUpperInvariant ());
			builder.Log10PError = parseQual (parts [5]);

			string filterStr = GetCachedString (parts [6]);
			var filters = filterHash [filterStr];
			if (filters != null) {//means filter data present
				builder.SetFilters (filters.Hash);
			}

			IDictionary<string, object> attrs = parseInfo (parts [7]);
			builder.Attributes = attrs;

			object endValue;
			if (attrs.TryGetValue (VCFConstants.END_KEY, out endValue)) {
				// update stop with the end key if provided
				try {
					builder.Stop = Convert.ToInt32 (endValue.ToString (), System.Globalization.CultureInfo.InvariantCulture);
				} catch (Exception) {
					// any failure here (null value, bad format, overflow, rejected Stop) is reported as an invalid END
					generateException ("the END value in the INFO field is not valid");
				}
			} else {
				// no END attribute: the stop is derived from the start and the reference allele length
				builder.Stop = (pos + refe.Length - 1);
			}

			// get our alleles, filters, and setup an attribute map
			IList<Allele> alleles = parseAlleles (refe, alts, lineNo);
			builder.SetAlleles (alleles);

			// do we have genotyping data
			if (parts.Length > NUM_STANDARD_FIELDS && includeGenotypes) {
				int nGenotypes = header.NGenotypeSamples;
				LazyGenotypesContext lazy = new LazyGenotypesContext (this, alleles, chr, pos, parts [8], nGenotypes);
				// did we resort the sample names?  If so, we need to load the genotype data
				if (!header.SamplesWereAlreadySorted) {
					lazy.Decode ();
				}
				builder.SetGenotypes (lazy, false);
			}

			VariantContext vc = null;
			try {
				vc = builder.make ();
			} catch (Exception e) {
				// re-report builder failures through the parser's own error path
				generateException (e.Message);
			}
			return vc;
		}