Exemplo n.º 1
0
 /// <summary>
 /// Creates a header from the given meta-data lines (delegated to the single-argument
 /// constructor) and the supplied genotype sample names.
 /// </summary>
 /// <param name="metaData"> header lines forwarded to the meta-data-only constructor </param>
 /// <param name="genotypeSampleNames"> sample names for the genotype columns; must not contain duplicates </param>
 /// <exception cref="VCFParsingError"> thrown when the same sample name occurs more than once </exception>
 public VCFHeader(ISet <VCFHeaderLine> metaData, IList <string> genotypeSampleNames) : this(metaData)
 {
     // A set collapses repeated names, so any difference in size means a duplicate exists.
     int declaredCount = genotypeSampleNames.Count;
     int distinctCount = new HashSet <string>(genotypeSampleNames).Count;
     bool hasDuplicates = declaredCount != distinctCount;
     if (hasDuplicates)
     {
         throw new VCFParsingError("BUG: VCF header has duplicate sample names");
     }

     GenotypeSampleNames.AddRange(genotypeSampleNames);

     // Remember whether the caller handed the names over already in sorted order.
     SamplesWereAlreadySorted = ParsingUtils.IsSorted(genotypeSampleNames);

     buildVCFReaderMaps(genotypeSampleNames);
 }
Exemplo n.º 2
0
 /// <summary>
 /// Builds a VCF-like string for the alleles of this genotype by joining them with the
 /// phased or unphased separator, depending on <c>Phased</c>.
 ///
 /// When ignoreRefState is true the entries of <c>AlleleStrings</c> are joined as-is;
 /// otherwise each allele's <c>ToString()</c> form is used.
 /// </summary>
 /// <param name="ignoreRefState"> true to join <c>AlleleStrings</c> instead of the alleles' <c>ToString()</c> output </param>
 /// <returns> the joined allele string, or "NA" when the ploidy is zero. </returns>
 public virtual string getGenotypeString(bool ignoreRefState)
 {
     if (Ploidy == 0)
     {
         return "NA";
     }

     // The separator tells the reader whether the genotype is phased.
     var separator = Phased ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR;

     if (ignoreRefState)
     {
         // Only the allele strings are wanted here (original note: just the bases, without the '*' ref marker).
         return String.Join(separator, AlleleStrings);
     }

     if (Phased)
     {
         // Phased genotypes keep their allele order.
         return String.Join(separator, Alleles.Select(allele => allele.ToString()));
     }

     // Unphased genotypes are sorted so the output is deterministic (e.g. for integration tests).
     return String.Join(separator, ParsingUtils.SortList(Alleles).Select(allele => allele.ToString()));
 }
Exemplo n.º 3
0
        // --------------------------------------------------------------------------------
        //
        // implementation functions
        //
        // --------------------------------------------------------------------------------
        /// <summary>
        /// Produces the text for the FILTER column of the given variant context.
        /// </summary>
        /// <param name="vc"> the variant context whose filter state is rendered </param>
        /// <returns>
        /// the sorted filter names joined with ';' when the context is filtered; otherwise
        /// <c>VCFConstants.PASSES_FILTERS_v4</c> if filters were applied, or
        /// <c>VCFConstants.UNFILTERED</c> if they were not.
        /// </returns>
        private string getFilterString(VariantContext vc)
        {
            if (!vc.Filtered)
            {
                return vc.FiltersWereApplied ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED;
            }

            // Report every filter name that has no matching line in the header.
            foreach (String undeclared in vc.Filters.Where(name => !mHeader.hasFilterLine(name)))
            {
                fieldIsMissingFromHeaderError(vc, undeclared, "FILTER");
            }

            var orderedFilters = ParsingUtils.SortList(vc.Filters.ToList());
            return String.Join(";", orderedFilters.ToArray());
        }
Exemplo n.º 4
0
        /// <summary>
        /// Determine which genotype fields are in use in the genotypes in VC.
        ///
        /// The result is the union of every extended-attribute key found on the genotypes plus
        /// the dedicated keys for GQ, DP, AD, PL and the genotype filter whenever at least one
        /// genotype reports the corresponding value.
        /// </summary>
        /// <param name="vc"> the variant context whose genotypes are inspected </param>
        /// <returns>
        /// the keys as ordered by <c>ParsingUtils.SortList</c>, preceded by GT when at least one
        /// genotype is available; a list holding only GT when no key was found at all
        /// </returns>
        private static IList <string> calcVCFGenotypeKeys(VariantContext vc)
        {
            //TODO: not sure who wrote this, these boolean flags should be removed though
            HashSet <string> keys = new HashSet <string>();

            bool sawGoodGT         = false;
            bool sawGoodQual       = false;
            bool sawGenotypeFilter = false;
            bool sawDP             = false;
            bool sawAD             = false;
            bool sawPL             = false;

            foreach (Genotype g in vc.Genotypes)
            {
                //todo, make this a string later
                foreach (string s in g.ExtendedAttributes.Keys.Select(x => x.ToString()))
                {
                    keys.Add(s);
                }

                // Flags only ever flip to true: one genotype with the value is enough.
                sawGoodGT         |= g.Available;
                sawGoodQual       |= g.HasGQ;
                sawDP             |= g.HasDP;
                sawAD             |= g.HasAD;
                sawPL             |= g.HasPL;
                sawGenotypeFilter |= g.Filtered;
            }

            // GQ has to be registered like the other dedicated fields below; this branch was
            // previously empty, so the GQ key never reached the returned list.
            if (sawGoodQual)
            {
                keys.Add(VCFConstants.GENOTYPE_QUALITY_KEY);
            }
            if (sawDP)
            {
                keys.Add(VCFConstants.DEPTH_KEY);
            }
            if (sawAD)
            {
                keys.Add(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
            }
            if (sawPL)
            {
                keys.Add(VCFConstants.GENOTYPE_PL_KEY);
            }
            if (sawGenotypeFilter)
            {
                keys.Add(VCFConstants.GENOTYPE_FILTER_KEY);
            }

            IList <string> sortedList = ParsingUtils.SortList(new List <string>(keys));

            // make sure the GT is first
            if (sawGoodGT)
            {
                List <string> gtFirst = new List <string>(sortedList.Count + 1);
                gtFirst.Add(VCFConstants.GENOTYPE_KEY);
                gtFirst.AddRange(sortedList);
                sortedList = gtFirst;
            }

            if (sortedList.Count == 0)
            {
                // this needs to be done in case all samples are no-calls
                return(new List <string>()
                {
                    VCFConstants.GENOTYPE_KEY
                });
            }

            return(sortedList);
        }