/// <summary>
/// Creates a VCF header from a set of header lines plus the genotype sample names.
/// The header lines are forwarded to the single-argument constructor.
/// </summary>
/// <param name="metaData"> the header lines passed on to <c>this(metaData)</c> </param>
/// <param name="genotypeSampleNames"> the sample names; duplicates are rejected </param>
public VCFHeader(ISet<VCFHeaderLine> metaData, IList<string> genotypeSampleNames)
    : this(metaData)
{
    // A set collapses repeated entries, so differing counts mean a name occurs more than once.
    int suppliedCount = genotypeSampleNames.Count;
    int distinctCount = new HashSet<string>(genotypeSampleNames).Count;
    bool hasDuplicates = suppliedCount != distinctCount;
    if (hasDuplicates)
    {
        throw new VCFParsingError("BUG: VCF header has duplicate sample names");
    }

    GenotypeSampleNames.AddRange(genotypeSampleNames);

    // Record whether the caller handed the names over in sorted order.
    SamplesWereAlreadySorted = ParsingUtils.IsSorted(genotypeSampleNames);

    buildVCFReaderMaps(genotypeSampleNames);
}
/// <summary>
/// Builds a VCF-like string representation of the alleles of this genotype.
///
/// When ignoreRefState is true, AlleleStrings is joined as-is (per the original notes,
/// these are just the allele bases without the '*' reference marker). Otherwise each
/// allele is rendered with ToString(); the alleles are first passed through
/// ParsingUtils.SortList when the genotype is not phased so the output is deterministic.
/// </summary>
/// <param name="ignoreRefState"> true to join AlleleStrings instead of the alleles' ToString() values </param>
/// <returns>
/// the allele strings joined with the phased or unphased separator,
/// or "NA" when the ploidy is zero
/// </returns>
public virtual string getGenotypeString(bool ignoreRefState)
{
    if (Ploidy == 0)
    {
        return "NA";
    }

    // The separator depends on whether the genotype is phased.
    var separator = Phased ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR;

    IEnumerable<string> alleleText;
    if (ignoreRefState)
    {
        // Used unchanged: no sorting is applied on this path.
        alleleText = AlleleStrings;
    }
    else if (Phased)
    {
        // Phased genotypes keep their allele order.
        alleleText = Alleles.Select(allele => allele.ToString());
    }
    else
    {
        // Unphased genotypes are sorted so the result is deterministic.
        alleleText = ParsingUtils.SortList(Alleles).Select(allele => allele.ToString());
    }

    return String.Join(separator, alleleText);
}
// --------------------------------------------------------------------------------
//
// implementation functions
//
// --------------------------------------------------------------------------------

/// <summary>
/// Produces the text for the FILTER column of a variant context.
/// </summary>
/// <param name="vc"> the variant context whose filter state is rendered </param>
/// <returns>
/// the sorted filter names joined with ";" when vc is filtered;
/// otherwise VCFConstants.PASSES_FILTERS_v4 when filters were applied,
/// or VCFConstants.UNFILTERED when they were not
/// </returns>
private string getFilterString(VariantContext vc)
{
    if (!vc.Filtered)
    {
        if (vc.FiltersWereApplied)
        {
            return VCFConstants.PASSES_FILTERS_v4;
        }

        return VCFConstants.UNFILTERED;
    }

    // Every filter named on the record is checked against the header's FILTER lines.
    foreach (String filterName in vc.Filters)
    {
        if (mHeader.hasFilterLine(filterName))
        {
            continue;
        }

        fieldIsMissingFromHeaderError(vc, filterName, "FILTER");
    }

    var orderedFilters = ParsingUtils.SortList(vc.Filters.ToList());
    return String.Join(";", orderedFilters.ToArray());
}
/// <summary>
/// Determine which genotype fields are in use in the genotypes in VC.
/// </summary>
/// <param name="vc"> the variant context whose genotypes are inspected </param>
/// <returns>
/// the genotype field keys in use: the extended attribute keys of every genotype plus
/// the GQ, DP, AD, PL and genotype-filter keys for each of those fields that at least
/// one genotype carries, ordered by ParsingUtils.SortList. GT is placed first when at
/// least one genotype is Available. If no key was collected at all, a list holding
/// only GT is returned.
/// </returns>
private static IList<string> calcVCFGenotypeKeys(VariantContext vc)
{
    HashSet<string> keys = new HashSet<string>();

    // Flags recording which built-in genotype fields appear on at least one genotype.
    bool sawGoodGT = false;
    bool sawGoodQual = false;
    bool sawGenotypeFilter = false;
    bool sawDP = false;
    bool sawAD = false;
    bool sawPL = false;

    foreach (Genotype g in vc.Genotypes)
    {
        // Extended attribute keys are converted to strings before being collected.
        foreach (string s in g.ExtendedAttributes.Keys.Select(x => x.ToString()))
        {
            keys.Add(s);
        }

        sawGoodGT |= g.Available;
        sawGoodQual |= g.HasGQ;
        sawDP |= g.HasDP;
        sawAD |= g.HasAD;
        sawPL |= g.HasPL;
        sawGenotypeFilter |= g.Filtered;
    }

    // GQ must be reported like the other built-in fields; leaving this branch empty
    // would silently drop the genotype quality key from the result.
    // NOTE(review): assumes VCFConstants.GENOTYPE_QUALITY_KEY is declared next to the
    // other genotype key constants used below - confirm.
    if (sawGoodQual)
    {
        keys.Add(VCFConstants.GENOTYPE_QUALITY_KEY);
    }
    if (sawDP)
    {
        keys.Add(VCFConstants.DEPTH_KEY);
    }
    if (sawAD)
    {
        keys.Add(VCFConstants.GENOTYPE_ALLELE_DEPTHS);
    }
    if (sawPL)
    {
        keys.Add(VCFConstants.GENOTYPE_PL_KEY);
    }
    if (sawGenotypeFilter)
    {
        keys.Add(VCFConstants.GENOTYPE_FILTER_KEY);
    }

    IList<string> sortedList = ParsingUtils.SortList(new List<string>(keys));

    // make sure the GT is first
    if (sawGoodGT)
    {
        List<string> withGenotypeFirst = new List<string>(sortedList.Count + 1);
        withGenotypeFirst.Add(VCFConstants.GENOTYPE_KEY);
        withGenotypeFirst.AddRange(sortedList);
        sortedList = withGenotypeFirst;
    }

    if (sortedList.Count == 0)
    {
        // this needs to be done in case all samples are no-calls
        return new List<string>() { VCFConstants.GENOTYPE_KEY };
    }

    return sortedList;
}