/// <summary>
/// Appends one genotype to this context.
///
/// Callers must respect several constraints, and should be aware of how this
/// call affects the cost of other operations offered by the context.
///
/// First, the genotype's sample name must be unique within this context.
/// This is not enforced by the code itself, though adding duplicate samples
/// invalidates the contract on this context when running with CoFoJa enabled.
///
/// Second, adding a genotype also updates the sample-name-to-index map
/// (whenever that map is non-null), so Add() followed by containsSample and
/// related functions is an efficient series of operations.
///
/// Third, adding a genotype invalidates the sorted list of sample names, so
/// Add() followed by any of the SampleNamesInOrder operations is inefficient:
/// each SampleNamesInOrder call must rebuild the sorted list of sample names
/// at an O(n log n) cost.
/// </summary>
/// <param name="genotype">The genotype to add to this context.</param>
public void Add(Genotype genotype)
{
    checkImmutability();
    invalidateSampleOrdering();

    if (sampleNameToOffset != null)
    {
        // Count is read before the genotype is appended below, so the offset
        // stored for this sample is the pre-add count.
        var sampleName = genotype.SampleName;
        sampleNameToOffset[sampleName] = Count;
    }

    Genotypes.Add(genotype);
}
/// <summary>
/// Builds a variant description by parsing a single VCF-style record.
///
/// The raw text is always stored in <c>Description</c>. When the text is null,
/// or splits into fewer than ten fields, nothing else is populated. Otherwise
/// field 3 becomes the reference allele string, field 4 the alternate allele
/// string, field 7 is handed to <c>SnpEffAnnotation</c>, field 8 is the format
/// string, and every field from index 9 onwards is treated as one sample's
/// genotype column.
///
/// For each sample that carries a "GT" entry, the genotype alleles, allele
/// depths ("AD", null when absent) and homozygous/heterozygous flags are
/// stored under the sample's zero-based position rendered as a string.
/// Samples without a "GT" entry are skipped.
/// </summary>
/// <param name="description">
/// The record text, with fields separated by the literal two-character
/// sequence backslash + 't'. May be null.
/// </param>
public SequenceVariantDescription(string description)
{
    Description = description;
    if (description == null)
    {
        return;
    }

    // Parse description into its fields. The separator is a verbatim string,
    // i.e. a literal backslash followed by 't', not a real tab character.
    string[] vcfFields = description.Split(new[] { @"\t" }, StringSplitOptions.None);
    if (vcfFields.Length < 10)
    {
        return;
    }

    ReferenceAlleleString = vcfFields[3];
    AlternateAlleleString = vcfFields[4];
    Info = new SnpEffAnnotation(vcfFields[7]);

    // Reference is index zero, so alternate alleles are numbered from one.
    // -1 means the annotation carries no allele at all.
    if (Info.Allele == null)
    {
        AlleleIndex = -1;
    }
    else
    {
        string[] alternateAlleles = AlternateAlleleString.Split(',');
        AlleleIndex = Array.IndexOf<string>(alternateAlleles, Info.Allele) + 1;
    }

    Format = vcfFields[8];
    string trimmedFormat = Format.Trim();
    string[] genotypes = vcfFields.Skip(9).ToArray();

    // Loop through genotypes for this variant (e.g. tumor and normal).
    for (int individual = 0; individual < genotypes.Length; individual++)
    {
        var genotypeFields = GenotypeDictionary(trimmedFormat, genotypes[individual].Trim());

        // A sample without a GT entry contributes nothing.
        if (!genotypeFields.TryGetValue("GT", out string gtString))
        {
            continue;
        }

        // VCF separates alleles with '/' when unphased and '|' when phased;
        // splitting on '/' alone would leave "0|1" as a single allele and
        // misreport it as homozygous.
        string[] gt = gtString.Split('/', '|');

        // Allele depth might be null, technically, but shouldn't be in most use cases.
        string[] ad = genotypeFields.TryGetValue("AD", out string adString)
            ? adString.Split(',')
            : null;

        string sampleKey = individual.ToString();
        int distinctAlleleCount = gt.Distinct().Count();

        Genotypes.Add(sampleKey, gt);
        AlleleDepths.Add(sampleKey, ad);
        Homozygous.Add(sampleKey, distinctAlleleCount == 1);
        Heterozygous.Add(sampleKey, distinctAlleleCount > 1);
    }
}