/// <summary>
/// Removes the genotype at position <paramref name="i"/> from this context.
///
/// Removing a genotype invalidates the cached sample-name-to-index map, so a loop
/// of the form
///
///     GenotypesContext gc = ...
///     for ( sample in samples )
///         if ( gc.containsSample(sample) )
///             gc.remove(sample)
///
/// is extremely inefficient: each removal throws the cache away and the following
/// containsSample call has to rebuild it, which is an O(n) operation.
///
/// When many samples have to be removed, use removeAll or retainAll instead to
/// avoid this O(n * m) behaviour.
/// </summary>
/// <param name="i">Index of the genotype to remove, as understood by the underlying Genotypes collection.</param>
public void RemoveAt(int i)
{
    // Refuse to mutate an immutable context before touching any state.
    checkImmutability();

    // Both derived caches describe the pre-removal contents; drop them.
    invalidateSampleNameMap();
    invalidateSampleOrdering();

    Genotypes.RemoveAt(i);
}
/// <summary>
/// Removes <paramref name="item"/> from this context after invalidating the
/// cached sample-name map and the cached sample ordering.
/// </summary>
/// <param name="item">The genotype to remove.</param>
/// <returns>Whatever the underlying Genotypes collection reports for the removal.</returns>
public bool Remove(Genotype item)
{
    checkImmutability();

    // The caches are dropped unconditionally, even if the item turns out to be absent.
    invalidateSampleNameMap();
    invalidateSampleOrdering();

    bool wasRemoved = Genotypes.Remove(item);
    return wasRemoved;
}
/// <summary>
/// Removes every genotype from this context and invalidates the cached
/// sample-name map and the cached sample ordering.
/// </summary>
public void Clear()
{
    // Use the same immutability guard as the other mutating members
    // (RemoveAt, Remove, Add, AddRange, RemoveAll).
    checkImmutability();

    invalidateSampleNameMap();
    invalidateSampleOrdering();

    Genotypes.Clear();
}
/// <summary>
/// Adds a single genotype to this context.
///
/// There are several constraints on the input, and adding has important effects
/// on the performance of other operations provided by this context.
///
/// First, the sample name of the genotype must be unique within this context.
/// This is not enforced by the code itself, though adding duplicate samples
/// violates the contract of this context (and is detected when running with
/// CoFoJa enabled).
///
/// Second, adding a genotype also updates the sample-name-to-index map when that
/// map has already been built, so Add followed by containsSample and related
/// functions is an efficient series of operations.
///
/// Third, adding a genotype invalidates the sorted list of sample names, so Add
/// followed by any of the SampleNamesInOrder operations is inefficient: each
/// SampleNamesInOrder call must rebuild the sorted list at an O(n log n) cost.
/// </summary>
/// <param name="genotype">The genotype to append.</param>
public void Add(Genotype genotype)
{
    checkImmutability();
    invalidateSampleOrdering();

    if (sampleNameToOffset != null)
    {
        // Keep the existing name map current: the new genotype will occupy the
        // slot equal to the current count, i.e. the position it is appended at.
        string sampleName = genotype.SampleName;
        sampleNameToOffset[sampleName] = Count;
    }

    Genotypes.Add(genotype);
}
/// <summary>
/// Creates a variant description from a VCF-style record string.
///
/// The raw text is always stored in Description. When it is non-null and splits
/// into at least ten fields, the reference allele (field 3), alternate alleles
/// (field 4), annotation info (field 7), format (field 8) and the per-individual
/// genotype columns (fields 9 and up) are parsed as well. Otherwise only
/// Description is set.
/// </summary>
/// <param name="description">The variant record text; may be null.</param>
public SequenceVariantDescription(string description)
{
    Description = description;
    if (description == null)
    {
        return;
    }

    // Parse description into its fields. The separator is the verbatim string @"\t",
    // i.e. the two characters backslash + 't', not a tab character.
    // NOTE(review): presumably the upstream text carries literal "\t" markers — confirm.
    string[] vcfFields = description.Split(new[] { @"\t" }, StringSplitOptions.None);
    if (vcfFields.Length < 10)
    {
        return;
    }

    ReferenceAlleleString = vcfFields[3];
    AlternateAlleleString = vcfFields[4];
    Info = new SnpEffAnnotation(vcfFields[7]);

    // Index of the annotated allele among the comma-separated alternates, shifted by
    // one because the reference is zero; -1 when the annotation names no allele.
    AlleleIndex = Info.Allele == null
        ? -1
        : Array.IndexOf(AlternateAlleleString.Split(','), Info.Allele) + 1;
    Format = vcfFields[8];

    // Everything from field 9 onward is one genotype column per individual.
    string[] genotypes = vcfFields.Skip(9).ToArray();

    // loop through genotypes for this variant (e.g. tumor and normal)
    for (int individual = 0; individual < genotypes.Length; individual++)
    {
        var genotypeFields = GenotypeDictionary(Format.Trim(), genotypes[individual].Trim());

        // parse genotype; individuals without a GT entry are skipped entirely
        if (!genotypeFields.TryGetValue("GT", out string gtString))
        {
            continue;
        }

        // VCF separates GT alleles with '/' (unphased) or '|' (phased). Splitting on
        // both keeps a phased call such as "0|1" from being treated as a single allele
        // and therefore misreported as homozygous.
        string[] gt = gtString.Split('/', '|');

        // parse allele depth (might be null, technically, but shouldn't be in most use cases)
        string[] ad = null;
        if (genotypeFields.TryGetValue("AD", out string adString))
        {
            ad = adString.Split(',');
        }

        string individualKey = individual.ToString();
        int distinctAlleleCount = gt.Distinct().Count();

        Genotypes.Add(individualKey, gt);
        AlleleDepths.Add(individualKey, ad);
        Homozygous.Add(individualKey, distinctAlleleCount == 1);
        Heterozygous.Add(individualKey, distinctAlleleCount > 1);
    }
}
/// <summary>
/// Removes every genotype in <paramref name="objects"/> from this context.
///
/// The sample-name map and the sample ordering are invalidated once up front.
/// All items are attempted even if an earlier one was not present; the failure
/// is reported only after the whole sequence has been processed.
/// </summary>
/// <param name="objects">The genotypes to remove.</param>
/// <exception cref="ArgumentException">
/// At least one of the supplied genotypes was not in the collection.
/// </exception>
public void RemoveAll(IEnumerable<Genotype> objects)
{
    checkImmutability();
    invalidateSampleNameMap();
    invalidateSampleOrdering();

    bool everyItemRemoved = true;
    foreach (Genotype candidate in objects)
    {
        // Always attempt the removal; only remember whether any attempt failed.
        if (!Genotypes.Remove(candidate))
        {
            everyItemRemoved = false;
        }
    }

    if (everyItemRemoved)
    {
        return;
    }

    throw new ArgumentException("Tried to remove genotype from context that was not in the collection");
}
/// <summary>
/// Adds all of the genotypes to this context.
///
/// See <see cref="Add(Genotype)"/> for important information about this function's
/// constraints and performance costs.
/// </summary>
/// <param name="genotypes">The genotypes to append, in order.</param>
public void AddRange(IEnumerable<Genotype> genotypes)
{
    checkImmutability();
    invalidateSampleOrdering();

    // The sequence is walked twice below (name map, then the actual append). Snapshot
    // it once unless it is already a collection, so a lazy or one-shot enumerable
    // cannot yield different items on the second pass and leave the recorded offsets
    // out of step with what was stored.
    ICollection<Genotype> toAdd = genotypes as ICollection<Genotype> ?? new List<Genotype>(genotypes);

    if (sampleNameToOffset != null)
    {
        // update the name map by adding entries, starting at the current end position
        int pos = Count;
        foreach (Genotype g in toAdd)
        {
            sampleNameToOffset[g.SampleName] = pos;
            pos += 1;
        }
    }

    Genotypes.AddRange(toAdd);
}
/// <summary>
/// Returns an enumerator over the underlying Genotypes collection.
/// </summary>
/// <returns>The enumerator produced by Genotypes.</returns>
public IEnumerator<Genotype> GetEnumerator() => Genotypes.GetEnumerator();
/// <summary>
/// Copies the genotypes into <paramref name="array"/>, delegating to the
/// underlying Genotypes collection.
/// </summary>
/// <param name="array">Destination array.</param>
/// <param name="arrayIndex">Position in <paramref name="array"/> at which copying begins.</param>
public void CopyTo(Genotype[] array, int arrayIndex) => Genotypes.CopyTo(array, arrayIndex);
/// <summary>
/// Reports whether <paramref name="item"/> is present in the underlying
/// Genotypes collection.
/// </summary>
/// <param name="item">The genotype to look for.</param>
/// <returns>The result of the lookup on Genotypes.</returns>
public bool Contains(Genotype item) => Genotypes.Contains(item);
/// <summary>
/// Returns the position of <paramref name="item"/> as reported by the underlying
/// Genotypes collection.
/// </summary>
/// <param name="item">The genotype to locate.</param>
/// <returns>The index returned by Genotypes.IndexOf.</returns>
public int IndexOf(Genotype item) => Genotypes.IndexOf(item);
/// <summary>
/// Returns the genotypes as an array, as produced by the underlying Genotypes
/// collection.
/// </summary>
/// <returns>The array returned by Genotypes.ToArray.</returns>
public Genotype[] ToArray() => Genotypes.ToArray();