Esempio n. 1
0
 /// <summary>
 /// Appends one genotype to this context.
 ///
 /// Callers should keep the following in mind, because this call affects
 /// the cost of other operations offered by the context:
 ///
 /// 1. The sample name of the genotype is expected to be unique within
 ///    this context. That is not enforced here, so adding a duplicate
 ///    sample violates the contract of the context (originally checked
 ///    only when running with CoFoJa contract checking enabled).
 ///
 /// 2. If the sample name -> index map already exists it is updated in
 ///    place, so Add() followed by containsSample and related lookups is
 ///    an efficient series of operations.
 ///
 /// 3. The sorted list of sample names is invalidated, so Add() followed
 ///    by any SampleNamesInOrder operation is inefficient: each such call
 ///    must rebuild the sorted list at an O(n log n) cost.
 /// </summary>
 /// <param name="genotype">The genotype to append; its SampleName is the key written to the name map.</param>
 public void Add(Genotype genotype)
 {
     checkImmutability();
     invalidateSampleOrdering();

     // The name map is only maintained when it has already been created.
     var nameToOffset = sampleNameToOffset;
     if (nameToOffset != null)
     {
         // Count is read before the append below; presumably it equals the
         // index the new genotype is about to occupy.
         nameToOffset[genotype.SampleName] = Count;
     }

     Genotypes.Add(genotype);
 }
Esempio n. 2
0
        /// <summary>
        /// Builds a variant description from a single VCF-style record.
        ///
        /// The raw text is always stored in Description. When the text is null or
        /// splits into fewer than ten fields, nothing else is populated. Otherwise
        /// fields 3, 4, 7 and 8 become the reference allele string, the alternate
        /// allele string, the annotation (Info) and the Format string, and every
        /// field from index 9 onwards is parsed as one individual's genotype.
        ///
        /// Per-individual results are keyed by the zero-based position of the
        /// individual among the genotype fields, rendered as a string. Individuals
        /// whose genotype has no "GT" entry are skipped entirely.
        /// </summary>
        /// <param name="description">The record text; may be null.</param>
        public SequenceVariantDescription(string description)
        {
            Description = description;
            if (description == null)
            {
                return;
            }

            // Index of the first per-individual genotype field.
            const int firstGenotypeField = 9;

            // NOTE(review): the separator is the verbatim two-character sequence
            // backslash + 't', not a tab character. Presumably the descriptions
            // handled here carry escaped tabs -- confirm against whatever produces them.
            string[] vcfFields = description.Split(new[] { @"\t" }, StringSplitOptions.None);
            if (vcfFields.Length <= firstGenotypeField)
            {
                return;
            }

            ReferenceAlleleString = vcfFields[3];
            AlternateAlleleString = vcfFields[4];
            Info = new SnpEffAnnotation(vcfFields[7]);

            // -1 when the annotation names no allele; otherwise the 1-based position of
            // that allele among the comma-separated alternates. An allele that is not
            // listed yields 0 (IndexOf returns -1), the slot reserved for the reference.
            AlleleIndex = Info.Allele == null
                ? -1
                : Array.IndexOf(AlternateAlleleString.Split(','), Info.Allele) + 1;

            Format = vcfFields[8];
            string trimmedFormat = Format.Trim();

            // Walk the genotypes for this variant (e.g. tumor and normal).
            for (int field = firstGenotypeField; field < vcfFields.Length; field++)
            {
                string individual = (field - firstGenotypeField).ToString();
                var genotypeFields = GenotypeDictionary(trimmedFormat, vcfFields[field].Trim());

                // Without a genotype call there is nothing to record for this individual.
                if (!genotypeFields.TryGetValue("GT", out string gtString))
                {
                    continue;
                }

                // VCF separates alleles with '/' (unphased) or '|' (phased). Splitting on
                // '/' alone left a phased call such as "0|1" as a single token, which was
                // then reported as homozygous.
                string[] gt = gtString.Split('/', '|');

                // Allele depth might be absent, technically, but shouldn't be in most use cases.
                string[] ad = genotypeFields.TryGetValue("AD", out string adString)
                    ? adString.Split(',')
                    : null;

                int distinctAlleles = gt.Distinct().Count();

                Genotypes.Add(individual, gt);
                AlleleDepths.Add(individual, ad);
                Homozygous.Add(individual, distinctAlleles == 1);
                Heterozygous.Add(individual, distinctAlleles > 1);
            }
        }