Esempio n. 1
0
        /// <summary>
        /// Consumes the alleles that follow a deletion while they still fall inside the deleted span.
        /// Alleles carrying an alternate allele are recorded in <paramref name="snvLoci"/> (when
        /// <c>GetVariantType</c> reports <c>VariantType.Snv</c>) or in <paramref name="indelLoci"/> (otherwise);
        /// a locus that turns out to be multiallelic has its entry removed again.
        /// </summary>
        /// <param name="deletionVar">The deletion; the length of its reference allele bounds the scan.</param>
        /// <param name="variant">The allele that immediately follows the deletion, already read by the caller.</param>
        /// <param name="reader">Reader from which the following alleles are pulled.</param>
        /// <param name="snvLoci">Collection that receives (and may lose again) SNV entries.</param>
        /// <param name="indelLoci">Collection that receives (and may lose again) all other entries.</param>
        /// <returns>
        /// The first allele that was read but not consumed here, or <c>null</c> when the reader ran out of
        /// records while scanning the deletion.
        /// </returns>
        private static CalledAllele ProcessDeletion(CalledAllele deletionVar, CalledAllele variant, AlleleReader reader,
                                                    RecalibratedVariantsCollection snvLoci, RecalibratedVariantsCollection indelLoci)
        {
            // Last reference position covered by the deletion; it does not change while scanning.
            var deletionEnd = deletionVar.ReferencePosition + deletionVar.ReferenceAllele.Length - 1;

            for (int i = 1; i < deletionVar.ReferenceAllele.Length; i++)
            {
                if (ShouldSkipVariant(variant))
                {
                    // NOTE(review): the reader is not advanced on this path, so the same allele is tested again
                    // on every remaining iteration and is what ends up being returned - confirm this is intended.
                    continue;
                }

                if (variant.HasAnAltAllele)
                {
                    if (GetVariantType(variant) == VariantType.Snv)
                    {
                        snvLoci.AddLocus(variant);
                    }
                    else
                    {
                        indelLoci.AddLocus(variant);
                    }
                }

                CalledAllele lastVar = variant;

                if (!GetNextUncrushedAllele(reader, out variant))
                {
                    // Input ended inside the deletion: hand null back so the caller stops instead of
                    // dereferencing a missing allele below.
                    return null;
                }

                // If there is multiallelic variant inside deletion, ignore locus
                if (variant.IsCoLocatedAllele(lastVar) && lastVar.HasAnAltAllele)
                {
                    if (GetVariantType(lastVar) == VariantType.Snv)
                    {
                        snvLoci.RemoveLastEntry();
                    }
                    else
                    {
                        indelLoci.RemoveLastEntry();
                    }
                }

                // Discard every further allele reported at the same chromosome and position.
                while (variant.ReferencePosition == lastVar.ReferencePosition &&
                       variant.Chromosome == lastVar.Chromosome)
                {
                    lastVar = variant;
                    if (!GetNextUncrushedAllele(reader, out variant))
                    {
                        return null;
                    }
                }

                // Stop once the allele lies beyond the deleted span. An allele on another chromosome can never
                // be inside this deletion, so it ends the scan as well.
                if (variant.Chromosome != deletionVar.Chromosome ||
                    variant.ReferencePosition > deletionEnd)
                {
                    break;
                }
            }

            return variant;
        }
Esempio n. 2
0
        /// <summary>
        /// Reads the alleles of a VCF one at a time and sorts the usable loci into an SNV collection and an
        /// indel collection. Co-located (multiallelic) alleles and alleles directly following a non-reference
        /// call with a multi-base reference allele are delegated to <c>ProcessMultiAllelicVariant</c> and
        /// <c>ProcessDeletion</c> respectively.
        /// </summary>
        /// <param name="vcfIn">Value passed to the <c>AlleleReader</c> constructor (the input VCF).</param>
        /// <returns>A list holding the SNV collection first and the indel collection second.</returns>
        public static List <RecalibratedVariantsCollection> GetVariantFrequencies(string vcfIn)
        {
            var snvLoci   = new RecalibratedVariantsCollection();
            var indelLoci = new RecalibratedVariantsCollection();

            // Create the reader inside the using statement so it is disposed even when the header check throws.
            using (var alleleReader = new AlleleReader(vcfIn))
            {
                // Check headers
                CheckHeader(alleleReader);

                CalledAllele lastVar = new CalledAllele();
                CalledAllele variant;

                while (GetNextUncrushedAllele(alleleReader, out variant))
                {
                    try
                    {
                        // Check if multiallelic
                        if (variant.IsCoLocatedAllele(lastVar))
                        {
                            variant = ProcessMultiAllelicVariant(lastVar, variant, alleleReader, snvLoci, indelLoci);
                        }

                        // Check if within deletion: the previous call has a multi-base reference allele, is not
                        // homozygous reference, and the current allele sits at the very next position.
                        if (lastVar.ReferenceAllele != null && variant != null &&
                            lastVar.ReferenceAllele.Length > 1 &&
                            lastVar.Genotype != Genotype.HomozygousRef &&
                            variant.ReferencePosition == lastVar.ReferencePosition + 1)
                        {
                            variant = ProcessDeletion(lastVar, variant, alleleReader, snvLoci, indelLoci);
                        }

                        // this happens if last variants in file are multi-allelic or a deletion
                        if (variant == null)
                        {
                            break;
                        }

                        // Skipped alleles, and chromosomes whose name contains no digit, are ignored;
                        // lastVar deliberately keeps pointing at the last allele that was processed.
                        if (ShouldSkipVariant(variant) || !variant.Chromosome.Any(char.IsDigit))
                        {
                            continue;
                        }

                        var variantType = GetVariantType(variant);
                        if (variantType == VariantType.NoVariant)
                        {
                            // A locus without a variant is recorded in both collections.
                            snvLoci.AddLocus(variant);
                            indelLoci.AddLocus(variant);
                        }
                        else if (variantType == VariantType.Snv)
                        {
                            snvLoci.AddLocus(variant);
                        }
                        else if (variantType == VariantType.Indel)
                        {
                            indelLoci.AddLocus(variant);
                        }

                        lastVar = variant;
                    }
                    catch (Exception ex)
                    {
                        // Log where processing failed, then rethrow with the original stack trace.
                        Logger.WriteToLog(string.Format("Fatal error processing vcf; Check {0}, position {1}.  Exception: {2}",
                                                        variant.Chromosome, variant.ReferencePosition, ex));
                        throw;
                    }
                }
            }

            return new List <RecalibratedVariantsCollection> {
                snvLoci, indelLoci
            };
        }
Esempio n. 3
0
        /// <summary>
        /// Handles a locus at which several alleles are reported. All alleles co-located with
        /// <paramref name="lastVar"/> are read, the entry previously recorded for the first of them is removed,
        /// and the allele with the highest alternate frequency is added back only when the locus passes the
        /// multiallelic filters below.
        /// </summary>
        /// <param name="lastVar">The allele processed just before <paramref name="variant"/>.</param>
        /// <param name="variant">The allele found to be co-located with <paramref name="lastVar"/>.</param>
        /// <param name="reader">Reader from which further alleles are pulled.</param>
        /// <param name="snvLoci">SNV collection to correct.</param>
        /// <param name="indelLoci">Indel collection to correct.</param>
        /// <returns>The last value the reader produced: the first allele that is not co-located, if any.</returns>
        private static CalledAllele ProcessMultiAllelicVariant(CalledAllele lastVar, CalledAllele variant,
                                                               AlleleReader reader, RecalibratedVariantsCollection snvLoci, RecalibratedVariantsCollection indelLoci)
        {
            // SNPs and insertions take the same path. 1/2 loci are kept out of the model because it describes
            // alignment bias towards the reference.
            var coLocated = new List<CalledAllele> { lastVar, variant };

            var frequencies = new List<double>
            {
                GetAlternateAlleleFrequency(lastVar),
                GetAlternateAlleleFrequency(variant)
            };

            // Indices of the two leading alleles, highest frequency first.
            int[] ranked = { 0, 1 };
            Array.Sort(frequencies.ToArray(), ranked);
            Array.Reverse(ranked);

            // Approximate reference frequency; it can go negative when the reference alleles differ in length.
            double refFrequency = 1 - frequencies[0] - frequencies[1];

            for (int next = 2; GetNextUncrushedAllele(reader, out variant); next++)
            {
                if (!variant.IsCoLocatedAllele(lastVar))
                {
                    break;
                }

                coLocated.Add(variant);
                double frequency = GetAlternateAlleleFrequency(variant);
                frequencies.Add(frequency);

                // Keep the ranking of the two most frequent alleles up to date.
                if (frequency > frequencies[ranked[0]])
                {
                    ranked[1] = ranked[0];
                    ranked[0] = next;
                }
                else if (frequency > frequencies[ranked[1]])
                {
                    ranked[1] = next;
                }

                refFrequency -= frequency;
                lastVar = variant;
            }

            // Take back the entry that was recorded for the first allele of this locus.
            var firstType = GetVariantType(coLocated[0]);
            if (firstType == VariantType.Snv)
            {
                snvLoci.RemoveLastEntry();
            }
            else if (firstType == VariantType.Indel)
            {
                indelLoci.RemoveLastEntry();
            }

            CalledAllele primary   = coLocated[ranked[0]];
            CalledAllele secondary = coLocated[ranked[1]];

            // Both leading alleles must be of the same kind; mixed loci are dropped.
            var primaryType = GetVariantType(primary);
            RecalibratedVariantsCollection target;
            if (primaryType == VariantType.Snv && GetVariantType(secondary) == VariantType.Snv)
            {
                target = snvLoci;
            }
            else if (primaryType == VariantType.Indel && GetVariantType(secondary) == VariantType.Indel)
            {
                target = indelLoci;
            }
            else
            {
                return variant;
            }

            // SNV loci need the two leading alleles, or the leading allele plus reference, to exceed
            // MultiAllelicThreshold; indel loci pass this gate unconditionally.
            bool frequencyGatePassed;
            if (target == snvLoci &&
                (GetAlternateAlleleFrequency(primary) + GetAlternateAlleleFrequency(secondary) > MultiAllelicThreshold ||
                 GetAlternateAlleleFrequency(primary) + refFrequency > MultiAllelicThreshold))
            {
                frequencyGatePassed = true;
            }
            else
            {
                frequencyGatePassed = target == indelLoci;
            }

            if (!frequencyGatePassed)
            {
                return variant;
            }

            if (ShouldSkipVariant(primary))
            {
                return variant;
            }

            // A 1/2 locus: leading allele between the het and hom-alt thresholds, runner-up above the het threshold.
            bool isOneTwo = frequencies[ranked[0]] > HetThreshold &&
                            frequencies[ranked[0]] < HomAltThreshold &&
                            frequencies[ranked[1]] > HetThreshold;
            if (!isOneTwo)
            {
                target.AddLocus(primary);
            }

            return variant;
        }