/// <summary>
/// Consumes the alleles that follow a deletion and fall within its reference span,
/// registering those that carry an alt allele in <paramref name="snvLoci"/> or
/// <paramref name="indelLoci"/>, and discarding loci that turn out to be multiallelic.
/// </summary>
/// <param name="deletionVar">The deletion whose reference allele defines the span being walked.</param>
/// <param name="variant">The first allele after the deletion; must not be null.</param>
/// <param name="reader">Reader from which subsequent alleles are pulled.</param>
/// <param name="snvLoci">Collection receiving alleles typed as <c>VariantType.Snv</c>.</param>
/// <param name="indelLoci">Collection receiving every other allele that has an alt allele.</param>
/// <returns>
/// The allele most recently obtained (the incoming one if nothing was read). When the reader
/// cannot supply another allele, whatever <c>GetNextUncrushedAllele</c> left in its out
/// parameter is returned; the caller in this file treats null as "end of input".
/// </returns>
private static CalledAllele ProcessDeletion(CalledAllele deletionVar, CalledAllele variant, AlleleReader reader, RecalibratedVariantsCollection snvLoci, RecalibratedVariantsCollection indelLoci)
{
    // Last reference position covered by the deletion's reference allele.
    var deletionEnd = deletionVar.ReferencePosition + deletionVar.ReferenceAllele.Length - 1;

    for (int i = 1; i < deletionVar.ReferenceAllele.Length; i++)
    {
        if (ShouldSkipVariant(variant))
        {
            // NOTE(review): nothing is read before continuing, so the same allele is
            // re-tested until the loop counter runs out and it is returned unchanged.
            // Confirm this is intended rather than advancing the reader.
            continue;
        }

        if (variant.HasAnAltAllele)
        {
            if (GetVariantType(variant) == VariantType.Snv)
            {
                snvLoci.AddLocus(variant);
            }
            else
            {
                indelLoci.AddLocus(variant);
            }
        }

        CalledAllele lastVar = variant;

        // Stop as soon as the reader has nothing more to give; dereferencing the
        // out value after a failed read would throw instead of ending cleanly.
        if (!GetNextUncrushedAllele(reader, out variant) || variant == null)
        {
            return variant;
        }

        // If there is a multiallelic variant inside the deletion, ignore the locus
        // by undoing the entry that was just added for it.
        if (variant.IsCoLocatedAllele(lastVar) && lastVar.HasAnAltAllele)
        {
            if (GetVariantType(lastVar) == VariantType.Snv)
            {
                snvLoci.RemoveLastEntry();
            }
            else
            {
                indelLoci.RemoveLastEntry();
            }
        }

        // Skip every remaining allele that sits at the same chromosome/position.
        while (variant.ReferencePosition == lastVar.ReferencePosition && variant.Chromosome == lastVar.Chromosome)
        {
            lastVar = variant;
            if (!GetNextUncrushedAllele(reader, out variant) || variant == null)
            {
                return variant;
            }
        }

        // The reader has moved past the end of the deletion on the same chromosome.
        if (variant.ReferencePosition > deletionEnd && variant.Chromosome == deletionVar.Chromosome)
        {
            break;
        }
    }

    return variant;
}
/// <summary>
/// Reads alleles one at a time from the input and sorts the usable loci into an SNV
/// collection and an indel collection. Multiallelic sites and positions directly following
/// a non-reference deletion are delegated to <c>ProcessMultiAllelicVariant</c> and
/// <c>ProcessDeletion</c> respectively.
/// </summary>
/// <param name="vcfIn">Value passed to the <c>AlleleReader</c> constructor (presumably the input VCF path).</param>
/// <returns>A two-element list: the SNV loci collection first, the indel loci collection second.</returns>
/// <exception cref="Exception">
/// Any exception raised while processing an allele is logged with its chromosome and
/// position and then rethrown unchanged.
/// </exception>
public static List<RecalibratedVariantsCollection> GetVariantFrequencies(string vcfIn)
{
    var snvLoci = new RecalibratedVariantsCollection();
    var indelLoci = new RecalibratedVariantsCollection();

    // Starts as an empty allele so the first co-location / deletion checks have something to compare against.
    CalledAllele lastVar = new CalledAllele();

    // The reader is created inside the using header so it is disposed even if the header check throws.
    using (var alleleReader = new AlleleReader(vcfIn))
    {
        // Check headers
        CheckHeader(alleleReader);

        CalledAllele variant;
        while (GetNextUncrushedAllele(alleleReader, out variant))
        {
            try
            {
                // Check if multiallelic
                if (variant.IsCoLocatedAllele(lastVar))
                {
                    variant = ProcessMultiAllelicVariant(lastVar, variant, alleleReader, snvLoci, indelLoci);
                }

                // Check if within deletion: previous allele has a multi-base reference, is not
                // homozygous reference, and the current allele sits at the very next position.
                if (lastVar.ReferenceAllele != null && variant != null &&
                    lastVar.ReferenceAllele.Length > 1 &&
                    lastVar.Genotype != Genotype.HomozygousRef &&
                    variant.ReferencePosition == lastVar.ReferencePosition + 1)
                {
                    variant = ProcessDeletion(lastVar, variant, alleleReader, snvLoci, indelLoci);
                }

                // this happens if last variants in file are multi-allelic or a deletion
                if (variant == null)
                {
                    break;
                }

                // Skipped alleles, and chromosomes whose name contains no digit, are ignored;
                // lastVar is intentionally left untouched in that case.
                if (ShouldSkipVariant(variant) || !variant.Chromosome.Any(char.IsDigit))
                {
                    continue;
                }

                var variantType = GetVariantType(variant);
                if (variantType == VariantType.NoVariant)
                {
                    // Reference-only loci count toward both collections.
                    snvLoci.AddLocus(variant);
                    indelLoci.AddLocus(variant);
                }
                else if (variantType == VariantType.Snv)
                {
                    snvLoci.AddLocus(variant);
                }
                else if (variantType == VariantType.Indel)
                {
                    indelLoci.AddLocus(variant);
                }

                lastVar = variant;
            }
            catch (Exception ex)
            {
                Logger.WriteToLog(string.Format("Fatal error processing vcf; Check {0}, position {1}.  Exception: {2}", variant.Chromosome, variant.ReferencePosition, ex));
                throw;
            }
        }
    }

    return new List<RecalibratedVariantsCollection> { snvLoci, indelLoci };
}
/// <summary>
/// Handles a site where several alleles share one location. Reads all remaining co-located
/// alleles, tracks the two with the highest alternate allele frequency, removes the entry
/// previously recorded for the first allele, and re-adds the top allele only when the site
/// passes the multiallelic, skip and "not 1/2" checks.
/// </summary>
/// <param name="lastVar">The allele already seen at this location.</param>
/// <param name="variant">The second allele found at the same location.</param>
/// <param name="reader">Reader from which further alleles are pulled.</param>
/// <param name="snvLoci">SNV loci collection; may lose its last entry and/or gain the top allele.</param>
/// <param name="indelLoci">Indel loci collection; may lose its last entry and/or gain the top allele.</param>
/// <returns>
/// The first allele read that is not co-located with the site, or whatever
/// <c>GetNextUncrushedAllele</c> left in its out parameter when reading stopped.
/// </returns>
private static CalledAllele ProcessMultiAllelicVariant(CalledAllele lastVar, CalledAllele variant, AlleleReader reader, RecalibratedVariantsCollection snvLoci, RecalibratedVariantsCollection indelLoci)
{
    // SNPs and insertions are treated alike: check whether the two major variants have total VF > 0.8.
    // 1/2 variants are excluded from the modeling because the model is for alignment bias of the reference.
    var alleles = new List<CalledAllele> { lastVar, variant };
    var frequencies = new List<double>
    {
        GetAlternateAlleleFrequency(lastVar),
        GetAlternateAlleleFrequency(variant)
    };

    // rank[0] / rank[1] hold the indices of the highest and second-highest frequency.
    // Sorting a copy of the frequencies ascending drags the indices along; reversing gives descending order.
    int[] rank = { 0, 1 };
    double[] sortKeys = frequencies.ToArray();
    Array.Sort(sortKeys, rank);
    Array.Reverse(rank);

    // NB: refVf is only approximate and could be negative if ref alleles are different lengths.
    double refVf = 1 - frequencies[0] - frequencies[1];

    while (GetNextUncrushedAllele(reader, out variant) && variant.IsCoLocatedAllele(lastVar))
    {
        int addedIndex = alleles.Count;
        alleles.Add(variant);

        double addedVf = GetAlternateAlleleFrequency(variant);
        frequencies.Add(addedVf);

        // Keep the top-two ranking current.
        if (addedVf > frequencies[rank[0]])
        {
            rank[1] = rank[0];
            rank[0] = addedIndex;
        }
        else if (addedVf > frequencies[rank[1]])
        {
            rank[1] = addedIndex;
        }

        refVf -= addedVf;

        lastVar = variant;
        variant = new CalledAllele();
    }

    // Undo the entry recorded for the first allele at this site.
    var firstAlleleType = GetVariantType(alleles[0]);
    if (firstAlleleType == VariantType.Snv)
    {
        snvLoci.RemoveLastEntry();
    }
    else if (firstAlleleType == VariantType.Indel)
    {
        indelLoci.RemoveLastEntry();
    }

    CalledAllele major = alleles[rank[0]];
    CalledAllele minor = alleles[rank[1]];

    // Both top alleles must be of the same kind; a mixed site contributes nothing.
    RecalibratedVariantsCollection target;
    if (GetVariantType(major) == VariantType.Snv && GetVariantType(minor) == VariantType.Snv)
    {
        target = snvLoci;
    }
    else if (GetVariantType(major) == VariantType.Indel && GetVariantType(minor) == VariantType.Indel)
    {
        target = indelLoci;
    }
    else
    {
        return variant;
    }

    // SNV sites need either the top two alleles, or the top allele plus reference,
    // to exceed the multiallelic threshold; indel sites pass unconditionally.
    bool passesFrequencyGate =
        (target == snvLoci &&
            (GetAlternateAlleleFrequency(major) + GetAlternateAlleleFrequency(minor) > MultiAllelicThreshold ||
             GetAlternateAlleleFrequency(major) + refVf > MultiAllelicThreshold))
        || target == indelLoci;

    if (!passesFrequencyGate || ShouldSkipVariant(major))
    {
        return variant;
    }

    // A 1/2 site: the top allele looks heterozygous and the runner-up is above the het threshold too.
    double majorVf = frequencies[rank[0]];
    double minorVf = frequencies[rank[1]];
    bool isOneTwoGenotype = majorVf > HetThreshold && majorVf < HomAltThreshold && minorVf > HetThreshold;

    if (!isOneTwoGenotype)
    {
        target.AddLocus(major);
    }

    return variant;
}