/// <summary>
/// Initializes a new instance of the <see cref="Bio.Variant.ContinuousFrequencySNPGenotype"/> class
/// from the outcome of a genotype call, optionally recording where the pile-up was located.
/// </summary>
/// <param name="res">The outcome of the genotype call; stored in <c>ResultType</c>.</param>
/// <param name="pileup">
/// Optional pile-up the call was made from. When it is not <c>null</c>, its <c>Position</c> and
/// <c>InsertionOffSet</c> are copied onto this instance; when it is <c>null</c>, neither is assigned.
/// </param>
public ContinuousFrequencySNPGenotype(GenotypeCallResult res, PileUp pileup = null)
{
    this.ResultType = res;

    // Without a pile-up there is no location information to record.
    if (pileup == null)
    {
        return;
    }

    this.pOriginalPosition = pileup.Position;
    this.InsertionOffset = pileup.InsertionOffSet;
}
/// <summary>
/// Calls a continuous-frequency SNP genotype from a single pile-up.
/// </summary>
/// <remarks>
/// The call proceeds in three steps. First, if <c>pileupHasTooManyIndels</c> flags the pile-up, a
/// <c>TooManyGaps</c> result is returned. Second, gap bases, N bases and reads whose Phred score is
/// 17 or lower are dropped; if nothing remains, a <c>NoData</c> result is returned. Third, the base
/// frequencies are initialised from the counts of the remaining reads and, when more than one base
/// was observed and <c>DO_EM_ESTIMATION</c> is set, refined iteratively until the summed value
/// returned by <c>updateConditionalProbabilities</c> improves by no more than 1e-3.
/// Every returned genotype is constructed with <paramref name="pu"/>, so all result kinds carry
/// the pile-up's position information.
/// </remarks>
/// <param name="pu">The pile-up to genotype. Must not be <c>null</c>.</param>
/// <returns>
/// A genotype whose result type is <c>TooManyGaps</c>, <c>NoData</c>, or (via the frequency-based
/// constructor) <c>GenotypeCalled</c>.
/// </returns>
private static ContinuousFrequencySNPGenotype callGenotype(PileUp pu)
{
    // Reads must score strictly above this Phred value to be counted.
    const int minPhredScoreExclusive = 17;

    // Iteration stops once the likelihood gain of a round is at or below this value.
    const double convergenceTolerance = 1e-3;

    // If it looks like a deletion, skip it.
    if (pileupHasTooManyIndels(pu))
    {
        return new ContinuousFrequencySNPGenotype(GenotypeCallResult.TooManyGaps, pu);
    }

    // Otherwise, drop gaps, ambiguous bases and low scoring reads.
    var filteredBases = pu.Bases
        .Where(z => z.Base != BaseAndQuality.N_BASE_INDEX
                 && z.Base != BaseAndQuality.GAP_BASE_INDEX
                 && z.PhredScore > minPhredScoreExclusive)
        .ToArray();

    if (filteredBases.Length == 0)
    {
        // Pass the pile-up along so the result keeps its position, exactly as the
        // TooManyGaps and successful-call results do.
        return new ContinuousFrequencySNPGenotype(GenotypeCallResult.NoData, pu);
    }

    // Initialize the continuous frequency based on counts of bases.
    var basePairCounts = new int[BasePairFrequencies.NUM_BASES];
    foreach (var bp in filteredBases)
    {
        basePairCounts[bp.Base]++;
    }

    var freqs = basePairCounts.Select(x => x / (double)filteredBases.Length).ToArray();
    var theta = new BasePairFrequencies(freqs);

    // If only one base has data, or EM optimization is switched off, the count-based
    // frequencies above are already the final answer.
    if (basePairCounts.Count(x => x > 0) > 1 && DO_EM_ESTIMATION)
    {
        // First make a NumReads x NUM_BASES matrix of per-read conditional probabilities.
        double[][] conditionalProbs = new double[filteredBases.Length][];
        for (int i = 0; i < filteredBases.Length; i++)
        {
            conditionalProbs[i] = new double[BasePairFrequencies.NUM_BASES];
        }

        // Seeded so that the first round always runs and always registers as an improvement.
        double likDif = double.MaxValue;
        double lastLik = double.MinValue;

        while (likDif > convergenceTolerance)
        {
            // Refresh every read's row under the current theta and accumulate the returned
            // per-read values (presumably fills the row in place — it is read back below).
            double lik = 0;
            for (int i = 0; i < conditionalProbs.Length; i++)
            {
                lik += updateConditionalProbabilities(theta, conditionalProbs[i], filteredBases[i]);
            }

            likDif = lik - lastLik;
            lastLik = lik;

            // Now update thetas: each frequency becomes the mean, over reads, of the
            // conditional probability for that base.
            Array.Clear(theta.Frequencies, 0, BasePairFrequencies.NUM_BASES);
            for (int i = 0; i < conditionalProbs.Length; i++)
            {
                var cur = conditionalProbs[i];
                for (int j = 0; j < cur.Length; j++)
                {
                    theta.Frequencies[j] += cur[j];
                }
            }

            for (int j = 0; j < theta.Frequencies.Length; j++)
            {
                theta.Frequencies[j] /= (double)conditionalProbs.Length;
            }
        }
    }

    return new ContinuousFrequencySNPGenotype(theta, basePairCounts, pu);
}
/// <summary>
/// Decides whether the share of gap bases in a pile-up is high enough to treat the
/// position as a deletion.
/// </summary>
/// <param name="up">The pile-up whose bases are inspected. Must not be <c>null</c>.</param>
/// <returns>
/// <c>true</c> when the fraction of bases equal to <c>BaseAndQuality.GAP_BASE_INDEX</c> is at
/// least <c>MIN_DELETION_PERCENTAGE_NEEDED_TO_CALL</c>; otherwise <c>false</c>.
/// </returns>
private static bool pileupHasTooManyIndels(PileUp up)
{
    // Tally the gap bases by hand.
    int gapCount = 0;
    foreach (var entry in up.Bases)
    {
        if (entry.Base == BaseAndQuality.GAP_BASE_INDEX)
        {
            gapCount++;
        }
    }

    // Floating-point division: an empty pile-up gives 0 / 0.0 = NaN, and NaN fails the
    // >= comparison below, so such a pile-up is reported as not having too many indels.
    double gapFraction = gapCount / (double)up.Bases.Count;

    return gapFraction >= MIN_DELETION_PERCENTAGE_NEEDED_TO_CALL;
}
/// <summary>
/// Initializes a new instance of the <see cref="Bio.Variant.ContinuousFrequencySNPGenotype"/> class
/// for a successfully called genotype.
/// </summary>
/// <remarks>
/// Chains to the result-type constructor with <c>GenotypeCallResult.GenotypeCalled</c>, stores the
/// supplied frequencies and counts, and records in <c>indexMax</c> the index of the largest
/// frequency. When several entries share the largest value, the lowest index is kept.
/// </remarks>
/// <param name="frequencies">The estimated base frequencies. Must not be <c>null</c> or empty.</param>
/// <param name="originalReadCounts">The per-base read counts; stored in <c>OriginalBasePairCounts</c>.</param>
/// <param name="pileup">Optional pile-up forwarded to the result-type constructor.</param>
public ContinuousFrequencySNPGenotype(BasePairFrequencies frequencies, int[] originalReadCounts, PileUp pileup = null)
    : this(GenotypeCallResult.GenotypeCalled, pileup)
{
    Frequencies = frequencies;
    OriginalBasePairCounts = originalReadCounts;

    // Scan for the position of the largest frequency, starting from the first entry.
    var values = frequencies.Frequencies;
    indexMax = 0;
    var largest = values[0];

    for (int candidate = 1; candidate < values.Length; candidate++)
    {
        var current = values[candidate];
        if (!(current > largest))
        {
            // Not strictly greater: the earlier index stays in place.
            continue;
        }

        largest = current;
        indexMax = candidate;
    }
}