/// <summary>
/// Builds a delimited annotation for a polymorphism made of four fields: the position,
/// the reference allele, the alternate allele, and whether the polymorphism is part of
/// the best matching haplotype.
/// </summary>
/// <param name="poly">The polymorphism to annotate.</param>
/// <returns>
/// The four fields joined with <c>FIELD_DELIM</c>. The last field is
/// "MutationExcludedFromSearch" when the position is not a key of <c>validPolys</c>,
/// otherwise the string form of the membership test against the best match.
/// </returns>
public override string GetAnnotation(Polymorphism poly)
{
    int pos = poly.position;
    string reference;
    string inBestHaplotype;

    // Sentinel that is only emitted when none of the cases below assigns an allele.
    string alt = "ERROR";

    //TODO: Move this logic in to polymorphism class
    if (poly.mutation == Mutations.INS)
    {
        // An insertion has no reference base; its alternate allele is the inserted sequence.
        // (Previously this assignment lived in the non-insertion branch, so insertions
        // always reported the "ERROR" sentinel.)
        reference = "-";
        alt = poly.InsertedPolys;
    }
    else
    {
        // The position is used 1-based here, hence the -1 when indexing rCRS.
        reference = Polymorphism.rCRS[pos - 1].ToString();
    }

    if (poly.mutation == Mutations.DEL)
    {
        alt = "-";
    }
    else if (MutationAssigner.MutationIsBasePair(poly.mutation))
    {
        alt = MutationAssigner.getBase(poly.mutation);
    }

    if (!validPolys.ContainsKey(pos))
    {
        inBestHaplotype = "MutationExcludedFromSearch";
    }
    else
    {
        inBestHaplotype = bestMatch.MatchingPolymorphisms.Contains(poly).ToString();
    }

    return String.Join(FIELD_DELIM, new[] { pos.ToString(), reference, alt, inBestHaplotype });
}
/// <summary>
/// Looks up the annotation data stored for a polymorphism.
/// </summary>
/// <param name="p">The polymorphism to annotate.</param>
/// <returns>
/// The data stored under the site/reference/alternate key; failing that, the data stored
/// under the key built with <c>BigTableData.ALL_SITE_SAME</c> for both alleles; otherwise
/// <c>nullAnnotation</c>. Complex mutations always yield <c>nullAnnotation</c>.
/// </returns>
public override string GetAnnotation(Polymorphism p)
{
    // Complex mutations are never looked up.
    if (MutationAssigner.MutationIsComplex(p.Mutation))
    {
        return nullAnnotation;
    }

    var position = p.Position;
    var referenceBase = Polymorphism.getReferenceBaseSingle(position);
    var alternateBase = MutationAssigner.getBase(p.Mutation);

    // First choice: an entry for this exact reference/alternate pair.
    var exactKey = makeQueryString(position, referenceBase, alternateBase);
    if (positionToData.ContainsKey(exactKey))
    {
        return positionToData[exactKey].Data;
    }

    // Second choice: the entry keyed with ALL_SITE_SAME for both alleles at this site.
    var siteWideKey = makeQueryString(position, BigTableData.ALL_SITE_SAME, BigTableData.ALL_SITE_SAME);
    if (positionToData.ContainsKey(siteWideKey))
    {
        return positionToData[siteWideKey].Data;
    }

    return nullAnnotation;
}
/// <summary>
/// Calls SNPs from a collection of reads using the pile-up method and assigns a
/// haplotype from the SNPs that were found. (The original documentation describes
/// the input as a sorted list of sequences.)
/// </summary>
/// <param name="sequences">The reads the pile-up is built from.</param>
/// <returns>
/// A report constructed with <c>AlgorithmResult.Failed</c> when no genotype was called at a
/// position with a known original position; otherwise a report holding every genotype call
/// together with the haplotype report.
/// </returns>
public static SNPCallerReport CallSNPs(IEnumerable<CompactSAMSequence> sequences)
{
    // Pile up the reads, then turn the pile-ups into genotype calls.
    var allCalls = ContinuousGenotypeCaller
        .CallContinuousGenotypes(PileUpProducer.CreatePileupFromReads(sequences))
        .ToList();

    // Keep only successfully called genotypes that have an original position.
    var callable = allCalls
        .Where(g => g.ResultType == GenotypeCallResult.GenotypeCalled && g.OriginalPosition.HasValue)
        .ToList();
    if (callable.Count == 0)
    {
        return new SNPCallerReport(AlgorithmResult.Failed);
    }

    // The coverage a site must reach is the square root of the median site coverage.
    var coverages = callable.Select(g => g.TotalObservedBases).ToList();
    coverages.Sort();
    var medianCoverage = coverages[coverages.Count / 2];
    var requiredCoverage = Math.Sqrt(medianCoverage);

    // Walk the sufficiently covered sites; each site whose most frequent genotype
    // starts with a base other than the reference base becomes a polymorphism.
    var coveredPositions = new HashSet<int>();
    var snps = new List<Polymorphism>();
    foreach (var call in callable)
    {
        if (!(call.TotalObservedBases >= requiredCoverage))
        {
            continue;
        }

        var position = call.OriginalPosition.Value;
        coveredPositions.Add(position);

        var referenceBase = ReferenceGenome.GetReferenceBaseAt_rCRSPosition(position);
        var calledGenotype = call.GetMostFrequentGenotype();
        if (referenceBase != calledGenotype[0])
        {
            snps.Add(new Polymorphism(position, MutationAssigner.getBase(calledGenotype)));
        }
    }

    // Assign the haplotype, restricting the search to SNPs at positions that were genotyped.
    var searcher = new HaplotypeSearcher();
    var genotypedSnpFilter = new PolymorphismFilter(p => p.IsSNP && coveredPositions.Contains(p.Position));
    var pileupSample = new SimpleSample("Pileup", snps, genotypedSnpFilter);
    var haplotypeReport = searcher.GetHaplotypeReport(pileupSample);

    return new SNPCallerReport(allCalls, haplotypeReport);
}
/// <summary>
/// Returns the value stored in <c>positionToFrequency</c> for the polymorphism's position,
/// formatted as text.
/// </summary>
/// <param name="p">The polymorphism to annotate.</param>
/// <returns>
/// The stored value's string form, or "NA" when the mutation is complex or the position
/// has no entry.
/// </returns>
public override string GetAnnotation(Polymorphism p)
{
    const string notAvailable = "NA";

    // Complex mutations are never looked up.
    if (MutationAssigner.MutationIsComplex(p.Mutation))
    {
        return notAvailable;
    }

    var position = p.Position;
    if (positionToFrequency.ContainsKey(position))
    {
        return positionToFrequency[position].ToString();
    }

    return notAvailable;
}
/// <summary>
/// Records a polymorphism and updates the current state to reflect it.
/// </summary>
/// <remarks>
/// The polymorphism is always appended to <c>History</c>, even when the call ends in an
/// exception. <c>CurrentState</c> is then updated by the first rule that applies:
/// a back mutation removes its matching entry; an empty state simply takes the new
/// polymorphism; an insertion is appended (after consistency checks); a single base-pair
/// change is replaced by the new polymorphism; "1719A" following "1719.1G" is a
/// special case; anything else is reported as unresolved.
/// </remarks>
/// <param name="p">The polymorphism to apply.</param>
/// <exception cref="HaploGrepException">
/// Thrown when a back mutation does not match exactly one current entry, when an insertion
/// cannot be combined with the current entries, or when the change cannot be resolved.
/// </exception>
public void AddPoly(Polymorphism p)
{
    History.Add(p);

    // This is a very backward way of representing this, as far as I can tell from the source:
    // if 152C on the tree is followed by 152C!, the 152C mutation should simply be cleared.
    if (p.isBackMutation)
    {
        // Make sure there is exactly one entry to remove.
        var toDrop = CurrentState.Where(x => x.position == p.position && x.mutation == p.mutation).ToList();
        if (toDrop.Count != 1)
        {
            throw new HaploGrepException("Cannot back mutate when no mutation appeared!");
        }
        CurrentState.Remove(toDrop[0]);
        return;
    }

    // A back mutation can leave the state empty. Previously the unconditional read of
    // CurrentState[0] below threw ArgumentOutOfRangeException in that situation; with
    // nothing to combine with, the new polymorphism is simply recorded.
    if (CurrentState.Count == 0)
    {
        CurrentState.Add(p);
        return;
    }

    var singleBasePair = CurrentState.Count == 1
        && MutationAssigner.MutationIsBasePair(CurrentState[0].mutation);

    if (p.mutation == Mutations.INS)
    {
        // Insertions are appended on. This check must come before the replacement rule
        // below: in the original ordering the replacement rule always matched first, so
        // this case was unreachable and the insertion overwrote the base-pair change.
        if (singleBasePair)
        {
            CurrentState.Add(p);
            return;
        }

        // Some spots have repeated inserts of the same base; anything else on top of a
        // non-insertion or a multi-base insertion is not supported.
        if (CurrentState.Any(z => z.mutation != Mutations.INS) || CurrentState.Any(z => z.InsertedPolys.Length > 1))
        {
            throw new HaploGrepException("Can't add insertions over multiple complex backgrounds");
        }

        //TODO: 455.1T and 455.2T are simply indicative of a two bp insertion (at the 1 position
        // and the 2 position), which is confusing.
        // Verify that all insertions are the same; this happens in some cases such as 455.1T
        // being followed by 455.2T.
        var insertedSequences = new HashSet<string>(CurrentState.Select(x => x.insertedPolys));
        insertedSequences.Add(p.insertedPolys);
        if (insertedSequences.Count > 1)
        {
            // Uses the same exception type as every other failure in this method
            // (previously a bare System.Exception).
            throw new HaploGrepException("Cannot add a new type of insertion");
        }
        CurrentState.Add(p);
        return;
    }

    // Simple SNP replacement: a lone base-pair change is superseded by the new polymorphism.
    if (singleBasePair)
    {
        CurrentState.Clear();
        CurrentState.Add(p);
        return;
    }

    // Special exception for 1719.1G followed by 1719A, which is another odd case.
    if (p.position == 1719 && p.ToString() == "1719A")
    {
        if (CurrentState.Count != 1 || CurrentState[0].ToString() != "1719.1G")
        {
            throw new HaploGrepException("1719 posiiton exception");
        }
        CurrentState.Add(p);
        CurrentState.Reverse();
        return;
    }

    // This can happen if the base of interest is not the reference or the last mutation.
    // Remember the offending history once, then report the failure.
    if (!UnresolvedSeriesOfChanges.Contains(History))
    {
        UnresolvedSeriesOfChanges.Add(History);
    }
    throw new HaploGrepException("Tried to change a polymorphism at a location where a mutation already occurred without it being asimple thing!");
}