/// <summary>
/// Builds a report for a sample from the comparisons made between that sample and
/// candidate haplotypes, recording the best hit, the number of hits tied with it,
/// and the runner-up.
/// </summary>
/// <param name="Comparisons">
/// Comparisons to rank. Must contain at least one element. The list is sorted in place,
/// highest <c>Rank</c> first, so the caller's list is reordered.
/// </param>
/// <param name="sample">The sample the comparisons were made for; stored in <c>Sample</c>.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="Comparisons"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException"><paramref name="Comparisons"/> is empty.</exception>
public HaploTypeReport(List<HaplotypeComparison> Comparisons, SimpleSample sample)
{
    if (Comparisons == null)
    {
        throw new System.ArgumentNullException("Comparisons");
    }

    if (Comparisons.Count == 0)
    {
        // Same exception type the unguarded indexer used to raise, but with a usable message.
        throw new System.ArgumentOutOfRangeException(
            "Comparisons",
            "At least one haplotype comparison is required to build a report.");
    }

    Sample = sample;

    // Descending by rank: swap the operands rather than negating the comparison result.
    Comparisons.Sort((x, y) => y.Rank.CompareTo(x.Rank));

    BestHit = Comparisons[0];

    // Counts the leading run of comparisons whose rank ties with the best hit (always >= 1).
    NumberOfEquallyGoodBestHits = Comparisons.TakeWhile(x => x.Rank == BestHit.Rank).Count();

    // With a single comparison there is no runner-up; leave it null instead of throwing.
    SecondBestHit = Comparisons.Count > 1 ? Comparisons[1] : null;
}
/// <summary>
/// Aligns a sequence against the reference genome, appends the alignment details, the
/// haplotype report rows and one annotation line per polymorphism to
/// <paramref name="dataLines"/>, and returns the haplotype report.
/// </summary>
/// <param name="toBuildReportFor">Sequence to align and search for polymorphisms.</param>
/// <param name="dataLines">List that receives the generated report lines; it is appended to in place.</param>
/// <param name="id">Identifier passed to the <c>SimpleSample</c> created for the sequence.</param>
/// <returns>
/// The haplotype report, or <c>null</c> when the sequence does not produce exactly one delta
/// alignment with the reference. In the <c>null</c> case nothing is added to
/// <paramref name="dataLines"/>.
/// </returns>
public HaploTypeReport GetHaplotypeReport(Sequence toBuildReportFor, List<string> dataLines, string id = "Sample")
{
    var delts = ReferenceGenome.GetDeltaAlignments(toBuildReportFor).SelectMany(x => x).ToList();
    if (delts.Count != 1)
    {
        // The final assembly had no or multiple delta alignments with the reference, whereas
        // only 1 is expected. This is reported to the caller as null (the exception that used
        // to follow this return was unreachable and has been removed).
        return null;
    }

    var delt = delts[0];
    var aln = delt.ConvertDeltaToSequences();
    dataLines.Add("REF_OFFSET =" + delt.FirstSequenceStart.ToString());
    dataLines.Add("ASSEMBLY_OFFSET =" + delt.SecondSequenceStart.ToString());
    //dataLines.Add("ALN_SCORE=" + aln.Score.ToString());

    // Direct casts: a sequence of an unexpected type fails here with InvalidCastException
    // instead of a NullReferenceException on the next line.
    var refseq = (Sequence)aln.FirstSequence;
    dataLines.Add("REF_SEQUENCE = " + refseq.ConvertToString());
    var qseq = (Sequence)aln.SecondSequence;
    dataLines.Add("QUERY_SEQUENCE= " + qseq.ConvertToString());

    // Now get all polymorphisms, and sort haplotypes.
    // TODO: Don't call nucmer twice
    var allPolys = GenomeToHaploGrepConverter.FindPolymorphisms(toBuildReportFor);
    var pf = new PolymorphismFilter();
    var sample = new SimpleSample(id, allPolys, pf);
    var comparisons = treeRoot.GetAllChildren().Select(x => new HaplotypeComparison(x, sample)).ToList();
    var report = new HaploTypeReport(comparisons, sample);
    dataLines.AddRange(report.GetRowReportLines());

    // Polymorphisms kept on the sample, keyed by position, for the haplotype match annotation.
    var passedFilters = sample.Polymorphisms.ToDictionary(x => x.position);

    // Now get list of differences.
    dataLines.Add("#Data report below, (Note in rCRS positions, accounting for 'N' at position 3107 and in 1 based index)");
    var reporters = new List<VariantAnnotation>()
    {
        new HaploTypeMatchAnnotation(report.BestHit, passedFilters),
        new FrequencyFileAnnotation(),
        new BigTableAnnotation()
    };

    // One header line, then one delimited line per polymorphism with a field from each reporter.
    var headerLine = String.Join(VariantAnnotation.FIELD_DELIM, reporters.Select(x => x.GetHeaderLine()).ToArray());
    dataLines.Add(headerLine);
    var infoLines = allPolys.Select(x => String.Join(VariantAnnotation.FIELD_DELIM, reporters.Select(y => y.GetAnnotation(x)).ToArray()));
    dataLines.AddRange(infoLines);
    return report;
}
/// <summary>
/// Builds a haplotype report for an already constructed sample by comparing it against
/// every node returned by <c>treeRoot.GetAllChildren()</c>.
/// </summary>
/// <param name="sample">Sample to compare against the tree.</param>
/// <returns>The report built from all comparisons.</returns>
/// <exception cref="HaploGrepException">
/// Thrown when applying any filter in <c>filtersApplied</c> to the sample's polymorphisms
/// changes how many there are.
/// </exception>
public HaploTypeReport GetHaplotypeReport(SimpleSample sample)
{
    // Verify filtering is the same: re-applying each filter must not drop anything
    // from the sample's polymorphisms.
    foreach (var appliedFilter in filtersApplied)
    {
        var countBefore = sample.Polymorphisms.Count();
        var countAfter = appliedFilter.FilterPolys(sample.Polymorphisms).Count();
        if (countBefore == countAfter)
        {
            continue;
        }

        throw new HaploGrepException("It appears a different filter was used when constructing the tree as opposed to " +
                                     "constructing the samples");
    }

    var haplotypeNodes = treeRoot.GetAllChildren();
    var comparisons = haplotypeNodes.Select(n => new HaplotypeComparison(n, sample)).ToList();
    return new HaploTypeReport(comparisons, sample);
}
/// <summary>
/// Compares a sample to the haplotype represented by a tree node and scores the match.
/// </summary>
/// <remarks>
/// The node's mutations are first passed through the sample's filter. <c>Rank</c> is the
/// mean of two ratios: matched mutation-rate weight over the haplotype's total weight, and
/// matched weight over the sample's total weight. A ratio is taken as 1.0 when its
/// denominator is 0.
/// </remarks>
/// <param name="node">Tree node whose mutations define the haplotype.</param>
/// <param name="sample">Sample supplying the filter, the polymorphisms and the total sample weight.</param>
public HaplotypeComparison(PhyloTreeNodev2 node, SimpleSample sample)
{
    this.node = node;

    // Haplotype polymorphisms that survive the sample's own filter.
    var haplotypePolys = sample.Filter.FilterPolys(node.Mutations).ToList();
    NumberOfPolymorhpismsInHaplotype = (ushort)haplotypePolys.Count;
    haplotypeWeightPoly = haplotypePolys.Sum(p => p.getMutationRate());

    // Those of them that the sample also carries.
    var sharedPolys = haplotypePolys.FindAll(p => sample.Polymorphisms.Contains(p));
    MatchingPolymorphisms = sharedPolys;

    int missingFromGenotype = haplotypePolys.Count - sharedPolys.Count;
    int missingFromHaplotype = sample.Polymorphisms.Count - sharedPolys.Count;
    NumberOfPolymorphismsMissingFromGenotype = (ushort)missingFromGenotype;
    NumberOfPolymorphismsMissingFromHaplotype = (ushort)missingFromHaplotype;

    matchingWeightPoly = sharedPolys.Sum(p => p.getMutationRate());

    // Share of the haplotype's weight that the sample matches.
    double haplotypeRatio;
    if (haplotypeWeightPoly == 0)
    {
        haplotypeRatio = 1.0;
    }
    else
    {
        haplotypeRatio = matchingWeightPoly / haplotypeWeightPoly;
    }

    // Share of the sample's weight that the haplotype matches.
    double sampleRatio;
    if (sample.TotalSampleWeight == 0)
    {
        sampleRatio = 1.0;
    }
    else
    {
        sampleRatio = matchingWeightPoly / sample.TotalSampleWeight;
    }

    Rank = .5 * haplotypeRatio + .5 * sampleRatio;
}