/// <summary>
/// Walks from <paramref name="child"/> up the Parent chain and tallies, for the nodes
/// on that path, how many were tested and how many reached the node match threshold.
/// </summary>
/// <param name="mutationTable">
/// Per-haplogroup pair of counts; a node is considered tested when Item2 is positive,
/// and its ratio is Item1 divided by Item2.
/// </param>
/// <param name="child">Node at which the upward walk starts (it is always inspected).</param>
/// <returns>
/// A tuple whose Item1 is the number of tested nodes with a ratio of at least
/// <c>_nodeMatchThreshold</c> and whose Item2 is the number of tested nodes.
/// </returns>
private Tuple<int, int> CountNodeMatches(
    Dictionary<Haplogroup, Tuple<int, int>> mutationTable,
    Haplogroup child)
{
    var testedNodes = 0;
    var passedNodes = 0;
    var current = child;

    // The starting node is looked up unconditionally; the walk ends after the
    // first node that has no parent.
    while (true)
    {
        var stats = mutationTable[current];

        if (stats.Item2 > 0)
        {
            testedNodes++;

            if ((float)stats.Item1 / stats.Item2 >= _nodeMatchThreshold)
            {
                passedNodes++;
            }
        }

        current = current.Parent;

        if (current == null)
        {
            break;
        }
    }

    return Tuple.Create(passedNodes, testedNodes);
}
/// <summary>
/// Filters the tree rooted at <paramref name="group"/> through <c>Visitor.Where</c>,
/// keeping only haplogroups whose name does not start with the "FILLER_" prefix.
/// </summary>
/// <param name="group">Root of the tree to filter.</param>
/// <returns>The haplogroup returned by <c>Visitor.Where</c>.</returns>
private static Haplogroup RemoveFiller(Haplogroup group)
{
    return Visitor.Where(
        group,
        // "FILLER_" is a machine-generated marker, so compare ordinally rather
        // than with the current culture's linguistic rules.
        x => !x.Name.StartsWith("FILLER_", System.StringComparison.Ordinal),
        // Attach callback: link the child under its parent in both directions.
        (parent, child) =>
        {
            parent.Children.Add(child);
            child.Parent = parent;
        },
        // Detach callback: unlink the child from the parent's child list.
        (parent, child) => parent.Children.Remove(child),
        x => x.Children);
}
/// <summary>
/// Renders the tree rooted at <paramref name="group"/> as text via <c>StringTree.Create</c>.
/// </summary>
/// <param name="group">Root of the tree to render.</param>
/// <param name="dumpSnps">
/// When true, nodes that have mutations are labelled "Name (snp, snp, ...)";
/// otherwise every node is labelled with its name only.
/// </param>
/// <returns>The string produced by <c>StringTree.Create</c>.</returns>
private static string Dump(Haplogroup group, bool dumpSnps)
{
    return StringTree.Create(
        group,
        node => dumpSnps && node.Mutations.Any()
            ? string.Format(
                "{0} ({1})",
                node.Name,
                node.Mutations.Select(m => m.Snp).Join(", "))
            : node.Name,
        node => node.Children);
}
/// <summary>
/// Visits every haplogroup reachable through <c>root.Visit</c> and records, per haplogroup,
/// how many of its mutations are matched by the supplied SNP data.
/// </summary>
/// <param name="root">Haplogroup whose <c>Visit</c> method drives the traversal.</param>
/// <param name="snpTable">SNPs keyed by position.</param>
/// <returns>
/// A dictionary mapping each visited haplogroup to a tuple where Item1 is the number of
/// mutations whose position is in <paramref name="snpTable"/> and whose new nucleotide
/// equals the first genotype character at that position, and Item2 is the number of
/// mutations whose position is in <paramref name="snpTable"/> at all.
/// </returns>
private Dictionary<Haplogroup, Tuple<int, int>> GetMutationMatches(
    Haplogroup root,
    Dictionary<int, Snp> snpTable)
{
    var haplogroupMutations = new Dictionary<Haplogroup, Tuple<int, int>>();

    root.Visit(x =>
    {
        var matched = 0;
        var tested = 0;

        // Single pass with one lookup per mutation instead of two Count() passes
        // that each performed ContainsKey (plus an indexer lookup for matches).
        foreach (var mutation in x.Mutations)
        {
            Snp snp;
            if (!snpTable.TryGetValue(mutation.Position, out snp))
            {
                continue;
            }

            tested++;

            if (snp.Genotype[0] == mutation.NewNucleotide)
            {
                matched++;
            }
        }

        haplogroupMutations.Add(x, Tuple.Create(matched, tested));
    });

    return haplogroupMutations;
}
/// <summary>
/// Depth-first search for the haplogroup called <paramref name="name"/> in the tree
/// rooted at <paramref name="group"/>.
/// </summary>
/// <param name="name">Exact haplogroup name to look for.</param>
/// <param name="group">Node at which the search starts; it is checked before its children.</param>
/// <returns>The first matching haplogroup in traversal order, or null when none matches.</returns>
private static Haplogroup FindHaplogroup(string name, Haplogroup group)
{
    if (group.Name == name)
    {
        return group;
    }

    // Children are searched in order; the first subtree that yields a hit wins.
    foreach (var child in group.Children)
    {
        var found = FindHaplogroup(name, child);
        if (found != null)
        {
            return found;
        }
    }

    return null;
}
/// <summary>
/// Picks the best-matching haplogroup for the supplied SNP data.
/// </summary>
/// <remarks>
/// Candidates are haplogroups with at least one matched mutation (Item1 != 0 in the
/// result of <c>GetMutationMatches</c>). For each candidate the ancestor path is scored
/// with <c>CountNodeMatches</c>; candidates whose Item1/Item2 ratio is below
/// <c>_haplogroupMatchThreshold</c> are dropped. The remainder is ordered by Item2
/// descending, then by name descending, and the first entry is returned.
/// </remarks>
/// <param name="root">Root of the haplogroup tree.</param>
/// <param name="snpTable">SNPs keyed by position.</param>
/// <returns>The top-ranked haplogroup, or null when no candidate passes the threshold.</returns>
private Haplogroup FindHaplogroup(Haplogroup root, Dictionary<int, Snp> snpTable)
{
    var mutationMatches = GetMutationMatches(root, snpTable);

    var ranked =
        from entry in mutationMatches
        where entry.Value.Item1 != 0
        let pathScore = CountNodeMatches(mutationMatches, entry.Key)
        where (float)pathScore.Item1 / pathScore.Item2 >= _haplogroupMatchThreshold
        orderby pathScore.Item2 descending, entry.Key.Name descending
        select entry.Key;

    return ranked.FirstOrDefault();
}
/// <summary>
/// Builds a tree node from a "haplogroup" XML element and recursively builds its children.
/// </summary>
/// <param name="node">
/// XML element carrying a "name" attribute, a "details" child element (with "accessionNr"
/// and "reference" attributes and "poly" children) and nested "haplogroup" elements.
/// </param>
/// <param name="parentNode">
/// Parent tree node whose mutations are copied into this node first, or null for a node
/// without a parent.
/// </param>
public PhyloTreeNodev2(XmlNode node, PhyloTreeNodev2 parentNode = null)
{
    // Each constructed node takes the next value of the shared counter.
    NodeIDCounter += 1;
    this.NodeID = NodeIDCounter;

    Children = new List<PhyloTreeNodev2>();
    Mutations = new PolymorphismCollection();

    // Haplogroup identity comes from the element's "name" attribute.
    haplogroup = new Haplogroup(node.Attributes.GetNamedItem("name").Value);

    XmlNode detailsElement = node.SelectSingleNode("details");
    XmlAttributeCollection detailAttributes = detailsElement.Attributes;

    haplogroup.AccessionId = detailAttributes.GetNamedItem("accessionNr").Value;
    if (String.IsNullOrEmpty(haplogroup.AccessionId))
    {
        NodesWithNoAccession += 1;
    }

    haplogroup.Reference = detailsElement.Attributes.GetNamedItem("reference").Value;

    // Start from the parent's mutations, when there is a parent.
    if (parentNode != null)
    {
        Mutations.AddRange(parentNode.Mutations);
    }

    // Then append this node's own polymorphisms, ignoring any whose text contains "X".
    foreach (XmlNode polyElement in detailsElement.SelectNodes("poly"))
    {
        string polyText = polyElement.InnerText;
        if (!polyText.Contains("X"))
        {
            Mutations.Add(new Polymorphism(polyText));
        }
    }

    // Finally build one child node per nested "haplogroup" element.
    foreach (XmlNode childElement in node.SelectNodes("haplogroup"))
    {
        Children.Add(new PhyloTreeNodev2(childElement, this));
    }
}
/// <summary>
/// Runs the haplogroup analysis for the data file named in <paramref name="args"/>:
/// parses the genotype data, loads the Y-DNA tree and the "ydnasnps.json" SNP index,
/// prints the matching part of the tree with a match percentage per node, and prints
/// the best-matching haplogroup.
/// </summary>
/// <param name="args">Arguments whose <c>DataFile</c> identifies the genotype data file.</param>
public override void Main(HaplogroupAnalyzerArgs args)
{
    WriteInfoMessage(
        "Loading data file ~Cyan~{0}~R~",
        Path.GetFileName(args.DataFile.FullName));

    List<Snp> snps = null;
    try
    {
        snps = GenotypeDataParser.Parse(args.DataFile.FullName);
    }
    catch (Exception e)
    {
        WriteFatalError(0x1000, "Could not load data file: {0}", e.Message);

        // NOTE(review): WriteFatalError presumably terminates the program; if it
        // ever returns, stop here instead of dereferencing the null 'snps' below.
        return;
    }

    // One entry per position; when a position occurs more than once the first SNP wins.
    var snpTable = snps
        .GroupBy(x => x.Position)
        .ToDictionary(x => x.Key, x => x.First());
    WriteSuccessMessage("Data file loaded");

    WriteInfoMessage("Loading Y-DNA haplogroup tree");
    Haplogroup root = LoadYDnaTree();

    // Keep only index entries whose position is present in the data file.
    var snpIndex = JsonSerializer
        .DeserializeFile<HaplogroupMutation[]>(
            PathHelper.GetExecutingPath("ydnasnps.json"))
        .Where(x => snpTable.ContainsKey(x.Position))
        .ToArray();
    WriteSuccessMessage("Y-DNA haplogroup tree loaded");

    // snpIndex was already restricted to positions in snpTable, so the indexer
    // lookup cannot miss here.
    var matches = snpIndex
        .Where(x => snpTable[x.Position].Genotype[0] == x.NewNucleotide)
        .Select(x => FindHaplogroup(x.Haplogroup, root))
        .Where(x => x != null)
        .ToArray();

    // Only membership by name is needed below, so a set is sufficient.
    var matchedNames = new HashSet<string>(matches.Select(x => x.Name));

    var matchRoot = root
        .Where(x => Visitor.Any(x, y => matchedNames.Contains(y.Name), y => y.Children));

    var mutationMatches = GetMutationMatches(root, snpTable);

    var stringNodes = Visitor.Select(
        matchRoot,
        x =>
        {
            var m2 = mutationMatches[x];

            // Nodes with no tested mutations are shown greyed out, without a percentage.
            if (m2.Item2 == 0)
            {
                return new StringNode()
                {
                    Value = "~DarkGray~" + x.Name + "~R~"
                };
            }

            var value = (float)m2.Item1 / m2.Item2 * 100;
            var color =
                value >= 75 ? ConsoleColor.Green :
                value >= 50 ? ConsoleColor.Yellow :
                ConsoleColor.Red;

            return new StringNode()
            {
                Value = string.Format(
                    "{0}: ~{1}~{2:n0}% ({3:n0}/{4:n0})~R~",
                    x.Name,
                    color,
                    value,
                    m2.Item1,
                    m2.Item2)
            };
        },
        (p, c) => p.Children.Add(c),
        x => x.Children);

    Cli.WriteLine();
    Cli.WriteSubheader("Haplogroup Matches", "~|Blue~~White~");
    Cli.WriteLine();
    Cli.WriteLine(StringTree.Create(stringNodes, x => x.Value, x => x.Children));

    // FindHaplogroup returns null when no haplogroup passes the match threshold;
    // report that instead of failing on haplogroup.Name.
    var haplogroup = FindHaplogroup(root, snpTable);
    if (haplogroup == null)
    {
        Cli.WriteLine("Best match: ~Red~none~R~");
        return;
    }

    Cli.WriteLine("Best match: ~Cyan~{0}~R~", haplogroup.Name);
}
/// <summary>
/// Rebuilds a haplogroup tree from the flat (name, depth) sequence returned by
/// <c>ParseFlatTree</c>.
/// </summary>
/// <remarks>
/// The first entry becomes the root. Each later entry is attached under the node that is
/// on top of the stack once the stack has been adjusted to the entry's depth. When an
/// entry is more than one level deeper than the previous one, placeholder nodes named
/// "FILLER_0", "FILLER_1", ... are inserted for the skipped levels.
/// </remarks>
/// <param name="filename">Passed through to <c>ParseFlatTree</c>.</param>
/// <returns>The root haplogroup of the rebuilt tree.</returns>
private static Haplogroup ParseTree(string filename)
{
    var flatTree = ParseFlatTree(filename);

    var root = new Haplogroup() { Name = flatTree.First().Key };
    var stack = new Stack<Haplogroup>();
    stack.Push(root);

    // Creates a node under the current stack top and makes it the new top.
    Action<string> attach = nodeName =>
    {
        var created = new Haplogroup();
        created.Name = nodeName;

        var parent = stack.Peek();
        created.Parent = parent;
        parent.Children.Add(created);

        stack.Push(created);
    };

    var currentDepth = 0;
    var fillerCount = 0;

    foreach (var entry in flatTree.Skip(1))
    {
        var level = entry.Value;

        if (level > currentDepth)
        {
            // Going deeper: bridge any skipped levels with placeholder nodes.
            for (var missing = level - currentDepth - 1; missing > 0; missing--)
            {
                attach("FILLER_" + fillerCount++);
            }
        }
        else
        {
            // Same level or shallower: unwind to the parent of the new node
            // (one pop for a sibling, one more per level climbed).
            for (var pops = currentDepth - level + 1; pops > 0; pops--)
            {
                stack.Pop();
            }
        }

        currentDepth = level;
        attach(entry.Key);
    }

    // The root stays at the bottom of the stack for every run that completes.
    return root;
}