/// <summary>
        /// Walks from <paramref name="child"/> up the <c>Parent</c> chain and tallies how
        /// many nodes on that path have usable data and how many of those count as matches.
        /// </summary>
        /// <param name="mutationTable">
        /// Per-node pair read here as (Item1 = hits, Item2 = total); a node is only
        /// considered when Item2 is greater than zero.
        /// </param>
        /// <param name="child">Node the upward walk starts from.</param>
        /// <returns>
        /// A tuple of (matching nodes, considered nodes). A considered node matches when
        /// Item1 / Item2 reaches <c>_nodeMatchThreshold</c>.
        /// </returns>
        private Tuple <int, int> CountNodeMatches(
            Dictionary <Haplogroup, Tuple <int, int> > mutationTable,
            Haplogroup child)
        {
            var current    = child;
            var considered = 0;
            var matched    = 0;

            do
            {
                var entry = mutationTable[current];
                var hits  = entry.Item1;
                var total = entry.Item2;

                // Nodes without any counted mutations contribute to neither tally.
                if (total > 0)
                {
                    considered++;

                    var ratio = (float)hits / total;

                    if (ratio >= _nodeMatchThreshold)
                    {
                        matched++;
                    }
                }

                current = current.Parent;
            } while (current != null);

            return Tuple.Create(matched, considered);
        }
예제 #2
0
 /// <summary>
 /// Filters the tree rooted at <paramref name="group"/> through <c>Visitor.Where</c>,
 /// keeping only nodes whose name does not start with the "FILLER_" marker.
 /// </summary>
 /// <param name="group">Root of the tree to filter.</param>
 /// <returns>Whatever <c>Visitor.Where</c> returns for the filtered tree.</returns>
 private static Haplogroup RemoveFiller(Haplogroup group)
 {
     return(Visitor.Where(
                group,
                // The marker is a machine-generated prefix, so compare ordinally rather
                // than with the current culture's linguistic rules.
                x => !x.Name.StartsWith("FILLER_", System.StringComparison.Ordinal),
                // Attach callback: link the child in both directions.
                (parent, child) =>
     {
         parent.Children.Add(child);
         child.Parent = parent;
     },
                // Detach callback.
                (parent, child) => parent.Children.Remove(child),
                x => x.Children));
 }
예제 #3
0
        /// <summary>
        /// Renders the tree rooted at <paramref name="group"/> as text via
        /// <c>StringTree.Create</c>.
        /// </summary>
        /// <param name="group">Root node to render.</param>
        /// <param name="dumpSnps">
        /// When true, a node that has mutations is labelled "Name (snp1, snp2, ...)";
        /// otherwise only the node name is used.
        /// </param>
        /// <returns>The string produced by <c>StringTree.Create</c>.</returns>
        private static string Dump(Haplogroup group, bool dumpSnps)
        {
            return StringTree.Create(
                group,
                node =>
                {
                    if (dumpSnps && node.Mutations.Any())
                    {
                        return string.Format(
                            "{0} ({1})",
                            node.Name,
                            node.Mutations.Select(m => m.Snp).Join(", "));
                    }

                    return node.Name;
                },
                node => node.Children);
        }
        /// <summary>
        /// Builds, for every node visited from <paramref name="root"/>, a pair describing
        /// how that node's mutations compare with the supplied SNP table.
        /// </summary>
        /// <param name="root">Node whose <c>Visit</c> method drives the traversal.</param>
        /// <param name="snpTable">SNP records keyed by position.</param>
        /// <returns>
        /// A dictionary mapping each visited node to (Item1, Item2), where Item2 is the
        /// number of the node's mutations whose position is present in
        /// <paramref name="snpTable"/> and Item1 is how many of those have a first
        /// genotype character equal to the mutation's <c>NewNucleotide</c>.
        /// </returns>
        private Dictionary <Haplogroup, Tuple <int, int> > GetMutationMatches(
            Haplogroup root,
            Dictionary <int, Snp> snpTable)
        {
            var haplogroupMutations = new Dictionary <Haplogroup, Tuple <int, int> >();

            root.Visit(x =>
            {
                var matched = 0;
                var covered = 0;

                // Single pass with one lookup per mutation, instead of two Count()
                // passes that each did ContainsKey plus an indexer read.
                foreach (var mutation in x.Mutations)
                {
                    Snp snp;

                    if (!snpTable.TryGetValue(mutation.Position, out snp))
                    {
                        continue;
                    }

                    covered++;

                    if (snp.Genotype[0] == mutation.NewNucleotide)
                    {
                        matched++;
                    }
                }

                haplogroupMutations.Add(x, Tuple.Create(matched, covered));
            });

            return(haplogroupMutations);
        }
 /// <summary>
 /// Depth-first search for the node called <paramref name="name"/> in the tree rooted
 /// at <paramref name="group"/>.
 /// </summary>
 /// <param name="name">Exact node name to look for.</param>
 /// <param name="group">Root of the subtree to search.</param>
 /// <returns>The first matching node in child order, or null when none exists.</returns>
 private static Haplogroup FindHaplogroup(string name, Haplogroup group)
 {
     if (group.Name == name)
     {
         return group;
     }

     if (!group.Children.Any())
     {
         return null;
     }

     // Stop at the first subtree that yields a hit.
     foreach (var child in group.Children)
     {
         var found = FindHaplogroup(name, child);

         if (found != null)
         {
             return found;
         }
     }

     return null;
 }
        /// <summary>
        /// Picks the best-matching haplogroup under <paramref name="root"/> for the given
        /// SNP table.
        /// </summary>
        /// <param name="root">Root of the haplogroup tree.</param>
        /// <param name="snpTable">SNP records keyed by position.</param>
        /// <returns>
        /// Among nodes with at least one matching mutation whose ancestor-path match ratio
        /// reaches <c>_haplogroupMatchThreshold</c>, the one with the most considered path
        /// nodes (ties broken by name, descending); null when no node qualifies.
        /// </returns>
        private Haplogroup FindHaplogroup(Haplogroup root, Dictionary <int, Snp> snpTable)
        {
            var mutationMatches = GetMutationMatches(root, snpTable);

            var ranked =
                from entry in mutationMatches
                where entry.Value.Item1 != 0
                let pathResult = CountNodeMatches(mutationMatches, entry.Key)
                where (float)pathResult.Item1 / pathResult.Item2 >= _haplogroupMatchThreshold
                orderby pathResult.Item2 descending, entry.Key.Name descending
                select entry.Key;

            return ranked.FirstOrDefault();
        }
예제 #7
0
        /// <summary>
        /// Builds a tree node, and recursively all of its descendants, from a
        /// "haplogroup" XML element.
        /// </summary>
        /// <param name="node">
        /// Element carrying a "name" attribute, a "details" child (with "accessionNr" and
        /// "reference" attributes and "poly" children) and nested "haplogroup" children.
        /// </param>
        /// <param name="parentNode">
        /// Parent tree node whose mutations are copied into this node first; null for the
        /// top-level node.
        /// </param>
        public PhyloTreeNodev2(XmlNode node, PhyloTreeNodev2 parentNode = null)
        {
            NodeID    = ++NodeIDCounter;
            Children  = new List <PhyloTreeNodev2>();
            Mutations = new PolymorphismCollection();

            // Haplogroup identity comes from the element's "name" attribute.
            var nameAttribute = node.Attributes.GetNamedItem("name");
            haplogroup = new Haplogroup(nameAttribute.Value);

            var detailsNode       = node.SelectSingleNode("details");
            var detailsAttributes = detailsNode.Attributes;

            haplogroup.AccessionId = detailsAttributes.GetNamedItem("accessionNr").Value;

            // Keep a running tally of nodes that carry no accession id.
            if (String.IsNullOrEmpty(haplogroup.AccessionId))
            {
                NodesWithNoAccession++;
            }

            haplogroup.Reference = detailsAttributes.GetNamedItem("reference").Value;

            // Start from the parent's mutations, then append the ones declared here.
            if (parentNode != null)
            {
                Mutations.AddRange(parentNode.Mutations);
            }

            foreach (XmlNode polyNode in detailsNode.SelectNodes("poly"))
            {
                var polyText = polyNode.InnerText;

                // Entries containing "X" are skipped.
                if (!polyText.Contains("X"))
                {
                    Mutations.Add(new Polymorphism(polyText));
                }
            }

            // Recurse into nested haplogroup elements, passing this node as parent.
            foreach (XmlNode childElement in node.SelectNodes("haplogroup"))
            {
                Children.Add(new PhyloTreeNodev2(childElement, this));
            }
        }
        /// <summary>
        /// Loads a genotype data file, matches it against the Y-DNA haplogroup tree,
        /// prints the tree of matching haplogroups with per-node match percentages, and
        /// reports the best overall match.
        /// </summary>
        /// <param name="args">Command arguments; <c>DataFile</c> names the file to load.</param>
        public override void Main(HaplogroupAnalyzerArgs args)
        {
            WriteInfoMessage(
                "Loading data file ~Cyan~{0}~R~",
                Path.GetFileName(args.DataFile.FullName));
            List <Snp> snps = null;

            try
            {
                snps = GenotypeDataParser.Parse(args.DataFile.FullName);
            }
            catch (Exception e)
            {
                WriteFatalError(0x1000, "Could not load data file: {0}", e.Message);

                // If WriteFatalError returns instead of terminating, stop here rather
                // than dereferencing the null list below.
                return;
            }

            // One record per position; when a position occurs more than once the first
            // record wins.
            var snpTable = snps
                           .GroupBy(x => x.Position)
                           .ToDictionary(x => x.Key, x => x.First());

            WriteSuccessMessage("Data file loaded");
            WriteInfoMessage("Loading Y-DNA haplogroup tree");

            Haplogroup root = LoadYDnaTree();

            // Only mutations at positions present in the data file are of interest.
            var snpIndex = JsonSerializer
                           .DeserializeFile <HaplogroupMutation[]>(
                PathHelper.GetExecutingPath("ydnasnps.json"))
                           .Where(x => snpTable.ContainsKey(x.Position))
                           .ToArray();

            WriteSuccessMessage("Y-DNA haplogroup tree loaded");

            // snpIndex is already restricted to positions in snpTable, so the indexer
            // cannot miss here.
            var matches = snpIndex
                          .Where(x => snpTable[x.Position].Genotype[0] == x.NewNucleotide)
                          .Select(x => FindHaplogroup(x.Haplogroup, root))
                          .Where(x => x != null)
                          .ToArray();

            // Only membership by name is needed when pruning the tree.
            var matchedNames = new HashSet <string>(matches.Select(x => x.Name));

            // Keep a node when it, or any node below it, matched.
            var matchRoot = root
                            .Where(x => Visitor.Any(x, y => matchedNames.Contains(y.Name), y => y.Children));

            var mutationMatches = GetMutationMatches(root, snpTable);

            var stringNodes = Visitor.Select(
                matchRoot,
                x =>
            {
                var m2 = mutationMatches[x];

                // Nothing counted for this node: show the bare name, greyed out.
                if (m2.Item2 == 0)
                {
                    return(new StringNode()
                    {
                        Value = "~DarkGray~" + x.Name + "~R~"
                    });
                }

                var value = (float)m2.Item1 / m2.Item2 * 100;

                var color =
                    value >= 75 ? ConsoleColor.Green :
                    value >= 50 ? ConsoleColor.Yellow :
                    ConsoleColor.Red;

                return(new StringNode()
                {
                    Value = string.Format(
                        "{0}: ~{1}~{2:n0}% ({3:n0}/{4:n0})~R~",
                        x.Name,
                        color,
                        value,
                        m2.Item1,
                        m2.Item2)
                });
            },
                (p, c) => p.Children.Add(c),
                x => x.Children);

            Cli.WriteLine();
            Cli.WriteSubheader("Haplogroup Matches", "~|Blue~~White~");
            Cli.WriteLine();
            Cli.WriteLine(StringTree.Create(stringNodes, x => x.Value, x => x.Children));
            var haplogroup = FindHaplogroup(root, snpTable);

            // The best-match lookup can come back null when no haplogroup clears the
            // match thresholds; report that instead of failing on a null dereference.
            Cli.WriteLine(
                "Best match: ~Cyan~{0}~R~",
                haplogroup != null ? haplogroup.Name : "(none)");
        }
예제 #9
0
        /// <summary>
        /// Rebuilds a haplogroup tree from the flat (name, depth) sequence returned by
        /// <c>ParseFlatTree</c>.
        /// </summary>
        /// <param name="filename">File handed to <c>ParseFlatTree</c>.</param>
        /// <returns>
        /// The root node, named after the first flat entry. When an entry is more than
        /// one level deeper than its predecessor, the skipped levels are bridged with
        /// placeholder nodes named "FILLER_n".
        /// </returns>
        private static Haplogroup ParseTree(string filename)
        {
            var entries = ParseFlatTree(filename);
            var path    = new Stack <Haplogroup>();

            var rootNode = new Haplogroup()
            {
                Name = entries.First().Key
            };

            path.Push(rootNode);

            // Appends a new node under the current top of the path and descends into it.
            Action <string> attach = name =>
            {
                var created = new Haplogroup()
                {
                    Name   = name,
                    Parent = path.Peek(),
                };
                path.Peek().Children.Add(created);
                path.Push(created);
            };

            var depth       = 0;
            var fillerCount = 0;

            foreach (var entry in entries.Skip(1))
            {
                var level = entry.Value;

                if (level > depth)
                {
                    // Going deeper: bridge every skipped level with a placeholder.
                    for (var missing = level - depth - 1; missing > 0; missing--)
                    {
                        attach("FILLER_" + fillerCount++);
                    }
                }
                else
                {
                    // Same level or shallower: unwind to the parent of the target level.
                    // A sibling (same level) is the zero-difference case: one pop.
                    for (var pops = depth - level + 1; pops > 0; pops--)
                    {
                        path.Pop();
                    }
                }

                depth = level;
                attach(entry.Key);
            }

            // The root is never removed on a successful run: popping it leaves the path
            // empty and the next attach would throw.
            return rootNode;
        }