/// <summary>
/// Builds the <see cref="Polymorphism"/> that represents the reference base stored in
/// <c>rCRS</c> for the requested position.
/// </summary>
/// <param name="position">
/// Position to look up; the reference is read at index <c>position - 1</c>.
/// </param>
/// <returns>
/// A polymorphism at <paramref name="position"/> whose mutation is obtained by passing the
/// upper-cased reference base to <c>MutationAssigner.getBase</c>.
/// </returns>
public static Polymorphism getReferenceBase(int position)
{
    int referenceIndex = position - 1;
    string upperCasedBase = Convert.ToString(rCRS[referenceIndex]).ToUpper();
    var referenceMutation = MutationAssigner.getBase(upperCasedBase);
    return new Polymorphism(position, referenceMutation);
}
//public static string TREE_XML_FILE { get { return addFileNameToEndOfDirectory ("phylotree15.xml"); } }
//public static string WEIGHT_FILE { get { return addFileNameToEndOfDirectory ("fluctRates15.txt"); } }

/// <summary>
/// Best-practice filter applied to polymorphism lists to avoid erroneous calls.
/// An entry is kept only when its position is not listed in <c>EXCLUDED_POSITIONS</c>
/// and <c>MutationAssigner.MutationIsBasePair</c> accepts its mutation
/// (i.e. indels and known-bad positions are dropped).
/// </summary>
/// <param name="polys">Polymorphisms to filter.</param>
/// <returns>The entries of <paramref name="polys"/> that pass both checks.</returns>
internal static IEnumerable<Polymorphism> CommonPolymorphismFilter(IEnumerable<Polymorphism> polys)
{
    return from candidate in polys
           where !EXCLUDED_POSITIONS.Contains(candidate.position)
                 && MutationAssigner.MutationIsBasePair(candidate.mutation)
           select candidate;
}
//TODO: VERIFY ALL OF THIS!!! //Nucleotide position numbers are relative to the RSRS and rCRS. Mutations are given in forward evolutionary time direction. In case of a transversion the derived allele is shown in lowercase. //Coding region mutations (np 577-16023) are shown in black; control region mutations (np 16024-576) in blue. //Back mutations to an ancestral state are indicated with an exclamation mark (!), two exclamation marks for a double back mutation (!!), etc. //Mutations between brackets () are recurrent/unstable within the respective clade, or are yet uncertain based on current data. //Mutation motifs in italic are preliminary and are likely to be further refined as additional sequences become available. //The mutations 309.1C(C), 315.1C, AC indels at 515-522, 16182C, 16183C, 16193.1C(C) and 16519 were not considered for phylogenetic reconstruction and are therefore excluded from the tree. //Accession numbers provided at the tips of branches are representative examples of mtDNA sequences available at GenBank or from individuals included in HapMap/1000Genomes. //The references are independent from the accession numbers and refer to publications that have described the corresponding branch and/or have proposed haplogroup nomenclature. //It may be convenient to use the Find function (Ctrl+F) of your browser to search for a particular mutation or haplogroup. 
/// <summary>
/// Parses one phylotree-style mutation token and stores the result in this instance.
/// </summary>
/// <remarks>
/// Handling, in order:
/// <list type="bullet">
/// <item>The token is trimmed; if it starts with "(" the first and last characters are dropped.</item>
/// <item>Any "!" is removed and <c>BackMutation</c> is set to true.</item>
/// <item>Tokens containing "d"/"D" (e.g. "8288d", "8288del") are deletions.</item>
/// <item>Tokens containing "." (e.g. "315.1C") are insertions: position, size (digits or "X") and inserted bases.</item>
/// <item>Otherwise the first letter is the base (e.g. "152C"); a token without letters is handed to <c>getTransitionPoly</c>.</item>
/// </list>
/// </remarks>
/// <param name="phyloString">Mutation token to parse.</param>
/// <exception cref="FormatException">
/// The token is not a recognisable insertion, or its numeric part cannot be converted to an integer.
/// </exception>
private void parse(string phyloString)
{
    phyloString = phyloString.Trim();

    if (phyloString.StartsWith("("))
    {
        // Drops the first and the last character, i.e. the token is expected to end with ")".
        phyloString = phyloString.Substring(1, phyloString.Length - 1 - 1);
    }

    if (phyloString.Contains("!"))
    {
        phyloString = phyloString.Replace("!", "");
        BackMutation = true;
    }

    //TODO: This needs a length
    if (phyloString.Contains("d") || phyloString.Contains("D"))
    {
        //8288d
        // Strip the deletion marker in any casing. The branch is entered for "D" as well,
        // so "8288D" / "8288Del" must be reduced to the bare position just like "8288d".
        string positionText = System.Text.RegularExpressions.Regex.Replace(
            phyloString,
            "del|d",
            "",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase
                | System.Text.RegularExpressions.RegexOptions.CultureInvariant);

        this.position = Convert.ToInt32(positionText);
        this.mutation = Mutations.DEL;
    }
    else if (phyloString.Contains("."))
    {
        //315.1C
        // Size is either a run of digits or the literal "X"; Sequence is one or more of A/C/G/T.
        // (Alternation must live in a group, not inside [...], where '|' and '+' are literals.)
        var match = System.Text.RegularExpressions.Regex.Match(
            phyloString,
            @"(?<Position>\d+)\.(?<Size>\d+|X)(?<Sequence>[ACGT]+)");

        if (!match.Success)
        {
            throw new FormatException("Could not parse insertion: " + phyloString);
        }

        string insertionSize = match.Groups["Size"].Value;
        string insertedSequence = match.Groups["Sequence"].Value;

        this.position = Convert.ToInt32(match.Groups["Position"].Value);
        this.mutation = Mutations.INS;

        // An "X" size is ambiguous, so it is resolved to the number of inserted bases.
        // The leading "." is kept so the field has the same shape as for numeric sizes.
        // NOTE(review): confirm consumers of numberOfIns expect ".<n>" here.
        this.numberOfIns = "." + (insertionSize == "X"
            ? insertedSequence.Length.ToString()
            : insertionSize);
        this.insertedPolys = insertedSequence;

        // The earlier split-based parser (removed) only recognised insertion indices 0-2;
        // the regular expression above accepts any number of digits.
    }
    else
    {
        //152C
        var match = System.Text.RegularExpressions.Regex.Match(phyloString, "[a-zA-Z]");
        if (match.Success)
        {
            // First letter found is the base; every occurrence of it is removed to leave the position.
            this.mutation = MutationAssigner.getBase(match.Value);
            this.position = Convert.ToInt32(phyloString.Replace(match.Value, ""));
        }
        else
        {
            // No base given: take the first run of digits and let getTransitionPoly
            // (defined elsewhere) fill in the polymorphism for that position.
            match = System.Text.RegularExpressions.Regex.Match(phyloString, "\\d+");
            int transitionPosition = Convert.ToInt32(match.Value);
            getTransitionPoly(transitionPosition);
        }
    }
}