/// <summary>
/// Builds the polymorphism that describes the reference base at the given position.
/// </summary>
/// <param name="position">Position in the reference; used as a 1-based index into <c>rCRS</c>.</param>
/// <returns>A <see cref="Polymorphism"/> pairing <paramref name="position"/> with the upper-cased reference base.</returns>
public static Polymorphism getReferenceBase(int position)
        {
            // rCRS is indexed from zero while callers pass 1-based positions.
            int zeroBasedIndex = position - 1;
            string upperCasedBase = Convert.ToString(rCRS [zeroBasedIndex]).ToUpper();
            var referenceMutation = MutationAssigner.getBase(upperCasedBase);

            return new Polymorphism(position, referenceMutation);
        }
Beispiel #2
0
        //public static string TREE_XML_FILE { get { return addFileNameToEndOfDirectory ("phylotree15.xml"); } }
        //public static string WEIGHT_FILE { get { return addFileNameToEndOfDirectory ("fluctRates15.txt"); } }

        /// <summary>
        /// Applies the current best-practice filters to a sequence of polymorphisms in order
        /// to avoid erroneous calls. A polymorphism is kept only when its position is not
        /// listed in <c>EXCLUDED_POSITIONS</c> and
        /// <c>MutationAssigner.MutationIsBasePair</c> accepts its mutation.
        /// </summary>
        /// <param name="polys">The polymorphisms to filter.</param>
        /// <returns>A lazily evaluated sequence containing only the polymorphisms that pass both checks.</returns>
        internal static IEnumerable <Polymorphism> CommonPolymorphismFilter(IEnumerable <Polymorphism> polys)
        {
            return polys.Where(candidate =>
            {
                // Excluded positions are rejected before the mutation type is inspected.
                if (EXCLUDED_POSITIONS.Contains(candidate.position))
                {
                    return false;
                }

                return MutationAssigner.MutationIsBasePair(candidate.mutation);
            });
        }
        //TODO: VERIFY ALL OF THIS!!!
        //Nucleotide position numbers are relative to the RSRS and rCRS. Mutations are given in forward evolutionary time direction. In case of a transversion the derived allele is shown in lowercase.
        //Coding region mutations (np 577-16023) are shown in black; control region mutations (np 16024-576) in blue.
        //Back mutations to an ancestral state are indicated with an exclamation mark (!), two exclamation marks for a double back mutation (!!), etc.
        //Mutations between brackets () are recurrent/unstable within the respective clade, or are yet uncertain based on current data.
        //Mutation motifs in italic are preliminary and are likely to be further refined as additional sequences become available.
        //The mutations 309.1C(C), 315.1C, AC indels at 515-522, 16182C, 16183C, 16193.1C(C) and 16519 were not considered for phylogenetic reconstruction and are therefore excluded from the tree.
        //Accession numbers provided at the tips of branches are representative examples of mtDNA sequences available at GenBank or from individuals included in HapMap/1000Genomes.
        //The references are independent from the accession numbers and refer to publications that have described the corresponding branch and/or have proposed haplogroup nomenclature.
        //It may be convenient to use the Find function (Ctrl+F) of your browser to search for a particular mutation or haplogroup.
        /// <summary>
        /// Parses a single phylotree-style mutation token and stores the result in this
        /// instance. Three shapes are recognised, in this order:
        /// a deletion (contains "d"/"D", e.g. "8288d"), an insertion (contains ".",
        /// e.g. "315.1C") and everything else (e.g. "152C", or a bare position).
        /// Surrounding parentheses are stripped, and any "!" is removed and recorded by
        /// setting <c>BackMutation</c> to true.
        /// </summary>
        /// <param name="phyloString">The mutation token to parse.</param>
        /// <exception cref="FormatException">
        /// Thrown when an insertion token does not have the expected
        /// position.size+sequence shape, or when the numeric part of a token cannot be
        /// converted to an integer.
        /// </exception>
        private void parse(string phyloString)
        {
            phyloString = phyloString.Trim();

            if (phyloString.StartsWith("(", StringComparison.Ordinal))
            {
                // Drop the opening parenthesis, and the closing one only when it is really
                // there; unconditionally cutting the last character would eat part of the
                // token (or throw on a lone "(").
                phyloString = phyloString.EndsWith(")", StringComparison.Ordinal)
                    ? phyloString.Substring(1, phyloString.Length - 2)
                    : phyloString.Substring(1);
            }

            if (phyloString.Contains("!"))
            {
                phyloString  = phyloString.Replace("!", "");
                BackMutation = true;
            }

            //TODO: This needs a length
            if (phyloString.Contains("d") || phyloString.Contains("D"))
            {
                //8288d
                // Strip the deletion marker in any casing ("d", "D", "del", "DEL", "Del", ...)
                // so that every token that enters this branch can also be converted.
                string positionText = System.Text.RegularExpressions.Regex.Replace(
                    phyloString,
                    "del|d",
                    "",
                    System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                this.position = Convert.ToInt32(positionText);
                this.mutation = Mutations.DEL;
            }
            else if (phyloString.Contains("."))
            {
                //315.1C
                // Size is either a run of digits or the literal "X"; Sequence is one or more
                // bases. Alternation is used for Size because "|" and "+" inside a character
                // class are literals, not operators.
                // (The earlier Split('.')-based implementation only recognised sizes 0-2.)
                var match = System.Text.RegularExpressions.Regex.Match(
                    phyloString,
                    @"(?<Position>\d+)\.(?<Size>\d+|X)(?<Sequence>[ACGT]+)");

                if (!match.Success)
                {
                    throw new FormatException("Could not parse insertion '" + phyloString + "'");
                }

                this.position = Convert.ToInt32(match.Groups ["Position"].Value);
                this.mutation = Mutations.INS;

                // NOTE(review): a size of "X" is stored verbatim as ".X". The previous code
                // tried to replace it with the sequence length, but compared the already
                // "."-prefixed field against "X", so that branch never ran. It also had a
                // self-comparing length check that could never fail. Both were removed
                // without changing the effective result - confirm the intended handling.
                this.numberOfIns   = "." + match.Groups ["Size"].Value;
                this.insertedPolys = match.Groups ["Sequence"].Value;
            }
            else
            {
                //152C
                var match = System.Text.RegularExpressions.Regex.Match(phyloString, "[a-zA-Z]");
                if (match.Success)
                {
                    // The first letter names the base; what remains once every occurrence of
                    // that letter is removed must be the position.
                    this.mutation = MutationAssigner.getBase(match.Value);
                    this.position = Convert.ToInt32(phyloString.Replace(match.Value, ""));
                }
                else
                {
                    // No base letter given: hand the bare position to getTransitionPoly.
                    match = System.Text.RegularExpressions.Regex.Match(phyloString, "\\d+");
                    int transitionPosition = Convert.ToInt32(match.Value);
                    getTransitionPoly(transitionPosition);
                }
            }
        }