예제 #1
0
        /// <summary>
        /// Classifies one wiki page and records what it describes in <paramref name="database"/>.
        /// </summary>
        /// <remarks>
        /// The page is handled by the first case that applies:
        /// <list type="number">
        /// <item><description>ParseSNPPage returns a value: the SNP is added to snpToInfoMap, but only
        /// when its id equals the lower-cased page title.</description></item>
        /// <item><description>The page has a "Genotype" category: its summary is stored as the trait of the
        /// matching genotype of an SNP that is already present in snpToInfoMap.</description></item>
        /// <item><description>The page is a medical condition, medicine or gene: it is added to traits when
        /// it links to at least one page whose name starts with "rs".</description></item>
        /// </list>
        /// Any other page is ignored.
        /// </remarks>
        /// <param name="database">Database that receives the parsed SNP and trait entries.</param>
        /// <param name="pageTitle">Title of the page; used to validate SNP ids and as the trait title.</param>
        /// <param name="pageText">Raw page text; when null the call does nothing.</param>
        private static void ProcessPage(SNPDatabase database, string pageTitle, string pageText)
        {
            if (pageText == null)
            {
                return;
            }

            // Try to parse the page's SNP category properties.
            var snpInfo = ParseSNPPage(pageText);
            if (snpInfo != null)
            {
                // Skip pages with mismatched SNP ID and title.
                // NOTE(review): pageTitle is dereferenced without a null check - confirm callers never pass null.
                if (snpInfo.Value.id == pageTitle.ToLowerInvariant())
                {
                    // NOTE(review): presumably snpToInfoMap is a Dictionary, in which case Add throws
                    // on a repeated id - confirm the input cannot contain duplicate SNP pages.
                    database.snpToInfoMap.Add(snpInfo.Value.id, snpInfo.Value);
                }
                return;
            }

            var genotypeCategoryText = ParsePageCategorySubstring(pageText, "Genotype");
            if (genotypeCategoryText != null)
            {
                // Parse the trait summary for a SNP genotype and add it to the previously created SNPInfo.
                var rsid                 = string.Format("rs{0}", ParseCategoryProperty(genotypeCategoryText, "rsid"));
                var genotype             = DNA.StringToDiploidGenotype(string.Format("{0};{1}", ParseCategoryProperty(genotypeCategoryText, "allele1"), ParseCategoryProperty(genotypeCategoryText, "allele2")));
                var genotypeTraitSummary = ParseCategoryProperty(genotypeCategoryText, "summary");

                // Genotype pages for SNPs that have not been registered are dropped.
                if (!database.snpToInfoMap.ContainsKey(rsid))
                {
                    return;
                }

                // Indexed loop on purpose: the assignment must write to the array element itself
                // (a foreach copy would lose the update if the entries are value types).
                var snpGenotypes = database.snpToInfoMap[rsid].genotypes;
                for (var genotypeIndex = 0; genotypeIndex < snpGenotypes.Length; ++genotypeIndex)
                {
                    if (snpGenotypes[genotypeIndex].genotype.Equals(genotype))
                    {
                        snpGenotypes[genotypeIndex].trait = genotypeTraitSummary;
                        break;
                    }
                }
                return;
            }

            var isTraitPage = ParsePageCategorySubstring(pageText, "is a \\| medical condition") != null ||
                              ParsePageCategorySubstring(pageText, "is a \\| medicine") != null ||
                              ParsePageCategorySubstring(pageText, "is a \\| gene") != null;
            if (!isTraitPage)
            {
                return;
            }

            // If the page isn't a SNP, add it to the database as a trait with associated SNPs.
            var traitInfo = new TraitInfo();
            traitInfo.title = pageTitle;

            // Parse links to associated SNPs. The list keeps first-seen order; the set makes the
            // duplicate check constant-time instead of rescanning the list for every link.
            var associatedSNPs = new List<string>();
            var seenSNPs       = new HashSet<string>();
            Utilities.ProcessDelimitedItems(pageText, "[[", "]]", delegate(string itemText)
            {
                var snpId = itemText.ToLowerInvariant();

                // Ordinal comparison: SNP ids are identifiers, not linguistic text, so the match
                // must not depend on the current culture.
                if (snpId.StartsWith("rs", System.StringComparison.Ordinal) && seenSNPs.Add(snpId))
                {
                    associatedSNPs.Add(snpId);
                }

                // Hand the link text back unchanged.
                return itemText;
            });
            traitInfo.associatedSNPs = associatedSNPs.ToArray();

            // Only traits that reference at least one SNP are worth keeping.
            if (traitInfo.associatedSNPs.Length > 0)
            {
                // Add the trait to the database.
                database.traits.Add(traitInfo);
            }
        }