/// <summary>
/// Attempts to read a database from <paramref name="stream"/> and, on success, makes it the active local database.
/// </summary>
/// <param name="stream">Stream handed to <c>SNPDatabase.Load</c>.</param>
/// <returns>
/// <c>true</c> if the database was loaded and installed; <c>false</c> if loading failed,
/// in which case the current local database is left untouched.
/// </returns>
public static bool ImportDatabase(Stream stream)
{
    SNPDatabase imported = null;
    bool loaded = SNPDatabase.Load(stream, ref imported);
    if (loaded)
    {
        // Only swap in the new database once Load has reported success.
        localDatabase = imported;
    }
    return loaded;
}
/// <summary>
/// Creates an empty database and populates it by running <c>UpdateSNPDatabaseAsync</c> on it.
/// </summary>
/// <param name="updateProgressDelegate">Progress callback forwarded to the update.</param>
/// <param name="cancellationToken">Token forwarded to the update.</param>
/// <returns>
/// The populated database, or <c>null</c> if the update was aborted by a cancellation exception.
/// </returns>
public static async Task <SNPDatabase> CreateSNPDatabaseAsync(UpdateProgressDelegate updateProgressDelegate, CancellationToken cancellationToken)
{
    var result = new SNPDatabase();
    try
    {
        await UpdateSNPDatabaseAsync(result, updateProgressDelegate, cancellationToken);
        return result;
    }
    catch (OperationCanceledException)
    {
        // TaskCanceledException derives from OperationCanceledException, so catching the base type
        // still covers the original case while also handling cancellations that are raised as a
        // plain OperationCanceledException (e.g. by CancellationToken.ThrowIfCancellationRequested).
        return null;
    }
}
/// <summary>
/// Loads the database stored in the <c>DefaultSNPDatabase</c> resource.
/// </summary>
/// <returns>The loaded default database, or a new empty database if loading fails.</returns>
private static SNPDatabase InternalLoadDatabase()
{
    // Wrap the resource bytes in a non-writable in-memory stream.
    using (var resourceStream = new MemoryStream(Properties.Resources.DefaultSNPDatabase, false))
    {
        SNPDatabase loaded = null;

        // Fall back to an empty database when the resource data can't be loaded.
        return SNPDatabase.Load(resourceStream, ref loaded) ? loaded : new SNPDatabase();
    }
}
/// <summary>
/// Fetches the current revision content of the given pages with a single MediaWiki query and
/// feeds each returned page to <c>ProcessPage</c>.
/// </summary>
/// <param name="database">Database that <c>ProcessPage</c> updates.</param>
/// <param name="pageTitles">Titles to request; they are joined with '|' into one query.</param>
/// <param name="cancellationToken">Token passed to the API call; when cancellation has been requested the response is not processed.</param>
private static async Task UpdatePages(SNPDatabase database, IEnumerable <string> pageTitles, CancellationToken cancellationToken)
{
    var queryResponse = await CallMediaWikiAPI(string.Format(
        "action=query&prop=revisions&rvprop=content&titles={0}&format=xml",
        HttpUtility.UrlEncode(string.Join("|", pageTitles))
        ), cancellationToken);

    if (cancellationToken.IsCancellationRequested || !queryResponse.bSuccess)
    {
        return;
    }

    // Process each page received in the response.
    var pageElements = queryResponse.resultXML.SelectNodes("query/pages/page");
    foreach (XmlNode pageElement in pageElements)
    {
        var titleAttribute = pageElement.Attributes["title"];
        if (titleAttribute == null)
        {
            // Without a title the page can't be matched to anything; skip it.
            continue;
        }

        // Missing or invalid titles come back as <page> elements without a revisions/rev child.
        // Pass null text in that case (ProcessPage ignores null text) instead of dereferencing
        // a null node.
        var pageRevision = pageElement.SelectSingleNode("revisions/rev");
        var pageText = pageRevision != null ? pageRevision.InnerText : null;

        ProcessPage(database, titleAttribute.Value, pageText);
    }
}
/// <summary>
/// Reads the page list, then downloads and processes the pages in fixed-size batches,
/// reporting progress after each batch.
/// </summary>
/// <param name="database">Database to update.</param>
/// <param name="updateProgressDelegate">Called with a status message and a completion fraction.</param>
/// <param name="cancellationToken">Checked before each batch; when cancellation has been requested the loop stops without throwing.</param>
public static async Task UpdateSNPDatabaseAsync(SNPDatabase database, UpdateProgressDelegate updateProgressDelegate, CancellationToken cancellationToken)
{
    // Read the page list.
    updateProgressDelegate("Reading SNPedia page list", 0);
    var pageList = await ReadPageListAsync(updateProgressDelegate, cancellationToken);

    // Sort the page list to ensure that genotype pages (e.g. rs1234(a;a)) are processed after the corresponding snp page (i.e. rs1234)
    pageList.Sort(StringComparer.OrdinalIgnoreCase);

    // Copy the sorted titles once; copying inside the loop would duplicate the whole list for every batch.
    var sortedTitles = pageList.ToArray();
    var totalPages = sortedTitles.Length;

    var pagesPerBatch = 50;
    for (int pageIndex = 0; pageIndex < totalPages; pageIndex += pagesPerBatch)
    {
        if (cancellationToken.IsCancellationRequested)
        {
            break;
        }

        var batchSize = Math.Min(totalPages - pageIndex, pagesPerBatch);
        await UpdatePages(database, new ArraySegment <string>(sortedTitles, pageIndex, batchSize), cancellationToken);

        // Report the number of pages handled so far (end of this batch), not the batch's start index,
        // so the final report reads total/total with a fraction of 1.0.
        var processedPages = pageIndex + batchSize;
        updateProgressDelegate(string.Format("Processed {0}/{1} pages", processedPages, totalPages), (double)processedPages / (double)totalPages);
    }
}
/// <summary>
/// Reads a database from a binary stream: a magic header, a counted list of SNP id/info pairs,
/// then a counted list of traits.
/// </summary>
/// <param name="stream">Stream to read from. The reader created here is not disposed, so the stream is left open.</param>
/// <param name="outResult">
/// Set to a new database as soon as the magic header has been accepted; left unchanged when the header does not match.
/// </param>
/// <returns><c>true</c> if the header matched and all entries were read; <c>false</c> if the header did not match.</returns>
public static bool Load(Stream stream, ref SNPDatabase outResult)
{
    var reader = new BinaryReader(stream);

    // The file must begin with the expected magic characters; otherwise report failure.
    char[] magic = reader.ReadChars(referenceFileMagic.Length);
    bool magicMatches = Utilities.ArrayCompare(magic, referenceFileMagic);
    if (!magicMatches)
    {
        return false;
    }

    // Publish the new database before filling it in.
    var database = new SNPDatabase();
    outResult = database;

    // SNP section: a count followed by that many (id, info) pairs.
    int snpCount = reader.ReadInt32();
    int snpsRead = 0;
    while (snpsRead < snpCount)
    {
        var snpId = reader.ReadString();
        var snpInfo = SNPInfo.Read(reader);
        database.snpToInfoMap.Add(snpId, snpInfo);
        snpsRead++;
    }

    // Trait section: a count followed by that many trait records.
    int traitCount = reader.ReadInt32();
    int traitsRead = 0;
    while (traitsRead < traitCount)
    {
        var trait = TraitInfo.Read(reader);
        database.traits.Add(trait);
        traitsRead++;
    }

    return true;
}
/// <summary>
/// Classifies a downloaded page and updates the database accordingly:
/// a SNP page is stored by id, a genotype page sets the trait summary on the matching genotype of an
/// already-stored SNP, and a medical condition / medicine / gene page is stored as a trait with the
/// SNPs it links to.
/// </summary>
/// <param name="database">Database to update.</param>
/// <param name="pageTitle">Title of the page.</param>
/// <param name="pageText">Page content; when <c>null</c> the page is ignored.</param>
private static void ProcessPage(SNPDatabase database, string pageTitle, string pageText)
{
    if (pageText == null)
    {
        return;
    }

    // Try to parse the page's SNP category properties.
    var snpInfo = ParseSNPPage(pageText);
    if (snpInfo != null)
    {
        // Skip pages with mismatched SNP ID and title.
        if (snpInfo.Value.id == pageTitle.ToLowerInvariant())
        {
            // Assign through the indexer rather than Add: the same id can arrive more than once
            // (titles differing only in case, or an update of an already-populated database),
            // and Add would throw on the duplicate key and abort the whole update.
            database.snpToInfoMap[snpInfo.Value.id] = snpInfo.Value;
        }
        return;
    }

    var genotypeCategoryText = ParsePageCategorySubstring(pageText, "Genotype");
    if (genotypeCategoryText != null)
    {
        // Parse the trait summary for a SNP genotype and add it to the previously created SNPInfo.
        var rsid = string.Format("rs{0}", ParseCategoryProperty(genotypeCategoryText, "rsid"));
        var genotype = DNA.StringToDiploidGenotype(string.Format("{0};{1}", ParseCategoryProperty(genotypeCategoryText, "allele1"), ParseCategoryProperty(genotypeCategoryText, "allele2")));
        var genotypeTraitSummary = ParseCategoryProperty(genotypeCategoryText, "summary");

        if (database.snpToInfoMap.ContainsKey(rsid))
        {
            var snpGenotypes = database.snpToInfoMap[rsid].genotypes;
            for (var genotypeIndex = 0; genotypeIndex < snpGenotypes.Length; ++genotypeIndex)
            {
                if (snpGenotypes[genotypeIndex].genotype.Equals(genotype))
                {
                    snpGenotypes[genotypeIndex].trait = genotypeTraitSummary;
                    break;
                }
            }
        }
        return;
    }

    var isTraitPage =
        ParsePageCategorySubstring(pageText, "is a \\| medical condition") != null ||
        ParsePageCategorySubstring(pageText, "is a \\| medicine") != null ||
        ParsePageCategorySubstring(pageText, "is a \\| gene") != null;
    if (!isTraitPage)
    {
        return;
    }

    // If the page isn't a SNP, add it to the database as a trait with associated SNPs.
    var traitInfo = new TraitInfo();
    traitInfo.title = pageTitle;

    // Parse links to associated SNPs. The list keeps first-seen order; the set makes the
    // duplicate check constant-time instead of scanning the list for every link.
    var associatedSNPs = new List <string>();
    var seenSNPs = new HashSet <string>();
    Utilities.ProcessDelimitedItems(pageText, "[[", "]]", delegate(string itemText)
    {
        var snpId = itemText.ToLowerInvariant();

        // Ordinal comparison: this is an identifier prefix, not linguistic text.
        if (snpId.StartsWith("rs", StringComparison.Ordinal) && seenSNPs.Add(snpId))
        {
            associatedSNPs.Add(snpId);
        }
        return itemText;
    });
    traitInfo.associatedSNPs = associatedSNPs.ToArray();

    if (traitInfo.associatedSNPs.Length > 0)
    {
        // Add the trait to the database.
        database.traits.Add(traitInfo);
    }
}
/// <summary>
/// Replaces the local database with the one returned by <c>InternalLoadDatabase</c>.
/// </summary>
public static void RevertDatabase()
{
    var defaultDatabase = InternalLoadDatabase();
    localDatabase = defaultDatabase;
}