/**
 * Deserializes one SNPInfo record from a binary reader.
 *
 * The record layout is defined by the order of the reads below:
 *   id, description wiki text, gene, chromosome (strings), position (Int32),
 *   orientation (single byte cast to Orientation), update time (Int64 fed to DateTime.FromBinary),
 *   then an Int32 genotype count followed by that many genotype entries.
 * Each genotype entry is a DiploidGenotype, a trait string, and an Int32 count of
 * (population tag string, frequency Single) pairs.
 */
public static SNPInfo Read(BinaryReader reader)
{
    var snp = new SNPInfo();

    // Scalar fields; the read order is the on-disk order and must not be rearranged.
    snp.id = reader.ReadString();
    snp.descriptionWikiText = reader.ReadString();
    snp.gene = reader.ReadString();
    snp.chromosome = reader.ReadString();
    snp.position = reader.ReadInt32();
    snp.orientation = (Orientation)reader.ReadByte();
    snp.updateTime = DateTime.FromBinary(reader.ReadInt64());

    // Genotype table: a count followed by the entries themselves.
    int genotypeCount = reader.ReadInt32();
    var genotypes = new SNPGenotypeInfo[genotypeCount];
    for (int slot = 0; slot < genotypes.Length; slot++)
    {
        var entry = new SNPGenotypeInfo();
        entry.genotype = DiploidGenotype.Read(reader);
        entry.trait = reader.ReadString();

        // Per-population frequencies: a count followed by (tag, frequency) pairs.
        var frequencies = new Dictionary<string, float>();
        for (int remaining = reader.ReadInt32(); remaining > 0; remaining--)
        {
            string tag = reader.ReadString();
            float frequency = reader.ReadSingle();
            frequencies.Add(tag, frequency);
        }
        entry.populationFrequencies = frequencies;

        genotypes[slot] = entry;
    }
    snp.genotypes = genotypes;

    return snp;
}
/**
 * Determines the orientation of the genotypes of a SNP.
 *
 * Every genotype of the SNP is tested twice against DoesGenotypeMatch: once as stored and once
 * complemented. If only the stored forms all match, this object's orientation is returned; if only
 * the complemented forms all match, the opposite orientation is returned. When both forms match,
 * or neither does, the result is Orientation.Unknown.
 */
public Orientation GetOrientation(SNPInfo snpInfo)
{
    // Each flag stays true only while every genotype seen so far passes in that form.
    bool storedFormMatches = true;
    bool complementFormMatches = true;

    foreach (var genotypeInfo in snpInfo.genotypes)
    {
        var stored = genotypeInfo.genotype;
        var complement = stored.GetComplement();

        // Both alleles of the genotype have to match for the genotype to count as matching.
        if (!(DoesGenotypeMatch(stored.a) && DoesGenotypeMatch(stored.b)))
        {
            storedFormMatches = false;
        }
        if (!(DoesGenotypeMatch(complement.a) && DoesGenotypeMatch(complement.b)))
        {
            complementFormMatches = false;
        }
    }

    // Both forms matching, or neither matching, gives no way to tell the orientations apart.
    if (storedFormMatches == complementFormMatches)
    {
        return Orientation.Unknown;
    }

    // Exactly one form matched: the stored form means same orientation, the complement means opposite.
    return storedFormMatches ? orientation : GetOppositeOrientation();
}
/**
 * Builds the trait page for one SNP.
 *
 * Fills the name, description and SNPedia link controls from snpInfo, then adds one list entry per
 * genotype of the SNP, flagging the entries equal to personalGenotype. bHasMatchingGenotype records
 * whether any listed genotype equalled personalGenotype. When none did, an extra placeholder entry
 * (empty trait, no population frequencies) is appended for personalGenotype and flagged as the match;
 * bHasMatchingGenotype stays false in that case.
 */
public SimpleDiploidTraitPage(
    SNPInfo snpInfo,
    DiploidGenotype personalGenotype
    )
{
    InitializeComponent();

    // Setup the SNP information controls.
    nameLabel.Content = snpInfo.id;
    descriptionLabel.Text = Utilities.ConvertWikiTextToPlainText(snpInfo.descriptionWikiText);

    // The id becomes a query-string value, so it is escaped: characters such as '&', '#' or spaces
    // would otherwise truncate or corrupt the title parameter. A null id is treated as empty,
    // which yields the same empty title that formatting a null would.
    string escapedId = Uri.EscapeDataString(snpInfo.id ?? "");
    snpediaLink.NavigateUri = new Uri(string.Format("http://www.snpedia.com/index.php?title={0}", escapedId));

    // Setup the list of genotypes for this SNP.
    bHasMatchingGenotype = false;
    foreach (var genotypeInfo in snpInfo.genotypes)
    {
        bool isPersonalGenotype = personalGenotype.Equals(genotypeInfo.genotype);
        bHasMatchingGenotype |= isPersonalGenotype;
        genotypeList.Items.Add(new SNPGenotypeUIAdapter(genotypeInfo, isPersonalGenotype));
    }

    if (bHasMatchingGenotype)
    {
        return;
    }

    // The genome doesn't match any of the listed genotypes: show a placeholder genotype for it.
    var placeholder = new SNPGenotypeInfo();
    placeholder.genotype = personalGenotype;
    placeholder.trait = "";
    placeholder.populationFrequencies = new Dictionary<string, float>();
    genotypeList.Items.Add(new SNPGenotypeUIAdapter(placeholder, true));
}
/**
 * Loads a SNP database from a binary stream.
 *
 * The stream must start with referenceFileMagic. After the magic come an Int32 SNP count, that many
 * (id string, SNPInfo) pairs, an Int32 trait count, and that many TraitInfo records.
 *
 * Returns false, leaving outResult untouched, when the magic does not match. Returns true after the
 * whole file has been read and outResult has been replaced with the loaded database. Exceptions from
 * the reader or from adding entries propagate to the caller; outResult is left untouched then too,
 * so the caller never receives a partially loaded database.
 */
public static bool Load(Stream stream, ref SNPDatabase outResult)
{
    // The reader is intentionally not disposed: disposing a BinaryReader built with this constructor
    // also closes the underlying stream, which was handed in by the caller.
    var reader = new BinaryReader(stream);

    // Read the file magic. If the file doesn't have the expected header, abort and report failure.
    char[] fileMagic = reader.ReadChars(referenceFileMagic.Length);
    if (!Utilities.ArrayCompare(fileMagic, referenceFileMagic))
    {
        return false;
    }

    // Populate a local database and publish it only once everything has been read.
    var database = new SNPDatabase();

    // Read the SNPs in the database as (id, info) pairs.
    int numSNPs = reader.ReadInt32();
    for (int snpIndex = 0; snpIndex < numSNPs; snpIndex++)
    {
        var snpId = reader.ReadString();
        var snpInfo = SNPInfo.Read(reader);
        database.snpToInfoMap.Add(snpId, snpInfo);
    }

    // Read the traits in the database.
    int numTraits = reader.ReadInt32();
    for (int traitIndex = 0; traitIndex < numTraits; traitIndex++)
    {
        database.traits.Add(TraitInfo.Read(reader));
    }

    outResult = database;
    return true;
}
/**
 * Creates the trait page for one SNP and files it into one of two lists.
 *
 * The personal genotype is looked up through App.document.GetSNPValue and oriented to
 * snpInfo.orientation; when the lookup yields null, an Unknown/Unknown genotype is used instead.
 * The page is appended to genotypedPageList when it reports bHasMatchingGenotype, and to
 * ungenotypedPageList otherwise.
 */
private static void CreateTraitPage(string snpId, SNPInfo snpInfo, ref List<UIElement> genotypedPageList, ref List<UIElement> ungenotypedPageList)
{
    // Work out which genotype to show: the genome's own one if present, otherwise "unknown".
    DiploidGenotype genotype;
    var value = App.document.GetSNPValue(snpId);
    if (value != null)
    {
        genotype = value.Value.GetOrientedGenotype(snpInfo.orientation);
    }
    else
    {
        genotype = new DiploidGenotype(Genotype.Unknown, Genotype.Unknown);
    }

    // Build the page, then pick its destination list from the page's own match flag.
    var page = new SimpleDiploidTraitPage(snpInfo, genotype);
    var destination = page.bHasMatchingGenotype ? genotypedPageList : ungenotypedPageList;
    destination.Add(page);
}
/**
 * Parses the wiki text of a page into a SNPInfo.
 *
 * Returns null when:
 *   - the page has no "rsnum" category section,
 *   - the page's plain-text rendering is empty after trimming, or
 *   - any declared genotype has an Unknown or Deletion allele.
 *
 * Otherwise the result carries the id ("rs" + the rsid property), gene, chromosome, position
 * (-1 when it is not a valid integer), Orientation.Unknown, today's date as update time, the raw
 * page text as description, and one SNPGenotypeInfo per "genoN" property (N starting at 1).
 * When the page also has a "population diversity" section whose genotypes map one-to-one onto the
 * declared genotypes, the CEU/HCB/JPT/YRI frequency lists are attached to the genotypes, each value
 * divided by 100.
 *
 * Numbers are parsed with the invariant culture so the result does not depend on the machine's
 * regional settings (e.g. a comma-decimal locale misreading "12.5").
 */
private static SNPInfo? ParseSNPPage(string pageText)
{
    // Check if this page is a SNP.
    string snpSubstring = ParsePageCategorySubstring(pageText, "rsnum");
    if (snpSubstring == null)
    {
        return null;
    }

    var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

    // Parse this SNP's properties.
    var result = new SNPInfo();
    result.id = string.Format("rs{0}", ParseCategoryProperty(snpSubstring, "rsid"));
    result.gene = ParseCategoryProperty(snpSubstring, "gene");
    result.chromosome = ParseCategoryProperty(snpSubstring, "chromosome");
    if (!int.TryParse(
            ParseCategoryProperty(snpSubstring, "position"),
            System.Globalization.NumberStyles.Integer,
            invariantCulture,
            out result.position))
    {
        result.position = -1;
    }
    result.orientation = Orientation.Unknown;
    result.updateTime = DateTime.Today;
    result.descriptionWikiText = pageText;

    // If the page is empty, don't add it to the database.
    if (Utilities.ConvertWikiTextToPlainText(pageText).Trim().Length == 0)
    {
        return null;
    }

    // Parse the SNP's genotypes: geno1, geno2, ... until the first missing property.
    var tempGenotypes = new List<SNPGenotypeInfo>();
    for (int genotypeIndex = 0; ; genotypeIndex++)
    {
        string genotypeString = ParseCategoryProperty(snpSubstring, string.Format("geno{0}", genotypeIndex + 1));
        if (string.IsNullOrEmpty(genotypeString))
        {
            break;
        }

        // SNPedia represents deletion genotypes as a '-', but we use D.
        genotypeString = genotypeString.Replace('-', 'D');

        // Parse the genotype info.
        var newGenotype = new SNPGenotypeInfo();
        newGenotype.genotype = DNA.StringToDiploidGenotype(genotypeString);
        newGenotype.trait = "";
        newGenotype.populationFrequencies = new Dictionary<string, float>();

        // If any of the genotypes are deletions or unknown, don't add this snp to the database
        // as we can't correctly parse the inserted genotype yet.
        if (newGenotype.genotype.a == Genotype.Unknown || newGenotype.genotype.a == Genotype.Deletion ||
            newGenotype.genotype.b == Genotype.Unknown || newGenotype.genotype.b == Genotype.Deletion)
        {
            return null;
        }

        tempGenotypes.Add(newGenotype);
    }
    result.genotypes = tempGenotypes.ToArray();

    // Check if the page has SNP population frequency data.
    var populationFrequencySubstring = ParsePageCategorySubstring(pageText, "population diversity");
    if (populationFrequencySubstring == null)
    {
        return result;
    }

    // Remap the population section's genotype indices to the genotypes declared above.
    // An entry stays null when the population section's genotype isn't one of the declared ones.
    var genotypeIndexRemap = new int?[result.genotypes.Length];
    for (int genotypeIndex = 0; genotypeIndex < result.genotypes.Length; genotypeIndex++)
    {
        genotypeIndexRemap[genotypeIndex] = null;
        var genotype = DNA.StringToDiploidGenotype(
            ParseCategoryProperty(populationFrequencySubstring, string.Format("geno{0}", genotypeIndex + 1)));
        for (int genotypeInfoIndex = 0; genotypeInfoIndex < result.genotypes.Length; genotypeInfoIndex++)
        {
            if (result.genotypes[genotypeInfoIndex].genotype.Equals(genotype))
            {
                genotypeIndexRemap[genotypeIndex] = genotypeInfoIndex;
                break;
            }
        }
    }

    // Ignore population frequencies if they are redundantly defined or not defined for any genotype:
    // every declared genotype must be the target of exactly one remap entry.
    for (int genotypeIndex = 0; genotypeIndex < result.genotypes.Length; genotypeIndex++)
    {
        int numRemaps = 0;
        for (int remapIndex = 0; remapIndex < genotypeIndexRemap.Length; remapIndex++)
        {
            if (genotypeIndexRemap[remapIndex] == genotypeIndex)
            {
                numRemaps++;
            }
        }
        if (numRemaps != 1)
        {
            return result;
        }
    }

    // Parse each population's frequencies for this SNP.
    string[] populations = new string[] { "CEU", "HCB", "JPT", "YRI" };
    foreach (var population in populations)
    {
        var frequencies = ParseList(ParseCategoryProperty(populationFrequencySubstring, population));

        // A list whose length doesn't line up with the genotypes can't be attributed; skip it.
        if (frequencies.Count != result.genotypes.Length)
        {
            continue;
        }

        for (int genotypeIndex = 0; genotypeIndex < genotypeIndexRemap.Length; genotypeIndex++)
        {
            if (genotypeIndexRemap[genotypeIndex] == null)
            {
                continue;
            }

            // Same number styles as the default float.TryParse overload, but culture-independent.
            float frequency;
            if (float.TryParse(
                    frequencies[genotypeIndex],
                    System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                    invariantCulture,
                    out frequency))
            {
                // The stored value is the parsed number divided by 100 (presumably the page lists percentages).
                result.genotypes[genotypeIndexRemap[genotypeIndex].Value].populationFrequencies.Add(population, frequency / 100.0f);
            }
        }
    }

    return result;
}