예제 #1
0
        /// <summary>
        /// Deserializes an <see cref="SNPInfo"/> from a binary reader.
        /// </summary>
        /// <remarks>
        /// Values are consumed in this order: id, description wiki text, gene and chromosome (strings),
        /// position (Int32), orientation (one byte), update time (Int64 handed to
        /// <see cref="DateTime.FromBinary"/>), then an Int32 genotype count followed by that many genotype
        /// records. Each genotype record is a <c>DiploidGenotype</c>, a trait string, an Int32 population
        /// count, and that many (string tag, Single frequency) pairs.
        /// </remarks>
        /// <param name="reader">The reader positioned at the start of a serialized SNP record.</param>
        /// <returns>The SNP populated from the values that were read.</returns>
        public static SNPInfo Read(BinaryReader reader)
        {
            // Initializer members are evaluated top to bottom, so the reads occur in stream order.
            var snp = new SNPInfo
            {
                id                  = reader.ReadString(),
                descriptionWikiText = reader.ReadString(),
                gene                = reader.ReadString(),
                chromosome          = reader.ReadString(),
                position            = reader.ReadInt32(),
                orientation         = (Orientation)reader.ReadByte(),
                updateTime          = DateTime.FromBinary(reader.ReadInt64())
            };

            // The genotype table is prefixed with its element count.
            var genotypeTable = new SNPGenotypeInfo[reader.ReadInt32()];
            for (int slot = 0; slot < genotypeTable.Length; slot++)
            {
                var entry = new SNPGenotypeInfo
                {
                    genotype = DiploidGenotype.Read(reader),
                    trait    = reader.ReadString(),
                    populationFrequencies = new Dictionary<string, float>()
                };

                // Each genotype carries a counted list of (population tag, frequency) pairs.
                for (int remaining = reader.ReadInt32(); remaining > 0; remaining--)
                {
                    string tag   = reader.ReadString();
                    float  share = reader.ReadSingle();
                    entry.populationFrequencies.Add(tag, share);
                }

                genotypeTable[slot] = entry;
            }

            snp.genotypes = genotypeTable;
            return snp;
        }
예제 #2
0
        /// <summary>
        /// Builds the page for one SNP: fills in the name, description and SNPedia link, then lists the
        /// SNP's genotypes, flagging the one that equals <paramref name="personalGenotype"/>.
        /// </summary>
        /// <param name="snpInfo">The SNP whose id, description and genotypes are displayed.</param>
        /// <param name="personalGenotype">
        /// The genotype to highlight. If none of the SNP's genotypes equals it, an extra entry with an
        /// empty trait and no population frequencies is appended for it.
        /// </param>
        public SimpleDiploidTraitPage(
            SNPInfo snpInfo,
            DiploidGenotype personalGenotype
            )
        {
            InitializeComponent();

            // Fill in the header controls that describe the SNP.
            nameLabel.Content       = snpInfo.id;
            descriptionLabel.Text   = Utilities.ConvertWikiTextToPlainText(snpInfo.descriptionWikiText);
            snpediaLink.NavigateUri = new Uri(string.Format("http://www.snpedia.com/index.php?title={0}", snpInfo.id));

            // Add one list entry per genotype, remembering whether any of them is the personal one.
            bHasMatchingGenotype = false;
            SNPGenotypeInfo[] knownGenotypes = snpInfo.genotypes;
            for (int index = 0; index < knownGenotypes.Length; index++)
            {
                SNPGenotypeInfo candidate = knownGenotypes[index];
                bool isPersonal = personalGenotype.Equals(candidate.genotype);
                if (isPersonal)
                {
                    bHasMatchingGenotype = true;
                }

                var adapter = new SNPGenotypeUIAdapter(candidate, isPersonal);
                genotypeList.Items.Add(adapter);
            }

            if (bHasMatchingGenotype)
            {
                return;
            }

            // Nothing matched: show the personal genotype as its own entry with no trait or frequency data.
            var placeholder = new SNPGenotypeInfo
            {
                genotype = personalGenotype,
                trait    = "",
                populationFrequencies = new Dictionary<string, float>()
            };
            genotypeList.Items.Add(new SNPGenotypeUIAdapter(placeholder, true));
        }
예제 #3
0
 /// <summary>
 /// Initialization constructor: stores the genotype information and the personal-genotype flag.
 /// </summary>
 /// <param name="inGenotypeInfo">The genotype information this adapter wraps.</param>
 /// <param name="bInPersonalGenotype">Value stored in <c>bPersonalGenotype</c>.</param>
 public SNPGenotypeUIAdapter(SNPGenotypeInfo inGenotypeInfo, bool bInPersonalGenotype)
 {
     this.genotypeInfo = inGenotypeInfo;
     this.bPersonalGenotype = bInPersonalGenotype;
 }
예제 #4
0
        /// <summary>
        /// Parses the wiki text of a page into an <see cref="SNPInfo"/>.
        /// </summary>
        /// <param name="pageText">The raw wiki text of the page.</param>
        /// <returns>
        /// The parsed SNP, or <c>null</c> when the page has no "rsnum" category, when its text converts
        /// to empty plain text, or when any declared genotype contains an unknown or deletion allele.
        /// </returns>
        /// <remarks>
        /// Numeric properties (position and population frequencies) are machine-readable wiki data, so
        /// they are parsed with the invariant culture rather than the current thread culture.
        /// </remarks>
        private static SNPInfo? ParseSNPPage(string pageText)
        {
            // Check if this page is a SNP.
            string snpSubstring = ParsePageCategorySubstring(pageText, "rsnum");
            if (snpSubstring == null)
            {
                return null;
            }

            // Parse this SNP's properties.
            var result = new SNPInfo();
            result.id         = string.Format("rs{0}", ParseCategoryProperty(snpSubstring, "rsid"));
            result.gene       = ParseCategoryProperty(snpSubstring, "gene");
            result.chromosome = ParseCategoryProperty(snpSubstring, "chromosome");

            // A missing or malformed position is recorded as -1.
            if (!int.TryParse(
                    ParseCategoryProperty(snpSubstring, "position"),
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out result.position))
            {
                result.position = -1;
            }
            result.orientation = Orientation.Unknown;
            result.updateTime  = DateTime.Today;

            // If the page is empty, don't add it to the database.
            result.descriptionWikiText = pageText;
            if (Utilities.ConvertWikiTextToPlainText(pageText).Trim().Length == 0)
            {
                return null;
            }

            // Parse the SNP's genotypes from the geno1, geno2, ... properties until one is empty.
            var tempGenotypes = new List<SNPGenotypeInfo>();
            for (int genotypeIndex = 0; ; genotypeIndex++)
            {
                string genotypeString = ParseCategoryProperty(snpSubstring, string.Format("geno{0}", genotypeIndex + 1));
                if (genotypeString == "")
                {
                    break;
                }

                // SNPedia represents deletion genotypes as a '-', but we use D.
                genotypeString = genotypeString.Replace('-', 'D');

                // Parse the genotype info.
                var newGenotype = new SNPGenotypeInfo();
                newGenotype.genotype = DNA.StringToDiploidGenotype(genotypeString);
                newGenotype.trait    = "";
                newGenotype.populationFrequencies = new Dictionary<string, float>();
                tempGenotypes.Add(newGenotype);

                // If any of the genotypes are deletions or unknown, don't add this snp to the database
                // as we can't correctly parse the inserted genotype yet.
                if (newGenotype.genotype.a == Genotype.Unknown || newGenotype.genotype.a == Genotype.Deletion ||
                    newGenotype.genotype.b == Genotype.Unknown || newGenotype.genotype.b == Genotype.Deletion)
                {
                    return null;
                }
            }
            result.genotypes = tempGenotypes.ToArray();

            // Check if the page has SNP population frequency data.
            var populationFrequencySubstring = ParsePageCategorySubstring(pageText, "population diversity");
            if (populationFrequencySubstring == null)
            {
                return result;
            }

            // Remap the genotype indices of the frequency section to the genotypes declared above.
            // Entries start out null and stay null when no declared genotype matches.
            var genotypeIndexRemap = new int?[result.genotypes.Length];
            for (int genotypeIndex = 0; genotypeIndex < result.genotypes.Length; genotypeIndex++)
            {
                var genotype = DNA.StringToDiploidGenotype(ParseCategoryProperty(populationFrequencySubstring, string.Format("geno{0}", genotypeIndex + 1)));
                for (int genotypeInfoIndex = 0; genotypeInfoIndex < result.genotypes.Length; genotypeInfoIndex++)
                {
                    if (result.genotypes[genotypeInfoIndex].genotype.Equals(genotype))
                    {
                        genotypeIndexRemap[genotypeIndex] = genotypeInfoIndex;
                        break;
                    }
                }
            }

            // Ignore population frequencies if they are redundantly defined or not defined for any genotypes:
            // every declared genotype must be the target of exactly one remap entry.
            bool bValidPopulationData = true;
            for (int genotypeIndex = 0; genotypeIndex < result.genotypes.Length; genotypeIndex++)
            {
                int numRemaps = 0;
                for (int remapIndex = 0; remapIndex < genotypeIndexRemap.Length; remapIndex++)
                {
                    if (genotypeIndexRemap[remapIndex] == genotypeIndex)
                    {
                        numRemaps++;
                    }
                }
                if (numRemaps != 1)
                {
                    bValidPopulationData = false;
                    break;
                }
            }

            if (!bValidPopulationData)
            {
                return result;
            }

            // Parse each population's frequencies for this SNP.
            string[] populations = new string[] { "CEU", "HCB", "JPT", "YRI" };
            foreach (var population in populations)
            {
                var frequencies = ParseList(ParseCategoryProperty(populationFrequencySubstring, population));
                if (frequencies.Count != result.genotypes.Length)
                {
                    // Skip populations whose list doesn't have one value per genotype.
                    continue;
                }

                for (int genotypeIndex = 0; genotypeIndex < genotypeIndexRemap.Length; genotypeIndex++)
                {
                    if (genotypeIndexRemap[genotypeIndex] == null)
                    {
                        continue;
                    }

                    // Same number styles as the default float.TryParse, but culture-invariant so that
                    // "12.5" means the same value regardless of the machine's regional settings.
                    float frequency;
                    if (float.TryParse(
                            frequencies[genotypeIndex],
                            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                            System.Globalization.CultureInfo.InvariantCulture,
                            out frequency))
                    {
                        // The parsed value is divided by 100 before it is stored.
                        result.genotypes[genotypeIndexRemap[genotypeIndex].Value].populationFrequencies.Add(population, frequency / 100.0f);
                    }
                }
            }

            return result;
        }