示例#1
0
        /**
         * Deserializes one SNP record from the given binary reader.
         *
         * Values are consumed in this order: id, description wiki text, gene, chromosome,
         * position (Int32), orientation (one byte), update time (Int64 fed to DateTime.FromBinary),
         * then a genotype count followed by that many genotype entries. Each genotype entry is
         * a diploid genotype, a trait string, a population count, and that many
         * (population tag, frequency) pairs.
         */
        public static SNPInfo Read(BinaryReader reader)
        {
            // Initializer members are evaluated top to bottom, which keeps the reads in stream order.
            var snp = new SNPInfo
            {
                id                  = reader.ReadString(),
                descriptionWikiText = reader.ReadString(),
                gene                = reader.ReadString(),
                chromosome          = reader.ReadString(),
                position            = reader.ReadInt32(),
                orientation         = (Orientation)reader.ReadByte(),
                updateTime          = DateTime.FromBinary(reader.ReadInt64())
            };

            // The genotype table is length-prefixed.
            var genotypeTable = new SNPGenotypeInfo[reader.ReadInt32()];
            for (int slot = 0; slot < genotypeTable.Length; slot++)
            {
                var entry = new SNPGenotypeInfo
                {
                    genotype = DiploidGenotype.Read(reader),
                    trait    = reader.ReadString(),
                    populationFrequencies = new Dictionary <string, float>()
                };

                // Each genotype carries its own length-prefixed list of population frequencies.
                for (int remaining = reader.ReadInt32(); remaining > 0; remaining--)
                {
                    string tag       = reader.ReadString();
                    float  frequency = reader.ReadSingle();
                    entry.populationFrequencies.Add(tag, frequency);
                }

                genotypeTable[slot] = entry;
            }

            snp.genotypes = genotypeTable;
            return snp;
        }
            /**
             * Works out which orientation the genotypes listed for a SNP are expressed in.
             *
             * Every listed genotype, and separately its complement, has both of its alleles
             * tested with DoesGenotypeMatch. The result is the orientation member when only the
             * genotypes as listed pass, GetOppositeOrientation() when only the complements pass,
             * and Orientation.Unknown when both forms pass or neither does (which includes a SNP
             * that lists no genotypes at all).
             */
            public Orientation GetOrientation(SNPInfo snpInfo)
            {
                // Both forms are assumed to fit until some genotype proves otherwise.
                bool listedFormFits = true;
                bool complementFits = true;

                foreach (var entry in snpInfo.genotypes)
                {
                    var listed     = entry.genotype;
                    var complement = entry.genotype.GetComplement();

                    // A form fits only while both alleles of every genotype are valid.
                    if (!(DoesGenotypeMatch(listed.a) && DoesGenotypeMatch(listed.b)))
                    {
                        listedFormFits = false;
                    }

                    if (!(DoesGenotypeMatch(complement.a) && DoesGenotypeMatch(complement.b)))
                    {
                        complementFits = false;
                    }
                }

                // Both forms fitting, or neither fitting, tells us nothing about the orientation.
                if (listedFormFits == complementFits)
                {
                    return Orientation.Unknown;
                }

                if (listedFormFits)
                {
                    // Only the genotypes as listed fit, so they share this orientation.
                    return orientation;
                }

                // Only the complements fit, so the listed genotypes use the opposite orientation.
                return GetOppositeOrientation();
            }
示例#3
0
        /**
         * Builds the trait page for one SNP and highlights the genotype found in the personal genome.
         *
         * One list item is added per genotype the SNP declares. bHasMatchingGenotype ends up true
         * only when personalGenotype equals one of those declared genotypes; otherwise an extra
         * placeholder item with an empty trait is appended for the personal genotype and the flag
         * stays false.
         */
        public SimpleDiploidTraitPage(SNPInfo snpInfo, DiploidGenotype personalGenotype)
        {
            InitializeComponent();

            // Fill in the header: SNP name, plain-text description and the SNPedia link.
            nameLabel.Content       = snpInfo.id;
            descriptionLabel.Text   = Utilities.ConvertWikiTextToPlainText(snpInfo.descriptionWikiText);
            snpediaLink.NavigateUri = new Uri(string.Format("http://www.snpedia.com/index.php?title={0}", snpInfo.id));

            // List every declared genotype, marking the one that equals the personal genotype.
            bHasMatchingGenotype = false;
            var declaredGenotypes = snpInfo.genotypes;
            for (int index = 0; index < declaredGenotypes.Length; index++)
            {
                var  declared   = declaredGenotypes[index];
                bool isPersonal = personalGenotype.Equals(declared.genotype);
                if (isPersonal)
                {
                    bHasMatchingGenotype = true;
                }

                var listItem = new SNPGenotypeUIAdapter(declared, isPersonal);
                genotypeList.Items.Add(listItem);
            }

            if (bHasMatchingGenotype)
            {
                return;
            }

            // Nothing matched: show the personal genotype as its own entry with no trait data.
            var placeholder = new SNPGenotypeInfo
            {
                genotype = personalGenotype,
                trait    = "",
                populationFrequencies = new Dictionary <string, float>()
            };
            genotypeList.Items.Add(new SNPGenotypeUIAdapter(placeholder, true));
        }
        /**
         * Loads a SNP database from a binary stream.
         *
         * The stream must begin with the characters in referenceFileMagic. When it does not,
         * false is returned and outResult is left untouched. Otherwise outResult is replaced
         * with a new SNPDatabase that receives a counted list of (SNP id, SNPInfo) pairs
         * followed by a counted list of traits, and true is returned.
         *
         * The reader created here is never disposed, so this method does not close the stream.
         */
        public static bool Load(Stream stream, ref SNPDatabase outResult)
        {
            var input = new BinaryReader(stream);

            // Validate the magic header before touching the caller's database reference.
            char[] header        = input.ReadChars(referenceFileMagic.Length);
            var    headerIsValid = Utilities.ArrayCompare(header, referenceFileMagic);
            if (!headerIsValid)
            {
                return false;
            }

            // From here on the caller sees a fresh database.
            outResult = new SNPDatabase();

            // SNP section: a count, then that many id/info pairs.
            for (int snpsLeft = input.ReadInt32(); snpsLeft > 0; snpsLeft--)
            {
                string snpId   = input.ReadString();
                var    snpInfo = SNPInfo.Read(input);
                outResult.snpToInfoMap.Add(snpId, snpInfo);
            }

            // Trait section: a count, then that many serialized traits.
            for (int traitsLeft = input.ReadInt32(); traitsLeft > 0; traitsLeft--)
            {
                outResult.traits.Add(TraitInfo.Read(input));
            }

            return true;
        }
示例#5
0
        /**
         * Creates the trait page for one SNP and files it into one of the two page lists.
         *
         * The personal genotype comes from App.document.GetSNPValue(snpId), oriented to the
         * SNP's orientation; when the document has no value for the SNP an unknown/unknown
         * genotype is used instead. The page goes to genotypedPageList when the page reports
         * bHasMatchingGenotype, and to ungenotypedPageList otherwise.
         */
        private static void CreateTraitPage(string snpId, SNPInfo snpInfo, ref List <UIElement> genotypedPageList, ref List <UIElement> ungenotypedPageList)
        {
            // Look the SNP up in the currently loaded genome.
            var snpValue = App.document.GetSNPValue(snpId);

            DiploidGenotype personalGenotype;
            if (snpValue != null)
            {
                personalGenotype = snpValue.Value.GetOrientedGenotype(snpInfo.orientation);
            }
            else
            {
                // No data for this SNP in the genome.
                personalGenotype = new DiploidGenotype(Genotype.Unknown, Genotype.Unknown);
            }

            var traitPage = new SimpleDiploidTraitPage(snpInfo, personalGenotype);

            // The page itself knows whether the personal genotype matched a declared one.
            var destination = traitPage.bHasMatchingGenotype ? genotypedPageList : ungenotypedPageList;
            destination.Add(traitPage);
        }
示例#6
0
        /**
         * Parses the wiki text of a page into a SNPInfo.
         *
         * Returns null when the page has no "rsnum" category section, when its plain-text
         * rendering is empty, or when any declared genotype contains an unknown or deletion
         * allele (the inserted genotype cannot be parsed correctly yet).
         *
         * The position and the population frequencies are parsed with the invariant culture so
         * the result does not depend on the locale of the machine running the parser. A position
         * that fails to parse is stored as -1.
         *
         * Population frequencies are read from the "population diversity" section for the
         * populations CEU, HCB, JPT and YRI, divided by 100, and attached only when that section
         * names every declared genotype exactly once.
         */
        private static SNPInfo? ParseSNPPage(string pageText)
        {
            var invariantCulture = System.Globalization.CultureInfo.InvariantCulture;

            // Check if this page is a SNP.
            string snpSubstring = ParsePageCategorySubstring(pageText, "rsnum");
            if (snpSubstring == null)
            {
                return(null);
            }

            // Parse this SNP's properties.
            var result = new SNPInfo();
            result.id         = string.Format("rs{0}", ParseCategoryProperty(snpSubstring, "rsid"));
            result.gene       = ParseCategoryProperty(snpSubstring, "gene");
            result.chromosome = ParseCategoryProperty(snpSubstring, "chromosome");
            if (!int.TryParse(
                    ParseCategoryProperty(snpSubstring, "position"),
                    System.Globalization.NumberStyles.Integer,
                    invariantCulture,
                    out result.position))
            {
                result.position = -1;
            }
            result.orientation = Orientation.Unknown;
            result.updateTime  = DateTime.Today;

            // If the page is empty, don't add it to the database.
            result.descriptionWikiText = pageText;
            if (Utilities.ConvertWikiTextToPlainText(pageText).Trim().Length == 0)
            {
                return(null);
            }

            // Parse the SNP's genotypes: properties geno1, geno2, ... until the first missing one.
            var tempGenotypes = new List <SNPGenotypeInfo>();
            for (int genotypeIndex = 0;; genotypeIndex++)
            {
                string genotypeString = ParseCategoryProperty(snpSubstring, string.Format("geno{0}", genotypeIndex + 1));

                // Treat a null property the same as an empty one so a missing value ends the list
                // instead of throwing on the Replace call below.
                if (string.IsNullOrEmpty(genotypeString))
                {
                    break;
                }

                // SNPedia represents deletion genotypes as a '-', but we use D.
                genotypeString = genotypeString.Replace('-', 'D');

                // Parse the genotype info.
                var newGenotype = new SNPGenotypeInfo();
                newGenotype.genotype = DNA.StringToDiploidGenotype(genotypeString);
                newGenotype.trait    = "";
                newGenotype.populationFrequencies = new Dictionary <string, float>();
                tempGenotypes.Add(newGenotype);

                // If any of the genotypes are deletions or unknown, don't add this snp to the database as we can't correctly parse the inserted genotype yet.
                if (newGenotype.genotype.a == Genotype.Unknown || newGenotype.genotype.a == Genotype.Deletion ||
                    newGenotype.genotype.b == Genotype.Unknown || newGenotype.genotype.b == Genotype.Deletion)
                {
                    return(null);
                }
            }
            result.genotypes = tempGenotypes.ToArray();

            // Check if the page has SNP population frequency data.
            var populationFrequencySubstring = ParsePageCategorySubstring(pageText, "population diversity");
            if (populationFrequencySubstring != null)
            {
                // Remap the genotype indices to match the genotypes declared above:
                // genotypeIndexRemap[i] is the index in result.genotypes of the population section's
                // i-th genotype, or null when that genotype was not declared.
                var genotypeIndexRemap = new int?[result.genotypes.Length];
                for (int genotypeIndex = 0; genotypeIndex < result.genotypes.Length; genotypeIndex++)
                {
                    genotypeIndexRemap[genotypeIndex] = null;

                    var genotype = DNA.StringToDiploidGenotype(ParseCategoryProperty(populationFrequencySubstring, string.Format("geno{0}", genotypeIndex + 1)));
                    for (int genotypeInfoIndex = 0; genotypeInfoIndex < result.genotypes.Length; genotypeInfoIndex++)
                    {
                        if (result.genotypes[genotypeInfoIndex].genotype.Equals(genotype))
                        {
                            genotypeIndexRemap[genotypeIndex] = genotypeInfoIndex;
                            break;
                        }
                    }
                }

                // Ignore population frequencies if they are redundantly defined or not defined for any genotypes:
                // every declared genotype must be the target of exactly one remap entry.
                bool bValidPopulationData = true;
                for (int genotypeIndex = 0; genotypeIndex < result.genotypes.Length; genotypeIndex++)
                {
                    int numRemaps = 0;
                    for (int remapIndex = 0; remapIndex < genotypeIndexRemap.Length; remapIndex++)
                    {
                        if (genotypeIndexRemap[remapIndex] == genotypeIndex)
                        {
                            numRemaps++;
                        }
                    }
                    if (numRemaps != 1)
                    {
                        bValidPopulationData = false;
                    }
                }

                if (bValidPopulationData)
                {
                    // Parse each population's frequencies for this SNP.
                    string[] populations = new string[] { "CEU", "HCB", "JPT", "YRI" };
                    foreach (var population in populations)
                    {
                        // A population is used only when it lists one frequency per declared genotype.
                        var frequencies = ParseList(ParseCategoryProperty(populationFrequencySubstring, population));
                        if (frequencies.Count == result.genotypes.Length)
                        {
                            for (int genotypeIndex = 0; genotypeIndex < genotypeIndexRemap.Length; genotypeIndex++)
                            {
                                if (genotypeIndexRemap[genotypeIndex] != null)
                                {
                                    // Same number styles as float.TryParse(string, out float), but culture-invariant,
                                    // so "12.5" is read as 12.5 regardless of the current locale.
                                    float frequency;
                                    if (float.TryParse(
                                            frequencies[genotypeIndex],
                                            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                                            invariantCulture,
                                            out frequency))
                                    {
                                        // The page lists percentages; store them as fractions.
                                        result.genotypes[genotypeIndexRemap[genotypeIndex].Value].populationFrequencies.Add(population, frequency / 100.0f);
                                    }
                                }
                            }
                        }
                    }
                }
            }

            return(result);
        }