Example #1
0
 /// <summary>
 /// Reads every HGNC gene from the supplied stream.
 /// </summary>
 /// <param name="stream">Stream handed to the <c>HgncReader</c>.</param>
 /// <param name="refNameToChromosome">Reference-name lookup handed to the <c>HgncReader</c>.</param>
 /// <returns>The array produced by <c>HgncReader.GetGenes()</c>.</returns>
 private static HgncGene[] LoadHgncGenes(Stream stream, IDictionary <string, IChromosome> refNameToChromosome)
 {
     // The reader is disposed as soon as the genes have been fetched.
     using (var hgncReader = new HgncReader(stream, refNameToChromosome))
     {
         return hgncReader.GetGenes();
     }
 }
Example #2
0
        /// <summary>
        /// Reads the HGNC file at <paramref name="hgncPath"/> and builds the lookup tables that map
        /// Entrez Gene IDs and Ensembl IDs to gene symbols and HGNC ids. Progress and summary
        /// counts are written to the console.
        /// </summary>
        /// <param name="hgncPath">Path passed to the <c>HgncReader</c>.</param>
        /// <param name="entrezGeneIdToEnsemblId">
        /// Caller-owned table that is updated with Entrez Gene ID -> Ensembl ID pairs for every
        /// entry carrying both identifiers.
        /// </param>
        /// <param name="ensemblIdToEntrezGeneId">
        /// Caller-owned table that is updated with Ensembl ID -> Entrez Gene ID pairs for every
        /// entry carrying both identifiers.
        /// </param>
        /// <returns>A <c>SymbolDataSource</c> wrapping the four symbol / HGNC id tables.</returns>
        private static SymbolDataSource ParseHgncFile(string hgncPath,
                                                      Dictionary <string, UniqueString> entrezGeneIdToEnsemblId,
                                                      Dictionary <string, UniqueString> ensemblIdToEntrezGeneId)
        {
            Console.WriteLine();
            Console.WriteLine("- loading HGNC file:");

            var entrezGeneIdToSymbol = new Dictionary<string, UniqueString>();
            var ensemblIdToSymbol    = new Dictionary<string, UniqueString>();
            var entrezGeneIdToHgncId = new Dictionary<string, UniqueInt>();
            var ensemblIdToHgncId    = new Dictionary<string, UniqueInt>();

            int numEntries = 0;

            using (var hgncReader = new HgncReader(hgncPath))
            {
                // A null entry from the reader ends the loop.
                for (var entry = hgncReader.Next(); entry != null; entry = hgncReader.Next())
                {
                    // Empty entries are skipped and do not count towards the total.
                    if (entry.IsEmpty) continue;

                    numEntries++;

                    bool hasEntrezGeneId = !string.IsNullOrEmpty(entry.EntrezGeneId);
                    bool hasEnsemblId    = !string.IsNullOrEmpty(entry.EnsemblId);
                    bool hasSymbol       = !string.IsNullOrEmpty(entry.Symbol);
                    bool hasHgncId       = entry.HgncId != -1; // -1 is treated as "no HGNC id"

                    // identifier -> symbol
                    if (hasSymbol && hasEntrezGeneId)
                    {
                        AddIdToUniqueString(entrezGeneIdToSymbol, entry.EntrezGeneId, entry.Symbol);
                    }

                    if (hasSymbol && hasEnsemblId)
                    {
                        AddIdToUniqueString(ensemblIdToSymbol, entry.EnsemblId, entry.Symbol);
                    }

                    // identifier -> HGNC id
                    if (hasHgncId && hasEntrezGeneId)
                    {
                        AddIdToHgncId(entrezGeneIdToHgncId, entry.EntrezGeneId, entry.HgncId);
                    }

                    if (hasHgncId && hasEnsemblId)
                    {
                        AddIdToHgncId(ensemblIdToHgncId, entry.EnsemblId, entry.HgncId);
                    }

                    // Cross references need both identifiers on the same entry.
                    if (!hasEnsemblId || !hasEntrezGeneId) continue;

                    AddIdToUniqueString(ensemblIdToEntrezGeneId, entry.EnsemblId, entry.EntrezGeneId);
                    AddIdToUniqueString(entrezGeneIdToEnsemblId, entry.EntrezGeneId, entry.EnsemblId);
                }
            }

            Console.WriteLine($"  - {numEntries} entries loaded.");

            // Each line shows the table size followed by the value reported by GetNonConflictCount.
            Console.WriteLine($"  - Entrez Gene ID -> symbol:  {entrezGeneIdToSymbol.Count} ({GetNonConflictCount(entrezGeneIdToSymbol)})");
            Console.WriteLine($"  - Ensembl ID -> symbol:      {ensemblIdToSymbol.Count} ({GetNonConflictCount(ensemblIdToSymbol)})");
            Console.WriteLine($"  - Entrez Gene ID -> HGNC id: {entrezGeneIdToHgncId.Count} ({GetNonConflictCount(entrezGeneIdToHgncId)})");
            Console.WriteLine($"  - Ensembl ID -> HGNC id:     {ensemblIdToHgncId.Count} ({GetNonConflictCount(ensemblIdToHgncId)})");

            var dataSource = new SymbolDataSource(entrezGeneIdToSymbol, ensemblIdToSymbol, entrezGeneIdToHgncId, ensemblIdToHgncId);
            return dataSource;
        }