/// <summary>
/// Creates an <c>HgncReader</c> over <paramref name="stream"/> and returns the
/// genes it produces.
/// </summary>
/// <param name="stream">Stream handed to the <c>HgncReader</c> constructor.</param>
/// <param name="refNameToChromosome">Reference-name lookup handed to the <c>HgncReader</c> constructor.</param>
/// <returns>The array returned by <c>HgncReader.GetGenes()</c>.</returns>
private static HgncGene[] LoadHgncGenes(Stream stream, IDictionary<string, IChromosome> refNameToChromosome)
{
    // The reader is disposed as soon as GetGenes() has produced its result
    // (or thrown), exactly as with a temp variable assigned inside the using.
    using (var hgncReader = new HgncReader(stream, refNameToChromosome))
    {
        return hgncReader.GetGenes();
    }
}
/// <summary>
/// Reads the HGNC file at <paramref name="hgncPath"/> and builds four lookup tables
/// (Entrez Gene ID -> symbol, Ensembl ID -> symbol, Entrez Gene ID -> HGNC id,
/// Ensembl ID -> HGNC id), printing a short summary to the console.
/// </summary>
/// <param name="hgncPath">Path passed to the <c>HgncReader</c> constructor.</param>
/// <param name="entrezGeneIdToEnsemblId">
/// Caller-owned map that is updated in place: for every entry carrying both IDs,
/// the Entrez Gene ID -> Ensembl ID pair is added via <c>AddIdToUniqueString</c>.
/// </param>
/// <param name="ensemblIdToEntrezGeneId">
/// Caller-owned map that is updated in place: for every entry carrying both IDs,
/// the Ensembl ID -> Entrez Gene ID pair is added via <c>AddIdToUniqueString</c>.
/// </param>
/// <returns>
/// A new <c>SymbolDataSource</c> constructed from the four lookup tables built here.
/// </returns>
private static SymbolDataSource ParseHgncFile(string hgncPath, Dictionary<string, UniqueString> entrezGeneIdToEnsemblId, Dictionary<string, UniqueString> ensemblIdToEntrezGeneId)
{
    Console.WriteLine();
    Console.WriteLine("- loading HGNC file:");

    var entrezGeneIdToSymbol = new Dictionary<string, UniqueString>();
    var ensemblIdToSymbol    = new Dictionary<string, UniqueString>();
    var entrezGeneIdToHgncId = new Dictionary<string, UniqueInt>();
    var ensemblIdToHgncId    = new Dictionary<string, UniqueInt>();

    int numEntries = 0;

    using (var reader = new HgncReader(hgncPath))
    {
        // Next() returning null ends the loop; empty records are skipped
        // and do not count towards numEntries.
        for (var geneinfo = reader.Next(); geneinfo != null; geneinfo = reader.Next())
        {
            if (geneinfo.IsEmpty)
            {
                continue;
            }

            numEntries++;

            bool hasEntrezGeneId = !string.IsNullOrEmpty(geneinfo.EntrezGeneId);
            bool hasEnsemblId    = !string.IsNullOrEmpty(geneinfo.EnsemblId);
            bool hasSymbol       = !string.IsNullOrEmpty(geneinfo.Symbol);
            bool hasHgncId       = geneinfo.HgncId != -1; // -1 is treated as "no HGNC id"

            if (hasSymbol)
            {
                if (hasEntrezGeneId)
                {
                    AddIdToUniqueString(entrezGeneIdToSymbol, geneinfo.EntrezGeneId, geneinfo.Symbol);
                }

                if (hasEnsemblId)
                {
                    AddIdToUniqueString(ensemblIdToSymbol, geneinfo.EnsemblId, geneinfo.Symbol);
                }
            }

            if (hasHgncId)
            {
                if (hasEntrezGeneId)
                {
                    AddIdToHgncId(entrezGeneIdToHgncId, geneinfo.EntrezGeneId, geneinfo.HgncId);
                }

                if (hasEnsemblId)
                {
                    AddIdToHgncId(ensemblIdToHgncId, geneinfo.EnsemblId, geneinfo.HgncId);
                }
            }

            // Cross-reference the two ID systems only when the entry has both.
            if (hasEnsemblId && hasEntrezGeneId)
            {
                AddIdToUniqueString(ensemblIdToEntrezGeneId, geneinfo.EnsemblId, geneinfo.EntrezGeneId);
                AddIdToUniqueString(entrezGeneIdToEnsemblId, geneinfo.EntrezGeneId, geneinfo.EnsemblId);
            }
        }
    }

    // Each summary line prints the table size followed, in parentheses, by the
    // value GetNonConflictCount reports for that table.
    Console.WriteLine($" - {numEntries} entries loaded.");
    Console.WriteLine($" - Entrez Gene ID -> symbol: {entrezGeneIdToSymbol.Count} ({GetNonConflictCount(entrezGeneIdToSymbol)})");
    Console.WriteLine($" - Ensembl ID -> symbol: {ensemblIdToSymbol.Count} ({GetNonConflictCount(ensemblIdToSymbol)})");
    // This literal was previously split by a raw line break, which is not legal
    // inside a regular interpolated string; it now matches its sibling lines.
    Console.WriteLine($" - Entrez Gene ID -> HGNC id: {entrezGeneIdToHgncId.Count} ({GetNonConflictCount(entrezGeneIdToHgncId)})");
    Console.WriteLine($" - Ensembl ID -> HGNC id: {ensemblIdToHgncId.Count} ({GetNonConflictCount(ensemblIdToHgncId)})");

    return new SymbolDataSource(entrezGeneIdToSymbol, ensemblIdToSymbol, entrezGeneIdToHgncId, ensemblIdToHgncId);
}