/// <summary>
/// Reads every gene from the gene_info file at <paramref name="filePath"/>.
/// </summary>
/// <param name="filePath">Path handed to <c>GZipUtilities.GetAppropriateStreamReader</c>.</param>
/// <returns>The array produced by <c>GeneInfoReader.GetGenes</c>.</returns>
private static IEnumerable<GeneInfo> LoadGeneInfoGenes(string filePath)
{
    using (var textSource = GZipUtilities.GetAppropriateStreamReader(filePath))
    {
        using (var geneReader = new GeneInfoReader(textSource))
        {
            // Both readers are disposed (inner first) once the array has been obtained.
            GeneInfo[] loadedGenes = geneReader.GetGenes();
            return loadedGenes;
        }
    }
}
/// <summary>
/// Reads each gene_info file in <paramref name="geneInfoPaths"/> and collects four lookup
/// tables: Entrez Gene ID -> symbol, Ensembl ID -> symbol, Entrez Gene ID -> HGNC id and
/// Ensembl ID -> HGNC id. Progress and summary counts are written to the console.
/// </summary>
/// <param name="geneInfoPaths">Paths of the gene_info files to read, processed in order.</param>
/// <returns>A <c>SymbolDataSource</c> built from the four lookup tables.</returns>
private static SymbolDataSource ParseGeneInfoFiles(List<string> geneInfoPaths)
{
    Console.WriteLine("- loading gene_info files:");

    var symbolByEntrezId  = new Dictionary<string, UniqueString>();
    var symbolByEnsemblId = new Dictionary<string, UniqueString>();
    var hgncByEntrezId    = new Dictionary<string, UniqueInt>();
    var hgncByEnsemblId   = new Dictionary<string, UniqueInt>();

    foreach (var geneInfoPath in geneInfoPaths)
    {
        Console.Write(" - {0}... ", Path.GetFileName(geneInfoPath));

        int loadedCount = 0;

        using (var reader = new GeneInfoReader(geneInfoPath))
        {
            // Next() returning null marks the end of the file.
            for (var entry = reader.Next(); entry != null; entry = reader.Next())
            {
                // Empty records are skipped and not counted.
                if (entry.IsEmpty)
                {
                    continue;
                }

                loadedCount++;

                bool entrezKnown  = !string.IsNullOrEmpty(entry.EntrezGeneId);
                bool ensemblKnown = !string.IsNullOrEmpty(entry.EnsemblId);
                bool symbolKnown  = !string.IsNullOrEmpty(entry.Symbol);
                bool hgncKnown    = entry.HgncId != -1; // -1 is treated as "no HGNC id"

                if (symbolKnown && entrezKnown)
                {
                    AddIdToUniqueString(symbolByEntrezId, entry.EntrezGeneId, entry.Symbol);
                }

                if (symbolKnown && ensemblKnown)
                {
                    AddIdToUniqueString(symbolByEnsemblId, entry.EnsemblId, entry.Symbol);
                }

                if (hgncKnown && entrezKnown)
                {
                    AddIdToHgncId(hgncByEntrezId, entry.EntrezGeneId, entry.HgncId);
                }

                if (hgncKnown && ensemblKnown)
                {
                    AddIdToHgncId(hgncByEnsemblId, entry.EnsemblId, entry.HgncId);
                }
            }
        }

        Console.WriteLine($"{loadedCount} entries loaded.");
    }

    // Each summary line shows the table size followed by the value from GetNonConflictCount.
    Console.WriteLine($" - Entrez Gene ID -> symbol: {symbolByEntrezId.Count} ({GetNonConflictCount(symbolByEntrezId)})");
    Console.WriteLine($" - Ensembl ID -> symbol: {symbolByEnsemblId.Count} ({GetNonConflictCount(symbolByEnsemblId)})");
    Console.WriteLine($" - Entrez Gene ID -> HGNC id: {hgncByEntrezId.Count} ({GetNonConflictCount(hgncByEntrezId)})");
    Console.WriteLine($" - Ensembl ID -> HGNC id: {hgncByEnsemblId.Count} ({GetNonConflictCount(hgncByEnsemblId)})");

    var dataSource = new SymbolDataSource(
        symbolByEntrezId,
        symbolByEnsemblId,
        hgncByEntrezId,
        hgncByEnsemblId);

    return dataSource;
}