Пример #1
0
 /// <summary>
 /// Loads all genes from the file located at <paramref name="filePath"/>.
 /// </summary>
 /// <param name="filePath">Path passed to <c>GZipUtilities.GetAppropriateStreamReader</c>.</param>
 /// <returns>The array produced by <c>GeneInfoReader.GetGenes</c>.</returns>
 private static IEnumerable<GeneInfo> LoadGeneInfoGenes(string filePath)
 {
     using (var input = GZipUtilities.GetAppropriateStreamReader(filePath))
     {
         using (var geneReader = new GeneInfoReader(input))
         {
             // The array is fully produced here; both readers are disposed
             // as control leaves the using blocks.
             GeneInfo[] loadedGenes = geneReader.GetGenes();
             return loadedGenes;
         }
     }
 }
Пример #2
0
        /// <summary>
        /// Reads every file in <paramref name="geneInfoPaths"/> and builds four lookup tables:
        /// Entrez Gene ID -> symbol, Ensembl ID -> symbol, Entrez Gene ID -> HGNC id and
        /// Ensembl ID -> HGNC id. Progress and summary counts are written to the console.
        /// </summary>
        /// <param name="geneInfoPaths">Paths of the gene_info files to read, processed in order.</param>
        /// <returns>A <c>SymbolDataSource</c> wrapping the four populated dictionaries.</returns>
        private static SymbolDataSource ParseGeneInfoFiles(List<string> geneInfoPaths)
        {
            Console.WriteLine("- loading gene_info files:");

            var symbolByEntrezId = new Dictionary<string, UniqueString>();
            var symbolByEnsemblId = new Dictionary<string, UniqueString>();
            var hgncByEntrezId = new Dictionary<string, UniqueInt>();
            var hgncByEnsemblId = new Dictionary<string, UniqueInt>();

            foreach (var path in geneInfoPaths)
            {
                Console.Write("  - {0}... ", Path.GetFileName(path));

                // Counts only the non-empty records of the current file.
                int loadedCount = 0;

                using (var reader = new GeneInfoReader(path))
                {
                    // A null record signals the end of the file.
                    for (var record = reader.Next(); record != null; record = reader.Next())
                    {
                        if (record.IsEmpty)
                        {
                            continue;
                        }

                        loadedCount++;

                        var entrezId = record.EntrezGeneId;
                        var ensemblId = record.EnsemblId;
                        var symbol = record.Symbol;
                        var hgncId = record.HgncId;

                        bool entrezMissing = string.IsNullOrEmpty(entrezId);
                        bool ensemblMissing = string.IsNullOrEmpty(ensemblId);
                        bool symbolMissing = string.IsNullOrEmpty(symbol);
                        // An HGNC id of -1 is treated as "not present".
                        bool hgncMissing = hgncId == -1;

                        if (!symbolMissing && !entrezMissing)
                        {
                            AddIdToUniqueString(symbolByEntrezId, entrezId, symbol);
                        }

                        if (!symbolMissing && !ensemblMissing)
                        {
                            AddIdToUniqueString(symbolByEnsemblId, ensemblId, symbol);
                        }

                        if (!hgncMissing && !entrezMissing)
                        {
                            AddIdToHgncId(hgncByEntrezId, entrezId, hgncId);
                        }

                        if (!hgncMissing && !ensemblMissing)
                        {
                            AddIdToHgncId(hgncByEnsemblId, ensemblId, hgncId);
                        }
                    }
                }

                Console.WriteLine($"{loadedCount} entries loaded.");
            }

            // Summary: total number of keys, followed by the non-conflict count in parentheses.
            Console.WriteLine($"  - Entrez Gene ID -> symbol:  {symbolByEntrezId.Count} ({GetNonConflictCount(symbolByEntrezId)})");
            Console.WriteLine($"  - Ensembl ID -> symbol:      {symbolByEnsemblId.Count} ({GetNonConflictCount(symbolByEnsemblId)})");
            Console.WriteLine($"  - Entrez Gene ID -> HGNC id: {hgncByEntrezId.Count} ({GetNonConflictCount(hgncByEntrezId)})");
            Console.WriteLine($"  - Ensembl ID -> HGNC id:     {hgncByEnsemblId.Count} ({GetNonConflictCount(hgncByEnsemblId)})");

            var dataSource = new SymbolDataSource(symbolByEntrezId, symbolByEnsemblId, hgncByEntrezId, hgncByEnsemblId);
            return dataSource;
        }