/// <summary>
/// constructor: stores every argument in the identically named property
/// </summary>
/// <param name="chromosome">value stored in <c>Chromosome</c></param>
/// <param name="start">value stored in <c>Start</c></param>
/// <param name="end">value stored in <c>End</c></param>
/// <param name="onReverseStrand">value stored in <c>OnReverseStrand</c></param>
/// <param name="symbol">value stored in <c>Symbol</c></param>
/// <param name="symbolSource">value stored in <c>SymbolSource</c></param>
/// <param name="geneId">value stored in <c>GeneId</c></param>
/// <param name="hgncId">value stored in <c>HgncId</c></param>
public MutableGene(
    IChromosome chromosome,
    int start,
    int end,
    bool onReverseStrand,
    string symbol,
    GeneSymbolSource symbolSource,
    string geneId,
    int hgncId)
{
    // location
    Chromosome      = chromosome;
    Start           = start;
    End             = end;
    OnReverseStrand = onReverseStrand;

    // naming
    Symbol       = symbol;
    SymbolSource = symbolSource;

    // identifiers
    GeneId = geneId;
    HgncId = hgncId;
}
/// <summary>
/// constructor: forwards the reference index and coordinates to the base class, copies the
/// remaining arguments into the corresponding properties, builds <c>FinalGene</c>, and
/// finishes by calling <c>GenerateHashCode</c>.
/// </summary>
/// <remarks>
/// <c>geneStableId</c> is converted to a compact ID exactly once. When
/// <c>ImportDataStore.TranscriptSource</c> is <c>TranscriptDataSource.Ensembl</c> the converted
/// value is used as the Ensembl ID and the Entrez ID is <c>CompactId.Empty</c>; for any other
/// source the roles are swapped.
/// </remarks>
public Transcript(
    BioType biotype,
    Exon[] transExons,
    Gene gene,
    Translation translation,
    VariantEffectFeatureCache cache,
    Slice slice,
    bool onReverseStrand,
    bool isCanonical,
    int cdnaCodingStart,
    int cdnaCodingEnd,
    ushort referenceIndex,
    int start,
    int end,
    string ccdsId,
    string databaseId,
    string proteinId,
    string refSeqId,
    string geneStableId,
    string stableId,
    string geneSymbol,
    GeneSymbolSource geneSymbolSource,
    int hgncId,
    byte version,
    SimpleInterval[] microRnas)
    : base(referenceIndex, start, end)
{
    BioType            = biotype;
    CcdsId             = ccdsId;
    CompDnaCodingEnd   = cdnaCodingEnd;
    CompDnaCodingStart = cdnaCodingStart;
    DatabaseId         = databaseId;
    Gene               = gene;
    GeneStableId       = geneStableId;
    GeneSymbol         = geneSymbol;
    GeneSymbolSource   = geneSymbolSource;
    HgncId             = hgncId;
    IsCanonical        = isCanonical;
    MicroRnas          = microRnas;
    OnReverseStrand    = onReverseStrand;
    ProteinId          = proteinId;
    RefSeqId           = refSeqId;
    Slice              = slice;
    StableId           = stableId;
    TransExons         = transExons;
    Translation        = translation;
    VariantEffectCache = cache;
    Version            = version;

    // the gene stable ID belongs to exactly one of the two ID slots, depending on the data source
    var isEnsemblSource = ImportDataStore.TranscriptSource == TranscriptDataSource.Ensembl;
    var convertedGeneId = CompactId.Convert(geneStableId);

    var entrezId  = isEnsemblSource ? CompactId.Empty : convertedGeneId;
    var ensemblId = isEnsemblSource ? convertedGeneId : CompactId.Empty;

    // NOTE(review): the meaning of the trailing -1 argument is not visible here - confirm against the Gene constructor
    FinalGene = new VariantAnnotation.DataStructures.Gene(
        referenceIndex,
        start,
        end,
        onReverseStrand,
        geneSymbol,
        hgncId,
        entrezId,
        ensemblId,
        -1);

    GenerateHashCode();
}
/// <summary>
/// maps the supplied string to the supplied gene symbol source in
/// <c>StringToGeneSymbolSource</c>, replacing any value already stored under that key
/// </summary>
/// <param name="s">the string key</param>
/// <param name="geneSymbolSource">the gene symbol source stored under the key</param>
private static void AddGeneSymbolSource(string s, GeneSymbolSource geneSymbolSource) =>
    StringToGeneSymbolSource[s] = geneSymbolSource;
/// <summary>
/// loads the NCBI gene_info file and adds one <c>GeneInfo</c> entry per human gene to
/// <c>_geneSymbols</c>, keyed by gene ID
/// </summary>
/// <remarks>
/// Reading stops at the end of the stream or at the first empty line. Lines starting with
/// '#' are treated as comments. Rows whose taxonomic ID is not an integer, or is not
/// <c>HumanTaxonomicId</c>, are skipped.
/// </remarks>
/// <param name="geneInfoPath">path handed to <c>GZipUtilities.GetAppropriateStreamReader</c></param>
/// <exception cref="GeneralException">
/// thrown when a non-comment line does not have exactly 15 tab-separated columns, or when
/// the same gene ID is encountered more than once
/// </exception>
public void LoadGeneInfo(string geneInfoPath)
{
    const int numExpectedCols = 15;

    // column positions used from each tab-separated row
    const int taxIdIndex      = 0;
    const int geneIdIndex     = 1;
    const int symbolIndex     = 2;
    const int synonymsIndex   = 4;
    const int dbXRefsIndex    = 5;
    const int hgncSymbolIndex = 10;

    Console.Write("- loading gene_info data... ");

    using (var reader = GZipUtilities.GetAppropriateStreamReader(geneInfoPath))
    {
        while (true)
        {
            var line = reader.ReadLine();

            // end of stream or first empty line terminates the load
            if (string.IsNullOrEmpty(line))
            {
                break;
            }

            // skip comments (ordinal comparison: this is machine-readable data, not localized text)
            if (line.StartsWith("#", StringComparison.Ordinal))
            {
                continue;
            }

            var cols = line.Split('\t');

            if (cols.Length != numExpectedCols)
            {
                throw new GeneralException(
                    $"Expected {numExpectedCols} columns, but found {cols.Length}: [{line}]");
            }

            // skip entries where we can't convert the taxonomic ID
            int taxId;
            if (!int.TryParse(cols[taxIdIndex], out taxId))
            {
                continue;
            }

            // only human entries are kept
            if (taxId != HumanTaxonomicId)
            {
                continue;
            }

            // convert the geneID
            int geneId = GetGeneId(cols[geneIdIndex]);

            // we're going to use symbol rather than the one from the nomenclature authority
            var symbol     = cols[symbolIndex];
            var synonyms   = FixNullValue(cols[synonymsIndex]);
            var dbXRefs    = cols[dbXRefsIndex];
            var hgncSymbol = FixNullValue(cols[hgncSymbolIndex]);

            // get the HGNC id
            var hgncId = GetHgncId(dbXRefs);

            // the symbol is attributed to HGNC only when an HGNC id exists and the
            // nomenclature authority uses the same symbol; otherwise it is attributed to NCBI
            GeneSymbolSource geneSymbolSource = GeneSymbolSource.NCBI;
            if ((hgncId != null) && (symbol == hgncSymbol))
            {
                geneSymbolSource = GeneSymbolSource.HGNC;
            }

            // each gene ID may appear only once
            if (_geneSymbols.ContainsKey(geneId))
            {
                throw new GeneralException("Found a conflicting geneID in gene_info: " + line);
            }

            // add to the gene symbols list
            _geneSymbols[geneId] = new GeneInfo
            {
                GeneID           = geneId,
                GeneSymbol       = symbol,
                GeneSymbolSource = geneSymbolSource,
                HgncID           = hgncId,
                Synonyms         = synonyms
            };
        }
    }

    Console.WriteLine("{0} genes loaded.", _geneSymbols.Count);
}