Example #1
0
 /// <summary>
 /// constructor: copies every argument into the property that carries the same name
 /// </summary>
 /// <param name="chromosome">value stored in Chromosome</param>
 /// <param name="start">value stored in Start</param>
 /// <param name="end">value stored in End</param>
 /// <param name="onReverseStrand">value stored in OnReverseStrand</param>
 /// <param name="symbol">value stored in Symbol</param>
 /// <param name="symbolSource">value stored in SymbolSource</param>
 /// <param name="geneId">value stored in GeneId</param>
 /// <param name="hgncId">value stored in HgncId</param>
 public MutableGene(IChromosome chromosome, int start, int end, bool onReverseStrand, string symbol,
     GeneSymbolSource symbolSource, string geneId, int hgncId)
 {
     // genomic location
     this.Chromosome = chromosome;
     this.Start = start;
     this.End = end;
     this.OnReverseStrand = onReverseStrand;

     // naming and identifiers
     this.Symbol = symbol;
     this.SymbolSource = symbolSource;
     this.GeneId = geneId;
     this.HgncId = hgncId;
 }
Example #2
0
        /// <summary>
        /// constructor: forwards the reference index and coordinates to the base class, stores the
        /// remaining arguments in their properties, builds FinalGene and finally calls GenerateHashCode
        /// </summary>
        /// <remarks>
        /// geneStableId is converted with CompactId.Convert and passed to the FinalGene constructor as
        /// the Ensembl ID when ImportDataStore.TranscriptSource is Ensembl, otherwise as the Entrez ID;
        /// the other of the two IDs is CompactId.Empty.
        /// </remarks>
        public Transcript(BioType biotype, Exon[] transExons, Gene gene, Translation translation, VariantEffectFeatureCache cache, Slice slice,
            bool onReverseStrand, bool isCanonical, int cdnaCodingStart, int cdnaCodingEnd, ushort referenceIndex, int start, int end,
            string ccdsId, string databaseId, string proteinId, string refSeqId, string geneStableId, string stableId, string geneSymbol,
            GeneSymbolSource geneSymbolSource, int hgncId, byte version, SimpleInterval[] microRnas)
            : base(referenceIndex, start, end)
        {
            // transcript-level attributes
            BioType = biotype;
            IsCanonical = isCanonical;
            OnReverseStrand = onReverseStrand;
            Version = version;
            CompDnaCodingStart = cdnaCodingStart;
            CompDnaCodingEnd = cdnaCodingEnd;

            // identifiers
            CcdsId = ccdsId;
            DatabaseId = databaseId;
            ProteinId = proteinId;
            RefSeqId = refSeqId;
            StableId = stableId;

            // gene-level attributes
            Gene = gene;
            GeneStableId = geneStableId;
            GeneSymbol = geneSymbol;
            GeneSymbolSource = geneSymbolSource;
            HgncId = hgncId;

            // associated features and caches
            TransExons = transExons;
            Translation = translation;
            MicroRnas = microRnas;
            Slice = slice;
            VariantEffectCache = cache;

            // the gene stable ID populates exactly one of the two compact IDs
            bool isEnsembl = ImportDataStore.TranscriptSource == TranscriptDataSource.Ensembl;
            var entrezId = isEnsembl ? CompactId.Empty : CompactId.Convert(geneStableId);
            var ensemblId = isEnsembl ? CompactId.Convert(geneStableId) : CompactId.Empty;

            // NOTE(review): the meaning of the trailing -1 argument is not visible here - confirm against the Gene constructor
            FinalGene = new VariantAnnotation.DataStructures.Gene(referenceIndex, start, end, onReverseStrand,
                geneSymbol, hgncId, entrezId, ensemblId, -1);

            // runs last, after every property above has been assigned
            GenerateHashCode();
        }
Example #3
0
 /// <summary>
 /// associates the string <paramref name="s"/> with the supplied gene symbol source in
 /// StringToGeneSymbolSource
 /// </summary>
 /// <remarks>
 /// NOTE(review): only StringToGeneSymbolSource is written here; if a second (reverse) lookup is
 /// expected to be kept in sync, confirm where that happens.
 /// </remarks>
 /// <param name="s">key under which the source is stored</param>
 /// <param name="geneSymbolSource">value stored for that key</param>
 private static void AddGeneSymbolSource(string s, GeneSymbolSource geneSymbolSource) =>
     StringToGeneSymbolSource[s] = geneSymbolSource;
Example #4
0
        /// <summary>
        /// loads the NCBI gene_info file and adds one GeneInfo entry to _geneSymbols for every
        /// row whose taxonomic ID equals HumanTaxonomicId
        /// </summary>
        /// <remarks>
        /// Lines starting with '#' are skipped, as are rows whose taxonomic ID cannot be parsed.
        /// Reading stops at the end of the file or at the first empty line.
        /// </remarks>
        /// <param name="geneInfoPath">path of the gene_info file, opened via
        /// GZipUtilities.GetAppropriateStreamReader</param>
        /// <exception cref="GeneralException">thrown when a data line does not contain exactly 15
        /// tab-separated columns, or when a gene ID occurs more than once</exception>
        public void LoadGeneInfo(string geneInfoPath)
        {
            const int numExpectedCols = 15;

            Console.Write("- loading gene_info data... ");

            using (var reader = GZipUtilities.GetAppropriateStreamReader(geneInfoPath))
            {
                string line;

                // an empty line ends parsing in the same way as the end of the file
                while (!string.IsNullOrEmpty(line = reader.ReadLine()))
                {
                    // skip comments (ordinal comparison: this is a machine-readable format)
                    if (line.StartsWith("#", StringComparison.Ordinal))
                    {
                        continue;
                    }

                    var cols = line.Split('\t');
                    if (cols.Length != numExpectedCols)
                    {
                        throw new GeneralException(
                                  $"Expected {numExpectedCols} columns, but found {cols.Length}: [{line}]");
                    }

                    // skip entries where we can't convert the taxonomic ID or where it isn't human;
                    // the invariant culture keeps parsing independent of the machine's locale
                    int taxId;
                    if (!int.TryParse(cols[0], System.Globalization.NumberStyles.Integer,
                            System.Globalization.CultureInfo.InvariantCulture, out taxId) ||
                        taxId != HumanTaxonomicId)
                    {
                        continue;
                    }

                    // convert the geneID
                    int geneId = GetGeneId(cols[1]);

                    // we're going to use symbol rather than the one from the nomenclature authority
                    var symbol     = cols[2];
                    var synonyms   = FixNullValue(cols[4]);
                    var dbXRefs    = cols[5];
                    var hgncSymbol = FixNullValue(cols[10]);

                    // get the HGNC id
                    var hgncId = GetHgncId(dbXRefs);

                    // the symbol is attributed to HGNC only when an HGNC id was found and the symbol
                    // matches the one in column 10; otherwise it is attributed to NCBI
                    var geneSymbolSource = (hgncId != null) && (symbol == hgncSymbol)
                        ? GeneSymbolSource.HGNC
                        : GeneSymbolSource.NCBI;

                    // each gene ID may only be added once
                    GeneInfo existingGeneInfo;
                    if (_geneSymbols.TryGetValue(geneId, out existingGeneInfo))
                    {
                        throw new GeneralException("Found a conflicting geneID in gene_info: " + line);
                    }

                    // add to the gene symbols list
                    _geneSymbols[geneId] = new GeneInfo
                    {
                        GeneID           = geneId,
                        GeneSymbol       = symbol,
                        GeneSymbolSource = geneSymbolSource,
                        HgncID           = hgncId,
                        Synonyms         = synonyms
                    };
                }
            }

            Console.WriteLine("{0} genes loaded.", _geneSymbols.Count);
        }