Ejemplo n.º 1
0
        // Merges the SRGAP2C Ensembl and RefSeq fixtures, with ENSG00000171943 linked to
        // Entrez gene ID 653464, and checks that exactly two merged genes come back:
        // a reverse-strand entry with an empty Ensembl ID, followed by a forward-strand
        // entry that carries both the Entrez gene ID and the Ensembl ID.
        // ReSharper disable once InconsistentNaming
        public void SRGAP2C()
        {
            var ensemblGenes = GetSrgap2CEnsemblGenes();
            var refSeqGenes  = GetSrgap2CRefSeqGenes();
            var linkedIds    = new Dictionary<string, string>
            {
                { "ENSG00000171943", "653464" }
            };

            var results = new GeneMerger(ensemblGenes, refSeqGenes, linkedIds).Merge();

            Assert.Equal(2, results.Count);

            // first merged gene: Entrez gene ID only, on the reverse strand
            var first = results[0];

            Assert.Equal(0, first.ReferenceIndex);
            Assert.Equal(120835810, first.Start);
            Assert.Equal(120838261, first.End);
            Assert.Equal("SRGAP2C", first.Symbol);
            Assert.Equal(-1, first.HgncId);
            Assert.Equal("653464", first.EntrezGeneId.ToString());
            Assert.True(first.EnsemblId.IsEmpty);
            Assert.True(first.OnReverseStrand);

            // second merged gene: both IDs present, on the forward strand
            var second = results[1];

            Assert.Equal(0, second.ReferenceIndex);
            Assert.Equal(121107124, second.Start);
            Assert.Equal(121131061, second.End);
            Assert.Equal("SRGAP2C", second.Symbol);
            Assert.Equal(30584, second.HgncId);
            Assert.Equal("653464", second.EntrezGeneId.ToString());
            Assert.Equal("ENSG00000171943", second.EnsemblId.ToString());
            Assert.False(second.OnReverseStrand);
        }
Ejemplo n.º 2
0
        // Merges the CDRT1 Ensembl and RefSeq fixtures, with ENSG00000241322 linked to
        // Entrez gene ID 374286, and checks that three merged genes come back, all on the
        // reverse strand of reference index 16. Only the middle entry (the linked Ensembl
        // ID) is expected to carry an Entrez gene ID; the other two keep their own
        // Ensembl IDs and have an empty Entrez gene ID.
        // ReSharper disable once InconsistentNaming
        public void CDRT1()
        {
            var ensemblGenes = GetCdrt1EnsemblGenes();
            var refSeqGenes  = GetCdrt1RefSeqGenes();
            var linkedIds    = new Dictionary<string, string>
            {
                { "ENSG00000241322", "374286" }
            };

            var results = new GeneMerger(ensemblGenes, refSeqGenes, linkedIds).Merge();

            Assert.Equal(3, results.Count);

            // first merged gene: Ensembl-only entry
            var first = results[0];

            Assert.Equal(16, first.ReferenceIndex);
            Assert.Equal(15468797, first.Start);
            Assert.Equal(15469590, first.End);
            Assert.Equal("CDRT1", first.Symbol);
            Assert.Equal(-1, first.HgncId);
            Assert.True(first.EntrezGeneId.IsEmpty);
            Assert.Equal("ENSG00000181464", first.EnsemblId.ToString());
            Assert.True(first.OnReverseStrand);

            // second merged gene: the linked Ensembl/Entrez pair
            var second = results[1];

            Assert.Equal(16, second.ReferenceIndex);
            Assert.Equal(15468798, second.Start);
            Assert.Equal(15523018, second.End);
            Assert.Equal("CDRT1", second.Symbol);
            Assert.Equal(14379, second.HgncId);
            Assert.Equal("374286", second.EntrezGeneId.ToString());
            Assert.Equal("ENSG00000241322", second.EnsemblId.ToString());
            Assert.True(second.OnReverseStrand);

            // third merged gene: another Ensembl-only entry
            var third = results[2];

            Assert.Equal(16, third.ReferenceIndex);
            Assert.Equal(15474805, third.Start);
            Assert.Equal(15554967, third.End);
            Assert.Equal("CDRT1", third.Symbol);
            Assert.Equal(-1, third.HgncId);
            Assert.True(third.EntrezGeneId.IsEmpty);
            Assert.Equal("ENSG00000251537", third.EnsemblId.ToString());
            Assert.True(third.OnReverseStrand);
        }
Ejemplo n.º 3
0
        // Merges the SH3RF3 Ensembl and RefSeq fixtures, with ENSG00000172985 linked to
        // Entrez gene ID 344558, and checks that they collapse into a single forward-strand
        // gene carrying both identifiers.
        // ReSharper disable once InconsistentNaming
        public void SH3RF3()
        {
            var genesA           = GetSh3EnsemblGenes();
            var genesB           = GetSh3RefSeqGenes();
            var linkedEnsemblIds = new Dictionary<string, string> {
                ["ENSG00000172985"] = "344558"
            };

            var merger      = new GeneMerger(genesA, genesB, linkedEnsemblIds);
            var mergedGenes = merger.Merge();

            // Assert.Single verifies the collection has exactly one element and returns it,
            // replacing the separate size check and index lookup (xUnit analyzer rule
            // xUnit2013) and giving a clearer failure message when the count is wrong.
            var mergedGene = Assert.Single(mergedGenes);

            Assert.Equal(1, mergedGene.ReferenceIndex);
            Assert.Equal(109745804, mergedGene.Start);
            Assert.Equal(110262207, mergedGene.End);
            Assert.Equal("SH3RF3", mergedGene.Symbol);
            Assert.Equal(24699, mergedGene.HgncId);
            Assert.Equal("344558", mergedGene.EntrezGeneId.ToString());
            Assert.Equal("ENSG00000172985", mergedGene.EnsemblId.ToString());
            Assert.False(mergedGene.OnReverseStrand);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Runs the whole gene-combining pipeline at construction time: parses the gene
        /// info and HGNC files, links Ensembl and Entrez gene IDs, loads the RefSeq GFF3
        /// gene info, loads both gene inputs, updates their gene symbols, flattens them,
        /// merges the two flattened sets and finally updates the HGNC ids of the merged
        /// genes. A progress header is written to the console before each stage.
        /// </summary>
        /// <param name="inputGenesPath">path of the first gene input; its file name is used as that input's description</param>
        /// <param name="inputGenes2Path">path of the second gene input; its file name is used as that input's description</param>
        /// <param name="geneInfoPaths">paths handed to ParseGeneInfoFiles</param>
        /// <param name="hgncPath">path handed to ParseHgncFile</param>
        /// <param name="refSeqGff3Path">path handed to GetRefSeqGff3GeneInfo</param>
        public GeneCombiner(string inputGenesPath, string inputGenes2Path, List <string> geneInfoPaths, string hgncPath,
                            string refSeqGff3Path)
        {
            _geneInfoSource = ParseGeneInfoFiles(geneInfoPaths);

            // both lookup tables are passed to ParseHgncFile (presumably filled there)
            // and are consumed by LinkIds below
            var entrezToEnsembl = new Dictionary<string, UniqueString>();
            var ensemblToEntrez = new Dictionary<string, UniqueString>();
            _hgncSource = ParseHgncFile(hgncPath, entrezToEnsembl, ensemblToEntrez);

            Console.WriteLine();
            Console.WriteLine("- linking Ensembl and Entrez gene IDs: ");
            var linkedIds = LinkIds(entrezToEnsembl, ensemblToEntrez);

            Console.WriteLine();
            Console.WriteLine("- loading RefSeq GFF3: ");
            _refSeqGff3GeneInfo = GetRefSeqGff3GeneInfo(refSeqGff3Path);

            Console.WriteLine();
            Console.WriteLine("- loading genes: ");

            // each input is labelled with the file name of its path
            string firstLabel  = Path.GetFileName(inputGenesPath);
            string secondLabel = Path.GetFileName(inputGenes2Path);
            var firstGenes     = LoadGenes(inputGenesPath, firstLabel);
            var secondGenes    = LoadGenes(inputGenes2Path, secondLabel);

            Console.WriteLine();
            Console.WriteLine("- update gene symbols: ");
            new GeneSymbolUpdater(firstGenes, firstLabel, _geneInfoSource, _hgncSource).Update();
            new GeneSymbolUpdater(secondGenes, secondLabel, _geneInfoSource, _hgncSource).Update();

            Console.WriteLine();
            Console.WriteLine("- flattening genes: ");
            var firstFlat  = new GeneFlattener(firstGenes, firstLabel).Flatten();
            var secondFlat = new GeneFlattener(secondGenes, secondLabel).Flatten();

            Console.WriteLine();
            Console.WriteLine("- merging Ensembl and RefSeq:");
            _mergedGenes = new GeneMerger(firstFlat, secondFlat, linkedIds).Merge();

            Console.WriteLine();
            Console.WriteLine("- update HGNC ids:");
            UpdateHgncIds(_mergedGenes);
        }