// Merges the SRGAP2C Ensembl and RefSeq fixtures, with ENSG00000171943 linked to
// Entrez gene 653464, and verifies each of the two merged genes field by field.
// ReSharper disable once InconsistentNaming
public void SRGAP2C()
{
    var ensemblToEntrez = new Dictionary<string, string> { { "ENSG00000171943", "653464" } };

    // arguments are evaluated left to right: Ensembl fixture first, then RefSeq
    var results = new GeneMerger(GetSrgap2CEnsemblGenes(), GetSrgap2CRefSeqGenes(), ensemblToEntrez).Merge();

    Assert.Equal(2, results.Count);

    // first gene: carries the Entrez ID only, no Ensembl ID, reverse strand
    var entrezOnly = results[0];
    Assert.Equal(0, entrezOnly.ReferenceIndex);
    Assert.Equal(120835810, entrezOnly.Start);
    Assert.Equal(120838261, entrezOnly.End);
    Assert.Equal("SRGAP2C", entrezOnly.Symbol);
    Assert.Equal(-1, entrezOnly.HgncId);
    Assert.Equal("653464", entrezOnly.EntrezGeneId.ToString());
    Assert.True(entrezOnly.EnsemblId.IsEmpty);
    Assert.True(entrezOnly.OnReverseStrand);

    // second gene: carries both the Entrez and the Ensembl ID, forward strand
    var linked = results[1];
    Assert.Equal(0, linked.ReferenceIndex);
    Assert.Equal(121107124, linked.Start);
    Assert.Equal(121131061, linked.End);
    Assert.Equal("SRGAP2C", linked.Symbol);
    Assert.Equal(30584, linked.HgncId);
    Assert.Equal("653464", linked.EntrezGeneId.ToString());
    Assert.Equal("ENSG00000171943", linked.EnsemblId.ToString());
    Assert.False(linked.OnReverseStrand);
}
// Merges the CDRT1 Ensembl and RefSeq fixtures, with ENSG00000241322 linked to
// Entrez gene 374286, and verifies each of the three merged genes field by field.
// ReSharper disable once InconsistentNaming
public void CDRT1()
{
    var ensemblToEntrez = new Dictionary<string, string> { { "ENSG00000241322", "374286" } };

    // arguments are evaluated left to right: Ensembl fixture first, then RefSeq
    var results = new GeneMerger(GetCdrt1EnsemblGenes(), GetCdrt1RefSeqGenes(), ensemblToEntrez).Merge();

    Assert.Equal(3, results.Count);

    // first gene: Ensembl ID only (ENSG00000181464), no Entrez ID
    var first = results[0];
    Assert.Equal(16, first.ReferenceIndex);
    Assert.Equal(15468797, first.Start);
    Assert.Equal(15469590, first.End);
    Assert.Equal("CDRT1", first.Symbol);
    Assert.Equal(-1, first.HgncId);
    Assert.True(first.EntrezGeneId.IsEmpty);
    Assert.Equal("ENSG00000181464", first.EnsemblId.ToString());
    Assert.True(first.OnReverseStrand);

    // second gene: the linked pair, carrying both the Entrez and the Ensembl ID
    var second = results[1];
    Assert.Equal(16, second.ReferenceIndex);
    Assert.Equal(15468798, second.Start);
    Assert.Equal(15523018, second.End);
    Assert.Equal("CDRT1", second.Symbol);
    Assert.Equal(14379, second.HgncId);
    Assert.Equal("374286", second.EntrezGeneId.ToString());
    Assert.Equal("ENSG00000241322", second.EnsemblId.ToString());
    Assert.True(second.OnReverseStrand);

    // third gene: Ensembl ID only (ENSG00000251537), no Entrez ID
    var third = results[2];
    Assert.Equal(16, third.ReferenceIndex);
    Assert.Equal(15474805, third.Start);
    Assert.Equal(15554967, third.End);
    Assert.Equal("CDRT1", third.Symbol);
    Assert.Equal(-1, third.HgncId);
    Assert.True(third.EntrezGeneId.IsEmpty);
    Assert.Equal("ENSG00000251537", third.EnsemblId.ToString());
    Assert.True(third.OnReverseStrand);
}
// Merges the SH3RF3 Ensembl and RefSeq fixtures, with ENSG00000172985 linked to
// Entrez gene 344558, and verifies that exactly one merged gene results and that
// it carries both identifiers.
// ReSharper disable once InconsistentNaming
public void SH3RF3()
{
    var genesA = GetSh3EnsemblGenes();
    var genesB = GetSh3RefSeqGenes();

    var linkedEnsemblIds = new Dictionary<string, string>
    {
        ["ENSG00000172985"] = "344558"
    };

    var merger = new GeneMerger(genesA, genesB, linkedEnsemblIds);
    var mergedGenes = merger.Merge();

    // Assert.Single both checks that there is exactly one element and returns it,
    // replacing the separate count comparison and index lookup (xUnit2013).
    var mergedGene = Assert.Single(mergedGenes);

    Assert.Equal(1, mergedGene.ReferenceIndex);
    Assert.Equal(109745804, mergedGene.Start);
    Assert.Equal(110262207, mergedGene.End);
    Assert.Equal("SH3RF3", mergedGene.Symbol);
    Assert.Equal(24699, mergedGene.HgncId);
    Assert.Equal("344558", mergedGene.EntrezGeneId.ToString());
    Assert.Equal("ENSG00000172985", mergedGene.EnsemblId.ToString());
    Assert.False(mergedGene.OnReverseStrand);
}
/// <summary>
/// Builds the merged gene set. In order: parses the gene info files and the HGNC
/// file, links Ensembl and Entrez gene IDs, loads the RefSeq GFF3 gene info, loads
/// both input gene files, updates their gene symbols, flattens them, merges the two
/// flattened sets and finally updates the HGNC ids of the merged genes. A heading
/// is written to the console before most of these steps.
/// </summary>
/// <param name="inputGenesPath">path of the first gene file (set A; presumably the Ensembl genes, confirm with caller)</param>
/// <param name="inputGenes2Path">path of the second gene file (set B; presumably the RefSeq genes, confirm with caller)</param>
/// <param name="geneInfoPaths">paths handed to ParseGeneInfoFiles</param>
/// <param name="hgncPath">path handed to ParseHgncFile</param>
/// <param name="refSeqGff3Path">path handed to GetRefSeqGff3GeneInfo</param>
public GeneCombiner(string inputGenesPath, string inputGenes2Path, List<string> geneInfoPaths, string hgncPath, string refSeqGff3Path)
{
    _geneInfoSource = ParseGeneInfoFiles(geneInfoPaths);

    // both lookup tables are handed to the HGNC parser and then used for linking
    var entrezToEnsembl = new Dictionary<string, UniqueString>();
    var ensemblToEntrez = new Dictionary<string, UniqueString>();
    _hgncSource = ParseHgncFile(hgncPath, entrezToEnsembl, ensemblToEntrez);

    WriteStepHeading("- linking Ensembl and Entrez gene IDs: ");
    var linkedIds = LinkIds(entrezToEnsembl, ensemblToEntrez);

    WriteStepHeading("- loading RefSeq GFF3: ");
    _refSeqGff3GeneInfo = GetRefSeqGff3GeneInfo(refSeqGff3Path);

    WriteStepHeading("- loading genes: ");
    // the file name (without directory) doubles as the description of each gene set
    var labelA = Path.GetFileName(inputGenesPath);
    var labelB = Path.GetFileName(inputGenes2Path);
    var loadedA = LoadGenes(inputGenesPath, labelA);
    var loadedB = LoadGenes(inputGenes2Path, labelB);

    WriteStepHeading("- update gene symbols: ");
    new GeneSymbolUpdater(loadedA, labelA, _geneInfoSource, _hgncSource).Update();
    new GeneSymbolUpdater(loadedB, labelB, _geneInfoSource, _hgncSource).Update();

    WriteStepHeading("- flattening genes: ");
    var flattenedA = new GeneFlattener(loadedA, labelA).Flatten();
    var flattenedB = new GeneFlattener(loadedB, labelB).Flatten();

    WriteStepHeading("- merging Ensembl and RefSeq:");
    _mergedGenes = new GeneMerger(flattenedA, flattenedB, linkedIds).Merge();

    WriteStepHeading("- update HGNC ids:");
    UpdateHgncIds(_mergedGenes);
}

/// <summary>
/// Writes an empty line followed by the given step heading to the console.
/// </summary>
private static void WriteStepHeading(string heading)
{
    Console.WriteLine();
    Console.WriteLine(heading);
}