예제 #1
0
        public void Flatten_ReturnSameGene_WhenListHasOneEntry()
        {
            // arrange: a list holding exactly one gene spanning 100-120
            var input = new List<MutableGene>();
            input.Add(new MutableGene(null, 100, 120, false, null, GeneSymbolSource.Unknown, "test", -1));

            // act
            var result = GeneFlattener.FlattenWithSameId(input);

            // assert: one gene comes back with the same coordinates as the input gene
            Assert.Single(result);

            var expectedGene = input[0];
            var observedGene = result[0];

            Assert.Equal(expectedGene.Start, observedGene.Start);
            Assert.Equal(expectedGene.End, observedGene.End);
        }
예제 #2
0
        public void Ensembl()
        {
            // Flattens the genes returned by GetCdrt1EnsemblGenes() and checks that they
            // collapse into a single gene with the expected start and end coordinates.
            var genes     = GetCdrt1EnsemblGenes();
            var flattener = new GeneFlattener(genes, "test", false);
            var flatGenes = flattener.Flatten();

            // Assert.Single is xUnit's dedicated "exactly one element" assertion (comparing
            // Count against 1 is flagged by analyzer rule xUnit2013); it also returns that
            // element, so no separate indexing is needed.
            var flatGene = Assert.Single(flatGenes);

            Assert.Equal(15468798, flatGene.Start);
            Assert.Equal(15522826, flatGene.End);
        }
예제 #3
0
        public void Flatten_NoGenesShouldBeCombined()
        {
            // arrange: three genes whose intervals (100-120, 130-140, 150-160) do not overlap
            var first  = new MutableGene(null, 100, 120, false, null, GeneSymbolSource.Unknown, "test", -1);
            var second = new MutableGene(null, 130, 140, false, null, GeneSymbolSource.Unknown, "test", -1);
            var third  = new MutableGene(null, 150, 160, false, null, GeneSymbolSource.Unknown, "test", -1);
            var genes  = new List<MutableGene> { first, second, third };

            // act
            var flatGenes = GeneFlattener.FlattenWithSameId(genes);

            // assert: every gene is still present, in order, with unchanged coordinates
            Assert.Equal(3, flatGenes.Count);

            int position = 0;
            foreach (var flatGene in flatGenes)
            {
                var expected = genes[position];

                Assert.Equal(expected.Start, flatGene.Start);
                Assert.Equal(expected.End, flatGene.End);

                position++;
            }
        }
예제 #4
0
        public void Flatten_AllGenesShouldBeCombined()
        {
            // arrange: 110-115 lies inside 100-120, and 120-130 begins where 100-120 ends
            const int expectedStart = 100;
            const int expectedEnd   = 130;

            var genes = new List<MutableGene>();
            genes.Add(new MutableGene(null, 100, 120, false, null, GeneSymbolSource.Unknown, "test", -1));
            genes.Add(new MutableGene(null, 110, 115, false, null, GeneSymbolSource.Unknown, "test", -1));
            genes.Add(new MutableGene(null, 120, 130, false, null, GeneSymbolSource.Unknown, "test", -1));

            // act
            var flatGenes = GeneFlattener.FlattenWithSameId(genes);

            // assert: a single gene remains, spanning the full 100-130 range
            var merged = Assert.Single(flatGenes);

            Assert.Equal(expectedStart, merged.Start);
            Assert.Equal(expectedEnd, merged.End);
        }
예제 #5
0
        public void Flatten_ReturnNull_WhenInputNull()
        {
            // a typed local keeps the argument's static type as List<MutableGene>
            List<MutableGene> noGenes = null;

            // a null input list is expected to yield a null result
            Assert.Null(GeneFlattener.FlattenWithSameId(noGenes));
        }
예제 #6
0
        /// <summary>
        /// Runs the whole gene-combining pipeline at construction time: parses the gene info and
        /// HGNC files, links Ensembl and Entrez gene IDs, loads the RefSeq GFF3 gene info, loads
        /// both gene files, then updates their gene symbols, flattens them, merges the two flattened
        /// sets and finally updates the HGNC ids of the merged genes. A progress header is written
        /// to the console before most stages.
        /// </summary>
        /// <param name="inputGenesPath">path of the first gene file; its file name is used as that set's description</param>
        /// <param name="inputGenes2Path">path of the second gene file; its file name is used as that set's description</param>
        /// <param name="geneInfoPaths">paths handed to ParseGeneInfoFiles</param>
        /// <param name="hgncPath">path handed to ParseHgncFile</param>
        /// <param name="refSeqGff3Path">path handed to GetRefSeqGff3GeneInfo</param>
        public GeneCombiner(string inputGenesPath, string inputGenes2Path, List <string> geneInfoPaths, string hgncPath,
                            string refSeqGff3Path)
        {
            // writes an empty line followed by the stage header
            void Announce(string header)
            {
                Console.WriteLine();
                Console.WriteLine(header);
            }

            _geneInfoSource = ParseGeneInfoFiles(geneInfoPaths);

            // both lookup tables are passed to the HGNC parser and then consumed by LinkIds
            var entrezToEnsembl = new Dictionary<string, UniqueString>();
            var ensemblToEntrez = new Dictionary<string, UniqueString>();
            _hgncSource = ParseHgncFile(hgncPath, entrezToEnsembl, ensemblToEntrez);

            Announce("- linking Ensembl and Entrez gene IDs: ");
            var linkedIds = LinkIds(entrezToEnsembl, ensemblToEntrez);

            Announce("- loading RefSeq GFF3: ");
            _refSeqGff3GeneInfo = GetRefSeqGff3GeneInfo(refSeqGff3Path);

            Announce("- loading genes: ");
            string firstLabel  = Path.GetFileName(inputGenesPath);
            string secondLabel = Path.GetFileName(inputGenes2Path);
            var firstGenes     = LoadGenes(inputGenesPath, firstLabel);
            var secondGenes    = LoadGenes(inputGenes2Path, secondLabel);

            Announce("- update gene symbols: ");
            new GeneSymbolUpdater(firstGenes, firstLabel, _geneInfoSource, _hgncSource).Update();
            new GeneSymbolUpdater(secondGenes, secondLabel, _geneInfoSource, _hgncSource).Update();

            Announce("- flattening genes: ");
            var firstFlat  = new GeneFlattener(firstGenes, firstLabel).Flatten();
            var secondFlat = new GeneFlattener(secondGenes, secondLabel).Flatten();

            Announce("- merging Ensembl and RefSeq:");
            _mergedGenes = new GeneMerger(firstFlat, secondFlat, linkedIds).Merge();

            Announce("- update HGNC ids:");
            UpdateHgncIds(_mergedGenes);
        }