Example #1
0
        /// <summary>
        /// Builds a single flattened gene from a seed gene and the genes that share its gene ID.
        /// A clone of <paramref name="seedGene"/> is used as the accumulator; each compatible,
        /// overlapping gene is folded into it via <c>UpdateCoordinates</c> and then flagged as
        /// <c>Invalid</c>.
        /// </summary>
        /// <param name="seedGene">The gene that is cloned to start the flattened result.</param>
        /// <param name="genesWithSameGeneId">
        /// Candidate genes. Entries that are absorbed have their <c>Invalid</c> flag set to true.
        /// </param>
        /// <param name="overlapStart">Start of a fixed overlap interval, or -1 for none.</param>
        /// <param name="overlapEnd">End of a fixed overlap interval, or -1 for none.</param>
        /// <returns>The clone of the seed gene after all qualifying genes have been folded in.</returns>
        private static MutableGene GetFlattenedGene(MutableGene seedGene, List <MutableGene> genesWithSameGeneId,
                                                    int overlapStart, int overlapEnd)
        {
            var merged = MutableGene.Clone(seedGene);

            // the fixed interval is only honored when both of its ends were supplied
            bool hasFixedInterval = overlapStart != -1 && overlapEnd != -1;

            foreach (var candidate in genesWithSameGeneId)
            {
                // already consumed
                if (candidate.Invalid)
                {
                    continue;
                }

                // must be on the same strand as the flattened gene
                if (merged.OnReverseStrand != candidate.OnReverseStrand)
                {
                    continue;
                }

                // must be on the same reference as the flattened gene
                if (merged.ReferenceIndex != candidate.ReferenceIndex)
                {
                    continue;
                }

                // without a fixed interval, the candidate is compared against the flattened gene's
                // coordinates as they are at this point in the loop
                bool overlaps = hasFixedInterval
                    ? Overlap.Partial(overlapStart, overlapEnd, candidate.Start, candidate.End)
                    : Overlap.Partial(merged.Start, merged.End, candidate.Start, candidate.End);

                if (!overlaps)
                {
                    continue;
                }

                UpdateCoordinates(candidate, merged);
                candidate.Invalid = true;
            }

            return merged;
        }
Example #2
0
        /// <summary>
        /// Merges the Ensembl and RefSeq genes that share a gene symbol.
        /// The valid genes are split by data source and each group is flattened. Every flattened
        /// Ensembl gene that is linked (through <c>_linkedEnsemblIds</c>) to one of the flattened
        /// RefSeq genes is combined with it and added to <c>_mergedGenes</c>; Ensembl genes without
        /// such a partner are handed to <c>AddEnsemblOrphan</c>, and RefSeq genes that were not
        /// consumed by a merge are handed to <c>AddRefSeqOrphan</c>.
        /// </summary>
        /// <param name="seedGene">The seed gene passed on to <c>GetValidGenes</c>.</param>
        /// <param name="genesWithSameSymbol">The genes that share the symbol being processed.</param>
        private void MergesGenesWithSameSymbol(MutableGene seedGene, List <MutableGene> genesWithSameSymbol)
        {
            int overlapStart;
            int overlapEnd;
            var candidates = GetValidGenes(seedGene, genesWithSameSymbol, out overlapStart, out overlapEnd);

            // partition by data source before any flattening takes place
            var ensemblSubset = GeneUtilities.GetGenesByDataSource(candidates, TranscriptDataSource.Ensembl);
            var refSeqSubset  = GeneUtilities.GetGenesByDataSource(candidates, TranscriptDataSource.RefSeq);

            var flatEnsembl = new GeneFlattener(ensemblSubset, "Ensembl", false).Flatten(overlapStart, overlapEnd);
            var flatRefSeq  = new GeneFlattener(refSeqSubset, "RefSeq", false).Flatten(overlapStart, overlapEnd);

            foreach (var ensembl in flatEnsembl)
            {
                // find the RefSeq partner (if any) via the linked Entrez gene ID
                string entrezId;
                var partner = _linkedEnsemblIds.TryGetValue(ensembl.EnsemblId.ToString(), out entrezId)
                    ? GeneUtilities.GetRefSeqGeneById(flatRefSeq, entrezId)
                    : null;

                // no link, or the linked RefSeq gene is not among the flattened genes
                if (partner == null)
                {
                    AddEnsemblOrphan(ensembl);
                    continue;
                }

                // combine the pair, starting from a copy of the Ensembl gene
                var combined = MutableGene.Clone(ensembl);
                combined.TranscriptDataSource = TranscriptDataSource.BothRefSeqAndEnsembl;
                UpdateCoordinates(partner, combined);

                // only borrow the RefSeq HGNC ID when the Ensembl gene has none (-1)
                bool needsHgncId = combined.HgncId == -1 && partner.HgncId != -1;
                if (needsHgncId)
                {
                    combined.HgncId = partner.HgncId;
                }

                combined.EntrezGeneId = partner.EntrezGeneId;
                _mergedGenes.Add(combined);

                // both sources are now consumed
                partner.Invalid = true;
                ensembl.Invalid = true;
                _numMergedGenes++;
            }

            // whatever RefSeq genes were not consumed by a merge become orphans
            foreach (var leftover in flatRefSeq)
            {
                if (!leftover.Invalid)
                {
                    AddRefSeqOrphan(leftover);
                }
            }
        }