/// <summary>
/// Creates a clone of <paramref name="seedGene"/> and folds into it every gene from
/// <paramref name="genesWithSameGeneId"/> that is still valid, shares the clone's strand and
/// reference index, and passes the overlap test. Each gene that is folded in is marked invalid.
/// </summary>
/// <param name="seedGene">The gene that is cloned to start the flattened result.</param>
/// <param name="genesWithSameGeneId">Candidate genes to fold into the clone.</param>
/// <param name="overlapStart">Start of the fixed overlap window, or -1 to test against the clone's current coordinates.</param>
/// <param name="overlapEnd">End of the fixed overlap window, or -1 to test against the clone's current coordinates.</param>
/// <returns>The clone after all qualifying genes have been passed to UpdateCoordinates.</returns>
private static MutableGene GetFlattenedGene(MutableGene seedGene, List<MutableGene> genesWithSameGeneId, int overlapStart, int overlapEnd)
{
    var result = MutableGene.Clone(seedGene);

    // the fixed window is only used when both bounds were supplied
    bool hasFixedWindow = overlapStart != -1 && overlapEnd != -1;

    foreach (var candidate in genesWithSameGeneId)
    {
        bool isCompatible = !candidate.Invalid
                            && result.OnReverseStrand == candidate.OnReverseStrand
                            && result.ReferenceIndex == candidate.ReferenceIndex;

        if (!isCompatible)
        {
            continue;
        }

        // without a fixed window, the test uses the clone's coordinates as they are at this
        // point in the loop, so it must be re-evaluated for every candidate
        bool overlaps = hasFixedWindow
            ? Overlap.Partial(overlapStart, overlapEnd, candidate.Start, candidate.End)
            : Overlap.Partial(result.Start, result.End, candidate.Start, candidate.End);

        if (overlaps)
        {
            // NOTE(review): UpdateCoordinates presumably widens the clone to cover the candidate - confirm
            UpdateCoordinates(candidate, result);
            candidate.Invalid = true;
        }
    }

    return result;
}
/// <summary>
/// Splits the valid genes for a symbol into Ensembl and RefSeq sets, flattens each set, and then
/// pairs every flattened Ensembl gene with the RefSeq gene it is linked to. Paired genes are merged
/// and added to the merged gene collection; unpaired genes are handed to the orphan handlers.
/// </summary>
/// <param name="seedGene">The seed gene forwarded to GetValidGenes.</param>
/// <param name="genesWithSameSymbol">All genes sharing the symbol, forwarded to GetValidGenes.</param>
private void MergesGenesWithSameSymbol(MutableGene seedGene, List<MutableGene> genesWithSameSymbol)
{
    int overlapStart, overlapEnd;
    var validGenes = GetValidGenes(seedGene, genesWithSameSymbol, out overlapStart, out overlapEnd);

    var ensemblGenes = GeneUtilities.GetGenesByDataSource(validGenes, TranscriptDataSource.Ensembl);
    var refSeqGenes  = GeneUtilities.GetGenesByDataSource(validGenes, TranscriptDataSource.RefSeq);

    // flatten each data source independently, using the same overlap window
    var flatEnsemblGenes = new GeneFlattener(ensemblGenes, "Ensembl", false).Flatten(overlapStart, overlapEnd);
    var flatRefSeqGenes  = new GeneFlattener(refSeqGenes, "RefSeq", false).Flatten(overlapStart, overlapEnd);

    foreach (var ensemblGene in flatEnsemblGenes)
    {
        // look up the RefSeq partner; it stays null when the Ensembl ID has no linked Entrez ID
        // or when the linked Entrez ID is not among the flattened RefSeq genes
        string linkedEntrezId;
        var partner = _linkedEnsemblIds.TryGetValue(ensemblGene.EnsemblId.ToString(), out linkedEntrezId)
            ? GeneUtilities.GetRefSeqGeneById(flatRefSeqGenes, linkedEntrezId)
            : null;

        if (partner == null)
        {
            // add the unused Ensembl genes
            AddEnsemblOrphan(ensemblGene);
            continue;
        }

        // merge the Ensembl and RefSeq gene
        var combined = MutableGene.Clone(ensemblGene);
        combined.TranscriptDataSource = TranscriptDataSource.BothRefSeqAndEnsembl;
        UpdateCoordinates(partner, combined);

        // only borrow the RefSeq HGNC ID when the Ensembl gene lacks one and RefSeq has one
        bool shouldBorrowHgncId = combined.HgncId == -1 && partner.HgncId != -1;
        if (shouldBorrowHgncId)
        {
            combined.HgncId = partner.HgncId;
        }

        combined.EntrezGeneId = partner.EntrezGeneId;
        _mergedGenes.Add(combined);

        // mark both sources as consumed so they are not reported as orphans
        partner.Invalid     = true;
        ensemblGene.Invalid = true;
        _numMergedGenes++;
    }

    // add the unused RefSeq genes
    foreach (var leftover in flatRefSeqGenes)
    {
        if (!leftover.Invalid)
        {
            AddRefSeqOrphan(leftover);
        }
    }
}