/// <summary>
/// Groups the GRCh37 and GRCh38 genes by the key produced by <c>GetKey</c> and, for every key
/// returned by <c>GetAllKeys</c>, either merges the single gene from each assembly or adds all
/// genes for that key to the output as orphans.
/// </summary>
/// <param name="combinedGenes">Output collection that receives merged genes and orphans.</param>
/// <param name="uga37">GRCh37 genes to be grouped by key.</param>
/// <param name="uga38">GRCh38 genes to be grouped by key.</param>
/// <param name="remainingGenes37">Passed to <c>CombinerUtils.RemoveGenes</c> with the GRCh37 genes of each key (presumably so handled genes are dropped from it; confirm against the helper).</param>
/// <param name="remainingGenes38">Passed to <c>CombinerUtils.RemoveGenes</c> with the GRCh38 genes of each key (presumably so handled genes are dropped from it; confirm against the helper).</param>
private static void CombineSet(ICollection<UgaGene> combinedGenes, IEnumerable<UgaGene> uga37,
    IEnumerable<UgaGene> uga38, ICollection<UgaGene> remainingGenes37,
    ICollection<UgaGene> remainingGenes38)
{
    var lookup37 = uga37.GetMultiValueDict(GetKey);
    var lookup38 = uga38.GetMultiValueDict(GetKey);

    foreach (var currentKey in GetAllKeys(lookup37.Keys, lookup38.Keys))
    {
        var candidates37 = GetGenesByKey(lookup37, currentKey);
        var candidates38 = GetGenesByKey(lookup38, currentKey);

        // every gene seen for this key is handled here, whichever branch is taken below
        CombinerUtils.RemoveGenes(candidates37, remainingGenes37);
        CombinerUtils.RemoveGenes(candidates38, remainingGenes38);

        // anything other than exactly one gene per assembly cannot be paired up. This covers:
        // - one gene from GRCh37 and none from GRCh38 (or vice versa)
        // - two or more non-overlapping genes on the same assembly (14 occurrences)
        if (candidates37.Count != 1 || candidates38.Count != 1)
        {
            CombinerUtils.AddOrphans(combinedGenes, candidates37);
            CombinerUtils.AddOrphans(combinedGenes, candidates38);
            continue;
        }

        // exactly one gene on each assembly: this happens for both Entrez Gene Only & Ensembl Only
        combinedGenes.Add(CombinerUtils.Merge(candidates37[0], candidates38[0]));
    }
}
/// <summary>
/// Groups the remaining GRCh37 and GRCh38 genes by their <c>HgncId</c> and, for every id returned
/// by <c>GetHgncIds</c>, merges the pair when there is exactly one gene per assembly and both lie
/// on the same strand; otherwise all genes for that id are added to the output as orphans.
/// </summary>
/// <param name="combinedGenes">Output list that receives merged genes and orphans.</param>
/// <param name="remainingGenes37">GRCh37 genes still to be combined; also passed to <c>CombinerUtils.RemoveGenes</c> for each id (presumably so handled genes are dropped from it; confirm against the helper).</param>
/// <param name="remainingGenes38">GRCh38 genes still to be combined; also passed to <c>CombinerUtils.RemoveGenes</c> for each id (presumably so handled genes are dropped from it; confirm against the helper).</param>
public void Combine(List<UgaGene> combinedGenes, HashSet<UgaGene> remainingGenes37,
    HashSet<UgaGene> remainingGenes38)
{
    var allHgncIds = GetHgncIds(remainingGenes37, remainingGenes38);
    var hgncToGenes37 = remainingGenes37.GetMultiValueDict(gene => gene.HgncId);
    var hgncToGenes38 = remainingGenes38.GetMultiValueDict(gene => gene.HgncId);

    foreach (var currentId in allHgncIds)
    {
        var candidates37 = GetGenesByHgncId(hgncToGenes37, currentId);
        var candidates38 = GetGenesByHgncId(hgncToGenes38, currentId);

        // every gene seen for this HGNC id is handled here, whichever branch is taken below
        CombinerUtils.RemoveGenes(candidates37, remainingGenes37);
        CombinerUtils.RemoveGenes(candidates38, remainingGenes38);

        // merge only if we have one gene on each genome assembly and they're on the same strand
        // (short-circuiting keeps the [0] accesses safe when either list is not a singleton)
        bool isMergeablePair = candidates37.Count == 1
                               && candidates38.Count == 1
                               && candidates37[0].OnReverseStrand == candidates38[0].OnReverseStrand;

        if (isMergeablePair)
        {
            combinedGenes.Add(CombinerUtils.Merge(candidates37[0], candidates38[0]));
        }
        else
        {
            // the following situations end up here:
            // - one gene from GRCh37 and none from GRCh38 (or vice versa)
            // - there is a mixture of genes on forward and reverse strands (13 occurrences)
            CombinerUtils.AddOrphans(combinedGenes, candidates37);
            CombinerUtils.AddOrphans(combinedGenes, candidates38);
        }
    }
}