/// <summary>
/// Pairs up genes from the two assemblies that share the same key (as produced by GetKey)
/// and appends the outcome to <paramref name="combinedGenes"/>.
/// A key with exactly one gene on each assembly yields a single merged gene; every other
/// combination is passed to CombinerUtils.AddOrphans.
/// </summary>
/// <param name="combinedGenes">output collection receiving merged genes and orphans</param>
/// <param name="uga37">candidate genes from GRCh37</param>
/// <param name="uga38">candidate genes from GRCh38</param>
/// <param name="remainingGenes37">GRCh37 bookkeeping collection handed to CombinerUtils.RemoveGenes (presumably the genes not yet processed - confirm against the helper)</param>
/// <param name="remainingGenes38">GRCh38 bookkeeping collection handed to CombinerUtils.RemoveGenes (presumably the genes not yet processed - confirm against the helper)</param>
private static void CombineSet(ICollection <UgaGene> combinedGenes, IEnumerable <UgaGene> uga37,
                                       IEnumerable <UgaGene> uga38, ICollection <UgaGene> remainingGenes37, ICollection <UgaGene> remainingGenes38)
        {
            var grch37ByKey = uga37.GetMultiValueDict(GetKey);
            var grch38ByKey = uga38.GetMultiValueDict(GetKey);

            // visit every key reported by GetAllKeys for the two lookups
            foreach (var key in GetAllKeys(grch37ByKey.Keys, grch38ByKey.Keys))
            {
                var candidates37 = GetGenesByKey(grch37ByKey, key);
                var candidates38 = GetGenesByKey(grch38ByKey, key);

                // these genes are being handled now, so update the remaining-gene collections first
                CombinerUtils.RemoveGenes(candidates37, remainingGenes37);
                CombinerUtils.RemoveGenes(candidates38, remainingGenes38);

                // a one-to-one match occurs for both Entrez Gene Only & Ensembl Only
                bool isOneToOne = candidates37.Count == 1 && candidates38.Count == 1;

                if (isOneToOne)
                {
                    combinedGenes.Add(CombinerUtils.Merge(candidates37[0], candidates38[0]));
                }
                else
                {
                    // we end up here when:
                    // - only one assembly has a gene for this key
                    // - an assembly has two or more non-overlapping genes for this key (14 occurrences)
                    CombinerUtils.AddOrphans(combinedGenes, candidates37);
                    CombinerUtils.AddOrphans(combinedGenes, candidates38);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Combines the remaining GRCh37 and GRCh38 genes by HGNC ID and appends the outcome
        /// to <paramref name="combinedGenes"/>.
        /// An HGNC ID with exactly one gene on each assembly, both on the same strand, yields a
        /// single merged gene; every other combination is passed to CombinerUtils.AddOrphans.
        /// </summary>
        /// <param name="combinedGenes">output list receiving merged genes and orphans</param>
        /// <param name="remainingGenes37">GRCh37 genes still to be combined; also handed to CombinerUtils.RemoveGenes for each HGNC ID</param>
        /// <param name="remainingGenes38">GRCh38 genes still to be combined; also handed to CombinerUtils.RemoveGenes for each HGNC ID</param>
        public void Combine(List <UgaGene> combinedGenes, HashSet <UgaGene> remainingGenes37,
                            HashSet <UgaGene> remainingGenes38)
        {
            // gather the IDs and build both lookups up front, before the loop touches the remaining sets
            var hgncIds      = GetHgncIds(remainingGenes37, remainingGenes38);
            var grch37ByHgnc = remainingGenes37.GetMultiValueDict(gene => gene.HgncId);
            var grch38ByHgnc = remainingGenes38.GetMultiValueDict(gene => gene.HgncId);

            foreach (var hgncId in hgncIds)
            {
                var candidates37 = GetGenesByHgncId(grch37ByHgnc, hgncId);
                var candidates38 = GetGenesByHgncId(grch38ByHgnc, hgncId);

                CombinerUtils.RemoveGenes(candidates37, remainingGenes37);
                CombinerUtils.RemoveGenes(candidates38, remainingGenes38);

                // mergeable only when each assembly contributes a single gene and the strands agree;
                // short-circuiting keeps the [0] accesses safe
                bool canMerge = candidates37.Count == 1
                                && candidates38.Count == 1
                                && candidates37[0].OnReverseStrand == candidates38[0].OnReverseStrand;

                if (canMerge)
                {
                    combinedGenes.Add(CombinerUtils.Merge(candidates37[0], candidates38[0]));
                }
                else
                {
                    // we end up here when:
                    // - only one assembly has a gene for this HGNC ID
                    // - the genes are a mixture of forward and reverse strands (13 occurrences)
                    CombinerUtils.AddOrphans(combinedGenes, candidates37);
                    CombinerUtils.AddOrphans(combinedGenes, candidates38);
                }
            }
        }