/// <summary>
/// Groups the metadata of the supplied transcripts by the integer gene ID
/// produced by ConvertGeneIdToInt for each transcript's gene.
/// </summary>
/// <param name="transcripts">the transcripts whose metadata should be grouped</param>
/// <returns>
/// a dictionary, sorted by gene ID, that maps each gene ID to the set of
/// metadata objects built from the transcripts belonging to that gene
/// </returns>
private SortedDictionary<int, HashSet<TranscriptMetadata>> GetTranscriptsByEntrezGeneId(IEnumerable<MutableTranscript> transcripts)
{
    var genes = new SortedDictionary<int, HashSet<TranscriptMetadata>>();

    foreach (var transcript in transcripts)
    {
        string idWithVersion = transcript.Id + '.' + transcript.Version;

        // falls back to 0 when the coding region (or its length) is null
        int cdsLength = transcript.CodingRegion?.Length ?? 0;

        // +1 treats Start and End as inclusive coordinates (presumably - confirm against the transcript model)
        int transcriptLength = transcript.End - transcript.Start + 1;

        // the LRG lookup and the accession number both use the ID without the version suffix
        bool isLrg    = _lrgTranscriptIds.Contains(transcript.Id);
        int accession = AccessionUtilities.GetAccessionNumber(transcript.Id);

        var metadata = new TranscriptMetadata(idWithVersion, accession, transcriptLength, cdsLength, isLrg);
        int geneId   = ConvertGeneIdToInt(transcript.Gene.GeneId);

        if (genes.TryGetValue(geneId, out var observedMetadata))
        {
            observedMetadata.Add(metadata);
        }
        else
        {
            // first transcript seen for this gene: start a new set
            genes[geneId] = new HashSet<TranscriptMetadata> { metadata };
        }
    }

    return genes;
}
/// <summary>
/// Buckets the metadata of every transcript in the list under its gene ID.
/// </summary>
/// <param name="transcripts">the transcripts to aggregate</param>
/// <returns>
/// a dictionary, sorted by gene ID, whose values are the sets of transcript
/// metadata collected for each gene
/// </returns>
private SortedDictionary<int, HashSet<TranscriptMetadata>> AggregateGenes(List<Transcript> transcripts)
{
    var metadataByGene = new SortedDictionary<int, HashSet<TranscriptMetadata>>();

    foreach (var current in transcripts)
    {
        int geneKey    = GetGeneId(current);
        int codingSize = GetCdsLength(current);
        int span       = current.End - current.Start + 1;

        // the LRG lookup is keyed on the string form of the transcript ID
        var lrgFlag = _lrgEntries.Contains(current.Id.ToString());
        var entry   = new TranscriptMetadata(current.Id, span, codingSize, lrgFlag);

        // create the bucket on first sight of a gene, then add unconditionally
        if (!metadataByGene.TryGetValue(geneKey, out var bucket))
        {
            bucket = new HashSet<TranscriptMetadata>();
            metadataByGene[geneKey] = bucket;
        }

        bucket.Add(entry);
    }

    return metadataByGene;
}