/// <summary>
/// Builds a lookup from versionless transcript IDs that start with "ENST" to the symbol of
/// the gene each transcript belongs to.
/// </summary>
/// <param name="sequenceProvider">Supplies the reference indices to scan (the keys of RefIndexToChromosome).</param>
/// <param name="transcriptData">Transcript cache data; the interval forest of its cache is queried once per reference index.</param>
/// <returns>
/// A dictionary keyed by transcript ID without version. If the same ID is seen more than once,
/// the gene symbol of the transcript visited last is kept.
/// </returns>
public static Dictionary<string, string> GetEnstToGeneSymbols(ISequenceProvider sequenceProvider, TranscriptCacheData transcriptData)
{
    var cache = transcriptData.GetCache();
    var enstToGeneSymbols = new Dictionary<string, string>();

    foreach (var chromIndex in sequenceProvider.RefIndexToChromosome.Keys)
    {
        // Query the widest possible range so every transcript on this reference index is returned.
        var overlappingTranscripts = cache.TranscriptIntervalForest.GetAllOverlappingValues(chromIndex, 1, int.MaxValue);
        if (overlappingTranscripts == null)
        {
            continue;
        }

        foreach (var transcript in overlappingTranscripts)
        {
            var transcriptId = transcript.Id.WithoutVersion;

            // Transcript IDs are machine identifiers, so compare ordinally rather than with
            // the current culture's linguistic rules.
            if (transcriptId.StartsWith("ENST", System.StringComparison.Ordinal))
            {
                enstToGeneSymbols[transcriptId] = transcript.Gene.Symbol;
            }
        }
    }

    return enstToGeneSymbols;
}
/// <summary>
/// Collects, per reference index, the intervals flanking exon boundaries of non-predicted
/// transcripts. Each interval spans SpliceFlankLength positions on either side of an exon
/// start or end.
/// </summary>
/// <remarks>
/// For every transcript, the start of the first exon region and the end of the last exon region
/// (in TranscriptRegions order) are left out, because those are transcript ends rather than
/// splice sites.
/// </remarks>
/// <param name="sequenceProvider">Supplies the reference indices to scan (the keys of RefIndexToChromosome).</param>
/// <param name="transcriptData">Transcript cache data; the interval forest of its cache is queried once per reference index.</param>
/// <returns>
/// A dictionary from reference index to an interval array sorted by Begin, then End.
/// Reference indices for which the forest returns null have no entry.
/// </returns>
public static Dictionary<ushort, IntervalArray<byte>> GetSpliceIntervals(ISequenceProvider sequenceProvider, TranscriptCacheData transcriptData)
{
    var cache = transcriptData.GetCache();
    var spliceIntervalDict = new Dictionary<ushort, IntervalArray<byte>>(sequenceProvider.RefIndexToChromosome.Count);

    foreach (var chromIndex in sequenceProvider.RefIndexToChromosome.Keys)
    {
        var spliceIntervals = new List<Interval<byte>>(8 * 1024);
        var overlappingTranscripts = cache.TranscriptIntervalForest.GetAllOverlappingValues(chromIndex, 1, int.MaxValue);
        if (overlappingTranscripts == null)
        {
            continue;
        }

        foreach (var transcript in overlappingTranscripts)
        {
            if (transcript.Id.IsPredictedTranscript())
            {
                continue;
            }

            // Remember where this transcript's intervals begin so the trailing removal below
            // can never touch an interval that belongs to a previous transcript.
            int countBeforeTranscript = spliceIntervals.Count;
            bool isFirstExon = true;

            foreach (var transcriptRegion in transcript.TranscriptRegions)
            {
                if (transcriptRegion.Type != TranscriptRegionType.Exon)
                {
                    continue;
                }

                var firstSplicePosition = transcriptRegion.Start;
                var secondSplicePosition = transcriptRegion.End;

                // The start of the first exon is the transcript boundary, not a splice site.
                if (!isFirstExon)
                {
                    spliceIntervals.Add(new Interval<byte>(firstSplicePosition - SpliceFlankLength, firstSplicePosition + SpliceFlankLength, 0));
                }

                spliceIntervals.Add(new Interval<byte>(secondSplicePosition - SpliceFlankLength, secondSplicePosition + SpliceFlankLength, 0));
                isFirstExon = false;
            }

            // Remove the last added interval since this is the tail of the last exon, which is
            // not a splice site. Only do so when this transcript actually contributed intervals;
            // a transcript without exon regions must not delete another transcript's interval.
            if (spliceIntervals.Count > countBeforeTranscript)
            {
                spliceIntervals.RemoveAt(spliceIntervals.Count - 1);
            }
        }

        spliceIntervalDict[chromIndex] = new IntervalArray<byte>(spliceIntervals.OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray());
    }

    return spliceIntervalDict;
}
/// <summary>
/// Gathers, for each reference index, one flanking interval around the start and one around
/// the end of every exon region of every non-predicted transcript. Each interval extends
/// SpliceFlankLength positions to both sides of the boundary.
/// </summary>
/// <param name="sequenceProvider">Supplies the reference indices to scan (the keys of RefIndexToChromosome).</param>
/// <param name="transcriptData">Transcript cache data; the interval forest of its cache is queried once per reference index.</param>
/// <returns>
/// A dictionary from reference index to an interval array ordered by Begin, then End.
/// Reference indices for which the forest returns null are absent from the dictionary.
/// </returns>
public static Dictionary<ushort, IntervalArray<byte>> GetSpliceIntervals(ISequenceProvider sequenceProvider, TranscriptCacheData transcriptData)
{
    var transcriptCache = transcriptData.GetCache();
    var intervalsByRefIndex = new Dictionary<ushort, IntervalArray<byte>>(sequenceProvider.RefIndexToChromosome.Count);

    foreach (var refIndex in sequenceProvider.RefIndexToChromosome.Keys)
    {
        // Ask for the full coordinate range so that all transcripts on this reference are returned.
        var transcripts = transcriptCache.TranscriptIntervalForest.GetAllOverlappingValues(refIndex, 1, int.MaxValue);
        if (transcripts == null)
        {
            continue;
        }

        var flanks = new List<Interval<byte>>(8 * 1024);

        foreach (var transcript in transcripts)
        {
            if (!transcript.Id.IsPredictedTranscript())
            {
                foreach (var region in transcript.TranscriptRegions)
                {
                    if (region.Type == TranscriptRegionType.Exon)
                    {
                        var exonStart = region.Start;
                        var exonEnd = region.End;

                        // One flank around each exon boundary, start first and end second.
                        flanks.Add(new Interval<byte>(exonStart - SpliceFlankLength, exonStart + SpliceFlankLength, 0));
                        flanks.Add(new Interval<byte>(exonEnd - SpliceFlankLength, exonEnd + SpliceFlankLength, 0));
                    }
                }
            }
        }

        var sortedFlanks = flanks
            .OrderBy(interval => interval.Begin)
            .ThenBy(interval => interval.End)
            .ToArray();

        intervalsByRefIndex[refIndex] = new IntervalArray<byte>(sortedFlanks);
    }

    return intervalsByRefIndex;
}