Esempio n. 1
0
        /// <summary>
        /// Builds a lookup from unversioned transcript IDs that start with "ENST" to the
        /// symbol of the gene each transcript belongs to.
        /// </summary>
        /// <param name="sequenceProvider">Provides the reference indices to scan (the keys of <c>RefIndexToChromosome</c>).</param>
        /// <param name="transcriptData">Transcript cache data; the cache obtained from it is queried once per reference index.</param>
        /// <returns>
        /// A dictionary keyed by <c>transcript.Id.WithoutVersion</c> whose values are <c>transcript.Gene.Symbol</c>.
        /// If the same unversioned ID is encountered more than once, the last one enumerated wins.
        /// </returns>
        public static Dictionary <string, string> GetEnstToGeneSymbols(ISequenceProvider sequenceProvider, TranscriptCacheData transcriptData)
        {
            var cache             = transcriptData.GetCache();
            var enstToGeneSymbols = new Dictionary <string, string>();

            foreach (var chromIndex in sequenceProvider.RefIndexToChromosome.Keys)
            {
                // NOTE(review): 1..int.MaxValue is presumably the query range, i.e. "everything on this reference" - confirm.
                var overlappingTranscripts =
                    cache.TranscriptIntervalForest.GetAllOverlappingValues(chromIndex, 1, int.MaxValue);

                // a null result means there is nothing to record for this reference index
                if (overlappingTranscripts == null)
                {
                    continue;
                }

                foreach (var transcript in overlappingTranscripts)
                {
                    // read the ID once; it is used both for the prefix test and as the key
                    var transcriptId = transcript.Id.WithoutVersion;

                    // identifiers are machine-readable text, so compare ordinally rather than
                    // with the current culture's linguistic rules
                    if (transcriptId.StartsWith("ENST", System.StringComparison.Ordinal))
                    {
                        enstToGeneSymbols[transcriptId] = transcript.Gene.Symbol;
                    }
                }
            }

            return enstToGeneSymbols;
        }
Esempio n. 2
0
        /// <summary>
        /// Collects, per reference index, the intervals flanking the internal exon boundaries
        /// of every non-predicted transcript.
        /// </summary>
        /// <remarks>
        /// For each transcript, exon regions are visited in the order they appear in
        /// <c>TranscriptRegions</c>. The start of the first exon and the end of the last exon
        /// are not emitted, because they are not splice sites. Every emitted interval spans
        /// <c>SpliceFlankLength</c> on either side of the boundary position and carries the value 0.
        /// </remarks>
        /// <param name="sequenceProvider">Provides the reference indices to scan (the keys of <c>RefIndexToChromosome</c>).</param>
        /// <param name="transcriptData">Transcript cache data; the cache obtained from it is queried once per reference index.</param>
        /// <returns>
        /// A dictionary from reference index to an <c>IntervalArray</c> sorted by begin, then end.
        /// Reference indices for which the transcript query returns null have no entry.
        /// </returns>
        public static Dictionary <ushort, IntervalArray <byte> > GetSpliceIntervals(ISequenceProvider sequenceProvider, TranscriptCacheData transcriptData)
        {
            var cache = transcriptData.GetCache();

            var spliceIntervalDict = new Dictionary <ushort, IntervalArray <byte> >(sequenceProvider.RefIndexToChromosome.Count);

            foreach (var chromIndex in sequenceProvider.RefIndexToChromosome.Keys)
            {
                var overlappingTranscripts =
                    cache.TranscriptIntervalForest.GetAllOverlappingValues(chromIndex, 1, int.MaxValue);

                if (overlappingTranscripts == null)
                {
                    continue;
                }

                // allocated only once we know there is something to scan
                var spliceIntervals = new List <Interval <byte> >(8 * 1024);

                foreach (var transcript in overlappingTranscripts)
                {
                    if (transcript.Id.IsPredictedTranscript())
                    {
                        continue;
                    }

                    // stays true until this transcript has contributed at least one exon
                    bool isFirstExon = true;
                    foreach (var transcriptRegion in transcript.TranscriptRegions)
                    {
                        if (transcriptRegion.Type != TranscriptRegionType.Exon)
                        {
                            continue;
                        }

                        // the start of the first exon is not a splice site, so it is skipped
                        if (!isFirstExon)
                        {
                            var exonStart = transcriptRegion.Start;
                            spliceIntervals.Add(new Interval <byte>(exonStart - SpliceFlankLength, exonStart + SpliceFlankLength, 0));
                        }

                        var exonEnd = transcriptRegion.End;
                        spliceIntervals.Add(new Interval <byte>(exonEnd - SpliceFlankLength, exonEnd + SpliceFlankLength, 0));
                        isFirstExon = false;
                    }

                    // Remove the last added interval: it is the tail of the last exon, which is not a splice site.
                    // The list is shared by all transcripts on this reference, so only remove when THIS
                    // transcript actually added something; otherwise an interval belonging to a previous
                    // transcript would be dropped.
                    if (!isFirstExon)
                    {
                        spliceIntervals.RemoveAt(spliceIntervals.Count - 1);
                    }
                }

                spliceIntervalDict[chromIndex] = new IntervalArray <byte>(spliceIntervals.OrderBy(x => x.Begin).ThenBy(x => x.End).ToArray());
            }

            return spliceIntervalDict;
        }
Esempio n. 3
0
        /// <summary>
        /// Gathers, per reference index, the flanking intervals around both ends of every exon
        /// in every non-predicted transcript.
        /// </summary>
        /// <param name="sequenceProvider">Provides the reference indices to scan (the keys of <c>RefIndexToChromosome</c>).</param>
        /// <param name="transcriptData">Transcript cache data; the cache obtained from it is queried once per reference index.</param>
        /// <returns>
        /// A dictionary from reference index to an <c>IntervalArray</c> whose intervals are ordered
        /// by begin, then end. Each interval reaches <c>SpliceFlankLength</c> to either side of an
        /// exon start or end and carries the value 0. Reference indices for which the transcript
        /// query returns null have no entry.
        /// </returns>
        public static Dictionary <ushort, IntervalArray <byte> > GetSpliceIntervals(ISequenceProvider sequenceProvider, TranscriptCacheData transcriptData)
        {
            var transcriptCache = transcriptData.GetCache();
            var intervalsByRef  = new Dictionary<ushort, IntervalArray<byte>>(sequenceProvider.RefIndexToChromosome.Count);

            foreach (var refIndex in sequenceProvider.RefIndexToChromosome.Keys)
            {
                var transcripts = transcriptCache.TranscriptIntervalForest.GetAllOverlappingValues(refIndex, 1, int.MaxValue);
                if (transcripts == null)
                {
                    continue;
                }

                var flanks = new List<Interval<byte>>(8 * 1024);

                foreach (var transcript in transcripts)
                {
                    // predicted transcripts contribute nothing
                    if (!transcript.Id.IsPredictedTranscript())
                    {
                        foreach (var region in transcript.TranscriptRegions)
                        {
                            // only exon regions have boundaries of interest
                            if (region.Type == TranscriptRegionType.Exon)
                            {
                                var exonStart = region.Start;
                                var exonEnd   = region.End;

                                // one window around each end of the exon
                                flanks.Add(new Interval<byte>(exonStart - SpliceFlankLength, exonStart + SpliceFlankLength, 0));
                                flanks.Add(new Interval<byte>(exonEnd - SpliceFlankLength, exonEnd + SpliceFlankLength, 0));
                            }
                        }
                    }
                }

                var ordered = flanks.OrderBy(interval => interval.Begin)
                                    .ThenBy(interval => interval.End)
                                    .ToArray();

                intervalsByRef[refIndex] = new IntervalArray<byte>(ordered);
            }

            return intervalsByRef;
        }