예제 #1
0
        }                                                         // used to find overlap genes for now

        /// <summary>
        /// Creates a position buffer that starts on an empty chromosome (with an empty name)
        /// and with an empty set of buffered positions.
        /// </summary>
        /// <param name="codonInfoProvider">codon info provider kept by this buffer</param>
        /// <param name="geneIntervalForest">gene interval forest kept by this buffer</param>
        public PositionBuffer(ICodonInfoProvider codonInfoProvider, IIntervalForest<IGene> geneIntervalForest)
        {
            // initial state: placeholder chromosome and nothing buffered yet
            BufferedPositions = BufferedPositions.CreatEmptyBufferedPositions();
            CurrentChromosome = new EmptyChromosome(string.Empty);

            // collaborators handed in by the caller
            GeneIntervalForest = geneIntervalForest;
            CodonInfoProvider = codonInfoProvider;
        }
예제 #2
0
        /// <summary>
        /// Sets <c>IsRefNoCall</c> on a variant that is a reference site and did not pass its filters.
        /// Variants that are not reference sites, or that pass their filters, are left untouched.
        /// </summary>
        /// <param name="variant">the variant to inspect and, if applicable, flag</param>
        /// <param name="limitToTranscript">
        /// when false, every candidate is flagged; when true, a candidate is flagged only if it
        /// overlaps at least one transcript interval
        /// </param>
        /// <param name="transcriptIntervals">transcript intervals, only queried when <paramref name="limitToTranscript"/> is true</param>
        public static void Check(VariantFeature variant, bool limitToTranscript,
                                 IIntervalForest<Transcript> transcriptIntervals)
        {
            // a candidate is a reference site whose filters failed
            bool isCandidate = variant.IsReference && !variant.PassFilter();
            if (!isCandidate)
            {
                return;
            }

            // without the transcript restriction the flag is always set; otherwise it mirrors
            // whether the variant overlaps any transcript
            variant.IsRefNoCall = !limitToTranscript ||
                                  transcriptIntervals.OverlapsAny(variant.ReferenceIndex,
                                                                  variant.OverlapReferenceBegin,
                                                                  variant.OverlapReferenceEnd);
        }
예제 #3
0
        /// <summary>
        /// Adds annotated transcripts to every variant that is not using the minimal annotation behavior.
        /// The transcripts considered for a variant are those returned by <c>GetAllFlankingValues</c>.
        /// </summary>
        /// <param name="annotatedVariants">variants whose <c>Transcripts</c> collections are filled in</param>
        /// <param name="transcriptIntervalForest">forest used to look up transcripts and gene fusion candidates</param>
        private void AddTranscripts(IAnnotatedVariant[] annotatedVariants, IIntervalForest<ITranscript> transcriptIntervalForest)
        {
            foreach (var annotatedVariant in annotatedVariants)
            {
                var variant = annotatedVariant.Variant;
                if (variant.Behavior.Equals(AnnotationBehavior.MinimalAnnotationBehavior))
                {
                    continue;
                }

                ITranscript[] transcripts = transcriptIntervalForest.GetAllFlankingValues(variant);
                if (transcripts == null)
                {
                    continue;
                }

                // the gene fusion lookup is only needed once we know there are transcripts to
                // annotate, so it is deferred until after the null check above
                ITranscript[] geneFusionCandidates = GetGeneFusionCandidates(variant.BreakEnds, transcriptIntervalForest);

                IList<IAnnotatedTranscript> annotatedTranscripts =
                    TranscriptAnnotationFactory.GetAnnotatedTranscripts(variant, transcripts, _sequence, _siftCache,
                                                                        _polyphenCache, geneFusionCandidates);

                // an empty result simply adds nothing
                foreach (var annotatedTranscript in annotatedTranscripts)
                {
                    annotatedVariant.Transcripts.Add(annotatedTranscript);
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Collects the distinct, non-predicted transcripts that overlap the position of the
        /// second piece of each break end.
        /// </summary>
        /// <param name="breakEnds">break ends to inspect; may be null or empty</param>
        /// <param name="transcriptIntervalForest">forest queried for overlapping transcripts</param>
        /// <returns>
        /// null when there are no break ends; otherwise an array of candidates (possibly empty)
        /// </returns>
        private static ITranscript[] GetGeneFusionCandidates(IBreakEnd[] breakEnds, IIntervalForest<ITranscript> transcriptIntervalForest)
        {
            bool hasBreakEnds = breakEnds != null && breakEnds.Length > 0;
            if (!hasBreakEnds)
            {
                return null;
            }

            // a set, so a transcript hit by several break ends is reported only once
            var candidates = new HashSet<ITranscript>();

            foreach (var breakEnd in breakEnds)
            {
                var partner = breakEnd.Piece2;

                // single-base query at the partner position
                ITranscript[] overlapping = transcriptIntervalForest.GetAllOverlappingValues(
                    partner.Chromosome.Index, partner.Position, partner.Position);

                if (overlapping != null)
                {
                    // predicted transcripts are not considered
                    candidates.UnionWith(overlapping.Where(transcript => !transcript.Id.IsPredictedTranscript()));
                }
            }

            return candidates.ToArray();
        }
예제 #5
0
        /// <summary>
        /// Reads the global cache from the stream and builds an interval forest from its transcripts.
        /// </summary>
        /// <param name="stream">stream handed to the global cache reader</param>
        /// <param name="numRefSeqs">number of reference sequences, passed on to the forest factory</param>
        /// <param name="transcriptIntervalForest">receives the forest created from the cached transcripts</param>
        private static void LoadTranscriptCache(Stream stream, int numRefSeqs,
                                                out IIntervalForest<Transcript> transcriptIntervalForest)
        {
            GlobalCache globalCache;

            // the reader is only needed for the duration of the read
            using (var cacheReader = new GlobalCacheReader(stream))
            {
                globalCache = cacheReader.Read();
            }

            transcriptIntervalForest = IntervalArrayFactory.CreateIntervalForest(globalCache.Transcripts, numRefSeqs);
        }
예제 #6
0
 /// <summary>
 /// Loads the repeat expansion phenotypes from a TSV stream and builds the matcher from them.
 /// </summary>
 /// <param name="genomeAssembly">assembly used to obtain the TSV stream and passed to the loader</param>
 /// <param name="refNameToChromosome">reference name to chromosome lookup passed to the loader</param>
 /// <param name="numRefSeqs">number of reference sequences passed to the loader</param>
 /// <param name="customTsvPath">path passed to <c>GetTsvStream</c> together with the assembly</param>
 public RepeatExpansionProvider(GenomeAssembly genomeAssembly, IDictionary<string, IChromosome> refNameToChromosome,
                                int numRefSeqs, string customTsvPath)
 {
     // the stream is disposed once the forest has been loaded and handed to the matcher
     using (Stream tsvStream = GetTsvStream(genomeAssembly, customTsvPath))
     {
         IIntervalForest<RepeatExpansionPhenotype> forest =
             RepeatExpansionReader.Load(tsvStream, genomeAssembly, refNameToChromosome, numRefSeqs);

         _matcher = new Matcher(forest);
     }
 }
예제 #7
0
 /// <summary>
 /// Stores the supplied lists and the merged gene forest on this instance and records
 /// that the lists are now available.
 /// </summary>
 /// <param name="introns">intron intervals</param>
 /// <param name="microRnas">microRNA intervals</param>
 /// <param name="peptideSeqs">peptide sequences</param>
 /// <param name="mergedGeneForest">interval forest of merged genes</param>
 public void AddLists(List<SimpleInterval> introns, List<SimpleInterval> microRnas,
                      List<string> peptideSeqs, IIntervalForest<MutableGene> mergedGeneForest)
 {
     _mergedGeneForest = mergedGeneForest;
     _peptideSeqs = peptideSeqs;
     _microRnas = microRnas;
     _introns = introns;

     // flipped last, after everything has been stored
     _hasLists = true;
 }
예제 #8
0
 /// <summary>
 /// Creates the transcript cache and wraps the supplied interval arrays in interval forests.
 /// </summary>
 /// <param name="dataSourceVersions">data source versions exposed by this cache</param>
 /// <param name="genomeAssembly">genome assembly exposed by this cache</param>
 /// <param name="transcriptIntervalArrays">interval arrays backing the transcript forest</param>
 /// <param name="regulatoryRegionIntervalArrays">interval arrays backing the regulatory region forest</param>
 public TranscriptCache(IEnumerable<IDataSourceVersion> dataSourceVersions, GenomeAssembly genomeAssembly,
                        IntervalArray<ITranscript>[] transcriptIntervalArrays,
                        IntervalArray<IRegulatoryRegion>[] regulatoryRegionIntervalArrays)
 {
     // lookup structures
     _regulatoryIntervalForest = new IntervalForest<IRegulatoryRegion>(regulatoryRegionIntervalArrays);
     _transcriptIntervalForest = new IntervalForest<ITranscript>(transcriptIntervalArrays);

     // descriptive metadata
     GenomeAssembly = genomeAssembly;
     DataSourceVersions = dataSourceVersions;
     Name = "Transcript annotation provider";
 }
예제 #9
0
        /// <summary>
        /// Creates the mock provider. When no reader is supplied, the constructor assigns nothing.
        /// </summary>
        /// <param name="saReader">supplementary annotation reader; may be null</param>
        /// <param name="renamer">chromosome renamer passed to the reader when building the interval forest</param>
        public MockSupplementaryAnnotationProvider(ISupplementaryAnnotationReader saReader, ChromosomeRenamer renamer)
        {
            // everything below depends on having a reader
            if (saReader != null)
            {
                _saReader = saReader;
                _overlappingSupplementaryIntervals = new List<ISupplementaryInterval>();
                _suppIntervalForest = saReader.GetIntervalForest(renamer);
            }
        }
예제 #10
0
        /// <summary>
        /// Loads the custom intervals for the given reference sequence, unless there are no
        /// custom interval directories or that reference is already the current one.
        /// </summary>
        /// <param name="ucscReferenceName">name of the reference sequence to load</param>
        /// <param name="renamer">chromosome renamer passed to the interval array factory</param>
        public void Load(string ucscReferenceName, IChromosomeRenamer renamer)
        {
            // nothing configured
            if (_ciDirs.Count == 0)
            {
                return;
            }

            // already loaded for this reference
            if (ucscReferenceName == _currentUcscReferenceName)
            {
                return;
            }

            _intervalForest = IntervalArrayFactory.CreateIntervalArray(GetIntervals(ucscReferenceName), renamer);

            // the null-search implementation signals that there is nothing to query
            bool isNullSearch = _intervalForest is NullIntervalSearch<ICustomInterval>;
            _hasIntervals = !isNullSearch;

            _currentUcscReferenceName = ucscReferenceName;
        }
예제 #11
0
        /// <summary>
        /// Loads the supplementary annotation reader and interval forest for the given reference
        /// sequence, unless no directory is configured or that reference is already the current one.
        /// </summary>
        /// <remarks>
        /// The expected file is <c>&lt;saDir&gt;/&lt;ucscReferenceName&gt;.nsa</c>. When it does not exist, the
        /// reader is set to null and the interval forest falls back to a <c>NullIntervalSearch</c>,
        /// so the forest is never null and <c>_hasIntervals</c> is false.
        /// </remarks>
        /// <param name="ucscReferenceName">name of the reference sequence to load</param>
        /// <param name="renamer">chromosome renamer passed to the reader when building the interval forest</param>
        public void Load(string ucscReferenceName, IChromosomeRenamer renamer)
        {
            if (string.IsNullOrEmpty(_saDir) || ucscReferenceName == _currentUcscReferenceName)
            {
                return;
            }

            var saPath = Path.Combine(_saDir, ucscReferenceName + ".nsa");

            _saReader = File.Exists(saPath) ? new SupplementaryAnnotationReader(saPath) : null;

            // Without a reader the null-conditional call yields null. A null forest would make
            // the type test below report "has intervals", so substitute the null-search object.
            _intervalForest = _saReader?.GetIntervalForest(renamer);
            if (_intervalForest == null)
            {
                _intervalForest = new NullIntervalSearch<ISupplementaryInterval>();
            }

            _hasIntervals = !(_intervalForest is NullIntervalSearch<ISupplementaryInterval>);

            _currentUcscReferenceName = ucscReferenceName;
        }
예제 #12
0
        /// <summary>
        /// For every block-based offset, opens the VCF at that offset, reads the first genomic
        /// position found there, and adjusts it with <c>FindProperStartPosition</c>. The adjusted
        /// positions are then passed through <c>MergeConsecutiveEqualValues</c>.
        /// </summary>
        /// <param name="blockBasedOffsets">offsets into the VCF at which reading starts</param>
        /// <param name="vcfUrl">location of the VCF</param>
        /// <param name="geneIntervalForest">gene intervals used when adjusting each start position</param>
        /// <param name="refNameToChromosome">reference name to chromosome lookup</param>
        /// <returns>the merged, adjusted start positions</returns>
        private static AnnotationPosition[] AdjustPartitionGenomicStarts(IReadOnlyList<long> blockBasedOffsets, string vcfUrl,
                                                                         IIntervalForest<IGene> geneIntervalForest, IDictionary<string, IChromosome> refNameToChromosome)
        {
            int numOffsets = blockBasedOffsets.Count;
            var startPositions = new AnnotationPosition[numOffsets];

            for (var i = 0; i < numOffsets; i++)
            {
                // the first offset is flagged when reading the first position
                bool isFirstOffset = i == 0;

                using (var rawStream = PersistentStreamUtils.GetReadStream(vcfUrl, blockBasedOffsets[i]))
                using (var gzipStream = new BlockGZipStream(rawStream, CompressionMode.Decompress))
                {
                    var firstPosition = GetFirstGenomicPosition(gzipStream, isFirstOffset);
                    startPositions[i] = FindProperStartPosition(firstPosition, geneIntervalForest, refNameToChromosome);
                }
            }

            return MergeConsecutiveEqualValues(startPositions).ToArray();
        }
예제 #13
0
        /// <summary>
        /// Creates the mock provider by reading every custom interval from the stream and
        /// building the interval forest from them.
        /// </summary>
        /// <param name="stream">stream handed to the custom interval reader</param>
        /// <param name="renamer">chromosome renamer passed to the interval array factory</param>
        public MockCustomIntervalProvider(Stream stream, ChromosomeRenamer renamer)
        {
            var intervals = new List<ICustomInterval>();

            using (var reader = new CustomIntervalReader(stream))
            {
                // the reader signals the end of the data with a null interval
                for (var interval = reader.GetNextCustomInterval();
                     interval != null;
                     interval = reader.GetNextCustomInterval())
                {
                    intervals.Add(interval);
                }
            }

            _intervalForest = IntervalArrayFactory.CreateIntervalArray(intervals, renamer);
            _hasIntervals = intervals.Count > 0;
        }
        /// <summary>
        /// Adds regulatory region annotations to every variant whose behavior asks for them.
        /// </summary>
        /// <param name="annotatedVariants">variants whose <c>RegulatoryRegions</c> collections are filled in</param>
        /// <param name="regulatoryIntervalForest">forest queried for overlapping regulatory regions</param>
        private static void AddRegulatoryRegions(IAnnotatedVariant[] annotatedVariants, IIntervalForest<IRegulatoryRegion> regulatoryIntervalForest)
        {
            foreach (var annotatedVariant in annotatedVariants)
            {
                var variant = annotatedVariant.Variant;
                if (!variant.Behavior.NeedRegulatoryRegions)
                {
                    continue;
                }

                // Inserted bases are assumed to land at the end position, so insertions are
                // queried from their end; this also covers an insertion sitting just before
                // the beginning of a regulatory element.
                bool isInsertion = variant.Type == VariantType.insertion;
                int queryBegin = isInsertion ? variant.End : variant.Start;

                if (SkipLargeVariants(queryBegin, variant.End))
                {
                    continue;
                }

                IRegulatoryRegion[] overlappingRegions =
                    regulatoryIntervalForest.GetAllOverlappingValues(variant.Chromosome.Index, queryBegin, variant.End);

                if (overlappingRegions == null)
                {
                    continue;
                }

                foreach (var region in overlappingRegions)
                {
                    // an insertion at the very end of the region is past the feature, so it
                    // does not count as overlapping
                    bool insertionAtRegionEnd = isInsertion && variant.End == region.End;
                    if (!insertionAtRegionEnd)
                    {
                        annotatedVariant.RegulatoryRegions.Add(RegulatoryRegionAnnotator.Annotate(variant, region));
                    }
                }
            }
        }
예제 #15
0
        /// <summary>
        /// Sets <c>UpdatedGene</c> on every transcript, using the UGA genes that overlap the
        /// transcript's original gene and picking among them by gene ID.
        /// </summary>
        /// <param name="transcripts">transcripts to update</param>
        /// <param name="geneForest">forest of UGA genes queried with each original gene's coordinates</param>
        /// <exception cref="InvalidDataException">thrown when no UGA gene overlaps a transcript's original gene</exception>
        private void AssignUgaGenesToTranscripts(IEnumerable<MutableTranscript> transcripts, IIntervalForest<UgaGene> geneForest)
        {
            foreach (var transcript in transcripts)
            {
                var originalGene = transcript.Gene;

                var overlappingGenes = geneForest.GetAllOverlappingValues(originalGene.Chromosome.Index,
                                                                          originalGene.Start, originalGene.End);

                if (overlappingGenes != null)
                {
                    var pickedGene = PickGeneById(overlappingGenes, originalGene.GeneId);
                    transcript.UpdatedGene = pickedGene.ToGene(_genomeAssembly);
                    continue;
                }

                // no overlapping gene: report the original gene's coordinates and strand
                var strand = originalGene.OnReverseStrand ? "R" : "F";
                throw new InvalidDataException($"Found a transcript ({transcript.Id}) that does not have an overlapping UGA gene: gene ID: {originalGene.GeneId} {originalGene.Chromosome.UcscName} {originalGene.Start} {originalGene.End} {strand}");
            }
        }
예제 #16
0
        /// <summary>
        /// Assigns UGA genes to the transcripts, converts transcripts and regulatory regions into
        /// interval arrays, and packages them with a freshly built cache header.
        /// </summary>
        /// <param name="mutableTranscripts">transcripts to update and convert</param>
        /// <param name="regulatoryRegions">regulatory regions to convert</param>
        /// <param name="geneForest">forest of UGA genes used for the gene assignment</param>
        /// <param name="numRefSeqs">number of reference sequences used when building the interval arrays</param>
        /// <returns>the staging object created from the header and both sets of interval arrays</returns>
        public TranscriptCacheStaging CreateTranscriptCache(MutableTranscript[] mutableTranscripts,
                                                            IEnumerable<IRegulatoryRegion> regulatoryRegions, IIntervalForest<UgaGene> geneForest, int numRefSeqs)
        {
            // the gene assignment must happen before the transcripts are converted below
            _logger.Write("- assigning UGA genes to transcripts... ");
            AssignUgaGenesToTranscripts(mutableTranscripts, geneForest);
            _logger.WriteLine("finished.");

            var transcriptArrays = mutableTranscripts.ToTranscripts().ToIntervalArrays(numRefSeqs);
            var regulatoryArrays = regulatoryRegions.ToIntervalArrays(numRefSeqs);

            var vepHeader = new TranscriptCacheCustomHeader(_vepVersion, _vepReleaseTicks);
            var cacheHeader = new CacheHeader(
                CacheConstants.Identifier,
                CacheConstants.SchemaVersion,
                CacheConstants.DataVersion,
                _source,
                DateTime.Now.Ticks,
                _genomeAssembly,
                vepHeader);

            return TranscriptCacheStaging.GetStaging(cacheHeader, transcriptArrays, regulatoryArrays);
        }
예제 #17
0
        /// <summary>
        /// Moves a genomic position backwards until it no longer overlaps any gene, so that the
        /// returned position lies right before the overlapping genes.
        /// </summary>
        /// <param name="genomicPosition">the starting position</param>
        /// <param name="geneIntervalForest">gene intervals queried at the current position</param>
        /// <param name="refNameToChromosome">lookup used to resolve the position's chromosome</param>
        /// <returns>a position on the same chromosome, never smaller than 1</returns>
        private static AnnotationPosition FindProperStartPosition(AnnotationPosition genomicPosition, IIntervalForest<IGene> geneIntervalForest, IDictionary<string, IChromosome> refNameToChromosome)
        {
            var chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, genomicPosition.Chromosome);

            int currentPosition = genomicPosition.Position;

            while (true)
            {
                IGene[] overlappingGenes = geneIntervalForest.GetAllOverlappingValues(chromosome.Index,
                                                                                      currentPosition, currentPosition);

                // Both null and an empty array mean that nothing overlaps. Stopping on the empty
                // array matters: the position would not change, so looping again would never end.
                if (overlappingGenes == null || overlappingGenes.Length == 0)
                {
                    break;
                }

                // step to just before the earliest-starting overlapping gene and check again
                currentPosition = overlappingGenes.Min(x => x.Start) - 1;
            }

            // Always return the position right before the overlapping genes to KISS
            return new AnnotationPosition(genomicPosition.Chromosome, currentPosition < 1 ? 1 : currentPosition);
        }
예제 #18
0
 /// <summary>
 /// Creates a matcher that keeps the supplied phenotype interval forest.
 /// </summary>
 /// <param name="phenotypeForest">interval forest of repeat expansion phenotypes</param>
 public Matcher(IIntervalForest<RepeatExpansionPhenotype> phenotypeForest)
 {
     _phenotypeForest = phenotypeForest;
 }
예제 #19
0
 /// <summary>
 /// Creates the mock provider from an already populated list of custom intervals.
 /// </summary>
 /// <param name="intervals">custom intervals used to build the interval forest</param>
 /// <param name="renamer">chromosome renamer passed to the interval array factory</param>
 public MockCustomIntervalProvider(List<ICustomInterval> intervals, ChromosomeRenamer renamer)
 {
     // remember whether the list contained anything
     bool listIsEmpty = intervals.Count == 0;
     _hasIntervals = !listIsEmpty;

     _intervalForest = IntervalArrayFactory.CreateIntervalArray(intervals, renamer);
 }
 /// <summary>
 /// Returns the transcripts that overlap the interval after it has been widened on both
 /// sides by the flanking length of its chromosome.
 /// </summary>
 /// <param name="transcriptIntervalForest">forest queried for overlapping transcripts</param>
 /// <param name="interval">interval supplying the chromosome, start and end</param>
 /// <returns>whatever <c>GetAllOverlappingValues</c> returns for the widened range</returns>
 public static ITranscript[] GetAllFlankingValues(this IIntervalForest<ITranscript> transcriptIntervalForest,
                                                  IChromosomeInterval interval)
 {
     var chromosome = interval.Chromosome;
     var flankingLength = chromosome.FlankingLength;

     // widen the query range on both sides
     var flankedStart = interval.Start - flankingLength;
     var flankedEnd = interval.End + flankingLength;

     return transcriptIntervalForest.GetAllOverlappingValues(chromosome.Index, flankedStart, flankedEnd);
 }
예제 #21
0
 /// <summary>
 /// Creates the provider for the given supplementary annotation directory. It starts with an
 /// empty list of overlapping intervals and a <c>NullIntervalSearch</c> as its interval forest.
 /// </summary>
 /// <param name="saDir">supplementary annotation directory</param>
 public SupplementaryAnnotationProvider(string saDir)
 {
     // placeholders in use until data is assigned to these fields elsewhere
     _intervalForest = new NullIntervalSearch<ISupplementaryInterval>();
     _overlappingIntervals = new List<ISupplementaryInterval>();

     _saDir = saDir;
 }
예제 #22
0
 /// <summary>
 /// Creates the provider for the given custom interval directories. It starts with an empty
 /// list of overlapping intervals and a <c>NullIntervalSearch</c> as its interval forest.
 /// </summary>
 /// <param name="ciDirs">custom interval directories; copied into a list</param>
 public CustomIntervalProvider(IEnumerable<string> ciDirs)
 {
     // snapshot the directories so later enumeration does not depend on the caller's sequence
     _ciDirs = ciDirs.ToList();

     // placeholders in use until data is assigned to these fields elsewhere
     _intervalForest = new NullIntervalSearch<ICustomInterval>();
     _overlappingIntervals = new List<ICustomInterval>();
 }