} // used to find overlap genes for now

/// <summary>
/// Creates a position buffer around the supplied codon info provider and gene interval
/// forest. The buffer starts on an empty chromosome (empty name) with an empty set of
/// buffered positions.
/// </summary>
/// <param name="codonInfoProvider">stored in <c>CodonInfoProvider</c></param>
/// <param name="geneIntervalForest">stored in <c>GeneIntervalForest</c></param>
public PositionBuffer(ICodonInfoProvider codonInfoProvider, IIntervalForest<IGene> geneIntervalForest)
{
    // injected collaborators
    CodonInfoProvider  = codonInfoProvider;
    GeneIntervalForest = geneIntervalForest;

    // initial (empty) state
    CurrentChromosome = new EmptyChromosome(string.Empty);
    BufferedPositions = BufferedPositions.CreatEmptyBufferedPositions();
}
/// <summary>
/// Sets <c>IsRefNoCall</c> on the variant when it is a reference site whose filters failed.
/// If <paramref name="limitToTranscript"/> is true, the flag is set only when the variant
/// also overlaps at least one transcript interval. Variants that are not reference sites,
/// or that pass their filters, are left untouched.
/// </summary>
/// <param name="variant">the variant to inspect and flag</param>
/// <param name="limitToTranscript">restrict no-calls to variants overlapping a transcript</param>
/// <param name="transcriptIntervals">interval forest queried for the overlap test</param>
public static void Check(VariantFeature variant, bool limitToTranscript,
    IIntervalForest<Transcript> transcriptIntervals)
{
    // only reference sites whose filters failed are candidates
    bool isCandidate = variant.IsReference && !variant.PassFilter();
    if (!isCandidate)
    {
        return;
    }

    // unrestricted: every candidate is a no-call; restricted: the candidate must
    // overlap a transcript (the forest is only queried in the restricted case)
    variant.IsRefNoCall = !limitToTranscript ||
                          transcriptIntervals.OverlapsAny(variant.ReferenceIndex,
                              variant.OverlapReferenceBegin, variant.OverlapReferenceEnd);
}
/// <summary>
/// Annotates each variant with the transcripts found in its flanking region and appends
/// the resulting annotated transcripts to the variant. Variants with the minimal
/// annotation behavior, or without any flanking transcripts, are skipped.
/// </summary>
/// <param name="annotatedVariants">variants whose <c>Transcripts</c> collections are filled</param>
/// <param name="transcriptIntervalForest">forest used for both flanking and gene fusion lookups</param>
private void AddTranscripts(IAnnotatedVariant[] annotatedVariants,
    IIntervalForest<ITranscript> transcriptIntervalForest)
{
    for (var i = 0; i < annotatedVariants.Length; i++)
    {
        var target  = annotatedVariants[i];
        var variant = target.Variant;

        if (variant.Behavior.Equals(AnnotationBehavior.MinimalAnnotationBehavior))
        {
            continue;
        }

        // the fusion candidates are looked up before the flanking transcripts
        ITranscript[] fusionCandidates = GetGeneFusionCandidates(variant.BreakEnds, transcriptIntervalForest);
        ITranscript[] flankingTranscripts = transcriptIntervalForest.GetAllFlankingValues(variant);

        if (flankingTranscripts == null)
        {
            continue;
        }

        IList<IAnnotatedTranscript> results = TranscriptAnnotationFactory.GetAnnotatedTranscripts(
            variant, flankingTranscripts, _sequence, _siftCache, _polyphenCache, fusionCandidates);

        if (results.Count == 0)
        {
            continue;
        }

        foreach (var result in results)
        {
            target.Transcripts.Add(result);
        }
    }
}
/// <summary>
/// Collects the distinct transcripts that overlap the second piece of each break end,
/// leaving out transcripts whose ID is reported as predicted.
/// </summary>
/// <param name="breakEnds">break ends to inspect; may be null or empty</param>
/// <param name="transcriptIntervalForest">forest queried at each break end's Piece2 position</param>
/// <returns>
/// null when there are no break ends; otherwise an array (possibly empty) of the
/// collected transcripts
/// </returns>
private static ITranscript[] GetGeneFusionCandidates(IBreakEnd[] breakEnds,
    IIntervalForest<ITranscript> transcriptIntervalForest)
{
    bool hasBreakEnds = breakEnds != null && breakEnds.Length != 0;
    if (!hasBreakEnds)
    {
        return null;
    }

    var candidates = new HashSet<ITranscript>();

    foreach (var breakEnd in breakEnds)
    {
        // single-base query at the position of the second piece
        ITranscript[] overlapping = transcriptIntervalForest.GetAllOverlappingValues(
            breakEnd.Piece2.Chromosome.Index, breakEnd.Piece2.Position, breakEnd.Piece2.Position);

        if (overlapping != null)
        {
            foreach (var candidate in overlapping)
            {
                if (!candidate.Id.IsPredictedTranscript())
                {
                    candidates.Add(candidate);
                }
            }
        }
    }

    return candidates.ToArray();
}
/// <summary>
/// Reads the global cache from the stream and builds the transcript interval forest
/// from the transcripts it contains.
/// </summary>
/// <param name="stream">stream handed to the <c>GlobalCacheReader</c></param>
/// <param name="numRefSeqs">passed through to the interval forest factory</param>
/// <param name="transcriptIntervalForest">receives the created interval forest</param>
private static void LoadTranscriptCache(Stream stream, int numRefSeqs,
    out IIntervalForest<Transcript> transcriptIntervalForest)
{
    GlobalCache globalCache;

    // the reader is disposed before the forest is built
    using (var cacheReader = new GlobalCacheReader(stream))
    {
        globalCache = cacheReader.Read();
    }

    transcriptIntervalForest = IntervalArrayFactory.CreateIntervalForest(globalCache.Transcripts, numRefSeqs);
}
/// <summary>
/// Loads the repeat expansion phenotypes from the TSV stream selected by
/// <c>GetTsvStream</c> and wraps the resulting interval forest in a <c>Matcher</c>.
/// </summary>
/// <param name="genomeAssembly">used to select the TSV stream and passed to the reader</param>
/// <param name="refNameToChromosome">reference name lookup passed to the reader</param>
/// <param name="numRefSeqs">passed to the reader</param>
/// <param name="customTsvPath">passed to <c>GetTsvStream</c></param>
public RepeatExpansionProvider(GenomeAssembly genomeAssembly,
    IDictionary<string, IChromosome> refNameToChromosome, int numRefSeqs, string customTsvPath)
{
    using (Stream tsvStream = GetTsvStream(genomeAssembly, customTsvPath))
    {
        // the matcher is created while the stream is still open
        _matcher = new Matcher(
            RepeatExpansionReader.Load(tsvStream, genomeAssembly, refNameToChromosome, numRefSeqs));
    }
}
/// <summary>
/// Stores the supplied intron, microRNA and peptide sequence lists together with the
/// merged gene forest, and records that the lists have been provided.
/// </summary>
/// <param name="introns">stored in <c>_introns</c></param>
/// <param name="microRnas">stored in <c>_microRnas</c></param>
/// <param name="peptideSeqs">stored in <c>_peptideSeqs</c></param>
/// <param name="mergedGeneForest">stored in <c>_mergedGeneForest</c></param>
public void AddLists(List<SimpleInterval> introns, List<SimpleInterval> microRnas,
    List<string> peptideSeqs, IIntervalForest<MutableGene> mergedGeneForest)
{
    // interval lists
    _introns   = introns;
    _microRnas = microRnas;

    // sequences and genes
    _peptideSeqs      = peptideSeqs;
    _mergedGeneForest = mergedGeneForest;

    // flag is raised only after every list has been stored
    _hasLists = true;
}
/// <summary>
/// Creates the transcript cache: records the provider name, data source versions and
/// genome assembly, then wraps the transcript and regulatory region interval arrays in
/// interval forests.
/// </summary>
/// <param name="dataSourceVersions">stored in <c>DataSourceVersions</c></param>
/// <param name="genomeAssembly">stored in <c>GenomeAssembly</c></param>
/// <param name="transcriptIntervalArrays">backing arrays for the transcript forest</param>
/// <param name="regulatoryRegionIntervalArrays">backing arrays for the regulatory region forest</param>
public TranscriptCache(IEnumerable<IDataSourceVersion> dataSourceVersions, GenomeAssembly genomeAssembly,
    IntervalArray<ITranscript>[] transcriptIntervalArrays,
    IntervalArray<IRegulatoryRegion>[] regulatoryRegionIntervalArrays)
{
    // provider metadata
    Name               = "Transcript annotation provider";
    DataSourceVersions = dataSourceVersions;
    GenomeAssembly     = genomeAssembly;

    // searchable forests
    _transcriptIntervalForest = new IntervalForest<ITranscript>(transcriptIntervalArrays);
    _regulatoryIntervalForest = new IntervalForest<IRegulatoryRegion>(regulatoryRegionIntervalArrays);
}
/// <summary>
/// Creates the mock provider. When a reader is supplied, it is stored, the overlapping
/// interval list is created, and the interval forest is obtained from the reader.
/// When <paramref name="saReader"/> is null, no field is initialized.
/// </summary>
/// <param name="saReader">supplementary annotation reader; may be null</param>
/// <param name="renamer">passed to the reader when requesting the interval forest</param>
public MockSupplementaryAnnotationProvider(ISupplementaryAnnotationReader saReader, ChromosomeRenamer renamer)
{
    if (saReader != null)
    {
        _saReader = saReader;
        _overlappingSupplementaryIntervals = new List<ISupplementaryInterval>();
        _suppIntervalForest = _saReader.GetIntervalForest(renamer);
    }
}
/// <summary>
/// Loads the custom intervals for the given reference sequence and rebuilds the interval
/// search structure. Nothing happens when no custom interval directories are configured
/// or when the requested reference is the one already loaded.
/// </summary>
/// <param name="ucscReferenceName">reference name whose intervals are loaded</param>
/// <param name="renamer">passed to the interval array factory</param>
public void Load(string ucscReferenceName, IChromosomeRenamer renamer)
{
    if (_ciDirs.Count != 0 && ucscReferenceName != _currentUcscReferenceName)
    {
        _intervalForest = IntervalArrayFactory.CreateIntervalArray(GetIntervals(ucscReferenceName), renamer);

        // intervals count as available unless the factory produced the null search object
        bool isNullSearch = _intervalForest is NullIntervalSearch<ICustomInterval>;
        _hasIntervals = !isNullSearch;

        _currentUcscReferenceName = ucscReferenceName;
    }
}
/// <summary>
/// Loads the supplementary annotation file ("&lt;reference&gt;.nsa") for the given
/// reference sequence from the configured directory and refreshes the interval search
/// structure. Nothing happens when no directory is configured or when the requested
/// reference is the one already loaded.
/// </summary>
/// <remarks>
/// When the file does not exist (or the reader yields no forest), the interval search
/// falls back to a <c>NullIntervalSearch</c> instance so that <c>_hasIntervals</c> is
/// false and <c>_intervalForest</c> is never left null.
/// </remarks>
/// <param name="ucscReferenceName">reference name used to build the file name</param>
/// <param name="renamer">passed to the reader when requesting the interval forest</param>
public void Load(string ucscReferenceName, IChromosomeRenamer renamer)
{
    if (string.IsNullOrEmpty(_saDir) || ucscReferenceName == _currentUcscReferenceName)
    {
        return;
    }

    var saPath = Path.Combine(_saDir, ucscReferenceName + ".nsa");
    _saReader = File.Exists(saPath) ? new SupplementaryAnnotationReader(saPath) : null;

    // Start from the null search object: "null is NullIntervalSearch<...>" evaluates to
    // false, so leaving the forest null would wrongly report that intervals exist.
    _intervalForest = new NullIntervalSearch<ISupplementaryInterval>();

    if (_saReader != null)
    {
        var loadedForest = _saReader.GetIntervalForest(renamer);
        if (loadedForest != null)
        {
            _intervalForest = loadedForest;
        }
    }

    _hasIntervals = !(_intervalForest is NullIntervalSearch<ISupplementaryInterval>);
    _currentUcscReferenceName = ucscReferenceName;
}
/// <summary>
/// For every block-based offset, opens the VCF at that offset, reads the first genomic
/// position found there, and adjusts it with <c>FindProperStartPosition</c>. The adjusted
/// positions are then passed through <c>MergeConsecutiveEqualValues</c>.
/// </summary>
/// <param name="blockBasedOffsets">offsets at which the VCF stream is opened</param>
/// <param name="vcfUrl">location of the VCF</param>
/// <param name="geneIntervalForest">gene forest used when adjusting each position</param>
/// <param name="refNameToChromosome">reference name lookup used when adjusting each position</param>
/// <returns>the merged, adjusted start positions</returns>
private static AnnotationPosition[] AdjustPartitionGenomicStarts(IReadOnlyList<long> blockBasedOffsets,
    string vcfUrl, IIntervalForest<IGene> geneIntervalForest,
    IDictionary<string, IChromosome> refNameToChromosome)
{
    int partitionCount = blockBasedOffsets.Count;
    var adjusted = new AnnotationPosition[partitionCount];

    for (var partition = 0; partition < partitionCount; partition++)
    {
        // the second argument of GetFirstGenomicPosition is true only for the first offset
        bool isFirstPartition = partition == 0;
        long offset = blockBasedOffsets[partition];

        using (var rawStream = PersistentStreamUtils.GetReadStream(vcfUrl, offset))
        using (var decompressed = new BlockGZipStream(rawStream, CompressionMode.Decompress))
        {
            var firstPosition = GetFirstGenomicPosition(decompressed, isFirstPartition);
            adjusted[partition] = FindProperStartPosition(firstPosition, geneIntervalForest, refNameToChromosome);
        }
    }

    return MergeConsecutiveEqualValues(adjusted).ToArray();
}
/// <summary>
/// Creates the mock provider by reading every custom interval from the stream (until the
/// reader returns null) and building the interval search structure from them.
/// </summary>
/// <param name="stream">stream handed to the <c>CustomIntervalReader</c></param>
/// <param name="renamer">passed to the interval array factory</param>
public MockCustomIntervalProvider(Stream stream, ChromosomeRenamer renamer)
{
    var loaded = new List<ICustomInterval>();

    using (var intervalReader = new CustomIntervalReader(stream))
    {
        // a null interval marks the end of the input
        var next = intervalReader.GetNextCustomInterval();
        while (next != null)
        {
            loaded.Add(next);
            next = intervalReader.GetNextCustomInterval();
        }
    }

    _hasIntervals = loaded.Count > 0;
    _intervalForest = IntervalArrayFactory.CreateIntervalArray(loaded, renamer);
}
/// <summary>
/// Adds regulatory region annotations to every variant whose behavior requests them.
/// Variants rejected by <c>SkipLargeVariants</c>, and variants without overlapping
/// regulatory regions, are left unchanged.
/// </summary>
/// <param name="annotatedVariants">variants whose <c>RegulatoryRegions</c> collections are filled</param>
/// <param name="regulatoryIntervalForest">forest queried for overlapping regulatory regions</param>
private static void AddRegulatoryRegions(IAnnotatedVariant[] annotatedVariants,
    IIntervalForest<IRegulatoryRegion> regulatoryIntervalForest)
{
    foreach (var annotated in annotatedVariants)
    {
        if (!annotated.Variant.Behavior.NeedRegulatoryRegions)
        {
            continue;
        }

        var variant = annotated.Variant;

        // Insertions are treated as happening at the end position, so the query starts
        // at End instead of Start; this covers an insertion located just before the
        // beginning of a regulatory element.
        int queryBegin;
        if (variant.Type == VariantType.insertion)
        {
            queryBegin = variant.End;
        }
        else
        {
            queryBegin = variant.Start;
        }

        if (SkipLargeVariants(queryBegin, variant.End))
        {
            continue;
        }

        IRegulatoryRegion[] overlapping = regulatoryIntervalForest.GetAllOverlappingValues(
            variant.Chromosome.Index, queryBegin, variant.End);

        if (overlapping == null)
        {
            continue;
        }

        foreach (var region in overlapping)
        {
            // an insertion at the very end of the region lies past the feature,
            // so it is not considered overlapping
            bool insertionPastRegion = variant.Type == VariantType.insertion && variant.End == region.End;

            if (!insertionPastRegion)
            {
                annotated.RegulatoryRegions.Add(RegulatoryRegionAnnotator.Annotate(variant, region));
            }
        }
    }
}
/// <summary>
/// Sets <c>UpdatedGene</c> on every transcript: the UGA genes overlapping the transcript's
/// original gene are looked up, one is picked by gene ID, and it is converted with
/// <c>ToGene</c> for the current genome assembly.
/// </summary>
/// <param name="transcripts">transcripts to update</param>
/// <param name="geneForest">forest of UGA genes queried by the original gene's coordinates</param>
/// <exception cref="InvalidDataException">
/// thrown when the forest returns null for a transcript's original gene
/// </exception>
private void AssignUgaGenesToTranscripts(IEnumerable<MutableTranscript> transcripts,
    IIntervalForest<UgaGene> geneForest)
{
    foreach (var transcript in transcripts)
    {
        var originalGene = transcript.Gene;

        var candidates = geneForest.GetAllOverlappingValues(originalGene.Chromosome.Index,
            originalGene.Start, originalGene.End);

        if (candidates != null)
        {
            transcript.UpdatedGene = PickGeneById(candidates, originalGene.GeneId).ToGene(_genomeAssembly);
            continue;
        }

        // no overlapping UGA gene: report the offending gene with its strand
        var strand = originalGene.OnReverseStrand ? "R" : "F";
        throw new InvalidDataException($"Found a transcript ({transcript.Id}) that does not have an overlapping UGA gene: gene ID: {originalGene.GeneId} {originalGene.Chromosome.UcscName} {originalGene.Start} {originalGene.End} {strand}");
    }
}
/// <summary>
/// Assigns UGA genes to the transcripts (logging progress), converts the transcripts and
/// regulatory regions into interval arrays, builds the cache header, and returns the
/// staging object that combines them.
/// </summary>
/// <param name="mutableTranscripts">transcripts to update and convert</param>
/// <param name="regulatoryRegions">regulatory regions to convert</param>
/// <param name="geneForest">UGA gene forest used for the gene assignment</param>
/// <param name="numRefSeqs">passed to the interval array conversions</param>
/// <returns>the staging object created from the header and both sets of interval arrays</returns>
public TranscriptCacheStaging CreateTranscriptCache(MutableTranscript[] mutableTranscripts,
    IEnumerable<IRegulatoryRegion> regulatoryRegions, IIntervalForest<UgaGene> geneForest, int numRefSeqs)
{
    // step 1: gene assignment, bracketed by progress messages
    _logger.Write("- assigning UGA genes to transcripts... ");
    AssignUgaGenesToTranscripts(mutableTranscripts, geneForest);
    _logger.WriteLine("finished.");

    // step 2: interval arrays (transcripts first, then regulatory regions)
    var convertedTranscripts = mutableTranscripts.ToTranscripts();
    var transcriptArrays     = convertedTranscripts.ToIntervalArrays(numRefSeqs);
    var regulatoryArrays     = regulatoryRegions.ToIntervalArrays(numRefSeqs);

    // step 3: header; the creation timestamp is the current local time in ticks
    var vepHeader   = new TranscriptCacheCustomHeader(_vepVersion, _vepReleaseTicks);
    var cacheHeader = new CacheHeader(CacheConstants.Identifier, CacheConstants.SchemaVersion,
        CacheConstants.DataVersion, _source, DateTime.Now.Ticks, _genomeAssembly, vepHeader);

    return TranscriptCacheStaging.GetStaging(cacheHeader, transcriptArrays, regulatoryArrays);
}
/// <summary>
/// Moves the given genomic position backwards until no gene overlaps it: while genes
/// overlap the current position, the position is set to one base before the smallest
/// start among them.
/// </summary>
/// <remarks>
/// Both a null result and an empty array from the forest are treated as "no overlapping
/// genes". Previously only null ended the loop, so an empty array would have left the
/// position unchanged and repeated the same query forever.
/// </remarks>
/// <param name="genomicPosition">starting chromosome name and position</param>
/// <param name="geneIntervalForest">gene forest queried with single-base intervals</param>
/// <param name="refNameToChromosome">lookup used to resolve the chromosome</param>
/// <returns>
/// a position on the same chromosome, placed right before the overlapping genes and
/// clamped to a minimum of 1
/// </returns>
private static AnnotationPosition FindProperStartPosition(AnnotationPosition genomicPosition,
    IIntervalForest<IGene> geneIntervalForest, IDictionary<string, IChromosome> refNameToChromosome)
{
    var chromosome = ReferenceNameUtilities.GetChromosome(refNameToChromosome, genomicPosition.Chromosome);
    int currentPosition = genomicPosition.Position;

    while (true)
    {
        IGene[] overlappingGenes =
            geneIntervalForest.GetAllOverlappingValues(chromosome.Index, currentPosition, currentPosition);

        // nothing overlaps the current position: we are done
        if (overlappingGenes == null || overlappingGenes.Length == 0)
        {
            break;
        }

        // every overlapping gene starts at or before the current position, so this
        // always moves the position strictly backwards
        currentPosition = overlappingGenes.Min(gene => gene.Start) - 1;
    }

    // Always return the position right before the overlapping genes to KISS
    return new AnnotationPosition(genomicPosition.Chromosome, currentPosition < 1 ? 1 : currentPosition);
}
/// <summary>
/// Creates a matcher that works on the supplied repeat expansion phenotype forest.
/// </summary>
/// <param name="phenotypeForest">stored in <c>_phenotypeForest</c></param>
public Matcher(IIntervalForest<RepeatExpansionPhenotype> phenotypeForest)
{
    _phenotypeForest = phenotypeForest;
}
/// <summary>
/// Creates the mock provider from an in-memory list of custom intervals: records whether
/// the list is non-empty and builds the interval search structure from it.
/// </summary>
/// <param name="intervals">custom intervals to index</param>
/// <param name="renamer">passed to the interval array factory</param>
public MockCustomIntervalProvider(List<ICustomInterval> intervals, ChromosomeRenamer renamer)
{
    int intervalCount = intervals.Count;
    _hasIntervals = intervalCount > 0;

    _intervalForest = IntervalArrayFactory.CreateIntervalArray(intervals, renamer);
}
/// <summary>
/// Returns the transcripts overlapping the interval after it has been widened on both
/// sides by the flanking length of its chromosome.
/// </summary>
/// <param name="transcriptIntervalForest">forest to query</param>
/// <param name="interval">interval supplying the chromosome, start and end</param>
/// <returns>whatever <c>GetAllOverlappingValues</c> returns for the widened interval</returns>
public static ITranscript[] GetAllFlankingValues(this IIntervalForest<ITranscript> transcriptIntervalForest,
    IChromosomeInterval interval)
{
    var refIndex     = interval.Chromosome.Index;
    var flankedBegin = interval.Start - interval.Chromosome.FlankingLength;
    var flankedEnd   = interval.End + interval.Chromosome.FlankingLength;

    return transcriptIntervalForest.GetAllOverlappingValues(refIndex, flankedBegin, flankedEnd);
}
/// <summary>
/// Creates the provider for the given supplementary annotation directory. The list of
/// overlapping intervals starts empty and the interval search starts as a
/// <c>NullIntervalSearch</c>.
/// </summary>
/// <param name="saDir">supplementary annotation directory, stored in <c>_saDir</c></param>
public SupplementaryAnnotationProvider(string saDir)
{
    // configuration
    _saDir = saDir;

    // initial (empty) state
    _overlappingIntervals = new List<ISupplementaryInterval>();
    _intervalForest       = new NullIntervalSearch<ISupplementaryInterval>();
}
/// <summary>
/// Creates the provider for the given custom interval directories. The directories are
/// copied into a list, the list of overlapping intervals starts empty, and the interval
/// search starts as a <c>NullIntervalSearch</c>.
/// </summary>
/// <param name="ciDirs">custom interval directories</param>
public CustomIntervalProvider(IEnumerable<string> ciDirs)
{
    // configuration (materialized once)
    _ciDirs = ciDirs.ToList();

    // initial (empty) state
    _overlappingIntervals = new List<ICustomInterval>();
    _intervalForest       = new NullIntervalSearch<ICustomInterval>();
}