/// <summary>
/// Loads the chromosome of the position, then (when at least one reader in <c>_saReaders</c> exists)
/// adds supplementary intervals and, for every variant whose behavior has <c>NeedSaPosition</c> set,
/// attaches each non-null annotation returned by the readers for the variant's start.
/// </summary>
/// <param name="annotatedPosition">The position whose variants are annotated in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    // the chromosome is loaded even when no readers are configured
    LoadChromosome(annotatedPosition.Position.Chromosome);

    bool hasReaders = _saReaders != null && _saReaders.Count != 0;
    if (!hasReaders)
    {
        return;
    }

    AddSupplementaryIntervals(annotatedPosition);

    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        if (variantEntry.Variant.Behavior.NeedSaPosition)
        {
            foreach (var reader in _saReaders)
            {
                var annotation = reader.GetAnnotation(variantEntry.Variant.Start);
                if (annotation == null)
                {
                    continue;
                }

                AddSaPositon(annotation, variantEntry);
            }
        }
    }
}
/// <summary>
/// Sets <c>InLowComplexityRegion</c> on every annotated variant to the result of
/// <c>_nsiReader.OverlapsAny</c> for that variant.
/// </summary>
/// <param name="annotatedPosition">The position whose variants are flagged in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        var overlaps = _nsiReader.OverlapsAny(variantEntry.Variant);
        variantEntry.InLowComplexityRegion = overlaps;
    }
}
/// <summary>
/// Builds the ID column value: the ids returned by <c>GetNonDbsnpIds</c> for the original ID field,
/// followed by the VCF strings of every supplementary annotation whose data source key equals
/// <c>DbSnpKeyName</c>.
/// </summary>
/// <param name="annotatedPosition">The annotated position providing the VCF fields and variants.</param>
/// <returns>The string produced by <c>VcfField.GetString("")</c> for the collected ids.</returns>
private static string ExtractDbId(IAnnotatedPosition annotatedPosition)
{
    var idField = new VcfField();

    // ids that were already present in the input ID column
    var existingIds = GetNonDbsnpIds(annotatedPosition.Position.VcfFields[VcfCommon.IdIndex]);
    if (existingIds != null)
    {
        foreach (var existingId in existingIds)
        {
            idField.Add(existingId);
        }
    }

    // ids contributed by the dbSNP supplementary annotations
    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        foreach (var annotation in variantEntry.SupplementaryAnnotations)
        {
            if (annotation.SaDataSource.KeyName == DbSnpKeyName)
            {
                foreach (var vcfString in annotation.GetVcfStrings())
                {
                    idField.Add(vcfString);
                }
            }
        }
    }

    return idField.GetString("");
}
/// <summary>
/// Queries every reader in <c>_nsiReaders</c> with the first variant of the position and adds each
/// non-null result to <c>SupplementaryIntervals</c>. Readers reporting for small variants are skipped
/// when no variant has the small-variant behavior; readers reporting for structural variants are
/// skipped when no variant needs SA intervals.
/// </summary>
/// <param name="annotatedPosition">The position that receives the supplementary intervals.</param>
private void GetStructuralVariantAnnotations(IAnnotatedPosition annotatedPosition)
{
    var variantEntries = annotatedPosition.AnnotatedVariants;
    bool anyNeedsSaInterval = variantEntries.Any(entry => entry.Variant.Behavior.NeedSaInterval);
    bool anyIsSmallVariant = variantEntries.Any(entry => entry.Variant.Behavior == AnnotationBehavior.SmallVariants);

    foreach (INsiReader reader in _nsiReaders)
    {
        // skip readers whose target variant class is not present at this position
        if ((reader.ReportFor == ReportFor.SmallVariants && !anyIsSmallVariant) ||
            (reader.ReportFor == ReportFor.StructuralVariants && !anyNeedsSaInterval))
        {
            continue;
        }

        var found = reader.GetAnnotation(annotatedPosition.Position.Variants[0]);
        if (found != null)
        {
            annotatedPosition.SupplementaryIntervals.Add(
                new SupplementaryAnnotation(reader.JsonKey, true, false, null, found));
        }
    }
}
/// <summary>
/// Annotates a single position and returns the JSON document written by a <c>JsonWriter</c>
/// (the annotated position followed by the gene annotations) as a UTF-8 decoded string.
/// </summary>
/// <param name="position">The position to annotate.</param>
/// <param name="resources">Annotation resources providing the annotator and the optional preload step.</param>
/// <param name="sampleNames">Sample names forwarded to the JSON writer.</param>
/// <param name="preloadRequired">When true, <c>resources.SingleVariantPreLoad</c> is invoked before annotating.</param>
/// <returns>The JSON output as a string.</returns>
/// <exception cref="UserErrorException">
/// Thrown when the annotator returns null or the annotated position yields no JSON string.
/// </exception>
private static string GetPositionAnnotation(IPosition position, IAnnotationResources resources, string[] sampleNames, bool preloadRequired)
{
    if (preloadRequired)
    {
        resources.SingleVariantPreLoad(position);
    }

    IAnnotatedPosition annotatedPosition = resources.Annotator.Annotate(position);
    string json = annotatedPosition?.GetJsonString();
    if (json == null)
    {
        throw new UserErrorException("No variant is provided for annotation");
    }

    // the stream is now disposed deterministically, including when the writer throws
    using (var outputJsonStream = new MemoryStream())
    {
        // the writer is disposed before the buffer is read
        // NOTE(review): presumably disposal flushes pending output into the stream - confirm against JsonWriter
        using (var jsonWriter = new JsonWriter(outputJsonStream, null, resources, Date.CurrentTimeStamp, sampleNames, true))
        {
            WriteAnnotatedPosition(annotatedPosition, jsonWriter, json);
            jsonWriter.WriteGenes(resources.Annotator.GetGeneAnnotations());
        }

        // ToArray() copies the whole buffer regardless of the current position,
        // so no rewind is required before decoding
        return Encoding.UTF8.GetString(outputJsonStream.ToArray());
    }
}
/// <summary>
/// Retrieves the transcripts returned by <c>GetOverlappingFlankingTranscripts</c> for the position and,
/// for each annotated variant, stores the transcripts produced by
/// <c>PianoAnnotationUtils.GetAnnotatedTranscripts</c> in the Ensembl or RefSeq collection
/// according to the transcript source.
/// </summary>
/// <param name="annotatedPosition">The position whose variants receive the annotated transcripts.</param>
private void AddTranscripts(IAnnotatedPosition annotatedPosition)
{
    var candidateTranscripts = _transcriptCache.GetOverlappingFlankingTranscripts(annotatedPosition.Position);

    // todo: handle intergenic variants
    if (candidateTranscripts == null)
    {
        return;
    }

    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        var results = new List<IAnnotatedTranscript>();
        PianoAnnotationUtils.GetAnnotatedTranscripts(variantEntry.Variant, candidateTranscripts, _sequence, results);

        // an empty result list simply makes this loop a no-op
        foreach (var result in results)
        {
            if (result.Transcript.Source != Source.Ensembl)
            {
                variantEntry.RefSeqTranscripts.Add(result);
            }
            else
            {
                variantEntry.EnsemblTranscripts.Add(result);
            }
        }
    }
}
/// <summary>
/// Adds the supplementary intervals overlapping the first annotated variant of the position.
/// The query range starts at the variant's end for insertions and at its start otherwise.
/// </summary>
/// <param name="annotatedPosition">The position that receives the overlapping intervals.</param>
private void AddSupplementaryIntervals(IAnnotatedPosition annotatedPosition)
{
    bool hasAnyIntervals = _hasSmallVariantIntervals || _hasSvIntervals || _hasAllVariantIntervals;
    if (!hasAnyIntervals)
    {
        return;
    }

    var firstVariant = annotatedPosition.AnnotatedVariants[0].Variant;
    var begin = firstVariant.Type == VariantType.insertion ? firstVariant.End : firstVariant.Start;
    var end = firstVariant.End;

    // NOTE(review): both the small-variant and the SV interval sets are gated on NeedSaInterval;
    // confirm that the small-variant set is not meant to use a different behavior flag
    var needsSaInterval = firstVariant.Behavior.NeedSaInterval;

    if (needsSaInterval && _hasSmallVariantIntervals)
    {
        AddIntervals(annotatedPosition, _smallVariantIntervalArray, begin, end);
    }

    if (needsSaInterval && _hasSvIntervals)
    {
        AddIntervals(annotatedPosition, _svIntervalArray, begin, end);
    }

    // intervals that apply to all variants are added regardless of the variant behavior
    if (_hasAllVariantIntervals)
    {
        AddIntervals(annotatedPosition, _allVariantIntervalArray, begin, end);
    }
}
/// <summary>
/// Calls <c>AddSmallAnnotations</c> for every annotated variant whose behavior has
/// <c>NeedSaPosition</c> set.
/// </summary>
/// <param name="annotatedPosition">The position whose variants are annotated in place.</param>
private void AddSmallVariantAnnotations(IAnnotatedPosition annotatedPosition)
{
    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        if (variantEntry.Variant.Behavior.NeedSaPosition)
        {
            AddSmallAnnotations(variantEntry);
        }
    }
}
/// <summary>
/// Sets <c>PhylopScore</c> on every SNV of the position to the value returned by
/// <c>_phylopReader.GetAnnotation</c> for the position's chromosome and the variant's start.
/// Variants of any other type are left untouched.
/// </summary>
/// <param name="annotatedPosition">The position whose SNVs are scored in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        if (variantEntry.Variant.Type == VariantType.SNV)
        {
            variantEntry.PhylopScore = _phylopReader.GetAnnotation(
                annotatedPosition.Position.Chromosome,
                variantEntry.Variant.Start);
        }
    }
}
/// <summary>
/// Runs the position/allele annotation step when <c>_nsaReaders</c> is set, followed by the
/// interval annotation step when <c>_nsiReaders</c> is set.
/// </summary>
/// <param name="annotatedPosition">The position to annotate in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    // position- and allele-level annotations
    if (_nsaReaders != null)
    {
        AddPositionAndAlleleAnnotations(annotatedPosition);
    }

    // interval-level annotations
    if (_nsiReaders != null)
    {
        GetStructuralVariantAnnotations(annotatedPosition);
    }
}
/// <summary>
/// Invokes <c>Annotate</c> on every plugin with the given position and sequence.
/// Nothing happens when <paramref name="sequence"/> is null.
/// </summary>
/// <param name="plugins">The plugins to run, in enumeration order.</param>
/// <param name="annotatedPosition">The position handed to each plugin.</param>
/// <param name="sequence">The sequence handed to each plugin; null disables all plugins.</param>
public static void Annotate(this IEnumerable<IPlugin> plugins, IAnnotatedPosition annotatedPosition, ISequence sequence)
{
    if (sequence != null)
    {
        foreach (var plugin in plugins)
        {
            plugin.Annotate(annotatedPosition, sequence);
        }
    }
}
/// <summary>
/// Passes the transcripts of every annotated variant to <c>AddGenesFromTranscripts</c>.
/// Does nothing when no gene annotation provider is configured.
/// </summary>
/// <param name="annotatedPosition">The position whose variant transcripts are inspected.</param>
private void TrackAffectedGenes(IAnnotatedPosition annotatedPosition)
{
    if (_geneAnnotationProvider != null)
    {
        foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
        {
            AddGenesFromTranscripts(variantEntry.Transcripts);
        }
    }
}
/// <summary>
/// Calls <c>AddSmallAnnotations</c> for every annotated variant whose behavior requests
/// position-level (<c>NeedSaPosition</c>) or allele-level (<c>NeedSaAllele</c>) annotations,
/// forwarding both flags.
/// </summary>
/// <param name="annotatedPosition">The position whose variants are annotated in place.</param>
private void AddPositionAndAlleleAnnotations(IAnnotatedPosition annotatedPosition)
{
    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        var wantsPosition = variantEntry.Variant.Behavior.NeedSaPosition;
        var wantsAllele = variantEntry.Variant.Behavior.NeedSaAllele;

        if (wantsPosition || wantsAllele)
        {
            AddSmallAnnotations(variantEntry, wantsPosition, wantsAllele);
        }
    }
}
/// <summary>
/// Loads the prediction caches for the position's chromosome index and adds transcripts to the
/// position. Returns immediately when the position has no annotated variants.
/// </summary>
/// <param name="annotatedPosition">The position to annotate in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    var variantEntries = annotatedPosition.AnnotatedVariants;
    bool hasVariants = variantEntries != null && variantEntries.Length != 0;
    if (!hasVariants)
    {
        return;
    }

    var chromosomeIndex = annotatedPosition.Position.Chromosome.Index;
    LoadPredictionCaches(chromosomeIndex);

    AddTranscripts(annotatedPosition);
}
/// <summary>
/// Loads the prediction caches for the position's chromosome index, then adds regulatory regions
/// and transcripts to the annotated variants using the interval forests of <c>_transcriptCache</c>.
/// Returns immediately when the position has no annotated variants.
/// </summary>
/// <param name="annotatedPosition">The position whose variants are annotated in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    var variantEntries = annotatedPosition.AnnotatedVariants;
    bool hasVariants = variantEntries != null && variantEntries.Length != 0;
    if (!hasVariants)
    {
        return;
    }

    ushort chromosomeIndex = annotatedPosition.Position.Chromosome.Index;
    LoadPredictionCaches(chromosomeIndex);

    // the forests are read only after the caches have been loaded
    AddRegulatoryRegions(annotatedPosition.AnnotatedVariants, _transcriptCache.RegulatoryIntervalForest);
    AddTranscripts(annotatedPosition.AnnotatedVariants, _transcriptCache.TranscriptIntervalForest);
}
/// <summary>
/// Loads the position's chromosome when its UCSC name differs from the currently loaded one, then
/// sets <c>PhylopScore</c> on every SNV to the value returned by <c>_phylopReader.GetScore</c>
/// for the variant's start.
/// </summary>
/// <param name="annotatedPosition">The position whose SNVs are scored in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    bool chromosomeChanged = _currentUcscReferenceName != annotatedPosition.Position.Chromosome.UcscName;
    if (chromosomeChanged)
    {
        LoadChromosome(annotatedPosition.Position.Chromosome);
    }

    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        if (variantEntry.Variant.Type == VariantType.SNV)
        {
            variantEntry.PhylopScore = _phylopReader.GetScore(variantEntry.Variant.Start);
        }
    }
}
/// <summary>
/// Registers the position in the index creator, writes the JSON entry, and, unless both VCF writers
/// are null or the position is recomposed, writes the converted VCF line to each non-null writer.
/// </summary>
/// <param name="annotatedPosition">The annotated position being written.</param>
/// <param name="textWriterPosition">The value stored in the index for this position.</param>
/// <param name="jasixIndexCreator">The index creator that records the position.</param>
/// <param name="jsonWriter">The writer receiving the JSON entry.</param>
/// <param name="vcfWriter">Optional VCF writer; may be null.</param>
/// <param name="gvcfWriter">Optional gVCF writer; may be null.</param>
/// <param name="jsonOutput">The JSON text of the position.</param>
private void WriteOutput(IAnnotatedPosition annotatedPosition, long textWriterPosition, OnTheFlyIndexCreator jasixIndexCreator, IJsonWriter jsonWriter, LiteVcfWriter vcfWriter, LiteVcfWriter gvcfWriter, string jsonOutput)
{
    jasixIndexCreator.Add(annotatedPosition.Position, textWriterPosition);
    jsonWriter.WriteJsonEntry(jsonOutput);

    // the VCF conversion is skipped when nobody consumes it or the position is recomposed
    if ((vcfWriter == null && gvcfWriter == null) || annotatedPosition.Position.IsRecomposed)
    {
        return;
    }

    string vcfLine = _conversion.Convert(annotatedPosition);

    if (vcfWriter != null)
    {
        vcfWriter.Write(vcfLine);
    }

    if (gvcfWriter != null)
    {
        gvcfWriter.Write(vcfLine);
    }
}
/// <summary>
/// Writes one tab-separated line per Ensembl transcript and then per RefSeq transcript of every
/// annotated variant. Each line holds the chromosome field, the position start, the reference allele,
/// the alternate allele with the variant's index, and the transcript's string form. Transcripts whose
/// <c>ToString()</c> returns null are skipped.
/// </summary>
/// <param name="annotatedPosition">The annotated position to write; nothing is written without variants.</param>
/// <param name="writer">The writer receiving the lines.</param>
private static void WriteAnnotatedPostion(IAnnotatedPosition annotatedPosition, StreamWriter writer)
{
    // NOTE(review): presumably the intended column layout - confirm against the header writer:
    //"#Chrom\tPos\tRefAllele\tAltAllele\tGeneSymbol\tGeneId\tTranscriptID\tProteinID\tProteinPos\tUpstream\tAAchange\tDownstream\tConsequences";
    var variantEntries = annotatedPosition.AnnotatedVariants;
    if (variantEntries == null || variantEntries.Length == 0)
    {
        return;
    }

    for (int variantIndex = 0; variantIndex < variantEntries.Length; variantIndex++)
    {
        var variantEntry = variantEntries[variantIndex];

        // the leading columns are identical for all transcripts of this variant
        var linePrefix = annotatedPosition.Position.VcfFields[VcfCommon.ChromIndex] + "\t"
                         + annotatedPosition.Position.Start + "\t"
                         + annotatedPosition.Position.RefAllele + "\t"
                         + annotatedPosition.Position.AltAlleles[variantIndex] + "\t";

        foreach (var ensemblTranscript in variantEntry.EnsemblTranscripts)
        {
            if (ensemblTranscript.ToString() != null)
            {
                writer.WriteLine(linePrefix + ensemblTranscript);
            }
        }

        foreach (var refSeqTranscript in variantEntry.RefSeqTranscripts)
        {
            if (refSeqTranscript.ToString() != null)
            {
                writer.WriteLine(linePrefix + refSeqTranscript);
            }
        }
    }
}
/// <summary>
/// For every variant of type <c>short_tandem_repeat_variation</c>, casts the variant to
/// <c>RepeatExpansion</c>, asks <c>_matcher</c> for matching annotations and, when the result is
/// non-null, stores it in <c>RepeatExpansionPhenotypes</c>.
/// </summary>
/// <param name="annotatedPosition">The position whose variants are annotated in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        if (variantEntry.Variant.Type == VariantType.short_tandem_repeat_variation)
        {
            var matches = _matcher.GetMatchingAnnotations((RepeatExpansion)variantEntry.Variant);
            if (matches != null)
            {
                variantEntry.RepeatExpansionPhenotypes = matches;
            }
        }
    }
}
/// <summary>
/// Loads the prediction caches for the position's chromosome index, adds regulatory regions and
/// transcripts to the annotated variants, and adds gene fusions when the position has a structural
/// variant. Returns immediately when the position has no annotated variants.
/// </summary>
/// <param name="annotatedPosition">The position whose variants are annotated in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    var variantEntries = annotatedPosition.AnnotatedVariants;
    bool hasVariants = variantEntries != null && variantEntries.Length != 0;
    if (!hasVariants)
    {
        return;
    }

    var position = annotatedPosition.Position;
    ushort chromosomeIndex = position.Chromosome.Index;
    LoadPredictionCaches(chromosomeIndex);

    // the regulatory forest is read only after the caches have been loaded
    AddRegulatoryRegions(annotatedPosition.AnnotatedVariants, _transcriptCache.RegulatoryIntervalForest);
    AddTranscripts(annotatedPosition.AnnotatedVariants);

    if (!position.HasStructuralVariant)
    {
        return;
    }

    AddGeneFusions(annotatedPosition.AnnotatedVariants);
}
/// <summary>
/// Appends the INFO column to <paramref name="sb"/>: the updated info field plus the entries added by
/// <c>ExtractInfo</c>, then a ';' separator when both info text and CSQ entries exist, then the text
/// returned by <c>GetCsqtAndCsqrVcfInfo</c>. A single "." is appended when there is neither info text
/// nor any CSQ entry.
/// </summary>
/// <param name="annotatedPosition">The annotated position providing the info data and CSQ entries.</param>
/// <param name="sb">The builder receiving the INFO column text.</param>
private void AddInfoField(IAnnotatedPosition annotatedPosition, StringBuilder sb)
{
    var entries = new VcfField();

    var updatedInfo = annotatedPosition.Position.InfoData.UpdatedInfoField;
    if (!string.IsNullOrEmpty(updatedInfo))
    {
        entries.Add(updatedInfo);
    }

    ExtractInfo(annotatedPosition, entries);

    // a lone "." is treated as an empty INFO text
    var infoText = entries.GetString("");
    if (infoText == ".")
    {
        infoText = "";
    }

    sb.Append(infoText);

    var csqs = new List<CsqEntry>();
    ExtractCsqs(annotatedPosition, csqs);

    // separate the existing info text from the CSQ tags
    if (csqs.Count != 0 && infoText.Length > 0)
    {
        sb.Append(";");
    }

    // the CSQ text is requested even when the list is empty
    sb.Append(GetCsqtAndCsqrVcfInfo(csqs));

    // nothing at all to report
    if (csqs.Count == 0 && infoText.Length == 0)
    {
        sb.Append(".");
    }
}
/// <summary>
/// Adds every interval of <paramref name="intervalArray"/> overlapping [begin, end] to the position's
/// supplementary intervals. The reciprocal overlap with the first annotated variant is computed only
/// when the position's start is smaller than its end; otherwise null is stored.
/// </summary>
/// <param name="annotatedPosition">The position that receives the intervals.</param>
/// <param name="intervalArray">The interval collection to query.</param>
/// <param name="begin">First coordinate of the query range.</param>
/// <param name="end">Last coordinate of the query range.</param>
private static void AddIntervals(IAnnotatedPosition annotatedPosition, IIntervalSearch<ISupplementaryInterval> intervalArray, int begin, int end)
{
    var hits = intervalArray.GetAllOverlappingValues(begin, end);
    if (hits == null)
    {
        return;
    }

    foreach (var hit in hits)
    {
        var overlap = annotatedPosition.Position.Start < annotatedPosition.Position.End
            ? hit.GetReciprocalOverlap(annotatedPosition.AnnotatedVariants[0].Variant)
            : null;

        annotatedPosition.SupplementaryIntervals.Add(new AnnotatedSupplementaryInterval(hit, overlap));
    }
}
/// <summary>
/// Annotates every variant of the position with the regulatory regions it overlaps. Variants whose
/// length exceeds <c>MaxSvLengthForRegulatoryRegionAnnotation</c> are skipped, as are insertions
/// whose end coincides with the end of the regulatory region.
/// </summary>
/// <param name="annotatedPosition">The position whose variants are annotated in place.</param>
private void AddRegulatoryRegions(IAnnotatedPosition annotatedPosition)
{
    var candidateRegions = _transcriptCache.GetOverlappingRegulatoryRegions(annotatedPosition.Position);
    if (candidateRegions == null)
    {
        return;
    }

    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        var variant = variantEntry.Variant;

        // inserted bases are assumed to sit at the end position, so an insertion
        // uses its end as the begin coordinate
        var variantEnd = variant.End;
        var variantBegin = variant.Type == VariantType.insertion ? variant.End : variant.Start;

        // variants longer than the configured maximum get no regulatory annotation
        var variantLength = variantEnd - variantBegin + 1;
        if (variantLength > MaxSvLengthForRegulatoryRegionAnnotation)
        {
            continue;
        }

        foreach (var region in candidateRegions)
        {
            // an insertion located at the region's end lies past the feature and does not count
            if (variant.Overlaps(region) &&
                !(variant.Type == VariantType.insertion && variantEnd == region.End))
            {
                variantEntry.RegulatoryRegions.Add(RegulatoryRegionAnnotator.Annotate(variant, region));
            }
        }
    }
}
/// <summary>
/// Loads the chromosome of the position, assigns the position's cytogenetic band and, for positions
/// on chrM only, sets <c>HgvsgNotation</c> on every annotated variant using the accession
/// NC_012920.1. Nothing beyond loading the chromosome happens when the position has no variants.
/// </summary>
/// <param name="annotatedPosition">The position to annotate in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    LoadChromosome(annotatedPosition.Position.Chromosome);

    if (annotatedPosition.AnnotatedVariants == null)
    {
        return;
    }

    var position = annotatedPosition.Position;
    annotatedPosition.CytogeneticBand = _cytogeneticBands.GetCytogeneticBand(position.Chromosome, position.Start, position.End);

    // HGVS g. notation is produced for the mitochondrial chromosome only
    if (position.Chromosome.UcscName == "chrM")
    {
        const string assertionNumber = "NC_012920.1";

        foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
        {
            variantEntry.HgvsgNotation = HgvsgNotation.GetNotation(
                assertionNumber,
                variantEntry.Variant,
                Sequence,
                new Interval(0, Sequence.Length));
        }
    }
}
/// <summary>
/// Rebuilds the VCF line of the position: the original columns before the ID column, the ID value
/// from <c>ExtractDbId</c>, the original columns between ID and INFO, the INFO text written by
/// <c>AddInfoField</c>, and all remaining original columns. Columns are tab-separated.
/// </summary>
/// <param name="annotatedPosition">The annotated position to convert.</param>
/// <returns>The assembled VCF line.</returns>
public string Convert(IAnnotatedPosition annotatedPosition)
{
    _sb.Clear();

    var fields = annotatedPosition.Position.VcfFields;
    int column = 0;

    // columns that precede the ID column
    while (column < VcfCommon.IdIndex)
    {
        _sb.Append(fields[column]).Append('\t');
        column++;
    }

    // the ID column is replaced by the extracted ids
    _sb.Append(ExtractDbId(annotatedPosition)).Append('\t');

    // columns between the ID column and the INFO column
    for (column = VcfCommon.IdIndex + 1; column < VcfCommon.InfoIndex; column++)
    {
        _sb.Append(fields[column]).Append('\t');
    }

    AddInfoField(annotatedPosition, _sb);

    // columns that follow the INFO column, each preceded by its separator
    for (column = VcfCommon.InfoIndex + 1; column < fields.Length; column++)
    {
        _sb.Append('\t').Append(fields[column]);
    }

    return _sb.ToString();
}
/// <summary>
/// Appends CSQ entries for every annotated variant: one per canonical Ensembl transcript, one per
/// canonical RefSeq transcript, and one per regulatory region. The allele of each entry is the
/// variant's 1-based index; consequences are joined with '&amp;'.
/// </summary>
/// <param name="unifiedJson">The annotated position providing the variants.</param>
/// <param name="csqs">The list that receives the generated entries.</param>
private static void ExtractCsqs(IAnnotatedPosition unifiedJson, List<CsqEntry> csqs)
{
    for (int variantIndex = 0; variantIndex < unifiedJson.AnnotatedVariants.Length; variantIndex++)
    {
        var variantEntry = unifiedJson.AnnotatedVariants[variantIndex];

        // the allele is the variant's index counted from 1
        string allele = (variantIndex + 1).ToString();

        // canonical Ensembl transcripts
        csqs.AddRange(
            from annotated in variantEntry.EnsemblTranscripts
            where annotated.Transcript.IsCanonical
            select new CsqEntry
            {
                Allele = allele,
                Feature = annotated.Transcript.Id.WithVersion,
                FeatureType = CsqCommon.TranscriptFeatureType,
                Symbol = annotated.Transcript.Gene.Symbol,
                Consequence = annotated.Consequences == null
                    ? null
                    : string.Join("&", annotated.Consequences.Select(ConsequenceUtil.GetConsequence))
            });

        // canonical RefSeq transcripts
        csqs.AddRange(
            variantEntry.RefSeqTranscripts
                .Where(annotated => annotated.Transcript.IsCanonical)
                .Select(annotated => new CsqEntry
                {
                    Allele = allele,
                    Feature = annotated.Transcript.Id.WithVersion,
                    FeatureType = CsqCommon.TranscriptFeatureType,
                    Symbol = annotated.Transcript.Gene.Symbol,
                    Consequence = annotated.Consequences == null
                        ? null
                        : string.Join("&", annotated.Consequences.Select(ConsequenceUtil.GetConsequence))
                }));

        // regulatory regions (their consequences are joined without a null check)
        csqs.AddRange(
            from annotatedRegion in variantEntry.RegulatoryRegions
            select new CsqEntry
            {
                Allele = allele,
                Consequence = string.Join("&", annotatedRegion.Consequences.Select(ConsequenceUtil.GetConsequence)),
                Feature = annotatedRegion.RegulatoryRegion.Id.WithoutVersion,
                FeatureType = CsqCommon.RegulatoryFeatureType
            });
    }
}
/// <summary>
/// Assigns the position's cytogenetic band and, unless the position has a structural variant or a
/// short tandem repeat, sets <c>HgvsgNotation</c> on every annotated variant using the chromosome's
/// RefSeq accession. Nothing happens when the position has no variants.
/// </summary>
/// <param name="annotatedPosition">The position to annotate in place.</param>
public void Annotate(IAnnotatedPosition annotatedPosition)
{
    if (annotatedPosition.AnnotatedVariants == null)
    {
        return;
    }

    var position = annotatedPosition.Position;
    annotatedPosition.CytogeneticBand = Sequence.CytogeneticBands.Find(position.Chromosome, position.Start, position.End);

    // HGVS g. nomenclature is not produced for structural variants or STRs
    bool skipHgvsg = position.HasStructuralVariant || position.HasShortTandemRepeat;
    if (skipHgvsg)
    {
        return;
    }

    string accession = position.Chromosome.RefSeqAccession;

    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        variantEntry.HgvsgNotation = HgvsgNotation.GetNotation(
            accession,
            variantEntry.Variant,
            Sequence,
            new Interval(0, Sequence.Length));
    }
}
/// <summary>
/// Retrieves the transcripts overlapping the position and, for each annotated variant, stores the
/// transcripts produced by <c>TranscriptAnnotationFactory.GetAnnotatedTranscripts</c> in the Ensembl
/// or RefSeq collection according to the transcript source. Gene fusion candidates derived from the
/// variant's break ends are forwarded to the factory.
/// </summary>
/// <param name="annotatedPosition">The position whose variants receive the annotated transcripts.</param>
private void AddTranscripts(IAnnotatedPosition annotatedPosition)
{
    var candidateTranscripts = _transcriptCache.GetOverlappingTranscripts(annotatedPosition.Position);
    if (candidateTranscripts == null)
    {
        return;
    }

    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        var fusionCandidates = GetGeneFusionCandiates(variantEntry.Variant.BreakEnds);
        var results = new List<IAnnotatedTranscript>();

        TranscriptAnnotationFactory.GetAnnotatedTranscripts(
            variantEntry.Variant,
            candidateTranscripts,
            _sequence,
            results,
            variantEntry.OverlappingGenes,
            variantEntry.OverlappingTranscripts,
            _siftCache,
            _polyphenCache,
            fusionCandidates);

        // an empty result list simply makes this loop a no-op
        foreach (var result in results)
        {
            if (result.Transcript.Source != Source.Ensembl)
            {
                variantEntry.RefSeqTranscripts.Add(result);
            }
            else
            {
                variantEntry.EnsemblTranscripts.Add(result);
            }
        }
    }
}
/// <summary>
/// Records in <c>_affectedGenes</c> the overlapping genes of every annotated variant plus the gene
/// symbol of each Ensembl and RefSeq transcript whose consequences contain neither
/// <c>downstream_gene_variant</c> nor <c>upstream_gene_variant</c>. Does nothing when no gene
/// annotation provider is configured.
/// </summary>
/// <param name="annotatedPosition">The position whose variants are inspected.</param>
internal void TrackAffectedGenes(IAnnotatedPosition annotatedPosition)
{
    if (_geneAnnotationProvider == null)
    {
        return;
    }

    foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
    {
        if (variantEntry.OverlappingGenes != null)
        {
            foreach (var overlappingGene in variantEntry.OverlappingGenes)
            {
                _affectedGenes.Add(overlappingGene);
            }
        }

        foreach (var ensemblTranscript in variantEntry.EnsemblTranscripts)
        {
            // transcripts hit only in their flanking regions do not mark the gene as affected
            bool isFlankingOnly =
                ensemblTranscript.Consequences.Contains(ConsequenceTag.downstream_gene_variant) ||
                ensemblTranscript.Consequences.Contains(ConsequenceTag.upstream_gene_variant);

            if (!isFlankingOnly)
            {
                _affectedGenes.Add(ensemblTranscript.Transcript.Gene.Symbol);
            }
        }

        foreach (var refSeqTranscript in variantEntry.RefSeqTranscripts)
        {
            bool isFlankingOnly =
                refSeqTranscript.Consequences.Contains(ConsequenceTag.downstream_gene_variant) ||
                refSeqTranscript.Consequences.Contains(ConsequenceTag.upstream_gene_variant);

            if (!isFlankingOnly)
            {
                _affectedGenes.Add(refSeqTranscript.Transcript.Gene.Symbol);
            }
        }
    }
}
/// <summary>
/// Forwards the position of <paramref name="annotatedPosition"/> together with its JSON text to
/// <c>jsonWriter.WritePosition</c>.
/// </summary>
/// <param name="annotatedPosition">The annotated position whose <c>Position</c> is written.</param>
/// <param name="jsonWriter">The writer receiving the entry.</param>
/// <param name="jsonOutput">The JSON text of the position.</param>
private static void WriteAnnotatedPosition(IAnnotatedPosition annotatedPosition, IJsonWriter jsonWriter, string jsonOutput)
{
    jsonWriter.WritePosition(annotatedPosition.Position, jsonOutput);
}