Exemple #1
0
        /// <summary>
        /// Loads the chromosome for the position, adds supplementary intervals and then, for every
        /// variant whose behavior has NeedSaPosition set, attaches the annotation returned by each
        /// supplementary annotation reader at the variant's start.
        /// </summary>
        /// <param name="annotatedPosition">The position whose variants are annotated in place.</param>
        public void Annotate(IAnnotatedPosition annotatedPosition)
        {
            LoadChromosome(annotatedPosition.Position.Chromosome);

            // without at least one reader there is nothing to look up
            bool hasReaders = _saReaders != null && _saReaders.Count != 0;
            if (!hasReaders)
            {
                return;
            }

            AddSupplementaryIntervals(annotatedPosition);

            foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
            {
                if (variantEntry.Variant.Behavior.NeedSaPosition)
                {
                    var start = variantEntry.Variant.Start;

                    foreach (var reader in _saReaders)
                    {
                        var entry = reader.GetAnnotation(start);
                        if (entry == null)
                        {
                            continue;
                        }

                        AddSaPositon(entry, variantEntry);
                    }
                }
            }
        }
Exemple #2
0
 /// <summary>
 /// Sets InLowComplexityRegion on every variant to the result of the interval reader's
 /// OverlapsAny check for that variant.
 /// </summary>
 /// <param name="annotatedPosition">The position whose variants are flagged in place.</param>
 public void Annotate(IAnnotatedPosition annotatedPosition)
 {
     foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
     {
         var overlaps = _nsiReader.OverlapsAny(variantEntry.Variant);
         variantEntry.InLowComplexityRegion = overlaps;
     }
 }
Exemple #3
0
        /// <summary>
        /// Builds the content of the VCF ID column. The IDs returned by GetNonDbsnpIds for the existing
        /// ID field are added first, followed by the VCF strings of every supplementary annotation whose
        /// data source key equals DbSnpKeyName.
        /// </summary>
        /// <param name="annotatedPosition">The annotated position supplying the VCF fields and variants.</param>
        /// <returns>The string produced by VcfField.GetString for the collected IDs.</returns>
        private static string ExtractDbId(IAnnotatedPosition annotatedPosition)
        {
            var idField = new VcfField();

            var originalId  = annotatedPosition.Position.VcfFields[VcfCommon.IdIndex];
            var retainedIds = GetNonDbsnpIds(originalId);

            if (retainedIds != null)
            {
                foreach (var id in retainedIds)
                {
                    idField.Add(id);
                }
            }

            foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
            {
                foreach (var annotation in variantEntry.SupplementaryAnnotations)
                {
                    // only annotations coming from the dbSNP data source contribute IDs
                    if (annotation.SaDataSource.KeyName == DbSnpKeyName)
                    {
                        foreach (var vcfString in annotation.GetVcfStrings())
                        {
                            idField.Add(vcfString);
                        }
                    }
                }
            }

            return idField.GetString("");
        }
Exemple #4
0
        /// <summary>
        /// Queries each interval reader with the first variant of the position and records every
        /// non-null result as a supplementary interval. Readers reporting for small variants are skipped
        /// unless some variant has the SmallVariants behavior; readers reporting for structural variants
        /// are skipped unless some variant has NeedSaInterval set.
        /// </summary>
        /// <param name="annotatedPosition">The position that receives the supplementary intervals.</param>
        private void GetStructuralVariantAnnotations(IAnnotatedPosition annotatedPosition)
        {
            var  variantEntries = annotatedPosition.AnnotatedVariants;
            bool wantsIntervals = variantEntries.Any(v => v.Variant.Behavior.NeedSaInterval);
            bool wantsSmall     = variantEntries.Any(v => v.Variant.Behavior == AnnotationBehavior.SmallVariants);

            foreach (INsiReader reader in _nsiReaders)
            {
                // skip readers whose target variant class is absent at this position
                if (reader.ReportFor == ReportFor.SmallVariants && !wantsSmall ||
                    reader.ReportFor == ReportFor.StructuralVariants && !wantsIntervals)
                {
                    continue;
                }

                var found = reader.GetAnnotation(annotatedPosition.Position.Variants[0]);
                if (found != null)
                {
                    annotatedPosition.SupplementaryIntervals.Add(
                        new SupplementaryAnnotation(reader.JsonKey, true, false, null, found));
                }
            }
        }
        /// <summary>
        /// Annotates a single position and returns the JSON document written by a JsonWriter
        /// (the position entry followed by the gene annotations) as a string.
        /// </summary>
        /// <param name="position">The position to annotate.</param>
        /// <param name="resources">Supplies the annotator and the optional pre-load step; also handed to the JSON writer.</param>
        /// <param name="sampleNames">Sample names forwarded to the JSON writer.</param>
        /// <param name="preloadRequired">When true, resources.SingleVariantPreLoad is called before annotating.</param>
        /// <returns>The UTF-8 decoded content written to the in-memory stream.</returns>
        /// <exception cref="UserErrorException">Thrown when annotation yields no JSON for the position.</exception>
        private static string GetPositionAnnotation(IPosition position, IAnnotationResources resources, string[] sampleNames, bool preloadRequired)
        {
            if (preloadRequired)
            {
                resources.SingleVariantPreLoad(position);
            }
            IAnnotatedPosition annotatedPosition = resources.Annotator.Annotate(position);
            string             json = annotatedPosition?.GetJsonString();

            if (json == null)
            {
                throw new UserErrorException("No variant is provided for annotation");
            }

            // the stream is now disposed deterministically instead of being left to the GC
            using (var outputJsonStream = new MemoryStream())
            {
                using (var jsonWriter = new JsonWriter(outputJsonStream, null, resources, Date.CurrentTimeStamp, sampleNames, true))
                {
                    WriteAnnotatedPosition(annotatedPosition, jsonWriter, json);
                    jsonWriter.WriteGenes(resources.Annotator.GetGeneAnnotations());
                }

                // the writer has been disposed here (presumably flushing its output - confirm in JsonWriter);
                // ToArray copies the whole buffer regardless of Position, so no rewind is required
                return Encoding.UTF8.GetString(outputJsonStream.ToArray());
            }
        }
Exemple #6
0
        /// <summary>
        /// Annotates every variant at the position against the overlapping/flanking transcripts returned
        /// by the transcript cache and files each annotated transcript into the variant's Ensembl or
        /// RefSeq list according to the transcript's source.
        /// </summary>
        /// <param name="annotatedPosition">The position whose variants receive transcript annotations.</param>
        private void AddTranscripts(IAnnotatedPosition annotatedPosition)
        {
            var candidates = _transcriptCache.GetOverlappingFlankingTranscripts(annotatedPosition.Position);
            if (candidates == null)
            {
                // todo: handle intergenic variants
                return;
            }

            foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
            {
                var results = new List<IAnnotatedTranscript>();
                PianoAnnotationUtils.GetAnnotatedTranscripts(variantEntry.Variant, candidates, _sequence, results);

                // an empty result list simply makes this loop a no-op
                foreach (var result in results)
                {
                    if (result.Transcript.Source != Source.Ensembl)
                    {
                        variantEntry.RefSeqTranscripts.Add(result);
                    }
                    else
                    {
                        variantEntry.EnsemblTranscripts.Add(result);
                    }
                }
            }
        }
Exemple #7
0
        /// <summary>
        /// Adds overlapping supplementary intervals to the position, using the first annotated variant
        /// to define the query range. For insertions the range starts at the variant's end; otherwise it
        /// starts at the variant's start.
        /// </summary>
        /// <param name="annotatedPosition">The position that receives the supplementary intervals.</param>
        private void AddSupplementaryIntervals(IAnnotatedPosition annotatedPosition)
        {
            bool hasAnyIntervals = _hasSmallVariantIntervals || _hasSvIntervals || _hasAllVariantIntervals;
            if (!hasAnyIntervals)
            {
                return;
            }

            var firstVariant = annotatedPosition.AnnotatedVariants[0].Variant;

            var end   = firstVariant.End;
            var begin = firstVariant.Start;
            if (firstVariant.Type == VariantType.insertion)
            {
                begin = firstVariant.End;
            }

            // NOTE(review): the small-variant interval array is consulted under the same NeedSaInterval
            // condition as the SV interval array - confirm that this gating is intended.
            if (firstVariant.Behavior.NeedSaInterval)
            {
                if (_hasSmallVariantIntervals)
                {
                    AddIntervals(annotatedPosition, _smallVariantIntervalArray, begin, end);
                }

                if (_hasSvIntervals)
                {
                    AddIntervals(annotatedPosition, _svIntervalArray, begin, end);
                }
            }

            // intervals that apply to all variants are added regardless of the variant's behavior
            if (_hasAllVariantIntervals)
            {
                AddIntervals(annotatedPosition, _allVariantIntervalArray, begin, end);
            }
        }
Exemple #8
0
 /// <summary>
 /// Calls AddSmallAnnotations for each variant whose behavior has NeedSaPosition set.
 /// </summary>
 /// <param name="annotatedPosition">The position whose variants are examined.</param>
 private void AddSmallVariantAnnotations(IAnnotatedPosition annotatedPosition)
 {
     foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
     {
         if (variantEntry.Variant.Behavior.NeedSaPosition)
         {
             AddSmallAnnotations(variantEntry);
         }
     }
 }
 /// <summary>
 /// Assigns a PhyloP score to every SNV at the position, looked up by the position's chromosome
 /// and the variant's start. Variants of any other type are left untouched.
 /// </summary>
 /// <param name="annotatedPosition">The position whose SNVs receive a score.</param>
 public void Annotate(IAnnotatedPosition annotatedPosition)
 {
     foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
     {
         if (variantEntry.Variant.Type == VariantType.SNV)
         {
             var chromosome = annotatedPosition.Position.Chromosome;
             variantEntry.PhylopScore = _phylopReader.GetAnnotation(chromosome, variantEntry.Variant.Start);
         }
     }
 }
Exemple #10
0
        /// <summary>
        /// Adds supplementary annotations to the position: position/allele annotations when the NSA
        /// readers are available, then interval annotations when the NSI readers are available.
        /// </summary>
        /// <param name="annotatedPosition">The position that is annotated in place.</param>
        public void Annotate(IAnnotatedPosition annotatedPosition)
        {
            // position- and allele-level annotations require the NSA readers
            if (_nsaReaders != null)
            {
                AddPositionAndAlleleAnnotations(annotatedPosition);
            }

            // interval annotations require the NSI readers
            if (_nsiReaders != null)
            {
                GetStructuralVariantAnnotations(annotatedPosition);
            }
        }
Exemple #11
0
 /// <summary>
 /// Runs every plugin's Annotate method on the position, in enumeration order.
 /// Nothing is done when no sequence is supplied.
 /// </summary>
 /// <param name="plugins">The plugins to run.</param>
 /// <param name="annotatedPosition">The position handed to each plugin.</param>
 /// <param name="sequence">The sequence handed to each plugin; when null, no plugin is invoked.</param>
 public static void Annotate(this IEnumerable<IPlugin> plugins, IAnnotatedPosition annotatedPosition,
                             ISequence sequence)
 {
     if (sequence != null)
     {
         foreach (var current in plugins)
         {
             current.Annotate(annotatedPosition, sequence);
         }
     }
 }
Exemple #12
0
        /// <summary>
        /// Passes the transcripts of every variant at the position to AddGenesFromTranscripts.
        /// Does nothing when no gene annotation provider is configured.
        /// </summary>
        /// <param name="annotatedPosition">The position whose variants are inspected.</param>
        private void TrackAffectedGenes(IAnnotatedPosition annotatedPosition)
        {
            if (_geneAnnotationProvider != null)
            {
                foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
                {
                    AddGenesFromTranscripts(variantEntry.Transcripts);
                }
            }
        }
Exemple #13
0
 /// <summary>
 /// Calls AddSmallAnnotations for every variant whose behavior requests position-level or
 /// allele-level supplementary annotation, forwarding both flags.
 /// </summary>
 /// <param name="annotatedPosition">The position whose variants are examined.</param>
 private void AddPositionAndAlleleAnnotations(IAnnotatedPosition annotatedPosition)
 {
     foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
     {
         var wantsPosition = variantEntry.Variant.Behavior.NeedSaPosition;
         var wantsAllele   = variantEntry.Variant.Behavior.NeedSaAllele;

         if (wantsPosition || wantsAllele)
         {
             AddSmallAnnotations(variantEntry, wantsPosition, wantsAllele);
         }
     }
 }
Exemple #14
0
        /// <summary>
        /// Loads the prediction caches for the position's chromosome and adds transcript annotations.
        /// Positions without annotated variants are ignored.
        /// </summary>
        /// <param name="annotatedPosition">The position that is annotated in place.</param>
        public void Annotate(IAnnotatedPosition annotatedPosition)
        {
            var variantEntries = annotatedPosition.AnnotatedVariants;
            if (variantEntries == null || variantEntries.Length == 0)
            {
                return;
            }

            LoadPredictionCaches(annotatedPosition.Position.Chromosome.Index);
            AddTranscripts(annotatedPosition);
        }
Exemple #15
0
        /// <summary>
        /// Loads the prediction caches for the position's chromosome, then adds regulatory region and
        /// transcript annotations using the interval forests of the transcript cache.
        /// Positions without annotated variants are ignored.
        /// </summary>
        /// <param name="annotatedPosition">The position that is annotated in place.</param>
        public void Annotate(IAnnotatedPosition annotatedPosition)
        {
            var variantEntries = annotatedPosition.AnnotatedVariants;
            if (variantEntries == null || variantEntries.Length == 0)
            {
                return;
            }

            ushort chromosomeIndex = annotatedPosition.Position.Chromosome.Index;
            LoadPredictionCaches(chromosomeIndex);

            AddRegulatoryRegions(variantEntries, _transcriptCache.RegulatoryIntervalForest);
            AddTranscripts(variantEntries, _transcriptCache.TranscriptIntervalForest);
        }
        /// <summary>
        /// Assigns a PhyloP score to every SNV at the position. The chromosome is (re)loaded first
        /// when its UCSC name differs from the currently loaded reference name.
        /// </summary>
        /// <param name="annotatedPosition">The position whose SNVs receive a score.</param>
        public void Annotate(IAnnotatedPosition annotatedPosition)
        {
            var chromosome = annotatedPosition.Position.Chromosome;
            if (_currentUcscReferenceName != chromosome.UcscName)
            {
                LoadChromosome(chromosome);
            }

            foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
            {
                // scores are only assigned to single nucleotide variants
                if (variantEntry.Variant.Type == VariantType.SNV)
                {
                    variantEntry.PhylopScore = _phylopReader.GetScore(variantEntry.Variant.Start);
                }
            }
        }
Exemple #17
0
        /// <summary>
        /// Records the position in the Jasix index, writes its JSON entry and, unless the position is
        /// recomposed or no VCF/gVCF writer is present, writes the converted VCF line to each writer
        /// that is available.
        /// </summary>
        /// <param name="annotatedPosition">The annotated position being written.</param>
        /// <param name="textWriterPosition">The value stored in the index alongside the position.</param>
        /// <param name="jasixIndexCreator">The index creator that records the position.</param>
        /// <param name="jsonWriter">Receives the JSON entry.</param>
        /// <param name="vcfWriter">Optional VCF writer; may be null.</param>
        /// <param name="gvcfWriter">Optional gVCF writer; may be null.</param>
        /// <param name="jsonOutput">The JSON text for the position.</param>
        private void WriteOutput(IAnnotatedPosition annotatedPosition, long textWriterPosition,
                                 OnTheFlyIndexCreator jasixIndexCreator, IJsonWriter jsonWriter, LiteVcfWriter vcfWriter,
                                 LiteVcfWriter gvcfWriter, string jsonOutput)
        {
            jasixIndexCreator.Add(annotatedPosition.Position, textWriterPosition);
            jsonWriter.WriteJsonEntry(jsonOutput);

            bool noVcfOutput = vcfWriter == null && gvcfWriter == null;
            if (noVcfOutput || annotatedPosition.Position.IsRecomposed)
            {
                return;
            }

            // convert once and reuse the same line for both writers
            string convertedLine = _conversion.Convert(annotatedPosition);

            vcfWriter?.Write(convertedLine);
            gvcfWriter?.Write(convertedLine);
        }
Exemple #18
0
        /// <summary>
        /// Writes one tab-separated line per annotated transcript (Ensembl first, then RefSeq) for each
        /// variant at the position. Every line starts with the chromosome, position, reference allele
        /// and alternate allele, followed by the transcript's string representation. Transcripts whose
        /// string representation is null are skipped.
        /// </summary>
        /// <param name="annotatedPosition">The annotated position to write.</param>
        /// <param name="writer">The destination writer.</param>
        private static void WriteAnnotatedPostion(IAnnotatedPosition annotatedPosition, StreamWriter writer)
        {
            //"#Chrom\tPos\tRefAllele\tAltAllele\tGeneSymbol\tGeneId\tTranscriptID\tProteinID\tProteinPos\tUpstream\tAAchange\tDownstream\tConsequences";
            var variantEntries = annotatedPosition.AnnotatedVariants;
            if (variantEntries == null || variantEntries.Length == 0)
            {
                return;
            }

            for (int alleleIndex = 0; alleleIndex < variantEntries.Length; alleleIndex++)
            {
                var variantEntry = variantEntries[alleleIndex];
                var position     = annotatedPosition.Position;

                // the leading columns are identical for every transcript of this allele
                var linePrefix = position.VcfFields[VcfCommon.ChromIndex] + "\t" + position.Start + "\t" +
                                 position.RefAllele + "\t" + position.AltAlleles[alleleIndex] + "\t";

                foreach (var ensemblTranscript in variantEntry.EnsemblTranscripts)
                {
                    var text = ensemblTranscript.ToString();
                    if (text != null)
                    {
                        writer.WriteLine(linePrefix + text);
                    }
                }

                foreach (var refSeqTranscript in variantEntry.RefSeqTranscripts)
                {
                    var text = refSeqTranscript.ToString();
                    if (text != null)
                    {
                        writer.WriteLine(linePrefix + text);
                    }
                }
            }
        }
        /// <summary>
        /// For every short tandem repeat variant at the position, looks up the matching annotations
        /// and, when a non-null result is returned, stores it as the variant's repeat expansion
        /// phenotypes. Other variant types are ignored.
        /// </summary>
        /// <param name="annotatedPosition">The position whose variants are annotated in place.</param>
        public void Annotate(IAnnotatedPosition annotatedPosition)
        {
            foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
            {
                if (variantEntry.Variant.Type == VariantType.short_tandem_repeat_variation)
                {
                    var expansion = (RepeatExpansion)variantEntry.Variant;
                    var matches   = _matcher.GetMatchingAnnotations(expansion);

                    if (matches != null)
                    {
                        variantEntry.RepeatExpansionPhenotypes = matches;
                    }
                }
            }
        }
        /// <summary>
        /// Loads the prediction caches for the position's chromosome, then adds regulatory regions and
        /// transcripts to the variants. Gene fusions are added as well when the position has a
        /// structural variant. Positions without annotated variants are ignored.
        /// </summary>
        /// <param name="annotatedPosition">The position that is annotated in place.</param>
        public void Annotate(IAnnotatedPosition annotatedPosition)
        {
            var variantEntries = annotatedPosition.AnnotatedVariants;
            if (variantEntries == null || variantEntries.Length == 0)
            {
                return;
            }

            var currentPosition = annotatedPosition.Position;
            LoadPredictionCaches(currentPosition.Chromosome.Index);

            AddRegulatoryRegions(variantEntries, _transcriptCache.RegulatoryIntervalForest);
            AddTranscripts(variantEntries);

            if (!currentPosition.HasStructuralVariant)
            {
                return;
            }

            AddGeneFusions(variantEntries);
        }
Exemple #21
0
        /// <summary>
        /// Appends the INFO column to the string builder: the updated INFO field of the position plus
        /// the entries contributed by ExtractInfo, followed by the CSQ text for the position. A single
        /// "." is appended when there is neither INFO text nor any CSQ entry.
        /// </summary>
        /// <param name="annotatedPosition">The annotated position supplying INFO data and CSQ entries.</param>
        /// <param name="sb">The builder that receives the INFO column.</param>
        private void AddInfoField(IAnnotatedPosition annotatedPosition, StringBuilder sb)
        {
            var entries = new VcfField();

            var existingInfo = annotatedPosition.Position.InfoData.UpdatedInfoField;
            if (!string.IsNullOrEmpty(existingInfo))
            {
                entries.Add(existingInfo);
            }

            ExtractInfo(annotatedPosition, entries);

            // a lone "." is treated the same as an empty INFO string
            var infoText = entries.GetString("");
            if (infoText == ".")
            {
                infoText = "";
            }

            sb.Append(infoText);

            var csqEntries = new List<CsqEntry>();
            ExtractCsqs(annotatedPosition, csqEntries);

            // a separator is only needed when CSQ output follows existing INFO text
            if (csqEntries.Count != 0 && infoText.Length > 0)
            {
                sb.Append(";");
            }

            // append the CSQ text produced by GetCsqtAndCsqrVcfInfo
            sb.Append(GetCsqtAndCsqrVcfInfo(csqEntries));

            if (csqEntries.Count == 0 && infoText.Length == 0)
            {
                sb.Append(".");
            }
        }
Exemple #22
0
        /// <summary>
        /// Adds every interval overlapping [begin, end] to the position's supplementary intervals.
        /// A reciprocal overlap with the first annotated variant is computed only when the position's
        /// start is smaller than its end; otherwise null is stored.
        /// </summary>
        /// <param name="annotatedPosition">The position that receives the intervals.</param>
        /// <param name="intervalArray">The interval search structure to query.</param>
        /// <param name="begin">Start of the query range.</param>
        /// <param name="end">End of the query range.</param>
        private static void AddIntervals(IAnnotatedPosition annotatedPosition,
                                         IIntervalSearch<ISupplementaryInterval> intervalArray, int begin, int end)
        {
            var hits = intervalArray.GetAllOverlappingValues(begin, end);
            if (hits == null)
            {
                return;
            }

            foreach (var hit in hits)
            {
                var currentPosition = annotatedPosition.Position;

                var overlap = currentPosition.Start < currentPosition.End
                                ? hit.GetReciprocalOverlap(annotatedPosition.AnnotatedVariants[0].Variant)
                                : null;

                var annotatedInterval = new AnnotatedSupplementaryInterval(hit, overlap);
                annotatedPosition.SupplementaryIntervals.Add(annotatedInterval);
            }
        }
        /// <summary>
        /// Annotates each variant at the position with the regulatory regions it overlaps. Variants
        /// spanning more than MaxSvLengthForRegulatoryRegionAnnotation bases are skipped, as are
        /// insertions whose end coincides with the end of the regulatory region.
        /// </summary>
        /// <param name="annotatedPosition">The position whose variants receive regulatory region annotations.</param>
        private void AddRegulatoryRegions(IAnnotatedPosition annotatedPosition)
        {
            var candidateRegions = _transcriptCache.GetOverlappingRegulatoryRegions(annotatedPosition.Position);
            if (candidateRegions == null)
            {
                return;
            }

            foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
            {
                var  variant     = variantEntry.Variant;
                bool isInsertion = variant.Type == VariantType.insertion;

                // insertions are assumed to be inserted at the end position, so the end doubles as the begin
                var variantEnd   = variant.End;
                var variantBegin = isInsertion ? variantEnd : variant.Start;

                // regulatory region annotation is disabled for variants above the length limit
                var span = variantEnd - variantBegin + 1;
                if (span > MaxSvLengthForRegulatoryRegionAnnotation)
                {
                    continue;
                }

                foreach (var region in candidateRegions)
                {
                    // an insertion at the very end of the feature lies past it and does not count as overlapping
                    if (variant.Overlaps(region) && !(isInsertion && variantEnd == region.End))
                    {
                        variantEntry.RegulatoryRegions.Add(RegulatoryRegionAnnotator.Annotate(variant, region));
                    }
                }
            }
        }
Exemple #24
0
        /// <summary>
        /// Loads the chromosome of the position, assigns the position's cytogenetic band and, for
        /// positions on chrM only, assigns HGVS g. notation to every variant using the accession
        /// NC_012920.1.
        /// </summary>
        /// <param name="annotatedPosition">The position that is annotated in place.</param>
        public void Annotate(IAnnotatedPosition annotatedPosition)
        {
            LoadChromosome(annotatedPosition.Position.Chromosome);

            var variantEntries = annotatedPosition.AnnotatedVariants;
            if (variantEntries == null)
            {
                return;
            }

            var currentPosition = annotatedPosition.Position;
            annotatedPosition.CytogeneticBand = _cytogeneticBands.GetCytogeneticBand(currentPosition.Chromosome,
                                                                                     currentPosition.Start,
                                                                                     currentPosition.End);

            // HGVS g. notation is only produced for the mitochondrial chromosome here
            if (currentPosition.Chromosome.UcscName == "chrM")
            {
                const string mitochondrialAccession = "NC_012920.1";

                foreach (var variantEntry in variantEntries)
                {
                    var wholeSequence = new Interval(0, Sequence.Length);
                    variantEntry.HgvsgNotation = HgvsgNotation.GetNotation(mitochondrialAccession, variantEntry.Variant, Sequence, wholeSequence);
                }
            }
        }
Exemple #25
0
        /// <summary>
        /// Rebuilds the VCF line for an annotated position. The original columns are copied, except
        /// that the ID column is replaced by the result of ExtractDbId and the INFO column by the output
        /// of AddInfoField.
        /// </summary>
        /// <param name="annotatedPosition">The annotated position to convert.</param>
        /// <returns>The tab-separated VCF line.</returns>
        public string Convert(IAnnotatedPosition annotatedPosition)
        {
            _sb.Clear();

            var columns = annotatedPosition.Position.VcfFields;

            // columns preceding the ID column are copied verbatim
            for (var i = 0; i < VcfCommon.IdIndex; i++)
            {
                _sb.Append(columns[i]).Append('\t');
            }

            // ID column
            _sb.Append(ExtractDbId(annotatedPosition)).Append('\t');

            // columns between the ID column and the INFO column are copied verbatim
            for (var i = VcfCommon.IdIndex + 1; i < VcfCommon.InfoIndex; i++)
            {
                _sb.Append(columns[i]).Append('\t');
            }

            // INFO column
            AddInfoField(annotatedPosition, _sb);

            // every column after the INFO column, each preceded by its separator
            for (var i = VcfCommon.InfoIndex + 1; i < columns.Length; i++)
            {
                _sb.Append('\t').Append(columns[i]);
            }

            return _sb.ToString();
        }
Exemple #26
0
        /// <summary>
        /// Appends CSQ entries for every variant at the position: one per canonical Ensembl transcript,
        /// one per canonical RefSeq transcript and one per regulatory region. The allele of each entry
        /// is the 1-based index of the variant.
        /// </summary>
        /// <param name="unifiedJson">The annotated position supplying the variants.</param>
        /// <param name="csqs">The list that receives the CSQ entries.</param>
        private static void ExtractCsqs(IAnnotatedPosition unifiedJson, List<CsqEntry> csqs)
        {
            for (int variantIndex = 0; variantIndex < unifiedJson.AnnotatedVariants.Length; variantIndex++)
            {
                var alleleNumber = variantIndex + 1;
                var variantEntry = unifiedJson.AnnotatedVariants[variantIndex];

                // canonical Ensembl transcripts
                csqs.AddRange(from annotated in variantEntry.EnsemblTranscripts
                              where annotated.Transcript.IsCanonical
                              select new CsqEntry
                {
                    Allele      = alleleNumber.ToString(),
                    Feature     = annotated.Transcript.Id.WithVersion,
                    FeatureType = CsqCommon.TranscriptFeatureType,
                    Symbol      = annotated.Transcript.Gene.Symbol,
                    Consequence = annotated.Consequences != null ? string.Join("&", annotated.Consequences.Select(ConsequenceUtil.GetConsequence)) : null
                });

                // canonical RefSeq transcripts
                csqs.AddRange(
                    variantEntry.RefSeqTranscripts.Where(annotated => annotated.Transcript.IsCanonical)
                    .Select(annotated => new CsqEntry
                {
                    Allele      = alleleNumber.ToString(),
                    Feature     = annotated.Transcript.Id.WithVersion,
                    FeatureType = CsqCommon.TranscriptFeatureType,
                    Symbol      = annotated.Transcript.Gene.Symbol,
                    Consequence = annotated.Consequences != null ? string.Join("&", annotated.Consequences.Select(ConsequenceUtil.GetConsequence)) : null
                }));

                // regulatory regions
                csqs.AddRange(from region in variantEntry.RegulatoryRegions
                              select new CsqEntry
                {
                    Allele      = alleleNumber.ToString(),
                    Consequence = string.Join("&", region.Consequences.Select(ConsequenceUtil.GetConsequence)),
                    Feature     = region.RegulatoryRegion.Id.WithoutVersion,
                    FeatureType = CsqCommon.RegulatoryFeatureType
                });
            }
        }
Exemple #27
0
        /// <summary>
        /// Assigns the cytogenetic band to the position and, unless the position has a structural
        /// variant or a short tandem repeat, assigns HGVS g. notation to every variant using the
        /// chromosome's RefSeq accession.
        /// </summary>
        /// <param name="annotatedPosition">The position that is annotated in place.</param>
        public void Annotate(IAnnotatedPosition annotatedPosition)
        {
            var variantEntries = annotatedPosition.AnnotatedVariants;
            if (variantEntries == null)
            {
                return;
            }

            var currentPosition = annotatedPosition.Position;
            annotatedPosition.CytogeneticBand = Sequence.CytogeneticBands.Find(currentPosition.Chromosome,
                                                                               currentPosition.Start,
                                                                               currentPosition.End);

            // we don't want HGVS g. nomenclature for structural variants or STRs
            bool skipHgvsg = currentPosition.HasStructuralVariant || currentPosition.HasShortTandemRepeat;
            if (skipHgvsg)
            {
                return;
            }

            string accession = currentPosition.Chromosome.RefSeqAccession;

            foreach (var variantEntry in variantEntries)
            {
                var wholeSequence = new Interval(0, Sequence.Length);
                variantEntry.HgvsgNotation = HgvsgNotation.GetNotation(accession, variantEntry.Variant, Sequence, wholeSequence);
            }
        }
        /// <summary>
        /// Annotates every variant at the position against the transcripts overlapping the position
        /// and files each annotated transcript into the variant's Ensembl or RefSeq list according to
        /// the transcript's source. Gene fusion candidates derived from the variant's break ends are
        /// passed to the annotation factory.
        /// </summary>
        /// <param name="annotatedPosition">The position whose variants receive transcript annotations.</param>
        private void AddTranscripts(IAnnotatedPosition annotatedPosition)
        {
            var candidates = _transcriptCache.GetOverlappingTranscripts(annotatedPosition.Position);
            if (candidates == null)
            {
                return;
            }

            foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
            {
                var fusionCandidates = GetGeneFusionCandiates(variantEntry.Variant.BreakEnds);
                var results          = new List<IAnnotatedTranscript>();

                TranscriptAnnotationFactory.GetAnnotatedTranscripts(variantEntry.Variant, candidates, _sequence,
                                                                    results, variantEntry.OverlappingGenes,
                                                                    variantEntry.OverlappingTranscripts, _siftCache,
                                                                    _polyphenCache, fusionCandidates);

                // an empty result list simply makes this loop a no-op
                foreach (var result in results)
                {
                    if (result.Transcript.Source != Source.Ensembl)
                    {
                        variantEntry.RefSeqTranscripts.Add(result);
                    }
                    else
                    {
                        variantEntry.EnsemblTranscripts.Add(result);
                    }
                }
            }
        }
Exemple #29
0
        /// <summary>
        /// Records the genes affected by the variants at the position: every overlapping gene, plus
        /// the gene symbol of each Ensembl and RefSeq transcript whose consequences contain neither
        /// downstream_gene_variant nor upstream_gene_variant. Does nothing when no gene annotation
        /// provider is configured.
        /// </summary>
        /// <param name="annotatedPosition">The position whose variants are inspected.</param>
        internal void TrackAffectedGenes(IAnnotatedPosition annotatedPosition)
        {
            if (_geneAnnotationProvider == null)
            {
                return;
            }

            foreach (var variantEntry in annotatedPosition.AnnotatedVariants)
            {
                var overlappingGenes = variantEntry.OverlappingGenes;
                if (overlappingGenes != null)
                {
                    foreach (var overlappingGene in overlappingGenes)
                    {
                        _affectedGenes.Add(overlappingGene);
                    }
                }

                foreach (var annotated in variantEntry.EnsemblTranscripts)
                {
                    var consequences = annotated.Consequences;

                    // transcripts that are merely up- or downstream of the variant do not count
                    if (consequences.Contains(ConsequenceTag.downstream_gene_variant) ||
                        consequences.Contains(ConsequenceTag.upstream_gene_variant))
                    {
                        continue;
                    }

                    _affectedGenes.Add(annotated.Transcript.Gene.Symbol);
                }

                foreach (var annotated in variantEntry.RefSeqTranscripts)
                {
                    var consequences = annotated.Consequences;

                    // transcripts that are merely up- or downstream of the variant do not count
                    if (consequences.Contains(ConsequenceTag.downstream_gene_variant) ||
                        consequences.Contains(ConsequenceTag.upstream_gene_variant))
                    {
                        continue;
                    }

                    _affectedGenes.Add(annotated.Transcript.Gene.Symbol);
                }
            }
        }
 /// <summary>
 /// Hands the position and its JSON text to the JSON writer's WritePosition method.
 /// </summary>
 /// <param name="annotatedPosition">The annotated position whose Position is written.</param>
 /// <param name="jsonWriter">The writer that receives the entry.</param>
 /// <param name="jsonOutput">The JSON text for the position.</param>
 private static void WriteAnnotatedPosition(IAnnotatedPosition annotatedPosition, IJsonWriter jsonWriter,
                                            string jsonOutput)
 {
     jsonWriter.WritePosition(annotatedPosition.Position, jsonOutput);
 }