/// <summary>
        /// Determines the cDNA range that is actually covered by the variant.
        /// When both supplied cDNA coordinates are set (neither is -1) they are returned unchanged.
        /// When neither endpoint region is an exon, (-1, -1) is returned. Otherwise both coordinates
        /// are resolved through GetCoveredCdnaPosition and returned with the smaller value first.
        /// </summary>
        public static (int Start, int End) GetCoveredCdnaPositions(this ITranscriptRegion[] regions, int cdnaStart, int startRegionIndex,
                                                                   int cdnaEnd, int endRegionIndex, bool onReverseStrand)
        {
            // both endpoints already have cDNA coordinates: nothing to resolve
            bool bothEndpointsMapped = cdnaStart != -1 && cdnaEnd != -1;
            if (bothEndpointsMapped)
            {
                return (cdnaStart, cdnaEnd);
            }

            // on the reverse strand the start and end region indexes trade places
            if (onReverseStrand)
            {
                Swap.Int(ref startRegionIndex, ref endRegionIndex);
            }

            var startRegion = regions.GetCoveredRegion(startRegionIndex);
            var endRegion   = regions.GetCoveredRegion(endRegionIndex);

            // at least one endpoint has to sit in an exon, otherwise there is no covered range
            bool touchesExon = startRegion.Type == TranscriptRegionType.Exon ||
                               endRegion.Type == TranscriptRegionType.Exon;
            if (!touchesExon)
            {
                return (-1, -1);
            }

            // the CdnaEnd of the first region (reverse strand) or of the last region (forward strand)
            int boundaryRegionIndex = onReverseStrand ? 0 : regions.Length - 1;
            int codingEnd           = regions[boundaryRegionIndex].CdnaEnd;

            int coveredStart = GetCoveredCdnaPosition(cdnaStart, startRegion, startRegionIndex, codingEnd, onReverseStrand, false);
            int coveredEnd   = GetCoveredCdnaPosition(cdnaEnd, endRegion, endRegionIndex, codingEnd, onReverseStrand, true);

            // report the pair in ascending order
            if (coveredStart < coveredEnd)
            {
                return (coveredStart, coveredEnd);
            }

            return (coveredEnd, coveredStart);
        }
Esempio n. 2
0
        /// <summary>
        /// Formats the protein positions of a transcript annotation as a range string.
        /// Returns an empty string when neither CDS endpoint is valid, "?-end" or "begin-?" when
        /// only one endpoint is valid, a single number when both positions are equal, and
        /// "begin-end" (smaller value first) otherwise. Every number is formatted with the
        /// invariant culture so the output does not depend on the current thread culture.
        /// </summary>
        /// <param name="ta">the transcript annotation supplying the validity flags and protein positions</param>
        /// <returns>the formatted protein range, or an empty string if no endpoint is valid</returns>
        private static string GetProtRangeString(TranscriptAnnotation ta)
        {
            var hasStart = ta.HasValidCdsStart;
            var hasEnd   = ta.HasValidCdsEnd;

            if (!hasStart && !hasEnd)
            {
                return "";
            }

            // only the end is known
            if (!hasStart)
            {
                return "?-" + ta.ProteinEnd.ToString(CultureInfo.InvariantCulture);
            }

            // only the beginning is known
            if (!hasEnd)
            {
                return ta.ProteinBegin.ToString(CultureInfo.InvariantCulture) + "-?";
            }

            var begin = ta.ProteinBegin;
            var end   = ta.ProteinEnd;

            // always report the smaller position first
            if (end < begin)
            {
                Swap.Int(ref begin, ref end);
            }

            var beginText = begin.ToString(CultureInfo.InvariantCulture);

            return begin == end
                ? beginText
                : beginText + "-" + end.ToString(CultureInfo.InvariantCulture);
        }
        /// <summary>
        /// Builds the HGVS protein notation for a frameshift. The position and the reference and
        /// alternate amino acids come from HgvsUtilities.GetChangesAfterFrameshift. When the
        /// alternate amino acid is the stop codon character, a substitution to "Ter" is reported;
        /// otherwise a frameshift notation that includes the count returned by
        /// HgvsUtilities.GetNumAminoAcidsUntilStopCodon.
        /// </summary>
        private static string GetHgvsFrameshiftNotation(ISequence refSequence, int cdsBegin, int cdsEnd,
                                                        string transcriptAltAllele, ITranscript transcript, bool isMitochondrial, string proteinId, int start,
                                                        int end)
        {
            var referencePeptide = transcript.Translation.PeptideSeq;
            var alternatePeptide = HgvsUtilities.GetAltPeptideSequence(refSequence, cdsBegin, cdsEnd, transcriptAltAllele, transcript,
                                                                       isMitochondrial);

            // make sure start holds the smaller of the two positions
            if (end < start)
            {
                Swap.Int(ref start, ref end);
            }

            var changes = HgvsUtilities.GetChangesAfterFrameshift(start, referencePeptide, alternatePeptide);

            // Item1: position, Item2: reference amino acid, Item3: alternate amino acid
            start = changes.Item1;
            var refAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(changes.Item2);
            var altAminoAcid    = changes.Item3;

            bool altIsStop = altAminoAcid == AminoAcids.StopCodonChar;
            if (altIsStop)
            {
                return HgvspNotation.GetSubstitutionNotation(proteinId, start, refAbbreviation, "Ter");
            }

            var altAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(altAminoAcid);
            var countToStop     = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(alternatePeptide, referencePeptide, start - 1, true);

            return HgvspNotation.GetFrameshiftNotation(proteinId, start, refAbbreviation, altAbbreviation, countToStop);
        }
Esempio n. 4
0
        /// <summary>
        /// Formats the cDNA positions of a transcript annotation as a range string.
        /// Returns null when neither cDNA endpoint is valid, "?-end" or "begin-?" when only one
        /// endpoint is valid, a single number when both positions are equal, and "begin-end"
        /// (smaller value first) otherwise. Every number is formatted with the invariant culture
        /// so the output does not depend on the current thread culture.
        /// </summary>
        /// <param name="ta">the transcript annotation supplying the validity flags and cDNA positions</param>
        /// <returns>the formatted cDNA range, or null if no endpoint is valid</returns>
        private static string GetCdnaRangeString(TranscriptAnnotation ta)
        {
            var hasStart = ta.HasValidCdnaStart;
            var hasEnd   = ta.HasValidCdnaEnd;

            if (!hasStart && !hasEnd)
            {
                return null;
            }

            // only the end is known
            if (!hasStart)
            {
                return "?-" + ta.ComplementaryDnaEnd.ToString(CultureInfo.InvariantCulture);
            }

            // only the beginning is known
            if (!hasEnd)
            {
                return ta.ComplementaryDnaBegin.ToString(CultureInfo.InvariantCulture) + "-?";
            }

            var begin = ta.ComplementaryDnaBegin;
            var end   = ta.ComplementaryDnaEnd;

            // always report the smaller position first
            if (end < begin)
            {
                Swap.Int(ref begin, ref end);
            }

            var beginText = begin.ToString(CultureInfo.InvariantCulture);

            return begin == end
                ? beginText
                : beginText + "-" + end.ToString(CultureInfo.InvariantCulture);
        }
Esempio n. 5
0
        // Verifies that Swap.Int exchanges the values of its two ref arguments.
        public void Swap_Int()
        {
            const int originalFirst  = 3;
            const int originalSecond = 5;

            int first  = originalFirst;
            int second = originalSecond;

            Swap.Int(ref first, ref second);

            // after the swap each variable holds the other's original value
            Assert.Equal(originalSecond, first);
            Assert.Equal(originalFirst, second);
        }
        /// <summary>
        /// Looks up the first and last matching regions (via FindFirst and FindLast with the
        /// supplied predicate) between startIndex and endIndex and returns their Ids with the
        /// smaller value first. An Id of -1 stands for a search that returned -1.
        /// </summary>
        private static (int Start, int End) FindDesiredRegionIds(this ITranscriptRegion[] regions,
                                                                 Func <TranscriptRegionType, bool> hasDesiredRegion, int startIndex, int endIndex)
        {
            int firstMatch = FindFirst(regions, hasDesiredRegion, startIndex, endIndex);

            // resume the backwards search from the first match when there is one
            int searchFrom = firstMatch == -1 ? startIndex : firstMatch;
            int lastMatch  = FindLast(regions, hasDesiredRegion, searchFrom, endIndex);

            int startId = -1;
            if (firstMatch != -1)
            {
                startId = regions[firstMatch].Id;
            }

            int endId = -1;
            if (lastMatch != -1)
            {
                endId = regions[lastMatch].Id;
            }

            // report the pair in ascending order
            return endId < startId ? (endId, startId) : (startId, endId);
        }
        /// <summary>
        /// Looks up the first and last regions of the desired type (via FindFirst and FindLast)
        /// between startIndex and endIndex and returns their Ids with the smaller value first.
        /// An Id of -1 stands for a search that returned -1.
        /// </summary>
        private static (int Start, int End) FindDesiredRegionIds(this ITranscriptRegion[] regions,
                                                                 TranscriptRegionType desiredType, int startIndex, int endIndex)
        {
            var firstMatch = FindFirst(regions, desiredType, startIndex, endIndex);

            // resume the backwards search from the first match when there is one
            var searchFrom = firstMatch == -1 ? startIndex : firstMatch;
            var lastMatch  = FindLast(regions, desiredType, searchFrom, endIndex);

            int startId = -1;
            if (firstMatch != -1)
            {
                startId = regions[firstMatch].Id;
            }

            int endId = -1;
            if (lastMatch != -1)
            {
                endId = regions[lastMatch].Id;
            }

            // report the pair in ascending order
            return endId < startId ? (endId, startId) : (startId, endId);
        }
Esempio n. 8
0
 /// <summary>
 /// Formats two positions as a range string, where -1 marks an unknown position.
 /// Returns null when both are unknown, "?-end" or "start-?" when one is unknown, a single
 /// number when both are equal, and "start-end" (smaller value first) otherwise.
 /// Every number is formatted with the invariant culture so the result does not depend on
 /// the current thread culture.
 /// </summary>
 /// <param name="start">first position, or -1 if unknown</param>
 /// <param name="end">second position, or -1 if unknown</param>
 /// <returns>the formatted range, or null if both positions are unknown</returns>
 private static string GetRangeString(int start, int end)
 {
     if (start == -1 && end == -1)
     {
         return null;
     }

     if (start == -1)
     {
         return "?-" + end.ToString(CultureInfo.InvariantCulture);
     }

     if (end == -1)
     {
         return start.ToString(CultureInfo.InvariantCulture) + "-?";
     }

     // always report the smaller position first
     if (start > end)
     {
         (start, end) = (end, start);
     }

     string startText = start.ToString(CultureInfo.InvariantCulture);

     return start == end
         ? startText
         : startText + "-" + end.ToString(CultureInfo.InvariantCulture);
 }
        /// <summary>
        /// Assembles a MappedPosition for the variant: cDNA positions (exchanged when the
        /// transcript is on the reverse strand), the CDS positions derived from them, the protein
        /// positions derived from the CDS positions, and the exon/intron values returned by
        /// GetExonsAndIntrons for the given region indexes.
        /// </summary>
        private static IMappedPosition GetMappedPosition(ITranscriptRegion[] regions, ITranscriptRegion startRegion,
                                                         int startIndex, ITranscriptRegion endRegion, int endIndex, IInterval variant, bool onReverseStrand,
                                                         ICodingRegion codingRegion, byte startExonPhase, bool isInsertion)
        {
            // cDNA
            (int cdnaBegin, int cdnaFinish) =
                MappedPositionUtilities.GetCdnaPositions(startRegion, endRegion, variant, onReverseStrand, isInsertion);

            if (onReverseStrand)
            {
                (cdnaBegin, cdnaFinish) = (cdnaFinish, cdnaBegin);
            }

            // CDS
            (int cdsBegin, int cdsFinish) =
                MappedPositionUtilities.GetCdsPositions(codingRegion, cdnaBegin, cdnaFinish, startExonPhase, isInsertion);

            // protein
            int proteinBegin  = MappedPositionUtilities.GetProteinPosition(cdsBegin);
            int proteinFinish = MappedPositionUtilities.GetProteinPosition(cdsFinish);

            // exons & introns
            (int exonBegin, int exonFinish, int intronBegin, int intronFinish) = regions.GetExonsAndIntrons(startIndex, endIndex);

            return new MappedPosition(cdnaBegin, cdnaFinish, cdsBegin, cdsFinish, proteinBegin, proteinFinish,
                                      exonBegin, exonFinish, intronBegin, intronFinish, startIndex, endIndex);
        }
Esempio n. 10
0
        /// <summary>
        /// Formats a nullable interval as a range string. Returns null when both endpoints are
        /// null, "?-end" or "start-?" when one endpoint is null, a single number when both are
        /// equal, and "start-end" (smaller value first) otherwise. Every number is formatted with
        /// the invariant culture so the result does not depend on the current thread culture.
        /// </summary>
        /// <param name="interval">the interval whose Start and End may each be null</param>
        /// <returns>the formatted range, or null if both endpoints are null</returns>
        private static string GetNullablePositionRange(NullableInterval interval)
        {
            if (interval.Start == null && interval.End == null)
            {
                return null;
            }

            // only the end is known
            if (interval.Start == null)
            {
                return "?-" + interval.End.Value.ToString(CultureInfo.InvariantCulture);
            }

            // only the start is known
            if (interval.End == null)
            {
                return interval.Start.Value.ToString(CultureInfo.InvariantCulture) + "-?";
            }

            var start = interval.Start.Value;
            var end   = interval.End.Value;

            // always report the smaller position first
            if (start > end)
            {
                Swap.Int(ref start, ref end);
            }

            var startText = start.ToString(CultureInfo.InvariantCulture);

            return start == end
                ? startText
                : startText + "-" + end.ToString(CultureInfo.InvariantCulture);
        }
Esempio n. 11
0
        /// <summary>
        /// Returns the reference allele as seen on the transcript. When the position or the cDNA
        /// sequence is null, or when both covered cDNA coordinates are -1, the result of
        /// HgvsUtilities.GetTranscriptAllele for the variant's reference allele is returned.
        /// Otherwise the covered range is read from the cDNA sequence.
        /// </summary>
        private static string GetTranscriptRefAllele(IMappedPosition position, ISequence cdnaSequence, ISimpleVariant variant,
                                                     bool onReverseStrand)
        {
            var fallbackAllele = HgvsUtilities.GetTranscriptAllele(variant.RefAllele, onReverseStrand);

            if (position == null || cdnaSequence == null)
            {
                return fallbackAllele;
            }

            var start = position.CoveredCdnaStart;
            var end   = position.CoveredCdnaEnd;

            bool startMissing = start == -1;
            bool endMissing   = end == -1;

            if (startMissing && endMissing)
            {
                return fallbackAllele;
            }

            // put the coordinates in ascending order when both are present
            if (!startMissing && !endMissing && end < start)
            {
                Swap.Int(ref start, ref end);
            }

            // NOTE(review): if exactly one coordinate is -1 the offset/length below are computed
            // from that -1 value; presumably callers never produce that combination - confirm.
            int offset = start - 1; // coordinates are converted to a zero-based offset
            int length = end - start + 1;

            return cdnaSequence.Substring(offset, length);
        }
        /// <summary>
        /// Produces the HGVS protein notation for a variant on a transcript. Returns null when
        /// IsHgvspNull reports that no notation applies or when the protein change type has no
        /// matching case; otherwise delegates to the HgvspNotation method that corresponds to the
        /// protein change returned by GetProteinChange.
        /// </summary>
        public static string GetHgvsProteinAnnotation(
            ITranscript transcript,
            string refAminoAcids,
            string altAminoAcids,
            string transcriptAltAllele,
            IMappedPosition position,
            VariantEffect variantEffect,
            ISimpleVariant variant,
            ISequence refSequence,
            string hgvscNotation,
            bool isMitochondrial)
        {
            if (IsHgvspNull(transcriptAltAllele, position.CdsStart, position.CdsEnd, variant, hgvscNotation))
            {
                return null;
            }

            var peptideSeq = transcript.Translation.PeptideSeq;

            // The reference amino acids must not run past the stop codon: when a stop codon occurs
            // anywhere but at the very end, keep the part before the first one and re-append it.
            if (!refAminoAcids.EndsWith(AminoAcids.StopCodon) && refAminoAcids.Contains(AminoAcids.StopCodon))
            {
                refAminoAcids = refAminoAcids.OptimizedSplit(AminoAcids.StopCodon[0])[0] + AminoAcids.StopCodon;
            }

            int proteinStart = position.ProteinStart;

            HgvsUtilities.ShiftAndRotateAlleles(ref proteinStart, ref refAminoAcids, ref altAminoAcids, peptideSeq);

            // last protein position spanned by the (possibly shifted) reference amino acids
            var proteinEnd      = proteinStart + refAminoAcids.Length - 1;
            var refAbbreviation = AminoAcids.GetAbbreviations(refAminoAcids);
            var altAbbreviation = AminoAcids.GetAbbreviations(altAminoAcids);

            var proteinId     = transcript.Translation.ProteinId.WithVersion;
            var proteinChange = GetProteinChange(proteinStart, refAminoAcids, altAminoAcids, peptideSeq, variantEffect);

            switch (proteinChange)
            {
                case ProteinChange.Substitution:
                    return HgvspNotation.GetSubstitutionNotation(proteinId, proteinStart, refAbbreviation, altAbbreviation);

                case ProteinChange.Unknown:
                    return HgvspNotation.GetUnknownNotation(proteinId, proteinStart, proteinEnd, refAbbreviation, altAbbreviation);

                case ProteinChange.Deletion:
                    return HgvspNotation.GetDeletionNotation(proteinId, proteinStart, proteinEnd, refAbbreviation,
                                                             variantEffect.IsStopGained());

                case ProteinChange.Duplication:
                    // move the start back by the length of the alternate amino acids
                    proteinStart -= altAminoAcids.Length;
                    return HgvspNotation.GetDuplicationNotation(proteinId, proteinStart, proteinEnd, altAbbreviation);

                case ProteinChange.Frameshift:
                    return GetHgvsFrameshiftNotation(refSequence, position.CdsStart, position.CdsEnd, transcriptAltAllele,
                                                     transcript, isMitochondrial, proteinId, proteinStart, proteinEnd);

                case ProteinChange.None:
                    return HgvspNotation.GetSilentNotation(hgvscNotation, proteinStart, refAbbreviation,
                                                           variantEffect.IsStopRetained());

                case ProteinChange.DelIns:
                    return HgvspNotation.GetDelInsNotation(proteinId, proteinStart, proteinEnd, refAbbreviation, altAbbreviation);

                case ProteinChange.Insertion:
                    // the insertion notation receives the two positions exchanged
                    Swap.Int(ref proteinStart, ref proteinEnd);
                    return HgvspNotation.GetInsertionNotation(proteinId, proteinStart, proteinEnd, altAbbreviation, peptideSeq);

                case ProteinChange.Extension:
                    var altPeptideSequence = HgvsUtilities.GetAltPeptideSequence(refSequence, position.CdsStart, position.CdsEnd,
                                                                                 transcriptAltAllele, transcript, isMitochondrial);

                    // use "Ter" when the start position lies beyond the alternate peptide sequence
                    altAbbreviation = proteinStart <= altPeptideSequence.Length
                        ? AminoAcids.ConvertAminoAcidToAbbreviation(altPeptideSequence[proteinStart - 1])
                        : "Ter";

                    var countToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(altPeptideSequence, peptideSeq, proteinStart - 1,
                                                                                    false);

                    return HgvspNotation.GetExtensionNotation(proteinId, proteinStart, refAbbreviation, altAbbreviation, countToStop);

                case ProteinChange.StartLost:
                    return HgvspNotation.GetStartLostNotation(proteinId, proteinStart, proteinEnd, refAbbreviation);

                default:
                    // any protein change without a dedicated notation yields no annotation
                    return null;
            }
        }