예제 #1
0
 // Expects ConvertAminoAcidToAbbreviation to throw NotSupportedException for the input 'a'.
 public void ConvertAminoAcidToAbbreviation_not_support()
 {
     Action convertUnsupportedLetter = () =>
     {
         AminoAcids.ConvertAminoAcidToAbbreviation('a');
     };

     Assert.Throws<NotSupportedException>(convertUnsupportedLetter);
 }
예제 #2
0
 // Expects GetAbbreviations to return the empty string for both a null and an empty input.
 public void GetAbbreviations_null_or_empty_input()
 {
     // null
     var abbreviationsForNull = AminoAcids.GetAbbreviations(null);
     Assert.Equal("", abbreviationsForNull);

     // empty
     var abbreviationsForEmpty = AminoAcids.GetAbbreviations("");
     Assert.Equal("", abbreviationsForEmpty);
 }
예제 #3
0
        // Expects TranslateBases to return null when it is handed a null base string.
        public void TranslateBases_nulls_input()
        {
            var translator = new AminoAcids(true);

            // null
            var translated = translator.TranslateBases(null, true);

            Assert.Null(translated);
        }
예제 #4
0
        /// <summary>
        /// Reads sequence records from <paramref name="inputStream"/> until the end of the stream
        /// and converts each record into a <c>Protein</c>.
        /// </summary>
        /// <param name="inputStream">Stream the records are read from; it is not closed here.</param>
        /// <returns>One protein per record, in the order the records were read.</returns>
        public IEnumerable <ISequence <IAminoAcid> > Parse(Stream inputStream)
        {
            var streamReader  = new StreamReader(inputStream);
            var parsedRecords = new List <Sequence>();

            // phase 1: let each Sequence consume its own record from the reader
            while (!streamReader.EndOfStream)
            {
                var record = new Sequence();
                record.Process(streamReader);
                parsedRecords.Add(record);
            }

            // phase 2: every record is converted as a protein (amino-acid letters);
            // no other kind of sequence is handled here
            var result = new List <Protein>();

            foreach (var record in parsedRecords)
            {
                var protein = new Protein { Description = record.Comment };

                foreach (char residueLetter in record.Letters)
                {
                    protein.Add(AminoAcids.FromLetter(residueLetter));
                }

                result.Add(protein);
            }

            return result;
        }
예제 #5
0
        // Clips the RSS/R deletion at 100-102 and expects reference "SS", a null alternate
        // and the range 101-102 afterwards.
        public void TrimPrefix()
        {
            const string expectedReference = "SS";
            const string expectedAlternate = null;
            const int    expectedStart     = 101;
            const int    expectedEnd       = 102;

            // RSS/R
            var notation = new HgvsProteinNomenclature.HgvsNotation("RSS", "R", "bob", 100, 102)
            {
                Type = ProteinChange.Deletion
            };

            AminoAcids.RemovePrefixAndSuffix(notation);

            Assert.Equal(expectedReference, notation.ReferenceAminoAcids);
            Assert.Equal(expectedAlternate, notation.AlternateAminoAcids);
            Assert.Equal(expectedStart, notation.Start);
            Assert.Equal(expectedEnd, notation.End);
        }
예제 #6
0
        // Clips the RT/RMLMLT insertion at 100-101 and expects a null reference, alternate "MLML"
        // and start 101 / end 100 afterwards.
        public void TrimBothPrefixAndSuffix()
        {
            const string expectedReference = null;
            const string expectedAlternate = "MLML";
            const int    expectedStart     = 101;
            const int    expectedEnd       = 100;

            // RT/RMLMLT
            var notation = new HgvsProteinNomenclature.HgvsNotation("RT", "RMLMLT", "bob", 100, 101)
            {
                Type = ProteinChange.Insertion
            };

            AminoAcids.RemovePrefixAndSuffix(notation);

            Assert.Equal(expectedReference, notation.ReferenceAminoAcids);
            Assert.Equal(expectedAlternate, notation.AlternateAminoAcids);
            Assert.Equal(expectedStart, notation.Start);
            Assert.Equal(expectedEnd, notation.End);
        }
예제 #7
0
        /// <summary>
        /// Produces the HGVS protein notation for a frameshift: a substitution-to-"Ter" notation when the
        /// first changed alternate amino acid equals <c>AminoAcids.StopCodonChar</c>, otherwise a
        /// frameshift notation that includes the count returned by GetNumAminoAcidsUntilStopCodon.
        /// </summary>
        /// <returns>The notation string built by <c>HgvspNotation</c>.</returns>
        private static string GetHgvsFrameshiftNotation(ISequence refSequence, int cdsBegin, int cdsEnd,
                                                        string transcriptAltAllele, ITranscript transcript, bool isMitochondrial, string proteinId, int start,
                                                        int end)
        {
            var referencePeptides = transcript.Translation.PeptideSeq;
            var alternatePeptides = HgvsUtilities.GetAltPeptideSequence(refSequence, cdsBegin, cdsEnd, transcriptAltAllele, transcript, isMitochondrial);

            // normalize the interval so that start <= end
            if (start > end)
            {
                Swap.Int(ref start, ref end);
            }

            // Item1 replaces the start position; Item2/Item3 are used as the reference and alternate amino acids
            var firstChange = HgvsUtilities.GetChangesAfterFrameshift(start, referencePeptides, alternatePeptides);
            start = firstChange.Item1;

            var refAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(firstChange.Item2);
            var changedAltAa    = firstChange.Item3;

            if (changedAltAa == AminoAcids.StopCodonChar)
            {
                return HgvspNotation.GetSubstitutionNotation(proteinId, start, refAbbreviation, "Ter");
            }

            var altAbbreviation = AminoAcids.ConvertAminoAcidToAbbreviation(changedAltAa);
            var aaCountToStop   = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(alternatePeptides, referencePeptides, start - 1, true);

            return HgvspNotation.GetFrameshiftNotation(proteinId, start, refAbbreviation, altAbbreviation, aaCountToStop);
        }
예제 #8
0
 // Disposes MoleculeReference, then every element of AminoAcids and Annotations,
 // and finally empties both collections.
 public void Dispose()
 {
     MoleculeReference.Dispose();
     AminoAcids.ForEach(aminoAcid => aminoAcid.Dispose());
     Annotations.ForEach(annotation => annotation.Dispose());
     // drop the references to the elements that were just disposed
     AminoAcids.Clear();
     Annotations.Clear();
 }
예제 #9
0
        // Translates the codon pair TTC / CTC with AminoAcids(false) and expects F as the reference
        // and L as the alternate amino acid.
        public void Assign_translate()
        {
            var translated = new AminoAcids(false).Translate("TTC", "CTC");

            Assert.Equal("F", translated.Reference);
            Assert.Equal("L", translated.Alternate);
        }
예제 #10
0
        /// <summary>
        /// Builds the stacked-column chart: one series per plotted amino acid and, for every position in
        /// <paramref name="weights"/>, one data point per plotted amino acid.
        /// </summary>
        /// <param name="wildTypePeptide">Peptide whose character at each position is used as the X-axis label.</param>
        /// <param name="usedAminoAcids">One-letter codes to plot; codes for which no series exists are skipped.</param>
        /// <param name="weights">Per position, the weight of each amino acid; a missing amino acid counts as 0.</param>
        public frmChart(string wildTypePeptide, List <char> usedAminoAcids, Dictionary <int, Dictionary <char, double> > weights)
        {
            InitializeComponent();

            Settings settings = Settings.Load("default.settings");

            chart.Series.Clear();
            chart.ChartAreas[0].AxisX.LabelStyle.Angle = 0;

            // one series per requested amino acid, in the sorted order; 'X' never gets a series
            foreach (AminoAcid aminoAcid in AminoAcids.GetSortedAminoAcidList())
            {
                char code = aminoAcid.Abbrev1;
                if (code == 'X' || !usedAminoAcids.Contains(code))
                {
                    continue;
                }

                Series series = chart.Series.Add(code.ToString());
                series.Label           = code.ToString();
                series.BorderWidth     = 1;
                series.BorderColor     = Color.Black;
                series.BorderDashStyle = ChartDashStyle.Solid;
                series.ChartType       = SeriesChartType.StackedColumn;
                series.Color           = settings.GetColorOfAminoAcid(code);
                if (series.Color == Color.Black)
                {
                    // a black column would hide a black label and outline
                    series.LabelForeColor = Color.White;
                    series.BorderColor    = Color.Red;
                }
            }

            foreach (KeyValuePair <int, Dictionary <char, double> > entry in weights)
            {
                int position = entry.Key;
                Dictionary <char, double> weightedList = entry.Value;

                // label the column with the wild-type residue, centered on the position
                CustomLabel axisLabel = new CustomLabel();
                axisLabel.FromPosition = position - 0.5;
                axisLabel.ToPosition   = position + 0.5;
                axisLabel.Text         = wildTypePeptide[position].ToString();
                chart.ChartAreas[0].AxisX.CustomLabels.Add(axisLabel);

                foreach (char code in usedAminoAcids)
                {
                    // No series exists for 'X' or for codes missing from the sorted list;
                    // indexing chart.Series by such a name would throw, so skip them.
                    Series series = chart.Series.FindByName(code.ToString());
                    if (series == null)
                    {
                        continue;
                    }

                    double weight;
                    if (!weightedList.TryGetValue(code, out weight))
                    {
                        weight = 0;
                    }

                    DataPoint point = new DataPoint(position, weight);
                    if (weight < 0.001)
                    {
                        // suppress the label on (near-)empty segments
                        point.Label = "";
                    }
                    series.Points.Add(point);
                }
            }

            chart.ChartAreas[0].AxisX.MajorGrid.LineWidth = 0;
            chart.ChartAreas[0].AxisX.MinorGrid.LineWidth = 0;
        }
예제 #11
0
 /// <summary>
 /// Fills the color panel with one label/button pair per amino-acid code when the panel holds fewer
 /// than two controls; otherwise refreshes the background color of the existing buttons.
 /// </summary>
 private void LoadColors()
 {
     pColors.SuspendLayout();
     try
     {
         if (pColors.Controls.Count < 2)
         {
             int rowind = 1;
             int top    = 20;
             int left   = 10;

             // codes come from the configured colors plus the full amino-acid list
             foreach (char c in settings.AminoAcidMotifColors.Keys.Union(AminoAcids.GetFullAminoAcidList().Select(aa => aa.Abbrev1)))
             {
                 Label label = new Label();
                 label.Text     = c.ToString();
                 label.AutoSize = false;
                 label.Height   = 20;
                 label.Width    = 20;
                 label.Top      = top + 5;
                 label.Left     = left;
                 pColors.Controls.Add(label);

                 // the button sits directly to the right of its label
                 Button button = new Button
                 {
                     BackColor = settings.GetColorOfAminoAcid(c),
                     Tag       = c
                 };
                 button.Click   += Button_Click;
                 button.AutoSize = false;
                 button.Height   = 20;
                 button.Width    = 20;
                 button.Top      = top;
                 button.Left     = left + 20;
                 pColors.Controls.Add(button);

                 top += 25;

                 // after six rows start a new column
                 if (++rowind > 6)
                 {
                     rowind = 1;
                     left  += 60;
                     top    = 20;
                 }
             }
         }
         else
         {
             foreach (Control control in pColors.Controls)
             {
                 if (control.GetType() == typeof(Button))
                 {
                     char c = (char)control.Tag;

                     // Buttons are also created for codes that are not keys of AminoAcidMotifColors,
                     // so the dictionary must not be indexed directly here. Use the same accessor as
                     // the creation branch above.
                     // NOTE(review): assumes GetColorOfAminoAcid returns the configured color when one exists - confirm.
                     control.BackColor = settings.GetColorOfAminoAcid(c);
                 }
             }
         }
     }
     finally
     {
         // always re-enable layout, even if building the controls failed
         pColors.ResumeLayout();
     }
 }
예제 #12
0
        /// <summary>
        /// Stores the supplied annotation inputs and creates the HGVS notation from the transcript
        /// annotation's reference/alternate amino acids, the value FormatUtilities.CombineIdAndVersion
        /// returns for the translation's protein ID and version, and the protein begin/end positions.
        /// </summary>
        public HgvsProteinNomenclature(VariantEffect variantEffect, TranscriptAnnotation ta, Transcript transcript,
                                       VariantFeature variant, ICompressedSequence compressedSequence, AminoAcids aminoAcids)
        {
            _variantEffect      = variantEffect;
            _ta                 = ta;
            _transcript         = transcript;
            _variant            = variant;
            _compressedSequence = compressedSequence;
            _aminoAcids         = aminoAcids;

            var translation = transcript.Translation;
            var proteinId   = FormatUtilities.CombineIdAndVersion(translation.ProteinId, translation.ProteinVersion);

            _hgvsNotation = new HgvsNotation(ta.ReferenceAminoAcids, ta.AlternateAminoAcids, proteinId,
                                             ta.ProteinBegin, ta.ProteinEnd);
        }
예제 #13
0
        // ReSharper disable once InconsistentNaming
        public void Shift3PrimeSS()
        {
            // given a SS/- deletion in RS[SS]SSS, we want to move to: RSSSS[SS]
            var deletion = new HgvsProteinNomenclature.HgvsNotation("SS", null, "bob", 3, 4)
            {
                Type = ProteinChange.Deletion
            };

            AminoAcids.Rotate3Prime(deletion, "RSSSSSS");

            // the deletion is expected to end up at positions 6-7
            Assert.Equal(6, deletion.Start);
            Assert.Equal(7, deletion.End);
        }
예제 #14
0
        // ReSharper disable once InconsistentNaming
        public void Shift3PrimeSTM()
        {
            // given a STM/- deletion in R[STM]STMP, we want to move to: RSTM[STM]P
            var deletion = new HgvsProteinNomenclature.HgvsNotation("STM", null, "bob", 2, 4)
            {
                Type = ProteinChange.Deletion
            };

            AminoAcids.Rotate3Prime(deletion, "RSTMSTMP");

            // the deletion is expected to end up at positions 5-7
            Assert.Equal(5, deletion.Start);
            Assert.Equal(7, deletion.End);
        }
예제 #15
0
        // Expects Translate to report empty reference and alternate amino acids
        // for null codons as well as for empty codons.
        public void Assign_null_or_empty_input()
        {
            var translator = new AminoAcids(true);

            // null
            var fromNull = translator.Translate(null, null);
            Assert.Equal("", fromNull.Reference);
            Assert.Equal("", fromNull.Alternate);

            // empty
            var fromEmpty = translator.Translate("", "");
            Assert.Equal("", fromEmpty.Reference);
            Assert.Equal("", fromEmpty.Alternate);
        }
예제 #16
0
        /// <summary>
        /// Creates the annotation source: sets up the compressed sequence and its data file manager,
        /// subscribes LoadData to the manager's Changed event, creates the helper objects and finally
        /// loads the transcript cache from <paramref name="transcriptCacheStream"/>.
        /// </summary>
        /// <param name="transcriptCacheStream">Stream handed to LoadTranscriptCache.</param>
        /// <param name="compressedSequenceReader">Reader handed to the data file manager.</param>
        public PianoAnnotationSource(Stream transcriptCacheStream, CompressedSequenceReader compressedSequenceReader)
        {
            OverlappingTranscripts = new List <Transcript>();
            _performanceMetrics    = PerformanceMetrics.Instance;

            // the data file manager shares the compressed sequence instance created here
            _compressedSequence       = new CompressedSequence();
            _dataFileManager          = new DataFileManager(compressedSequenceReader, _compressedSequence);
            _dataFileManager.Changed += LoadData;

            // the renamer comes from the compressed sequence, so it must be read after that object exists
            _renamer    = _compressedSequence.Renamer;
            _aminoAcids = new AminoAcids();
            _vid        = new VID();

            // fills _transcriptIntervalForest; needs the renamer's reference sequence count
            LoadTranscriptCache(transcriptCacheStream, _renamer.NumRefSeqs, out _transcriptIntervalForest);
        }
예제 #17
0
        // Expects Translate to report empty reference and alternate amino acids
        // whenever either codon string contains an "N".
        public void Assign_codons_with_N()
        {
            var translator = new AminoAcids(true);

            // referenceCodons with "N"
            var refWithN = translator.Translate("ANA", "AAA");
            Assert.Equal("", refWithN.Reference);
            Assert.Equal("", refWithN.Alternate);

            // alternateCodons with "N"
            var altWithN = translator.Translate("AAA", "ANA");
            Assert.Equal("", altWithN.Reference);
            Assert.Equal("", altWithN.Alternate);
        }
예제 #18
0
        /// <summary>
        /// Builds one tag for every pair of peaks (i, j), i &lt; j, whose difference is resolved to an
        /// amino acid by AminoAcids.GetAminoAcid within _hopTol.
        /// </summary>
        /// <returns>The tags found; empty when no pair resolves to an amino acid.</returns>
        private List <Tags> ExtractSingleTags()
        {
            var singletagList = new List <Tags>();

            for (var i = 0; i < _peakList.Count; i++)
            {
                // the inner peak does not change while j advances
                var innerPeak = _peakList[i];

                for (var j = i + 1; j < _peakList.Count; j++)
                {
                    var differenceOfPeaks = _peakList[j] - innerPeak;

                    // Check the upper bound before the amino-acid lookup so the lookup is not wasted on
                    // the iteration that leaves the loop.
                    // NOTE(review): breaking here assumes _peakList is sorted ascending - confirm.
                    if (differenceOfPeaks > MaximumAminoAcidMolecularWeight)
                    {
                        break;
                    }

                    var tempAminoAcid = AminoAcids.GetAminoAcid(differenceOfPeaks, _hopTol);
                    if (tempAminoAcid == AminoAcidDoesNotExist)
                    {
                        continue;
                    }

                    // squared deviation between the amino acid's weight and the observed peak difference
                    var errorSquare = new List <double>
                    {
                        Math.Pow(AminoAcids.GetMolecularWeight(tempAminoAcid) - differenceOfPeaks, 2)
                    };

                    var avgIntensity = new List <double>
                    {
                        GetAverage(_intensity[i], _intensity[j])
                    };

                    var tag = new Tags(tempAminoAcid.ToString(), i, j, errorSquare, avgIntensity);
                    tag.Locations.Add(i);
                    tag.Locations.Add(j);

                    singletagList.Add(tag);
                }
            }

            return singletagList;
        }
예제 #19
0
 // Rebuilds the amino-acid grid: one row per entry of the sorted full amino-acid list, holding the
 // name, the one-letter code and a flag that is false only when the code is in the exclude list.
 private void LoadGrid()
 {
     dgAminoAcid.RowCount = 0;

     foreach (AminoAcid aminoAcid in AminoAcids.GetSortedFullAminoAcidList())
     {
         // RowCount++ appends a row and yields its index
         int row = dgAminoAcid.RowCount++;

         dgAminoAcid[0, row].Value = aminoAcid.Name;
         dgAminoAcid[1, row].Value = aminoAcid.Abbrev1;

         bool isExcluded = settings.AminoAcidExcludeList != null &&
                           settings.AminoAcidExcludeList.Contains(aminoAcid.Abbrev1);
         dgAminoAcid[2, row].Value = !isExcluded;
     }
 }
        // Adds up the AminoAcids table value of every character in the sequence and returns the sum
        // rounded to 4 decimals; returns NaN as soon as a character has no entry in the table.
        private static double sequencelength(string sequence)
        {
            double total = 0;

            foreach (char residue in sequence)
            {
                if (!AminoAcids.ContainsKey(residue))
                {
                    return double.NaN;
                }

                total += AminoAcids[residue];
            }

            return Math.Round(total, 4);
        }
예제 #21
0
        /// <summary>
        /// Formats the HGVS protein notation for an insertion between positions
        /// <paramref name="start"/> and <paramref name="end"/> of <paramref name="peptideSeq"/>.
        /// </summary>
        /// <param name="proteinId">Identifier placed in front of the ":p." part.</param>
        /// <param name="start">1-based position of the left flanking amino acid.</param>
        /// <param name="end">1-based position of the right flanking amino acid.</param>
        /// <param name="altAbbreviation">Abbreviated inserted amino acids.</param>
        /// <param name="peptideSeq">Reference peptide sequence.</param>
        /// <returns>
        /// null when <paramref name="end"/> lies past the last amino acid; a "...Ter" notation when the
        /// inserted sequence starts with "Ter"; otherwise the "left_rightins..." notation.
        /// </returns>
        public static string GetInsertionNotation(string proteinId, int start, int end, string altAbbreviation, string peptideSeq)
        {
            // insertion past the last AA
            if (end > peptideSeq.Length)
            {
                return(null);
            }

            var leftFlankingAa = AminoAcids.ConvertAminoAcidToAbbreviation(peptideSeq[start - 1]);

            // abbreviations are plain identifiers, so compare ordinally rather than by culture
            if (altAbbreviation.StartsWith("Ter", System.StringComparison.Ordinal))
            {
                var refAminoAcid = AminoAcids.ConvertAminoAcidToAbbreviation(peptideSeq[start]);
                return($"{proteinId}:p.({refAminoAcid}{end}Ter)");
            }

            // end <= peptideSeq.Length is guaranteed by the guard at the top, so the former
            // "past the end => Ter" fallback could never be taken and has been removed
            var rightFlankingAa = AminoAcids.ConvertAminoAcidToAbbreviation(peptideSeq[end - 1]);

            return($"{proteinId}:p.({leftFlankingAa}{start}_{rightFlankingAa}{end}ins{altAbbreviation})");
        }
예제 #22
0
        /// <summary>
        /// Sets the HGVS protein sequence name on the transcript annotation
        /// [TranscriptVariationAllele.pm:717 hgvs_protein]. Nothing is set when the sanity checks fail;
        /// stop-retained variants and variants without a protein change get the coding sequence name
        /// followed by "(p.=)".
        /// </summary>
        public void SetAnnotation()
        {
            // sanity check: don't try to handle odd characters, make sure this is not a reference allele,
            //               and make sure that we have protein coordinates
            // The CDS end used to be tested twice while the CDS start was never tested.
            // NOTE(review): assumes the annotation exposes HasValidCdsStart alongside HasValidCdsEnd - confirm.
            if (_variant.IsReference || !_ta.HasValidCdsStart || !_ta.HasValidCdsEnd ||
                SequenceUtilities.HasNonCanonicalBase(_ta.TranscriptAlternateAllele))
            {
                return;
            }

            // check if this is a stop retained variant
            if (_variantEffect.IsStopRetained())
            {
                _ta.HgvsProteinSequenceName = $"{_ta.HgvsCodingSequenceName}(p.=)";
                return;
            }

            // clip the alleles
            AminoAcids.RemovePrefixAndSuffix(_hgvsNotation);

            // set the protein change
            _hgvsNotation.Type = GetGeneralProteinChange();

            if (_hgvsNotation.Type != ProteinChange.None)
            {
                _hgvsNotation.Type = GetSpecificProteinChange();

                // convert ref & alt peptides taking into account HGVS rules
                GetHgvsPeptides(_ta);
            }

            // no protein change - return transcript nomenclature with flag for neutral protein consequence
            // (re-checked here because the type is reassigned by GetSpecificProteinChange above)
            if (_hgvsNotation.Type == ProteinChange.None)
            {
                _ta.HgvsProteinSequenceName = $"{_ta.HgvsCodingSequenceName}(p.=)";
                return;
            }

            // string formatting
            _ta.HgvsProteinSequenceName = GetHgvsProteinFormat(_ta);
        }
예제 #23
0
        /// <summary>
        /// Calculates the 147 TAE descriptors for amino acids by summing, per descriptor, the TAE
        /// parameters of every monomer with a non-empty name.
        /// </summary>
        /// <param name="container">The polymer to evaluate; a clone is processed, not this instance.</param>
        /// <returns>The 147 TAE descriptors</returns>
        public Result Calculate(IBioPolymer container)
        {
            // work on a copy so the original is not modified
            var peptide = (IBioPolymer)container.Clone();

            // I assume that we get single letter names
            // (a freshly allocated double array already holds 0.0 in every slot)
            var totals = new double[ndesc];

            foreach (var monomer in GetMonomers(peptide))
            {
                string monomerName = monomer.MonomerName;

                if (monomerName.Length == 0)
                {
                    continue;
                }

                // only the first character, upper-cased, is used as the one letter code
                var oneLetterCode   = monomerName.Substring(0, 1).ToUpperInvariant();
                var threeLetterCode = AminoAcids.ConvertOneLetterCodeToThreeLetterCode(oneLetterCode).ToLowerInvariant();

                Debug.WriteLine($"Converted {oneLetterCode} to {threeLetterCode}");

                // accumulate the params for this AA
                var aaParameters = taeParams[threeLetterCode];

                for (int k = 0; k < ndesc; k++)
                {
                    totals[k] += aaParameters[k];
                }
            }

            return new Result(totals);
        }
예제 #24
0
        /// <summary>
        /// Annotates <paramref name="variant"/> against <paramref name="transcript"/>: maps the variant's
        /// positions, derives codons and amino acids, determines the consequences and collects the
        /// flanking peptides.
        /// </summary>
        /// <param name="transcript">Transcript the variant is annotated against.</param>
        /// <param name="variant">Variant to annotate.</param>
        /// <param name="refSequence">Reference sequence used to build the codons.</param>
        /// <param name="aminoAcidsProvider">Translator used to turn codons into amino acids.</param>
        /// <returns>A new <c>PianoAnnotatedTranscript</c>.</returns>
        public static IAnnotatedTranscript GetAnnotatedTranscript(ITranscript transcript, IVariant variant, ISequence refSequence,
                                                                  AminoAcids aminoAcidsProvider)
        {
            var mappedPositions = MappedPositionsUtils.ComputeMappedPositions(variant.Start, variant.End, transcript);

            var transcriptRefAllele = HgvsUtilities.GetTranscriptAllele(variant.RefAllele, transcript.Gene.OnReverseStrand);
            var transcriptAltAllele = HgvsUtilities.GetTranscriptAllele(variant.AltAllele, transcript.Gene.OnReverseStrand);

            var codonsAndAminoAcids = GetCodonsAndAminoAcids(transcript, refSequence, transcriptRefAllele, transcriptAltAllele, variant, mappedPositions, aminoAcidsProvider);

            var referenceCodons     = codonsAndAminoAcids.Item1;
            var alternateCodons     = codonsAndAminoAcids.Item2;
            var referenceAminoAcids = codonsAndAminoAcids.Item3;
            var alternateAminoAcids = codonsAndAminoAcids.Item4;

            // amino-acid strings are plain symbols, so compare ordinally rather than by culture
            var insertionInStartCodonAndNoimpact = variant.Type == VariantType.insertion &&
                                                   mappedPositions.ProteinInterval.Start <= 1 &&
                                                   alternateAminoAcids.EndsWith(referenceAminoAcids, System.StringComparison.Ordinal);

            var variantEffect = GetVariantEffect(transcript, variant, mappedPositions, referenceAminoAcids,
                                                 alternateAminoAcids, referenceCodons, alternateCodons, insertionInStartCodonAndNoimpact);

            var consequences = GetConsequences(transcript, variant, variantEffect);

            // -1 marks a protein coordinate that could not be mapped
            var proteinBegin = mappedPositions.ProteinInterval.Start ?? -1;
            var proteinEnd   = mappedPositions.ProteinInterval.End ?? -1;

            var upStreamAminoAcids = GetFlankingPeptides(transcript.Translation?.PeptideSeq, proteinBegin, proteinEnd, FlankingAminoAcidLength, true);

            // no downstream peptides are collected for frameshift variants
            var downStreamAminoAcids = consequences.Contains(ConsequenceTag.frameshift_variant)
                ? null
                : GetFlankingPeptides(transcript.Translation?.PeptideSeq, proteinBegin, proteinEnd, FlankingAminoAcidLength, false);

            return(new PianoAnnotatedTranscript(transcript, referenceAminoAcids, alternateAminoAcids, mappedPositions, upStreamAminoAcids, downStreamAminoAcids, consequences));
        }
예제 #25
0
        /// <summary>
        /// Builds the coding sequence for the transcript (none when it has no translation) and returns
        /// the reference/alternate codons and amino acids computed from it.
        /// </summary>
        /// <returns>
        /// (reference codons, alternate codons, reference amino acids, alternate amino acids); a null
        /// result is replaced by the empty string.
        /// </returns>
        private static Tuple <string, string, string, string> GetCodonsAndAminoAcids(ITranscript transcript, ISequence refSequence,
                                                                                     string transcriptRefAllele, string transcriptAltAllele, ISimpleVariant variant,
                                                                                     IMappedPositions mappedPositions, AminoAcids aminoAcidsProvider)
        {
            CodingSequence codingSequence = null;

            if (transcript.Translation != null)
            {
                codingSequence = new CodingSequence(refSequence, transcript.Translation.CodingRegion.Start,
                                                    transcript.Translation.CodingRegion.End, transcript.CdnaMaps,
                                                    transcript.Gene.OnReverseStrand, transcript.StartExonPhase);
            }

            // compute codons and amino acids
            string refCodons;
            string altCodons;
            string refAminoAcids;
            string altAminoAcids;

            AssignCodonsAndAminoAcids(transcriptRefAllele, transcriptAltAllele, mappedPositions,
                                      codingSequence, aminoAcidsProvider, out refCodons,
                                      out altCodons, out refAminoAcids, out altAminoAcids);

            // callers always receive non-null strings
            return Tuple.Create(refCodons ?? "", altCodons ?? "", refAminoAcids ?? "", altAminoAcids ?? "");
        }
예제 #26
0
        /// <summary>
        /// Fills the codon outputs through AssignExtended (using the CDS and protein intervals of
        /// <paramref name="mappedPositions"/>) and then passes those codons to the provider's Assign
        /// to fill the amino-acid outputs.
        /// </summary>
        private static void AssignCodonsAndAminoAcids(string transcriptRefAllele, string transcriptAltAllele,
                                                      IMappedPositions mappedPositions, ISequence codingSequence, AminoAcids aminoAcidProvier, out string refCodons,
                                                      out string altCodons, out string refAminoAcids, out string altAminoAcids)
        {
            // step 1: codons
            AssignExtended(transcriptRefAllele, transcriptAltAllele, mappedPositions.CdsInterval,
                           mappedPositions.ProteinInterval, codingSequence, out refCodons, out altCodons);


            // step 2: amino acids, derived from the codons assigned above
            aminoAcidProvier.Assign(refCodons, altCodons, out refAminoAcids, out altAminoAcids);
        }
예제 #27
0
 // Adds the given amino acid to the AminoAcids collection.
 public void Add(SinglePointAminoAcid aminoAcid)
 {
     AminoAcids.Add(aminoAcid);
 }
예제 #28
0
        /// <summary>
        /// Looks for modifications in three timed phases: (1) builds a sorted peak list from the
        /// spectrum, (2) collects every modified amino acid that AminoAcids.GetModifiedAminoAcid
        /// reports for a pair of peaks, (3) for each candidate protein keeps the candidates that line
        /// up with its left in-silico masses, applies their mass to the protein and records them in
        /// PtmParticulars, then sets the protein's MwScore.
        /// </summary>
        /// <param name="experimentalSpectrum">Observed peaks.</param>
        /// <param name="molW">Molecular weight used for the complementary peaks and the mass error.</param>
        /// <param name="candidateProteinsList">Proteins to annotate; they are modified in place.</param>
        /// <param name="pepTol">Tolerance for accepting a candidate, interpreted according to <paramref name="pepUnit"/>.</param>
        /// <param name="userHopThreshold">Tolerance used when matching peak differences and start masses.</param>
        /// <param name="pepUnit">"ppm" or "%" to scale the error relative to <paramref name="molW"/>; any other value leaves it unscaled.</param>
        public static void BlindPTM(List <double> experimentalSpectrum, double molW, List <ProteinDto> candidateProteinsList, double pepTol, double userHopThreshold, string pepUnit)
        {
            // NOTE(review): presumably the proton mass added to each observed peak - confirm.
            const double peakOffset = 1.00727647;

            var stopwatch = new Stopwatch();

            // Data preparation: every peak contributes itself (shifted) and its complement to molW
            stopwatch.Start();
            var peaks = new List <double>();

            foreach (var peak in experimentalSpectrum)
            {
                peaks.Add(peak + peakOffset);
                peaks.Add(molW - (peak + peakOffset));
            }
            peaks.Sort();
            stopwatch.Stop();
            Console.WriteLine("Data Preperation: " + stopwatch.Elapsed);

            // PTM extraction: the four lists below are parallel (same index = same candidate)
            stopwatch.Restart();
            var aminoAcidList    = new List <string>();
            var modificationList = new List <string>();
            var startList        = new List <double>();
            var endList          = new List <double>();

            for (var expI = 0; expI < peaks.Count; expI++)
            {
                for (var expJ = expI + 1; expJ < peaks.Count; expJ++)
                {
                    var peakDiff      = peaks[expJ] - peaks[expI];
                    var modifications = AminoAcids.GetModifiedAminoAcid(peakDiff, userHopThreshold);
                    foreach (var mod in modifications)
                    {
                        // entries are used as "<modification>_<amino acid>"; anything else is ignored
                        var parts = mod.Split('_');
                        if (parts.Length <= 1)
                        {
                            continue;
                        }
                        aminoAcidList.Add(parts[1]);
                        modificationList.Add(parts[0]);
                        startList.Add(peaks[expI]);
                        endList.Add(peaks[expJ]);
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("Generation: " + stopwatch.Elapsed);

            // PTM shortlisting
            stopwatch.Restart();
            foreach (var protein in candidateProteinsList)
            {
                var sequence                = protein.Sequence;
                var hopIndex                = 0; // index into the candidate lists
                var thrI                    = 0; // index into the protein's left in-silico masses
                var shortlistedAminoAcid    = new List <string>();
                var shortlistedModification = new List <string>();
                var shortlistedEnd          = new List <double>();
                var shortlistedStart        = new List <double>();
                var shortlistedIndex        = new List <int>();

                // startList is never modified below. Looping only while it has entries prevents the
                // endless loop the former "while (true)" ran into when no candidate was generated.
                while (startList.Count > 0)
                {
                    try
                    {
                        // skip candidates that start before the last accepted candidate ended
                        if (shortlistedStart.Count > 0 &&
                            shortlistedEnd[shortlistedEnd.Count - 1] > startList[hopIndex])
                        {
                            hopIndex = hopIndex + 1;
                            if (hopIndex == startList.Count)
                            {
                                break;
                            }
                            continue;
                        }

                        var diff = startList[hopIndex] - protein.InsilicoDetails.InsilicoMassLeft[thrI];
                        if (diff <= userHopThreshold && diff >= -userHopThreshold)
                        {
                            if (aminoAcidList[hopIndex] == sequence[thrI + 2].ToString())
                            {
                                var modMass = AminoAcids.ModificationTable(modificationList[hopIndex]);
                                diff = Math.Abs(endList[hopIndex] -
                                                (protein.InsilicoDetails.InsilicoMassLeft[thrI + 1] + modMass));
                                if (string.Equals(pepUnit, "ppm", StringComparison.Ordinal))
                                {
                                    diff = (diff / molW) * 1000000;
                                }
                                else if (string.Equals(pepUnit, "%", StringComparison.Ordinal))
                                {
                                    diff = (diff / molW) * 100;
                                }
                                if (diff < pepTol)
                                {
                                    // the modification shifts every following in-silico mass
                                    for (var i = thrI + 1;
                                         i < protein.InsilicoDetails.InsilicoMassLeft.Count;
                                         i++)
                                    {
                                        protein.InsilicoDetails.InsilicoMassLeft[i] =
                                            protein.InsilicoDetails.InsilicoMassLeft[i] + modMass;
                                    }
                                    protein.Mw = protein.Mw + modMass;
                                    shortlistedAminoAcid.Add(aminoAcidList[hopIndex]);
                                    shortlistedModification.Add(modificationList[hopIndex]);
                                    shortlistedEnd.Add(endList[hopIndex]);
                                    shortlistedStart.Add(startList[hopIndex]);
                                    shortlistedIndex.Add(thrI);
                                }
                            }
                        }
                        else if (diff > userHopThreshold)
                        {
                            // candidate lies beyond this in-silico mass: advance along the protein
                            thrI = thrI + 1;
                            if (thrI == protein.InsilicoDetails.InsilicoMassLeft.Count - 1)
                            {
                                break;
                            }
                            continue;
                        }

                        // every remaining case (match handled, or candidate below the mass) moves on
                        // to the next candidate
                        hopIndex = hopIndex + 1;
                        if (hopIndex == startList.Count)
                        {
                            break;
                        }
                    }
                    catch (Exception exception)
                    {
                        // No index has advanced when an exception is thrown, so retrying would throw
                        // the same exception forever: log it and stop processing this protein.
                        Debug.WriteLine(exception.Message);
                        break;
                    }
                }

                for (var hopIter = 0; hopIter < shortlistedStart.Count; hopIter++)
                {
                    var site = new PostTranslationModificationsSiteDto
                    {
                        Index     = shortlistedIndex[hopIter],
                        ModName   = shortlistedModification[hopIter],
                        ModWeight = AminoAcids.ModificationTable(shortlistedModification[hopIter]),
                        Site      = Convert.ToChar(shortlistedAminoAcid[hopIter])
                    };
                    protein.PtmParticulars.Add(site);
                }

                // The former "Math.Abs(massError) < 0 ? 1 : ..." guard could never be true, so the
                // score has always been the square root of the mass error.
                // NOTE(review): if a special score for a zero error was intended, it needs to be specified.
                var massError = Math.Abs(molW - protein.Mw);
                protein.MwScore = Math.Pow(massError, 0.5);
            }
            stopwatch.Stop();
            Console.WriteLine("Shortlisting :" + stopwatch.Elapsed);
        }
예제 #29
0
        /// <summary>
        /// Runs a "blind" post-translational-modification search: a GPU kernel (<c>PtmExtractor</c>) flags
        /// candidate modifications for pairs of spectrum peaks, and each candidate protein is then walked
        /// against those candidates to shortlist the ones that fit its in-silico masses.
        /// </summary>
        /// <param name="experimentalSpectrum">
        /// Experimental peak values. Each peak contributes two entries to the working peak list:
        /// <c>peak + 1.00727647</c> and <c>molW - (peak + 1.00727647)</c>.
        /// </param>
        /// <param name="molW">
        /// Molecular weight used to derive the complementary peaks, to scale the tolerance for
        /// relative units, and to compute each protein's final mass error.
        /// </param>
        /// <param name="candidateProteinsList">
        /// Proteins to annotate. Each one is mutated in place: accepted modifications shift
        /// <c>InsilicoDetails.InsilicoMassLeft</c>, increase <c>Mw</c>, and are appended to
        /// <c>PtmParticulars</c>; <c>MwScore</c> is always overwritten.
        /// </param>
        /// <param name="pepTol">Upper bound (exclusive) on the end-mass error, expressed in <paramref name="pepUnit"/>.</param>
        /// <param name="userHopThreshold">
        /// Absolute tolerance between a candidate's start mass and the current in-silico left mass.
        /// </param>
        /// <param name="pepUnit">
        /// <c>"ppm"</c> or <c>"%"</c> (ordinal match) scales the end-mass error relative to
        /// <paramref name="molW"/>; any other value leaves the error as an absolute difference.
        /// </param>
        public static void BlindPTM(List <double> experimentalSpectrum, double molW, List <ProteinDto> candidateProteinsList,
                                    double pepTol, double userHopThreshold, string pepUnit)
        {
            // Size of the third dimension of the kernel's output arrays.
            const int slotsPerPeakPair = 37;
            // Offset added to every experimental peak (presumably the proton mass in Da - confirm).
            const double peakMassOffset = 1.00727647;

            var stopwatch = new Stopwatch();

            // Data preparation and loading of the GPU module
            stopwatch.Start();
            var peaks            = new List<double>();
            var aminoAcidList    = new List<string>();
            var modificationList = new List<string>();
            var startList        = new List<double>();
            var endList          = new List<double>();

            foreach (var peak in experimentalSpectrum)
            {
                peaks.Add(peak + peakMassOffset);
                peaks.Add(molW - (peak + peakMassOffset));
            }
            peaks.Sort();

            GPGPU        gpu = CudafyHost.GetDevice(CudafyModes.Target);
            CudafyModule km  = CudafyModule.TryDeserialize();

            // Re-translate the kernel module when no valid cached copy is available.
            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy();
                km.Serialize();
            }
            gpu.LoadModule(km);
            stopwatch.Stop();
            Console.WriteLine("Data Preperation: " + stopwatch.Elapsed);

            // GPU stage
            stopwatch.Restart();

            // With no peaks there is nothing to pair up; skip the zero-sized allocations and the
            // zero-block launch entirely and fall through with empty candidate lists.
            if (peaks.Count > 0)
            {
                var lengthSquared    = peaks.Count * peaks.Count;
                var peaksArray       = peaks.ToArray();
                var lengthOfPeakList = new[] { peaks.Count };
                var outputArray      = new char[peaks.Count, peaks.Count, slotsPerPeakPair];
                var errorArray       = new double[peaks.Count, peaks.Count, slotsPerPeakPair];
                var modMassList      = ModificationMass;

                try
                {
                    char[,,] outputArrayDevice      = gpu.Allocate(outputArray);
                    double[,,] errorArrayDevice     = gpu.Allocate(errorArray);
                    double[] peaksDevice            = gpu.Allocate<double>(peaksArray.Length);
                    int[]    lengthOfPeakListDevice = gpu.Allocate<int>(lengthOfPeakList.Length);
                    double[] ptmMassListDevice      = gpu.Allocate<double>(modMassList.Length);

                    gpu.CopyToDevice(peaksArray, peaksDevice);
                    gpu.CopyToDevice(lengthOfPeakList, lengthOfPeakListDevice);
                    gpu.CopyToDevice(modMassList, ptmMassListDevice);

                    // One thread per (i, j, slot) cell, N threads per block.
                    int block = (int)Math.Ceiling((double)lengthSquared * slotsPerPeakPair / N);

                    gpu.Launch(block, N).PtmExtractor(peaksDevice, lengthOfPeakListDevice, ptmMassListDevice,
                                                      outputArrayDevice, errorArrayDevice);
                    gpu.CopyFromDevice(outputArrayDevice, outputArray);
                    // errorArray is copied back but not consumed below.
                    gpu.CopyFromDevice(errorArrayDevice, errorArray);
                }
                finally
                {
                    // Release device memory even when allocation, copy or launch fails.
                    gpu.FreeAll();
                }

                // Every non-'\0' cell is an index into the modification lookup tables and marks a
                // candidate modification spanning peaks[i] .. peaks[j].
                for (var i = 0; i < peaks.Count; i++)
                {
                    for (var j = 0; j < peaks.Count; j++)
                    {
                        for (var k = 0; k < slotsPerPeakPair; k++)
                        {
                            var code = outputArray[i, j, k];
                            if (code == '\0')
                            {
                                continue;
                            }
                            aminoAcidList.Add(ModificationAminoAcids[code].ToString());
                            modificationList.Add(ModificationName[code]);
                            startList.Add(peaks[i]);
                            endList.Add(peaks[j]);
                        }
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("GPU Generation: " + stopwatch.Elapsed);

            // PTM shortlisting
            stopwatch.Restart();
            foreach (var protein in candidateProteinsList)
            {
                var sequence                = protein.Sequence.ToCharArray();
                var hopI                    = 0; // cursor into the candidate lists
                var thrI                    = 0; // cursor into the in-silico left masses
                var shortlistedAminoAcid    = new List<string>();
                var shortlistedModification = new List<string>();
                var shortlistedEnd          = new List<double>();
                var shortlistedIndex        = new List<int>();

                try
                {
                    var massLeft = protein.InsilicoDetails.InsilicoMassLeft;

                    // Nothing to walk when the GPU stage produced no candidates.
                    while (startList.Count > 0)
                    {
                        // Skip candidates that start before the last accepted modification ends.
                        if (shortlistedEnd.Count > 0 && shortlistedEnd[shortlistedEnd.Count - 1] > startList[hopI])
                        {
                            hopI++;
                            if (hopI == startList.Count)
                            {
                                break;
                            }
                            continue;
                        }

                        var diff = startList[hopI] - massLeft[thrI];
                        if (diff <= userHopThreshold && diff >= -userHopThreshold)
                        {
                            // Start mass lines up; the candidate residue is compared with the one at thrI + 2.
                            if (aminoAcidList[hopI] == sequence[thrI + 2].ToString())
                            {
                                // Only the part of the name before the first '_' is used for the mass lookup.
                                var nameParts = modificationList[hopI].Split('_');
                                var modMass   = AminoAcids.ModificationTable(nameParts[0]);

                                var endError = Math.Abs(endList[hopI] - (massLeft[thrI + 1] + modMass));
                                if (string.Equals(pepUnit, "ppm", StringComparison.Ordinal))
                                {
                                    endError = endError / molW * 1000000;
                                }
                                else if (string.Equals(pepUnit, "%", StringComparison.Ordinal))
                                {
                                    endError = endError / molW * 100;
                                }

                                if (endError < pepTol)
                                {
                                    // Accept: every later in-silico mass and the protein mass carry the modification.
                                    for (var i = thrI + 1; i < massLeft.Count; i++)
                                    {
                                        massLeft[i] = massLeft[i] + modMass;
                                    }
                                    protein.Mw = protein.Mw + modMass;
                                    shortlistedAminoAcid.Add(aminoAcidList[hopI]);
                                    shortlistedModification.Add(modificationList[hopI]);
                                    shortlistedEnd.Add(endList[hopI]);
                                    shortlistedIndex.Add(thrI);
                                }
                            }
                        }
                        else if (diff > userHopThreshold)
                        {
                            // Candidate lies beyond the current in-silico mass: advance along the protein.
                            thrI++;
                            if (thrI == massLeft.Count - 1)
                            {
                                break;
                            }
                            continue;
                        }
                        else if (diff < -userHopThreshold)
                        {
                            // Candidate lies before the current in-silico mass: advance to the next candidate.
                            hopI++;
                            if (hopI == startList.Count)
                            {
                                break;
                            }
                            continue;
                        }

                        hopI++;
                        if (hopI == startList.Count)
                        {
                            break;
                        }
                    }
                }
                catch (Exception exception)
                {
                    // The cursors do not advance on a failed step, so retrying would fail the same way
                    // forever. Log it and stop the walk for this protein, keeping what was shortlisted.
                    Debug.WriteLine(exception.Message);
                }

                for (var hit = 0; hit < shortlistedIndex.Count; hit++)
                {
                    var site = new PostTranslationModificationsSiteDto
                    {
                        Index     = shortlistedIndex[hit],
                        ModName   = shortlistedModification[hit],
                        // NOTE(review): the lookup here uses the full modification name, whereas the
                        // shortlisting above uses only the part before '_' - confirm this is intended.
                        ModWeight = AminoAcids.ModificationTable(shortlistedModification[hit]),
                        Site      = Convert.ToChar(shortlistedAminoAcid[hit])
                    };
                    protein.PtmParticulars.Add(site);
                }

                // Score is the square root of the absolute mass error. (A former "Math.Abs(...) < 0"
                // special case could never be true and has been removed; results are unchanged.)
                var massError = Math.Abs(molW - protein.Mw);
                protein.MwScore = Math.Pow(massError, 0.5);
            }
            stopwatch.Stop();
            Console.WriteLine("Shortlisting :" + stopwatch.Elapsed);
        }
예제 #30
0
        /// <summary>
        /// Builds the HGVS protein-level notation for a variant by classifying the amino-acid change
        /// and delegating to the matching <c>HgvspNotation</c> / frameshift formatter.
        /// </summary>
        /// <param name="transcript">Transcript supplying the peptide sequence and the protein ID.</param>
        /// <param name="refAminoAcids">Reference amino acids; truncated after the first stop codon when the stop is not the last residue.</param>
        /// <param name="altAminoAcids">Alternate amino acids.</param>
        /// <param name="transcriptAltAllele">Alternate allele on the transcript, forwarded to the null check and the frameshift/extension helpers.</param>
        /// <param name="position">Mapped position providing <c>CdsStart</c>, <c>CdsEnd</c> and <c>ProteinStart</c>.</param>
        /// <param name="variantEffect">Variant effect used for classification and for the stop-gained / stop-retained flags.</param>
        /// <param name="variant">Variant forwarded to the null check.</param>
        /// <param name="refSequence">Reference sequence forwarded to the frameshift/extension helpers.</param>
        /// <param name="hgvscNotation">HGVS coding notation, used by the null check and the silent notation.</param>
        /// <param name="isMitochondrial">Forwarded to the frameshift/extension helpers.</param>
        /// <returns>
        /// The notation string, or <c>null</c> when <c>IsHgvspNull</c> reports there is nothing to
        /// annotate or the protein change matches none of the handled categories.
        /// </returns>
        public static string GetHgvsProteinAnnotation(
            ITranscript transcript,
            string refAminoAcids,
            string altAminoAcids,
            string transcriptAltAllele,
            IMappedPosition position,
            VariantEffect variantEffect,
            ISimpleVariant variant,
            ISequence refSequence,
            string hgvscNotation,
            bool isMitochondrial)
        {
            if (IsHgvspNull(transcriptAltAllele, position.CdsStart, position.CdsEnd, variant, hgvscNotation))
            {
                return null;
            }

            var peptides = transcript.Translation.PeptideSeq;

            // The reference amino acids must never run past the stop codon: when a stop appears
            // anywhere but the final position, keep only the part up to and including it.
            bool stopBeforeEnd = !refAminoAcids.EndsWith(AminoAcids.StopCodon) &&
                                 refAminoAcids.Contains(AminoAcids.StopCodon);
            if (stopBeforeEnd)
            {
                refAminoAcids = refAminoAcids.OptimizedSplit(AminoAcids.StopCodon[0])[0] + AminoAcids.StopCodon;
            }

            int start = position.ProteinStart;

            HgvsUtilities.ShiftAndRotateAlleles(ref start, ref refAminoAcids, ref altAminoAcids, peptides);

            var stop      = start + refAminoAcids.Length - 1;
            var refAbbrev = AminoAcids.GetAbbreviations(refAminoAcids);
            var altAbbrev = AminoAcids.GetAbbreviations(altAminoAcids);

            var proteinId = transcript.Translation.ProteinId.WithVersion;
            var change    = GetProteinChange(start, refAminoAcids, altAminoAcids, peptides, variantEffect);

            // ReSharper disable once SwitchStatementMissingSomeCases
            switch (change)
            {
                case ProteinChange.None:
                {
                    return HgvspNotation.GetSilentNotation(hgvscNotation, start, refAbbrev, variantEffect.IsStopRetained());
                }

                case ProteinChange.Unknown:
                {
                    return HgvspNotation.GetUnknownNotation(proteinId, start, stop, refAbbrev, altAbbrev);
                }

                case ProteinChange.Substitution:
                {
                    return HgvspNotation.GetSubstitutionNotation(proteinId, start, refAbbrev, altAbbrev);
                }

                case ProteinChange.Deletion:
                {
                    return HgvspNotation.GetDeletionNotation(proteinId, start, stop, refAbbrev, variantEffect.IsStopGained());
                }

                case ProteinChange.Insertion:
                {
                    // The insertion formatter receives the two coordinates exchanged.
                    Swap.Int(ref start, ref stop);
                    return HgvspNotation.GetInsertionNotation(proteinId, start, stop, altAbbrev, peptides);
                }

                case ProteinChange.DelIns:
                {
                    return HgvspNotation.GetDelInsNotation(proteinId, start, stop, refAbbrev, altAbbrev);
                }

                case ProteinChange.Duplication:
                {
                    // Move the start back by the length of the alternate amino acids; the end stays as computed.
                    start -= altAminoAcids.Length;
                    return HgvspNotation.GetDuplicationNotation(proteinId, start, stop, altAbbrev);
                }

                case ProteinChange.Frameshift:
                {
                    return GetHgvsFrameshiftNotation(refSequence, position.CdsStart, position.CdsEnd, transcriptAltAllele,
                                                     transcript, isMitochondrial, proteinId, start, stop);
                }

                case ProteinChange.Extension:
                {
                    var altPeptides = HgvsUtilities.GetAltPeptideSequence(refSequence, position.CdsStart, position.CdsEnd,
                                                                          transcriptAltAllele, transcript, isMitochondrial);

                    // Use the alternate residue at the start position when the alternate peptide
                    // reaches that far; otherwise report "Ter".
                    string extensionAlt;
                    if (start <= altPeptides.Length)
                    {
                        extensionAlt = AminoAcids.ConvertAminoAcidToAbbreviation(altPeptides[start - 1]);
                    }
                    else
                    {
                        extensionAlt = "Ter";
                    }

                    var residuesToStop = HgvsUtilities.GetNumAminoAcidsUntilStopCodon(altPeptides, peptides, start - 1, false);

                    return HgvspNotation.GetExtensionNotation(proteinId, start, refAbbrev, extensionAlt, residuesToStop);
                }

                case ProteinChange.StartLost:
                {
                    return HgvspNotation.GetStartLostNotation(proteinId, start, stop, refAbbrev);
                }
            }

            return null;
        }