Exemplo n.º 1
0
    /// <summary>
    /// Verifies the peptide annotations written by <c>Digest.AddDigestFeatures</c> for a
    /// fixed test protein in three configurations: no missed cleavages, one missed
    /// cleavage, and no missed cleavages combined with an <c>NGlycanFilter</c>.
    /// </summary>
    public void TestAddDigestFeatures()
    {
      Sequence protein = new Sequence("Test", "AAANESAARBBBNPSBBKFFFNITFFRGGGNDTGGR");

      // NOTE(review): "RK" / "P" presumably mean "cleave at R or K, but not next to P" - confirm against ProteaseManager.
      Digest digester = new Digest
      {
        DigestProtease = ProteaseManager.CreateProtease("Trypsin_TestAddDigestFeatures", true, "RK", "P"),
        ProteinSequence = protein,
        MaxMissedCleavages = 0
      };

      // Stage 1: no missed cleavages -> 4 peptides expected.
      digester.AddDigestFeatures();
      var fullyCleaved = (List<DigestPeptideInfo>)protein.Annotation[Digest.PEPTIDE_FEATURE_TYPE];
      Assert.AreEqual(4, fullyCleaved.Count);

      // Stage 2: allow one missed cleavage -> 7 peptides expected.
      digester.MaxMissedCleavages = 1;
      digester.AddDigestFeatures();
      var withMissedCleavage = (List<DigestPeptideInfo>)protein.Annotation[Digest.PEPTIDE_FEATURE_TYPE];
      Assert.AreEqual(7, withMissedCleavage.Count);

      // Stage 3: back to zero missed cleavages, keeping only what the N-glycan filter accepts.
      IRangeLocationFilter glycanOnly = new NGlycanFilter();
      digester.MaxMissedCleavages = 0;
      digester.Filter = glycanOnly;
      digester.AddDigestFeatures();
      var glycoPeptides = (List<DigestPeptideInfo>)protein.Annotation[Digest.PEPTIDE_FEATURE_TYPE];
      Assert.AreEqual(2, glycoPeptides.Count);
      Assert.AreEqual("AAANESAAR", glycoPeptides[0].PeptideSeq);
      Assert.AreEqual("FFFNITFFR", glycoPeptides[1].PeptideSeq);
    }
Exemplo n.º 2
0
        /// <summary>
        /// Digests every sequence with <c>options.Enzyme</c> (at most one missed cleavage) and
        /// rebuilds the <c>miss0</c> / <c>miss1</c> lookup tables from the resulting peptides.
        /// Peptides shorter than <c>options.MinLength</c> are skipped. Each table maps the
        /// peptide sequence with every 'I' replaced by 'L' to the original peptide sequence.
        /// </summary>
        /// <param name="seqs">Sequences to digest.</param>
        private void GetDigestPeptide(List <Sequence> seqs)
        {
            miss0 = new Dictionary<string, string>();
            miss1 = new Dictionary<string, string>();

            foreach (var protein in seqs)
            {
                var digester = new Digest
                {
                    DigestProtease     = options.Enzyme,
                    ProteinSequence    = protein,
                    MaxMissedCleavages = 1
                };
                digester.AddDigestFeatures();

                foreach (var peptide in protein.GetDigestPeptideInfo())
                {
                    if (peptide.PeptideSeq.Length < options.MinLength)
                    {
                        continue;
                    }

                    // NOTE(review): the I -> L substitution presumably makes isoleucine and
                    // leucine variants share one key - confirm with the callers of miss0/miss1.
                    var normalizedKey = peptide.PeptideSeq.Replace('I', 'L');

                    // Peptides without missed cleavages go to miss0, everything else to miss1.
                    var destination = peptide.MissCleavage == 0 ? miss0 : miss1;
                    destination[normalizedKey] = peptide.PeptideSeq;
                }
            }
        }
        /// <summary>
        /// Loads the proteins from the origin FASTA file, digests each one with the selected
        /// protease (up to two missed cleavages) and builds the list of precursor peaks that
        /// the returned <c>ProteinChromatographProcessor</c> works on.
        /// </summary>
        /// <remarks>
        /// Peptides shorter than 6 residues are removed from each protein's peptide list. For
        /// the remaining peptides a peak is created for charge 2 and charge 3 unless its m/z
        /// is below 300 or above 2000. Peptides that received at least one peak get the peaks
        /// stored under <c>CHRO_KEY</c>; all others are removed from the list.
        /// </remarks>
        /// <returns>A processor configured with the collected peaks and the selected raw file.</returns>
        protected override IFileProcessor GetFileProcessor()
        {
            proteins = SequenceUtils.Read(new FastaFormat(), base.GetOriginFile());

            Protease selectedProtease = ProteaseManager.GetProteaseByName(proteases.SelectedItem);
            Digest   digester         = new Digest();
            digester.DigestProtease     = selectedProtease;
            digester.MaxMissedCleavages = 2;

            var allPeaks = new List<SimplePeakChro>();

            foreach (var protein in proteins)
            {
                digester.ProteinSequence = protein;
                digester.AddDigestFeatures();

                // The list is filtered in place (RemoveAll) below, twice.
                List<DigestPeptideInfo> candidates = protein.GetDigestPeptideInfo();
                candidates.RemoveAll(p => p.PeptideSeq.Length < 6);

                foreach (var peptide in candidates)
                {
                    double peptideMass = aas.MonoPeptideMass(peptide.PeptideSeq);
                    var    peaks       = new List<SimplePeakChro>();

                    foreach (int z in new[] { 2, 3 })
                    {
                        double mz = (peptideMass + Atom.H.MonoMass * z) / z;

                        // Written as a negated "outside the window" test so that the result
                        // for every value (including NaN) matches the original condition.
                        bool outsideWindow = mz < 300 || mz > 2000;
                        if (!outsideWindow)
                        {
                            peaks.Add(new SimplePeakChro()
                            {
                                Mz       = mz,
                                Sequence = peptide.PeptideSeq,
                                Charge   = z
                            });
                        }
                    }

                    if (peaks.Count == 0)
                    {
                        continue;
                    }

                    peptide.Annotations[CHRO_KEY] = peaks;
                    allPeaks.AddRange(peaks);
                }

                // Drop peptides for which no charge state produced a usable peak.
                candidates.RemoveAll(p => !p.Annotations.ContainsKey(CHRO_KEY));
            }

            // NOTE(review): the meaning of the literal 2.0 argument is not visible here - confirm
            // against the ProteinChromatographProcessor constructor.
            return new ProteinChromatographProcessor(
                allPeaks,
                new List<string> { rawFile.FullName },
                new RawFileImpl(),
                ppmTolerance.Value,
                2.0,
                rebuildAll.Checked);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Digests every sequence of a FASTA file (at most one missed cleavage, peptides
        /// restricted by an <c>NGlycanFilter</c>) and writes one line per distinct peptide to
        /// <c>filename + ".nglycan"</c>.
        /// </summary>
        /// <remarks>
        /// Each output line is: peptide sequence, TAB, a string of '0'/'1' flags with one
        /// character per residue taken from <c>NGlycanFilter.IsNglycan</c>, TAB, and the values
        /// added for the peptide's proteins joined by " ! ". Lines are sorted by peptide
        /// sequence. The flag string is computed the first time a peptide is seen.
        /// </remarks>
        /// <param name="filename">Path of the FASTA file to read.</param>
        /// <returns>A single-element array holding the path of the written result file.</returns>
        public override IEnumerable <string> Process(string filename)
        {
            FastaFormat ff     = new FastaFormat();
            Digest      digest = new Digest();

            digest.DigestProtease     = ProteaseManager.FindOrCreateProtease("Trypsin", true, "RK", "P");
            digest.MaxMissedCleavages = 1;

            NGlycanFilter filter = new NGlycanFilter();

            digest.Filter = filter;

            string resultFile = filename + ".nglycan";
            Dictionary <string, NGlycanValue> peptideProteinMap = new Dictionary <string, NGlycanValue>();

            using (StreamReader sr = new StreamReader(filename))
            {
                Sequence seq;
                while ((seq = ff.ReadSequence(sr)) != null)
                {
                    digest.ProteinSequence = seq;
                    digest.AddDigestFeatures();

                    if (!seq.Annotation.ContainsKey(Digest.PEPTIDE_FEATURE_TYPE))
                    {
                        continue;
                    }

                    // Read after digestion, as in the original.
                    // NOTE(review): presumably holds one flag per residue of the sequence just digested - confirm.
                    bool[] isGlycans = filter.IsNglycan;

                    List <DigestPeptideInfo> nglycanPeptides = (List <DigestPeptideInfo>)seq.Annotation[Digest.PEPTIDE_FEATURE_TYPE];
                    foreach (DigestPeptideInfo dpi in nglycanPeptides)
                    {
                        // Single lookup instead of ContainsKey + indexer set + indexer get.
                        NGlycanValue entry;
                        if (!peptideProteinMap.TryGetValue(dpi.PeptideSeq, out entry))
                        {
                            // NOTE(review): "Min - 1" assumes PeptideLoc.Min is a 1-based position - confirm.
                            var offset = dpi.PeptideLoc.Min - 1;

                            StringBuilder siteFlags = new StringBuilder(dpi.PeptideSeq.Length);
                            for (int i = 0; i < dpi.PeptideSeq.Length; i++)
                            {
                                siteFlags.Append(isGlycans[offset + i] ? '1' : '0');
                            }

                            entry = new NGlycanValue();
                            entry.NGlycanSites = siteFlags.ToString();

                            peptideProteinMap[dpi.PeptideSeq] = entry;
                        }

                        entry.Proteins.Add(parser.GetValue(dpi.ProteinName));
                    }
                }
            }

            List <string> peptides = new List <string>(peptideProteinMap.Keys);

            peptides.Sort();

            using (StreamWriter sw = new StreamWriter(resultFile))
            {
                foreach (string pep in peptides)
                {
                    NGlycanValue written = peptideProteinMap[pep];
                    sw.Write(pep + "\t" + written.NGlycanSites + "\t");

                    // Emits the proteins separated by " ! " without a leading separator.
                    string separator = "";
                    foreach (string protein in written.Proteins)
                    {
                        sw.Write(separator + protein);
                        separator = " ! ";
                    }
                    sw.WriteLine();
                }
            }

            return(new[] { resultFile });
        }
Exemplo n.º 5
0
        /// <summary>
        /// Reads a FASTA file, digests every sequence with the configured proteases one after
        /// another, and writes the distinct peptides that pass the filters to
        /// <c>fastaFile + ".pep"</c>, one peptide per line.
        /// </summary>
        /// <remarks>
        /// The peptides produced by one protease are the input of the next one. After each
        /// digestion a peptide is kept only if it contains none of <c>ignoreAminoacids</c>, has
        /// at least <c>minLength</c> residues and a monoisotopic mass within
        /// [<c>minMass</c>, <c>maxMass</c>]. Output is ordered by length, then by
        /// <c>string.CompareTo</c>. Throws <c>UserTerminatedException</c> when cancellation
        /// is pending.
        /// </remarks>
        /// <param name="fastaFile">Path of the FASTA file to read.</param>
        /// <returns>A single-element array holding the path of the written peptide file.</returns>
        public override IEnumerable <string> Process(string fastaFile)
        {
            var uniquePeptides = new HashSet<string>();
            var fastaReader    = new FastaFormat();

            using (var reader = new StreamReader(fastaFile))
            {
                Progress.SetRange(0, reader.BaseStream.Length);

                var massCalculator = new Aminoacids();

                // One predicate with the same check order as before: residues, length, mass.
                Predicate<string> isAcceptable = candidate =>
                {
                    foreach (var ignored in ignoreAminoacids)
                    {
                        if (candidate.Contains(ignored))
                        {
                            return false;
                        }
                    }

                    if (candidate.Length >= minLength)
                    {
                        var mass = massCalculator.MonoPeptideMass(candidate);
                        return mass >= minMass && mass <= maxMass;
                    }

                    return false;
                };

                // One reusable digester per configured protease.
                var digesters = new List<Digest>();
                foreach (var protease in proteases)
                {
                    digesters.Add(new Digest
                    {
                        DigestProtease     = protease,
                        MaxMissedCleavages = maxMissCleavage
                    });
                }

                Progress.SetMessage("Digesting sequences ...");

                Sequence protein;
                while ((protein = fastaReader.ReadSequence(reader)) != null)
                {
                    Progress.SetPosition(reader.GetCharpos());

                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    // Start from the whole protein; each digester replaces the working set
                    // with the accepted peptides it produced from the previous set.
                    var working = new HashSet<string> { protein.SeqString };

                    foreach (var digester in digesters)
                    {
                        var produced = new HashSet<string>();

                        foreach (var substrate in working)
                        {
                            var wrapped = new Sequence(substrate, substrate);
                            digester.ProteinSequence = wrapped;
                            digester.AddDigestFeatures();

                            foreach (var info in wrapped.GetDigestPeptideInfo())
                            {
                                if (isAcceptable(info.PeptideSeq))
                                {
                                    produced.Add(info.PeptideSeq);
                                }
                            }
                        }

                        working = produced;
                    }

                    uniquePeptides.UnionWith(working);
                }
            }

            Progress.SetMessage("Sorting sequences ...");
            var ordered = new List<string>(uniquePeptides);

            // Shorter peptides first; ties resolved by string.CompareTo.
            ordered.Sort((left, right) =>
            {
                var byLength = left.Length.CompareTo(right.Length);
                return byLength != 0 ? byLength : left.CompareTo(right);
            });

            var resultFile = fastaFile + ".pep";

            using (var writer = new StreamWriter(resultFile))
            {
                foreach (var peptide in ordered)
                {
                    writer.WriteLine(peptide);
                }
            }

            return new[] { resultFile };
        }