// Exercises Digest.AddDigestFeatures on one test protein in three configurations:
// no missed cleavages (4 peptides expected), one missed cleavage (7 expected), and
// no missed cleavages combined with an NGlycanFilter (2 expected, checked by sequence).
public void TestAddDigestFeatures()
{
    var protein = new Sequence("Test", "AAANESAARBBBNPSBBKFFFNITFFRGGGNDTGGR");
    var digester = new Digest
    {
        DigestProtease = ProteaseManager.CreateProtease("Trypsin_TestAddDigestFeatures", true, "RK", "P"),
        ProteinSequence = protein,
        MaxMissedCleavages = 0
    };

    // Configuration 1: no missed cleavages.
    digester.AddDigestFeatures();
    var fullyCleaved = (List<DigestPeptideInfo>)protein.Annotation[Digest.PEPTIDE_FEATURE_TYPE];
    Assert.AreEqual(4, fullyCleaved.Count);

    // Configuration 2: one missed cleavage allowed.
    digester.MaxMissedCleavages = 1;
    digester.AddDigestFeatures();
    var withMissedCleavage = (List<DigestPeptideInfo>)protein.Annotation[Digest.PEPTIDE_FEATURE_TYPE];
    Assert.AreEqual(7, withMissedCleavage.Count);

    // Configuration 3: back to no missed cleavages, now with the N-glycan filter attached.
    IRangeLocationFilter glycanFilter = new NGlycanFilter();
    digester.MaxMissedCleavages = 0;
    digester.Filter = glycanFilter;
    digester.AddDigestFeatures();
    var glycanOnly = (List<DigestPeptideInfo>)protein.Annotation[Digest.PEPTIDE_FEATURE_TYPE];
    Assert.AreEqual(2, glycanOnly.Count);
    Assert.AreEqual("AAANESAAR", glycanOnly[0].PeptideSeq);
    Assert.AreEqual("FFFNITFFR", glycanOnly[1].PeptideSeq);
}
/// <summary>
/// Rebuilds the <c>miss0</c> and <c>miss1</c> lookup tables from the given sequences.
/// Each sequence is digested with <c>options.Enzyme</c> allowing one missed cleavage;
/// peptides shorter than <c>options.MinLength</c> are skipped. The dictionary key is the
/// peptide with every 'I' replaced by 'L', the value is the unmodified peptide sequence.
/// Peptides whose <c>MissCleavage</c> is 0 go to <c>miss0</c>, all others to <c>miss1</c>.
/// </summary>
/// <param name="seqs">Sequences to digest.</param>
private void GetDigestPeptide(List<Sequence> seqs)
{
    miss0 = new Dictionary<string, string>();
    miss1 = new Dictionary<string, string>();

    foreach (var protein in seqs)
    {
        var digester = new Digest
        {
            DigestProtease = options.Enzyme,
            ProteinSequence = protein,
            MaxMissedCleavages = 1
        };
        digester.AddDigestFeatures();

        foreach (var peptide in protein.GetDigestPeptideInfo())
        {
            var rawSequence = peptide.PeptideSeq;
            if (rawSequence.Length < options.MinLength)
            {
                continue;
            }

            // Later entries with the same I->L key overwrite earlier ones.
            var normalizedKey = rawSequence.Replace('I', 'L');
            if (peptide.MissCleavage != 0)
            {
                miss1[normalizedKey] = rawSequence;
            }
            else
            {
                miss0[normalizedKey] = rawSequence;
            }
        }
    }
}
/// <summary>
/// Reads the proteins from the origin FASTA file, digests each one with the protease selected
/// in <c>proteases</c> (up to 2 missed cleavages), and collects a <c>SimplePeakChro</c> for every
/// peptide of length 6 or more at charge 2 and 3 whose precursor m/z is not below 300 or above 2000.
/// Peptides that produced peaks get them stored under <c>CHRO_KEY</c>; the rest are removed
/// from the protein's peptide list.
/// </summary>
/// <returns>A <c>ProteinChromatographProcessor</c> built from all collected peaks and the raw file.</returns>
protected override IFileProcessor GetFileProcessor()
{
    proteins = SequenceUtils.Read(new FastaFormat(), base.GetOriginFile());

    Protease selectedProtease = ProteaseManager.GetProteaseByName(proteases.SelectedItem);
    var digester = new Digest();
    digester.DigestProtease = selectedProtease;
    digester.MaxMissedCleavages = 2;

    var allPeaks = new List<SimplePeakChro>();
    foreach (var protein in proteins)
    {
        digester.ProteinSequence = protein;
        digester.AddDigestFeatures();

        List<DigestPeptideInfo> candidates = protein.GetDigestPeptideInfo();
        candidates.RemoveAll(p => p.PeptideSeq.Length < 6);

        foreach (var peptide in candidates)
        {
            double monoMass = aas.MonoPeptideMass(peptide.PeptideSeq);
            var peptidePeaks = new List<SimplePeakChro>();

            for (int charge = 2; charge <= 3; charge++)
            {
                double precursor = (monoMass + Atom.H.MonoMass * charge) / charge;
                bool outOfRange = precursor < 300 || precursor > 2000;
                if (!outOfRange)
                {
                    var peak = new SimplePeakChro();
                    peak.Mz = precursor;
                    peak.Sequence = peptide.PeptideSeq;
                    peak.Charge = charge;
                    peptidePeaks.Add(peak);
                }
            }

            if (peptidePeaks.Count == 0)
            {
                continue;
            }

            peptide.Annotations[CHRO_KEY] = peptidePeaks;
            allPeaks.AddRange(peptidePeaks);
        }

        // Drop every peptide that ended up without any peak in the accepted m/z window.
        candidates.RemoveAll(p => !p.Annotations.ContainsKey(CHRO_KEY));
    }

    return new ProteinChromatographProcessor(
        allPeaks,
        new List<string> { rawFile.FullName },
        new RawFileImpl(),
        ppmTolerance.Value,
        2.0,
        rebuildAll.Checked);
}
/// <summary>
/// Digests every sequence of a FASTA file with the "Trypsin" protease (one missed cleavage allowed)
/// using an <c>NGlycanFilter</c>, and writes the resulting peptides to <c>filename + ".nglycan"</c>.
/// Each output line holds the peptide, a tab, a string of 0/1 flags (one per residue, taken from
/// the filter's <c>IsNglycan</c> array), a tab, and the protein names separated by " ! ".
/// Lines are ordered by the default string sort of the peptide sequences.
/// </summary>
/// <param name="filename">Path of the FASTA file to read.</param>
/// <returns>A single-element array containing the path of the written result file.</returns>
public override IEnumerable<string> Process(string filename)
{
    var fastaReader = new FastaFormat();

    var digester = new Digest();
    digester.DigestProtease = ProteaseManager.FindOrCreateProtease("Trypsin", true, "RK", "P");
    digester.MaxMissedCleavages = 1;

    var glycanFilter = new NGlycanFilter();
    digester.Filter = glycanFilter;

    string resultFile = filename + ".nglycan";
    var peptideProteinMap = new Dictionary<string, NGlycanValue>();

    using (var reader = new StreamReader(filename))
    {
        for (Sequence protein = fastaReader.ReadSequence(reader);
             protein != null;
             protein = fastaReader.ReadSequence(reader))
        {
            digester.ProteinSequence = protein;
            digester.AddDigestFeatures();

            if (!protein.Annotation.ContainsKey(Digest.PEPTIDE_FEATURE_TYPE))
            {
                continue;
            }

            // Read the per-residue flags after the digest has run for this protein.
            bool[] siteFlags = glycanFilter.IsNglycan;
            var accepted = (List<DigestPeptideInfo>)protein.Annotation[Digest.PEPTIDE_FEATURE_TYPE];

            foreach (DigestPeptideInfo peptide in accepted)
            {
                NGlycanValue entry;
                if (!peptideProteinMap.TryGetValue(peptide.PeptideSeq, out entry))
                {
                    // First occurrence of this peptide: record its 0/1 site pattern.
                    var sitePattern = new StringBuilder();
                    for (int offset = 0; offset < peptide.PeptideSeq.Length; offset++)
                    {
                        // PeptideLoc.Min - 1 converts the peptide start to an index into siteFlags.
                        sitePattern.Append(siteFlags[peptide.PeptideLoc.Min - 1 + offset] ? 1 : 0);
                    }

                    entry = new NGlycanValue();
                    entry.NGlycanSites = sitePattern.ToString();
                    peptideProteinMap[peptide.PeptideSeq] = entry;
                }

                entry.Proteins.Add(parser.GetValue(peptide.ProteinName));
            }
        }
    }

    var sortedPeptides = new List<string>(peptideProteinMap.Keys);
    sortedPeptides.Sort();

    using (var writer = new StreamWriter(resultFile))
    {
        foreach (string peptideSeq in sortedPeptides)
        {
            NGlycanValue stored = peptideProteinMap[peptideSeq];
            writer.Write(peptideSeq + "\t" + stored.NGlycanSites + "\t");

            // Empty before the first protein, " ! " before every following one.
            string separator = string.Empty;
            foreach (string protein in stored.Proteins)
            {
                writer.Write(separator + protein);
                separator = " ! ";
            }

            writer.WriteLine();
        }
    }

    return new[] { resultFile };
}
/// <summary>
/// Reads the FASTA file and processes its data: every sequence is digested by each configured
/// protease in turn (the peptides kept from one protease are the input of the next), and the
/// peptides that pass the amino-acid, length and mass filters are collected without duplicates.
/// The result is written to <c>fastaFile + ".pep"</c>, one peptide per line, ordered by length
/// and then by string comparison.
/// </summary>
/// <param name="fastaFile">Path of the FASTA file to read.</param>
/// <returns>A single-element array containing the path of the written peptide file.</returns>
/// <exception cref="UserTerminatedException">Thrown when <c>Progress</c> reports a pending cancellation.</exception>
public override IEnumerable<string> Process(string fastaFile)
{
    var uniquePeptides = new HashSet<string>();
    var fastaReader = new FastaFormat();

    using (var reader = new StreamReader(fastaFile))
    {
        Progress.SetRange(0, reader.BaseStream.Length);

        var massCalculator = new Aminoacids();

        // Single predicate with the same check order as before: ignored amino acids, length, mass.
        Predicate<string> accepts = candidate =>
        {
            foreach (var aa in ignoreAminoacids)
            {
                if (candidate.Contains(aa))
                {
                    return false;
                }
            }

            bool longEnough = candidate.Length >= minLength;
            if (!longEnough)
            {
                return false;
            }

            var mass = massCalculator.MonoPeptideMass(candidate);
            return mass >= minMass && mass <= maxMass;
        };

        var digesters = new List<Digest>();
        foreach (var protease in proteases)
        {
            digesters.Add(new Digest
            {
                DigestProtease = protease,
                MaxMissedCleavages = maxMissCleavage
            });
        }

        Progress.SetMessage("Digesting sequences ...");
        for (Sequence protein = fastaReader.ReadSequence(reader);
             protein != null;
             protein = fastaReader.ReadSequence(reader))
        {
            Progress.SetPosition(reader.GetCharpos());
            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            // Start from the whole protein; each digester replaces the working set
            // with the accepted peptides it produced from the previous set.
            var working = new HashSet<string>();
            working.Add(protein.SeqString);

            foreach (var digester in digesters)
            {
                var inputs = working;
                working = new HashSet<string>();

                foreach (var input in inputs)
                {
                    var fragment = new Sequence(input, input);
                    digester.ProteinSequence = fragment;
                    digester.AddDigestFeatures();

                    foreach (var info in fragment.GetDigestPeptideInfo())
                    {
                        if (accepts(info.PeptideSeq))
                        {
                            working.Add(info.PeptideSeq);
                        }
                    }
                }
            }

            uniquePeptides.UnionWith(working);
        }
    }

    Progress.SetMessage("Sorting sequences ...");
    var ordered = new List<string>(uniquePeptides);
    ordered.Sort((left, right) =>
    {
        int byLength = left.Length.CompareTo(right.Length);
        return byLength != 0 ? byLength : left.CompareTo(right);
    });

    var resultFile = fastaFile + ".pep";
    using (var writer = new StreamWriter(resultFile))
    {
        foreach (var peptide in ordered)
        {
            writer.WriteLine(peptide);
        }
    }

    return new[] { resultFile };
}