/// <summary>
/// Replaces all peptides of <paramref name="t"/> with one newly created
/// peptide whose sequence is <paramref name="value"/>.
/// </summary>
/// <param name="t">Object whose peptides are cleared and re-created.</param>
/// <param name="value">Sequence assigned to the new peptide.</param>
public override void SetProperty(T t, string value)
{
    // Existing peptides are discarded before the replacement is created.
    t.ClearPeptides();

    IIdentifiedPeptide created = t.NewPeptide();
    created.Sequence = value;
}
/// <summary>
/// Reads a Mascot result text file, detaches every peptide whose matched
/// sequence fails <c>validator</c>, filters the result down to entries that
/// still have a spectrum, and writes it to <c>fileName + ".Labeled"</c>.
/// </summary>
/// <param name="fileName">Path of the result file to read.</param>
/// <returns>A one-element array holding the path of the written file.</returns>
public override IEnumerable<string> Process(string fileName)
{
    var textFormat = new MascotResultTextFormat();
    IIdentifiedResult identified = textFormat.ReadFromFile(fileName);

    foreach (IIdentifiedSpectrum spectrum in identified.GetSpectra())
    {
        // Walk backwards so removing an entry does not shift the ones still to visit.
        for (int index = spectrum.Peptides.Count - 1; index >= 0; index--)
        {
            IIdentifiedPeptide candidate = spectrum.Peptides[index];
            string matched = PeptideUtils.GetMatchedSequence(candidate.Sequence);
            if (validator.Validate(matched))
            {
                continue;
            }

            spectrum.RemovePeptideAt(index);
            candidate.Spectrum = null;
        }
    }

    // Keep only the entries that are still attached to a spectrum.
    identified.Filter(m => m.Spectrum != null);

    string outputFile = fileName + ".Labeled";
    textFormat.WriteToFile(outputFile, identified);

    return new[] { outputFile };
}
/// <summary>
/// Writes one <c>alternative_protein</c> element for the protein at
/// <paramref name="iProtein"/> of <paramref name="pep"/>.
/// </summary>
/// <param name="sw">Writer receiving the element.</param>
/// <param name="pep">Peptide supplying the protein list and the sequence.</param>
/// <param name="iProtein">Index into <c>pep.Proteins</c>.</param>
private static void WritePeptideProtein(StreamWriter sw, IIdentifiedPeptide pep, int iProtein)
{
    var proteinName = pep.Proteins[iProtein];
    string sequence = pep.Sequence;

    // A '.' in the second position marks a leading flanking residue;
    // otherwise a blank is written.
    char previousResidue = ' ';
    if (sequence[1] == '.')
    {
        previousResidue = sequence[0];
    }

    // Likewise a '.' in the second-to-last position marks a trailing flanking residue.
    char nextResidue = ' ';
    if (sequence[sequence.Length - 2] == '.')
    {
        nextResidue = sequence[sequence.Length - 1];
    }

    sw.WriteLine(
        " <alternative_protein protein=\"{0}\" peptide_prev_aa=\"{1}\" peptide_next_aa=\"{2}\"/>",
        proteinName,
        previousResidue,
        nextResidue);
}
/// <summary>
/// Collapses peptides that share the same query id and sequence into a single
/// instance across all proteins of <paramref name="result"/>. The first
/// occurrence is kept; later occurrences are replaced by it, and the name of
/// the protein they were found in is added to the kept peptide.
/// </summary>
/// <param name="result">Result whose protein groups are scanned and updated in place.</param>
private void MergePeptides(MascotResult result)
{
    var firstSeen = new Dictionary<String, IIdentifiedPeptide>();

    foreach (IIdentifiedProteinGroup group in result)
    {
        foreach (IIdentifiedProtein protein in group)
        {
            for (int index = 0; index < protein.Peptides.Count; index++)
            {
                IIdentifiedPeptide current = protein.Peptides[index];
                String key = current.Spectrum.Query.QueryId + "_" + current.Sequence;

                IIdentifiedPeptide kept;
                if (firstSeen.TryGetValue(key, out kept))
                {
                    // Duplicate: reuse the earlier instance and record this protein on it.
                    kept.AddProtein(protein.Name);
                    protein.Peptides[index] = kept;
                }
                else
                {
                    firstSeen.Add(key, current);
                }
            }
        }
    }
}
/// <summary>
/// Removes the peptide stored at <paramref name="index"/> and clears its
/// <c>Spectrum</c> reference.
/// </summary>
/// <param name="index">Position of the peptide in the peptide list.</param>
public void RemovePeptideAt(int index)
{
    // Grab the peptide first: once it is removed it can no longer be reached by index.
    IIdentifiedPeptide detached = this.peptides[index];

    this.peptides.RemoveAt(index);
    detached.Spectrum = null;
}
/// <summary>
/// Adds <paramref name="peptide"/> to the peptide list and sets its
/// <c>SpectrumBase</c> to this instance. Does nothing when the peptide is
/// already in the list.
/// </summary>
/// <param name="peptide">Peptide to attach.</param>
public void AddPeptide(IIdentifiedPeptide peptide)
{
    if (this.peptides.Contains(peptide))
    {
        return;
    }

    this.peptides.Add(peptide);
    peptide.SpectrumBase = this;
}
/// <summary>
/// Removes <paramref name="peptide"/> from the peptide list via
/// <c>RemovePeptideAt</c>. Does nothing when the peptide is not in the list.
/// </summary>
/// <param name="peptide">Peptide to detach.</param>
public void RemovePeptide(IIdentifiedPeptide peptide)
{
    int position = this.peptides.IndexOf(peptide);
    if (position < 0)
    {
        // Not in the list: nothing to remove.
        return;
    }

    RemovePeptideAt(position);
}
/// <summary>
/// Takes the classification produced by the base implementation and
/// translates it through <c>identityMap</c>.
/// </summary>
/// <param name="obj">Peptide to classify.</param>
/// <returns>The <c>identityMap</c> entry for the base classification.</returns>
/// <exception cref="ArgumentException">
/// The base classification has no entry in <c>identityMap</c>.
/// </exception>
protected override string DoGetClassification(IIdentifiedPeptide obj)
{
    string baseName = base.DoGetClassification(obj);

    if (identityMap.ContainsKey(baseName))
    {
        return identityMap[baseName];
    }

    throw new ArgumentException(baseName + " does not match to any classified name defined in map, so classification failed!");
}
/// <summary>
/// Orders peptides by their spectrum. A peptide without a spectrum sorts
/// before one that has a spectrum; two peptides that both lack a spectrum
/// compare as equal.
/// </summary>
/// <param name="other">Peptide to compare with; may be null.</param>
/// <returns>
/// A positive value when <paramref name="other"/> is null or has no spectrum
/// while this peptide has one, a negative value when only this peptide lacks
/// a spectrum, zero when both lack one, otherwise the result of comparing the
/// two spectra.
/// </returns>
public int CompareTo(IIdentifiedPeptide other)
{
    // By IComparable convention every instance compares greater than null.
    if (other == null)
    {
        return 1;
    }

    var otherSpectrum = other.Spectrum;

    if (this.spectrum == null)
    {
        // Returning -1 here unconditionally made x.CompareTo(x) non-zero and let two
        // spectrum-less peptides each claim to be "less" than the other, which gives
        // sort routines an inconsistent ordering.
        return otherSpectrum == null ? 0 : -1;
    }

    if (otherSpectrum == null)
    {
        return 1;
    }

    return this.spectrum.CompareTo(otherSpectrum);
}
/// <summary>
/// Counts the positions in the matched sequence of <paramref name="pep"/>
/// where a character contained in <paramref name="p"/> is immediately
/// followed by a non-letter character.
/// </summary>
/// <param name="pep">Peptide whose sequence is examined.</param>
/// <param name="p">Characters that are counted when followed by a non-letter.</param>
/// <returns>The count formatted as a string.</returns>
private string GetModificationCount(IIdentifiedPeptide pep, string p)
{
    var sequence = PeptideUtils.GetMatchedSequence(pep.Sequence);
    int count = 0;

    // The last character is never a candidate because it has no successor.
    int index = 0;
    while (index < sequence.Length - 1)
    {
        if (p.Contains(sequence[index]) && !Char.IsLetter(sequence[index + 1]))
        {
            count++;
            // Step over the non-letter character that was just consumed.
            index += 2;
        }
        else
        {
            index++;
        }
    }

    return count.ToString();
}
/// <summary>
/// Writes the opening <c>search_hit</c> element for <paramref name="pep"/>,
/// using ion counts, masses and missed cleavages from <paramref name="sph"/>.
/// </summary>
/// <param name="sw">Writer receiving the element.</param>
/// <param name="sph">Spectrum supplying ion counts, masses and missed cleavages.</param>
/// <param name="pep">Peptide supplying the sequences and the protein list.</param>
private static void WritePeptide(StreamWriter sw, IIdentifiedSpectrum sph, IIdentifiedPeptide pep)
{
    const string searchHitFormat = " <search_hit hit_rank=\"1\" peptide=\"{0}\" peptide_prev_aa=\"{1}\" peptide_next_aa=\"{2}\" protein=\"{3}\" num_tot_proteins=\"{4}\" num_matched_ions=\"{5}\" tot_num_ions=\"{6}\" calc_neutral_pep_mass=\"{7:0.0000}\" massdiff=\"{8}{9:0.0000}\" num_tol_term=\"{10}\" num_missed_cleavages=\"{11}\" is_rejected=\"0\">";

    var pureSequence = pep.PureSequence;
    string sequence = pep.Sequence;

    // A '.' in the second position marks a leading flanking residue;
    // otherwise a blank is written.
    char previousResidue = ' ';
    if (sequence[1] == '.')
    {
        previousResidue = sequence[0];
    }

    // Likewise a '.' in the second-to-last position marks a trailing flanking residue.
    char nextResidue = ' ';
    if (sequence[sequence.Length - 2] == '.')
    {
        nextResidue = sequence[sequence.Length - 1];
    }

    var firstProtein = pep.Proteins[0];
    var proteinCount = pep.Proteins.Count;
    var matchedIons = sph.MatchedIonCount;
    var theoreticalIons = sph.TheoreticalIonCount;
    var neutralMass = sph.TheoreticalMH - Atom.H.AverageMass;

    // The written difference is the negated theoretical-minus-experimental mass;
    // an explicit '+' is prepended when that negated value is not negative.
    var theoreticalMinusExperimental = sph.TheoreticalMinusExperimentalMass;
    string massDiffSign = theoreticalMinusExperimental <= 0 ? "+" : "";
    var massDiff = -theoreticalMinusExperimental;

    sw.WriteLine(
        searchHitFormat,
        pureSequence,
        previousResidue,
        nextResidue,
        firstProtein,
        proteinCount,
        matchedIons,
        theoreticalIons,
        neutralMass,
        massDiffSign,
        massDiff,
        2,
        sph.NumMissedCleavages);
}
/// <summary>
/// Reports whether <paramref name="currEntry"/> should be skipped because its
/// first peptide has the same pure sequence as one of the peptides of
/// <paramref name="entry"/>. When it is skipped and its delta score does not
/// exceed <c>maxDeltaScore</c>, it is recorded on <paramref name="entry"/> as
/// a follow candidate.
/// </summary>
/// <param name="entry">Entry already accepted.</param>
/// <param name="currEntry">Entry under consideration.</param>
/// <returns>True when a pure-sequence match was found; otherwise false.</returns>
protected override bool SkipCurrentEntry(IdentifiedSpectrum entry, IdentifiedSpectrum currEntry)
{
    string candidateSequence = currEntry.Peptides[0].PureSequence;

    bool sameSequence = false;
    for (int index = 0; index < entry.Peptides.Count && !sameSequence; index++)
    {
        IIdentifiedPeptide existing = entry.Peptides[index];
        sameSequence = candidateSequence.Equals(existing.PureSequence);
    }

    if (!sameSequence)
    {
        return false;
    }

    if (currEntry.DeltaScore <= this.maxDeltaScore)
    {
        entry.DiffModificationSiteCandidates.Add(
            new FollowCandidate(currEntry.GetSequences(" ! "), currEntry.Score, currEntry.DeltaScore));
    }

    return true;
}
/// <summary>
/// Splits <paramref name="value"/> with <c>reg</c> and stores the pieces as
/// the peptide sequences of <paramref name="t"/>. When the number of pieces
/// equals the number of existing peptides the sequences are overwritten in
/// place; otherwise all peptides are cleared and one new peptide is created
/// per piece.
/// </summary>
/// <param name="t">Object whose peptides are updated.</param>
/// <param name="value">Text containing the sequences.</param>
public override void SetProperty(T t, string value)
{
    string[] sequences = reg.Split(value);

    if (t.Peptides.Count == sequences.Length)
    {
        // Same number of peptides: keep the instances and only replace their sequences.
        for (int index = 0; index < sequences.Length; index++)
        {
            t.Peptides[index].Sequence = sequences[index];
        }

        return;
    }

    t.ClearPeptides();
    foreach (string sequence in sequences)
    {
        IIdentifiedPeptide created = t.NewPeptide();
        created.Sequence = sequence;
    }
}
/// <summary>
/// Builds a key of the form <c>sequence_charge</c> in which every "I" of the
/// pure sequence is replaced by "L" and every "Q" by "K".
/// </summary>
/// <param name="n">Peptide supplying the pure sequence and the spectrum charge.</param>
/// <returns>The normalized sequence, an underscore, and the charge.</returns>
private static string GetSequenceCharge(IIdentifiedPeptide n)
{
    // "I" is replaced before "Q", matching the order of the two replacements.
    var normalized = n.PureSequence.Replace("I", "L").Replace("Q", "K");
    var charge = n.Spectrum.Charge.ToString();

    return normalized + "_" + charge;
}
/// <summary>
/// Writes a <c>modification_info</c> element when the matched sequence of
/// <paramref name="pep"/> contains any character listed in
/// <paramref name="modChars"/>, then writes the score through
/// <c>WriteScore</c>.
/// </summary>
/// <param name="sw">Writer receiving the output.</param>
/// <param name="modChars">Modification characters and the mass written for each.</param>
/// <param name="sph">Spectrum passed on to <c>WriteScore</c>.</param>
/// <param name="pep">Peptide whose matched sequence is scanned.</param>
private void WriteModificationAndScore(StreamWriter sw, Dictionary<char, double> modChars, IIdentifiedSpectrum sph, IIdentifiedPeptide pep)
{
    var matched = PeptideUtils.GetMatchedSequence(pep.Sequence);

    if (!matched.Any(c => modChars.ContainsKey(c)))
    {
        // No modification character present: only the score is written.
        WriteScore(sw, sph);
        return;
    }

    // A modification character in the first position is written as the N-terminal mass.
    double ntermMass;
    if (modChars.TryGetValue(matched[0], out ntermMass))
    {
        sw.Write(" <modification_info mod_nterm_mass=\"{0:0.######}\"", ntermMass);
    }
    else
    {
        sw.Write(" <modification_info ");
    }

    sw.WriteLine(" modified_peptide=\"{0}\">", matched);

    // The position counter starts at 1 and advances once per non-modification character.
    int position = 1;
    for (int index = 0; index < matched.Length; index++)
    {
        double mass;
        if (!modChars.TryGetValue(matched[index], out mass))
        {
            position++;
            continue;
        }

        // Index 0 was already emitted as the N-terminal mass above.
        if (index > 0)
        {
            sw.WriteLine(" <mod_aminoacid_mass position=\"{0}\" mass=\"{1:0.######}\"/>", position, mass);
        }
    }

    sw.WriteLine(" </modification_info>");

    WriteScore(sw, sph);
}
/// <summary>
/// Quantifies the given spectra against a raw file and writes one
/// <c>.silac</c> detail file per envelope group into
/// <paramref name="detailDir"/>.
/// </summary>
/// <remarks>
/// Spectra that cannot be quantified get a marker in the <c>RatioStr</c> of
/// their quantification item: <c>OUT_OF_RANGE</c>, <c>NOT_SILAC</c>,
/// <c>WRONG_IDENTIFICATION</c> or <c>NO_PROFILE</c>.
/// </remarks>
/// <param name="rawFileName">Raw file opened through <c>_rawReader</c>.</param>
/// <param name="spectra">Spectra to quantify; annotated in place.</param>
/// <param name="detailDir">Directory for the detail files; created when missing.</param>
/// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
public void Quantify(string rawFileName, List<IIdentifiedSpectrum> spectra, string detailDir)
{
    if (!Directory.Exists(detailDir))
    {
        Directory.CreateDirectory(detailDir);
    }

    // NOTE(review): the value is never used; the call is kept in case it validates
    // the raw file name — confirm and remove if it has no side effects.
    var experimental = RawFileFactory.GetExperimental(rawFileName);

    Dictionary<string, DifferentRetentionTimeEnvelopes> spectrumKeyMap = new Dictionary<string, DifferentRetentionTimeEnvelopes>();

    Dictionary<SilacEnvelopes, List<IIdentifiedSpectrum>> envelopeSpectrumGroup = new Dictionary<SilacEnvelopes, List<IIdentifiedSpectrum>>();

    double precursorPPM = GetPrecursorPPM(spectra);

    try
    {
        _rawReader.Open(rawFileName);

        int firstScanNumber = _rawReader.GetFirstSpectrumNumber();
        int lastScanNumber = _rawReader.GetLastSpectrumNumber();

        Progress.SetRange(1, spectra.Count);
        int pepCount = 0;

        for (int s = 0; s < spectra.Count; s++)
        {
            IIdentifiedSpectrum spectrum = spectra[s];

            SilacQuantificationSummaryItem.ClearAnnotation(spectrum);

            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            int startScan = spectrum.Query.FileScan.FirstScan;
            if (startScan > lastScanNumber)
            {
                spectrum.GetOrCreateQuantificationItem().RatioStr = "OUT_OF_RANGE";
                continue;
            }

            Progress.SetPosition(pepCount++);

            string seq = GetMatchSequence(spectrum);

            IPeptideInfo peptideInfo = new IdentifiedPeptideInfo(seq, spectrum.TheoreticalMH, spectrum.Query.Charge);

            SilacCompoundInfo sci = GetSilacCompoundInfo(peptideInfo);

            // Skip when the light and heavy ions have the same theoretical m/z.
            if (!sci.IsSilacData())
            {
                spectrum.GetOrCreateQuantificationItem().RatioStr = "NOT_SILAC";
                continue;
            }

            // Skip when the theoretical light/heavy m/z does not agree with the observed value.
            if (!sci.IsMzEquals(spectrum.ObservedMz, MAX_DELTA_MZ))
            {
                ValidateModifications(seq);

                spectrum.GetOrCreateQuantificationItem().RatioStr = "WRONG_IDENTIFICATION";
                continue;
            }

            // Skip when no corresponding full scan can be found.
            int identifiedFullScan = _rawReader.FindPreviousFullScan(startScan, firstScanNumber);
            if (-1 == identifiedFullScan)
            {
                spectrum.GetOrCreateQuantificationItem().RatioStr = "NO_PROFILE";
                continue;
            }

            DifferentRetentionTimeEnvelopes pkls = FindEnvelopes(spectrumKeyMap, spectrum, sci);

            SilacEnvelopes envelope = pkls.FindSilacEnvelope(identifiedFullScan);

            // The scan is already covered by an envelope quantified earlier:
            // attach the spectrum to that envelope and move on.
            if (envelope != null)
            {
                envelope.SetScanIdentified(identifiedFullScan, spectrum.IsExtendedIdentification());
                envelopeSpectrumGroup[envelope].Add(spectrum);
                continue;
            }

            // Extract the quantification information for this spectrum from the raw file.
            int maxIndex = Math.Min(option.ProfileLength - 1, pkls.LightProfile.FindMaxIndex());

            double mzTolerance = PrecursorUtils.ppm2mz(sci.Light.Mz, option.PPMTolerance);

            // Skip when the full scan has no matching ions (a wrong identification or
            // extended quantification can fail to find the pair).
            SilacPeakListPair splp = GetLightHeavyPeakList(_rawReader, sci, maxIndex, mzTolerance, identifiedFullScan);
            if (null == splp)
            {
                spectrum.GetOrCreateQuantificationItem().RatioStr = "NO_PROFILE";
                continue;
            }

            splp.IsIdentified = true;
            splp.IsExtendedIdentification = spectrum.IsExtendedIdentification();

            SilacEnvelopes envelopes = new SilacEnvelopes();
            envelopes.Add(splp);

            // Collect quantification information from earlier full scans.
            int fullScan = identifiedFullScan;
            int scanNumber = 0;
            while ((fullScan = _rawReader.FindPreviousFullScan(fullScan - 1, firstScanNumber)) != -1)
            {
                if (_rawReader.IsBadDataScan(fullScan))
                {
                    continue;
                }
                scanNumber++;
                var item = GetLightHeavyPeakList(_rawReader, sci, maxIndex, mzTolerance, fullScan, scanNumber <= MinScanNumber);
                if (null == item)
                {
                    break;
                }

                envelopes.Add(item);
            }
            // Earlier scans were appended walking backwards; reverse before adding later scans.
            envelopes.Reverse();

            // Collect quantification information from later full scans.
            fullScan = identifiedFullScan;
            scanNumber = 0;
            while ((fullScan = _rawReader.FindNextFullScan(fullScan + 1, lastScanNumber)) != -1)
            {
                if (_rawReader.IsBadDataScan(fullScan))
                {
                    continue;
                }
                scanNumber++;
                var item = GetLightHeavyPeakList(_rawReader, sci, maxIndex, mzTolerance, fullScan, scanNumber <= MinScanNumber);
                if (null == item)
                {
                    break;
                }

                envelopes.Add(item);
            }

            // Calculate the light and heavy ion abundance for every scan.
            envelopes.ForEach(m => m.CalculateIntensity(pkls.LightProfile, pkls.HeavyProfile));

            pkls.Add(envelopes);

            envelopeSpectrumGroup.Add(envelopes, new List<IIdentifiedSpectrum>());
            envelopeSpectrumGroup[envelopes].Add(spectrum);
        }
    }
    finally
    {
        _rawReader.Close();
    }

    foreach (DifferentRetentionTimeEnvelopes pkls in spectrumKeyMap.Values)
    {
        foreach (SilacEnvelopes envelopes in pkls)
        {
            if (0 == envelopes.Count)
            {
                continue;
            }

            List<IIdentifiedSpectrum> mps = envelopeSpectrumGroup[envelopes];

            string scanStr = GetScanRange(envelopes);

            // Path.Combine instead of a hard-coded "\\" so the separator matches the platform.
            string resultFilename = Path.Combine(
                detailDir,
                mps[0].Query.FileScan.Experimental + "." + PeptideUtils.GetPureSequence(mps[0].Sequence) + "." + mps[0].Query.Charge + scanStr + ".silac");

            IPeptideInfo peptideInfo = new IdentifiedPeptideInfo(mps[0].GetMatchSequence(), mps[0].TheoreticalMH, mps[0].Query.Charge);

            SilacCompoundInfo sci = GetSilacCompoundInfo(peptideInfo);

            SilacQuantificationSummaryItem item = new SilacQuantificationSummaryItem(sci.Light.IsSample);
            item.RawFilename = rawFileName;
            item.SoftwareVersion = this.SoftwareVersion;
            item.PeptideSequence = mps[0].Sequence;
            item.Charge = mps[0].Charge;
            item.LightAtomComposition = sci.Light.Composition.ToString();
            item.HeavyAtomComposition = sci.Heavy.Composition.ToString();
            item.LightProfile = pkls.LightProfile;
            item.HeavyProfile = pkls.HeavyProfile;
            item.ObservedEnvelopes = envelopes;

            item.ValidateScans(sci, precursorPPM);

            item.Smoothing();
            item.CalculateRatio();

            new SilacQuantificationSummaryItemXmlFormat().WriteToFile(resultFilename, item);

            // The spectrum at the index returned by FindMaxScoreItemIndex gets the primary
            // annotation; all others in the group are annotated as duplicates.
            int maxScoreItemIndex = FindMaxScoreItemIndex(mps);

            for (int i = 0; i < mps.Count; i++)
            {
                if (maxScoreItemIndex == i)
                {
                    item.AssignToAnnotation(mps[i], resultFilename);
                }
                else
                {
                    item.AssignDuplicationToAnnotation(mps[i], resultFilename);
                }
            }
        }
    }

    foreach (IIdentifiedSpectrum mph in spectra)
    {
        mph.InitializeRatioEnabled();
    }
}
/// <summary>
/// Runs O18 quantification for the protein file named in the options loaded
/// from <paramref name="optionFile"/>: builds scan envelopes per raw file,
/// writes one <c>.O18</c> detail file per envelope group, annotates the
/// spectra, calculates protein ratios and writes the summary file.
/// </summary>
/// <param name="optionFile">Path of the options file.</param>
/// <returns>
/// A one-element array holding the summary file path, which is
/// <paramref name="optionFile"/> with the extension <c>.O18summary</c>.
/// </returns>
/// <exception cref="UserTerminatedException">Cancellation was requested through <c>Progress</c>.</exception>
public override IEnumerable<string> Process(string optionFile)
{
    this.options = O18QuantificationFileProcessorOptions.Load(optionFile);

    var calc = options.GetProteinRatioCalculator();

    var detailDirectory = options.GetDetailDirectory();
    if (!Directory.Exists(detailDirectory))
    {
        Directory.CreateDirectory(detailDirectory);
    }

    var format = new MascotResultTextFormat();

    IIdentifiedResult mr = format.ReadFromFile(options.ProteinFile);

    CheckRawFilename(mr, optionFile);

    Dictionary<string, List<IIdentifiedSpectrum>> filePepMap = GetFilePeptideMap(mr);

    // Prefix shared by every detail file; Path.Combine instead of a hard-coded "\\"
    // so the separator matches the platform.
    var detailFilePrefix = Path.Combine(detailDirectory, new FileInfo(options.ProteinFile).Name);

    foreach (string filename in filePepMap.Keys)
    {
        Progress.SetMessage("Processing " + filename);

        string rawFilename = filename;
        if (new FileInfo(filename).Name.Equals("Cmpd.raw"))
        {
            rawFilename = FindRawFileName(options.ProteinFile);
        }

        string experimental = FileUtils.ChangeExtension(new FileInfo(rawFilename).Name, "");

        using (CacheRawFile rawFile = new CacheRawFile(rawFilename))
        {
            int firstScanNumber = rawFile.GetFirstSpectrumNumber();
            int lastScanNumber = rawFile.GetLastSpectrumNumber();

            List<IIdentifiedSpectrum> peps = filePepMap[filename];

            Dictionary<string, DifferentRetentionTimeEnvelopes> peptideChargeMap = new Dictionary<string, DifferentRetentionTimeEnvelopes>();
            Dictionary<O18QuanEnvelopes, List<IIdentifiedSpectrum>> pklMpMap = new Dictionary<O18QuanEnvelopes, List<IIdentifiedSpectrum>>();

            Progress.SetRange(0, peps.Count);
            foreach (IIdentifiedSpectrum mphit in peps)
            {
                if (Progress.IsCancellationPending())
                {
                    throw new UserTerminatedException();
                }
                Progress.Increment(1);

                IIdentifiedPeptide mp = mphit.Peptide;

                if (mp.Sequence.EndsWith("-"))
                {
                    //it cannot be O18 labelled, just skip it
                    continue;
                }

                int startScan = mphit.Query.FileScan.FirstScan;
                double theoreticalMz = GetTheoretialO16Mz(gapO18O16, mphit);
                // Rounded to the nearest integer so the value can be part of the grouping key.
                int theoreticalMass = (int)(theoreticalMz * mphit.Query.Charge + 0.5);

                string sequenceCharge = PeptideUtils.GetPureSequence(mphit.Sequence) + "." + mphit.Query.Charge + "." + theoreticalMass;

                DifferentRetentionTimeEnvelopes pkls;
                if (!peptideChargeMap.TryGetValue(sequenceCharge, out pkls))
                {
                    pkls = new DifferentRetentionTimeEnvelopes();
                    peptideChargeMap.Add(sequenceCharge, pkls);
                }

                // First see whether an existing envelope list already spans the identified scan.
                bool bFound = false;
                foreach (var pklList in pkls)
                {
                    if (pklList.Count == 0)
                    {
                        continue;
                    }

                    if (pklList[0].Scan > startScan)
                    {
                        continue;
                    }

                    if (pklList[pklList.Count - 1].Scan < startScan)
                    {
                        continue;
                    }

                    pklMpMap[pklList].Add(mphit);
                    bFound = true;

                    // Flag the last envelope that is not after the identified scan.
                    bool findIdentified = false;
                    for (int i = 1; i < pklList.Count; i++)
                    {
                        if (pklList[i].ScanTimes[0].Scan > startScan)
                        {
                            pklList[i - 1].IsIdentified = true;
                            findIdentified = true;
                            break;
                        }
                    }

                    if (!findIdentified)
                    {
                        pklList[pklList.Count - 1].IsIdentified = true;
                    }
                }

                if (bFound)
                {
                    continue;
                }

                double mzTolerance = PrecursorUtils.ppm2mz(theoreticalMz, options.PPMTolerance);

                O18QuanEnvelopes envelopes = new O18QuanEnvelopes();

                bool bFirst = true;

                int count = 0;
                //backward
                for (int scan = startScan; scan >= firstScanNumber; scan--)
                {
                    if (1 == rawFile.GetMsLevel(scan))
                    {
                        O18QuanEnvelope envelope = GetCorrespondingEnvelope(rawFile, theoreticalMz, mphit.Query.Charge, mzTolerance, scan);

                        //At most one invalid scan inside both pre or post identification scan range.
                        if (!IsValidEnvelope(envelope, mphit.Charge))
                        {
                            if (count > 0)
                            {
                                // Second invalid scan in a row: drop the previous invalid one and stop.
                                envelopes.RemoveAt(0);
                                break;
                            }
                            else
                            {
                                count++;
                            }
                        }
                        else
                        {
                            count = 0;
                        }

                        if (bFirst)
                        {
                            envelope.IsIdentified = true;
                            bFirst = false;
                        }

                        envelopes.Insert(0, envelope);
                    }
                }

                if (envelopes.Count == 0)
                {
                    //If the identified scan has no quantification information ,ignore it.
                    continue;
                }

                count = 0;
                //forward
                for (int scan = startScan + 1; scan <= lastScanNumber; scan++)
                {
                    if (1 == rawFile.GetMsLevel(scan))
                    {
                        var envelope = GetCorrespondingEnvelope(rawFile, theoreticalMz, mphit.Query.Charge, mzTolerance, scan);

                        //At most one invalid scan inside both pre or post identification scan range.
                        if (!IsValidEnvelope(envelope, mphit.Charge))
                        {
                            if (count > 0)
                            {
                                // Second invalid scan in a row: drop the previous invalid one and stop.
                                envelopes.RemoveAt(envelopes.Count - 1);
                                break;
                            }
                            else
                            {
                                count = 1;
                            }
                        }
                        else
                        {
                            count = 0;
                        }

                        envelopes.Add(envelope);
                    }
                }

                if (envelopes.Count == 0)
                {
                    continue;
                }

                string scanCurr = envelopes.GetScanRange();

                //check scan list again
                bFound = false;
                foreach (var pklList in pkls)
                {
                    if (pklList.Count == 0)
                    {
                        continue;
                    }

                    string scanOld = pklList.GetScanRange();
                    if (scanOld.Equals(scanCurr))
                    {
                        pklMpMap[pklList].Add(mphit);
                        bFound = true;
                        break;
                    }
                }

                if (bFound)
                {
                    continue;
                }

                pkls.Add(envelopes);
                pklMpMap.Add(envelopes, new List<IIdentifiedSpectrum>());
                pklMpMap[envelopes].Add(mphit);
            }

            foreach (DifferentRetentionTimeEnvelopes pkls in peptideChargeMap.Values)
            {
                foreach (var envelopes in pkls)
                {
                    if (0 == envelopes.Count)
                    {
                        continue;
                    }

                    List<IIdentifiedSpectrum> mps = pklMpMap[envelopes];

                    double mzTolerance = PrecursorUtils.ppm2mz(mps[0].Query.ObservedMz, options.PPMTolerance);

                    // NOTE(review): if GetScanPercentageStart()/GetScanPercentageEnd() return an
                    // integer type, "/ 100" is integer division — confirm the return type.
                    O18QuantificationPeptideProcessor processor = new O18QuantificationPeptideProcessor(fileFormat,
                                                                                                        options.IsPostDigestionLabelling,
                                                                                                        rawFilename,
                                                                                                        PeptideUtils.GetPureSequence(mps[0].Sequence),
                                                                                                        options.PurityOfO18Water,
                                                                                                        envelopes, mzTolerance, "",
                                                                                                        options.GetScanPercentageStart() / 100,
                                                                                                        options.GetScanPercentageEnd() / 100);

                    processor.TheoreticalMz = GetTheoretialO16Mz(gapO18O16, mps[0]);
                    processor.Charge = mps[0].Charge;
                    processor.SoftwareVersion = options.SoftwareVersion;

                    var resultFilename = MyConvert.Format("{0}.{1}.{2}.{3}.{4}.O18",
                                                          detailFilePrefix,
                                                          experimental,
                                                          PeptideUtils.GetPureSequence(mps[0].Sequence),
                                                          mps[0].Charge,
                                                          envelopes.GetScanRange());

                    processor.Process(resultFilename);

                    O18QuantificationSummaryItem item = fileFormat.ReadFromFile(resultFilename);

                    // The spectrum at the index returned by FindMaxScoreItemIndex gets the primary
                    // annotation; all others in the group are annotated as duplicates.
                    int maxScoreItemIndex = FindMaxScoreItemIndex(mps);

                    // Annotations store the detail file relative to the detail directory's parent.
                    var relativeFile = Path.Combine(Path.GetFileName(detailDirectory), Path.GetFileName(resultFilename));

                    for (int i = 0; i < mps.Count; i++)
                    {
                        if (maxScoreItemIndex == i)
                        {
                            item.AssignToAnnotation(mps[i], relativeFile);
                        }
                        else
                        {
                            item.AssignDuplicationToAnnotation(mps[i], relativeFile);
                        }
                    }
                }
            }
        }
    }

    List<IIdentifiedSpectrum> peptides = mr.GetSpectra();
    foreach (IIdentifiedSpectrum mphit in peptides)
    {
        // Spectra that were never quantified get a placeholder scan-count annotation.
        if (!mphit.Annotations.ContainsKey(O18QuantificationConstants.O18_RATIO_SCANCOUNT))
        {
            mphit.Annotations[O18QuantificationConstants.O18_RATIO_SCANCOUNT] = "-";
        }
        mphit.SetEnabled(calc.HasPeptideRatio(mphit));
    }

    calc.Calculate(mr, m => true);

    string resultFile = FileUtils.ChangeExtension(optionFile, ".O18summary");

    format.InitializeByResult(mr);
    format.ProteinFormat = format.ProteinFormat.GetLineFormat(O18QuantificationConstants.O18_EXPORT_PROTEIN_HEADER);

    format.WriteToFile(resultFile, mr);

    Progress.SetMessage("Finished, result was saved to " + resultFile);

    return new[] { resultFile };
}