public void Test()
{
    // Three reference checks of ExpectValueCalculator.Calc; every call passes 0.05 as
    // its last argument and is compared with the expected value within an absolute delta.
    const double lastArg = 0.05;

    var firstActual = ExpectValueCalculator.Calc(40.95, 11521, lastArg);
    Assert.AreEqual(0.046, firstActual, 0.001);

    var secondActual = ExpectValueCalculator.Calc(89.10, 1949, lastArg);
    Assert.AreEqual(1.2e-7, secondActual, 1e-8);

    var thirdActual = ExpectValueCalculator.Calc(11.58, 5370, lastArg);
    Assert.AreEqual(19, thirdActual, 1);
}
/// <summary>
/// Parses the peptide hits of a Mascot result file and groups them by query id.
/// The file is read twice: a first pass loads the parameters, masses, enzyme, header,
/// query summary and (decoy) peptides sections; a second pass reads each "queryN"
/// section to attach the spectrum title and file/scan information to the kept hits.
/// </summary>
/// <param name="datFilename">Path of the result file to read.</param>
/// <param name="minRank">
/// Upper bound on the rank of hits that are kept: scanning of a query stops as soon as
/// the running rank exceeds this value (the name is historical).
/// </param>
/// <param name="minScore">A hit scoring below this value ends the scan of its query.</param>
/// <param name="isDecoy">
/// When true, the "decoy_"-prefixed sections are parsed and every protein name is
/// prefixed with DECOY_PREFIX.
/// </param>
/// <returns>
/// A dictionary keyed by query id (1..queries) holding the spectra kept for that query.
/// It is empty when decoy hits are requested but the parameters section does not
/// contain DECOY=1.
/// </returns>
/// <exception cref="UserTerminatedException">Progress reports a pending cancellation.</exception>
/// <exception cref="System.Exception">
/// A peptide line does not match the peptide pattern, or the "_terms" entry of a hit is missing.
/// </exception>
public Dictionary<int, List<IIdentifiedSpectrum>> DoParsePeptides(string datFilename, int minRank, double minScore, bool isDecoy)
{
    // Number of "q{query}_p{k}" entries probed per query (k = 1..10).
    const int maxHitsPerQuery = 10;
    const string fileNamePrefix = "File Name: ";

    var result = new Dictionary<int, List<IIdentifiedSpectrum>>();

    Dictionary<string, string> headers;
    int queryCount;
    Dictionary<int, MascotQueryItem> queryItems;
    Dictionary<string, string> peptideSection;

    var prefix = isDecoy ? "decoy_" : "";

    // First pass: everything except the per-query sections.
    using (var sr = new StreamReader(datFilename))
    {
        InitializeBoundary(sr);

        CurrentParameters = ParseSection(sr, "parameters");

        var hasDecoy = CurrentParameters.ContainsKey("DECOY") && CurrentParameters["DECOY"].Equals("1");
        if (!hasDecoy && isDecoy)
        {
            // Decoy hits requested but the search was not run with a decoy database.
            return result;
        }

        var masses = ParseSection(sr, "masses");
        CurrentModifications = ParseModification(masses);

        // Remember the position, parse the enzyme, then rewind so that the
        // following section lookups start from the same place as before.
        long curPos = sr.GetCharpos();
        CurrentProtease = ParseEnzyme(sr);
        sr.SetCharpos(curPos);

        headers = ParseSection(sr, "header");
        queryCount = int.Parse(headers["queries"]);

        queryItems = ParseQueryItems(sr, queryCount, prefix);
        peptideSection = ParseSection(sr, prefix + "peptides", !isDecoy);
    }

    // Derive the fallback experiment name from the FILE parameter.
    string file = CurrentParameters["FILE"];
    if (file.StartsWith(fileNamePrefix, StringComparison.Ordinal))
    {
        file = file.Substring(fileNamePrefix.Length).Trim();
    }
    string defaultExperimental = FileUtils.ChangeExtension(new FileInfo(file).Name, "");

    bool isPrecursorMonoisotopic = true;
    if (CurrentParameters.ContainsKey("MASS"))
    {
        isPrecursorMonoisotopic = CurrentParameters["MASS"].Equals("Monoisotopic");
    }

    // Second pass: walk the queries in order and read each "queryN" section.
    using (var sr = new StreamReader(datFilename))
    {
        for (int queryId = 1; queryId <= queryCount; queryId++)
        {
            if (Progress.IsCancellationPending())
            {
                throw new UserTerminatedException();
            }

            MascotQueryItem queryItem = queryItems[queryId];

            var iPeps = new List<IIdentifiedSpectrum>();
            result[queryId] = iPeps;

            // lastHit is the spectrum of the most recent distinct score; hits with an
            // equal score are merged into it instead of creating a new spectrum.
            IIdentifiedSpectrum lastHit = null;
            int rank = 0;

            for (int k = 1; k <= maxHitsPerQuery; k++)
            {
                string key = "q" + queryId + "_p" + k;

                string line;
                if (!peptideSection.TryGetValue(key, out line) || line == null || line.Equals("-1"))
                {
                    // No further hit for this query: the last kept hit has no runner-up.
                    if (null != lastHit)
                    {
                        lastHit.DeltaScore = 1.0;
                    }
                    break;
                }

                Match mDetail = this.peptideRegex.Match(line);
                if (!mDetail.Success)
                {
                    throw new Exception("Wrong format of peptides : " + line);
                }

                double score = MyConvert.ToDouble(mDetail.Groups["Score"].Value);
                if (score < minScore)
                {
                    // The rejected hit still serves as runner-up for the delta score.
                    if (null != lastHit)
                    {
                        lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                    }
                    break;
                }

                // Exact equality is intentional: both values come from parsing the file.
                bool bSameRank = null != lastHit && score == lastHit.Score;
                if (!bSameRank)
                {
                    if (null != lastHit)
                    {
                        lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                    }

                    rank++;
                    if (rank > minRank)
                    {
                        break;
                    }
                }

                IIdentifiedSpectrum mphit;
                if (bSameRank)
                {
                    mphit = lastHit;
                }
                else
                {
                    mphit = new IdentifiedSpectrum();
                    mphit.IsPrecursorMonoisotopic = isPrecursorMonoisotopic;
                    mphit.Rank = rank;
                    mphit.NumMissedCleavages = int.Parse(mDetail.Groups["MissCleavage"].Value);
                    mphit.TheoreticalMass = MyConvert.ToDouble(mDetail.Groups["TheoreticalMass"].Value);
                    mphit.ExperimentalMass = queryItem.ExperimentalMass;
                    mphit.Score = score;
                    mphit.ExpectValue = ExpectValueCalculator.Calc(mphit.Score, queryItem.MatchCount, 0.05);

                    mphit.Query.QueryId = queryId;
                    mphit.Query.ObservedMz = queryItem.Observed;
                    mphit.Query.Charge = queryItem.Charge;
                    mphit.Query.MatchCount = queryItem.MatchCount;

                    if (queryItem.HomologyScore != 0)
                    {
                        mphit.Annotations[HomologyScoreKey] = queryItem.HomologyScore;
                    }

                    if (CurrentProtease.IsSemiSpecific)
                    {
                        mphit.NumProteaseTermini = 1;
                    }

                    lastHit = mphit;
                }

                var pureSeq = mDetail.Groups["Sequence"].Value;
                string modification = mDetail.Groups["Modification"].Value;
                var seq = ModifySequence(pureSeq, modification);

                AssignModification(mphit, modification, CurrentModifications);

                string proteins = mDetail.Groups["ProteinNames"].Value;
                Match proteinNameMatch = this.proteinNameRegex.Match(proteins);

                string key_terms = key + "_terms";
                string value_terms;
                if (!peptideSection.TryGetValue(key_terms, out value_terms))
                {
                    throw new Exception("Mascot version is too old. It's not supported.");
                }

                Match termsMatch = this.termsRegex.Match(value_terms);

                // Protein names and flanking-residue entries are consumed pairwise.
                int numProteaseTermini = 0;
                while (proteinNameMatch.Success && termsMatch.Success)
                {
                    var fullSeq = MyConvert.Format("{0}.{1}.{2}", termsMatch.Groups[1].Value, seq, termsMatch.Groups[2].Value);

                    var name = proteinNameMatch.Groups[1].Value.Replace("/", "_");
                    if (isDecoy)
                    {
                        name = DECOY_PREFIX + name;
                    }

                    // Attach the protein to an existing peptide with the same full sequence, if any.
                    bool findPeptide = false;
                    for (int i = 0; i < mphit.Peptides.Count; i++)
                    {
                        if (mphit.Peptides[i].Sequence == fullSeq)
                        {
                            mphit.Peptides[i].AddProtein(name);
                            findPeptide = true;
                            break;
                        }
                    }

                    if (!findPeptide)
                    {
                        // NOTE(review): the new peptide is never added to mphit.Peptides here;
                        // presumably the IdentifiedPeptide constructor registers it - confirm.
                        var mp = new IdentifiedPeptide(mphit);
                        mp.Sequence = fullSeq;
                        mp.AddProtein(name);

                        if (CurrentProtease.IsSemiSpecific)
                        {
                            int position = Convert.ToInt32(proteinNameMatch.Groups[2].Value);
                            int count = CurrentProtease.GetNumProteaseTermini(termsMatch.Groups[1].Value[0], pureSeq, termsMatch.Groups[2].Value[0], '-', position);
                            numProteaseTermini = Math.Max(numProteaseTermini, count);
                        }
                    }

                    proteinNameMatch = proteinNameMatch.NextMatch();
                    termsMatch = termsMatch.NextMatch();
                }

                if (CurrentProtease.IsSemiSpecific)
                {
                    mphit.NumProteaseTermini = Math.Max(mphit.NumProteaseTermini, numProteaseTermini);
                }

                // A same-score hit was merged into lastHit, which is already in the list.
                if (!bSameRank)
                {
                    iPeps.Add(mphit);
                }
            }

            // The query section is parsed for every query, even when no hit was kept.
            string query = "query" + queryId;
            Dictionary<string, string> querySection = ParseSection(sr, query);
            string title = Uri.UnescapeDataString(querySection["title"]);

            SequestFilename sf = this.TitleParser.GetValue(title);
            sf.Charge = queryItem.Charge;
            if (string.IsNullOrEmpty(sf.Experimental))
            {
                sf.Experimental = defaultExperimental;
            }

            foreach (IIdentifiedSpectrum spectrum in iPeps)
            {
                spectrum.Query.Title = title;
                spectrum.Query.FileScan = sf;
            }
        }
    }

    return result;
}