Пример #1
0
 /// <summary>
 /// Checks <c>ExpectValueCalculator.Calc</c> against three reference values,
 /// each compared within its own absolute tolerance.
 /// </summary>
 public void Test()
 {
     // Every case passes 0.05 as the third argument.
     // NOTE(review): presumably a significance level - confirm against ExpectValueCalculator.
     const double level = 0.05;

     var firstValue = ExpectValueCalculator.Calc(40.95, 11521, level);
     Assert.AreEqual(0.046, firstValue, 0.001);

     var secondValue = ExpectValueCalculator.Calc(89.10, 1949, level);
     Assert.AreEqual(1.2e-7, secondValue, 1e-8);

     var thirdValue = ExpectValueCalculator.Calc(11.58, 5370, level);
     Assert.AreEqual(19, thirdValue, 1);
 }
Пример #2
0
        /// <summary>
        /// Reads the peptide hits of every query from a Mascot result file and groups them by query id.
        /// The file is opened twice: the first pass loads the "parameters", "masses", enzyme, "header",
        /// query-item and peptide sections; the second pass walks the queries in ascending order,
        /// builds the hits from the peptide section and reads each "query&lt;N&gt;" section for its title.
        /// As a side effect, <c>CurrentParameters</c>, <c>CurrentModifications</c> and
        /// <c>CurrentProtease</c> are replaced with the values read from this file.
        /// </summary>
        /// <param name="datFilename">Path of the result file; opened with <see cref="StreamReader"/>.</param>
        /// <param name="minRank">
        /// Highest rank that is still kept: parsing of a query stops as soon as a hit would get a rank
        /// greater than this value (despite the name, it acts as an upper bound). Hits with the same
        /// score as the previous one share its rank.
        /// </param>
        /// <param name="minScore">Parsing of a query stops at the first hit whose score is below this value.</param>
        /// <param name="isDecoy">
        /// When true, the "decoy_"-prefixed sections are read and every protein name is prefixed with
        /// <c>DECOY_PREFIX</c>. If the file's parameters do not contain DECOY=1, an empty result is returned.
        /// </param>
        /// <returns>
        /// A dictionary with one entry per query id (1..query count); the value is the list of hits kept
        /// for that query, which may be empty.
        /// </returns>
        /// <exception cref="UserTerminatedException">Thrown when <c>Progress</c> reports a pending cancellation.</exception>
        /// <exception cref="Exception">
        /// Thrown when a peptide line does not match the expected format, or when the "_terms" entry of a
        /// hit is missing from the peptide section.
        /// </exception>
        public Dictionary<int, List<IIdentifiedSpectrum>> DoParsePeptides(string datFilename, int minRank, double minScore, bool isDecoy)
        {
            // Only the keys q<N>_p1 .. q<N>_p10 are examined for each query.
            const int maxHitsPerQuery = 10;
            const string fileNamePrefix = "File Name: ";

            var result = new Dictionary<int, List<IIdentifiedSpectrum>>();

            int queryCount;
            Dictionary<int, MascotQueryItem> queryItems;
            Dictionary<string, string> peptideSection;

            var prefix = isDecoy ? "decoy_" : "";

            // First pass: load everything that is needed before the per-query sections can be interpreted.
            using (var sr = new StreamReader(datFilename))
            {
                InitializeBoundary(sr);

                CurrentParameters = ParseSection(sr, "parameters");

                string decoyFlag;
                var hasDecoy = CurrentParameters.TryGetValue("DECOY", out decoyFlag) && decoyFlag.Equals("1");

                if (!hasDecoy && isDecoy)
                {
                    // Decoy hits were requested but the file does not declare a decoy search.
                    return result;
                }

                var masses = ParseSection(sr, "masses");

                CurrentModifications = ParseModification(masses);

                // ParseEnzyme moves the reader; restore the position so the header can be read afterwards.
                long curPos = sr.GetCharpos();

                CurrentProtease = ParseEnzyme(sr);

                sr.SetCharpos(curPos);

                Dictionary<string, string> headers = ParseSection(sr, "header");
                queryCount = int.Parse(headers["queries"]);

                queryItems = ParseQueryItems(sr, queryCount, prefix);
                peptideSection = ParseSection(sr, prefix + "peptides", !isDecoy);
            }

            string file = CurrentParameters["FILE"];

            // Ordinal comparison: this is machine-readable data, not user-facing text.
            if (file.StartsWith(fileNamePrefix, StringComparison.Ordinal))
            {
                file = file.Substring(fileNamePrefix.Length).Trim();
            }

            // Fallback experiment name for titles that do not yield one.
            string defaultExperimental = FileUtils.ChangeExtension(new FileInfo(file).Name, "");

            bool isPrecursorMonoisotopic = true;
            string massType;
            if (CurrentParameters.TryGetValue("MASS", out massType))
            {
                isPrecursorMonoisotopic = massType.Equals("Monoisotopic");
            }

            // Second pass: the "query<N>" sections are requested from this reader in ascending query order.
            using (var sr = new StreamReader(datFilename))
            {
                for (int queryId = 1; queryId <= queryCount; queryId++)
                {
                    if (Progress.IsCancellationPending())
                    {
                        throw new UserTerminatedException();
                    }

                    MascotQueryItem queryItem = queryItems[queryId];

                    var hits = new List<IIdentifiedSpectrum>();
                    result[queryId] = hits;

                    IIdentifiedSpectrum lastHit = null;
                    int rank = 0;
                    for (int hitIndex = 1; hitIndex <= maxHitsPerQuery; hitIndex++)
                    {
                        string key = "q" + queryId + "_p" + hitIndex;

                        // A missing entry, a null value or "-1" all mean there are no further hits.
                        string line;
                        if (!peptideSection.TryGetValue(key, out line) || line == null || line.Equals("-1"))
                        {
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0;
                            }
                            break;
                        }

                        Match mDetail = this.peptideRegex.Match(line);
                        if (!mDetail.Success)
                        {
                            throw new Exception("Wrong format of peptides : " + line);
                        }

                        double score = MyConvert.ToDouble(mDetail.Groups["Score"].Value);
                        if (score < minScore)
                        {
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                            }
                            break;
                        }

                        // Exact equality is intended: equal scores share one rank and one spectrum object.
                        bool bSameRank = null != lastHit && score == lastHit.Score;
                        if (!bSameRank)
                        {
                            if (null != lastHit)
                            {
                                lastHit.DeltaScore = 1.0 - score / lastHit.Score;
                            }

                            rank++;
                            if (rank > minRank)
                            {
                                break;
                            }
                        }

                        IIdentifiedSpectrum mphit;
                        if (bSameRank)
                        {
                            mphit = lastHit;
                        }
                        else
                        {
                            mphit = new IdentifiedSpectrum();
                            mphit.IsPrecursorMonoisotopic = isPrecursorMonoisotopic;

                            mphit.Rank = rank;
                            mphit.NumMissedCleavages = int.Parse(mDetail.Groups["MissCleavage"].Value);
                            mphit.TheoreticalMass = MyConvert.ToDouble(mDetail.Groups["TheoreticalMass"].Value);
                            mphit.ExperimentalMass = queryItem.ExperimentalMass;
                            mphit.Score = score;
                            mphit.ExpectValue = ExpectValueCalculator.Calc(mphit.Score, queryItem.MatchCount, 0.05);

                            mphit.Query.QueryId = queryId;
                            mphit.Query.ObservedMz = queryItem.Observed;
                            mphit.Query.Charge = queryItem.Charge;
                            mphit.Query.MatchCount = queryItem.MatchCount;
                            if (queryItem.HomologyScore != 0)
                            {
                                mphit.Annotations[HomologyScoreKey] = queryItem.HomologyScore;
                            }

                            if (CurrentProtease.IsSemiSpecific)
                            {
                                mphit.NumProteaseTermini = 1;
                            }

                            lastHit = mphit;
                        }

                        var pureSeq = mDetail.Groups["Sequence"].Value;
                        string modification = mDetail.Groups["Modification"].Value;
                        var seq = ModifySequence(pureSeq, modification);
                        AssignModification(mphit, modification, CurrentModifications);

                        string proteins = mDetail.Groups["ProteinNames"].Value;
                        Match proteinNameMatch = this.proteinNameRegex.Match(proteins);

                        string termsKey = key + "_terms";
                        string termsValue;
                        if (!peptideSection.TryGetValue(termsKey, out termsValue))
                        {
                            throw new Exception("Mascot version is too old. It's not supported.");
                        }

                        Match termsMatch = this.termsRegex.Match(termsValue);

                        // Protein names and flanking-residue entries are consumed pairwise, in step.
                        int numProteaseTermini = 0;
                        while (proteinNameMatch.Success && termsMatch.Success)
                        {
                            var fullSeq = MyConvert.Format("{0}.{1}.{2}",
                                                           termsMatch.Groups[1].Value,
                                                           seq,
                                                           termsMatch.Groups[2].Value);

                            var name = proteinNameMatch.Groups[1].Value.Replace("/", "_");
                            if (isDecoy)
                            {
                                name = DECOY_PREFIX + name;
                            }

                            // Reuse an existing peptide with the same full sequence if there is one.
                            bool findPeptide = false;
                            for (int i = 0; i < mphit.Peptides.Count; i++)
                            {
                                if (mphit.Peptides[i].Sequence == fullSeq)
                                {
                                    mphit.Peptides[i].AddProtein(name);
                                    findPeptide = true;
                                    break;
                                }
                            }

                            if (!findPeptide)
                            {
                                // NOTE(review): presumably the constructor registers the peptide in
                                // mphit.Peptides; it is not added explicitly here - confirm.
                                var peptide = new IdentifiedPeptide(mphit);
                                peptide.Sequence = fullSeq;
                                peptide.AddProtein(name);

                                if (CurrentProtease.IsSemiSpecific)
                                {
                                    int position = Convert.ToInt32(proteinNameMatch.Groups[2].Value);
                                    int count = CurrentProtease.GetNumProteaseTermini(termsMatch.Groups[1].Value[0], pureSeq, termsMatch.Groups[2].Value[0], '-', position);
                                    numProteaseTermini = Math.Max(numProteaseTermini, count);
                                }
                            }

                            proteinNameMatch = proteinNameMatch.NextMatch();
                            termsMatch = termsMatch.NextMatch();
                        }

                        if (CurrentProtease.IsSemiSpecific)
                        {
                            mphit.NumProteaseTermini = Math.Max(mphit.NumProteaseTermini, numProteaseTermini);
                        }

                        // A same-rank hit was merged into the previous spectrum, which is already listed.
                        if (!bSameRank)
                        {
                            hits.Add(mphit);
                        }
                    }

                    string query = "query" + queryId;

                    Dictionary<string, string> querySection = ParseSection(sr, query);
                    string title = Uri.UnescapeDataString(querySection["title"]);

                    SequestFilename sf = this.TitleParser.GetValue(title);
                    sf.Charge = queryItem.Charge;

                    if (string.IsNullOrEmpty(sf.Experimental))
                    {
                        sf.Experimental = defaultExperimental;
                    }

                    // All hits of one query share the same title and file/scan information.
                    foreach (IIdentifiedSpectrum spectrum in hits)
                    {
                        spectrum.Query.Title = title;
                        spectrum.Query.FileScan = sf;
                    }
                }
            }

            return result;
        }