Пример #1
0
        /// <summary>
        /// Converts an m/z value from one charge state to another using both overloads of
        /// ConvoluteMass, then checks that the overloads agree and that the result matches the expected m/z
        /// </summary>
        /// <param name="massMz">Input m/z value</param>
        /// <param name="currentCharge">Charge state that massMz is currently expressed at</param>
        /// <param name="newCharge">Charge state to convert to</param>
        /// <param name="expectedMz">Expected m/z at newCharge (compared with a tolerance of 0.0001)</param>
        public void TestConvoluteMass(double massMz, int currentCharge, int newCharge, double expectedMz)
        {
            // First overload: the calculator chooses the charge carrier mass itself
            var convertedMz = mPeptideMassCalculator.ConvoluteMass(massMz, currentCharge, newCharge);

            // Second overload: the proton mass is supplied explicitly
            var convertedMzExplicitProton = mPeptideMassCalculator.ConvoluteMass(
                massMz,
                currentCharge,
                newCharge,
                PeptideMassCalculator.MASS_PROTON);

            Console.WriteLine(
                "{0} from {1}+ to {2}+ is {3:F5}; expected {4:F5}",
                massMz,
                currentCharge,
                newCharge,
                convertedMz,
                expectedMz);

            // The two overloads must agree with each other before the result is compared to the expected value
            Assert.AreEqual(
                convertedMz,
                convertedMzExplicitProton,
                1E-05,
                "The two overloads of ConvoluteMass reported conflicting mass values");

            Assert.AreEqual(expectedMz, convertedMz, 0.0001, "Unexpected convoluted m/z");
        }
Пример #2
0
        /// <summary>
        /// Runs all the tools necessary to perform an AScore run on PSM results that have already been loaded
        /// </summary>
        /// <remarks>
        /// Steps through every row of psmResultsManager. Whenever the job number of the current row differs
        /// from the previous one, the matching spectrum file and the modification definitions are (re)loaded.
        /// Each PSM is then either scored with AScoreAlgorithm.ComputeAScore (more than one position list)
        /// or written to the results table with a score of 0. After the last row, the results are written
        /// to ascoreOptions.AScoreResultsFilePath and per-fragmentation-type statistics are reported.
        /// </remarks>
        /// <param name="jobToDatasetNameMap">Keys are job numbers (stored as strings); values are Dataset Names or the path to the _dta.txt file</param>
        /// <param name="spectraManager">Manager for reading _dta.txt or .mzML files; must have already been initialized by the calling class</param>
        /// <param name="psmResultsManager">Source of the PSM rows to process; also receives the results (WriteToTable) and writes them to disk (WriteToFile)</param>
        /// <param name="ascoreParams">
        /// AScore parameters; FragmentType, DynamicMods and MSGFPreFilter are read here.
        /// The object is passed by reference to the row reader and handed to the modification loaders
        /// </param>
        /// <param name="ascoreOptions">Supplies ModSummaryFile (used when the dataset has no mod summary path of its own) and AScoreResultsFilePath (output file)</param>
        /// <param name="spectraFileOpened">Set to true if processing a single dataset, and spectraManager.OpenFile() has already been called</param>
        /// <exception cref="ArgumentException">jobToDatasetNameMap is null or empty</exception>
        /// <exception cref="Exception">
        /// A row refers to a job that is not in jobToDatasetNameMap, or the spectrum file reader is unexpectedly null
        /// </exception>
        private void RunAScoreOnPreparedData(
            IReadOnlyDictionary<string, DatasetFileInfo> jobToDatasetNameMap,
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams,
            AScoreOptions ascoreOptions,
            bool spectraFileOpened)
        {
            var totalRows = psmResultsManager.GetRowLength();

            // Tracks the PSMs already seen so that duplicate rows are only processed once.
            // Keys include the job number because scan numbers are only unique within a single dataset.
            var peptidesProcessed = new HashSet<string>();

            if (jobToDatasetNameMap == null || jobToDatasetNameMap.Count == 0)
            {
                const string errorMessage = "Error in AlgorithmRun: jobToDatasetNameMap cannot be null or empty";
                OnErrorEvent(errorMessage);
                throw new ArgumentException(errorMessage);
            }

            ISpectraManager spectraFile = null;
            string spectraManagerCurrentJob = null; // Force open after first read from fht

            var modSummaryManager = new ModSummaryFileManager();

            RegisterEvents(modSummaryManager);

            var peptideMassCalculator = new PeptideMassCalculator();

            if (FilterOnMSGFScore)
            {
                OnStatusEvent("Filtering using MSGF_SpecProb <= " + ascoreParams.MSGFPreFilter.ToString("0.0E+00"));
            }
            Console.WriteLine();

            // Number of rows read for each fragmentation type, indexed by (int)FragmentType
            var statsByType = new int[4];
            var ascoreAlgorithm = new AScoreAlgorithm();

            RegisterEvents(ascoreAlgorithm);

            while (psmResultsManager.CurrentRowNum < totalRows)
            {
                if (psmResultsManager.CurrentRowNum % 100 == 0)
                {
                    Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%");
                }

                int scanNumber;
                int scanCount;
                int chargeState;
                string peptideSeq;
                double msgfScore;

                if (FilterOnMSGFScore)
                {
                    psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, out msgfScore, ref ascoreParams);
                }
                else
                {
                    psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, ref ascoreParams);

                    // Not filtering, so the value is never compared against the threshold
                    msgfScore = 1;
                }

                switch (ascoreParams.FragmentType)
                {
                    case FragmentType.CID:
                        statsByType[(int)FragmentType.CID]++;
                        break;

                    case FragmentType.ETD:
                        statsByType[(int)FragmentType.ETD]++;
                        break;

                    case FragmentType.HCD:
                        statsByType[(int)FragmentType.HCD]++;
                        break;

                    default:
                        statsByType[(int)FragmentType.Unspecified]++;
                        break;
                }

                if (string.IsNullOrEmpty(spectraManagerCurrentJob) || !string.Equals(spectraManagerCurrentJob, psmResultsManager.JobNum))
                {
                    // New dataset
                    // Get the correct spectrum file for the match
                    if (!jobToDatasetNameMap.TryGetValue(psmResultsManager.JobNum, out var datasetInfo))
                    {
                        var errorMessage = "Input file refers to job " + psmResultsManager.JobNum +
                                           " but jobToDatasetNameMap does not contain that job; unable to continue";
                        OnWarningEvent(errorMessage);

                        if (!psmResultsManager.JobColumnDefined)
                        {
                            OnWarningEvent(
                                "If the input file includes results from multiple jobs, the first column must be job number with Job as the column heading");
                        }

                        throw new Exception(errorMessage);
                    }

                    var datasetName = GetDatasetName(datasetInfo.SpectrumFilePath);
                    OnStatusEvent("Dataset name: " + datasetName);

                    if (!spectraFileOpened)
                    {
                        // This method was called from RunAScoreWithMappingFile
                        // Open the spectrum file for this dataset
                        spectraFile = spectraManager.GetSpectraManagerForFile(
                            psmResultsManager.PSMResultsFilePath,
                            datasetName,
                            datasetInfo.ModSummaryFilePath);
                    }
                    else
                    {
                        spectraFile = spectraManager.GetCurrentSpectrumManager();
                    }

                    // Strings are immutable, so the reference can be stored directly
                    spectraManagerCurrentJob = psmResultsManager.JobNum;
                    Console.Write("\r");

                    // Fall back to the mod summary file from the options when the dataset does not define one
                    if (string.IsNullOrWhiteSpace(datasetInfo.ModSummaryFilePath) && !string.IsNullOrWhiteSpace(ascoreOptions.ModSummaryFile))
                    {
                        datasetInfo.ModSummaryFilePath = ascoreOptions.ModSummaryFile;
                    }

                    if (psmResultsManager is MsgfMzid mzid)
                    {
                        mzid.SetModifications(ascoreParams);
                    }
                    else if (psmResultsManager is MsgfMzidFull mzidFull)
                    {
                        mzidFull.SetModifications(ascoreParams);
                    }
                    else
                    {
                        if (string.IsNullOrEmpty(datasetInfo.ModSummaryFilePath))
                        {
                            modSummaryManager.ReadModSummary(spectraFile.DatasetName, psmResultsManager.PSMResultsFilePath, ascoreParams);
                        }
                        else
                        {
                            var modSummaryFile = new FileInfo(datasetInfo.ModSummaryFilePath);
                            modSummaryManager.ReadModSummary(modSummaryFile, ascoreParams);
                        }
                    }

                    Console.WriteLine();

                    Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%");
                }

                // Perform work on the match
                // Peptides of the form X.SEQUENCE.Y are split into prefix residue, sequence, and suffix residue
                var peptideParts = peptideSeq.Split('.');
                string sequenceWithoutSuffixOrPrefix;
                string front;
                string back;

                if (peptideParts.Length >= 3)
                {
                    front = peptideParts[0];
                    sequenceWithoutSuffixOrPrefix = peptideParts[1];
                    back = peptideParts[2];
                }
                else
                {
                    front = "?";
                    sequenceWithoutSuffixOrPrefix = peptideSeq;
                    back = "?";
                }

                var sequenceClean = GetCleanSequence(sequenceWithoutSuffixOrPrefix, ref ascoreParams);
                var skipPSM = FilterOnMSGFScore && msgfScore > ascoreParams.MSGFPreFilter;

                // Include the job number in the key; otherwise a PSM from a second dataset that happens to
                // share scan number, charge, and sequence with a PSM from an earlier dataset would be dropped
                var jobScanChargePeptide = psmResultsManager.JobNum + "_" + scanNumber + "_" + chargeState + "_" + sequenceWithoutSuffixOrPrefix;

                if (!peptidesProcessed.Add(jobScanChargePeptide))
                {
                    // We have already processed this PSM
                    skipPSM = true;
                }

                if (skipPSM)
                {
                    psmResultsManager.IncrementRow();
                    continue;
                }

                // Get experimental spectra
                if (spectraFile == null)
                {
                    const string errorMessage = "spectraFile is uninitialized in RunAScoreOnPreparedData; this indicates a programming bug";
                    OnErrorEvent(errorMessage);
                    throw new Exception(errorMessage);
                }

                var expSpec = spectraFile.GetExperimentalSpectra(scanNumber, scanCount, chargeState);

                if (expSpec == null)
                {
                    OnWarningEvent("Scan " + scanNumber + " not found in spectra file for peptide " + peptideSeq);
                    psmResultsManager.IncrementRow();
                    continue;
                }

                // Assume monoisotopic for both hi res and low res spectra
                MolecularWeights.MassType = MassType.Monoisotopic;

                // Compute precursor m/z value
                var precursorMZ = peptideMassCalculator.ConvoluteMass(expSpec.PrecursorMass, 1, chargeState);

                // Set the m/z range
                // For CID the lower bound scales with the precursor m/z; all other fragmentation types use minRange
                var mzMax = maxRange;
                var mzMin = precursorMZ * lowRangeMultiplier;

                if (ascoreParams.FragmentType != FragmentType.CID)
                {
                    mzMin = minRange;
                }

                // Generate all combination mixtures
                var modMixture = new Combinatorics.ModMixtureCombo(ascoreParams.DynamicMods, sequenceClean);

                var myPositionsList = GetMyPositionList(sequenceClean, modMixture);

                if (myPositionsList.Count > 1)
                {
                    // More than one candidate position list: proceed to calculation
                    ascoreAlgorithm.ComputeAScore(psmResultsManager, ascoreParams, scanNumber, chargeState,
                                                  peptideSeq, front, back, sequenceClean, expSpec,
                                                  mzMax, mzMin, myPositionsList);
                }
                else if (myPositionsList.Count == 1)
                {
                    // Either one or no modifiable sites; a max ID of 0 means no residue is modified
                    var uniqueID = myPositionsList[0].Max();
                    if (uniqueID == 0)
                    {
                        psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], MOD_INFO_NO_MODIFIED_RESIDUES);
                    }
                    else
                    {
                        psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], LookupModInfoByID(uniqueID, ascoreParams.DynamicMods));
                    }
                }
                else
                {
                    // No modifiable sites
                    psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, new int[0], MOD_INFO_NO_MODIFIED_RESIDUES);
                }

                psmResultsManager.IncrementRow();
            }

            Console.WriteLine();

            OnStatusEvent(string.Format("Writing {0:N0} rows to {1}", psmResultsManager.ResultsCount, PathUtils.CompactPathString(ascoreOptions.AScoreResultsFilePath, 80)));
            psmResultsManager.WriteToFile(ascoreOptions.AScoreResultsFilePath);

            Console.WriteLine();

            if (statsByType.Sum() == 0)
            {
                OnWarningEvent("Input file appeared empty");
            }
            else
            {
                OnStatusEvent("Stats by fragmentation ion type:");
                ReportStatsForFragType("  CID", statsByType, FragmentType.CID);
                ReportStatsForFragType("  ETD", statsByType, FragmentType.ETD);
                ReportStatsForFragType("  HCD", statsByType, FragmentType.HCD);
            }

            Console.WriteLine();
        }
Пример #3
0
        /// <summary>
        /// Opens a PHRP synopsis or first-hits file, reads every PSM, and assembles a row of values for each one;
        /// selected rows and diagnostics are written to the console
        /// </summary>
        /// <param name="synOrFhtFile">Path to the synopsis or first-hits file to read</param>
        /// <param name="blnSkipDuplicates">Value assigned to the reader's SkipDuplicatePSMs property</param>
        private static void TestPHRPReader(string synOrFhtFile, bool blnSkipDuplicates)
        {
            var sourceFile = new FileInfo(synOrFhtFile);

            Console.WriteLine("Instantiating reader");

            var readerOptions = new StartupOptions
            {
                LoadModsAndSeqInfo = true,
                LoadMSGFResults = true,
                LoadScanStatsData = false,
                MaxProteinsPerPSM = 100
            };

            var reader = new ReaderFactory(sourceFile.FullName, PeptideHitResultTypes.Unknown, readerOptions)
            {
                EchoMessagesToConsole = false,
                SkipDuplicatePSMs = blnSkipDuplicates
            };

            // Report any errors that were recorded while the reader was being constructed
            if (reader.ErrorMessages.Count > 0)
            {
                Console.WriteLine("Error(s) instantiating the reader:");
                foreach (var loadError in reader.ErrorMessages)
                {
                    Console.WriteLine("  " + loadError);
                }
            }

            reader.ErrorEvent += ErrorEventHandler;
            reader.StatusEvent += MessageEventHandler;
            reader.WarningEvent += WarningEventHandler;

            reader.FastReadMode = true;

            var massCalc = new PeptideMassCalculator();

            if (!reader.CanRead)
            {
                Console.WriteLine("Aborting since PHRPReader is not ready: " + reader.ErrorMessage);
                return;
            }

            // Column values for the PSM currently being processed; cleared and refilled on every iteration
            var columnValues = new List<string>();

            var psmCount = 0;
            var modifiedPsmCount = 0;

            // Holds a reference to every PSM read, keyed by read order
            // ReSharper disable once CollectionNeverQueried.Local
            var psmCache = new Dictionary<int, PSM>();

            Console.WriteLine("Reading data");

            while (reader.MoveNext())
            {
                var currentPsm = reader.CurrentPSM;

                psmCount++;
                columnValues.Clear();

                reader.FinalizeCurrentPSM();

                // Results are discarded; the call only exercises the method
                PeptideCleavageStateCalculator.SplitPrefixAndSuffixFromSequence(currentPsm.Peptide, out _, out _, out _);

                var massErrorPpmText = GetCorrectedMassErrorPPM(currentPsm, out _);

                var precursorMz = massCalc.ConvoluteMass(currentPsm.PrecursorNeutralMass, 0, currentPsm.Charge);

                columnValues.Add(reader.DatasetName + "_dta.txt");                          // #SpecFile
                columnValues.Add("index=" + psmCount);                                      // SpecID
                columnValues.Add(currentPsm.ScanNumber.ToString());                         // ScanNum
                columnValues.Add(currentPsm.CollisionMode);                                 // FragMethod
                columnValues.Add(precursorMz.ToString(CultureInfo.InvariantCulture));       // Precursor m/z

                columnValues.Add(massErrorPpmText);                                         // PrecursorError(ppm)
                columnValues.Add(currentPsm.Charge.ToString());                             // Charge
                columnValues.Add(currentPsm.NumTrypticTermini.ToString());                  // Tryptic state (0, 1, or 2)
                columnValues.Add(CleanupPeptide(currentPsm.PeptideWithNumericMods));        // Peptide

                // SeqID; values of zero or less are flagged as undefined by surrounding them with asterisks
                columnValues.Add(currentPsm.SeqID <= 0
                                     ? "**" + currentPsm.SeqID + "**"
                                     : currentPsm.SeqID.ToString());

                columnValues.Add(currentPsm.ProteinFirst);

                if (currentPsm.ProteinDetails.Count > 0)
                {
                    var leadProtein = currentPsm.ProteinDetails.First();

                    // Show the protein name only when it differs from ProteinFirst
                    columnValues.Add(string.Equals(currentPsm.ProteinFirst, leadProtein.Key)
                                         ? "<Match>"
                                         : leadProtein.Key);

                    columnValues.Add(leadProtein.Value.ResidueStart.ToString());
                    columnValues.Add(leadProtein.Value.ResidueEnd.ToString());
                }

                // SEQUEST scores, with the MSGF SpecEValue in between
                columnValues.Add(GetScore(currentPsm, SequestSynFileReader.GetColumnNameByID(SequestSynopsisFileColumns.XCorr), "0"));
                columnValues.Add(GetScore(currentPsm, SequestSynFileReader.GetColumnNameByID(SequestSynopsisFileColumns.Sp), "0"));
                columnValues.Add(currentPsm.MSGFSpecEValue);
                columnValues.Add(GetScore(currentPsm, SequestSynFileReader.GetColumnNameByID(SequestSynopsisFileColumns.DeltaCn2), "0"));

                // MSGFDB / MS-GF+ scores
                columnValues.Add(GetScore(currentPsm, MSGFPlusSynFileReader.GetMSGFDBColumnNameByID(MSGFDBSynFileColumns.PValue), "0"));
                columnValues.Add(GetScore(currentPsm, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.EValue), "0"));
                columnValues.Add(GetScore(currentPsm, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.RankSpecEValue), "0"));
                columnValues.Add(GetScore(currentPsm, MSGFPlusSynFileReader.GetMSGFDBColumnNameByID(MSGFDBSynFileColumns.FDR), "1"));
                columnValues.Add(GetScore(currentPsm, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.QValue), "0"));
                columnValues.Add(GetScore(currentPsm, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.PepQValue), "0"));

                // Extra diagnostics for one specific peptide
                if (currentPsm.PeptideCleanSequence == "QQIEESTSDYDKEK")
                {
                    Console.WriteLine(currentPsm.Peptide + " in scan " + currentPsm.ScanNumber);

                    var parentIonMZ = massCalc.ConvoluteMass(currentPsm.PrecursorNeutralMass, 0, currentPsm.Charge);

                    Console.WriteLine("ParentIonMZ   = " + parentIonMZ);
                    Console.WriteLine("PeptideWithNumericMods   = " + currentPsm.PeptideWithNumericMods);
                }

                if (currentPsm.ModifiedResidues.Count > 0)
                {
                    modifiedPsmCount++;

                    // Show the modification details for every 500th modified PSM
                    if (modifiedPsmCount % 500 == 0)
                    {
                        Console.WriteLine("PeptideWithNumericMods   = " + currentPsm.PeptideWithNumericMods);
                        foreach (var residueMod in currentPsm.ModifiedResidues)
                        {
                            Console.WriteLine("  " + residueMod.Residue + residueMod.EndResidueLocInPeptide + ": " + residueMod.ModDefinition.ModificationMassAsText);
                        }
                    }

                    // Recompute the mass from the annotated sequence and report differences larger than 0.1
                    var recomputedMass = massCalc.ComputeSequenceMassNumericMods(currentPsm.PeptideWithNumericMods);
                    var massDifference = currentPsm.PeptideMonoisotopicMass - recomputedMass;

                    if (Math.Abs(massDifference) > 0.1)
                    {
                        Console.WriteLine("  Peptide mass disagreement: " + massDifference.ToString("0.0000000"));
                    }
                }

                var flattenedRow = FlattenList(columnValues);

                // Only echo every 10,000th row
                if (psmCount % 10000 == 0)
                {
                    Console.WriteLine(flattenedRow);
                }

                psmCache.Add(psmCount, currentPsm);
            }
        }