public void TestConvoluteMass(double massMz, int currentCharge, int newCharge, double expectedMz) { var newMz = mPeptideMassCalculator.ConvoluteMass(massMz, currentCharge, newCharge); var newMzAlt = mPeptideMassCalculator.ConvoluteMass(massMz, currentCharge, newCharge, PeptideMassCalculator.MASS_PROTON); Console.WriteLine("{0} from {1}+ to {2}+ is {3:F5}; expected {4:F5}", massMz, currentCharge, newCharge, newMz, expectedMz); Assert.AreEqual(newMz, newMzAlt, 1E-05, "The two overloads of ConvoluteMass reported conflicting mass values"); Assert.AreEqual(expectedMz, newMz, 0.0001, "Unexpected convoluted m/z"); }
/// <summary> /// Runs the all the tools necessary to perform an ascore run /// </summary> /// <param name="jobToDatasetNameMap">Keys are job numbers (stored as strings); values are Dataset Names or the path to the _dta.txt file</param> /// <param name="spectraManager">Manager for reading _dta.txt or .mzML files; must have already been initialized by the calling class</param> /// <param name="psmResultsManager"></param> /// <param name="ascoreParams"></param> /// <param name="ascoreOptions"></param> /// <param name="spectraFileOpened">Set to true if processing a single dataset, and spectraManager.OpenFile() has already been called</param> private void RunAScoreOnPreparedData( IReadOnlyDictionary <string, DatasetFileInfo> jobToDatasetNameMap, SpectraManagerCache spectraManager, PsmResultsManager psmResultsManager, ParameterFileManager ascoreParams, AScoreOptions ascoreOptions, bool spectraFileOpened) { var totalRows = psmResultsManager.GetRowLength(); var dctPeptidesProcessed = new Dictionary <string, int>(); if (jobToDatasetNameMap == null || jobToDatasetNameMap.Count == 0) { const string errorMessage = "Error in AlgorithmRun: jobToDatasetNameMap cannot be null or empty"; OnErrorEvent(errorMessage); throw new ArgumentException(errorMessage); } ISpectraManager spectraFile = null; string spectraManagerCurrentJob = null; // Force open after first read from fht var modSummaryManager = new ModSummaryFileManager(); RegisterEvents(modSummaryManager); var peptideMassCalculator = new PeptideMassCalculator(); if (FilterOnMSGFScore) { OnStatusEvent("Filtering using MSGF_SpecProb <= " + ascoreParams.MSGFPreFilter.ToString("0.0E+00")); } Console.WriteLine(); var statsByType = new int[4]; var ascoreAlgorithm = new AScoreAlgorithm(); RegisterEvents(ascoreAlgorithm); while (psmResultsManager.CurrentRowNum < totalRows) { // Console.Clear(); if (psmResultsManager.CurrentRowNum % 100 == 0) { Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%"); } int scanNumber; int scanCount; int chargeState; string peptideSeq; double msgfScore; if (FilterOnMSGFScore) { psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, out msgfScore, ref ascoreParams); } else { psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, ref ascoreParams); msgfScore = 1; } switch (ascoreParams.FragmentType) { case FragmentType.CID: statsByType[(int)FragmentType.CID]++; break; case FragmentType.ETD: statsByType[(int)FragmentType.ETD]++; break; case FragmentType.HCD: statsByType[(int)FragmentType.HCD]++; break; default: statsByType[(int)FragmentType.Unspecified]++; break; } if (string.IsNullOrEmpty(spectraManagerCurrentJob) || !string.Equals(spectraManagerCurrentJob, psmResultsManager.JobNum)) { // New dataset // Get the correct spectrum file for the match if (!jobToDatasetNameMap.TryGetValue(psmResultsManager.JobNum, out var datasetInfo)) { var errorMessage = "Input file refers to job " + psmResultsManager.JobNum + " but jobToDatasetNameMap does not contain that job; unable to continue"; OnWarningEvent(errorMessage); if (!psmResultsManager.JobColumnDefined) { OnWarningEvent( "If the input file includes results from multiple jobs, the first column must be job number with Job as the column heading"); } throw new Exception(errorMessage); } var datasetName = GetDatasetName(datasetInfo.SpectrumFilePath); OnStatusEvent("Dataset name: " + datasetName); if (!spectraFileOpened) { // This method was called from RunAScoreWithMappingFile // Open the spectrum file for this dataset spectraFile = spectraManager.GetSpectraManagerForFile( psmResultsManager.PSMResultsFilePath, datasetName, datasetInfo.ModSummaryFilePath); } else { spectraFile = spectraManager.GetCurrentSpectrumManager(); } spectraManagerCurrentJob = string.Copy(psmResultsManager.JobNum); Console.Write("\r"); if (string.IsNullOrWhiteSpace(datasetInfo.ModSummaryFilePath) && !string.IsNullOrWhiteSpace(ascoreOptions.ModSummaryFile)) { datasetInfo.ModSummaryFilePath = ascoreOptions.ModSummaryFile; } if (psmResultsManager is MsgfMzid mzid) { mzid.SetModifications(ascoreParams); } else if (psmResultsManager is MsgfMzidFull mzidFull) { mzidFull.SetModifications(ascoreParams); } else { if (string.IsNullOrEmpty(datasetInfo.ModSummaryFilePath)) { modSummaryManager.ReadModSummary(spectraFile.DatasetName, psmResultsManager.PSMResultsFilePath, ascoreParams); } else { var modSummaryFile = new FileInfo(datasetInfo.ModSummaryFilePath); modSummaryManager.ReadModSummary(modSummaryFile, ascoreParams); } } Console.WriteLine(); Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%"); } // perform work on the match var peptideParts = peptideSeq.Split('.'); string sequenceWithoutSuffixOrPrefix; string front; string back; if (peptideParts.Length >= 3) { front = peptideParts[0]; sequenceWithoutSuffixOrPrefix = peptideParts[1]; back = peptideParts[2]; } else { front = "?"; sequenceWithoutSuffixOrPrefix = string.Copy(peptideSeq); back = "?"; } var sequenceClean = GetCleanSequence(sequenceWithoutSuffixOrPrefix, ref ascoreParams); var skipPSM = FilterOnMSGFScore && msgfScore > ascoreParams.MSGFPreFilter; var scanChargePeptide = scanNumber + "_" + chargeState + "_" + sequenceWithoutSuffixOrPrefix; if (dctPeptidesProcessed.ContainsKey(scanChargePeptide)) { // We have already processed this PSM skipPSM = true; } else { dctPeptidesProcessed.Add(scanChargePeptide, 0); } if (skipPSM) { psmResultsManager.IncrementRow(); continue; } //Get experimental spectra if (spectraFile == null) { const string errorMessage = "spectraFile is uninitialized in RunAScoreOnPreparedData; this indicates a programming bug"; OnErrorEvent(errorMessage); throw new Exception(errorMessage); } var expSpec = spectraFile.GetExperimentalSpectra(scanNumber, scanCount, chargeState); if (expSpec == null) { OnWarningEvent("Scan " + scanNumber + " not found in spectra file for peptide " + peptideSeq); psmResultsManager.IncrementRow(); continue; } // Assume monoisotopic for both hi res and low res spectra MolecularWeights.MassType = MassType.Monoisotopic; // Compute precursor m/z value var precursorMZ = peptideMassCalculator.ConvoluteMass(expSpec.PrecursorMass, 1, chargeState); // Set the m/z range var mzMax = maxRange; var mzMin = precursorMZ * lowRangeMultiplier; if (ascoreParams.FragmentType != FragmentType.CID) { mzMax = maxRange; mzMin = minRange; } //Generate all combination mixtures var modMixture = new Combinatorics.ModMixtureCombo(ascoreParams.DynamicMods, sequenceClean); var myPositionsList = GetMyPositionList(sequenceClean, modMixture); //If I have more than 1 modifiable site proceed to calculation if (myPositionsList.Count > 1) { ascoreAlgorithm.ComputeAScore(psmResultsManager, ascoreParams, scanNumber, chargeState, peptideSeq, front, back, sequenceClean, expSpec, mzMax, mzMin, myPositionsList); } else if (myPositionsList.Count == 1) { // Either one or no modifiable sites var uniqueID = myPositionsList[0].Max(); if (uniqueID == 0) { psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], MOD_INFO_NO_MODIFIED_RESIDUES); } else { psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], LookupModInfoByID(uniqueID, ascoreParams.DynamicMods)); } } else { // No modifiable sites psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, new int[0], MOD_INFO_NO_MODIFIED_RESIDUES); } psmResultsManager.IncrementRow(); } Console.WriteLine(); OnStatusEvent(string.Format("Writing {0:N0} rows to {1}", psmResultsManager.ResultsCount, PathUtils.CompactPathString(ascoreOptions.AScoreResultsFilePath, 80))); psmResultsManager.WriteToFile(ascoreOptions.AScoreResultsFilePath); Console.WriteLine(); if (statsByType.Sum() == 0) { OnWarningEvent("Input file appeared empty"); } else { OnStatusEvent("Stats by fragmentation ion type:"); ReportStatsForFragType(" CID", statsByType, FragmentType.CID); ReportStatsForFragType(" ETD", statsByType, FragmentType.ETD); ReportStatsForFragType(" HCD", statsByType, FragmentType.HCD); } Console.WriteLine(); }
private static void TestPHRPReader(string synOrFhtFile, bool blnSkipDuplicates) { var inputFile = new FileInfo(synOrFhtFile); Console.WriteLine("Instantiating reader"); var startupOptions = new StartupOptions { LoadModsAndSeqInfo = true, LoadMSGFResults = true, LoadScanStatsData = false, MaxProteinsPerPSM = 100 }; var phrpReader = new ReaderFactory(inputFile.FullName, PeptideHitResultTypes.Unknown, startupOptions) { EchoMessagesToConsole = false, SkipDuplicatePSMs = blnSkipDuplicates }; // Check for any load errors if (phrpReader.ErrorMessages.Count > 0) { Console.WriteLine("Error(s) instantiating the reader:"); foreach (var errorMessage in phrpReader.ErrorMessages) { Console.WriteLine(" " + errorMessage); } } phrpReader.ErrorEvent += ErrorEventHandler; phrpReader.StatusEvent += MessageEventHandler; phrpReader.WarningEvent += WarningEventHandler; const bool fastReadEnabled = true; phrpReader.FastReadMode = fastReadEnabled; var massCalculator = new PeptideMassCalculator(); if (!phrpReader.CanRead) { Console.WriteLine("Aborting since PHRPReader is not ready: " + phrpReader.ErrorMessage); return; } var lstValues = new List <string>(); var intPSMsRead = 0; var intModifiedPSMsRead = 0; // ReSharper disable once CollectionNeverQueried.Local var dctCachedValues = new Dictionary <int, PSM>(); Console.WriteLine("Reading data"); while (phrpReader.MoveNext()) { var psm = phrpReader.CurrentPSM; intPSMsRead += 1; lstValues.Clear(); phrpReader.FinalizeCurrentPSM(); PeptideCleavageStateCalculator.SplitPrefixAndSuffixFromSequence(psm.Peptide, out _, out _, out _); var strMassErrorPPM = GetCorrectedMassErrorPPM(psm, out _); lstValues.Add(phrpReader.DatasetName + "_dta.txt"); // #SpecFile lstValues.Add("index=" + intPSMsRead); // SpecID lstValues.Add(psm.ScanNumber.ToString()); // ScanNum lstValues.Add(psm.CollisionMode); // FragMethod lstValues.Add(massCalculator.ConvoluteMass(psm.PrecursorNeutralMass, 0, psm.Charge).ToString(CultureInfo.InvariantCulture)); // Precursor m/z lstValues.Add(strMassErrorPPM); // PrecursorError(ppm) lstValues.Add(psm.Charge.ToString()); // Charge lstValues.Add(psm.NumTrypticTermini.ToString()); // Tryptic state (0, 1, or 2) lstValues.Add(CleanupPeptide(psm.PeptideWithNumericMods)); // Peptide if (psm.SeqID <= 0) { lstValues.Add("**" + psm.SeqID + "**"); // SeqID is undefined } else { lstValues.Add(psm.SeqID.ToString()); // SeqID } lstValues.Add(psm.ProteinFirst); if (psm.ProteinDetails.Count > 0) { var firstProteinDetail = psm.ProteinDetails.First(); if (!string.Equals(psm.ProteinFirst, firstProteinDetail.Key)) { lstValues.Add(firstProteinDetail.Key); } else { lstValues.Add("<Match>"); } lstValues.Add(firstProteinDetail.Value.ResidueStart.ToString()); lstValues.Add(firstProteinDetail.Value.ResidueEnd.ToString()); } var strXCorr = GetScore(psm, SequestSynFileReader.GetColumnNameByID(SequestSynopsisFileColumns.XCorr), "0"); lstValues.Add(strXCorr); lstValues.Add(GetScore(psm, SequestSynFileReader.GetColumnNameByID(SequestSynopsisFileColumns.Sp), "0")); lstValues.Add(psm.MSGFSpecEValue); lstValues.Add(GetScore(psm, SequestSynFileReader.GetColumnNameByID(SequestSynopsisFileColumns.DeltaCn2), "0")); lstValues.Add(GetScore(psm, MSGFPlusSynFileReader.GetMSGFDBColumnNameByID(MSGFDBSynFileColumns.PValue), "0")); lstValues.Add(GetScore(psm, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.EValue), "0")); lstValues.Add(GetScore(psm, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.RankSpecEValue), "0")); lstValues.Add(GetScore(psm, MSGFPlusSynFileReader.GetMSGFDBColumnNameByID(MSGFDBSynFileColumns.FDR), "1")); lstValues.Add(GetScore(psm, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.QValue), "0")); lstValues.Add(GetScore(psm, MSGFPlusSynFileReader.GetColumnNameByID(MSGFPlusSynFileColumns.PepQValue), "0")); if (psm.PeptideCleanSequence == "QQIEESTSDYDKEK") { Console.WriteLine(psm.Peptide + " in scan " + psm.ScanNumber); var parentIonMZ = massCalculator.ConvoluteMass(psm.PrecursorNeutralMass, 0, psm.Charge); Console.WriteLine("ParentIonMZ = " + parentIonMZ); Console.WriteLine("PeptideWithNumericMods = " + psm.PeptideWithNumericMods); } if (psm.ModifiedResidues.Count > 0) { intModifiedPSMsRead += 1; if (intModifiedPSMsRead % 500 == 0) { Console.WriteLine("PeptideWithNumericMods = " + psm.PeptideWithNumericMods); foreach (var modifiedResidue in psm.ModifiedResidues) { Console.WriteLine(" " + modifiedResidue.Residue + modifiedResidue.EndResidueLocInPeptide + ": " + modifiedResidue.ModDefinition.ModificationMassAsText); } } var dblPeptideMassRecomputed = massCalculator.ComputeSequenceMassNumericMods(psm.PeptideWithNumericMods); if (Math.Abs(psm.PeptideMonoisotopicMass - dblPeptideMassRecomputed) > 0.1) { Console.WriteLine(" Peptide mass disagreement: " + (psm.PeptideMonoisotopicMass - dblPeptideMassRecomputed).ToString("0.0000000")); } } var strFlattened = FlattenList(lstValues); if (intPSMsRead % 10000 == 0) { Console.WriteLine(strFlattened); } dctCachedValues.Add(intPSMsRead, psm); } }