/// <summary>
/// Run the AScore algorithm for a single results file, optionally adding protein mapping information
/// </summary>
/// <remarks>
/// Convenience overload: packs the path arguments into an <see cref="AScoreOptions"/> instance,
/// then hands off to the overload that accepts the options object
/// </remarks>
/// <param name="spectraManager">Spectra manager, passed through unchanged</param>
/// <param name="psmResultsManager">PSM results manager, passed through unchanged</param>
/// <param name="ascoreParams">AScore parameters, passed through unchanged</param>
/// <param name="outputFilePath">Name of the output file</param>
/// <param name="fastaFilePath">Path to FASTA file. If this is empty/null, protein mapping will not occur</param>
/// <param name="outputDescriptions">Whether to include protein description line in output or not.</param>
public void RunAScoreOnSingleFile(
    SpectraManagerCache spectraManager,
    PsmResultsManager psmResultsManager,
    ParameterFileManager ascoreParams,
    string outputFilePath,
    string fastaFilePath = "",
    bool outputDescriptions = false
)
{
    var options = new AScoreOptions();

    options.FastaFilePath = fastaFilePath;
    options.OutputProteinDescriptions = outputDescriptions;
    options.SetAScoreResultsFilePath(outputFilePath);

    RunAScoreOnSingleFile(options, spectraManager, psmResultsManager, ascoreParams);
}
/// <summary>
/// Run the AScore algorithm on results that span multiple jobs, using a job-to-dataset map file
/// to find the spectrum file for each job; optionally can add protein mapping information
/// </summary>
/// <param name="ascoreOptions">Options; JobToDatasetMapFile is the map file that gets read</param>
/// <param name="spectraManager">Spectra manager, passed on to RunAScoreOnPreparedData</param>
/// <param name="psmResultsManager">PSM results manager, passed on to RunAScoreOnPreparedData</param>
/// <param name="ascoreParams">AScore parameters, passed on to RunAScoreOnPreparedData</param>
public void RunAScoreWithMappingFile(
    AScoreOptions ascoreOptions,
    SpectraManagerCache spectraManager,
    PsmResultsManager psmResultsManager,
    ParameterFileManager ascoreParams)
{
    var mapFileDisplayPath = PathUtils.CompactPathString(ascoreOptions.JobToDatasetMapFile, 80);
    OnStatusEvent("Reading Job to Dataset Map File: " + mapFileDisplayPath);

    // Column names that are handed to the map file reader
    var columnsToFind = new List<string>();
    columnsToFind.Add("Job");
    columnsToFind.Add("Dataset");

    ReadJobToDatasetMapFile(ascoreOptions, columnsToFind, out var datasetsByJob);

    // The spectrum files have not been opened yet; they are opened per job while processing
    RunAScoreOnPreparedData(
        datasetsByJob,
        spectraManager,
        psmResultsManager,
        ascoreParams,
        ascoreOptions,
        spectraFileOpened: false);

    ProteinMapperTestRun(ascoreOptions);
}
/// <summary>
/// Configure and run the AScore algorithm for a single, already opened spectrum file;
/// optionally can add protein mapping information
/// </summary>
/// <param name="ascoreOptions">Options passed on to RunAScoreOnPreparedData and ProteinMapperTestRun</param>
/// <param name="spectraManager">Spectra manager; must be non-null and initialized before this method is called</param>
/// <param name="psmResultsManager">PSM results manager; its JobNum is used as the key for the single dataset</param>
/// <param name="ascoreParams">AScore parameters, passed on to RunAScoreOnPreparedData</param>
/// <exception cref="Exception">Thrown if spectraManager is null or has not been initialized</exception>
public void RunAScoreOnSingleFile(
    AScoreOptions ascoreOptions,
    SpectraManagerCache spectraManager,
    PsmResultsManager psmResultsManager,
    ParameterFileManager ascoreParams)
{
    // Validate spectraManager before reading any of its properties; checking after the
    // dictionary is built would surface a null manager as a NullReferenceException
    // instead of this descriptive error
    if (spectraManager == null || !spectraManager.Initialized)
    {
        throw new Exception(
            "spectraManager must be instantiated and initialized before calling RunAScoreOnSingleFile for a single source file");
    }

    // Single-entry map: the job of the PSM results file points at the spectrum file that is already loaded
    var jobToDatasetNameMap = new Dictionary<string, DatasetFileInfo>
    {
        { psmResultsManager.JobNum, new DatasetFileInfo(spectraManager.SpectrumFilePath, spectraManager.ModSummaryFilePath) }
    };

    RunAScoreOnPreparedData(jobToDatasetNameMap, spectraManager, psmResultsManager, ascoreParams, ascoreOptions, true);

    ProteinMapperTestRun(ascoreOptions);
}
/// <summary>
/// Runs all the tools necessary to perform an AScore run
/// </summary>
/// <remarks>
/// Steps through every row of psmResultsManager. Whenever the job number changes, the spectrum file
/// and modification definitions for that job are loaded. Each distinct scan/charge/peptide combination
/// is then either scored with the AScore algorithm (more than one candidate site arrangement) or written
/// to the results table with a score of 0. Finally, the results table is written to
/// ascoreOptions.AScoreResultsFilePath and fragmentation type statistics are reported.
/// </remarks>
/// <param name="jobToDatasetNameMap">
/// Keys are job numbers (stored as strings); values hold the spectrum file path (used to determine the dataset name)
/// and an optional ModSummary file path
/// </param>
/// <param name="spectraManager">Manager for reading _dta.txt or .mzML files; must have already been initialized by the calling class</param>
/// <param name="psmResultsManager">Source of the PSM rows and destination of the result rows</param>
/// <param name="ascoreParams">AScore parameters; passed by reference to the row reader and sequence cleaner</param>
/// <param name="ascoreOptions">Options; provides the fallback ModSummary file and the results file path</param>
/// <param name="spectraFileOpened">Set to true if processing a single dataset, and spectraManager.OpenFile() has already been called</param>
/// <exception cref="ArgumentException">Thrown if jobToDatasetNameMap is null or empty</exception>
/// <exception cref="Exception">Thrown if a row refers to a job that is not in jobToDatasetNameMap, or if no spectrum file is available</exception>
private void RunAScoreOnPreparedData(
    IReadOnlyDictionary<string, DatasetFileInfo> jobToDatasetNameMap,
    SpectraManagerCache spectraManager,
    PsmResultsManager psmResultsManager,
    ParameterFileManager ascoreParams,
    AScoreOptions ascoreOptions,
    bool spectraFileOpened)
{
    var totalRows = psmResultsManager.GetRowLength();

    // Scan_Charge_Peptide keys that have already been seen; used to skip duplicate PSMs
    var peptidesProcessed = new HashSet<string>();

    if (jobToDatasetNameMap == null || jobToDatasetNameMap.Count == 0)
    {
        const string errorMessage = "Error in AlgorithmRun: jobToDatasetNameMap cannot be null or empty";
        OnErrorEvent(errorMessage);
        throw new ArgumentException(errorMessage);
    }

    ISpectraManager spectraFile = null;

    // Starts as null so that the first row always triggers loading of the spectrum file
    string spectraManagerCurrentJob = null;

    var modSummaryManager = new ModSummaryFileManager();
    RegisterEvents(modSummaryManager);

    var peptideMassCalculator = new PeptideMassCalculator();

    if (FilterOnMSGFScore)
    {
        OnStatusEvent("Filtering using MSGF_SpecProb <= " + ascoreParams.MSGFPreFilter.ToString("0.0E+00"));
    }

    Console.WriteLine();

    // Row counts, indexed by the integer value of FragmentType
    var statsByType = new int[4];

    var ascoreAlgorithm = new AScoreAlgorithm();
    RegisterEvents(ascoreAlgorithm);

    while (psmResultsManager.CurrentRowNum < totalRows)
    {
        if (psmResultsManager.CurrentRowNum % 100 == 0)
        {
            Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%");
        }

        int scanNumber;
        int scanCount;
        int chargeState;
        string peptideSeq;
        double msgfScore;

        if (FilterOnMSGFScore)
        {
            psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, out msgfScore, ref ascoreParams);
        }
        else
        {
            psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, ref ascoreParams);
            msgfScore = 1;
        }

        switch (ascoreParams.FragmentType)
        {
            case FragmentType.CID:
                statsByType[(int)FragmentType.CID]++;
                break;

            case FragmentType.ETD:
                statsByType[(int)FragmentType.ETD]++;
                break;

            case FragmentType.HCD:
                statsByType[(int)FragmentType.HCD]++;
                break;

            default:
                statsByType[(int)FragmentType.Unspecified]++;
                break;
        }

        if (string.IsNullOrEmpty(spectraManagerCurrentJob) || !string.Equals(spectraManagerCurrentJob, psmResultsManager.JobNum))
        {
            // New dataset
            // Get the correct spectrum file for the match
            if (!jobToDatasetNameMap.TryGetValue(psmResultsManager.JobNum, out var datasetInfo))
            {
                var errorMessage = "Input file refers to job " + psmResultsManager.JobNum +
                                   " but jobToDatasetNameMap does not contain that job; unable to continue";
                OnWarningEvent(errorMessage);

                if (!psmResultsManager.JobColumnDefined)
                {
                    OnWarningEvent(
                        "If the input file includes results from multiple jobs, the first column must be job number with Job as the column heading");
                }

                throw new Exception(errorMessage);
            }

            var datasetName = GetDatasetName(datasetInfo.SpectrumFilePath);
            OnStatusEvent("Dataset name: " + datasetName);

            if (!spectraFileOpened)
            {
                // This method was called from RunAScoreWithMappingFile
                // Open the spectrum file for this dataset
                spectraFile = spectraManager.GetSpectraManagerForFile(
                    psmResultsManager.PSMResultsFilePath,
                    datasetName,
                    datasetInfo.ModSummaryFilePath);
            }
            else
            {
                spectraFile = spectraManager.GetCurrentSpectrumManager();
            }

            // Strings are immutable, so the reference can be stored directly (string.Copy is obsolete)
            spectraManagerCurrentJob = psmResultsManager.JobNum;
            Console.Write("\r");

            // Fall back to the ModSummary file defined in the options.
            // NOTE(review): this fallback is applied after GetSpectraManagerForFile was given
            // datasetInfo.ModSummaryFilePath above; confirm that ordering is intended
            if (string.IsNullOrWhiteSpace(datasetInfo.ModSummaryFilePath) &&
                !string.IsNullOrWhiteSpace(ascoreOptions.ModSummaryFile))
            {
                datasetInfo.ModSummaryFilePath = ascoreOptions.ModSummaryFile;
            }

            if (psmResultsManager is MsgfMzid mzid)
            {
                mzid.SetModifications(ascoreParams);
            }
            else if (psmResultsManager is MsgfMzidFull mzidFull)
            {
                mzidFull.SetModifications(ascoreParams);
            }
            else
            {
                // Other result types obtain their modifications from a ModSummary file
                if (string.IsNullOrEmpty(datasetInfo.ModSummaryFilePath))
                {
                    modSummaryManager.ReadModSummary(spectraFile.DatasetName, psmResultsManager.PSMResultsFilePath, ascoreParams);
                }
                else
                {
                    var modSummaryFile = new FileInfo(datasetInfo.ModSummaryFilePath);
                    modSummaryManager.ReadModSummary(modSummaryFile, ascoreParams);
                }
            }

            Console.WriteLine();

            Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%");
        }

        // Perform work on the match

        // Split a peptide of the form Prefix.Sequence.Suffix; use "?" for the flanking residues if they are absent
        var peptideParts = peptideSeq.Split('.');

        string sequenceWithoutSuffixOrPrefix;
        string front;
        string back;

        if (peptideParts.Length >= 3)
        {
            front = peptideParts[0];
            sequenceWithoutSuffixOrPrefix = peptideParts[1];
            back = peptideParts[2];
        }
        else
        {
            front = "?";
            sequenceWithoutSuffixOrPrefix = peptideSeq;
            back = "?";
        }

        var sequenceClean = GetCleanSequence(sequenceWithoutSuffixOrPrefix, ref ascoreParams);

        var failsMsgfFilter = FilterOnMSGFScore && msgfScore > ascoreParams.MSGFPreFilter;

        // HashSet.Add returns false if the key is already present, i.e. this PSM was already processed.
        // The key is recorded even when the PSM fails the MSGF filter.
        // NOTE(review): the key does not include the job number, so identical scan/charge/peptide
        // combinations from different jobs are treated as duplicates; confirm this is intended
        var scanChargePeptide = scanNumber + "_" + chargeState + "_" + sequenceWithoutSuffixOrPrefix;
        var alreadyProcessed = !peptidesProcessed.Add(scanChargePeptide);

        if (failsMsgfFilter || alreadyProcessed)
        {
            psmResultsManager.IncrementRow();
            continue;
        }

        // Get experimental spectra
        if (spectraFile == null)
        {
            const string errorMessage = "spectraFile is uninitialized in RunAScoreOnPreparedData; this indicates a programming bug";
            OnErrorEvent(errorMessage);
            throw new Exception(errorMessage);
        }

        var expSpec = spectraFile.GetExperimentalSpectra(scanNumber, scanCount, chargeState);

        if (expSpec == null)
        {
            OnWarningEvent("Scan " + scanNumber + " not found in spectra file for peptide " + peptideSeq);
            psmResultsManager.IncrementRow();
            continue;
        }

        // Assume monoisotopic for both hi res and low res spectra
        MolecularWeights.MassType = MassType.Monoisotopic;

        // Compute precursor m/z value
        var precursorMZ = peptideMassCalculator.ConvoluteMass(expSpec.PrecursorMass, 1, chargeState);

        // Set the m/z range: for CID the lower bound scales with the precursor m/z;
        // for all other fragmentation types the fixed minimum is used
        var mzMax = maxRange;
        var mzMin = precursorMZ * lowRangeMultiplier;

        if (ascoreParams.FragmentType != FragmentType.CID)
        {
            mzMin = minRange;
        }

        // Generate all combination mixtures
        var modMixture = new Combinatorics.ModMixtureCombo(ascoreParams.DynamicMods, sequenceClean);

        var myPositionsList = GetMyPositionList(sequenceClean, modMixture);

        if (myPositionsList.Count > 1)
        {
            // More than one candidate site arrangement: proceed to the AScore calculation
            ascoreAlgorithm.ComputeAScore(psmResultsManager, ascoreParams, scanNumber, chargeState, peptideSeq, front, back, sequenceClean, expSpec, mzMax, mzMin, myPositionsList);
        }
        else if (myPositionsList.Count == 1)
        {
            // Only one arrangement, so there is nothing to localize; a max mod ID of 0 is reported as no modified residues
            var uniqueID = myPositionsList[0].Max();

            if (uniqueID == 0)
            {
                psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], MOD_INFO_NO_MODIFIED_RESIDUES);
            }
            else
            {
                psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], LookupModInfoByID(uniqueID, ascoreParams.DynamicMods));
            }
        }
        else
        {
            // No modifiable sites
            psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, new int[0], MOD_INFO_NO_MODIFIED_RESIDUES);
        }

        psmResultsManager.IncrementRow();
    }

    Console.WriteLine();

    OnStatusEvent(string.Format("Writing {0:N0} rows to {1}", psmResultsManager.ResultsCount, PathUtils.CompactPathString(ascoreOptions.AScoreResultsFilePath, 80)));
    psmResultsManager.WriteToFile(ascoreOptions.AScoreResultsFilePath);

    Console.WriteLine();

    if (statsByType.Sum() == 0)
    {
        OnWarningEvent("Input file appeared empty");
    }
    else
    {
        OnStatusEvent("Stats by fragmentation ion type:");
        ReportStatsForFragType("  CID", statsByType, FragmentType.CID);
        ReportStatsForFragType("  ETD", statsByType, FragmentType.ETD);
        ReportStatsForFragType("  HCD", statsByType, FragmentType.HCD);
    }

    Console.WriteLine();
}
/// <summary>
/// Score every candidate modification site arrangement of a peptide against the experimental spectrum,
/// compute the AScore results for the top-scoring arrangement, and store them in the results table
/// </summary>
/// <param name="psmResultsManager">Results manager that receives one table row per AScore result</param>
/// <param name="ascoreParams">AScore parameters (dynamic mods, fragment mass tolerance)</param>
/// <param name="scanNumber">Scan number of the PSM</param>
/// <param name="chargeState">Charge state of the PSM; a value of 1 is processed as 2</param>
/// <param name="peptideSeq">Peptide sequence as read from the input file</param>
/// <param name="front">Text to place before the sequence (separated by a period) in reported sequences</param>
/// <param name="back">Text to place after the sequence (separated by a period) in reported sequences</param>
/// <param name="sequenceClean">Clean peptide sequence used to build the theoretical spectra</param>
/// <param name="expSpec">Experimental spectrum for the scan</param>
/// <param name="mzMax">Upper m/z bound, passed on to the ion list generation</param>
/// <param name="mzMin">Lower m/z bound, passed on to the ion list generation</param>
/// <param name="myPositionsList">Candidate modification site arrangements; one int array per candidate</param>
public void ComputeAScore(
    PsmResultsManager psmResultsManager,
    ParameterFileManager ascoreParams,
    int scanNumber,
    int chargeState,
    string peptideSeq,
    string front,
    string back,
    string sequenceClean,
    ExperimentalSpectra expSpec,
    double mzMax,
    double mzMin,
    IReadOnlyList<int[]> myPositionsList)
{
    // Singly charged PSMs are processed as 2+
    if (chargeState == 1)
    {
        chargeState = 2;
    }

    // Parallel lists: one inner list per candidate arrangement, one entry per peak depth
    var scoresByCandidate = new List<List<double>>();
    var weightedScoresByCandidate = new List<List<double>>();

    try
    {
        var monoSpectra = new TheoreticalSpectra(sequenceClean, ascoreParams, chargeState, MassType.Monoisotopic);
        var averageSpectra = new TheoreticalSpectra(sequenceClean, ascoreParams, chargeState, MassType.Average);

        var peptideMassTheoretical = monoSpectra.PeptideNeutralMassWithStaticMods +
                                     GetModMassTotal(peptideSeq, ascoreParams.DynamicMods);

        // Sanity check of the precursor mass; mismatches are only reported as warnings, processing continues
        if (Math.Abs(peptideMassTheoretical - expSpec.PrecursorNeutralMass) > 20)
        {
            OnWarningEvent(string.Format(
                "Scan {0}: Observed precursor mass of {1:F1} Da is more than 20 Da away from the computed mass of {2:F1} Da; DeltaMass = {3:F1} Da",
                scanNumber,
                expSpec.PrecursorNeutralMass,
                peptideMassTheoretical,
                expSpec.PrecursorNeutralMass - peptideMassTheoretical));
        }
        else
        {
            // Look for agreement between the masses, allowing for a shift of up to (charge + 3) multiples
            // of MASS_C13 in either direction, and gradually widening the tolerance
            var massesAgree = false;

            for (double chargeAdjust = 0; !massesAgree && chargeAdjust < 0.1; chargeAdjust += 0.005)
            {
                var tolerance = 0.15 + chargeState * chargeAdjust;

                for (var massAdjust = -chargeState - 3; massAdjust <= chargeState + 3; massAdjust++)
                {
                    var delM = peptideMassTheoretical - expSpec.PrecursorNeutralMass + massAdjust * MASS_C13;

                    if (Math.Abs(delM) < tolerance)
                    {
                        massesAgree = true;
                        break;
                    }
                }
            }

            if (!massesAgree)
            {
                OnWarningEvent(string.Format(
                    "Scan {0}: Observed precursor mass of {1:F1} Da is not a reasonable match for computed mass of {2:F1} Da; " +
                    "DeltaMass = {3:F1} Da; Peptide = {4}",
                    scanNumber,
                    expSpec.PrecursorNeutralMass,
                    peptideMassTheoretical,
                    expSpec.PrecursorNeutralMass - peptideMassTheoretical,
                    peptideSeq));
            }
        }

        foreach (var candidatePositions in myPositionsList)
        {
            // Theoretical ions for this modification combination
            var candidateIons = GetChargeList(ascoreParams, mzMax, mzMin, monoSpectra, averageSpectra, candidatePositions);

            var candidateScores = new List<double>();
            var candidateWeightedScores = new List<double>();

            scoresByCandidate.Add(candidateScores);
            weightedScoresByCandidate.Add(candidateWeightedScores);

            // Score the candidate at peak depths 1 through 10
            for (var peakDepth = 1; peakDepth <= 10; peakDepth++)
            {
                var depthSpectra = expSpec.GetPeakDepthSpectra(peakDepth);
                depthSpectra.Sort();

                var matchedIons = GetMatchedMZ(ascoreParams.FragmentMassTolerance, candidateIons, depthSpectra);

                // The first argument adjusts the peptide score for the width of the tolerance window
                var depthScore = PeptideScoresManager.GetPeptideScore(
                    peakDepth * ascoreParams.FragmentMassTolerance * 2 / 100.0,
                    candidateIons.Count,
                    matchedIons.Count);

                candidateScores.Add(depthScore);
                candidateWeightedScores.Add(depthScore * ScoreWeights[peakDepth - 1]);
            }
        }

        // Sum the weighted scores of each candidate, remembering the candidate's index
        var sortedSumScore = new List<ValueIndexPair<double>>();

        for (var candidateIndex = 0; candidateIndex < scoresByCandidate.Count; candidateIndex++)
        {
            var weightedTotal = 0.0;

            foreach (var weightedScore in weightedScoresByCandidate[candidateIndex])
            {
                weightedTotal += weightedScore;
            }

            sortedSumScore.Add(new ValueIndexPair<double>(weightedTotal, candidateIndex));
        }

        // After sorting, the first entry is treated as the top candidate
        // (presumably ValueIndexPair sorts best-first; verify against its comparer)
        sortedSumScore.Sort();

        var topCandidate = sortedSumScore[0];
        var topPeptideScore = topCandidate.Value;

        // Modification sites of the top-scoring candidate
        var topPeptidePTMSites = myPositionsList[topCandidate.Index];

        var ascoreResults = CalculateAScoreForSite(
            ascoreParams, expSpec, mzMax, mzMin, myPositionsList, topPeptidePTMSites,
            scoresByCandidate, monoSpectra, averageSpectra, sortedSumScore);

        // Attach the flanked sequence of each result's own modification arrangement
        foreach (var result in ascoreResults)
        {
            var resultSequence = GenerateFinalSequences(sequenceClean, ascoreParams, result.PeptideMods);
            result.SecondSequence = front + "." + resultSequence + "." + back;
        }

        // Store the scores in the results table, alongside the flanked sequence of the top candidate
        var bestSeq = front + "." + GenerateFinalSequences(sequenceClean, ascoreParams, topPeptidePTMSites) + "." + back;

        foreach (var result in ascoreResults)
        {
            psmResultsManager.WriteToTable(peptideSeq, bestSeq, scanNumber, topPeptideScore, result);
        }
    }
    catch (Exception ex)
    {
        // Report the error, then let the exception propagate to the caller with its stack trace intact
        OnErrorEvent("Exception in ComputeAScore: " + ex.Message);
        throw;
    }
}