Exemple #1
0
        /// <summary>
        /// Convenience overload: builds an AScoreOptions instance from the supplied paths and flags,
        /// then runs AScore on a single input file via the overload that accepts AScoreOptions
        /// </summary>
        /// <param name="spectraManager">Spectra manager; passed through unchanged to the options-based overload</param>
        /// <param name="psmResultsManager">PSM results manager; passed through unchanged to the options-based overload</param>
        /// <param name="ascoreParams">AScore parameters; passed through unchanged to the options-based overload</param>
        /// <param name="outputFilePath">Name of the output file; stored using AScoreOptions.SetAScoreResultsFilePath</param>
        /// <param name="fastaFilePath">Path to FASTA file. If this is empty/null, protein mapping will not occur</param>
        /// <param name="outputDescriptions">Whether to include protein description line in output or not.</param>
        public void RunAScoreOnSingleFile(
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams,
            string outputFilePath,
            string fastaFilePath = "",
            bool outputDescriptions = false)
        {
            // Translate the loose arguments into an options object
            var options = new AScoreOptions();
            options.FastaFilePath = fastaFilePath;
            options.OutputProteinDescriptions = outputDescriptions;
            options.SetAScoreResultsFilePath(outputFilePath);

            // Hand off to the overload that does the actual work
            RunAScoreOnSingleFile(options, spectraManager, psmResultsManager, ascoreParams);
        }
Exemple #2
0
        /// <summary>
        /// Run the AScore algorithm on results that span multiple jobs, using the job-to-dataset
        /// map file named by ascoreOptions.JobToDatasetMapFile to find the dataset for each job
        /// </summary>
        /// <remarks>
        /// The map file is read with "Job" and "Dataset" as the required columns.
        /// After scoring completes, ProteinMapperTestRun is invoked with the same options.
        /// </remarks>
        /// <param name="ascoreOptions">Processing options; supplies the job-to-dataset map file path</param>
        /// <param name="spectraManager">Spectra manager; forwarded to RunAScoreOnPreparedData</param>
        /// <param name="psmResultsManager">PSM results manager; forwarded to RunAScoreOnPreparedData</param>
        /// <param name="ascoreParams">AScore parameters; forwarded to RunAScoreOnPreparedData</param>
        public void RunAScoreWithMappingFile(
            AScoreOptions ascoreOptions,
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams)
        {
            // Shorten the path so the status message stays readable
            var mapFilePathForDisplay = PathUtils.CompactPathString(ascoreOptions.JobToDatasetMapFile, 80);
            OnStatusEvent("Reading Job to Dataset Map File: " + mapFilePathForDisplay);

            var mapFileColumns = new List<string> { "Job", "Dataset" };
            ReadJobToDatasetMapFile(ascoreOptions, mapFileColumns, out var datasetsByJob);

            // Spectrum files have not been opened yet; they are opened per job while processing
            RunAScoreOnPreparedData(
                datasetsByJob,
                spectraManager,
                psmResultsManager,
                ascoreParams,
                ascoreOptions,
                spectraFileOpened: false);

            ProteinMapperTestRun(ascoreOptions);
        }
Exemple #3
0
        /// <summary>
        /// Run the AScore algorithm on a single input file whose spectrum file has already been
        /// opened by the caller, then invoke ProteinMapperTestRun with the same options
        /// </summary>
        /// <remarks>
        /// A one-entry job-to-dataset map is built from psmResultsManager.JobNum and the
        /// spectrum / mod summary file paths exposed by spectraManager.
        /// </remarks>
        /// <param name="ascoreOptions">Processing options; forwarded to RunAScoreOnPreparedData and ProteinMapperTestRun</param>
        /// <param name="spectraManager">Spectra manager; must be non-null and already initialized</param>
        /// <param name="psmResultsManager">PSM results manager; must be non-null</param>
        /// <param name="ascoreParams">AScore parameters; forwarded to RunAScoreOnPreparedData</param>
        /// <exception cref="Exception">spectraManager is null or has not been initialized</exception>
        /// <exception cref="ArgumentNullException">psmResultsManager is null</exception>
        public void RunAScoreOnSingleFile(
            AScoreOptions ascoreOptions,
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams)
        {
            // Validate spectraManager BEFORE reading any of its properties; previously this check ran
            // after spectraManager.SpectrumFilePath had already been dereferenced, so a null manager
            // surfaced as a NullReferenceException instead of this descriptive error
            if (spectraManager == null || !spectraManager.Initialized)
            {
                throw new Exception(
                          "spectraManager must be instantiated and initialized before calling RunAScoreOnSingleFile for a single source file");
            }

            if (psmResultsManager == null)
            {
                throw new ArgumentNullException(nameof(psmResultsManager));
            }

            // Single-file mode: the only job is the one reported by the PSM results manager
            var jobToDatasetNameMap = new Dictionary <string, DatasetFileInfo>
            {
                {
                    psmResultsManager.JobNum,
                    new DatasetFileInfo(spectraManager.SpectrumFilePath, spectraManager.ModSummaryFilePath)
                }
            };

            RunAScoreOnPreparedData(jobToDatasetNameMap, spectraManager, psmResultsManager, ascoreParams, ascoreOptions, true);

            ProteinMapperTestRun(ascoreOptions);
        }
Exemple #4
0
        /// <summary>
        /// Runs all the tools necessary to perform an AScore run: steps through every row of
        /// psmResultsManager, loads the matching experimental spectrum, scores the candidate
        /// modification-site arrangements, then writes the results to ascoreOptions.AScoreResultsFilePath
        /// </summary>
        /// <remarks>
        /// Progress is written directly to the console; status, warning and error messages are raised
        /// through OnStatusEvent / OnWarningEvent / OnErrorEvent.
        /// </remarks>
        /// <param name="jobToDatasetNameMap">Keys are job numbers (stored as strings); values are DatasetFileInfo instances providing the spectrum file path and (optionally) the mod summary file path</param>
        /// <param name="spectraManager">Manager for reading _dta.txt or .mzML files; must have already been initialized by the calling class</param>
        /// <param name="psmResultsManager">Source of the PSM rows to process; also accumulates the result table and writes it to disk</param>
        /// <param name="ascoreParams">AScore parameters; passed by reference to GetNextRow and GetCleanSequence, and populated with modification info whenever a new job is encountered</param>
        /// <param name="ascoreOptions">Processing options; supplies the fallback mod summary file and the results file path</param>
        /// <param name="spectraFileOpened">Set to true if processing a single dataset, and spectraManager.OpenFile() has already been called</param>
        /// <exception cref="ArgumentException">jobToDatasetNameMap is null or empty</exception>
        /// <exception cref="Exception">A row refers to a job that is not in jobToDatasetNameMap, or no spectrum file manager is available</exception>
        private void RunAScoreOnPreparedData(
            IReadOnlyDictionary <string, DatasetFileInfo> jobToDatasetNameMap,
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams,
            AScoreOptions ascoreOptions,
            bool spectraFileOpened)
        {
            var totalRows            = psmResultsManager.GetRowLength();

            // Tracks scan/charge/sequence combinations already seen; only the keys are used (values are always 0)
            var dctPeptidesProcessed = new Dictionary <string, int>();

            if (jobToDatasetNameMap == null || jobToDatasetNameMap.Count == 0)
            {
                const string errorMessage = "Error in AlgorithmRun: jobToDatasetNameMap cannot be null or empty";
                OnErrorEvent(errorMessage);
                throw new ArgumentException(errorMessage);
            }

            // Spectrum reader for the job currently being processed; assigned when the first row (or a row from a new job) is read
            ISpectraManager spectraFile = null;
            string          spectraManagerCurrentJob = null; // Force open after first read from fht

            var modSummaryManager = new ModSummaryFileManager();

            RegisterEvents(modSummaryManager);

            var peptideMassCalculator = new PeptideMassCalculator();

            if (FilterOnMSGFScore)
            {
                OnStatusEvent("Filtering using MSGF_SpecProb <= " + ascoreParams.MSGFPreFilter.ToString("0.0E+00"));
            }
            Console.WriteLine();

            // Row counts per fragmentation type, indexed by (int)FragmentType
            // NOTE(review): assumes every FragmentType value used below is in the range 0-3 -- confirm against the enum
            var statsByType     = new int[4];
            var ascoreAlgorithm = new AScoreAlgorithm();

            RegisterEvents(ascoreAlgorithm);

            // Every path through the loop body calls psmResultsManager.IncrementRow() before the next iteration
            while (psmResultsManager.CurrentRowNum < totalRows)
            {
                //  Console.Clear();

                // Refresh the console progress line every 100 rows
                if (psmResultsManager.CurrentRowNum % 100 == 0)
                {
                    Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%");
                }

                int    scanNumber;
                int    scanCount;
                int    chargeState;
                string peptideSeq;
                double msgfScore;

                if (FilterOnMSGFScore)
                {
                    psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, out msgfScore, ref ascoreParams);
                }
                else
                {
                    psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, ref ascoreParams);

                    // No score was read; use a placeholder (msgfScore is only compared against the filter when FilterOnMSGFScore is true)
                    msgfScore = 1;
                }

                // Tally the row by fragmentation type
                // NOTE(review): presumably GetNextRow updates ascoreParams.FragmentType for each row -- confirm
                switch (ascoreParams.FragmentType)
                {
                case FragmentType.CID:
                    statsByType[(int)FragmentType.CID]++;
                    break;

                case FragmentType.ETD:
                    statsByType[(int)FragmentType.ETD]++;
                    break;

                case FragmentType.HCD:
                    statsByType[(int)FragmentType.HCD]++;
                    break;

                default:
                    statsByType[(int)FragmentType.Unspecified]++;
                    break;
                }

                // True for the first row, and whenever the row's job differs from the job whose spectra are currently loaded
                if (string.IsNullOrEmpty(spectraManagerCurrentJob) || !string.Equals(spectraManagerCurrentJob, psmResultsManager.JobNum))
                {
                    // New dataset
                    // Get the correct spectrum file for the match
                    if (!jobToDatasetNameMap.TryGetValue(psmResultsManager.JobNum, out var datasetInfo))
                    {
                        var errorMessage = "Input file refers to job " + psmResultsManager.JobNum +
                                           " but jobToDatasetNameMap does not contain that job; unable to continue";
                        OnWarningEvent(errorMessage);

                        if (!psmResultsManager.JobColumnDefined)
                        {
                            OnWarningEvent(
                                "If the input file includes results from multiple jobs, the first column must be job number with Job as the column heading");
                        }

                        throw new Exception(errorMessage);
                    }

                    var datasetName = GetDatasetName(datasetInfo.SpectrumFilePath);
                    OnStatusEvent("Dataset name: " + datasetName);

                    if (!spectraFileOpened)
                    {
                        // This method was called from RunAScoreWithMappingFile
                        // Open the spectrum file for this dataset
                        spectraFile = spectraManager.GetSpectraManagerForFile(
                            psmResultsManager.PSMResultsFilePath,
                            datasetName,
                            datasetInfo.ModSummaryFilePath);
                    }
                    else
                    {
                        // Single-dataset mode: the caller already opened the spectrum file
                        spectraFile = spectraManager.GetCurrentSpectrumManager();
                    }

                    spectraManagerCurrentJob = string.Copy(psmResultsManager.JobNum);
                    Console.Write("\r");

                    // Fall back to the mod summary file named in the options when the dataset entry does not specify one
                    // (this updates the DatasetFileInfo instance stored in jobToDatasetNameMap)
                    if (string.IsNullOrWhiteSpace(datasetInfo.ModSummaryFilePath) && !string.IsNullOrWhiteSpace(ascoreOptions.ModSummaryFile))
                    {
                        datasetInfo.ModSummaryFilePath = ascoreOptions.ModSummaryFile;
                    }

                    // The two mzid-based result managers set the modifications on ascoreParams themselves;
                    // every other results manager has them read via the mod summary manager
                    if (psmResultsManager is MsgfMzid mzid)
                    {
                        mzid.SetModifications(ascoreParams);
                    }
                    else if (psmResultsManager is MsgfMzidFull mzidFull)
                    {
                        mzidFull.SetModifications(ascoreParams);
                    }
                    else
                    {
                        if (string.IsNullOrEmpty(datasetInfo.ModSummaryFilePath))
                        {
                            // No explicit path: let the manager work from the dataset name and the PSM results file path
                            modSummaryManager.ReadModSummary(spectraFile.DatasetName, psmResultsManager.PSMResultsFilePath, ascoreParams);
                        }
                        else
                        {
                            var modSummaryFile = new FileInfo(datasetInfo.ModSummaryFilePath);
                            modSummaryManager.ReadModSummary(modSummaryFile, ascoreParams);
                        }
                    }

                    Console.WriteLine();

                    // Re-draw the progress line, since the status messages above interrupted it
                    Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%");
                }

                // perform work on the match
                // Peptides with at least three dot-separated parts are treated as prefix.sequence.suffix
                var    peptideParts = peptideSeq.Split('.');
                string sequenceWithoutSuffixOrPrefix;
                string front;
                string back;

                if (peptideParts.Length >= 3)
                {
                    front = peptideParts[0];
                    sequenceWithoutSuffixOrPrefix = peptideParts[1];
                    back = peptideParts[2];
                }
                else
                {
                    // No prefix/suffix residues available; use "?" placeholders
                    front = "?";
                    sequenceWithoutSuffixOrPrefix = string.Copy(peptideSeq);
                    back = "?";
                }

                var sequenceClean = GetCleanSequence(sequenceWithoutSuffixOrPrefix, ref ascoreParams);
                var skipPSM       = FilterOnMSGFScore && msgfScore > ascoreParams.MSGFPreFilter;

                // NOTE(review): this key does not include the job number, so when results from several jobs are processed,
                // a PSM with the same scan, charge and sequence in a later job is treated as a duplicate -- confirm this is intended.
                // The key is also recorded for PSMs that are skipped by the MSGF filter.
                var scanChargePeptide = scanNumber + "_" + chargeState + "_" + sequenceWithoutSuffixOrPrefix;
                if (dctPeptidesProcessed.ContainsKey(scanChargePeptide))
                {
                    // We have already processed this PSM
                    skipPSM = true;
                }
                else
                {
                    dctPeptidesProcessed.Add(scanChargePeptide, 0);
                }

                if (skipPSM)
                {
                    psmResultsManager.IncrementRow();
                    continue;
                }

                //Get experimental spectra
                if (spectraFile == null)
                {
                    const string errorMessage = "spectraFile is uninitialized in RunAScoreOnPreparedData; this indicates a programming bug";
                    OnErrorEvent(errorMessage);
                    throw new Exception(errorMessage);
                }

                var expSpec = spectraFile.GetExperimentalSpectra(scanNumber, scanCount, chargeState);

                if (expSpec == null)
                {
                    // Warn and move on to the next row; nothing is written to the results table for this PSM
                    OnWarningEvent("Scan " + scanNumber + " not found in spectra file for peptide " + peptideSeq);
                    psmResultsManager.IncrementRow();
                    continue;
                }

                // Assume monoisotopic for both hi res and low res spectra
                MolecularWeights.MassType = MassType.Monoisotopic;

                // Compute precursor m/z value
                var precursorMZ = peptideMassCalculator.ConvoluteMass(expSpec.PrecursorMass, 1, chargeState);

                // Set the m/z range
                // CID: lower bound is precursor m/z times lowRangeMultiplier; all other fragmentation types use minRange.
                // The upper bound is maxRange in both cases (the assignment inside the if block repeats the same value).
                var mzMax = maxRange;
                var mzMin = precursorMZ * lowRangeMultiplier;

                if (ascoreParams.FragmentType != FragmentType.CID)
                {
                    mzMax = maxRange;
                    mzMin = minRange;
                }

                //Generate all combination mixtures
                var modMixture = new Combinatorics.ModMixtureCombo(ascoreParams.DynamicMods, sequenceClean);

                var myPositionsList = GetMyPositionList(sequenceClean, modMixture);

                //If I have more than 1 modifiable site proceed to calculation
                if (myPositionsList.Count > 1)
                {
                    ascoreAlgorithm.ComputeAScore(psmResultsManager, ascoreParams, scanNumber, chargeState,
                                                  peptideSeq, front, back, sequenceClean, expSpec,
                                                  mzMax, mzMin, myPositionsList);
                }
                else if (myPositionsList.Count == 1)
                {
                    // Exactly one candidate arrangement, so no scoring is performed.
                    // The largest value in the position array selects the mod info; a maximum of 0 is reported as no modified residues.
                    var uniqueID = myPositionsList[0].Max();
                    if (uniqueID == 0)
                    {
                        psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], MOD_INFO_NO_MODIFIED_RESIDUES);
                    }
                    else
                    {
                        psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], LookupModInfoByID(uniqueID, ascoreParams.DynamicMods));
                    }
                }
                else
                {
                    // No modifiable sites
                    psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, new int[0], MOD_INFO_NO_MODIFIED_RESIDUES);
                }
                psmResultsManager.IncrementRow();
            }

            Console.WriteLine();

            OnStatusEvent(string.Format("Writing {0:N0} rows to {1}", psmResultsManager.ResultsCount, PathUtils.CompactPathString(ascoreOptions.AScoreResultsFilePath, 80)));
            psmResultsManager.WriteToFile(ascoreOptions.AScoreResultsFilePath);

            Console.WriteLine();

            // No rows were tallied, meaning the loop body never ran
            if (statsByType.Sum() == 0)
            {
                OnWarningEvent("Input file appeared empty");
            }
            else
            {
                // Rows tallied under FragmentType.Unspecified are counted above but not reported here
                OnStatusEvent("Stats by fragmentation ion type:");
                ReportStatsForFragType("  CID", statsByType, FragmentType.CID);
                ReportStatsForFragType("  ETD", statsByType, FragmentType.ETD);
                ReportStatsForFragType("  HCD", statsByType, FragmentType.HCD);
            }

            Console.WriteLine();
        }
        /// <summary>
        /// Score every candidate modification-site arrangement for one PSM against its experimental
        /// spectrum, pick the top-ranked arrangement, compute per-site AScore results for it,
        /// and append those results to the table held by psmResultsManager
        /// </summary>
        /// <remarks>
        /// A warning is raised (processing still continues) when the observed precursor mass does not
        /// agree with the mass computed from the sequence and its dynamic mods.
        /// Any exception is reported through OnErrorEvent and then rethrown.
        /// </remarks>
        /// <param name="psmResultsManager">Receives one table row per AScore result</param>
        /// <param name="ascoreParams">AScore parameters (dynamic mods, fragment mass tolerance, etc.)</param>
        /// <param name="scanNumber">Scan number; used in warning messages and in the result rows</param>
        /// <param name="chargeState">Precursor charge; a value of 1 is treated as 2</param>
        /// <param name="peptideSeq">Peptide sequence as read from the input file</param>
        /// <param name="front">Text placed before the first '.' of each generated sequence</param>
        /// <param name="back">Text placed after the last '.' of each generated sequence</param>
        /// <param name="sequenceClean">Sequence used to build the theoretical spectra and the final sequences</param>
        /// <param name="expSpec">Experimental spectrum for this scan</param>
        /// <param name="mzMax">Upper m/z bound, forwarded to GetChargeList and CalculateAScoreForSite</param>
        /// <param name="mzMin">Lower m/z bound, forwarded to GetChargeList and CalculateAScoreForSite</param>
        /// <param name="myPositionsList">Candidate position arrays, one per arrangement; must contain at least one entry</param>
        public void ComputeAScore(
            PsmResultsManager psmResultsManager, ParameterFileManager ascoreParams, int scanNumber,
            int chargeState, string peptideSeq, string front, string back, string sequenceClean, ExperimentalSpectra expSpec,
            double mzMax, double mzMin, IReadOnlyList <int[]> myPositionsList)
        {
            // Singly charged precursors are processed as if they were 2+
            if (chargeState == 1)
            {
                chargeState = 2;
            }

            // One inner list per candidate arrangement; the two outer lists stay index-aligned
            var scoresByCandidate   = new List<List<double>>();
            var weightedByCandidate = new List<List<double>>();

            try
            {
                var monoSpectra    = new TheoreticalSpectra(sequenceClean, ascoreParams, chargeState, MassType.Monoisotopic);
                var averageSpectra = new TheoreticalSpectra(sequenceClean, ascoreParams, chargeState, MassType.Average);
                var computedMass   = monoSpectra.PeptideNeutralMassWithStaticMods + GetModMassTotal(peptideSeq, ascoreParams.DynamicMods);

                // Returns true when the computed and observed masses agree after shifting by a whole number
                // of MASS_C13 units (from -(charge + 3) to +(charge + 3)), using a tolerance that widens on each outer pass
                bool MassesAgreeWithinTolerance()
                {
                    for (double chargeAdjust = 0; chargeAdjust < 0.1; chargeAdjust += 0.005)
                    {
                        var tolerance = 0.15 + chargeState * chargeAdjust;

                        for (var massAdjust = -chargeState - 3; massAdjust <= chargeState + 3; massAdjust++)
                        {
                            var delM = computedMass - expSpec.PrecursorNeutralMass + massAdjust * MASS_C13;
                            if (Math.Abs(delM) < tolerance)
                            {
                                return true;
                            }
                        }
                    }

                    return false;
                }

                if (Math.Abs(computedMass - expSpec.PrecursorNeutralMass) > 20)
                {
                    OnWarningEvent(string.Format(
                                       "Scan {0}: Observed precursor mass of {1:F1} Da is more than 20 Da away from the computed mass of {2:F1} Da; DeltaMass = {3:F1} Da",
                                       scanNumber,
                                       expSpec.PrecursorNeutralMass,
                                       computedMass,
                                       expSpec.PrecursorNeutralMass - computedMass));
                }
                else if (!MassesAgreeWithinTolerance())
                {
                    OnWarningEvent(string.Format(
                                       "Scan {0}: Observed precursor mass of {1:F1} Da is not a reasonable match for computed mass of {2:F1} Da; " +
                                       "DeltaMass = {3:F1} Da; Peptide = {4}",
                                       scanNumber,
                                       expSpec.PrecursorNeutralMass,
                                       computedMass,
                                       expSpec.PrecursorNeutralMass - computedMass,
                                       peptideSeq));
                }

                foreach (var candidateSites in myPositionsList)
                {
                    // Theoretical ions for this arrangement of modifications
                    var theoreticalIons = GetChargeList(ascoreParams, mzMax, mzMin, monoSpectra, averageSpectra, candidateSites);

                    var rawScores      = new List<double>();
                    var weightedScores = new List<double>();
                    scoresByCandidate.Add(rawScores);
                    weightedByCandidate.Add(weightedScores);

                    // Peak depths 1 through 10
                    for (var peakDepth = 1; peakDepth <= 10; peakDepth++)
                    {
                        var depthSpectra = expSpec.GetPeakDepthSpectra(peakDepth);
                        depthSpectra.Sort();

                        var matches = GetMatchedMZ(ascoreParams.FragmentMassTolerance, theoreticalIons, depthSpectra);

                        // First argument scales with the peak depth and the fragment tolerance window
                        var score = PeptideScoresManager.GetPeptideScore(
                            peakDepth * ascoreParams.FragmentMassTolerance * 2 / 100.0, theoreticalIons.Count, matches.Count);

                        rawScores.Add(score);
                        weightedScores.Add(score * ScoreWeights[peakDepth - 1]);
                    }
                }

                // Rank the candidates by the sum of their weighted scores
                var rankedCandidates = new List<ValueIndexPair<double>>();
                for (var candidate = 0; candidate < scoresByCandidate.Count; candidate++)
                {
                    var weightedTotal = 0.0;
                    foreach (var weighted in weightedByCandidate[candidate])
                    {
                        weightedTotal += weighted;
                    }

                    rankedCandidates.Add(new ValueIndexPair<double>(weightedTotal, candidate));
                }

                // Ordering is defined by ValueIndexPair; the first entry after sorting is taken as the top candidate
                rankedCandidates.Sort();
                var best          = rankedCandidates[0];
                var bestScore     = best.Value;
                var bestSitesList = myPositionsList[best.Index];

                var siteResults = CalculateAScoreForSite(
                    ascoreParams, expSpec, mzMax, mzMin, myPositionsList, bestSitesList,
                    scoresByCandidate, monoSpectra, averageSpectra, rankedCandidates);

                // Attach the competing sequence to every result before anything is written
                foreach (var siteResult in siteResults)
                {
                    var competingSequence = GenerateFinalSequences(sequenceClean, ascoreParams, siteResult.PeptideMods);
                    siteResult.SecondSequence = front + "." + competingSequence + "." + back;
                }

                // Store one row per result, all sharing the best sequence and the top peptide score
                var bestSequenceCore = GenerateFinalSequences(sequenceClean, ascoreParams, bestSitesList);
                var bestSeq          = front + "." + bestSequenceCore + "." + back;

                foreach (var siteResult in siteResults)
                {
                    psmResultsManager.WriteToTable(peptideSeq, bestSeq, scanNumber, bestScore, siteResult);
                }
            }
            catch (Exception ex)
            {
                OnErrorEvent("Exception in ComputeAScore: " + ex.Message);
                throw;
            }
        }