Exemplo n.º 1
0
        /// <summary>
        /// Configure and run the AScore algorithm
        /// </summary>
        /// <remarks>
        /// Reads the AScore parameter file, creates the PSM results reader that matches the search type
        /// (and, for MSGFDB / MSGF+, the results file type), then calls either RunAScoreWithMappingFile
        /// (when MultiJobMode is true) or RunAScoreOnSingleFile. When CreateUpdatedDbSearchResultsFile is true,
        /// an updated first-hits file or .mzid file is written afterward.
        /// </remarks>
        /// <param name="ascoreOptions">Processing options (input file paths, search type, output locations, mode flags)</param>
        /// <returns>0 once processing has finished; -13 if the search type is not one of the supported search modes</returns>
        /// <exception cref="ArgumentNullException">Thrown if ascoreOptions is null</exception>
        public int RunAScore(AScoreOptions ascoreOptions)
        {
            if (ascoreOptions == null)
            {
                throw new ArgumentNullException(nameof(ascoreOptions));
            }

            var paramManager = new ParameterFileManager(ascoreOptions.AScoreParamFile);

            RegisterEvents(paramManager);

            Console.WriteLine();

            // Terminus mods are part of the check so that a parameter file that only defines terminus mods is still listed
            var modsLoaded = paramManager.DynamicMods.Count > 0 ||
                             paramManager.StaticMods.Count > 0 ||
                             paramManager.TerminiMods.Count > 0;

            if (modsLoaded)
            {
                OnStatusEvent("Loaded modifications from: " + ascoreOptions.AScoreParamFile);

                foreach (var mod in paramManager.StaticMods)
                {
                    OnStatusEvent(Utilities.GetModDescription("Static,   ", mod));
                }

                foreach (var mod in paramManager.DynamicMods)
                {
                    OnStatusEvent(Utilities.GetModDescription("Dynamic,  ", mod));
                }

                foreach (var mod in paramManager.TerminiMods)
                {
                    OnStatusEvent(Utilities.GetModDescription("Terminus, ", mod));
                }

                Console.WriteLine();
            }

            // Each supported search type reports this message just before its reader is created;
            // it is deliberately not reported for an unsupported search type
            void ReportCachingData()
            {
                OnStatusEvent("Caching data in " + PathUtils.CompactPathString(ascoreOptions.DbSearchResultsFile, 80));
            }

            PsmResultsManager psmResultsManager;

            switch (ascoreOptions.SearchType)
            {
            case AScoreOptions.SearchMode.XTandem:
                ReportCachingData();
                psmResultsManager = new XTandemFHT(ascoreOptions.DbSearchResultsFile);
                break;

            case AScoreOptions.SearchMode.Sequest:
                ReportCachingData();
                psmResultsManager = new SequestFHT(ascoreOptions.DbSearchResultsFile);
                break;

            case AScoreOptions.SearchMode.Inspect:
                ReportCachingData();
                psmResultsManager = new InspectFHT(ascoreOptions.DbSearchResultsFile);
                break;

            case AScoreOptions.SearchMode.Msgfdb:
            case AScoreOptions.SearchMode.Msgfplus:
                ReportCachingData();

                if (ascoreOptions.SearchResultsType != AScoreOptions.DbSearchResultsType.Mzid)
                {
                    psmResultsManager = new MsgfdbFHT(ascoreOptions.DbSearchResultsFile);
                }
                else if (ascoreOptions.CreateUpdatedDbSearchResultsFile)
                {
                    // The "full" reader is the one that can later write an updated .mzid file (see the end of this method)
                    psmResultsManager = new MsgfMzidFull(ascoreOptions.DbSearchResultsFile);
                }
                else
                {
                    psmResultsManager = new MsgfMzid(ascoreOptions.DbSearchResultsFile);
                }
                break;

            default:
                OnErrorEvent(string.Format(
                                 "Incorrect search type: {0} , supported values are {1}",
                                 ascoreOptions.SearchType,
                                 string.Join(", ", Enum.GetNames(typeof(AScoreOptions.SearchMode)))
                                 ));

                // Error code reported to the caller for an unsupported search type
                return -13;
            }

            var peptideMassCalculator = new PeptideMassCalculator();

            var spectraManager = new SpectraManagerCache(peptideMassCalculator);

            RegisterEvents(spectraManager);

            OnStatusEvent("Output directory: " + ascoreOptions.OutputDirectoryInfo.FullName);

            // Initialize the options
            FilterOnMSGFScore = ascoreOptions.FilterOnMSGFScore;

            // Run the algorithm
            if (ascoreOptions.MultiJobMode)
            {
                RunAScoreWithMappingFile(ascoreOptions, spectraManager, psmResultsManager, paramManager);
            }
            else
            {
                // Single-file mode: open the spectrum file here, before processing starts
                spectraManager.OpenFile(ascoreOptions.MassSpecFile, ascoreOptions.ModSummaryFile);

                RunAScoreOnSingleFile(ascoreOptions, spectraManager, psmResultsManager, paramManager);
            }

            OnStatusEvent("AScore Complete");

            if (ascoreOptions.CreateUpdatedDbSearchResultsFile)
            {
                if (ascoreOptions.SearchResultsType == AScoreOptions.DbSearchResultsType.Fht)
                {
                    CreateUpdatedFirstHitsFile(ascoreOptions);
                }
                else if (psmResultsManager is MsgfMzidFull mzidFull)
                {
                    mzidFull.WriteToMzidFile(ascoreOptions.UpdatedDbSearchResultsFileName);
                    OnStatusEvent("Results merged; new file: " + PathUtils.CompactPathString(ascoreOptions.UpdatedDbSearchResultsFileName, 80));
                }
            }

            return 0;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs all the tools necessary to perform an AScore run
        /// </summary>
        /// <remarks>
        /// Reads each row from psmResultsManager, switches to the matching spectrum file whenever the job number changes,
        /// computes (or directly records) the AScore result for the row, and finally writes the results
        /// to ascoreOptions.AScoreResultsFilePath. Rows are skipped when they fail the MSGF pre-filter
        /// (only when FilterOnMSGFScore is true) or when the same scan / charge / peptide combination was already seen.
        /// </remarks>
        /// <param name="jobToDatasetNameMap">Keys are job numbers (stored as strings); values hold the spectrum file path and, optionally, the ModSummary file path for that job</param>
        /// <param name="spectraManager">Manager for reading _dta.txt or .mzML files; must have already been initialized by the calling class</param>
        /// <param name="psmResultsManager">Source of the PSM rows to process; also receives the results (WriteToTable) and writes them to disk (WriteToFile)</param>
        /// <param name="ascoreParams">AScore parameters; passed by reference to the row reader and handed to the modification readers for each new dataset</param>
        /// <param name="ascoreOptions">Processing options; provides the fallback ModSummary file and the results file path</param>
        /// <param name="spectraFileOpened">Set to true if processing a single dataset, and spectraManager.OpenFile() has already been called</param>
        /// <exception cref="ArgumentException">Thrown if jobToDatasetNameMap is null or empty</exception>
        /// <exception cref="KeyNotFoundException">Thrown if a row refers to a job that is not in jobToDatasetNameMap</exception>
        /// <exception cref="InvalidOperationException">Thrown if no spectrum file reader is available when a row needs its spectrum</exception>
        private void RunAScoreOnPreparedData(
            IReadOnlyDictionary<string, DatasetFileInfo> jobToDatasetNameMap,
            SpectraManagerCache spectraManager,
            PsmResultsManager psmResultsManager,
            ParameterFileManager ascoreParams,
            AScoreOptions ascoreOptions,
            bool spectraFileOpened)
        {
            // Validate the mapping before touching the results reader
            if (jobToDatasetNameMap == null || jobToDatasetNameMap.Count == 0)
            {
                const string errorMessage = "Error in AlgorithmRun: jobToDatasetNameMap cannot be null or empty";
                OnErrorEvent(errorMessage);
                throw new ArgumentException(errorMessage);
            }

            var totalRows = psmResultsManager.GetRowLength();

            // Scan_Charge_Peptide keys of the PSMs seen so far; used to skip duplicate rows
            var processedPsms = new HashSet<string>();

            ISpectraManager spectraFile = null;

            // Starts as null to force the spectrum file to be resolved on the first row
            string spectraManagerCurrentJob = null;

            var modSummaryManager = new ModSummaryFileManager();

            RegisterEvents(modSummaryManager);

            var peptideMassCalculator = new PeptideMassCalculator();

            if (FilterOnMSGFScore)
            {
                OnStatusEvent("Filtering using MSGF_SpecProb <= " + ascoreParams.MSGFPreFilter.ToString("0.0E+00"));
            }
            Console.WriteLine();

            // Row counts indexed by (int)FragmentType
            // NOTE(review): assumes the CID, ETD, HCD and Unspecified enum values all fall in 0..3 - confirm against the enum
            var statsByType     = new int[4];
            var ascoreAlgorithm = new AScoreAlgorithm();

            RegisterEvents(ascoreAlgorithm);

            // Overwrites the current console line with the percentage of rows read so far
            void ReportProgress()
            {
                Console.Write("\rPercent Completion " + Math.Round((double)psmResultsManager.CurrentRowNum / totalRows * 100) + "%");
            }

            while (psmResultsManager.CurrentRowNum < totalRows)
            {
                if (psmResultsManager.CurrentRowNum % 100 == 0)
                {
                    ReportProgress();
                }

                int    scanNumber;
                int    scanCount;
                int    chargeState;
                string peptideSeq;
                double msgfScore;

                if (FilterOnMSGFScore)
                {
                    psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, out msgfScore, ref ascoreParams);
                }
                else
                {
                    psmResultsManager.GetNextRow(out scanNumber, out scanCount, out chargeState, out peptideSeq, ref ascoreParams);

                    // Placeholder; the score is only compared against the pre-filter when FilterOnMSGFScore is true
                    msgfScore = 1;
                }

                // Tally the row by fragmentation type; anything other than CID / ETD / HCD counts as Unspecified
                switch (ascoreParams.FragmentType)
                {
                case FragmentType.CID:
                    statsByType[(int)FragmentType.CID]++;
                    break;

                case FragmentType.ETD:
                    statsByType[(int)FragmentType.ETD]++;
                    break;

                case FragmentType.HCD:
                    statsByType[(int)FragmentType.HCD]++;
                    break;

                default:
                    statsByType[(int)FragmentType.Unspecified]++;
                    break;
                }

                if (string.IsNullOrEmpty(spectraManagerCurrentJob) || !string.Equals(spectraManagerCurrentJob, psmResultsManager.JobNum))
                {
                    // New dataset
                    // Get the correct spectrum file for the match
                    if (!jobToDatasetNameMap.TryGetValue(psmResultsManager.JobNum, out var datasetInfo))
                    {
                        var errorMessage = "Input file refers to job " + psmResultsManager.JobNum +
                                           " but jobToDatasetNameMap does not contain that job; unable to continue";
                        OnWarningEvent(errorMessage);

                        if (!psmResultsManager.JobColumnDefined)
                        {
                            OnWarningEvent(
                                "If the input file includes results from multiple jobs, the first column must be job number with Job as the column heading");
                        }

                        throw new KeyNotFoundException(errorMessage);
                    }

                    var datasetName = GetDatasetName(datasetInfo.SpectrumFilePath);
                    OnStatusEvent("Dataset name: " + datasetName);

                    if (spectraFileOpened)
                    {
                        // Single dataset: the caller already opened the spectrum file
                        spectraFile = spectraManager.GetCurrentSpectrumManager();
                    }
                    else
                    {
                        // This method was called from RunAScoreWithMappingFile
                        // Open the spectrum file for this dataset
                        spectraFile = spectraManager.GetSpectraManagerForFile(
                            psmResultsManager.PSMResultsFilePath,
                            datasetName,
                            datasetInfo.ModSummaryFilePath);
                    }

                    // Strings are immutable, so the reference can be stored directly (no copy needed)
                    spectraManagerCurrentJob = psmResultsManager.JobNum;
                    Console.Write("\r");

                    // Fall back to the ModSummary file from the options when the mapping does not supply one
                    if (string.IsNullOrWhiteSpace(datasetInfo.ModSummaryFilePath) && !string.IsNullOrWhiteSpace(ascoreOptions.ModSummaryFile))
                    {
                        datasetInfo.ModSummaryFilePath = ascoreOptions.ModSummaryFile;
                    }

                    if (psmResultsManager is MsgfMzid mzid)
                    {
                        mzid.SetModifications(ascoreParams);
                    }
                    else if (psmResultsManager is MsgfMzidFull mzidFull)
                    {
                        mzidFull.SetModifications(ascoreParams);
                    }
                    else if (string.IsNullOrEmpty(datasetInfo.ModSummaryFilePath))
                    {
                        // No explicit ModSummary file: let the manager work from the dataset name and the PSM results file path
                        modSummaryManager.ReadModSummary(spectraFile.DatasetName, psmResultsManager.PSMResultsFilePath, ascoreParams);
                    }
                    else
                    {
                        var modSummaryFile = new FileInfo(datasetInfo.ModSummaryFilePath);
                        modSummaryManager.ReadModSummary(modSummaryFile, ascoreParams);
                    }

                    Console.WriteLine();

                    ReportProgress();
                }

                // Perform work on the match
                // Peptides of the form Prefix.Sequence.Suffix are split into three parts;
                // anything else is used as-is, with "?" for the unknown flanking residues
                var    peptideParts = peptideSeq.Split('.');
                string sequenceWithoutSuffixOrPrefix;
                string front;
                string back;

                if (peptideParts.Length >= 3)
                {
                    front = peptideParts[0];
                    sequenceWithoutSuffixOrPrefix = peptideParts[1];
                    back = peptideParts[2];
                }
                else
                {
                    front = "?";
                    sequenceWithoutSuffixOrPrefix = peptideSeq;
                    back = "?";
                }

                var sequenceClean = GetCleanSequence(sequenceWithoutSuffixOrPrefix, ref ascoreParams);
                var skipPSM       = FilterOnMSGFScore && msgfScore > ascoreParams.MSGFPreFilter;

                var scanChargePeptide = scanNumber + "_" + chargeState + "_" + sequenceWithoutSuffixOrPrefix;

                // Add() returns false when the key is already present, i.e. this PSM was already processed.
                // The key is recorded even when the row is skipped by the MSGF pre-filter.
                if (!processedPsms.Add(scanChargePeptide))
                {
                    skipPSM = true;
                }

                if (skipPSM)
                {
                    psmResultsManager.IncrementRow();
                    continue;
                }

                // Get experimental spectra
                if (spectraFile == null)
                {
                    const string errorMessage = "spectraFile is uninitialized in RunAScoreOnPreparedData; this indicates a programming bug";
                    OnErrorEvent(errorMessage);
                    throw new InvalidOperationException(errorMessage);
                }

                var expSpec = spectraFile.GetExperimentalSpectra(scanNumber, scanCount, chargeState);

                if (expSpec == null)
                {
                    OnWarningEvent("Scan " + scanNumber + " not found in spectra file for peptide " + peptideSeq);
                    psmResultsManager.IncrementRow();
                    continue;
                }

                // Assume monoisotopic for both hi res and low res spectra
                MolecularWeights.MassType = MassType.Monoisotopic;

                // Compute precursor m/z value
                var precursorMZ = peptideMassCalculator.ConvoluteMass(expSpec.PrecursorMass, 1, chargeState);

                // Set the m/z range
                // The upper limit is always maxRange; the lower limit scales with the precursor m/z for CID
                // and is the fixed minRange for every other fragmentation type
                var mzMax = maxRange;
                var mzMin = ascoreParams.FragmentType == FragmentType.CID
                                ? precursorMZ * lowRangeMultiplier
                                : minRange;

                // Generate all combination mixtures
                var modMixture = new Combinatorics.ModMixtureCombo(ascoreParams.DynamicMods, sequenceClean);

                var myPositionsList = GetMyPositionList(sequenceClean, modMixture);

                if (myPositionsList.Count > 1)
                {
                    // More than one candidate arrangement of modifiable sites: proceed to calculation
                    ascoreAlgorithm.ComputeAScore(psmResultsManager, ascoreParams, scanNumber, chargeState,
                                                  peptideSeq, front, back, sequenceClean, expSpec,
                                                  mzMax, mzMin, myPositionsList);
                }
                else if (myPositionsList.Count == 1)
                {
                    // Either one or no modifiable sites
                    // A maximum of 0 means that no position carries a mod ID
                    var uniqueID = myPositionsList[0].Max();
                    if (uniqueID == 0)
                    {
                        psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], MOD_INFO_NO_MODIFIED_RESIDUES);
                    }
                    else
                    {
                        psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, myPositionsList[0], LookupModInfoByID(uniqueID, ascoreParams.DynamicMods));
                    }
                }
                else
                {
                    // No modifiable sites
                    psmResultsManager.WriteToTable(peptideSeq, scanNumber, 0, new int[0], MOD_INFO_NO_MODIFIED_RESIDUES);
                }

                psmResultsManager.IncrementRow();
            }

            Console.WriteLine();

            OnStatusEvent(string.Format("Writing {0:N0} rows to {1}", psmResultsManager.ResultsCount, PathUtils.CompactPathString(ascoreOptions.AScoreResultsFilePath, 80)));
            psmResultsManager.WriteToFile(ascoreOptions.AScoreResultsFilePath);

            Console.WriteLine();

            if (statsByType.Sum() == 0)
            {
                OnWarningEvent("Input file appeared empty");
            }
            else
            {
                OnStatusEvent("Stats by fragmentation ion type:");
                ReportStatsForFragType("  CID", statsByType, FragmentType.CID);
                ReportStatsForFragType("  ETD", statsByType, FragmentType.ETD);
                ReportStatsForFragType("  HCD", statsByType, FragmentType.HCD);
            }

            Console.WriteLine();
        }