Exemplo n.º 1
0
        private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsDataFile, string currentDataFile, List <Modification> variableModifications, List <Modification> fixedModifications, List <Protein> proteinList, string taskId, CommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
        {
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);
            MassDiffAcceptor searchMode  = initPrecTol is PpmTolerance ?
                                           (MassDiffAcceptor) new SinglePpmAroundZeroSearchMode(initPrecTol.Value) :
                                           new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);

            var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            Log("Searching with searchMode: " + searchMode, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });
            Log("Searching with productMassTolerance: " + initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, combinedParameters, this.FileSpecificParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();
            List <PeptideSpectralMatch> allPsms = allPsmsArray.Where(b => b != null).ToList();

            allPsms = allPsms.OrderByDescending(b => b.Score)
                      .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                      .GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, searchMode.NumNotches, CommonParameters, this.FileSpecificParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> goodIdentifications = allPsms.Where(b => b.FdrInfo.QValueNotch < 0.001 && !b.IsDecoy && b.FullSequence != null).ToList();

            if (!goodIdentifications.Any())
            {
                return(new DataPointAquisitionResults(null, new List <PeptideSpectralMatch>(), new List <LabeledDataPoint>(), new List <LabeledDataPoint>(), 0, 0, 0, 0));
            }

            DataPointAquisitionResults currentResult = (DataPointAquisitionResults) new DataPointAcquisitionEngine(
                goodIdentifications,
                myMsDataFile,
                initPrecTol,
                CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
                CommonParameters,
                this.FileSpecificParameters,
                new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            return(currentResult);
        }
Exemplo n.º 2
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the calibration task
            Status("Calibrating...", new List <string> {
                taskId
            });
            MyTaskResults = new MyTaskResults(this)
            {
                NewSpectra           = new List <string>(),
                NewFileSpecificTomls = new List <string>()
            };

            var           myFileManager = new MyFileManager(true);
            List <string> spectraFilesAfterCalibration = new List <string>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                bool couldNotFindEnoughDatapoints = false;

                // get filename stuff
                var    originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
                var    originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
                string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

                // mark the file as in-progress
                StartingDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                // load the file
                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files"
                });

                var myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters);

                // get datapoints to fit calibration function to
                Status("Acquiring calibration data points...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                DataPointAquisitionResults acquisitionResults = null;

                for (int i = 1; i <= 5; i++)
                {
                    acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                    // enough data points to calibrate?
                    if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
                    {
                        break;
                    }

                    if (i == 1) // failed round 1
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(20);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(50);
                    }
                    else if (i == 2) // failed round 2
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(30);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(100);
                    }
                    else if (i == 3) // failed round 3
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(40);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(150);
                    }
                    else // failed round 4
                    {
                        if (acquisitionResults.Psms.Count < NumRequiredPsms)
                        {
                            Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                        }
                        if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                        }
                        if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                        }

                        couldNotFindEnoughDatapoints = true;
                        FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                            taskId, "Individual Spectra Files", originalUncalibratedFilePath
                        });
                        break;
                    }

                    Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                         Math.Round(CommonParameters.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                         Math.Round(CommonParameters.ProductMassTolerance.Value, 2) + " ppm product");
                }

                if (couldNotFindEnoughDatapoints)
                {
                    spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(currentRawFileList[spectraFileIndex]));
                    ReportProgress(new ProgressEventArgs(100, "Failed to calibrate!", new List <string> {
                        taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                    }));
                    continue;
                }

                // stats before calibration
                int    prevPsmCount = acquisitionResults.Psms.Count;
                double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double preCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // generate calibration function and shift data points
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                CalibrationEngine engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                engine.Run();

                //update file
                myMsDataFile = engine.CalibratedDataFile;

                // do another search to evaluate calibration results
                Status("Post-calibration search...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                //generate calibration function and shift data points AGAIN because it's fast and contributes new data
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                engine.Run();

                //update file
                myMsDataFile = engine.CalibratedDataFile;

                // write the calibrated mzML file
                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
                myFileManager.DoneWithFile(originalUncalibratedFilePath);

                // stats after calibration
                int    postCalibrationPsmCount          = acquisitionResults.Psms.Count;
                double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double postCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // did the data improve? (not used for anything yet...)
                bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

                // write toml settings for the calibrated file
                var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

                var fileSpecificParams = new FileSpecificParameters();

                // carry over file-specific parameters from the uncalibrated file to the calibrated one
                if (fileSettingsList[spectraFileIndex] != null)
                {
                    fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
                }

                //suggest 4 * interquartile range as the ppm tolerance
                fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
                fileSpecificParams.ProductMassTolerance   = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));

                Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

                FinishedWritingFile(newTomlFileName, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });

                // finished calibrating this file
                spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(calibratedFilePath));
                FinishedWritingFile(calibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                MyTaskResults.NewSpectra.Add(calibratedFilePath);
                MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
                FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }));
            }

            // re-write experimental design (if it has been defined) with new calibrated file names
            string assumedPathToExperDesign = Directory.GetParent(currentRawFileList.First()).FullName;

            assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

            if (File.Exists(assumedPathToExperDesign))
            {
                WriteNewExperimentalDesignFile(assumedPathToExperDesign, OutputFolder, spectraFilesAfterCalibration);
            }

            // finished calibrating all files for the task
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            return(MyTaskResults);
        }
Exemplo n.º 3
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            // load modifications
            Status("Loading modifications...", new List <string> {
                taskId
            });
            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = GlobalVariables.AllModTypesKnown.ToList();

            // what types of fragment ions to search for
            List <ProductType> ionTypes = new List <ProductType>();

            if (CommonParameters.BIons)
            {
                ionTypes.Add(ProductType.BnoB1ions);
            }
            if (CommonParameters.YIons)
            {
                ionTypes.Add(ProductType.Y);
            }
            if (CommonParameters.ZdotIons)
            {
                ionTypes.Add(ProductType.Zdot);
            }
            if (CommonParameters.CIons)
            {
                ionTypes.Add(ProductType.C);
            }

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the calibration task
            Status("Calibrating...", new List <string> {
                taskId
            });
            MyTaskResults = new MyTaskResults(this)
            {
                NewSpectra           = new List <string>(),
                NewFileSpecificTomls = new List <string>()
            };

            object lock1 = new object();

            var myFileManager = new MyFileManager(true);

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                // get filename stuff
                var    originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
                var    originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
                string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

                // mark the file as in-progress
                StartingDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MsDataFile myMsDataFile;

                // load the file
                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                lock (lock1)
                {
                    myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters.TopNpeaks, CommonParameters.MinRatio, CommonParameters.TrimMs1Peaks, CommonParameters.TrimMsMsPeaks, CommonParameters);
                }

                // get datapoints to fit calibration function to
                Status("Acquiring calibration data points...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                DataPointAquisitionResults acquisitionResults = null;

                for (int i = 1; i <= 5; i++)
                {
                    acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                    // enough data points to calibrate?
                    if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
                    {
                        break;
                    }

                    if (i == 1) // failed round 1
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(20);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(50);
                    }
                    else if (i == 2) // failed round 2
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(30);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(100);
                    }
                    else if (i == 3) // failed round 3
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(40);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(150);
                    }
                    else // failed round 4
                    {
                        if (acquisitionResults.Psms.Count < NumRequiredPsms)
                        {
                            Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                        }
                        if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                        }
                        if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                        }
                        FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                            taskId, "Individual Spectra Files", originalUncalibratedFilePath
                        });
                        return(MyTaskResults);
                    }

                    Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                         Math.Round(CommonParameters.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                         Math.Round(CommonParameters.ProductMassTolerance.Value, 2) + " ppm product");
                }

                // stats before calibration
                int    prevPsmCount = acquisitionResults.Psms.Count;
                double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double preCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // generate calibration function and shift data points
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }).Run();

                // do another search to evaluate calibration results
                Status("Post-calibration search...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                // stats after calibration
                int    postCalibrationPsmCount          = acquisitionResults.Psms.Count;
                double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double postCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // did the data improve? (not used for anything yet...)
                bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

                // write toml settings for the calibrated file
                var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

                var fileSpecificParams = new FileSpecificParameters();

                // carry over file-specific parameters from the uncalibrated file to the calibrated one
                if (fileSettingsList[spectraFileIndex] != null)
                {
                    fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
                }

                // don't write over ppm tolerances if they've been specified by the user already in the file-specific settings
                // otherwise, suggest 4 * interquartile range as the ppm tolerance
                if (fileSpecificParams.PrecursorMassTolerance == null)
                {
                    fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
                }
                if (fileSpecificParams.ProductMassTolerance == null)
                {
                    fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));
                }

                Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

                FinishedWritingFile(newTomlFileName, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });

                // write the calibrated mzML file
                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
                myFileManager.DoneWithFile(originalUncalibratedFilePath);

                // finished calibrating this file
                FinishedWritingFile(calibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                MyTaskResults.NewSpectra.Add(calibratedFilePath);
                MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
                FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }));
            }

            // re-write experimental design (if it has been defined) with new calibrated file names
            string assumedPathToExperDesign = Directory.GetParent(currentRawFileList.First()).FullName;

            assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);
            List <string> newExperimentalDesignOutput = new List <string>();

            if (File.Exists(assumedPathToExperDesign))
            {
                var lines = File.ReadAllLines(assumedPathToExperDesign);

                for (int i = 0; i < lines.Length; i++)
                {
                    // header of experimental design file
                    if (i == 0)
                    {
                        newExperimentalDesignOutput.Add(lines[i]);
                    }
                    else
                    {
                        var    split   = lines[i].Split('\t');
                        string newline = Path.GetFileNameWithoutExtension(split[0]) + CalibSuffix + "\t";
                        for (int j = 1; j < split.Length; j++)
                        {
                            newline += split[j] + "\t";
                        }

                        newExperimentalDesignOutput.Add(newline);
                    }
                }
            }

            File.WriteAllLines(Path.Combine(OutputFolder, GlobalVariables.ExperimentalDesignFileName), newExperimentalDesignOutput);

            // finished calibrating all files for the task
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            return(MyTaskResults);
        }
Exemplo n.º 4
0
        private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsDataFile, string currentDataFile, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <Protein> proteinList, string taskId, CommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
        {
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);
            MassDiffAcceptor searchMode  = initPrecTol is PpmTolerance ?
                                           (MassDiffAcceptor) new SinglePpmAroundZeroSearchMode(initPrecTol.Value) :
                                           new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);

            FragmentTypes fragmentTypesForCalibration = FragmentTypes.None;

            if (combinedParameters.BIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.b;
            }
            if (combinedParameters.YIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.y;
            }
            if (combinedParameters.CIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.c;
            }
            if (combinedParameters.ZdotIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.zdot;
            }

            var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters.DoPrecursorDeconvolution, combinedParameters.UseProvidedPrecursorInfo, combinedParameters.DeconvolutionIntensityRatio, combinedParameters.DeconvolutionMaxAssumedChargeState, combinedParameters.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            List <ProductType> productTypes = new List <ProductType>();

            if (combinedParameters.BIons)
            {
                productTypes.Add(ProductType.B);
            }
            if (combinedParameters.YIons)
            {
                productTypes.Add(ProductType.Y);
            }
            if (combinedParameters.CIons)
            {
                productTypes.Add(ProductType.C);
            }
            if (combinedParameters.ZdotIons)
            {
                productTypes.Add(ProductType.Zdot);
            }

            Log("Searching with searchMode: " + searchMode, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });
            Log("Searching with productMassTolerance: " + initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, productTypes, searchMode, combinedParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> allPsms = allPsmsArray.Where(p => p != null).ToList();

            var compactPeptideToProteinPeptideMatching = ((SequencesToActualProteinPeptidesEngineResults) new SequencesToActualProteinPeptidesEngine
                                                              (allPsms, proteinList, fixedModifications, variableModifications, productTypes, new List <DigestionParams> {
                combinedParameters.DigestionParams
            },
                                                              combinedParameters.ReportAllAmbiguity, combinedParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run()).CompactPeptideToProteinPeptideMatching;

            foreach (var huh in allPsms)
            {
                huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
            }

            allPsms = allPsms.OrderByDescending(b => b.Score)
                      .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                      .GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, searchMode.NumNotches, CommonParameters, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> goodIdentifications = allPsms.Where(b => b.FdrInfo.QValueNotch < 0.01 && !b.IsDecoy && b.FullSequence != null).ToList();

            if (!goodIdentifications.Any())
            {
                return(new DataPointAquisitionResults(null, new List <PeptideSpectralMatch>(), new List <LabeledDataPoint>(), new List <LabeledDataPoint>(), 0, 0, 0, 0));
            }

            DataPointAquisitionResults currentResult = (DataPointAquisitionResults) new DataPointAcquisitionEngine(
                goodIdentifications,
                myMsDataFile,
                initPrecTol,
                initProdTol,
                CalibrationParameters.NumFragmentsNeededForEveryIdentification,
                CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
                CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
                fragmentTypesForCalibration,
                CommonParameters,
                new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            return(currentResult);
        }
Exemplo n.º 5
0
        private (List <PeptideSpectralMatch>, DataPointAquisitionResults) GetDataAcquisitionResults(IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile, string currentDataFile, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <Protein> proteinList, string taskId, ICommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
        {
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);
            MassDiffAcceptor searchMode;

            if (initPrecTol is PpmTolerance)
            {
                searchMode = new SinglePpmAroundZeroSearchMode(initPrecTol.Value);
            }
            else
            {
                searchMode = new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);
            }

            FragmentTypes fragmentTypesForCalibration = FragmentTypes.None;

            if (combinedParameters.BIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.b;
            }
            if (combinedParameters.YIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.y;
            }
            if (combinedParameters.CIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.c;
            }
            if (combinedParameters.ZdotIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.zdot;
            }

            var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters.DoPrecursorDeconvolution, combinedParameters.UseProvidedPrecursorInfo, combinedParameters.DeconvolutionIntensityRatio, combinedParameters.DeconvolutionMaxAssumedChargeState, combinedParameters.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            List <ProductType> lp = new List <ProductType>();

            if (combinedParameters.BIons)
            {
                lp.Add(ProductType.B);
            }
            if (combinedParameters.YIons)
            {
                lp.Add(ProductType.Y);
            }
            if (combinedParameters.CIons)
            {
                lp.Add(ProductType.C);
            }
            if (combinedParameters.ZdotIons)
            {
                lp.Add(ProductType.Zdot);
            }

            Log("Searching with searchMode: " + searchMode, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });
            Log("Searching with productMassTolerance: " + initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, lp, searchMode, false, combinedParameters, initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> allPsms = allPsmsArray.ToList();

            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatching = ((SequencesToActualProteinPeptidesEngineResults) new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, lp, new List <IDigestionParams> {
                combinedParameters.DigestionParams
            }, combinedParameters.ReportAllAmbiguity, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run()).CompactPeptideToProteinPeptideMatching;

            foreach (var huh in allPsms)
            {
                if (huh != null)
                {
                    huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
                }
            }

            allPsms = allPsms.Where(b => b != null).OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, searchMode.NumNotches, false, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> goodIdentifications = allPsms.Where(b => b.FdrInfo.QValueNotch < 0.01 && !b.IsDecoy && b.FullSequence != null).ToList();

            if (!goodIdentifications.Any())
            {
                Warn("No PSMs below 1% FDR observed!");
                return(new List <PeptideSpectralMatch>(), null);
            }

            var dissociationTypes = MetaMorpheusEngine.DetermineDissociationType(lp);

            foreach (var psm in allPsms)
            {
                var    theScan          = myMsDataFile.GetOneBasedScan(psm.ScanNumber);
                double thePrecursorMass = psm.ScanPrecursorMass;

                foreach (var huh in lp)
                {
                    var ionMasses = psm.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List <ProductType> {
                        huh
                    });
                    Array.Sort(ionMasses);
                    List <double> matchedIonMassesList    = new List <double>();
                    List <double> productMassErrorDaList  = new List <double>();
                    List <double> productMassErrorPpmList = new List <double>();
                    LocalizationEngine.MatchIons(theScan, initProdTol, ionMasses, matchedIonMassesList, productMassErrorDaList, productMassErrorPpmList, thePrecursorMass, dissociationTypes, false);
                    double[] matchedIonMassesOnlyMatches = matchedIonMassesList.ToArray();
                    psm.MatchedIonDictOnlyMatches.Add(huh, matchedIonMassesOnlyMatches);
                    psm.ProductMassErrorDa.Add(huh, productMassErrorDaList.ToArray());
                    psm.ProductMassErrorPpm.Add(huh, productMassErrorPpmList.ToArray());
                }
            }

            DataPointAquisitionResults currentResult = (DataPointAquisitionResults) new DataPointAcquisitionEngine(
                goodIdentifications,
                myMsDataFile,
                initPrecTol,
                initProdTol,
                CalibrationParameters.NumFragmentsNeededForEveryIdentification,
                CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
                CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
                fragmentTypesForCalibration,
                new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            return(goodIdentifications, currentResult);
        }