/// <summary>
/// Writes the quantified peaks, peptides and proteins tables of <paramref name="results"/>
/// as .tsv files named after the input file.
/// </summary>
/// <param name="inputPath">Path whose file name (without extension) prefixes every output file.</param>
/// <param name="results">Results object whose <c>WriteResults</c> method performs the actual writing.</param>
/// <param name="outputPath">
/// Destination directory; created if it does not exist. When null, the directory of
/// <paramref name="inputPath"/> is used, falling back to the current working directory
/// if <paramref name="inputPath"/> carries no directory component.
/// </param>
public static void WriteOutput(string inputPath, FlashLfqResults results, string outputPath = null)
{
    if (outputPath == null)
    {
        outputPath = Path.GetDirectoryName(inputPath);

        // GetDirectoryName yields "" for a bare file name and null for a root path;
        // Directory.CreateDirectory rejects both, so use the working directory instead.
        if (string.IsNullOrEmpty(outputPath))
        {
            outputPath = Directory.GetCurrentDirectory();
        }
    }

    string inputFileName = Path.GetFileNameWithoutExtension(inputPath);

    if (!Directory.Exists(outputPath))
    {
        Directory.CreateDirectory(outputPath);
    }

    // avoid a doubled "FlashLFQ" in the output name when the input name already mentions it
    string append = "_FlashLFQ_";
    if (inputFileName.ToLowerInvariant().Contains("flashlfq"))
    {
        append = "_";
    }

    string outputPrefix = inputFileName + append;

    // Path.Combine inserts a separator only when needed (no "dir//file" for a trailing-slash outputPath)
    results.WriteResults(
        Path.Combine(outputPath, outputPrefix + "QuantifiedPeaks.tsv"),
        Path.Combine(outputPath, outputPrefix + "QuantifiedPeptides.tsv"),
        Path.Combine(outputPath, outputPrefix + "QuantifiedProteins.tsv")
        );
}
/// <summary>
/// Reads the identifications from every file in <c>identFilesForDataGrid</c>, rebuilds the
/// FlashLFQ engine from the settings of the current engine instance, runs it and writes the
/// results. Engine and output failures are reported to the user through a message box.
/// </summary>
private void RunFlashLfq()
{
    // read IDs
    var ids = new List<Identification>();

    foreach (var identFile in identFilesForDataGrid)
    {
        // append in place instead of re-copying the whole list for every file
        ids.AddRange(PsmReader.ReadPsms(identFile.FilePath, false, spectraFileInfo));
    }

    // run FlashLFQ engine
    try
    {
        flashLfqEngine = new FlashLFQEngine(
            allIdentifications: ids,
            normalize: flashLfqEngine.Normalize,
            ppmTolerance: flashLfqEngine.PpmTolerance,
            isotopeTolerancePpm: flashLfqEngine.IsotopePpmTolerance,
            matchBetweenRuns: flashLfqEngine.MatchBetweenRuns,
            matchBetweenRunsPpmTolerance: flashLfqEngine.MbrPpmTolerance,
            integrate: flashLfqEngine.Integrate,
            numIsotopesRequired: flashLfqEngine.NumIsotopesRequired,
            idSpecificChargeState: flashLfqEngine.IdSpecificChargeState,
            requireMonoisotopicMass: flashLfqEngine.RequireMonoisotopicMass,
            silent: false,
            optionalPeriodicTablePath: null,
            maxMbrWindow: flashLfqEngine.MbrRtWindow,
            advancedProteinQuant: flashLfqEngine.AdvancedProteinQuant);

        results = flashLfqEngine.Run();
    }
    catch (Exception ex)
    {
        MessageBox.Show("FlashLFQ has crashed with the following error: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

        // The run failed, so "results" is either null or left over from an earlier run;
        // writing it would either fail with a second, misleading error or emit stale data.
        return;
    }

    // write output
    try
    {
        OutputWriter.WriteOutput(Directory.GetParent(spectraFileInfo.First().FullFilePathWithExtension).FullName, results, outputFolderPath);
    }
    catch (Exception ex)
    {
        MessageBox.Show("Could not write FlashLFQ output: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);
    }
}
/// <summary>
/// Writes the quantified peaks, modified sequences, base sequences and proteins tables of
/// <paramref name="results"/> as "*_FlashLFQ_*.tsv" files named after the input file.
/// </summary>
/// <param name="inputPath">Path whose file name (without extension) prefixes every output file.</param>
/// <param name="results">Results object whose <c>WriteResults</c> method performs the actual writing.</param>
/// <param name="outputPath">
/// Destination directory; created if it does not exist. When null, the directory of
/// <paramref name="inputPath"/> is used, falling back to the current working directory
/// if <paramref name="inputPath"/> carries no directory component.
/// </param>
public static void WriteOutput(string inputPath, FlashLfqResults results, string outputPath = null)
{
    if (outputPath == null)
    {
        outputPath = Path.GetDirectoryName(inputPath);

        // GetDirectoryName yields "" for a bare file name and null for a root path;
        // Directory.CreateDirectory rejects both, so use the working directory instead.
        if (string.IsNullOrEmpty(outputPath))
        {
            outputPath = Directory.GetCurrentDirectory();
        }
    }

    string inputFileName = Path.GetFileNameWithoutExtension(inputPath);

    if (!Directory.Exists(outputPath))
    {
        Directory.CreateDirectory(outputPath);
    }

    // Path.Combine inserts a separator only when needed (no "dir//file" for a trailing-slash outputPath)
    results.WriteResults(
        Path.Combine(outputPath, inputFileName + "_FlashLFQ_QuantifiedPeaks.tsv"),
        Path.Combine(outputPath, inputFileName + "_FlashLFQ_QuantifiedModifiedSequences.tsv"),
        Path.Combine(outputPath, inputFileName + "_FlashLFQ_QuantifiedBaseSequences.tsv"),
        Path.Combine(outputPath, inputFileName + "_FlashLFQ_QuantifiedProteins.tsv")
        );
}
/// <summary>
/// Writes the quantified peaks, peptides and proteins tables of <paramref name="results"/>
/// into the output directory, plus a Bayesian fold-change table when any protein group
/// carries condition-level quantification results.
/// </summary>
/// <param name="inputPath">Only consulted to derive the output directory when <paramref name="outputPath"/> is null.</param>
/// <param name="results">Results object whose <c>WriteResults</c> method performs the actual writing.</param>
/// <param name="silent">Forwarded unchanged to <c>WriteResults</c>.</param>
/// <param name="outputPath">
/// Destination directory; created if it does not exist. When null, the directory of
/// <paramref name="inputPath"/> is used, falling back to the current working directory
/// if <paramref name="inputPath"/> carries no directory component.
/// </param>
public static void WriteOutput(string inputPath, FlashLfqResults results, bool silent, string outputPath = null)
{
    if (outputPath == null)
    {
        outputPath = Path.GetDirectoryName(inputPath);

        // GetDirectoryName yields "" for a bare file name and null for a root path;
        // Directory.CreateDirectory rejects both, so use the working directory instead.
        if (string.IsNullOrEmpty(outputPath))
        {
            outputPath = Directory.GetCurrentDirectory();
        }
    }

    if (!Directory.Exists(outputPath))
    {
        Directory.CreateDirectory(outputPath);
    }

    // the fold-change file is only requested when at least one protein group has such results
    bool bayesianResults = results.ProteinGroups.Any(p => p.Value.ConditionToQuantificationResults.Any());

    results.WriteResults(
        Path.Combine(outputPath, "QuantifiedPeaks.tsv"),
        Path.Combine(outputPath, "QuantifiedPeptides.tsv"),
        Path.Combine(outputPath, "QuantifiedProteins.tsv"),
        bayesianResults ? Path.Combine(outputPath, "BayesianFoldChangeAnalysis.tsv") : null,
        silent
        );
}
/// <summary>
/// Runs the search task: prepares SILAC labels (or disables quantification for .mgf input),
/// loads modifications, proteins and spectral libraries, searches every spectra file with the
/// modern, non-specific or classic engine selected by <c>SearchParameters.SearchType</c>, and
/// hands the collected PSMs to a <c>PostSearchAnalysisTask</c>.
/// </summary>
/// <param name="OutputFolder">Folder passed on to post-search analysis for its output.</param>
/// <param name="dbFilenameList">Databases used for protein loading, spectral libraries and indexing.</param>
/// <param name="currentRawFileList">Spectra files to search, in processing order.</param>
/// <param name="taskId">Identifier used for status and progress reporting.</param>
/// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
/// <returns>The result of the post-search analysis task.</returns>
protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
    if (SearchParameters.DoQuantification)
    {
        // disable quantification if a .mgf is being used
        if (currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
        {
            SearchParameters.DoQuantification = false;
        }
        //if we're doing SILAC, assign and add the silac labels to the residue dictionary
        else if (SearchParameters.SilacLabels != null || SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
        {
            char heavyLabel = 'a'; //char to assign
            //add the Turnoverlabels to the silacLabels list. They weren't there before just to prevent duplication in the tomls
            if (SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
            {
                //original silacLabels object is null, so we need to initialize it
                SearchParameters.SilacLabels = new List<SilacLabel>();
                if (SearchParameters.StartTurnoverLabel != null)
                {
                    var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.StartTurnoverLabel, heavyLabel);
                    heavyLabel = updatedLabel.nextHeavyLabel;
                    SearchParameters.StartTurnoverLabel = updatedLabel.updatedLabel;
                    SearchParameters.SilacLabels.Add(SearchParameters.StartTurnoverLabel);
                }
                if (SearchParameters.EndTurnoverLabel != null)
                {
                    var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.EndTurnoverLabel, heavyLabel);
                    heavyLabel = updatedLabel.nextHeavyLabel;
                    SearchParameters.EndTurnoverLabel = updatedLabel.updatedLabel;
                    SearchParameters.SilacLabels.Add(SearchParameters.EndTurnoverLabel);
                }
            }
            else
            {
                //change the silac residues to lower case amino acids (currently null)
                List<SilacLabel> updatedLabels = new List<SilacLabel>();
                for (int i = 0; i < SearchParameters.SilacLabels.Count; i++)
                {
                    var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.SilacLabels[i], heavyLabel);
                    heavyLabel = updatedLabel.nextHeavyLabel;
                    updatedLabels.Add(updatedLabel.updatedLabel);
                }
                SearchParameters.SilacLabels = updatedLabels;
            }
        }
    }
    //if no quant, remove any silac labels that may have been added, because they screw up downstream analysis
    if (!SearchParameters.DoQuantification) //using "if" instead of "else", because DoQuantification can change if it's an mgf
    {
        SearchParameters.SilacLabels = null;
    }

    LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

    // load proteins
    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);
    SanitizeProteinDatabase(proteinList, SearchParameters.TCAmbiguity);

    // load spectral libraries
    var spectralLibrary = LoadSpectralLibraries(taskId, dbFilenameList);

    // write prose settings
    ProseCreatedWhileRunning.Append("The following search settings were used: ");
    ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
    ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
    ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
    ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
        "maximum peptide length = unspecified; " :
        "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
    ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
    ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
    ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
    ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
    ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
    ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
    ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy)
        + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

    // start the search task
    MyTaskResults = new MyTaskResults(this);
    List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

    //generate an array to store category specific fdr values (for speedy semi/nonspecific searches)
    int numFdrCategories = (int)(Enum.GetValues(typeof(FdrCategory)).Cast<FdrCategory>().Last() + 1); //+1 because it starts at zero
    List<PeptideSpectralMatch>[] allCategorySpecificPsms = new List<PeptideSpectralMatch>[numFdrCategories];
    for (int i = 0; i < numFdrCategories; i++)
    {
        allCategorySpecificPsms[i] = new List<PeptideSpectralMatch>();
    }

    // never assigned in this method; post-search analysis receives null here
    FlashLfqResults flashLfqResults = null;

    MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

    // deferred query: only enumerated when the post-search parameters are built below
    var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

    int completedFiles = 0;
    object indexLock = new object();
    object psmLock = new object();

    Status("Searching files...", taskId);
    Status("Searching files...", new List<string> { taskId, "Individual Spectra Files" });

    // file name (no extension) -> { number of MS2 scans in the file, number of MS2 scans after GetMs2Scans }
    Dictionary<string, int[]> numMs2SpectraPerFile = new Dictionary<string, int[]>();
    for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
    {
        if (GlobalVariables.StopLoops) { break; }

        var origDataFile = currentRawFileList[spectraFileIndex];

        // mark the file as in-progress
        StartingDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

        MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

        var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
        NewCollection(Path.GetFileName(origDataFile), thisId);
        Status("Loading spectra file...", thisId);
        MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
        Status("Getting ms2 scans...", thisId);
        Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
        numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
        myFileManager.DoneWithFile(origDataFile);

        PeptideSpectralMatch[] fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

        // modern search
        if (SearchParameters.SearchType == SearchType.Modern)
        {
            for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
            {
                List<PeptideWithSetModifications> peptideIndex = null;

                // contiguous slice of the protein list belonging to this partition
                List<Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions,
                    ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                Status("Getting fragment dictionary...", new List<string> { taskId });
                var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                    SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, combinedParams, FileSpecificParameters,
                    SearchParameters.MaxFragmentSize, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List<string> { taskId });
                List<int>[] fragmentIndex = null;
                List<int>[] precursorIndex = null;

                lock (indexLock)
                {
                    GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                }

                Status("Searching files...", taskId);

                new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition,
                    combinedParams, this.FileSpecificParameters, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                if (GlobalVariables.StopLoops) { break; }
            }
        }
        // nonspecific search
        else if (SearchParameters.SearchType == SearchType.NonSpecific)
        {
            PeptideSpectralMatch[][] fileSpecificPsmsSeparatedByFdrCategory = new PeptideSpectralMatch[numFdrCategories][]; //generate an array of all possible locals
            for (int i = 0; i < numFdrCategories; i++) //only add if we're using for FDR, else ignore it as null.
            {
                fileSpecificPsmsSeparatedByFdrCategory[i] = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
            }

            //create params for N, C, or both if semi
            List<CommonParameters> paramsToUse = new List<CommonParameters> { combinedParams };
            if (combinedParams.DigestionParams.SearchModeType == CleavageSpecificity.Semi) //if semi, we need to do both N and C to hit everything
            {
                paramsToUse.Clear();
                List<FragmentationTerminus> terminiToUse = new List<FragmentationTerminus> { FragmentationTerminus.N, FragmentationTerminus.C };
                foreach (FragmentationTerminus terminus in terminiToUse) //set both termini
                {
                    paramsToUse.Add(combinedParams.CloneWithNewTerminus(terminus));
                }
            }

            //Compress array of deconvoluted ms2 scans to avoid searching the same ms2 multiple times while still identifying coisolated peptides
            List<int>[] coisolationIndex = new List<int>[] { new List<int>() };
            if (arrayOfMs2ScansSortedByMass.Length != 0)
            {
                int maxScanNumber = arrayOfMs2ScansSortedByMass.Max(x => x.OneBasedScanNumber);
                coisolationIndex = new List<int>[maxScanNumber + 1];
                for (int i = 0; i < arrayOfMs2ScansSortedByMass.Length; i++)
                {
                    int scanNumber = arrayOfMs2ScansSortedByMass[i].OneBasedScanNumber;
                    if (coisolationIndex[scanNumber] == null)
                    {
                        coisolationIndex[scanNumber] = new List<int> { i };
                    }
                    else
                    {
                        coisolationIndex[scanNumber].Add(i);
                    }
                }
                // drop the slots of scan numbers that never occurred
                coisolationIndex = coisolationIndex.Where(x => x != null).ToArray();
            }

            //foreach terminus we're going to look at
            foreach (CommonParameters paramToUse in paramsToUse)
            {
                //foreach database partition
                for (int currentPartition = 0; currentPartition < paramToUse.TotalPartitions; currentPartition++)
                {
                    List<PeptideWithSetModifications> peptideIndex = null;

                    List<Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / paramToUse.TotalPartitions,
                        ((currentPartition + 1) * proteinList.Count / paramToUse.TotalPartitions) - (currentPartition * proteinList.Count / paramToUse.TotalPartitions));

                    List<int>[] fragmentIndex = null;
                    List<int>[] precursorIndex = null;

                    Status("Getting fragment dictionary...", new List<string> { taskId });
                    var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                        SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, paramToUse, FileSpecificParameters,
                        SearchParameters.MaxFragmentSize, true, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List<string> { taskId });
                    lock (indexLock)
                    {
                        GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                    }

                    Status("Searching files...", taskId);

                    new NonSpecificEnzymeSearchEngine(fileSpecificPsmsSeparatedByFdrCategory, arrayOfMs2ScansSortedByMass, coisolationIndex, peptideIndex, fragmentIndex,
                        precursorIndex, currentPartition, paramToUse, this.FileSpecificParameters, variableModifications, massDiffAcceptor,
                        SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + paramToUse.TotalPartitions + "!", thisId));
                    if (GlobalVariables.StopLoops) { break; }
                }
            }
            lock (psmLock)
            {
                for (int i = 0; i < allCategorySpecificPsms.Length; i++)
                {
                    if (allCategorySpecificPsms[i] != null)
                    {
                        allCategorySpecificPsms[i].AddRange(fileSpecificPsmsSeparatedByFdrCategory[i]);
                    }
                }
            }
        }
        // classic search
        else
        {
            Status("Starting search...", thisId);
            var newClassicSearchEngine = new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId, SearchParameters.WriteSpectralLibrary);
            newClassicSearchEngine.Run();

            ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
        }

        //look for internal fragments
        if (SearchParameters.MinAllowedInternalFragmentLength != 0)
        {
            MatchInternalFragmentIons(fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams, SearchParameters.MinAllowedInternalFragmentLength);
        }

        // calculate/set spectral angles if there is a spectral library being used
        if (spectralLibrary != null)
        {
            Status("Calculating spectral library similarity...", thisId);
        }
        // called even when spectralLibrary is null — presumably handled by the callee; verify there
        SpectralLibrarySearchFunction.CalculateSpectralAngles(spectralLibrary, fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams);

        lock (psmLock)
        {
            allPsms.AddRange(fileSpecificPsms);
        }

        completedFiles++;
        FinishedDataFile(origDataFile, new List<string> { taskId, "Individual Spectra Files", origDataFile });

        // Report a percentage like the other ReportProgress calls in this method (which pass 100).
        // Multiply before dividing: plain completedFiles / Count is integer division and stays 0
        // until the very last file.
        ReportProgress(new ProgressEventArgs(completedFiles * 100 / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
    }

    if (spectralLibrary != null)
    {
        spectralLibrary.CloseConnections();
    }

    ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

    int numNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

    //resolve category specific fdrs (for speedy semi and nonspecific)
    if (SearchParameters.SearchType == SearchType.NonSpecific)
    {
        allPsms = NonSpecificEnzymeSearchEngine.ResolveFdrCategorySpecificPsms(allCategorySpecificPsms, numNotches, taskId, CommonParameters, FileSpecificParameters);
    }

    PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters
    {
        SearchTaskResults = MyTaskResults,
        SearchTaskId = taskId,
        SearchParameters = SearchParameters,
        ProteinList = proteinList,
        AllPsms = allPsms,
        VariableModifications = variableModifications,
        FixedModifications = fixedModifications,
        ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)),
        CurrentRawFileList = currentRawFileList,
        MyFileManager = myFileManager,
        NumNotches = numNotches,
        OutputFolder = OutputFolder,
        IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results"),
        FlashLfqResults = flashLfqResults,
        FileSettingsList = fileSettingsList,
        NumMs2SpectraPerFile = numMs2SpectraPerFile,
        DatabaseFilenameList = dbFilenameList
    };
    PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask
    {
        Parameters = parameters,
        FileSpecificParameters = this.FileSpecificParameters,
        CommonParameters = CommonParameters
    };
    return postProcessing.Run();
}
/// <summary>
/// Empties <c>flashLfqResults.SpectraFiles</c> and, for multiplex experiments (no start/end
/// label), repopulates it with one labeled file per SILAC label and original file, optionally
/// keeping the original files as well.
/// </summary>
/// <returns>
/// A map from each original file to the files derived from it. The map is returned empty when
/// a start or end label is supplied.
/// </returns>
private static Dictionary<SpectraFileInfo, List<SpectraFileInfo>> CreateSilacRawFiles(FlashLfqResults flashLfqResults, List<SilacLabel> allSilacLabels, SilacLabel startLabel, SilacLabel endLabel, bool quantifyUnlabeledPeptides, List<SpectraFileInfo> spectraFileInfo)
{
    var derivedFilesByOriginal = new Dictionary<SpectraFileInfo, List<SpectraFileInfo>>();

    // the existing entries are discarded in every case; labeled replacements are added below
    flashLfqResults.SpectraFiles.Clear();

    bool hasTurnoverLabel = startLabel != null || endLabel != null;
    if (hasTurnoverLabel)
    {
        // nothing is populated here when a start/end label is present
        return derivedFilesByOriginal;
    }

    // multiplex: register every original file, listing itself only if unlabeled peptides are quantified
    foreach (SpectraFileInfo original in spectraFileInfo)
    {
        var derivedFiles = new List<SpectraFileInfo>();
        if (quantifyUnlabeledPeptides)
        {
            derivedFiles.Add(original);
        }
        derivedFilesByOriginal.Add(original, derivedFiles);
    }

    if (quantifyUnlabeledPeptides)
    {
        flashLfqResults.SpectraFiles.AddRange(spectraFileInfo);
    }

    // one additional file per (label, original file) pair, grouped by label in the results list
    foreach (SilacLabel silacLabel in allSilacLabels)
    {
        var filesForThisLabel = new List<SpectraFileInfo>();
        foreach (SpectraFileInfo original in spectraFileInfo)
        {
            SpectraFileInfo heavyFile = GetHeavyFileInfo(original, silacLabel);
            filesForThisLabel.Add(heavyFile);
            derivedFilesByOriginal[original].Add(heavyFile);
        }
        flashLfqResults.SpectraFiles.AddRange(filesForThisLabel);
    }

    return derivedFilesByOriginal;
}
/// <summary>
/// If SILAC (Post-Quantification), compress the light/heavy protein group pairs into the same light protein group but different files.
/// Create new files for each silac label and file so that "file 1" now becomes "file 1 (light)" and "file 1 (heavy)".
/// Change heavy residue into the light residue plus a string label ("PEPTIDEa" -> "PEPTIDEK(+8.014)").
/// This light to heavy conversion needs to happen for the flashLFQ peptides here, but can't for the psm peptides, which are constrained to the protein
/// i.e. pwsms currently don't have sequences; they have start/end residues and a protein sequence. We have to change the output sequences when they're created.
/// </summary>
/// <param name="allSilacLabels">All SILAC labels in use; used to recognise and rewrite labeled sequences.</param>
/// <param name="startLabel">Turnover start label; turnover mode is used when this or <paramref name="endLabel"/> is non-null.</param>
/// <param name="endLabel">Turnover end label.</param>
/// <param name="spectraFileInfo">The original (pre-split) spectra files.</param>
/// <param name="proteinGroups">Protein groups whose per-file intensities are refreshed; may be null.</param>
/// <param name="listOfDigestionParams">Not read by this method.</param>
/// <param name="flashLfqResults">Quantification results, modified in place; may be null, in which case only the PSMs are converted.</param>
/// <param name="allPsms">PSMs whose heavy labels are resolved for output.</param>
/// <param name="modsToWriteSelection">Forwarded to <c>ResolveHeavySilacLabel</c>.</param>
/// <param name="quantifyUnlabeledPeptides">Forwarded to <c>CreateSilacRawFiles</c>.</param>
public static void SilacConversionsPostQuantification(List<SilacLabel> allSilacLabels, SilacLabel startLabel, SilacLabel endLabel,
    List<SpectraFileInfo> spectraFileInfo, List<ProteinGroup> proteinGroups, HashSet<DigestionParams> listOfDigestionParams, FlashLfqResults flashLfqResults,
    List<PeptideSpectralMatch> allPsms, Dictionary<string, int> modsToWriteSelection, bool quantifyUnlabeledPeptides)
{
    //do protein quant if we had any results
    //if no results, we still may need to edit the psms
    if (flashLfqResults != null) //can be null if no unambiguous psms were found
    {
        //after this point, we now have quantification values for the peptides, but they all belong to the same "unlabeled" protein and are in the same file
        //We can remove "labeled" peptides from each file and put them in a new file as "unlabeled".

        //MAKE NEW RAW FILES
        //update number of spectra files to include a new file for each label/condition
        Dictionary<SpectraFileInfo, List<SpectraFileInfo>> originalToLabeledFileInfoDictionary = CreateSilacRawFiles(flashLfqResults, allSilacLabels, startLabel, endLabel, quantifyUnlabeledPeptides, spectraFileInfo);

        //we have the files, now let's reassign the psms.
        //there are a few ways to do this, but we're going to generate the "base" peptide and assign to that

        //Get Dictionary of protein accessions to peptides
        Dictionary<string, List<FlashLFQ.Peptide>> unlabeledToPeptidesDictionary = GetDictionaryOfProteinAccessionsToPeptides(flashLfqResults.PeptideModifiedSequences.Values, allSilacLabels, startLabel, endLabel);

        //we now have a dictionary of unlabeledBaseSequence to the labeled peptides
        //Better SILAC results can be obtained by using the summed intensities from ms1 scans where all peaks were found, rather than the apex
        //foreach peptide, unlabeled peptide, get the isotopic envelope intensities for each labeled peptide in each file
        //save the intensities from ms1s that are shared. If no ms1s contains all the peaks, then just use the apex intensity (default)
        CalculateSilacIntensities(flashLfqResults.Peaks, unlabeledToPeptidesDictionary);

        //SPLIT THE FILES
        List<FlashLFQ.Peptide> updatedPeptides = new List<FlashLFQ.Peptide>();

        //split the heavy/light peptides into separate raw files, remove the heavy peptide
        if (startLabel != null || endLabel != null) //if turnover
        {
            //foreach group, the labeled peptides should be split into their labeled files
            //we're deleting the heavy results after we pull those results into a different file
            foreach (SpectraFileInfo info in spectraFileInfo)
            {
                string fullPathWithExtension = info.FullFilePathWithExtension;

                // Path.GetExtension returns the extension including its leading '.', or "" when the
                // file name has none. Unlike splitting the whole path on '.', it is not confused by
                // dots in directory names and does not fail on extension-less files.
                string extension = System.IO.Path.GetExtension(fullPathWithExtension);
                string filePathWithoutExtension = fullPathWithExtension.Substring(0, fullPathWithExtension.Length - extension.Length);

                SpectraFileInfo lightInfo = new SpectraFileInfo(filePathWithoutExtension + "_Original" + extension, info.Condition, info.BiologicalReplicate, info.TechnicalReplicate, info.Fraction);
                SpectraFileInfo heavyInfo = new SpectraFileInfo(filePathWithoutExtension + "_NewlySynthesized" + extension, info.Condition + "_NewlySynthesized", info.BiologicalReplicate, info.TechnicalReplicate, info.Fraction);
                originalToLabeledFileInfoDictionary[info] = new List<SpectraFileInfo> { lightInfo, heavyInfo };
                flashLfqResults.SpectraFiles.Add(lightInfo);
                flashLfqResults.SpectraFiles.Add(heavyInfo);
            }

            //This step converts the quantification intensities from light/heavy to original/newlySynthesized by splitting up the missed cleavage mixtures
            foreach (KeyValuePair<string, List<FlashLFQ.Peptide>> kvp in unlabeledToPeptidesDictionary)
            {
                string unlabeledSequence = kvp.Key; //this will be the key for the new quant entry
                List<FlashLFQ.Peptide> peptides = kvp.Value;
                if (peptides.Count != 1) //sometimes it's one if there is no label site on the peptide (e.g. label K, peptide is PEPTIDER)
                {
                    //Missed cleavages can yield multiple peptides (e.g. 1 missed = LL, LH, HH; 2 missed = LLL, LLH, LHH, HHH; etc)
                    //Compress into 2 values: Light and Heavy
                    FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(unlabeledSequence, unlabeledSequence, peptides[0].UseForProteinQuant, CleanPastProteinQuant(peptides[0].ProteinGroups)); //needed to keep protein info.
                    foreach (SpectraFileInfo info in spectraFileInfo)
                    {
                        int maxNumberHeavyAminoAcids = peptides.Count - 1;
                        double lightIntensity = 0;
                        double heavyIntensity = 0;
                        int numUniquePeptidesQuantified = 0;
                        for (int numHeavyAminoAcids = 0; numHeavyAminoAcids < peptides.Count; numHeavyAminoAcids++)
                        {
                            double totalIntensity = peptides[numHeavyAminoAcids].GetIntensity(info);
                            if (totalIntensity > 0)
                            {
                                //prevent confidence of a ratio if only the HL (and not the LL or HH) is observed.
                                //If LL or HH is observed (but not any other), the user knows the ratio is only from one peak.
                                if (numHeavyAminoAcids == 0 || numHeavyAminoAcids == maxNumberHeavyAminoAcids)
                                {
                                    numUniquePeptidesQuantified += 2;
                                }
                                else
                                {
                                    numUniquePeptidesQuantified++;
                                }

                                // split the intensity in proportion to the fraction of heavy residues
                                double partHeavyIntensity = totalIntensity * numHeavyAminoAcids / maxNumberHeavyAminoAcids;
                                lightIntensity += totalIntensity - partHeavyIntensity;
                                heavyIntensity += partHeavyIntensity;
                            }
                        }

                        //If only a mixed peptide with a missed cleavage was identified, reset the intensity values to zero so the user doesn't get a discrete, inaccurate measurement
                        if (numUniquePeptidesQuantified < 2)
                        {
                            lightIntensity = 0;
                            heavyIntensity = 0;
                        }

                        List<SpectraFileInfo> updatedInfo = originalToLabeledFileInfoDictionary[info];
                        SpectraFileInfo startInfo = updatedInfo[0];
                        SpectraFileInfo endInfo = updatedInfo[1];

                        updatedPeptide.SetIntensity(startInfo, lightIntensity); //assign the corrected light intensity
                        updatedPeptide.SetDetectionType(startInfo, peptides.First().GetDetectionType(info));
                        updatedPeptide.SetIntensity(endInfo, heavyIntensity); //assign the corrected heavy intensity to the heavy file
                        updatedPeptide.SetDetectionType(endInfo, peptides.Last().GetDetectionType(info)); //could include the mixed here if it really matters
                    }

                    //add the updated peptide to the list
                    updatedPeptides.Add(updatedPeptide);
                }
                else
                {
                    updatedPeptides.Add(peptides[0]);
                }
            }
        }
        else //multiplex
        {
            foreach (var kvp in unlabeledToPeptidesDictionary)
            {
                string unlabeledSequence = kvp.Key;
                List<FlashLFQ.Peptide> peptides = kvp.Value;
                FlashLFQ.Peptide representativePeptide = peptides[0];
                FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(unlabeledSequence, unlabeledSequence, representativePeptide.UseForProteinQuant, CleanPastProteinQuant(representativePeptide.ProteinGroups)); //needed to keep protein info.

                //foreach original file
                foreach (SpectraFileInfo info in spectraFileInfo)
                {
                    List<SpectraFileInfo> filesForThisFile = originalToLabeledFileInfoDictionary[info];
                    for (int i = 0; i < peptides.Count; i++) //the files and the peptides can use the same index, because there should be a distinct file for each label/peptide
                    {
                        SpectraFileInfo currentInfo = filesForThisFile[i];
                        FlashLFQ.Peptide currentPeptide = peptides[i];
                        updatedPeptide.SetIntensity(currentInfo, currentPeptide.GetIntensity(info));
                        updatedPeptide.SetDetectionType(currentInfo, currentPeptide.GetDetectionType(info));
                    }
                }
                updatedPeptides.Add(updatedPeptide);
            }
        }

        //Update peptides
        var peptideResults = flashLfqResults.PeptideModifiedSequences;
        peptideResults.Clear();
        foreach (FlashLFQ.Peptide peptide in updatedPeptides)
        {
            peptideResults.Add(peptide.Sequence, peptide);
        }

        //Do protein quant
        flashLfqResults.CalculateProteinResultsMedianPolish(true);

        //update proteingroups to have all files for quantification
        if (proteinGroups != null)
        {
            List<SpectraFileInfo> allInfo = originalToLabeledFileInfoDictionary.SelectMany(x => x.Value).ToList();
            foreach (ProteinGroup proteinGroup in proteinGroups)
            {
                proteinGroup.FilesForQuantification = allInfo;
                proteinGroup.IntensitiesByFile = new Dictionary<SpectraFileInfo, double>();

                // the lookup depends only on the protein group, so do it once rather than once per file
                bool wasQuantified = flashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup);

                foreach (var spectraFile in allInfo)
                {
                    if (wasQuantified)
                    {
                        proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile));
                    }
                    else
                    {
                        //needed for decoys/contaminants/proteins that aren't quantified
                        proteinGroup.IntensitiesByFile.Add(spectraFile, 0);
                    }
                }
            }
        }

        //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
        //(flashLfqResults is already known to be non-null inside this block)
        var lfqPeaks = flashLfqResults.Peaks;
        List<SpectraFileInfo> peakKeys = lfqPeaks.Keys.ToList();

        foreach (SpectraFileInfo key in peakKeys)
        {
            List<ChromatographicPeak> peaks = lfqPeaks[key];
            for (int i = 0; i < peaks.Count; i++)
            {
                var peak = peaks[i];

                //only peaks where at least one identification carries a label need their identifications rewritten
                if (peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, allSilacLabels) != null))
                {
                    List<Identification> updatedIds = new List<Identification>();
                    foreach (var id in peak.Identifications)
                    {
                        string baseSequence = id.BaseSequence;
                        string fullSequence = id.ModifiedSequence;
                        List<SilacLabel> labels = GetRelevantLabelsFromBaseSequenceForOutput(id.BaseSequence, allSilacLabels);
                        if (labels != null)
                        {
                            foreach (SilacLabel label in labels)
                            {
                                baseSequence = GetSilacLightBaseSequence(baseSequence, label);
                                fullSequence = GetSilacLightFullSequence(fullSequence, label);
                            }
                        }

                        Identification updatedId = new Identification(
                            id.FileInfo,
                            baseSequence,
                            fullSequence,
                            id.MonoisotopicMass,
                            id.Ms2RetentionTimeInMinutes,
                            id.PrecursorChargeState,
                            id.ProteinGroups.ToList(),
                            id.OptionalChemicalFormula,
                            id.UseForProteinQuant
                            );
                        updatedIds.Add(updatedId);
                    }

                    // swap the identifications in place so the peak object itself is preserved
                    peak.Identifications.Clear();
                    peak.Identifications.AddRange(updatedIds);
                }
            }
        }
    }

    //convert all psms into human readable format
    for (int i = 0; i < allPsms.Count; i++)
    {
        allPsms[i].ResolveHeavySilacLabel(allSilacLabels, modsToWriteSelection);
    }
}
/// <summary>
/// Performs one command-line FlashLFQ run: validates the settings, collects the spectra files
/// in <c>settings.SpectraFileRepository</c>, handles the Thermo licence prompt, reads the
/// optional experimental design and the PSMs, runs the engine and writes the output.
/// Each failed step is reported on the console (mostly only when <c>settings.Silent</c> is
/// false) and ends the run by returning.
/// </summary>
/// <param name="settings">Settings for this run; also written to FlashLfqSettings.toml in the output folder.</param>
private static void Run(FlashLfqSettings settings)
{
    try
    {
        settings.ValidateCommandLineSettings();
    }
    catch (Exception e)
    {
        if (!settings.Silent)
        {
            Console.WriteLine("Error: " + e.Message);
        }

        return;
    }

    // check to see if experimental design file exists
    string assumedPathToExpDesign = Path.Combine(settings.SpectraFileRepository, "ExperimentalDesign.tsv");
    if ((settings.Normalize || settings.BayesianProteinQuant) && !File.Exists(assumedPathToExpDesign))
    {
        if (!settings.Silent)
        {
            Console.WriteLine("Could not find experimental design file " +
                "(required for normalization and Bayesian statistical analysis): " + assumedPathToExpDesign);
        }

        return;
    }

    // set up spectra file info
    List<SpectraFileInfo> spectraFileInfos = new List<SpectraFileInfo>();
    List<string> filePaths = Directory.GetFiles(settings.SpectraFileRepository)
        .Where(f => acceptedSpectrumFileFormats.Contains(Path.GetExtension(f).ToLowerInvariant()))
        .ToList();

    // check for duplicate file names (agnostic of file extension)
    foreach (var fileName in filePaths.GroupBy(p => Path.GetFileNameWithoutExtension(p)))
    {
        if (fileName.Count() > 1)
        {
            // the group key is the extension-less name, so what distinguishes the clashing
            // files (and what the message should list) is their extensions
            var types = fileName.Select(p => Path.GetExtension(p)).Distinct();

            if (!settings.Silent)
            {
                Console.WriteLine("Multiple spectra files with the same name were detected (maybe " + string.Join(" and ", types) + "?). " +
                    "Please remove or rename duplicate files from the spectra file directory.");
            }

            return;
        }
    }

    if (settings.PrintThermoLicenceViaCommandLine)
    {
        Console.WriteLine(ThermoRawFileReaderLicence.ThermoLicenceText);
        return;
    }

    // check thermo licence agreement
    if (filePaths.Select(v => Path.GetExtension(v).ToLowerInvariant()).Any(f => f == ".raw"))
    {
        var licenceAgreement = LicenceAgreementSettings.ReadLicenceSettings();

        if (!licenceAgreement.HasAcceptedThermoLicence)
        {
            if (settings.AcceptThermoLicenceViaCommandLine)
            {
                if (!settings.ReadOnlyFileSystem)
                {
                    licenceAgreement.AcceptLicenceAndWrite();
                }
            }
            else
            {
                // decided to write this even if it's on silent mode...
                Console.WriteLine(ThermoRawFileReaderLicence.ThermoLicenceText);
                Console.WriteLine("\nIn order to search Thermo .raw files, you must agree to the above terms. Do you agree to the above terms? y/n\n");

                // ReadLine returns null when standard input is closed/redirected and exhausted;
                // treat that like any other non-"y" answer instead of dereferencing null
                string res = Console.ReadLine();

                if (string.Equals(res, "y", StringComparison.OrdinalIgnoreCase))
                {
                    try
                    {
                        if (!settings.ReadOnlyFileSystem)
                        {
                            licenceAgreement.AcceptLicenceAndWrite();
                        }
                    }
                    catch (Exception e)
                    {
                        // failing to persist the acceptance is not fatal for this run
                        Console.WriteLine(e.Message);
                    }
                }
                else
                {
                    Console.WriteLine("Thermo licence has been declined. Exiting FlashLFQ. You can still search .mzML and .mgf files without agreeing to the Thermo licence.");
                    return;
                }
            }
        }
    }

    if (File.Exists(assumedPathToExpDesign))
    {
        // one line per spectra file, keyed by the first tab-separated column (the file name)
        var experimentalDesign = File.ReadAllLines(assumedPathToExpDesign)
            .ToDictionary(v => v.Split('\t')[0], v => v);

        foreach (var file in filePaths)
        {
            string filename = Path.GetFileNameWithoutExtension(file);

            if (!experimentalDesign.TryGetValue(filename, out string expDesignForThisFile))
            {
                // report the missing entry like every other setup problem instead of
                // crashing with an unhandled KeyNotFoundException
                if (!settings.Silent)
                {
                    Console.WriteLine("Could not find spectra file \"" + filename + "\" in the experimental design file " + assumedPathToExpDesign);
                }

                return;
            }

            var split = expDesignForThisFile.Split('\t');

            string condition = split[1];
            int biorep = int.Parse(split[2]);
            int fraction = int.Parse(split[3]);
            int techrep = int.Parse(split[4]);

            // experimental design info passed in here for each spectra file
            // (the file's values are passed on minus one)
            spectraFileInfos.Add(new SpectraFileInfo(fullFilePathWithExtension: file,
                condition: condition,
                biorep: biorep - 1,
                fraction: fraction - 1,
                techrep: techrep - 1));
        }
    }
    else
    {
        // no experimental design: every file becomes its own biorep of a "Default" condition
        for (int i = 0; i < filePaths.Count; i++)
        {
            var file = filePaths[i];
            spectraFileInfos.Add(new SpectraFileInfo(fullFilePathWithExtension: file,
                condition: "Default",
                biorep: i,
                fraction: 0,
                techrep: 0));
        }
    }

    // check the validity of the settings and experimental design
    try
    {
        settings.ValidateSettings(spectraFileInfos);
    }
    catch (Exception e)
    {
        if (!settings.Silent)
        {
            Console.WriteLine("Error: " + e.Message);
        }

        return;
    }

    // set up IDs
    List<Identification> ids;
    try
    {
        ids = PsmReader.ReadPsms(settings.PsmIdentificationPath, settings.Silent, spectraFileInfos);
    }
    catch (Exception e)
    {
        Console.WriteLine("Problem reading PSMs: " + e.Message);
        return;
    }

    if (!ids.Any())
    {
        if (!settings.Silent)
        {
            Console.WriteLine("No peptide IDs for the specified spectra files were found! " +
                "Check to make sure the spectra file names match between the ID file and the spectra files");
        }

        return;
    }

    if (!settings.Silent)
    {
        Console.WriteLine("Setup is OK; read in " + ids.Count + " identifications; starting FlashLFQ engine");
    }

    // write FlashLFQ settings to a file
    if (!Directory.Exists(settings.OutputPath))
    {
        Directory.CreateDirectory(settings.OutputPath);
    }

    Nett.Toml.WriteFile(settings, Path.Combine(settings.OutputPath, "FlashLfqSettings.toml"));

    // make engine with desired settings
    FlashLfqEngine engine = null;
    FlashLfqResults results = null;

    try
    {
        engine = FlashLfqSettings.CreateEngineWithSettings(settings, ids);

        // run
        results = engine.Run();
    }
    catch (Exception ex)
    {
        string spectraDirectory = Directory.GetParent(filePaths.First()).FullName;

        // the path shown to the user: the output folder when set, else the spectra folder
        string errorReportPath = settings.OutputPath ?? spectraDirectory;

        if (!settings.Silent)
        {
            Console.WriteLine("FlashLFQ has crashed with the following error: " + ex.Message +
                ".\nError report written to " + errorReportPath);
        }

        OutputWriter.WriteErrorReport(ex, spectraDirectory, settings.OutputPath);
    }

    // output (results stays null when the engine crashed)
    if (results != null)
    {
        try
        {
            OutputWriter.WriteOutput(settings.PsmIdentificationPath, results, settings.Silent, settings.OutputPath);
        }
        catch (Exception ex)
        {
            if (!settings.Silent)
            {
                Console.WriteLine("Could not write FlashLFQ output: " + ex.Message);
            }
        }
    }
}
/// <summary>
/// Command-line entry point: parses the arguments, checks that the identification file and
/// spectra folder exist, builds the spectra file list (using ExperimentalDesign.tsv when
/// normalization is requested), reads the PSMs, runs the FlashLFQ engine and writes the
/// output. Problems are reported on the console and end the run by returning.
/// </summary>
/// <param name="args">Command-line arguments; see the help text registered below.</param>
public static void Main(string[] args)
{
    // parameters
    List<string> acceptedSpectrumFileFormats = new List<string> { ".RAW", ".MZML" };

    // setup parameters
    var p = new FluentCommandLineParser<ApplicationArguments>();

    p.SetupHelp("?", "help")
        .Callback(text => Console.WriteLine(
            "Valid arguments:\n" +
            "--idt [string|identification file path (TSV format)]\n" +
            "--rep [string|directory containing spectrum data files]\n" +
            "--out [string|output directory]\n" +
            "--ppm [double|ppm tolerance]\n" +
            "--iso [double|isotopic distribution tolerance in ppm]\n" +
            "--sil [bool|silent mode]\n" +
            "--int [bool|integrate features]\n" +
            "--mbr [bool|match between runs]\n" +
            "--mrt [double|maximum MBR window in minutes]\n" +
            "--chg [bool|use only precursor charge state]\n" +
            "--rmm [bool|require observed monoisotopic mass peak]\n" +
            "--nis [int|number of isotopes required to be observed]\n" +
            "--nor [bool|normalize intensity results]\n" +
            "--pro [bool|advanced protein quantification]\n"
        ));

    p.Setup(arg => arg.PsmInputPath) // PSMs file
        .As("idt")
        .Required();

    p.Setup(arg => arg.RawFilesPath) // spectrum files
        .As("rep")
        .Required();

    p.Setup(arg => arg.OutputPath) // output path
        .As("out");

    p.Setup(arg => arg.PpmTolerance) // ppm tolerance
        .As("ppm");

    p.Setup(arg => arg.IsotopePpmTolerance) // isotope ppm tolerance
        .As("iso");

    p.Setup(arg => arg.Silent) // do not display output messages
        .As("sil");

    p.Setup(arg => arg.Integrate) // integrate
        .As("int");

    p.Setup(arg => arg.MatchBetweenRuns) // match between runs
        .As("mbr");

    p.Setup(arg => arg.MbrRtWindow) // maximum match-between-runs window in minutes
        .As("mrt");

    p.Setup(arg => arg.IdSpecificChargeState) // only use PSM-identified charge states
        .As("chg");

    p.Setup(arg => arg.RequireMonoisotopicMass) // require observation of monoisotopic peak
        .As("rmm");

    p.Setup(arg => arg.NumIsotopesRequired) // num of isotopes required
        .As("nis");

    p.Setup(arg => arg.Normalize) // normalize
        .As("nor");

    p.Setup(arg => arg.AdvancedProteinQuant) // advanced protein quant
        .As("pro");

    // parse exactly once; parsing a second time would re-run the parser (and its callbacks)
    var parseResult = p.Parse(args);

    if (parseResult.HasErrors)
    {
        Console.WriteLine("Invalid arguments - type \"--help\" for valid arguments");
        return;
    }

    if (p.Object.PsmInputPath == null)
    {
        // no errors - just requesting help?
        return;
    }

    // args are OK - run FlashLFQ
    if (!File.Exists(p.Object.PsmInputPath))
    {
        if (!p.Object.Silent)
        {
            Console.WriteLine("Could not locate identification file " + p.Object.PsmInputPath);
        }

        return;
    }

    if (!Directory.Exists(p.Object.RawFilesPath))
    {
        if (!p.Object.Silent)
        {
            Console.WriteLine("Could not locate folder " + p.Object.RawFilesPath);
        }

        return;
    }

    string assumedPathToExpDesign = Path.Combine(p.Object.RawFilesPath, "ExperimentalDesign.tsv");
    if (p.Object.Normalize && !File.Exists(assumedPathToExpDesign))
    {
        if (!p.Object.Silent)
        {
            Console.WriteLine("Could not find experimental design file (required for normalization): " + assumedPathToExpDesign);
        }

        return;
    }

    // set up spectra file info
    // get experimental design info for normalization
    List<SpectraFileInfo> spectraFileInfos = new List<SpectraFileInfo>();

    // materialized once: the list is walked here and used again by the error handler below
    List<string> files = Directory.GetFiles(p.Object.RawFilesPath)
        .Where(f => acceptedSpectrumFileFormats.Contains(Path.GetExtension(f).ToUpperInvariant()))
        .ToList();

    if (p.Object.Normalize)
    {
        // one line per spectra file, keyed by the first tab-separated column (the file name)
        var experimentalDesign = File.ReadAllLines(assumedPathToExpDesign)
            .ToDictionary(v => v.Split('\t')[0], v => v);

        foreach (var file in files)
        {
            string filename = Path.GetFileNameWithoutExtension(file);

            if (!experimentalDesign.TryGetValue(filename, out string expDesignForThisFile))
            {
                // report the missing entry instead of crashing with a KeyNotFoundException
                if (!p.Object.Silent)
                {
                    Console.WriteLine("Could not find spectra file \"" + filename + "\" in the experimental design file " + assumedPathToExpDesign);
                }

                return;
            }

            var split = expDesignForThisFile.Split('\t');

            string condition = split[1];
            int biorep = int.Parse(split[2]);
            int fraction = int.Parse(split[3]);
            int techrep = int.Parse(split[4]);

            // experimental design info passed in here for each spectra file
            // (the file's values are passed on minus one)
            spectraFileInfos.Add(new SpectraFileInfo(fullFilePathWithExtension: file,
                condition: condition,
                biorep: biorep - 1,
                fraction: fraction - 1,
                techrep: techrep - 1));
        }
    }
    else
    {
        foreach (var file in files)
        {
            spectraFileInfos.Add(new SpectraFileInfo(fullFilePathWithExtension: file,
                condition: "",
                biorep: 0,
                fraction: 0,
                techrep: 0));
        }
    }

    // set up IDs
    List<Identification> ids;
    try
    {
        ids = PsmReader.ReadPsms(p.Object.PsmInputPath, p.Object.Silent, spectraFileInfos);
    }
    catch (Exception e)
    {
        Console.WriteLine("Problem reading PSMs: " + e.Message);
        return;
    }

    if (!ids.Any())
    {
        if (!p.Object.Silent)
        {
            Console.WriteLine("No peptide IDs for the specified spectra files were found! " +
                "Check to make sure the spectra file names match between the ID file and the spectra files");
        }

        return;
    }

    if (!p.Object.Silent)
    {
        Console.WriteLine("Setup is OK; read in " + ids.Count + " identifications; starting FlashLFQ engine");
    }

    // make engine with desired settings
    FlashLfqEngine engine = null;
    FlashLfqResults results = null;

    try
    {
        engine = new FlashLfqEngine(
            allIdentifications: ids,
            normalize: p.Object.Normalize,
            ppmTolerance: p.Object.PpmTolerance,
            isotopeTolerancePpm: p.Object.IsotopePpmTolerance,
            matchBetweenRuns: p.Object.MatchBetweenRuns,
            matchBetweenRunsPpmTolerance: p.Object.MbrPpmTolerance,
            integrate: p.Object.Integrate,
            numIsotopesRequired: p.Object.NumIsotopesRequired,
            idSpecificChargeState: p.Object.IdSpecificChargeState,
            requireMonoisotopicMass: p.Object.RequireMonoisotopicMass,
            silent: p.Object.Silent,
            optionalPeriodicTablePath: null,
            maxMbrWindow: p.Object.MbrRtWindow,
            advancedProteinQuant: p.Object.AdvancedProteinQuant);

        // run
        results = engine.Run();
    }
    catch (Exception ex)
    {
        string spectraDirectory = Directory.GetParent(files.First()).FullName;

        // the path shown to the user: the output folder when set, else the spectra folder
        string errorReportPath = p.Object.OutputPath ?? spectraDirectory;

        if (!p.Object.Silent)
        {
            Console.WriteLine("FlashLFQ has crashed with the following error: " + ex.Message +
                ".\nError report written to " + errorReportPath);
        }

        OutputWriter.WriteErrorReport(ex, spectraDirectory, p.Object.OutputPath);
    }

    // output (results stays null when the engine crashed)
    if (results != null)
    {
        try
        {
            OutputWriter.WriteOutput(p.Object.PsmInputPath, results, p.Object.OutputPath);
        }
        catch (Exception ex)
        {
            if (!p.Object.Silent)
            {
                Console.WriteLine("Could not write FlashLFQ output: " + ex.Message);
            }
        }
    }
}
//If SILAC (Post-Quantification), compress the light/heavy protein group pairs into the same light protein group but different files
//Create new files for each silac label and file so that "file 1" now becomes "file 1 (light)" and "file 1 (heavy)"
//Change heavy residue into the light residue plus a string label ("PEPTIDEa" -> "PEPTIDEK(+8.014)")
//This heavy-to-light conversion needs to happen for the flashLFQ peptides here, but can't for the psm peptides, which are constrained to the protein
//i.e. pwsms currently don't have sequences; they have start/end residues and a protein sequence. We have to change the output sequences when they're created.
//
//Everything is updated in place: ProteinGroups is cleared and refilled with the groups found in silacProteinGroupMatcher,
//and FlashLfqResults (spectra files, protein groups, peaks, peptides) is mutated when it is not null.
//ProteinGroups may be null (no parsimony) and FlashLfqResults may be null (nothing quantified); both cases are skipped.
public static void SilacConversionsPostQuantification(List<SilacLabel> silacLabels, List<SpectraFileInfo> spectraFileInfo, List<ProteinGroup> ProteinGroups,
    HashSet<DigestionParams> ListOfDigestionParams, Dictionary<string, List<string>> silacProteinGroupMatcher, FlashLfqResults FlashLfqResults,
    List<PeptideSpectralMatch> allPsms, Dictionary<string, int> ModsToWriteSelection, bool Integrate)
{
    bool outputLightIntensities = ListOfDigestionParams.Any(x => x.GeneratehUnlabeledProteinsForSilac);

    //MAKE NEW RAW FILES
    //update number of spectra files to include a new file for each label*condition
    Dictionary<SpectraFileInfo, string> fileToLabelDictionary = new Dictionary<SpectraFileInfo, string>(); //figure out which file is which label, since some files will be only light and others only heavy. Key is file, value is the label string (label.MassDifference)
    Dictionary<SpectraFileInfo, SpectraFileInfo> labeledToUnlabeledFile = new Dictionary<SpectraFileInfo, SpectraFileInfo>(); //keep track of the heavy-to-light pairs. If multiple, looks like 3-1 and 2-1, but no 3-2 (only heavy to light, no heavy to heavy)
    List<SpectraFileInfo> silacSpectraFileInfo = new List<SpectraFileInfo>(); //new files

    //foreach existing file
    foreach (SpectraFileInfo originalFile in spectraFileInfo)
    {
        //add the existing file as the light
        silacSpectraFileInfo.Add(originalFile);

        //foreach label, add a new file with the label
        foreach (SilacLabel label in silacLabels)
        {
            SpectraFileInfo silacFile = GetHeavyFileInfo(originalFile, label);
            silacSpectraFileInfo.Add(silacFile);
            fileToLabelDictionary[silacFile] = label.MassDifference;
            labeledToUnlabeledFile[silacFile] = originalFile;
        }
    }

    //UPDATE PROTEIN GROUPS
    //remove the heavy protein groups so that there are only light ones
    //add the intensities of the heavy groups into the newly created heavy SpectraFileInfos
    HashSet<SpectraFileInfo> lightFilesToRemove = new HashSet<SpectraFileInfo>(); //this is only used when the user specified no unlabeled proteins
    if (ProteinGroups != null) //if we did parsimony
    {
        List<EngineLayer.ProteinGroup> silacProteinGroups = new List<EngineLayer.ProteinGroup>();
        //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
        //we need to remove these unlabeled peptides/proteins before output
        //foreach protein group (which has its own quant for each file)
        foreach (EngineLayer.ProteinGroup proteinGroup in ProteinGroups)
        {
            //update fileinfo for the group
            //NOTE: every group shares this one list instance, so the Remove(info) further down also shrinks silacSpectraFileInfo itself
            proteinGroup.FilesForQuantification = silacSpectraFileInfo;

            //grab the light groups. Using these light groups, find their heavy group pair(s), add them to the light group quant info, and then remove the heavy groups
            if (silacProteinGroupMatcher.TryGetValue(proteinGroup.ProteinGroupName, out List<string> silacSubGroupNames)) //try to find the light protein groups. If it's not light, ignore it
            {
                //the out variable contains all the other heavy protein groups that were generated for this light protein group
                //go through the files and see if any of them contain the same label. If not, put zeroes for those missing "files"
                //If the user didn't specify to search light intensities, then don't output them
                Dictionary<SpectraFileInfo, double> updatedIntensitiesByFile = proteinGroup.IntensitiesByFile; //light intensities
                List<SpectraFileInfo> lightKeys = updatedIntensitiesByFile.Keys.ToList();

                //go through all files (including "silac" files)
                List<ProteinGroup> subGroup = ProteinGroups.Where(x => silacSubGroupNames.Contains(x.ProteinGroupName)).ToList(); //find the protein groups where the accession contains "light" accession of the current protein group
                foreach (SpectraFileInfo fileInfo in silacSpectraFileInfo) //for every file (light and heavy)
                {
                    //if it doesn't have a value, then it's a silac file (light missing values still have a value "0")
                    if (!updatedIntensitiesByFile.ContainsKey(fileInfo))
                    {
                        string labelSignature = fileToLabelDictionary[fileInfo]; //a string associated with a silac label
                        ProteinGroup foundGroup = subGroup.Where(x => x.Proteins.Any(y => y.Accession.Contains(labelSignature))).FirstOrDefault(); //get the protein groups containing this label
                        updatedIntensitiesByFile[fileInfo] = foundGroup == null ? 0 : foundGroup.IntensitiesByFile[labeledToUnlabeledFile[fileInfo]]; //update the intensity for that label in the light group
                    }
                    //else do nothing. The light version is already in the dictionary
                }

                //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
                //we need to remove these unlabeled peptides/proteins before output
                if (!outputLightIntensities)
                {
                    foreach (SpectraFileInfo info in lightKeys)
                    {
                        updatedIntensitiesByFile.Remove(info);
                        proteinGroup.FilesForQuantification.Remove(info);
                        lightFilesToRemove.Add(info);
                    }
                }

                silacProteinGroups.Add(proteinGroup);
            }
        }

        //update
        ProteinGroups.Clear();
        ProteinGroups.AddRange(silacProteinGroups);

        if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
        {
            //remove light files (if necessary)
            //this must sit inside the null check: the results object is dereferenced here
            foreach (SpectraFileInfo info in lightFilesToRemove)
            {
                FlashLfqResults.SpectraFiles.Remove(info);
            }

            //UPDATE FLASHLFQ PROTEINS
            Dictionary<string, FlashLFQ.ProteinGroup> flashLfqProteins = FlashLfqResults.ProteinGroups; //dictionary of protein group names to protein groups
            //if the protein group is a heavy protein group, get rid of it. We already accounted for it above.
            var keys = flashLfqProteins.Keys.ToList();
            foreach (string key in keys)
            {
                if (silacLabels.Any(x => key.Contains(x.MassDifference)))
                {
                    flashLfqProteins.Remove(key);
                }
            }
        }
    }

    ////UPDATE FLASHLFQ SPECTRA FILES
    if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
    {
        List<SpectraFileInfo> originalFiles = FlashLfqResults.SpectraFiles; //pass reference
        foreach (SpectraFileInfo info in silacSpectraFileInfo)
        {
            if (!originalFiles.Contains(info))
            {
                originalFiles.Add(info);
            }
        }
    }

    //UPDATE PEPTIDE INFO
    //convert all psm/peptide/proteingroup sequences from the heavy label to the light label for output
    //We can do this for all of the FlashLFQ peptides/peaks, because they use string sequences.
    //We are unable to do this for Parameters.AllPsms, because they store proteins and start/end residues instead
    //for Psms, we need to convert during the writing.
    for (int i = 0; i < allPsms.Count; i++)
    {
        allPsms[i].ResolveHeavySilacLabel(silacLabels, ModsToWriteSelection);
    }

    //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
    if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
    {
        var lfqPeaks = FlashLfqResults.Peaks;
        List<SpectraFileInfo> peakKeys = lfqPeaks.Keys.ToList();

        foreach (SpectraFileInfo key in peakKeys)
        {
            List<FlashLFQ.ChromatographicPeak> peaks = lfqPeaks[key];
            for (int i = 0; i < peaks.Count; i++)
            {
                var peak = peaks[i];
                List<Identification> identifications = new List<Identification>();
                //check if we're removing light peaks and if it's a light peak
                if (!outputLightIntensities && !peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, silacLabels) != null)) //if no ids have any labels, remove them
                {
                    peaks.RemoveAt(i);
                    i--; //stay on the same index, which now holds the next peak
                }
                else
                {
                    foreach (var id in peak.Identifications)
                    {
                        SilacLabel label = GetRelevantLabelFromBaseSequence(id.BaseSequence, silacLabels);
                        HashSet<FlashLFQ.ProteinGroup> originalGroups = id.proteinGroups;
                        List<FlashLFQ.ProteinGroup> updatedGroups = new List<FlashLFQ.ProteinGroup>();
                        foreach (FlashLFQ.ProteinGroup group in originalGroups)
                        {
                            string groupName = group.ProteinGroupName;
                            if (label == null) //if light
                            {
                                updatedGroups.Add(group);
                            }
                            else
                            {
                                string labelString = "(" + label.OriginalAminoAcid + label.MassDifference;
                                int labelIndex = groupName.IndexOf(labelString);
                                if (labelIndex != -1) //labelIndex == 1 if a) 2+ peptides are required per protein or b) somebody broke parsimony
                                {
                                    groupName = groupName.Substring(0, labelIndex);
                                    updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                                }
                            }
                        }

                        Identification updatedId = new Identification(
                            id.fileInfo,
                            GetSilacLightBaseSequence(id.BaseSequence, label),
                            GetSilacLightFullSequence(id.ModifiedSequence, label),
                            id.monoisotopicMass,
                            id.ms2RetentionTimeInMinutes,
                            id.precursorChargeState,
                            updatedGroups,
                            id.OptionalChemicalFormula,
                            id.UseForProteinQuant
                            );
                        identifications.Add(updatedId);
                    }

                    FlashLFQ.ChromatographicPeak updatedPeak = new FlashLFQ.ChromatographicPeak(identifications.First(), peak.IsMbrPeak, peak.SpectraFileInfo);
                    for (int j = 1; j < identifications.Count; j++) //add all the original identification
                    {
                        updatedPeak.MergeFeatureWith(new FlashLFQ.ChromatographicPeak(identifications[j], peak.IsMbrPeak, peak.SpectraFileInfo), Integrate);
                    }
                    updatedPeak.IsotopicEnvelopes = peak.IsotopicEnvelopes; //need to set isotopicEnvelopes, since the new identifications didn't have them.
                    updatedPeak.CalculateIntensityForThisFeature(Integrate); //needed to update info
                    peaks[i] = updatedPeak;
                }
            }
        }

        //convert all lfq peptides from heavy to light for output
        Dictionary<string, FlashLFQ.Peptide> lfqPwsms = FlashLfqResults.PeptideModifiedSequences;
        List<string> pwsmKeys = lfqPwsms.Keys.ToList();
        foreach (string key in pwsmKeys)
        {
            FlashLFQ.Peptide currentPeptide = lfqPwsms[key];
            SilacLabel label = GetRelevantLabelFromFullSequence(currentPeptide.Sequence, silacLabels);
            if (label != null) //if it's a heavy peptide
            {
                lfqPwsms.Remove(key); //get rid of it
                                      //update the light version
                string lightSequence = GetSilacLightFullSequence(currentPeptide.Sequence, label, false); //get the light sequence
                List<SpectraFileInfo> heavyFiles = silacSpectraFileInfo.Where(x => x.FilenameWithoutExtension.Contains(label.MassDifference)).ToList(); //these are the heavy raw file names

                //Find the light peptide (which has a value for the light datafile) and set the intensity for the heavy datafile from the current peptide
                if (lfqPwsms.TryGetValue(lightSequence, out FlashLFQ.Peptide lightPeptide)) //this should always have a value, since we made replicas earlier, and yet it sometimes doesn't...
                {
                    foreach (SpectraFileInfo heavyFile in heavyFiles)
                    {
                        SpectraFileInfo lightFile = labeledToUnlabeledFile[heavyFile];
                        lightPeptide.SetIntensity(heavyFile, currentPeptide.GetIntensity(lightFile));
                        lightPeptide.SetDetectionType(heavyFile, currentPeptide.GetDetectionType(lightFile));
                    }
                }
                else //if there's no light, create a new entry for the heavy
                {
                    //new peptide
                    FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(lightSequence, currentPeptide.UseForProteinQuant);

                    //update the heavy info
                    //the heavy files were only created at the top of this method, so the heavy peptide's
                    //values are stored under the original file it maps back to (same lookup as the branch above)
                    foreach (SpectraFileInfo info in heavyFiles)
                    {
                        SpectraFileInfo lightFile = labeledToUnlabeledFile[info];
                        updatedPeptide.SetIntensity(info, currentPeptide.GetIntensity(lightFile));
                        updatedPeptide.SetDetectionType(info, currentPeptide.GetDetectionType(lightFile));
                    }

                    //set the other values to zero
                    List<SpectraFileInfo> otherInfo = silacSpectraFileInfo.Where(x => !heavyFiles.Contains(x)).ToList();
                    foreach (SpectraFileInfo info in otherInfo)
                    {
                        updatedPeptide.SetIntensity(info, 0);
                        updatedPeptide.SetDetectionType(info, DetectionType.NotDetected);
                    }

                    HashSet<FlashLFQ.ProteinGroup> originalGroups = currentPeptide.proteinGroups;
                    HashSet<FlashLFQ.ProteinGroup> updatedGroups = new HashSet<FlashLFQ.ProteinGroup>();
                    foreach (FlashLFQ.ProteinGroup group in originalGroups)
                    {
                        string groupName = group.ProteinGroupName;
                        groupName = groupName.Replace(label.MassDifference, "");
                        updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                    }

                    updatedPeptide.proteinGroups = updatedGroups;
                    lfqPwsms[updatedPeptide.Sequence] = updatedPeptide;
                }
            }
        }
    }
}
/// <summary>
/// Runs the FlashLFQ engine with the user's defined spectra files, ID files, and FlashLFQ
/// settings.
/// Reads the PSMs from every ID file, runs the engine and writes the output. Any failure is
/// shown to the user in a message box and ends the run; for read and engine failures an
/// error report is written first.
/// </summary>
private void RunFlashLfq()
{
    // Writes an error report for `error` and tells the user where it went. Everything that
    // can fail while doing so (including locating the spectra folder) is inside the try, so
    // a problem in the reporting path cannot mask the original error.
    void ReportFailure(Exception error, string headline)
    {
        string errorReportPath;

        try
        {
            // FirstOrDefault: an empty spectra file list must not throw here
            string firstSpectraFilePath = spectraFiles.FirstOrDefault()?.FilePath;
            string spectraDirectory = firstSpectraFilePath == null
                ? null
                : Directory.GetParent(firstSpectraFilePath).FullName;

            // the path shown to the user: the output folder when set, else the spectra folder
            errorReportPath = outputFolderPath ?? spectraDirectory;

            OutputWriter.WriteErrorReport(error, spectraDirectory, outputFolderPath);
        }
        catch (Exception ex2)
        {
            MessageBox.Show("FlashLFQ has crashed with the following error: " + error.Message +
                ".\nThe error report could not be written: " + ex2.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

            return;
        }

        MessageBox.Show(headline + error.Message +
            ".\nError report written to " + errorReportPath, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);
    }

    // read IDs
    var ids = new List<Identification>();

    try
    {
        // the spectra file list is the same for every ID file, so build it once
        var spectraFileInfos = spectraFiles.Select(p => p.SpectraFileInfo).ToList();

        foreach (var identFile in idFiles)
        {
            // append in place instead of re-copying the whole list for each ID file
            ids.AddRange(PsmReader.ReadPsms(identFile.FilePath, false, spectraFileInfos));
        }
    }
    catch (Exception e)
    {
        ReportFailure(e, "FlashLFQ could not read the PSM file: ");
        return;
    }

    if (!ids.Any())
    {
        MessageBox.Show("No peptide IDs for the specified spectra files were found! " +
            "Check to make sure the spectra file names match between the ID file and the spectra files",
            "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

        return;
    }

    // run FlashLFQ engine
    try
    {
        flashLfqEngine = FlashLfqSettings.CreateEngineWithSettings(settings, ids);

        results = flashLfqEngine.Run();
    }
    catch (Exception ex)
    {
        ReportFailure(ex, "FlashLFQ has crashed with the following error: ");
        return;
    }

    // write output
    if (results != null)
    {
        try
        {
            OutputWriter.WriteOutput(Directory.GetParent(spectraFiles.First().FilePath).FullName, results, flashLfqEngine.Silent, outputFolderPath);
        }
        catch (Exception ex)
        {
            MessageBox.Show("Could not write FlashLFQ output: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

            return;
        }
    }
}
/// <summary>
/// Reads the PSMs from every identification file, rebuilds the FlashLFQ engine from the
/// current engine's settings, runs it and writes the output. Any failure is shown to the
/// user in a message box and ends the run; for read and engine failures an error report is
/// written first.
/// </summary>
private void RunFlashLfq()
{
    // Writes an error report for `error` and tells the user where it went. Everything that
    // can fail while doing so (including locating the spectra folder) is inside the try, so
    // a problem in the reporting path cannot mask the original error.
    void ReportFailure(Exception error, string headline)
    {
        string errorReportPath;

        try
        {
            // FirstOrDefault: an empty spectra file list must not throw here
            string firstSpectraFilePath = spectraFileInfo.FirstOrDefault()?.FullFilePathWithExtension;
            string spectraDirectory = firstSpectraFilePath == null
                ? null
                : Directory.GetParent(firstSpectraFilePath).FullName;

            // the path shown to the user: the output folder when set, else the spectra folder
            errorReportPath = outputFolderPath ?? spectraDirectory;

            OutputWriter.WriteErrorReport(error, spectraDirectory, outputFolderPath);
        }
        catch (Exception ex2)
        {
            MessageBox.Show("FlashLFQ has crashed with the following error: " + error.Message +
                ".\nThe error report could not be written: " + ex2.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

            return;
        }

        MessageBox.Show(headline + error.Message +
            ".\nError report written to " + errorReportPath, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);
    }

    // read IDs
    var ids = new List<Identification>();

    try
    {
        foreach (var identFile in identFilesForDataGrid)
        {
            // append in place instead of re-copying the whole list for each ID file
            ids.AddRange(PsmReader.ReadPsms(identFile.FilePath, false, spectraFileInfo));
        }
    }
    catch (Exception e)
    {
        ReportFailure(e, "FlashLFQ could not read the PSM file: ");
        return;
    }

    if (!ids.Any())
    {
        MessageBox.Show("No peptide IDs for the specified spectra files were found! " +
            "Check to make sure the spectra file names match between the ID file and the spectra files",
            "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

        return;
    }

    // run FlashLFQ engine
    try
    {
        // the new engine copies its settings from the engine instance currently held by this
        // object; only the identifications (and silent: false) are supplied fresh
        flashLfqEngine = new FlashLfqEngine(
            allIdentifications: ids,
            normalize: flashLfqEngine.Normalize,
            ppmTolerance: flashLfqEngine.PpmTolerance,
            isotopeTolerancePpm: flashLfqEngine.IsotopePpmTolerance,
            matchBetweenRuns: flashLfqEngine.MatchBetweenRuns,
            matchBetweenRunsPpmTolerance: flashLfqEngine.MbrPpmTolerance,
            integrate: flashLfqEngine.Integrate,
            numIsotopesRequired: flashLfqEngine.NumIsotopesRequired,
            idSpecificChargeState: flashLfqEngine.IdSpecificChargeState,
            requireMonoisotopicMass: flashLfqEngine.RequireMonoisotopicMass,
            silent: false,
            optionalPeriodicTablePath: null,
            maxMbrWindow: flashLfqEngine.MbrRtWindow,
            advancedProteinQuant: flashLfqEngine.AdvancedProteinQuant);

        results = flashLfqEngine.Run();
    }
    catch (Exception ex)
    {
        ReportFailure(ex, "FlashLFQ has crashed with the following error: ");
        return;
    }

    // write output
    if (results != null)
    {
        try
        {
            OutputWriter.WriteOutput(Directory.GetParent(spectraFileInfo.First().FullFilePathWithExtension).FullName, results, outputFolderPath);
        }
        catch (Exception ex)
        {
            MessageBox.Show("Could not write FlashLFQ output: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

            return;
        }
    }
}
/// <summary>
/// Runs the FlashLFQ engine with the user's defined spectra files, ID files, and FlashLFQ
/// settings.
/// Reads the PSMs from every ID file, asks for confirmation when retention times look like
/// seconds, runs the engine, writes the output and reports completion. Any failure is shown
/// to the user in a message box and ends the run; for read and engine failures an error
/// report is written first.
/// </summary>
private void RunFlashLfq()
{
    // Writes an error report for `error` and tells the user where it went. Everything that
    // can fail while doing so (including locating the spectra folder) is inside the try, so
    // a problem in the reporting path cannot mask the original error.
    void ReportFailure(Exception error, string headline)
    {
        string errorReportPath;

        try
        {
            // FirstOrDefault: an empty spectra file list must not throw here
            string firstSpectraFilePath = spectraFiles.FirstOrDefault()?.FilePath;
            string spectraDirectory = firstSpectraFilePath == null
                ? null
                : Directory.GetParent(firstSpectraFilePath).FullName;

            // the path shown to the user: the output folder when set, else the spectra folder
            errorReportPath = outputFolderPath ?? spectraDirectory;

            OutputWriter.WriteErrorReport(error, spectraDirectory, outputFolderPath);
        }
        catch (Exception ex2)
        {
            MessageBox.Show("FlashLFQ has crashed with the following error: " + error.Message +
                ".\nThe error report could not be written: " + ex2.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

            return;
        }

        MessageBox.Show(headline + error.Message +
            ".\nError report written to " + errorReportPath, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);
    }

    // read IDs
    var ids = new List<Identification>();

    try
    {
        // the spectra file list is the same for every ID file, so build it once
        var spectraFileInfos = spectraFiles.Select(p => p.SpectraFileInfo).ToList();

        foreach (var identFile in idFiles)
        {
            // append in place instead of re-copying the whole list for each ID file
            ids.AddRange(PsmReader.ReadPsms(identFile.FilePath, false, spectraFileInfos));
        }
    }
    catch (Exception e)
    {
        ReportFailure(e, "FlashLFQ could not read the PSM file: ");
        return;
    }

    if (!ids.Any())
    {
        MessageBox.Show("No peptide IDs for the specified spectra files were found! " +
            "Check to make sure the spectra file names match between the ID file and the spectra files",
            "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

        return;
    }

    // a retention time above 500 is taken as a sign that the PSM file uses seconds;
    // let the user decide whether to go on
    if (ids.Any(p => p.Ms2RetentionTimeInMinutes > 500))
    {
        var res = MessageBox.Show("It seems that some of the retention times in the PSM file(s) are in seconds and not minutes; FlashLFQ requires the RT to be in minutes. " +
            "Continue with the FlashLFQ run? (only click yes if the RTs are actually in minutes)",
            "Error", MessageBoxButton.YesNo, MessageBoxImage.Hand);

        if (res == MessageBoxResult.No)
        {
            return;
        }
    }

    // run FlashLFQ engine
    try
    {
        flashLfqEngine = FlashLfqSettings.CreateEngineWithSettings(settings, ids);

        results = flashLfqEngine.Run();
    }
    catch (Exception ex)
    {
        ReportFailure(ex, "FlashLFQ has crashed with the following error: ");
        return;
    }

    // write output
    if (results != null)
    {
        try
        {
            OutputWriter.WriteOutput(Directory.GetParent(spectraFiles.First().FilePath).FullName, results, flashLfqEngine.Silent, outputFolderPath);

            MessageBox.Show("Run complete");
        }
        catch (Exception ex)
        {
            MessageBox.Show("Could not write FlashLFQ output: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

            return;
        }
    }
}