Esempio n. 1
        // Derives the FlashLFQ output location from inputPath/outputPath and builds three .tsv file paths
        // (quantified peaks, peptides and proteins), each prefixed with the input file name.
        //   inputPath  - path whose extension-less file name becomes the prefix of every output file.
        //   results    - quantification results; not referenced in the visible lines (presumably the object
        //                that performs the write - verify against the full source).
        //   outputPath - destination directory; when null, the directory of inputPath is used.
        // NOTE(review): this excerpt has no braces, and some statements appear to be missing (see notes below).
        public static void WriteOutput(string inputPath, FlashLfqResults results, string outputPath = null)
            // default the destination to the directory that contains the input
            if (outputPath == null)
                outputPath = Path.GetDirectoryName(inputPath);

            string inputFileName = Path.GetFileNameWithoutExtension(inputPath);

            // NOTE(review): the statement guarded by this check is absent from the excerpt
            // (presumably it creates the directory - confirm).
            if (!Directory.Exists(outputPath))

            // separator placed between the input file name and the table name
            string append = "_FlashLFQ_";

            // if the input file name already mentions FlashLFQ (case-insensitive), use a plain underscore instead
            if (inputFileName.ToLowerInvariant().Contains("flashlfq"))
                append = "_";

                // NOTE(review): the three paths below are arguments of a call whose opening line is not in this excerpt.
                outputPath + Path.DirectorySeparatorChar + inputFileName + append + "QuantifiedPeaks.tsv",
                outputPath + Path.DirectorySeparatorChar + inputFileName + append + "QuantifiedPeptides.tsv",
                outputPath + Path.DirectorySeparatorChar + inputFileName + append + "QuantifiedProteins.tsv"
Esempio n. 2
        // Reads identifications from every file in identFilesForDataGrid, builds a new FlashLFQEngine that copies
        // its settings from the current flashLfqEngine instance, runs it, and writes the results through
        // OutputWriter.WriteOutput. Failures in either step are reported to the user with a MessageBox.
        // NOTE(review): the excerpt has no braces and the `try` lines that pair with the two `catch` clauses are
        // missing; confirm the exact extent of each protected region in the full source.
        private void RunFlashLfq()
            // read IDs
            var ids = new List <Identification>();

            // NOTE(review): each iteration rebuilds the whole list (Concat + ToList), copying all previously read
            // identifications again; appending in place would avoid the repeated copies.
            foreach (var identFile in identFilesForDataGrid)
                ids = ids.Concat(PsmReader.ReadPsms(identFile.FilePath, false, spectraFileInfo)).ToList();

            // run FlashLFQ engine
                // the new engine takes the freshly read ids; every other argument except silent and
                // optionalPeriodicTablePath is copied from the existing flashLfqEngine
                flashLfqEngine = new FlashLFQEngine(
                    allIdentifications: ids,
                    normalize: flashLfqEngine.Normalize,
                    ppmTolerance: flashLfqEngine.PpmTolerance,
                    isotopeTolerancePpm: flashLfqEngine.IsotopePpmTolerance,
                    matchBetweenRuns: flashLfqEngine.MatchBetweenRuns,
                    matchBetweenRunsPpmTolerance: flashLfqEngine.MbrPpmTolerance,
                    integrate: flashLfqEngine.Integrate,
                    numIsotopesRequired: flashLfqEngine.NumIsotopesRequired,
                    idSpecificChargeState: flashLfqEngine.IdSpecificChargeState,
                    requireMonoisotopicMass: flashLfqEngine.RequireMonoisotopicMass,
                    silent: false,
                    optionalPeriodicTablePath: null,
                    maxMbrWindow: flashLfqEngine.MbrRtWindow,
                    advancedProteinQuant: flashLfqEngine.AdvancedProteinQuant);

                results = flashLfqEngine.Run();
            catch (Exception ex)
                // only ex.Message is shown to the user
                MessageBox.Show("FlashLFQ has crashed with the following error: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

            // write output
                // the parent directory of the first spectra file is passed as the "input path" argument
                OutputWriter.WriteOutput(Directory.GetParent(spectraFileInfo.First().FullFilePathWithExtension).FullName, results, outputFolderPath);
            catch (Exception ex)
                MessageBox.Show("Could not write FlashLFQ output: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);
Esempio n. 3
        // Derives the FlashLFQ output location from inputPath/outputPath and builds four .tsv file paths
        // (quantified peaks, modified sequences, base sequences and proteins), each named
        // "<input file name>_FlashLFQ_<table>.tsv".
        //   inputPath  - path whose extension-less file name becomes the prefix of every output file.
        //   results    - quantification results; not referenced in the visible lines (presumably the object
        //                that performs the write - verify against the full source).
        //   outputPath - destination directory; when null, the directory of inputPath is used.
        // NOTE(review): this excerpt has no braces, and some statements appear to be missing (see notes below).
        public static void WriteOutput(string inputPath, FlashLfqResults results, string outputPath = null)
            // default the destination to the directory that contains the input
            if (outputPath == null)
                outputPath = Path.GetDirectoryName(inputPath);

            string inputFileName = Path.GetFileNameWithoutExtension(inputPath);

            // NOTE(review): the statement guarded by this check is absent from the excerpt
            // (presumably it creates the directory - confirm).
            if (!Directory.Exists(outputPath))

                // NOTE(review): the four paths below are arguments of a call whose opening line is not in this excerpt.
                outputPath + Path.DirectorySeparatorChar + inputFileName + "_FlashLFQ_QuantifiedPeaks.tsv",
                outputPath + Path.DirectorySeparatorChar + inputFileName + "_FlashLFQ_QuantifiedModifiedSequences.tsv",
                outputPath + Path.DirectorySeparatorChar + inputFileName + "_FlashLFQ_QuantifiedBaseSequences.tsv",
                outputPath + Path.DirectorySeparatorChar + inputFileName + "_FlashLFQ_QuantifiedProteins.tsv"
Esempio n. 4
        // Derives the FlashLFQ output directory and builds fixed-name .tsv paths inside it with Path.Combine:
        // peaks, peptides, proteins and, only when Bayesian results exist, the fold-change analysis table.
        //   inputPath  - used for its directory (when outputPath is null) and its extension-less file name.
        //   results    - quantification results; its ProteinGroups are inspected to detect Bayesian results.
        //   silent     - not referenced in the visible lines (presumably forwarded to the writer - verify).
        //   outputPath - destination directory; when null, the directory of inputPath is used.
        // NOTE(review): this excerpt has no braces, and some statements appear to be missing (see notes below).
        public static void WriteOutput(string inputPath, FlashLfqResults results, bool silent, string outputPath = null)
            // default the destination to the directory that contains the input
            if (outputPath == null)
                outputPath = Path.GetDirectoryName(inputPath);

            // NOTE(review): inputFileName is not used by any visible line of this variant.
            string inputFileName = Path.GetFileNameWithoutExtension(inputPath);

            // NOTE(review): the statement guarded by this check is absent from the excerpt
            // (presumably it creates the directory - confirm).
            if (!Directory.Exists(outputPath))

            // true when at least one protein group has any entry in ConditionToQuantificationResults
            bool bayesianResults = results.ProteinGroups.Any(p => p.Value.ConditionToQuantificationResults.Any());

                // NOTE(review): the paths below are arguments of a call whose opening line is not in this excerpt.
                Path.Combine(outputPath, "QuantifiedPeaks.tsv"),
                Path.Combine(outputPath, "QuantifiedPeptides.tsv"),
                Path.Combine(outputPath, "QuantifiedProteins.tsv"),
                // null is passed instead of a path when there are no Bayesian results
                bayesianResults ? Path.Combine(outputPath, "BayesianFoldChangeAnalysis.tsv") : null,
Esempio n. 5
        // Runs the search task: prepares SILAC/turnover labels when quantification is enabled, loads modifications,
        // proteins and spectral libraries, records the search settings as prose, searches every spectra file
        // (modern, non-specific or classic), and finally assembles the post-search analysis parameters and task.
        //   OutputFolder       - stored in the post-search parameters and combined with "Individual File Results".
        //   dbFilenameList     - database files passed to protein loading, spectral-library loading and indexing.
        //   currentRawFileList - spectra file paths, searched one at a time in list order.
        //   taskId             - identifier used for status and progress reporting.
        //   fileSettingsList   - per-file settings, indexed in parallel with currentRawFileList.
        // NOTE(review): this excerpt has lost its braces and several statements (loop exits, initializer closings,
        // `else` keywords, the return statement), so the nesting implied by the indentation must be confirmed
        // against the full source.
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
            if (SearchParameters.DoQuantification)
                // disable quantification if a .mgf is being used
                if (currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
                    SearchParameters.DoQuantification = false;
                //if we're doing SILAC, assign and add the silac labels to the residue dictionary
                else if (SearchParameters.SilacLabels != null || SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
                    char heavyLabel = 'a'; //char to assign
                    //add the Turnoverlabels to the silacLabels list. They weren't there before just to prevent duplication in the tomls
                    if (SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
                        //original silacLabels object is null, so we need to initialize it
                        SearchParameters.SilacLabels = new List <SilacLabel>();
                        if (SearchParameters.StartTurnoverLabel != null)
                            // each update returns the rewritten label together with the next heavy character to assign
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.StartTurnoverLabel, heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            SearchParameters.StartTurnoverLabel = updatedLabel.updatedLabel;
                        if (SearchParameters.EndTurnoverLabel != null)
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.EndTurnoverLabel, heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            SearchParameters.EndTurnoverLabel = updatedLabel.updatedLabel;
                        //change the silac residues to lower case amino acids (currently null)
                        List <SilacLabel> updatedLabels = new List <SilacLabel>();
                        for (int i = 0; i < SearchParameters.SilacLabels.Count; i++)
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.SilacLabels[i], heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            // NOTE(review): no visible line adds the updated label to updatedLabels - likely dropped from this excerpt.
                        SearchParameters.SilacLabels = updatedLabels;
            //if no quant, remove any silac labels that may have been added, because they screw up downstream analysis
            if (!SearchParameters.DoQuantification) //using "if" instead of "else", because DoQuantification can change if it's an mgf
                SearchParameters.SilacLabels = null;

            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            SanitizeProteinDatabase(proteinList, SearchParameters.TCAmbiguity);

            // load spectral libraries
            var spectralLibrary = LoadSpectralLibraries(taskId, dbFilenameList);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following search settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            // int.MaxValue is treated as "no maximum length configured"
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy)
                                            + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the search task
            MyTaskResults = new MyTaskResults(this);
            List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>();

            //generate an array to store category specific fdr values (for speedy semi/nonspecific searches)
            int numFdrCategories = (int)(Enum.GetValues(typeof(FdrCategory)).Cast <FdrCategory>().Last() + 1); //+1 because it starts at zero

            List <PeptideSpectralMatch>[] allCategorySpecificPsms = new List <PeptideSpectralMatch> [numFdrCategories];
            for (int i = 0; i < numFdrCategories; i++)
                allCategorySpecificPsms[i] = new List <PeptideSpectralMatch>();

            // never reassigned in the visible lines; passed on as null to the post-search parameters
            FlashLfqResults flashLfqResults = null;

            MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

            // deferred LINQ query: it is only enumerated when the HashSet of digestion params is built further down
            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

            int    completedFiles = 0;
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);
            Status("Searching files...", new List <string> {
                taskId, "Individual Spectra Files"

            // keyed by extension-less file name; value = { number of MS2 scans in the file, number of MS2 scan objects searched }
            Dictionary <string, int[]> numMs2SpectraPerFile = new Dictionary <string, int[]>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
                // NOTE(review): the body of this stop check is absent from the excerpt (presumably exits the loop - confirm).
                if (GlobalVariables.StopLoops)

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile

                // merge the task-wide parameters with this file's specific settings
                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                NewCollection(Path.GetFileName(origDataFile), thisId);
                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
                Status("Getting ms2 scans...", thisId);
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
                numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });

                // one result slot per MS2 scan object, in the same order as arrayOfMs2ScansSortedByMass
                PeptideSpectralMatch[] fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

                // modern search
                if (SearchParameters.SearchType == SearchType.Modern)
                    for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                        List <PeptideWithSetModifications> peptideIndex = null;
                        // slice of the protein list for this partition: start = partition * Count / TotalPartitions,
                        // length = (start of the next partition) - (start of this partition)
                        List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions,
                                                                                ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                        Status("Getting fragment dictionary...", new List <string> {
                        var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                             SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, combinedParams, FileSpecificParameters,
                                                             SearchParameters.MaxFragmentSize, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List <string> {
                        List <int>[] fragmentIndex  = null;
                        List <int>[] precursorIndex = null;

                        // index generation is serialized through indexLock
                        lock (indexLock)
                            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

                        Status("Searching files...", taskId);

                        new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition,
                                               combinedParams, this.FileSpecificParameters, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                        ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                        // NOTE(review): the body of this stop check is absent from the excerpt.
                        if (GlobalVariables.StopLoops)
                // nonspecific search
                else if (SearchParameters.SearchType == SearchType.NonSpecific)
                    PeptideSpectralMatch[][] fileSpecificPsmsSeparatedByFdrCategory = new PeptideSpectralMatch[numFdrCategories][]; //generate an array of all possible locals
                    for (int i = 0; i < numFdrCategories; i++)                                                                      //only add if we're using for FDR, else ignore it as null.
                        fileSpecificPsmsSeparatedByFdrCategory[i] = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

                    //create params for N, C, or both if semi
                    List <CommonParameters> paramsToUse = new List <CommonParameters> {
                    if (combinedParams.DigestionParams.SearchModeType == CleavageSpecificity.Semi) //if semi, we need to do both N and C to hit everything
                        List <FragmentationTerminus> terminiToUse = new List <FragmentationTerminus> {
                            FragmentationTerminus.N, FragmentationTerminus.C
                        // NOTE(review): the body of this loop is absent from the excerpt.
                        foreach (FragmentationTerminus terminus in terminiToUse) //set both termini

                    //Compress array of deconvoluted ms2 scans to avoid searching the same ms2 multiple times while still identifying coisolated peptides
                    List <int>[] coisolationIndex = new List <int>[] { new List <int>() };
                    if (arrayOfMs2ScansSortedByMass.Length != 0)
                        // build an array indexed by one-based scan number, then drop the scan numbers that were never seen
                        int maxScanNumber = arrayOfMs2ScansSortedByMass.Max(x => x.OneBasedScanNumber);
                        coisolationIndex = new List <int> [maxScanNumber + 1];
                        for (int i = 0; i < arrayOfMs2ScansSortedByMass.Length; i++)
                            int scanNumber = arrayOfMs2ScansSortedByMass[i].OneBasedScanNumber;
                            if (coisolationIndex[scanNumber] == null)
                                coisolationIndex[scanNumber] = new List <int> {
                        coisolationIndex = coisolationIndex.Where(x => x != null).ToArray();

                    //foreach terminus we're going to look at
                    foreach (CommonParameters paramToUse in paramsToUse)
                        //foreach database partition
                        for (int currentPartition = 0; currentPartition < paramToUse.TotalPartitions; currentPartition++)
                            List <PeptideWithSetModifications> peptideIndex = null;

                            // same partition slicing as in the modern search, using paramToUse.TotalPartitions
                            List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / paramToUse.TotalPartitions,
                                                                                    ((currentPartition + 1) * proteinList.Count / paramToUse.TotalPartitions) - (currentPartition * proteinList.Count / paramToUse.TotalPartitions));

                            List <int>[] fragmentIndex  = null;
                            List <int>[] precursorIndex = null;

                            Status("Getting fragment dictionary...", new List <string> {
                            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                                 SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, paramToUse, FileSpecificParameters,
                                                                 SearchParameters.MaxFragmentSize, true, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), SearchParameters.TCAmbiguity, new List <string> {
                            lock (indexLock)
                                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

                            Status("Searching files...", taskId);

                            new NonSpecificEnzymeSearchEngine(fileSpecificPsmsSeparatedByFdrCategory, arrayOfMs2ScansSortedByMass, coisolationIndex, peptideIndex, fragmentIndex,
                                                              precursorIndex, currentPartition, paramToUse, this.FileSpecificParameters, variableModifications, massDiffAcceptor,
                                                              SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + paramToUse.TotalPartitions + "!", thisId));
                            // NOTE(review): the body of this stop check is absent from the excerpt.
                            if (GlobalVariables.StopLoops)
                    // NOTE(review): the statement inside the innermost check below is absent from the excerpt
                    // (presumably merges this file's per-category PSMs into allCategorySpecificPsms - confirm).
                    lock (psmLock)
                        for (int i = 0; i < allCategorySpecificPsms.Length; i++)
                            if (allCategorySpecificPsms[i] != null)
                // classic search
                    // NOTE(review): the `else` introducing this branch and the call that runs the engine are not visible here.
                    Status("Starting search...", thisId);
                    var newClassicSearchEngine = new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                                         SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, spectralLibrary, thisId, SearchParameters.WriteSpectralLibrary);

                    ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));

                //look for internal fragments
                if (SearchParameters.MinAllowedInternalFragmentLength != 0)
                    MatchInternalFragmentIons(fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams, SearchParameters.MinAllowedInternalFragmentLength);

                // calculate/set spectral angles if there is a spectral library being used
                if (spectralLibrary != null)
                    Status("Calculating spectral library similarity...", thisId);
                // NOTE(review): by indentation this call sits outside the null check above, so it would also run with a null library - confirm.
                SpectralLibrarySearchFunction.CalculateSpectralAngles(spectralLibrary, fileSpecificPsms, arrayOfMs2ScansSortedByMass, combinedParams);

                // NOTE(review): the statement protected by this lock is absent from the excerpt.
                lock (psmLock)

                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                // NOTE(review): both operands are int, so this is integer division (0 until completedFiles reaches the
                // file count); no visible line increments completedFiles either - confirm the intended progress value.
                ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"

            // NOTE(review): the statement guarded by this check is absent from the excerpt.
            if (spectralLibrary != null)

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"

            int numNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

            //resolve category specific fdrs (for speedy semi and nonspecific)
            if (SearchParameters.SearchType == SearchType.NonSpecific)
                allPsms = NonSpecificEnzymeSearchEngine.ResolveFdrCategorySpecificPsms(allCategorySpecificPsms, numNotches, taskId, CommonParameters, FileSpecificParameters);

            // bundle everything gathered above for the post-search analysis step
            PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters
                SearchTaskResults             = MyTaskResults,
                SearchTaskId                  = taskId,
                SearchParameters              = SearchParameters,
                ProteinList                   = proteinList,
                AllPsms                       = allPsms,
                VariableModifications         = variableModifications,
                FixedModifications            = fixedModifications,
                ListOfDigestionParams         = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)),
                CurrentRawFileList            = currentRawFileList,
                MyFileManager                 = myFileManager,
                NumNotches                    = numNotches,
                OutputFolder                  = OutputFolder,
                IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results"),
                FlashLfqResults               = flashLfqResults,
                FileSettingsList              = fileSettingsList,
                NumMs2SpectraPerFile          = numMs2SpectraPerFile,
                DatabaseFilenameList          = dbFilenameList
            // NOTE(review): the lines that run postProcessing and return its result are not visible in this excerpt.
            PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask
                Parameters             = parameters,
                FileSpecificParameters = this.FileSpecificParameters,
                CommonParameters       = CommonParameters

Esempio n. 6
        // Builds a map from each original spectra file to the list of labeled spectra files derived from it,
        // after clearing flashLfqResults.SpectraFiles so the labeled files can replace the originals.
        //   flashLfqResults           - results object whose SpectraFiles collection is cleared here.
        //   allSilacLabels            - labels for which a labeled file is generated per original file.
        //   startLabel / endLabel     - both null selects the multiplex branch shown below.
        //   quantifyUnlabeledPeptides - selects how each original file's list is first populated.
        //   spectraFileInfo           - the original spectra files.
        // NOTE(review): this excerpt has no braces and stops before the method ends (no return statement and no
        // non-multiplex branch are visible); confirm the remaining logic against the full source.
        private static Dictionary <SpectraFileInfo, List <SpectraFileInfo> > CreateSilacRawFiles(FlashLfqResults flashLfqResults, List <SilacLabel> allSilacLabels, SilacLabel startLabel, SilacLabel endLabel, bool quantifyUnlabeledPeptides, List <SpectraFileInfo> spectraFileInfo)
            //update number of spectra files to include a new file for each label*condition
            Dictionary <SpectraFileInfo, List <SpectraFileInfo> > originalToLabeledFileInfoDictionary = new Dictionary <SpectraFileInfo, List <SpectraFileInfo> >();

            flashLfqResults.SpectraFiles.Clear(); //clear existing files so we can replace them with labeled ones

            //foreach existing file
            if (startLabel == null && endLabel == null) //if multiplex
                //populate dictionary
                // NOTE(review): the two ForEach lines below look like the `if` and `else` arms; the `else` keyword and the
                // contents of the first list initializer are missing from this excerpt.
                if (quantifyUnlabeledPeptides)
                    spectraFileInfo.ForEach(x => originalToLabeledFileInfoDictionary.Add(x, new List <SpectraFileInfo> {
                    spectraFileInfo.ForEach(x => originalToLabeledFileInfoDictionary.Add(x, new List <SpectraFileInfo>()));

                //get the labeled
                foreach (SilacLabel label in allSilacLabels)
                    List <SpectraFileInfo> labeledFiles = new List <SpectraFileInfo>();
                    foreach (SpectraFileInfo originalFile in spectraFileInfo)
                        //foreach label, add a new file with the label
                        SpectraFileInfo labeledInfo = GetHeavyFileInfo(originalFile, label);
Esempio n. 7
        //If SILAC (Post-Quantification), compress the light/heavy protein group pairs into the same light protein group but different files
        //Create new files for each silac label and file so that "file 1" now becomes "file 1 (light)" and "file 1 (heavy)"
        //Change heavy residue into the light residue plus a string label ("PEPTIDEa" -> "PEPTIDEK(+8.014)")
        //This light to heavy conversion needs to happen for the flashLFQ peptides here, but can't for the psm peptides, which are constrained to the protein
        //i.e. pwsms currently don't have sequences; they have start/end residues and a protein sequence. We have to change the output sequences when they're created.
        public static void SilacConversionsPostQuantification(List <SilacLabel> allSilacLabels, SilacLabel startLabel, SilacLabel endLabel,
                                                              List <SpectraFileInfo> spectraFileInfo, List <ProteinGroup> proteinGroups, HashSet <DigestionParams> listOfDigestionParams, FlashLfqResults flashLfqResults,
                                                              List <PeptideSpectralMatch> allPsms, Dictionary <string, int> modsToWriteSelection, bool quantifyUnlabeledPeptides)
            //do protein quant if we had any results
            //if no results, we still may need to edit the psms
            if (flashLfqResults != null) //can be null if no unambiguous psms were found
                //after this point, we now have quantification values for the peptides, but they all belong to the same "unlabeled" protein and are in the same file
                //We can remove "labeled" peptides from each file and put them in a new file as "unlabeled".

                //MAKE NEW RAW FILES

                //update number of spectra files to include a new file for each label/condition
                Dictionary <SpectraFileInfo, List <SpectraFileInfo> > originalToLabeledFileInfoDictionary = CreateSilacRawFiles(flashLfqResults, allSilacLabels, startLabel, endLabel, quantifyUnlabeledPeptides, spectraFileInfo);

                //we have the files, now let's reassign the psms.
                //there are a few ways to do this, but we're going to generate the "base" peptide and assign to that

                //Get Dictionary of protein accessions to peptides
                Dictionary <string, List <FlashLFQ.Peptide> > unlabeledToPeptidesDictionary = GetDictionaryOfProteinAccessionsToPeptides(flashLfqResults.PeptideModifiedSequences.Values, allSilacLabels, startLabel, endLabel);

                //we now have a dictionary of unlabeledBaseSequence to the labeled peptides
                //Better SILAC results can be obtained by using the summed intensities from ms1 scans where all peaks were found, rather than the apex
                //foreach peptide, unlabeled peptide, get the isotopic envelope intensities for each labeled peptide in each file
                //save the intensities from ms1s that are shared. If no ms1s contains all the peaks, then just use the apex intensity (default)
                CalculateSilacIntensities(flashLfqResults.Peaks, unlabeledToPeptidesDictionary);

                //SPLIT THE FILES
                List <FlashLFQ.Peptide> updatedPeptides = new List <FlashLFQ.Peptide>();

                //split the heavy/light peptides into separate raw files, remove the heavy peptide
                if (startLabel != null || endLabel != null) //if turnover
                    //foreach group, the labeled peptides should be split into their labeled files
                    //we're deleting the heavy results after we pull those results into a different file
                    foreach (SpectraFileInfo info in spectraFileInfo)
                        string          fullPathWithExtension    = info.FullFilePathWithExtension;
                        string[]        pathArray                = fullPathWithExtension.Split('.');
                        string          extension                = pathArray.Last();
                        string          filePathWithoutExtension = fullPathWithExtension.Substring(0, fullPathWithExtension.Length - extension.Length - 1); //-1 removes the '.'
                        SpectraFileInfo lightInfo                = new SpectraFileInfo(filePathWithoutExtension + "_Original." + extension, info.Condition, info.BiologicalReplicate, info.TechnicalReplicate, info.Fraction);
                        SpectraFileInfo heavyInfo                = new SpectraFileInfo(filePathWithoutExtension + "_NewlySynthesized." + extension, info.Condition + "_NewlySynthesized", info.BiologicalReplicate, info.TechnicalReplicate, info.Fraction);
                        originalToLabeledFileInfoDictionary[info] = new List <SpectraFileInfo> {
                            lightInfo, heavyInfo

                    //This step converts the quantification intensities from light/heavy to original/newlySynthesized by splitting up the missed cleavage mixtures
                    foreach (KeyValuePair <string, List <FlashLFQ.Peptide> > kvp in unlabeledToPeptidesDictionary)
                        string unlabeledSequence         = kvp.Key; //this will be the key for the new quant entry
                        List <FlashLFQ.Peptide> peptides = kvp.Value;
                        if (peptides.Count != 1)                    //sometimes it's one if there is no label site on the peptide (e.g. label K, peptide is PEPTIDER)
                            //Missed cleavages can yield multiple peptides (e.g. 1 missed = LL, LH, HH; 2 missed = LLL, LLH, LHH, HHH; etc)
                            //Compress into 2 values: Light and Heavy
                            FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(unlabeledSequence, unlabeledSequence, peptides[0].UseForProteinQuant, CleanPastProteinQuant(peptides[0].ProteinGroups)); //needed to keep protein info.
                            foreach (SpectraFileInfo info in spectraFileInfo)
                                int    maxNumberHeavyAminoAcids    = peptides.Count - 1;
                                double lightIntensity              = 0;
                                double heavyIntensity              = 0;
                                int    numUniquePeptidesQuantified = 0;
                                for (int numHeavyAminoAcids = 0; numHeavyAminoAcids < peptides.Count; numHeavyAminoAcids++)
                                    double totalIntensity = peptides[numHeavyAminoAcids].GetIntensity(info);
                                    if (totalIntensity > 0)
                                        //prevent confidence of a ratio if only the HL (and not the LL or HH) is observed.
                                        //If LL or HH is observed (but not any other), the user knows the ratio is only from one peak.
                                        if (numHeavyAminoAcids == 0 || numHeavyAminoAcids == maxNumberHeavyAminoAcids)
                                            numUniquePeptidesQuantified += 2;
                                        double partHeavyIntensity = totalIntensity * numHeavyAminoAcids / maxNumberHeavyAminoAcids;
                                        lightIntensity += totalIntensity - partHeavyIntensity;
                                        heavyIntensity += partHeavyIntensity;
                                //If only a mixed peptide with a missed cleavage was identified, reset the intensity values to zero so the user doesn't get a discrete, inaccurate measurement
                                if (numUniquePeptidesQuantified < 2)
                                    lightIntensity = 0;
                                    heavyIntensity = 0;

                                List <SpectraFileInfo> updatedInfo = originalToLabeledFileInfoDictionary[info];
                                SpectraFileInfo        startInfo   = updatedInfo[0];
                                SpectraFileInfo        endInfo     = updatedInfo[1];

                                updatedPeptide.SetIntensity(startInfo, lightIntensity);                           //assign the corrected light intensity
                                updatedPeptide.SetDetectionType(startInfo, peptides.First().GetDetectionType(info));
                                updatedPeptide.SetIntensity(endInfo, heavyIntensity);                             //assign the corrected heavy intensity to the heavy file
                                updatedPeptide.SetDetectionType(endInfo, peptides.Last().GetDetectionType(info)); //could include the mixed here if it really matters

                            //add the updated peptide to the list
                else //multiplex
                    foreach (var kvp in unlabeledToPeptidesDictionary)
                        string unlabeledSequence         = kvp.Key;
                        List <FlashLFQ.Peptide> peptides = kvp.Value;
                        FlashLFQ.Peptide        representativePeptide = peptides[0];
                        FlashLFQ.Peptide        updatedPeptide        = new FlashLFQ.Peptide(unlabeledSequence, unlabeledSequence, representativePeptide.UseForProteinQuant, CleanPastProteinQuant(representativePeptide.ProteinGroups)); //needed to keep protein info.

                        //foreach original file
                        foreach (SpectraFileInfo info in spectraFileInfo)
                            List <SpectraFileInfo> filesForThisFile = originalToLabeledFileInfoDictionary[info];
                            for (int i = 0; i < peptides.Count; i++) //the files and the peptides can use the same index, because there should be a distinct file for each label/peptide
                                SpectraFileInfo  currentInfo    = filesForThisFile[i];
                                FlashLFQ.Peptide currentPeptide = peptides[i];
                                updatedPeptide.SetIntensity(currentInfo, currentPeptide.GetIntensity(info));
                                updatedPeptide.SetDetectionType(currentInfo, currentPeptide.GetDetectionType(info));

                //Update peptides
                var peptideResults = flashLfqResults.PeptideModifiedSequences;
                foreach (FlashLFQ.Peptide peptide in updatedPeptides)
                    peptideResults.Add(peptide.Sequence, peptide);

                //Do protein quant

                //update proteingroups to have all files for quantification
                if (proteinGroups != null)
                    List <SpectraFileInfo> allInfo = originalToLabeledFileInfoDictionary.SelectMany(x => x.Value).ToList();
                    foreach (ProteinGroup proteinGroup in proteinGroups)
                        proteinGroup.FilesForQuantification = allInfo;
                        proteinGroup.IntensitiesByFile      = new Dictionary <SpectraFileInfo, double>();

                        foreach (var spectraFile in allInfo)
                            if (flashLfqResults.ProteinGroups.TryGetValue(proteinGroup.ProteinGroupName, out var flashLfqProteinGroup))
                                proteinGroup.IntensitiesByFile.Add(spectraFile, flashLfqProteinGroup.GetIntensity(spectraFile));
                                //needed for decoys/contaminants/proteins that aren't quantified
                                proteinGroup.IntensitiesByFile.Add(spectraFile, 0);

                //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
                if (flashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
                    var lfqPeaks = flashLfqResults.Peaks;
                    List <SpectraFileInfo> peakKeys = lfqPeaks.Keys.ToList();

                    foreach (SpectraFileInfo key in peakKeys)
                        List <ChromatographicPeak> peaks = lfqPeaks[key];
                        for (int i = 0; i < peaks.Count; i++)
                            var peak = peaks[i];
                            //check if we're removing light peaks and if it's a light peak
                            if (peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, allSilacLabels) != null)) //if no ids have any labels, remove them
                                List <Identification> updatedIds = new List <Identification>();
                                foreach (var id in peak.Identifications)
                                    string            baseSequence = id.BaseSequence;
                                    string            fullSequence = id.ModifiedSequence;
                                    List <SilacLabel> labels       = GetRelevantLabelsFromBaseSequenceForOutput(id.BaseSequence, allSilacLabels);
                                    if (labels != null)
                                        foreach (SilacLabel label in labels)
                                            baseSequence = GetSilacLightBaseSequence(baseSequence, label);
                                            fullSequence = GetSilacLightFullSequence(fullSequence, label);

                                    Identification updatedId = new Identification(

            //convert all psms into human readable format
            for (int i = 0; i < allPsms.Count; i++)
                allPsms[i].ResolveHeavySilacLabel(allSilacLabels, modsToWriteSelection);
Esempio n. 8
        // Command-line driver for one FlashLFQ run, as far as this listing shows: it locates the
        // experimental design file, collects spectra files from settings.SpectraFileRepository,
        // checks for duplicate file names and the Thermo licence, builds one SpectraFileInfo per
        // file, reads the PSMs, then creates and runs the engine and writes its output.
        // Messages go to the console; most of them are skipped when settings.Silent is set.
        // NOTE(review): this listing shows `catch` clauses without their `try` lines and has no
        // braces, so some statements appear to be elided. The comments below describe only the
        // statements that are visible; anything about elided lines is marked as an assumption.
        private static void Run(FlashLfqSettings settings)
            // report the caught exception's message unless running silently
            catch (Exception e)
                if (!settings.Silent)
                    Console.WriteLine("Error: " + e.Message);

            // check to see if experimental design file exists
            // the design file is looked up under a fixed name inside the spectra file directory
            string assumedPathToExpDesign = Path.Combine(settings.SpectraFileRepository, "ExperimentalDesign.tsv");

            // normalization and Bayesian protein quantification both need the design file
            if ((settings.Normalize || settings.BayesianProteinQuant) && !File.Exists(assumedPathToExpDesign))
                if (!settings.Silent)
                    Console.WriteLine("Could not find experimental design file " +
                                      "(required for normalization and Bayesian statistical analysis): " + assumedPathToExpDesign);

            // set up spectra file info
            // keep only files whose lower-cased extension is in acceptedSpectrumFileFormats
            // (that collection is declared outside this method and is not visible here)
            List <SpectraFileInfo> spectraFileInfos = new List <SpectraFileInfo>();
            List <string>          filePaths        = Directory.GetFiles(settings.SpectraFileRepository)
                                                      .Where(f => acceptedSpectrumFileFormats.Contains(Path.GetExtension(f).ToLowerInvariant())).ToList();

            // check for duplicate file names (agnostic of file extension)
            foreach (var fileName in filePaths.GroupBy(p => Path.GetFileNameWithoutExtension(p)))
                if (fileName.Count() > 1)
                    // NOTE(review): the group key is already the extension-less name, so every element
                    // maps to the same string and Distinct() yields a single value; Path.GetExtension
                    // (or the full file name) may have been intended for this message - confirm.
                    var types = fileName.Select(p => Path.GetFileNameWithoutExtension(p)).Distinct();

                    if (!settings.Silent)
                        Console.WriteLine("Multiple spectra files with the same name were detected (maybe " + string.Join(" and ", types) + "?). " +
                                          "Please remove or rename duplicate files from the spectra file directory.");

            // NOTE(review): no statement is visible under this condition in this listing
            if (settings.PrintThermoLicenceViaCommandLine)

            // check thermo licence agreement
            // only entered when at least one collected file has a .raw extension
            if (filePaths.Select(v => Path.GetExtension(v).ToLowerInvariant()).Any(f => f == ".raw"))
                var licenceAgreement = LicenceAgreementSettings.ReadLicenceSettings();

                if (!licenceAgreement.HasAcceptedThermoLicence)
                    // NOTE(review): the statements that record acceptance, and the `else`/`try` lines that
                    // presumably separate the command-line path from the interactive prompt, are not visible here
                    if (settings.AcceptThermoLicenceViaCommandLine)
                        if (!settings.ReadOnlyFileSystem)
                        // decided to write this even if it's on silent mode...
                        Console.WriteLine("\nIn order to search Thermo .raw files, you must agree to the above terms. Do you agree to the above terms? y/n\n");

                        // NOTE(review): Console.ReadLine() returns null when no more input is available,
                        // which would make the ToLowerInvariant() call below throw - confirm whether
                        // that case needs handling
                        string res = Console.ReadLine();

                        // the answer is compared case-insensitively against "y"
                        if (res.ToLowerInvariant() == "y")
                                if (!settings.ReadOnlyFileSystem)
                            catch (Exception e)
                            Console.WriteLine("Thermo licence has been declined. Exiting FlashLFQ. You can still search .mzML and .mgf files without agreeing to the Thermo licence.");

            // with a design file: one SpectraFileInfo per spectra file, filled from its design row
            if (File.Exists(assumedPathToExpDesign))
                // every line of the file is keyed by its first tab-separated column; the value is the whole line.
                // ToDictionary throws if two lines share the same first column.
                var experimentalDesign = File.ReadAllLines(assumedPathToExpDesign)
                                         .ToDictionary(v => v.Split('\t')[0], v => v);

                foreach (var file in filePaths)
                    string filename = Path.GetFileNameWithoutExtension(file);

                    // the dictionary indexer throws KeyNotFoundException when the spectra file
                    // has no row in the design file
                    var expDesignForThisFile = experimentalDesign[filename];
                    var split = expDesignForThisFile.Split('\t');

                    // columns read here: [1] condition, [2] biorep, [3] fraction, [4] techrep
                    string condition = split[1];
                    int    biorep    = int.Parse(split[2]);
                    int    fraction  = int.Parse(split[3]);
                    int    techrep   = int.Parse(split[4]);

                    // experimental design info passed in here for each spectra file
                    // each parsed number is decremented by one before being passed in - presumably the
                    // file is 1-based and SpectraFileInfo is 0-based; confirm
                    spectraFileInfos.Add(new SpectraFileInfo(fullFilePathWithExtension: file,
                                                             condition: condition,
                                                             biorep: biorep - 1,
                                                             fraction: fraction - 1,
                                                             techrep: techrep - 1));
                // NOTE(review): no `else` is visible in this listing; presumably this loop is the fallback
                // used when no design file exists - confirm. It gives every file the condition "Default"
                // and uses the file's list index as its biorep.
                for (int i = 0; i < filePaths.Count; i++)
                    var file = filePaths[i];
                    spectraFileInfos.Add(new SpectraFileInfo(fullFilePathWithExtension: file,
                                                             condition: "Default",
                                                             biorep: i,
                                                             fraction: 0,
                                                             techrep: 0));

            // check the validity of the settings and experimental design
            // NOTE(review): the validation call itself is not visible here; only its error reporting is
            catch (Exception e)
                if (!settings.Silent)
                    Console.WriteLine("Error: " + e.Message);

            // set up IDs
            List <Identification> ids;

                ids = PsmReader.ReadPsms(settings.PsmIdentificationPath, settings.Silent, spectraFileInfos);
            // no Silent check is visible around this message
            catch (Exception e)
                Console.WriteLine("Problem reading PSMs: " + e.Message);

            if (ids.Any())
                if (!settings.Silent)
                    Console.WriteLine("Setup is OK; read in " + ids.Count + " identifications; starting FlashLFQ engine");

                // write FlashLFQ settings to a file
                // NOTE(review): the statement under the Directory.Exists check is not visible in this listing
                if (!Directory.Exists(settings.OutputPath))
                Nett.Toml.WriteFile(settings, Path.Combine(settings.OutputPath, "FlashLfqSettings.toml"));

                // make engine with desired settings
                // both start as null so the code after the catch can tell whether the run produced results
                FlashLfqEngine  engine  = null;
                FlashLfqResults results = null;
                    engine = FlashLfqSettings.CreateEngineWithSettings(settings, ids);

                    // run
                    results = engine.Run();
                catch (Exception ex)
                    // the path shown in the message is the first spectra file's directory, replaced by
                    // settings.OutputPath when that is not null.
                    // NOTE(review): First() throws if filePaths is empty - confirm that cannot happen here
                    string errorReportPath = Directory.GetParent(filePaths.First()).FullName;
                    if (settings.OutputPath != null)
                        errorReportPath = settings.OutputPath;

                    if (!settings.Silent)
                        Console.WriteLine("FlashLFQ has crashed with the following error: " + ex.Message +
                                          ".\nError report written to " + errorReportPath);

                    // both candidate locations are handed to the writer
                    OutputWriter.WriteErrorReport(ex, Directory.GetParent(filePaths.First()).FullName, settings.OutputPath);

                // output
                // results stays null when the engine step above threw, in which case nothing is written
                if (results != null)
                        OutputWriter.WriteOutput(settings.PsmIdentificationPath, results, settings.Silent, settings.OutputPath);
                    catch (Exception ex)
                        if (!settings.Silent)
                            Console.WriteLine("Could not write FlashLFQ output: " + ex.Message);
                // NOTE(review): presumably the `else` of `if (ids.Any())`, elided in this listing - confirm
                if (!settings.Silent)
                    Console.WriteLine("No peptide IDs for the specified spectra files were found! " +
                                      "Check to make sure the spectra file names match between the ID file and the spectra files");
Esempio n. 9
        // Command-line entry point. Declares the accepted arguments with FluentCommandLineParser and,
        // when parsing succeeds and a PSM input path was given, checks the input paths, builds one
        // SpectraFileInfo per spectrum file, reads the PSMs, runs a FlashLfqEngine and writes the output.
        // NOTE(review): this listing has no braces and shows `catch` clauses without their `try` lines,
        // and several statements end without their closing tokens, so some lines appear to be elided.
        // The comments below describe only what is visible; guesses about elided lines are marked.
        public static void Main(string[] args)
            // parameters
            // extensions are stored upper-case; the file filter further down upper-cases each
            // file's extension before checking it against this list
            List <string> acceptedSpectrumFileFormats = new List <string> {
                ".RAW", ".MZML"

            // setup parameters
            var p = new FluentCommandLineParser <ApplicationArguments>();

            // help text printed for "?" / "help"
            p.SetupHelp("?", "help")
            .Callback(text => Console.WriteLine(
                          "Valid arguments:\n" +
                          "--idt [string|identification file path (TSV format)]\n" +
                          "--rep [string|directory containing spectrum data files]\n" +
                          "--out [string|output directory]\n" +
                          "--ppm [double|ppm tolerance]\n" +
                          "--iso [double|isotopic distribution tolerance in ppm]\n" +
                          "--sil [bool|silent mode]\n" +
                          "--int [bool|integrate features]\n" +
                          "--mbr [bool|match between runs]\n" +
                          "--mrt [double|maximum MBR window in minutes]\n" +
                          "--chg [bool|use only precursor charge state]\n" +
                          "--rmm [bool|require observed monoisotopic mass peak]\n" +
                          "--nis [int|number of isotopes required to be observed]\n" +
                          "--nor [bool|normalize intensity results]\n" +
                          "--pro [bool|advanced protein quantification]\n"

            // one Setup call per ApplicationArguments property
            // NOTE(review): the chained option-name/default calls are not visible in this listing
            p.Setup(arg => arg.PsmInputPath) // PSMs file

            p.Setup(arg => arg.RawFilesPath) // spectrum files

            p.Setup(arg => arg.OutputPath) // output path

            p.Setup(arg => arg.PpmTolerance) // ppm tolerance

            p.Setup(arg => arg.IsotopePpmTolerance) // isotope ppm tolerance

            p.Setup(arg => arg.Silent) // do not display output messages

            p.Setup(arg => arg.Integrate) // integrate

            p.Setup(arg => arg.MatchBetweenRuns) // match between runs

            p.Setup(arg => arg.MbrRtWindow) // maximum match-between-runs window in minutes

            p.Setup(arg => arg.IdSpecificChargeState) // only use PSM-identified charge states

            p.Setup(arg => arg.RequireMonoisotopicMass) // require observation of monoisotopic peak

            p.Setup(arg => arg.NumIsotopesRequired) // num of isotopes required

            p.Setup(arg => arg.Normalize) // normalize

            p.Setup(arg => arg.AdvancedProteinQuant) // advanced protein quant

            // args are OK - run FlashLFQ
            // requires both an error-free parse and a PSM input path
            if (!p.Parse(args).HasErrors&& p.Object.PsmInputPath != null)
                if (!File.Exists(p.Object.PsmInputPath))
                    if (!p.Object.Silent)
                        Console.WriteLine("Could not locate identification file " + p.Object.PsmInputPath);

                if (!Directory.Exists(p.Object.RawFilesPath))
                    if (!p.Object.Silent)
                        Console.WriteLine("Could not locate folder " + p.Object.RawFilesPath);

                // the design file is looked up under a fixed name inside the spectrum file directory.
                // NOTE(review): Path.Combine throws ArgumentNullException for a null first argument, so this
                // relies on RawFilesPath being set - confirm the missing-folder branch above exits first
                string assumedPathToExpDesign = Path.Combine(p.Object.RawFilesPath, "ExperimentalDesign.tsv");
                if (p.Object.Normalize && !File.Exists(assumedPathToExpDesign))
                    if (!p.Object.Silent)
                        Console.WriteLine("Could not find experimental design file (required for normalization): " + assumedPathToExpDesign);

                // set up spectra file info
                // get experimental design info for normalization
                // `files` is a lazily evaluated Where query, so the extension filter is re-applied
                // every time it is enumerated (the loops below and the First() calls in the catch block)
                List <SpectraFileInfo> spectraFileInfos = new List <SpectraFileInfo>();
                IEnumerable <string>   files            = Directory.GetFiles(p.Object.RawFilesPath)
                                                          .Where(f => acceptedSpectrumFileFormats.Contains(Path.GetExtension(f).ToUpperInvariant()));

                // NOTE(review): File.ReadAllLines throws if the design file is missing; confirm the
                // missing-file check above stops execution before this point
                if (p.Object.Normalize)
                    // every line of the file is keyed by its first tab-separated column; the value is the whole line.
                    // ToDictionary throws if two lines share the same first column.
                    var experimentalDesign = File.ReadAllLines(assumedPathToExpDesign)
                                             .ToDictionary(v => v.Split('\t')[0], v => v);

                    foreach (var file in files)
                        string filename = Path.GetFileNameWithoutExtension(file);

                        // the dictionary indexer throws KeyNotFoundException when the spectrum file
                        // has no row in the design file
                        var expDesignForThisFile = experimentalDesign[filename];
                        var split = expDesignForThisFile.Split('\t');

                        // columns read here: [1] condition, [2] biorep, [3] fraction, [4] techrep
                        string condition = split[1];
                        int    biorep    = int.Parse(split[2]);
                        int    fraction  = int.Parse(split[3]);
                        int    techrep   = int.Parse(split[4]);

                        // experimental design info passed in here for each spectra file
                        // each parsed number is decremented by one before being passed in - presumably the
                        // file is 1-based and SpectraFileInfo is 0-based; confirm
                        spectraFileInfos.Add(new SpectraFileInfo(fullFilePathWithExtension: file,
                                                                 condition: condition,
                                                                 biorep: biorep - 1,
                                                                 fraction: fraction - 1,
                                                                 techrep: techrep - 1));
                    // NOTE(review): no `else` is visible in this listing; presumably this loop is the branch
                    // used when normalization is off - confirm. Every file gets an empty condition and
                    // zero for biorep, fraction and techrep.
                    foreach (var file in files)
                        spectraFileInfos.Add(new SpectraFileInfo(fullFilePathWithExtension: file,
                                                                 condition: "",
                                                                 biorep: 0,
                                                                 fraction: 0,
                                                                 techrep: 0));

                // set up IDs
                List <Identification> ids;
                    ids = PsmReader.ReadPsms(p.Object.PsmInputPath, p.Object.Silent, spectraFileInfos);
                // no Silent check is visible around this message
                catch (Exception e)
                    Console.WriteLine("Problem reading PSMs: " + e.Message);

                if (ids.Any())
                    if (!p.Object.Silent)
                        Console.WriteLine("Setup is OK; read in " + ids.Count + " identifications; starting FlashLFQ engine");

                    // make engine with desired settings
                    // both start as null so the code after the catch can tell whether the run produced results
                    FlashLfqEngine  engine  = null;
                    FlashLfqResults results = null;
                        // every engine option is taken from the parsed arguments, except the periodic
                        // table path, which is passed as null
                        engine = new FlashLfqEngine(
                            allIdentifications: ids,
                            normalize: p.Object.Normalize,
                            ppmTolerance: p.Object.PpmTolerance,
                            isotopeTolerancePpm: p.Object.IsotopePpmTolerance,
                            matchBetweenRuns: p.Object.MatchBetweenRuns,
                            matchBetweenRunsPpmTolerance: p.Object.MbrPpmTolerance,
                            integrate: p.Object.Integrate,
                            numIsotopesRequired: p.Object.NumIsotopesRequired,
                            idSpecificChargeState: p.Object.IdSpecificChargeState,
                            requireMonoisotopicMass: p.Object.RequireMonoisotopicMass,
                            silent: p.Object.Silent,
                            optionalPeriodicTablePath: null,
                            maxMbrWindow: p.Object.MbrRtWindow,
                            advancedProteinQuant: p.Object.AdvancedProteinQuant);

                        // run
                        results = engine.Run();
                    catch (Exception ex)
                        // the path shown in the message is the first spectrum file's directory, replaced by
                        // the output path argument when that is not null.
                        // NOTE(review): First() throws if `files` yields nothing - confirm that cannot happen here
                        string errorReportPath = Directory.GetParent(files.First()).FullName;
                        if (p.Object.OutputPath != null)
                            errorReportPath = p.Object.OutputPath;

                        if (!p.Object.Silent)
                            Console.WriteLine("FlashLFQ has crashed with the following error: " + ex.Message +
                                              ".\nError report written to " + errorReportPath);

                        // both candidate locations are handed to the writer
                        OutputWriter.WriteErrorReport(ex, Directory.GetParent(files.First()).FullName, p.Object.OutputPath);

                    // output
                    // results stays null when the engine step above threw, in which case nothing is written
                    if (results != null)
                            OutputWriter.WriteOutput(p.Object.PsmInputPath, results, p.Object.OutputPath);
                        catch (Exception ex)
                            if (!p.Object.Silent)
                                Console.WriteLine("Could not write FlashLFQ output: " + ex.Message);
                    // NOTE(review): presumably the `else` of `if (ids.Any())`, elided in this listing - confirm
                    if (!p.Object.Silent)
                        Console.WriteLine("No peptide IDs for the specified spectra files were found! " +
                                          "Check to make sure the spectra file names match between the ID file and the spectra files");
            // NOTE(review): this calls p.Parse(args) a second time instead of reusing the result of the
            // first call above - confirm that re-parsing is harmless
            else if (p.Parse(args).HasErrors == false && p.Object.PsmInputPath == null)
                // no errors - just requesting help?
                // NOTE(review): an `else` line may be elided between the comment above and the message
                // below; as listed, the message sits directly under this branch - confirm
                Console.WriteLine("Invalid arguments - type \"--help\" for valid arguments");
        //If SILAC (Post-Quantification), compress the light/heavy protein group pairs into the same light protein group but different files
        //Create new files for each silac label and file so that "file 1" now becomes "file 1 (light)" and "file 1 (heavy)"
        //Change heavy residue into the light residue plus a string label ("PEPTIDEa" -> "PEPTIDEK(+8.014)")
        //This light to heavy conversion needs to happen for the flashLFQ peptides here, but can't for the psm peptides, which are constrained to the protein
        //i.e. pwsms currently don't have sequences; they have start/end residues and a protein sequence. We have to change the output sequences when they're created.
        public static void SilacConversionsPostQuantification(List <SilacLabel> silacLabels, List <SpectraFileInfo> spectraFileInfo, List <ProteinGroup> ProteinGroups,
                                                              HashSet <DigestionParams> ListOfDigestionParams, Dictionary <string, List <string> > silacProteinGroupMatcher, FlashLfqResults FlashLfqResults,
                                                              List <PeptideSpectralMatch> allPsms, Dictionary <string, int> ModsToWriteSelection, bool Integrate)
            bool outputLightIntensities = ListOfDigestionParams.Any(x => x.GeneratehUnlabeledProteinsForSilac);

            //MAKE NEW RAW FILES
            //update number of spectra files to include a new file for each label*condition
            Dictionary <SpectraFileInfo, string>          fileToLabelDictionary  = new Dictionary <SpectraFileInfo, string>();          //figure out which file is which label, since some files will be only light and others only heavy. Key is file, value is the label string (label.MassDifference)
            Dictionary <SpectraFileInfo, SpectraFileInfo> labeledToUnlabeledFile = new Dictionary <SpectraFileInfo, SpectraFileInfo>(); //keep track of the heavy-to-light pairs. If multiple, looks like 3-1 and 2-1, but no 3-2 (only heavy to light, no heavy to heavy)
            List <SpectraFileInfo> silacSpectraFileInfo = new List <SpectraFileInfo>();                                                 //new files

            //foreach existing file
            foreach (SpectraFileInfo originalFile in spectraFileInfo)
                //add the existing file as the light
                //foreach label, add a new file with the label
                foreach (SilacLabel label in silacLabels)
                    SpectraFileInfo silacFile = GetHeavyFileInfo(originalFile, label);
                    fileToLabelDictionary[silacFile]  = label.MassDifference;
                    labeledToUnlabeledFile[silacFile] = originalFile;

            //remove the heavy protein groups so that there are only light ones
            //add the intensities of the heavy groups into the newly created heavy SpectraFileInfos
            HashSet <SpectraFileInfo> lightFilesToRemove = new HashSet <SpectraFileInfo>(); //this is only used when there user specified no unlabeled proteins

            if (ProteinGroups != null)                                                      //if we did parsimony
                List <EngineLayer.ProteinGroup> silacProteinGroups = new List <EngineLayer.ProteinGroup>();
                //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
                //we need to remove these unlabeled peptides/proteins before output
                //foreach protein group (which has its own quant for each file)
                foreach (EngineLayer.ProteinGroup proteinGroup in ProteinGroups)
                    proteinGroup.FilesForQuantification = silacSpectraFileInfo;                                                    //update fileinfo for the group
                                                                                                                                   //grab the light groups. Using these light groups, find their heavy group pair(s), add them to the light group quant info, and then remove the heavy groups
                    if (silacProteinGroupMatcher.TryGetValue(proteinGroup.ProteinGroupName, out List <string> silacSubGroupNames)) //try to find the light protein groups. If it's not light, ignore it
                        //the out variable contains all the other heavy protein groups that were generated for this light protein group
                        //go through the files and see if any of them contain the same label. If not, put zeroes for those missing "files"
                        //If the user didn't specify to search light intensities, then don't output them
                        Dictionary <SpectraFileInfo, double> updatedIntensitiesByFile = proteinGroup.IntensitiesByFile; //light intensities
                        List <SpectraFileInfo> lightKeys = updatedIntensitiesByFile.Keys.ToList();

                        //go through all files (including "silac" files)
                        List <ProteinGroup> subGroup = ProteinGroups.Where(x => silacSubGroupNames.Contains(x.ProteinGroupName)).ToList(); //find the protein groups where the accession contains "light" accession of the current protein group
                        foreach (SpectraFileInfo fileInfo in silacSpectraFileInfo)                                                         //for every file (light and heavy)
                            //if it doesn't have a value, then it's a silac file (light missing values still have a value "0")
                            if (!updatedIntensitiesByFile.ContainsKey(fileInfo))
                                string       labelSignature = fileToLabelDictionary[fileInfo];                                                                 //a string associated with a silac label
                                ProteinGroup foundGroup     = subGroup.Where(x => x.Proteins.Any(y => y.Accession.Contains(labelSignature))).FirstOrDefault(); //get the protein groups containing this label
                                updatedIntensitiesByFile[fileInfo] = foundGroup == null ? 0 : foundGroup.IntensitiesByFile[labeledToUnlabeledFile[fileInfo]];  //update the intensity for that label in the light group
                            //else do nothing. The light version is already in the dictionary

                        //The light/unlabeled peptides/proteins were not searched if specified, but they were still quantified to keep track of the labels
                        //we need to remove these unlabeled peptides/proteins before output
                        if (!outputLightIntensities)
                            foreach (SpectraFileInfo info in lightKeys)


                //remove light files (if necessary)
                foreach (SpectraFileInfo info in lightFilesToRemove)

                if (FlashLfqResults != null)                                                                     //can be null if nothing was quantified (all peptides are ambiguous)
                    Dictionary <string, FlashLFQ.ProteinGroup> flashLfqProteins = FlashLfqResults.ProteinGroups; //dictionary of protein group names to protein groups
                                                                                                                 //if the protein group is a heavy protein group, get rid of it. We already accounted for it above.
                    var keys = flashLfqProteins.Keys.ToList();
                    foreach (string key in keys)
                        if (silacLabels.Any(x => key.Contains(x.MassDifference)))

            if (FlashLfqResults != null)                                             //can be null if nothing was quantified (all peptides are ambiguous)
                List <SpectraFileInfo> originalFiles = FlashLfqResults.SpectraFiles; //pass reference
                foreach (SpectraFileInfo info in silacSpectraFileInfo)
                    if (!originalFiles.Contains(info))

            //convert all psm/peptide/proteingroup sequences from the heavy label to the light label for output
            //We can do this for all of the FlashLFQ peptides/peaks, because they use string sequences.
            //We are unable to do this for Parameters.AllPsms, because they store proteins and start/end residues instead
            //for Psms, we need to convert during the writing.
            for (int i = 0; i < allPsms.Count; i++)
                allPsms[i].ResolveHeavySilacLabel(silacLabels, ModsToWriteSelection);

            //Convert all lfqpeaks from heavy (a) to light (K+8.014) for output
            if (FlashLfqResults != null) //can be null if nothing was quantified (all peptides are ambiguous)
                var lfqPeaks = FlashLfqResults.Peaks;
                List <SpectraFileInfo> peakKeys = lfqPeaks.Keys.ToList();

                foreach (SpectraFileInfo key in peakKeys)
                    List <FlashLFQ.ChromatographicPeak> peaks = lfqPeaks[key];
                    for (int i = 0; i < peaks.Count; i++)
                        var peak = peaks[i];
                        List <Identification> identifications = new List <Identification>();
                        //check if we're removing light peaks and if it's a light peak
                        if (!outputLightIntensities && !peak.Identifications.Any(x => GetRelevantLabelFromBaseSequence(x.BaseSequence, silacLabels) != null)) //if no ids have any labels, remove them
                            foreach (var id in peak.Identifications)
                                SilacLabel label = GetRelevantLabelFromBaseSequence(id.BaseSequence, silacLabels);
                                HashSet <FlashLFQ.ProteinGroup> originalGroups = id.proteinGroups;
                                List <FlashLFQ.ProteinGroup>    updatedGroups  = new List <FlashLFQ.ProteinGroup>();
                                foreach (FlashLFQ.ProteinGroup group in originalGroups)
                                    string groupName = group.ProteinGroupName;
                                    if (label == null) //if light
                                        string labelString = "(" + label.OriginalAminoAcid + label.MassDifference;
                                        int    labelIndex  = groupName.IndexOf(labelString);
                                        if (labelIndex != -1) //labelIndex == 1 if a) 2+ peptides are required per protein or b) somebody broke parsimony
                                            groupName = groupName.Substring(0, labelIndex);
                                            updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));

                                Identification updatedId = new Identification(
                                    GetSilacLightBaseSequence(id.BaseSequence, label),
                                    GetSilacLightFullSequence(id.ModifiedSequence, label),
                            FlashLFQ.ChromatographicPeak updatedPeak = new FlashLFQ.ChromatographicPeak(identifications.First(), peak.IsMbrPeak, peak.SpectraFileInfo);
                            for (int j = 1; j < identifications.Count; j++) //add all the original identification
                                updatedPeak.MergeFeatureWith(new FlashLFQ.ChromatographicPeak(identifications[j], peak.IsMbrPeak, peak.SpectraFileInfo), Integrate);
                            updatedPeak.IsotopicEnvelopes = peak.IsotopicEnvelopes;  //need to set isotopicEnevelopes, since the new identifications didn't have them.
                            updatedPeak.CalculateIntensityForThisFeature(Integrate); //needed to update info
                            peaks[i] = updatedPeak;

                //convert all lfq peptides from heavy to light for output
                Dictionary <string, FlashLFQ.Peptide> lfqPwsms = FlashLfqResults.PeptideModifiedSequences;
                List <string> pwsmKeys = lfqPwsms.Keys.ToList();
                foreach (string key in pwsmKeys)
                    FlashLFQ.Peptide currentPeptide = lfqPwsms[key];
                    SilacLabel       label          = GetRelevantLabelFromFullSequence(currentPeptide.Sequence, silacLabels);
                    if (label != null)                                                                                                                           //if it's a heavy peptide
                        lfqPwsms.Remove(key);                                                                                                                    //get rid of it
                                                                                                                                                                 //update the light version
                        string lightSequence = GetSilacLightFullSequence(currentPeptide.Sequence, label, false);                                                 //get the light sequence
                        List <SpectraFileInfo> heavyFiles = silacSpectraFileInfo.Where(x => x.FilenameWithoutExtension.Contains(label.MassDifference)).ToList(); //these are the heavy raw file names

                        //Find the light peptide (which has a value for the light datafile) and set the intensity for the heavy datafile from the current peptide
                        if (lfqPwsms.TryGetValue(lightSequence, out FlashLFQ.Peptide lightPeptide)) //this should always have a value, since we made replicas earlier, and yet it sometimes doesn't...
                            foreach (SpectraFileInfo heavyFile in heavyFiles)
                                SpectraFileInfo lightFile = labeledToUnlabeledFile[heavyFile];
                                lightPeptide.SetIntensity(heavyFile, currentPeptide.GetIntensity(lightFile));
                                lightPeptide.SetDetectionType(heavyFile, currentPeptide.GetDetectionType(lightFile));
                        else //if there's no light, create a new entry for the heavy
                            //new peptide
                            FlashLFQ.Peptide updatedPeptide = new FlashLFQ.Peptide(lightSequence, currentPeptide.UseForProteinQuant);
                            //update the heavy info, set the light values to zero
                            foreach (SpectraFileInfo info in heavyFiles)
                                updatedPeptide.SetIntensity(info, currentPeptide.GetIntensity(info));
                                updatedPeptide.SetDetectionType(info, currentPeptide.GetDetectionType(info));

                            //set the other values to zero
                            List <SpectraFileInfo> otherInfo = silacSpectraFileInfo.Where(x => !heavyFiles.Contains(x)).ToList();
                            foreach (SpectraFileInfo info in otherInfo)
                                updatedPeptide.SetIntensity(info, 0);
                                updatedPeptide.SetDetectionType(info, DetectionType.NotDetected);
                            HashSet <FlashLFQ.ProteinGroup> originalGroups = currentPeptide.proteinGroups;
                            HashSet <FlashLFQ.ProteinGroup> updatedGroups  = new HashSet <FlashLFQ.ProteinGroup>();
                            foreach (FlashLFQ.ProteinGroup group in originalGroups)
                                string groupName = group.ProteinGroupName;
                                groupName = groupName.Replace(label.MassDifference, "");
                                updatedGroups.Add(new FlashLFQ.ProteinGroup(groupName, group.GeneName, group.Organism));
                            updatedPeptide.proteinGroups      = updatedGroups;
                            lfqPwsms[updatedPeptide.Sequence] = updatedPeptide;
Esempio n. 11
        /// <summary>
        /// Runs the FlashLFQ engine with the user's defined spectra files, ID files, and FlashLFQ
        /// settings.
        /// </summary>
        /// <remarks>
        /// Visible stages: read PSMs from every entry in <c>idFiles</c>, build the engine from
        /// <c>settings</c> and run it, then write the output when <c>results</c> is non-null.
        /// Failures in each stage are surfaced to the user through <c>MessageBox.Show</c>.
        /// NOTE(review): this listing appears to be missing brace-only lines, the <c>try</c>
        /// keywords and possibly <c>return</c> statements, and some calls are cut off after
        /// their first line; confirm the exact control flow against the complete source.
        /// </remarks>
        private void RunFlashLfq()
            // Read identifications (PSMs) from every ID file and accumulate them in one list.
            var ids = new List <Identification>();

                // Each file is parsed against the SpectraFileInfo objects of the loaded spectra files;
                // the list is rebuilt (Concat + ToList) on every iteration.
                foreach (var identFile in idFiles)
                    ids = ids.Concat(PsmReader.ReadPsms(identFile.FilePath, false, spectraFiles.Select(p => p.SpectraFileInfo).ToList())).ToList();
            catch (Exception e)
                // Reported error-report location: the folder of the first spectra file,
                // unless an explicit output folder has been set.
                string errorReportPath = Directory.GetParent(spectraFiles.First().FilePath).FullName;
                if (outputFolderPath != null)
                    errorReportPath = outputFolderPath;

                    // NOTE(review): the remaining argument(s) of this call are not visible in this listing.
                    OutputWriter.WriteErrorReport(e, Directory.GetParent(spectraFiles.First().FilePath).FullName,
                catch (Exception ex2)
                    // Writing the report itself failed: show both the original and the secondary error.
                    MessageBox.Show("FlashLFQ has crashed with the following error: " + e.Message +
                                    ".\nThe error report could not be written: " + ex2.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


                // Tell the user the PSM file could not be read and where the report was written.
                MessageBox.Show("FlashLFQ could not read the PSM file: " + e.Message +
                                ".\nError report written to " + errorReportPath, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


            // Nothing was read for the selected spectra files: warn about a likely file-name mismatch.
            if (!ids.Any())
                MessageBox.Show("No peptide IDs for the specified spectra files were found! " +
                                "Check to make sure the spectra file names match between the ID file and the spectra files",
                                "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


            // Run the FlashLFQ engine: build it from the current settings plus the identifications read above.
                flashLfqEngine = FlashLfqSettings.CreateEngineWithSettings(settings, ids);

                results = flashLfqEngine.Run();
            catch (Exception ex)
                // Same error-report location rule as for the PSM-reading stage.
                string errorReportPath = Directory.GetParent(spectraFiles.First().FilePath).FullName;

                if (outputFolderPath != null)
                    errorReportPath = outputFolderPath;

                    // NOTE(review): the remaining argument(s) of this call are not visible in this listing.
                    OutputWriter.WriteErrorReport(ex, Directory.GetParent(spectraFiles.First().FilePath).FullName,
                catch (Exception ex2)
                    // The report could not be written: show both error messages.
                    MessageBox.Show("FlashLFQ has crashed with the following error: " + ex.Message +
                                    ".\nThe error report could not be written: " + ex2.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


                // Report the engine failure together with the error-report location.
                MessageBox.Show("FlashLFQ has crashed with the following error: " + ex.Message +
                                ".\nError report written to " + errorReportPath, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


            // Write output only when the engine produced a result object.
            if (results != null)
                    // NOTE(review): the trailing argument(s) of WriteOutput are not visible in this listing.
                    OutputWriter.WriteOutput(Directory.GetParent(spectraFiles.First().FilePath).FullName, results, flashLfqEngine.Silent,
                catch (Exception ex)
                    // Output could not be written: show the reason to the user.
                    MessageBox.Show("Could not write FlashLFQ output: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

Esempio n. 12
        /// <summary>
        /// Reads the PSMs from the ID files in <c>identFilesForDataGrid</c>, re-creates the
        /// FlashLFQ engine with those identifications, runs it and writes the results.
        /// </summary>
        /// <remarks>
        /// Failures in each stage are surfaced to the user through <c>MessageBox.Show</c>.
        /// NOTE(review): this listing appears to be missing brace-only lines, the <c>try</c>
        /// keywords and possibly <c>return</c> statements, and some calls are cut off after
        /// their first line; confirm the exact control flow against the complete source.
        /// </remarks>
        private void RunFlashLfq()
            // Read identifications (PSMs) from every ID file and accumulate them in one list.
            var ids = new List <Identification>();

                // The list is rebuilt (Concat + ToList) on every iteration.
                foreach (var identFile in identFilesForDataGrid)
                    ids = ids.Concat(PsmReader.ReadPsms(identFile.FilePath, false, spectraFileInfo)).ToList();
            catch (Exception e)
                // Reported error-report location: the folder of the first spectra file,
                // unless an explicit output folder has been set.
                string errorReportPath = Directory.GetParent(spectraFileInfo.First().FullFilePathWithExtension).FullName;
                if (outputFolderPath != null)
                    errorReportPath = outputFolderPath;

                    // NOTE(review): the remaining argument(s) of this call are not visible in this listing.
                    OutputWriter.WriteErrorReport(e, Directory.GetParent(spectraFileInfo.First().FullFilePathWithExtension).FullName,
                catch (Exception ex2)
                    // Writing the report itself failed: show both the original and the secondary error.
                    MessageBox.Show("FlashLFQ has crashed with the following error: " + e.Message +
                                    ".\nThe error report could not be written: " + ex2.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


                // Tell the user the PSM file could not be read and where the report was written.
                MessageBox.Show("FlashLFQ could not read the PSM file: " + e.Message +
                                ".\nError report written to " + errorReportPath, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


            // Nothing was read for the selected spectra files: warn about a likely file-name mismatch.
            if (!ids.Any())
                MessageBox.Show("No peptide IDs for the specified spectra files were found! " +
                                "Check to make sure the spectra file names match between the ID file and the spectra files",
                                "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


            // Run the FlashLFQ engine: rebuild it with the identifications read above, carrying over
            // every setting from the current flashLfqEngine instance. 'silent' is fixed to false and
            // no periodic table path is supplied.
                flashLfqEngine = new FlashLfqEngine(
                    allIdentifications: ids,
                    normalize: flashLfqEngine.Normalize,
                    ppmTolerance: flashLfqEngine.PpmTolerance,
                    isotopeTolerancePpm: flashLfqEngine.IsotopePpmTolerance,
                    matchBetweenRuns: flashLfqEngine.MatchBetweenRuns,
                    matchBetweenRunsPpmTolerance: flashLfqEngine.MbrPpmTolerance,
                    integrate: flashLfqEngine.Integrate,
                    numIsotopesRequired: flashLfqEngine.NumIsotopesRequired,
                    idSpecificChargeState: flashLfqEngine.IdSpecificChargeState,
                    requireMonoisotopicMass: flashLfqEngine.RequireMonoisotopicMass,
                    silent: false,
                    optionalPeriodicTablePath: null,
                    maxMbrWindow: flashLfqEngine.MbrRtWindow,
                    advancedProteinQuant: flashLfqEngine.AdvancedProteinQuant);

                results = flashLfqEngine.Run();
            catch (Exception ex)
                // Same error-report location rule as for the PSM-reading stage.
                string errorReportPath = Directory.GetParent(spectraFileInfo.First().FullFilePathWithExtension).FullName;
                if (outputFolderPath != null)
                    errorReportPath = outputFolderPath;

                    // NOTE(review): the remaining argument(s) of this call are not visible in this listing.
                    OutputWriter.WriteErrorReport(ex, Directory.GetParent(spectraFileInfo.First().FullFilePathWithExtension).FullName,
                catch (Exception ex2)
                    // The report could not be written: show both error messages.
                    MessageBox.Show("FlashLFQ has crashed with the following error: " + ex.Message +
                                    ".\nThe error report could not be written: " + ex2.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


                // Report the engine failure together with the error-report location.
                MessageBox.Show("FlashLFQ has crashed with the following error: " + ex.Message +
                                ".\nError report written to " + errorReportPath, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


            // Write output only when the engine produced a result object.
            if (results != null)
                    // NOTE(review): the trailing argument(s) of WriteOutput are not visible in this listing.
                    OutputWriter.WriteOutput(Directory.GetParent(spectraFileInfo.First().FullFilePathWithExtension).FullName, results,
                catch (Exception ex)
                    // Output could not be written: show the reason to the user.
                    MessageBox.Show("Could not write FlashLFQ output: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);

Esempio n. 13
        /// <summary>
        /// Runs the FlashLFQ engine with the user's defined spectra files, ID files, and FlashLFQ
        /// settings.
        /// </summary>
        /// <remarks>
        /// Visible stages: read PSMs from every entry in <c>idFiles</c>, check the retention
        /// times for values that look like seconds, build the engine from <c>settings</c> and run
        /// it, then write the output when <c>results</c> is non-null and show "Run complete".
        /// Failures in each stage are surfaced to the user through <c>MessageBox.Show</c>.
        /// NOTE(review): this listing appears to be missing brace-only lines, the <c>try</c>
        /// keywords and possibly <c>return</c> statements, and some calls are cut off after
        /// their first line; confirm the exact control flow against the complete source.
        /// </remarks>
        private void RunFlashLfq()
            // Read identifications (PSMs) from every ID file and accumulate them in one list.
            var ids = new List <Identification>();

                // Each file is parsed against the SpectraFileInfo objects of the loaded spectra files;
                // the list is rebuilt (Concat + ToList) on every iteration.
                foreach (var identFile in idFiles)
                    ids = ids.Concat(PsmReader.ReadPsms(identFile.FilePath, false, spectraFiles.Select(p => p.SpectraFileInfo).ToList())).ToList();
            catch (Exception e)
                // Reported error-report location: the folder of the first spectra file,
                // unless an explicit output folder has been set.
                string errorReportPath = Directory.GetParent(spectraFiles.First().FilePath).FullName;
                if (outputFolderPath != null)
                    errorReportPath = outputFolderPath;

                    // NOTE(review): the remaining argument(s) of this call are not visible in this listing.
                    OutputWriter.WriteErrorReport(e, Directory.GetParent(spectraFiles.First().FilePath).FullName,
                catch (Exception ex2)
                    // Writing the report itself failed: show both the original and the secondary error.
                    MessageBox.Show("FlashLFQ has crashed with the following error: " + e.Message +
                                    ".\nThe error report could not be written: " + ex2.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


                // Tell the user the PSM file could not be read and where the report was written.
                MessageBox.Show("FlashLFQ could not read the PSM file: " + e.Message +
                                ".\nError report written to " + errorReportPath, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


            // Nothing was read for the selected spectra files: warn about a likely file-name mismatch.
            if (!ids.Any())
                MessageBox.Show("No peptide IDs for the specified spectra files were found! " +
                                "Check to make sure the spectra file names match between the ID file and the spectra files",
                                "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


            // Unit check: any MS2 retention time above 500 is treated as a sign that the
            // file may store seconds rather than minutes, and the user is asked whether to continue.
            if (ids.Any(p => p.Ms2RetentionTimeInMinutes > 500))
                var res = MessageBox.Show("It seems that some of the retention times in the PSM file(s) are in seconds and not minutes; FlashLFQ requires the RT to be in minutes. " +
                                          "Continue with the FlashLFQ run? (only click yes if the RTs are actually in minutes)",
                                          "Error", MessageBoxButton.YesNo, MessageBoxImage.Hand);

                // NOTE(review): the statement executed when the user answers "No" is not visible
                // in this listing (presumably an early return); confirm against the full source.
                if (res == MessageBoxResult.No)

            // Run the FlashLFQ engine: build it from the current settings plus the identifications read above.
                flashLfqEngine = FlashLfqSettings.CreateEngineWithSettings(settings, ids);

                results = flashLfqEngine.Run();
            catch (Exception ex)
                // Same error-report location rule as for the PSM-reading stage.
                string errorReportPath = Directory.GetParent(spectraFiles.First().FilePath).FullName;

                if (outputFolderPath != null)
                    errorReportPath = outputFolderPath;

                    // NOTE(review): the remaining argument(s) of this call are not visible in this listing.
                    OutputWriter.WriteErrorReport(ex, Directory.GetParent(spectraFiles.First().FilePath).FullName,
                catch (Exception ex2)
                    // The report could not be written: show both error messages.
                    MessageBox.Show("FlashLFQ has crashed with the following error: " + ex.Message +
                                    ".\nThe error report could not be written: " + ex2.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


                // Report the engine failure together with the error-report location.
                MessageBox.Show("FlashLFQ has crashed with the following error: " + ex.Message +
                                ".\nError report written to " + errorReportPath, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);


            // Write output only when the engine produced a result object.
            if (results != null)
                    // NOTE(review): the trailing argument(s) of WriteOutput are not visible in this listing.
                    OutputWriter.WriteOutput(Directory.GetParent(spectraFiles.First().FilePath).FullName, results, flashLfqEngine.Silent,

                    // Confirmation dialog shown after the WriteOutput call.
                    MessageBox.Show("Run complete");
                catch (Exception ex)
                    // Output could not be written: show the reason to the user.
                    MessageBox.Show("Could not write FlashLFQ output: " + ex.Message, "Error", MessageBoxButton.OK, MessageBoxImage.Hand);
