Пример #1
0
        /// <summary>
        /// Ranks the crosslink spectral matches of one list by descending <c>XLTotalScore</c> and stamps every
        /// match with the second-highest <c>XLTotalScore</c> found in that list (<c>SecondBestXlScore</c>).
        /// </summary>
        /// <param name="csmsPerScan">Matches to rank. The list itself is not reordered, but the <c>SecondBestXlScore</c> of each element is overwritten.</param>
        /// <param name="commonParameters">Supplies <c>ScoreCutoff</c>, used as the second-best score when the list holds fewer than two matches.</param>
        /// <returns>
        /// A new list ordered by descending <c>XLTotalScore</c>; ties are ordered by the alpha <c>FullSequence</c>
        /// concatenated with the beta peptide's <c>FullSequence</c> when a beta peptide is present.
        /// An empty input yields an empty list.
        /// </returns>
        public static List <CrosslinkSpectralMatch> SortOneListCsmsSetSecondBestScore(List <CrosslinkSpectralMatch> csmsPerScan, CommonParameters commonParameters)
        {
            List <double> xlTotalScores = csmsPerScan.Select(s => s.XLTotalScore).OrderByDescending(s => s).ToList();

            //This possibly needs to be doubled for xlinks. But, since each list can be a mix of xlinks and nonxlinks we just leave as is for now.
            double secondBestScore = commonParameters.ScoreCutoff;

            // Index 0 is the best score, index 1 the runner-up. Reading by index (instead of removing the
            // first element) keeps an empty list from throwing ArgumentOutOfRangeException.
            if (xlTotalScores.Count > 1)
            {
                secondBestScore = xlTotalScores[1];
            }

            foreach (CrosslinkSpectralMatch csm in csmsPerScan)
            {
                csm.SecondBestXlScore = secondBestScore;
            }

            return csmsPerScan
                   .OrderByDescending(c => c.XLTotalScore)
                   .ThenBy(c => c.FullSequence + (c.BetaPeptide != null ? c.BetaPeptide.FullSequence : ""))
                   .ToList();
        }
        /// <summary>
        /// Serializes a list of crosslink spectral matches to a pepXML file named
        /// <paramref name="fileName"/> + ".pep.XML" inside <paramref name="outputFolder"/>.
        /// Nothing is written when <paramref name="items"/> is empty.
        /// </summary>
        /// <param name="items">Matches to export; one spectrum query with a single search hit is produced per item. The first item's <c>FullFilePath</c> supplies the run-level file names.</param>
        /// <param name="proteinList">Not read by this method.</param>
        /// <param name="databasePath">Database path recorded in the search summary.</param>
        /// <param name="variableModifications">Variable modifications listed as search parameters.</param>
        /// <param name="fixedModifications">Fixed modifications listed as search parameters.</param>
        /// <param name="localizeableModificationTypes">Not read by this method.</param>
        /// <param name="outputFolder">Directory the pepXML file is written to.</param>
        /// <param name="fileName">Output file name without the ".pep.XML" suffix.</param>
        /// <param name="CommonParameters">Digestion, tolerance, threading and dissociation settings recorded in the output.</param>
        /// <param name="XlSearchParameters">Supplies the crosslinker and the decoy type recorded in the output.</param>
        public static void WritePepXML_xl(List <CrosslinkSpectralMatch> items, List <Protein> proteinList, string databasePath, List <Modification> variableModifications, List <Modification> fixedModifications, List <string> localizeableModificationTypes, string outputFolder, string fileName, CommonParameters CommonParameters, XlSearchParameters XlSearchParameters)
        {
            if (!items.Any())
            {
                return;
            }

            XmlSerializer _indexedSerializer = new XmlSerializer(typeof(pepXML.Generated.msms_pipeline_analysis));
            var           _pepxml            = new pepXML.Generated.msms_pipeline_analysis();

            _pepxml.date        = DateTime.Now;
            _pepxml.summary_xml = items[0].FullFilePath + ".pep.XML";

            // Concatenate the cleavage-inducing and cleavage-preventing motif strings of the protease.
            string proteaseC = ""; string proteaseNC = "";

            foreach (var x in CommonParameters.DigestionParams.Protease.DigestionMotifs.Select(m => m.InducingCleavage))
            {
                proteaseC += x;
            }
            foreach (var x in CommonParameters.DigestionParams.Protease.DigestionMotifs.Select(m => m.PreventingCleavage))
            {
                proteaseNC += x;
            }

            Crosslinker crosslinker = XlSearchParameters.Crosslinker;

            string fileNameNoExtension = Path.GetFileNameWithoutExtension(items[0].FullFilePath);
            string filePathNoExtension = Path.ChangeExtension(items[0].FullFilePath, null);

            // Build the string from the distinct characters. Calling ToString() on the Distinct() result
            // would only yield the iterator's type name, not the residues.
            string modSites = new string(crosslinker.CrosslinkerModSites.ToCharArray().Concat(crosslinker.CrosslinkerModSites2.ToCharArray()).Distinct().ToArray());

            // Search parameters echoed into the <search_summary> element as name/value pairs.
            var para = new List <pepXML.Generated.nameValueType>();

            {
                para.Add(new pepXML.Generated.nameValueType {
                    name = "threads", value = CommonParameters.MaxThreadsToUsePerFile.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "database", value = databasePath
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "MS_data_file", value = items[0].FullFilePath
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Cross-link precursor Mass Tolerance", value = CommonParameters.PrecursorMassTolerance.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Cross-linker type", value = crosslinker.CrosslinkerName
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Cross-linker mass", value = crosslinker.TotalMass.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Cross-linker cleavable", value = crosslinker.Cleavable.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Cross-linker cleavable long mass", value = crosslinker.CleaveMassLong.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Cross-linker cleavable short mass", value = crosslinker.CleaveMassShort.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Cross-linker xl site", value = modSites
                });

                para.Add(new pepXML.Generated.nameValueType {
                    name = "Generate decoy proteins", value = XlSearchParameters.DecoyType.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "MaxMissed Cleavages", value = CommonParameters.DigestionParams.MaxMissedCleavages.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Protease", value = CommonParameters.DigestionParams.Protease.Name
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Initiator Methionine", value = CommonParameters.DigestionParams.InitiatorMethionineBehavior.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Max Modification Isoforms", value = CommonParameters.DigestionParams.MaxModificationIsoforms.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Min Peptide Len", value = CommonParameters.DigestionParams.MinPeptideLength.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Max Peptide Len", value = CommonParameters.DigestionParams.MaxPeptideLength.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Product Mass Tolerance", value = CommonParameters.ProductMassTolerance.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Ions to search", value = String.Join(", ", DissociationTypeCollection.ProductsFromDissociationType[CommonParameters.DissociationType])
                });

                foreach (var fixedMod in fixedModifications)
                {
                    para.Add(new pepXML.Generated.nameValueType {
                        name = "Fixed Modifications: " + fixedMod.IdWithMotif, value = fixedMod.MonoisotopicMass.ToString()
                    });
                }
                foreach (var variableMod in variableModifications)
                {
                    para.Add(new pepXML.Generated.nameValueType {
                        name = "Variable Modifications: " + variableMod.IdWithMotif, value = variableMod.MonoisotopicMass.ToString()
                    });
                }

                para.Add(new pepXML.Generated.nameValueType {
                    name = "Localize All Modifications", value = "true"
                });
            }

            // A single run summary describes the enzyme, the search settings and (filled in below) the spectrum queries.
            _pepxml.msms_run_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summary[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summary
                {
                    base_name     = filePathNoExtension,
                    raw_data_type = "raw",
                    raw_data      = ".mzML",
                    sample_enzyme = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzyme()
                    {
                        name        = CommonParameters.DigestionParams.Protease.Name,
                        specificity = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity[1]
                        {
                            new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity
                            {
                                cut    = proteaseC,
                                no_cut = proteaseNC,
                            }
                        }
                    },

                    search_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary[1]
                    {
                        new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary
                        {
                            base_name             = filePathNoExtension,
                            search_engine_version = GlobalVariables.MetaMorpheusVersion,
                            precursor_mass_type   = pepXML.Generated.massType.monoisotopic,
                            fragment_mass_type    = pepXML.Generated.massType.monoisotopic,
                            search_id             = 1,
                            search_database       = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_database
                            {
                                local_path = databasePath,
                                type       = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_databaseType.AA,
                            },
                            enzymatic_search_constraint = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summaryEnzymatic_search_constraint
                            {
                                enzyme = CommonParameters.DigestionParams.Protease.Name,
                                max_num_internal_cleavages = CommonParameters.DigestionParams.MaxMissedCleavages.ToString(),
                                //min_number_termini = "2"
                            },

                            parameter = para.ToArray()
                        }
                    },
                }
            };

            _pepxml.msms_run_summary[0].spectrum_query = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query[items.Count];

            // One search hit per item, built according to the item's crosslink type.
            var searchHits = new List <pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit>();

            for (int i = 0; i < items.Count; i++)
            {
                var mods         = new List <pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>();
                var alphaPeptide = items[i].BestMatchingPeptides.First().Peptide;

                foreach (var modification in alphaPeptide.AllModsOneIsNterminus)
                {
                    var mod = new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
                    {
                        mass = modification.Value.MonoisotopicMass.Value,

                        // convert from one-based to zero-based (N-term is zero in the pepXML output)
                        position = (modification.Key - 1).ToString()
                    };
                    mods.Add(mod);
                }

                if (items[i].CrossType == PsmCrossType.Single)
                {
                    // Unlinked peptide: mass difference is precursor mass minus peptide mass.
                    var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
                    {
                        hit_rank              = 1,
                        peptide               = alphaPeptide.BaseSequence,
                        peptide_prev_aa       = alphaPeptide.PreviousAminoAcid.ToString(),
                        peptide_next_aa       = alphaPeptide.NextAminoAcid.ToString(),
                        protein               = alphaPeptide.Protein.Accession,
                        num_tot_proteins      = 1,
                        calc_neutral_pep_mass = (float)items[i].ScanPrecursorMass,
                        massdiff              = (items[i].ScanPrecursorMass - items[i].PeptideMonisotopicMass.Value).ToString(),
                        xlink_typeSpecified   = true,
                        xlink_type            = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.na,
                        modification_info     = new pepXML.Generated.modInfoDataType {
                            mod_aminoacid_mass = mods.ToArray()
                        },
                        search_score = new pepXML.Generated.nameValueType[]
                        {
                            new pepXML.Generated.nameValueType {
                                name = "xlTotalScore", value = items[i].XLTotalScore.ToString()
                            },
                            new pepXML.Generated.nameValueType {
                                name = "Qvalue", value = items[i].FdrInfo.QValue.ToString()
                            }
                        },
                    };
                    searchHits.Add(searchHit);
                }
                else if (items[i].CrossType == PsmCrossType.DeadEnd || items[i].CrossType == PsmCrossType.DeadEndH2O || items[i].CrossType == PsmCrossType.DeadEndNH2 || items[i].CrossType == PsmCrossType.DeadEndTris)
                {
                    // Dead-end: the mass of the quenched crosslinker is subtracted as well.
                    // DeadEnd and DeadEndH2O both use the H2O dead-end mass.
                    double crosslinkerDeadEndMass = 0;
                    switch (items[i].CrossType)
                    {
                    case PsmCrossType.DeadEndNH2:
                        crosslinkerDeadEndMass = crosslinker.DeadendMassNH2;
                        break;

                    case PsmCrossType.DeadEndTris:
                        crosslinkerDeadEndMass = crosslinker.DeadendMassTris;
                        break;

                    default:
                        crosslinkerDeadEndMass = crosslinker.DeadendMassH2O;
                        break;
                    }
                    var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
                    {
                        hit_rank              = 1,
                        peptide               = alphaPeptide.BaseSequence,
                        peptide_prev_aa       = alphaPeptide.PreviousAminoAcid.ToString(),
                        peptide_next_aa       = alphaPeptide.NextAminoAcid.ToString(),
                        protein               = alphaPeptide.Protein.Accession,
                        num_tot_proteins      = 1,
                        calc_neutral_pep_mass = (float)items[i].ScanPrecursorMass,
                        massdiff              = (items[i].ScanPrecursorMass - items[i].PeptideMonisotopicMass.Value - crosslinkerDeadEndMass).ToString(),
                        xlink_typeSpecified   = true,
                        xlink_type            = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.na,
                        modification_info     = new pepXML.Generated.modInfoDataType {
                            mod_aminoacid_mass = mods.ToArray()
                        },
                        search_score = new pepXML.Generated.nameValueType[]
                        {
                            new pepXML.Generated.nameValueType {
                                name = "xlTotalScore", value = items[i].XLTotalScore.ToString()
                            },
                            new pepXML.Generated.nameValueType {
                                name = "Qvalue", value = items[i].FdrInfo.QValue.ToString()
                            }
                        },
                    };
                    searchHits.Add(searchHit);
                }
                else if (items[i].CrossType == PsmCrossType.Inter || items[i].CrossType == PsmCrossType.Intra || items[i].CrossType == PsmCrossType.Cross)
                {
                    // Two linked peptides: the hit itself carries placeholders ("-") and the alpha/beta
                    // peptides are reported as linked_peptide entries under the xlink element.
                    var betaPeptide = items[i].BetaPeptide.BestMatchingPeptides.First().Peptide;
                    var modsBeta    = new List <pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>();

                    foreach (var mod in betaPeptide.AllModsOneIsNterminus)
                    {
                        var modBeta = new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
                        {
                            mass = mod.Value.MonoisotopicMass.Value,

                            // convert from one-based to zero-based (N-term is zero in the pepXML output)
                            position = (mod.Key - 1).ToString()
                        };
                        modsBeta.Add(modBeta);
                    }

                    var alpha = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide
                    {
                        peptide               = alphaPeptide.BaseSequence,
                        peptide_prev_aa       = alphaPeptide.PreviousAminoAcid.ToString(),
                        peptide_next_aa       = alphaPeptide.NextAminoAcid.ToString(),
                        protein               = alphaPeptide.Protein.Accession,
                        num_tot_proteins      = 1,
                        calc_neutral_pep_mass = (float)items[i].PeptideMonisotopicMass.Value,
                        complement_mass       = (float)(items[i].ScanPrecursorMass - alphaPeptide.MonoisotopicMass),
                        designation           = "alpha",
                        modification_info     = new pepXML.Generated.modInfoDataType {
                            mod_aminoacid_mass = mods.ToArray()
                        },
                        xlink_score = new pepXML.Generated.nameValueType[]
                        {
                            new pepXML.Generated.nameValueType {
                                name = "xlscore", value = items[i].XLTotalScore.ToString()
                            },
                            new pepXML.Generated.nameValueType {
                                name = "link", value = items[i].LinkPositions.First().ToString()
                            },
                        }
                    };
                    var beta = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide
                    {
                        peptide               = betaPeptide.BaseSequence,
                        peptide_prev_aa       = betaPeptide.PreviousAminoAcid.ToString(),
                        peptide_next_aa       = betaPeptide.NextAminoAcid.ToString(),
                        protein               = betaPeptide.Protein.Accession,
                        num_tot_proteins      = 1,
                        calc_neutral_pep_mass = (float)betaPeptide.MonoisotopicMass,
                        complement_mass       = (float)(items[i].ScanPrecursorMass - betaPeptide.MonoisotopicMass),
                        designation           = "beta",
                        modification_info     = new pepXML.Generated.modInfoDataType {
                            mod_aminoacid_mass = modsBeta.ToArray()
                        },
                        xlink_score = new pepXML.Generated.nameValueType[]
                        {
                            new pepXML.Generated.nameValueType {
                                name = "xlscore", value = items[i].BetaPeptide.Score.ToString()
                            },
                            new pepXML.Generated.nameValueType {
                                name = "link", value = items[i].BetaPeptide.LinkPositions.First().ToString()
                            },
                        }
                    };
                    var cross = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide[2] {
                        alpha, beta
                    };
                    var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
                    {
                        hit_rank              = 1,
                        peptide               = "-",
                        peptide_prev_aa       = "-",
                        peptide_next_aa       = "-",
                        protein               = "-",
                        num_tot_proteins      = 1,
                        calc_neutral_pep_mass = (float)items[i].ScanPrecursorMass,
                        massdiff              = (items[i].ScanPrecursorMass - betaPeptide.MonoisotopicMass - alphaPeptide.MonoisotopicMass - crosslinker.TotalMass).ToString(),
                        xlink_typeSpecified   = true,
                        xlink_type            = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.xl,
                        xlink = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink
                        {
                            identifier     = crosslinker.CrosslinkerName,
                            mass           = (float)crosslinker.TotalMass,
                            linked_peptide = cross
                        },
                        search_score = new pepXML.Generated.nameValueType[]
                        {
                            new pepXML.Generated.nameValueType {
                                name = "xlTotalScore", value = items[i].XLTotalScore.ToString()
                            },
                            new pepXML.Generated.nameValueType {
                                name = "Qvalue", value = items[i].FdrInfo.QValue.ToString()
                            }
                        }
                    };
                    searchHits.Add(searchHit);
                }
                else if (items[i].CrossType == PsmCrossType.Loop)
                {
                    // Loop link: both link positions lie on the same peptide and are reported as two "link" scores.
                    var thePeptide = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide
                    {
                        xlink_score = new pepXML.Generated.nameValueType[]
                        {
                            new pepXML.Generated.nameValueType {
                                name = "link", value = items[i].LinkPositions.First().ToString()
                            },
                            new pepXML.Generated.nameValueType {
                                name = "link", value = items[i].LinkPositions[1].ToString()
                            }
                        }
                    };
                    var cross = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlinkLinked_peptide[1] {
                        thePeptide
                    };
                    var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
                    {
                        hit_rank              = 1,
                        peptide               = alphaPeptide.BaseSequence,
                        peptide_prev_aa       = alphaPeptide.PreviousAminoAcid.ToString(),
                        peptide_next_aa       = alphaPeptide.NextAminoAcid.ToString(),
                        protein               = alphaPeptide.Protein.Accession,
                        num_tot_proteins      = 1,
                        calc_neutral_pep_mass = (float)items[i].ScanPrecursorMass,
                        massdiff              = (items[i].ScanPrecursorMass - alphaPeptide.MonoisotopicMass - crosslinker.LoopMass).ToString(),
                        xlink_typeSpecified   = true,
                        xlink_type            = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink_type.loop,
                        modification_info     = new pepXML.Generated.modInfoDataType {
                            mod_aminoacid_mass = mods.ToArray()
                        },
                        xlink = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hitXlink
                        {
                            identifier     = crosslinker.CrosslinkerName,
                            mass           = (float)crosslinker.TotalMass,
                            linked_peptide = cross
                        },
                        search_score = new pepXML.Generated.nameValueType[]
                        {
                            new pepXML.Generated.nameValueType {
                                name = "xlTotalScore", value = items[i].XLTotalScore.ToString()
                            },
                            new pepXML.Generated.nameValueType {
                                name = "Qvalue", value = items[i].FdrInfo.QValue.ToString()
                            }
                        }
                    };
                    searchHits.Add(searchHit);
                }
            }

            // NOTE(review): searchHits is indexed in parallel with items below. An item whose CrossType matches
            // none of the branches above adds no hit, which would make the searchHits[i] lookup go out of range.
            // Confirm every PsmCrossType value is covered.
            for (int i = 0; i < items.Count; i++)
            {
                _pepxml.msms_run_summary[0].spectrum_query[i] = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query()
                {
                    spectrum               = fileNameNoExtension + "." + items[i].ScanNumber.ToString(),
                    start_scan             = Convert.ToUInt32(items[i].ScanNumber),
                    end_scan               = Convert.ToUInt32(items[i].ScanNumber),
                    precursor_neutral_mass = (float)items[i].ScanPrecursorMass,
                    assumed_charge         = items[i].ScanPrecursorCharge.ToString(),
                    index = Convert.ToUInt32(i + 1),
                    // ScanRetentionTime is multiplied by 60 to fill the *_sec field.
                    retention_time_sec = (float)(items[i].ScanRetentionTime * 60),
                    search_result      = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result[1]
                    {
                        new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result
                        {
                            search_hit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit[1]
                            {
                                searchHits[i]
                            }
                        }
                    }
                };
            }

            // Dispose the writer even when serialization throws, so the output file handle is never leaked.
            using (TextWriter writer = new StreamWriter(Path.Combine(outputFolder, fileName + ".pep.XML")))
            {
                _indexedSerializer.Serialize(writer, _pepxml);
            }
        }
Пример #3
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            // disable quantification if a .mgf is being used
            if (SearchParameters.DoQuantification && currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
            {
                SearchParameters.DoQuantification = false;
            }

            // load modifications
            Status("Loading modifications...", taskId);
            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = GlobalVariables.AllModTypesKnown.ToList();

            // what types of fragment ions to search for
            List <ProductType> ionTypes = new List <ProductType>();

            if (CommonParameters.BIons && CommonParameters.AddCompIons)
            {
                ionTypes.Add(ProductType.B);
            }
            else if (CommonParameters.BIons)
            {
                ionTypes.Add(ProductType.BnoB1ions);
            }
            if (CommonParameters.YIons)
            {
                ionTypes.Add(ProductType.Y);
            }
            if (CommonParameters.ZdotIons)
            {
                ionTypes.Add(ProductType.Zdot);
            }
            if (CommonParameters.CIons)
            {
                ionTypes.Add(ProductType.C);
            }

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following search settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the search task
            MyTaskResults = new MyTaskResults(this);
            List <PeptideSpectralMatch> allPsms         = new List <PeptideSpectralMatch>();
            FlashLFQResults             flashLfqResults = null;

            MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
            HashSet <DigestionParams> ListOfDigestionParams = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

            int    completedFiles = 0;
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);
            Status("Searching files...", new List <string> {
                taskId, "Individual Spectra Files"
            });

            Dictionary <string, int[]> numMs2SpectraPerFile = new Dictionary <string, int[]>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);
                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks, combinedParams);
                Status("Getting ms2 scans...", thisId);
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();
                numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
                myFileManager.DoneWithFile(origDataFile);

                var fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                // modern search
                if (SearchParameters.SearchType == SearchType.Modern)
                {
                    for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                    {
                        List <CompactPeptide> peptideIndex      = null;
                        List <Protein>        proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));

                        Status("Getting fragment dictionary...", new List <string> {
                            taskId
                        });
                        var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, ionTypes, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, SearchParameters.MaxFragmentSize, new List <string> {
                            taskId
                        });
                        List <int>[] fragmentIndex = null;
                        lock (indexLock)
                        {
                            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);
                        }

                        Status("Searching files...", taskId);

                        new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, ionTypes, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                        ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                    }
                }
                // nonspecific search
                else if (SearchParameters.SearchType == SearchType.NonSpecific)
                {
                    List <List <ProductType> > terminusSeparatedIons = ProductTypeMethods.SeparateIonsByTerminus(ionTypes);
                    foreach (List <ProductType> terminusSpecificIons in terminusSeparatedIons)
                    {
                        for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                        {
                            List <CompactPeptide> peptideIndex      = null;
                            List <Protein>        proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));

                            List <int>[] fragmentIndex = new List <int> [1];

                            Status("Getting fragment dictionary...", new List <string> {
                                taskId
                            });
                            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, terminusSpecificIons, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, SearchParameters.MaxFragmentSize, new List <string> {
                                taskId
                            });
                            lock (indexLock)
                                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);

                            Status("Getting precursor dictionary...", new List <string> {
                                taskId
                            });
                            List <CompactPeptide> peptideIndexPrecursor      = null;
                            List <Protein>        proteinListSubsetPrecursor = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));
                            List <int>[]          fragmentIndexPrecursor     = new List <int> [1];
                            var indexEnginePrecursor = new PrecursorIndexingEngine(proteinListSubsetPrecursor, variableModifications, fixedModifications, terminusSpecificIons, currentPartition, SearchParameters.DecoyType, ListOfDigestionParams, combinedParams, 0, new List <string> {
                                taskId
                            });
                            lock (indexLock)
                                GenerateIndexes(indexEnginePrecursor, dbFilenameList, ref peptideIndexPrecursor, ref fragmentIndexPrecursor, taskId);

                            if (peptideIndex.Count != peptideIndexPrecursor.Count)
                            {
                                throw new MetaMorpheusException("peptideIndex not identical between indexing engines");
                            }

                            Status("Searching files...", taskId);

                            new NonSpecificEnzymeSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, fragmentIndexPrecursor, terminusSpecificIons, currentPartition, combinedParams, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                        }
                    }
                }
                // classic search
                else
                {
                    Status("Starting search...", thisId);
                    new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, massDiffAcceptor, combinedParams, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
                }
                lock (psmLock)
                {
                    allPsms.AddRange(fileSpecificPsms.Where(p => p != null));
                }

                completedFiles++;
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters();

            parameters.SearchTaskResults             = MyTaskResults;
            parameters.SearchTaskId                  = taskId;
            parameters.SearchParameters              = SearchParameters;
            parameters.ProteinList                   = proteinList;
            parameters.IonTypes                      = ionTypes;
            parameters.AllPsms                       = allPsms;
            parameters.FixedModifications            = fixedModifications;
            parameters.VariableModifications         = variableModifications;
            parameters.ListOfDigestionParams         = ListOfDigestionParams;
            parameters.CurrentRawFileList            = currentRawFileList;
            parameters.MyFileManager                 = myFileManager;
            parameters.NumNotches                    = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);
            parameters.OutputFolder                  = OutputFolder;
            parameters.IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results");
            parameters.FlashLfqResults               = flashLfqResults;
            parameters.FileSettingsList              = fileSettingsList;
            parameters.NumMs2SpectraPerFile          = numMs2SpectraPerFile;
            parameters.DatabaseFilenameList          = dbFilenameList;
            PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask();

            postProcessing.Parameters       = parameters;
            postProcessing.CommonParameters = CommonParameters;
            return(postProcessing.Run());
        }
Пример #4
0
        /// <summary>
        /// Sorts each per-scan list of crosslink spectral matches via
        /// <c>SortOneListCsmsSetSecondBestScore</c>, then orders the resulting lists by their first match.
        /// </summary>
        /// <param name="ListOfCsmsPerMS2Scan">One list of candidate matches per MS2 scan.</param>
        /// <param name="commonParameters">Passed through unchanged to <c>SortOneListCsmsSetSecondBestScore</c>.</param>
        /// <returns>
        /// A new outer list ordered by the first match's XLTotalScore (descending), ties broken by the
        /// first match's full sequence concatenated with its beta peptide's full sequence (descending).
        /// </returns>
        public static List<List<CrosslinkSpectralMatch>> SortListsOfCsms(List<List<CrosslinkSpectralMatch>> ListOfCsmsPerMS2Scan, CommonParameters commonParameters)
        {
            // Sort every scan's candidates first; ToList() forces this to happen eagerly, in input order.
            List<List<CrosslinkSpectralMatch>> sortedPerScan = ListOfCsmsPerMS2Scan
                .Select(scanCsms => SortOneListCsmsSetSecondBestScore(scanCsms, commonParameters))
                .ToList();

            return sortedPerScan
                .OrderByDescending(scanCsms => scanCsms.First().XLTotalScore)
                .ThenByDescending(scanCsms =>
                {
                    // Tie-break key: alpha sequence followed by the beta sequence, when a beta peptide exists.
                    CrosslinkSpectralMatch topMatch = scanCsms.First();
                    string betaSequence = topMatch.BetaPeptide != null ? topMatch.BetaPeptide.FullSequence : "";
                    return topMatch.FullSequence + betaSequence;
                })
                .ToList();
        }
Пример #5
0
        /// <summary>
        /// Runs FDR analysis separately on crosslinked matches (inter or intra) and on all remaining matches,
        /// then runs the crosslink FDR analysis on the supplied inter and intra lists.
        /// </summary>
        /// <param name="allPsms">All matches; split here by their CrossType.</param>
        /// <param name="intraCsms">Intra-crosslink matches handed to <c>DoCrosslinkFdrAnalysis</c>.</param>
        /// <param name="interCsms">Inter-crosslink matches handed to <c>DoCrosslinkFdrAnalysis</c>.</param>
        /// <param name="commonParameters">Parameters forwarded to the FDR engines.</param>
        /// <param name="taskId">Identifier used as the nested id of every engine started here.</param>
        public void ComputeXlinkQandPValues(List<CrosslinkSpectralMatch> allPsms, List<CrosslinkSpectralMatch> intraCsms, List<CrosslinkSpectralMatch> interCsms, CommonParameters commonParameters, string taskId)
        {
            // Crosslinked matches (inter or intra), best XLTotalScore first.
            List<CrosslinkSpectralMatch> crosslinked = allPsms
                .Where(csm => csm.CrossType == PsmCrossType.Inter || csm.CrossType == PsmCrossType.Intra)
                .OrderByDescending(csm => csm.XLTotalScore)
                .ToList();

            var crosslinkFdrEngine = new FdrAnalysisEngine(
                crosslinked.Cast<PeptideSpectralMatch>().ToList(),
                0,
                commonParameters,
                this.FileSpecificParameters,
                new List<string> { taskId },
                "crosslink");
            crosslinkFdrEngine.Run();

            // Everything that is neither inter nor intra, best Score first.
            List<CrosslinkSpectralMatch> nonCrosslinked = allPsms
                .Where(csm => csm.CrossType != PsmCrossType.Inter && csm.CrossType != PsmCrossType.Intra)
                .OrderByDescending(csm => csm.Score)
                .ToList();

            var psmFdrEngine = new FdrAnalysisEngine(
                nonCrosslinked.Cast<PeptideSpectralMatch>().ToList(),
                0,
                commonParameters,
                this.FileSpecificParameters,
                new List<string> { taskId },
                "PSM");
            psmFdrEngine.Run();

            SingleFDRAnalysis(nonCrosslinked, commonParameters, new List<string> { taskId });

            // calculate FDR (inter first, then intra)
            DoCrosslinkFdrAnalysis(interCsms);
            DoCrosslinkFdrAnalysis(intraCsms);
        }
Пример #6
0
        /// <summary>
        /// Runs the G-PTM-D task: searches every spectra file with a classic search whose precursor mass
        /// acceptor includes the G-PTM-D mass shifts, runs FDR analysis, writes the candidate PSMs, runs the
        /// G-PTM-D engine and writes the resulting modification-annotated XML database(s).
        /// </summary>
        /// <param name="OutputFolder">Folder that receives GPTMD_Candidates.psmtsv and the generated *GPTMD.xml databases.</param>
        /// <param name="dbFilenameList">Databases to load; contaminant and non-contaminant entries are written to separate output databases.</param>
        /// <param name="currentRawFileList">Paths of the spectra files to search.</param>
        /// <param name="taskId">Identifier used for status, progress and file-written notifications.</param>
        /// <param name="fileSettingsList">Per-file parameter overrides, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
        /// <returns>
        /// The task results. NewDatabases holds one entry per database written; it stays empty when
        /// GlobalVariables.StopLoops is set by the time the G-PTM-D engine has finished.
        /// </returns>
        protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // TODO: print error messages loading GPTMD mods
            List<Modification> gptmdModifications = GlobalVariables.AllModsKnown.OfType<Modification>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.ModificationType, b.IdWithMotif))).ToList();
            IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

            // load proteins
            List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            List<PeptideSpectralMatch> allPsms = new List<PeptideSpectralMatch>();

            // write prose settings
            ProseCreatedWhileRunning.Append("The following G-PTM-D settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");

            // temporary search type for writing prose
            // the actual search type is technically file-specific but we don't allow file-specific notches, so it's safe to do this
            MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);

            ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");

            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the G-PTM-D task
            Status("Running G-PTM-D...", new List<string> {
                taskId
            });
            MyTaskResults = new MyTaskResults(this)
            {
                NewDatabases = new List<DbForTask>()
            };

            // NOTE(review): ListOfDigestionParams is not read again in this method; it is kept because building it
            // runs SetAllFileSpecificCommonParams for every file - confirm whether that is still required.
            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
            HashSet<DigestionParams> ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

            MyFileManager myFileManager = new MyFileManager(true);

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                // Stop if canceled
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List<string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                // file-specific parameters and the matching precursor mass acceptor
                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
                MassDiffAcceptor searchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

                NewCollection(Path.GetFileName(origDataFile), new List<string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                Status("Loading spectra file...", new List<string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
                Status("Getting ms2 scans...", new List<string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
                myFileManager.DoneWithFile(origDataFile);

                // one slot per MS2 scan; slots the search leaves empty stay null and are dropped below
                PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, null, proteinList, searchMode, combinedParams, new List<string> {
                    taskId, "Individual Spectra Files", origDataFile
                }).Run();
                allPsms.AddRange(allPsmsArray.Where(p => p != null));
                FinishedDataFile(origDataFile, new List<string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> {
                    taskId, "Individual Spectra Files", origDataFile
                }));
            }
            ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> {
                taskId, "Individual Spectra Files"
            }));

            // Best score first, ties broken by smallest precursor mass error; then keep only the first PSM
            // for each (file, scan number, peptide monoisotopic mass) combination.
            allPsms = allPsms.OrderByDescending(b => b.Score)
                      .ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue)
                      .GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass))
                      .Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, new List<string> {
                taskId
            }).Run();

            var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");

            WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>());
            FinishedWritingFile(writtenFile, new List<string> {
                taskId
            });

            // get file-specific precursor mass tolerances for the GPTMD engine
            var filePathToPrecursorMassTolerance = new Dictionary<string, Tolerance>();

            for (int i = 0; i < currentRawFileList.Count; i++)
            {
                string filePath = currentRawFileList[i];
                Tolerance fileTolerance = CommonParameters.PrecursorMassTolerance;
                if (fileSettingsList[i] != null && fileSettingsList[i].PrecursorMassTolerance != null)
                {
                    fileTolerance = fileSettingsList[i].PrecursorMassTolerance;
                }
                filePathToPrecursorMassTolerance.Add(filePath, fileTolerance);
            }

            // run GPTMD engine
            var gptmdResults = (GptmdResults) new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, new List<string> {
                taskId
            }).Run();

            // Stop if canceled
            if (GlobalVariables.StopLoops)
            {
                return MyTaskResults;
            }

            // write GPTMD databases
            if (dbFilenameList.Any(b => !b.IsContaminant))
            {
                List<string> databaseNames = new List<string>();
                foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant))
                {
                    var dbName = Path.GetFileNameWithoutExtension(nonContaminantDb.FilePath);
                    var theExtension = Path.GetExtension(nonContaminantDb.FilePath).ToLowerInvariant();
                    bool compressed = theExtension.EndsWith("gz");

                    // for a compressed file (e.g. name.xml.gz) a second extension has to be stripped as well
                    databaseNames.Add(compressed ? Path.GetFileNameWithoutExtension(dbName) : dbName);
                }
                string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName);

                FinishedWritingFile(outputXMLdbFullName, new List<string> {
                    taskId
                });

                MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullName, false));
                MyTaskResults.AddNiceText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                MyTaskResults.AddNiceText("Mods types and counts:");
                MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            if (dbFilenameList.Any(b => b.IsContaminant))
            {
                // do NOT use this code (Path.GetFilenameWithoutExtension) because GPTMD on .xml.gz will result in .xml.xml file type being written
                //string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml");
                List<string> databaseNames = new List<string>();
                foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant))
                {
                    var dbName = Path.GetFileName(contaminantDb.FilePath);

                    // Keep everything before the first dot. A file name without any dot is used as-is:
                    // IndexOf returns -1 there and Substring(0, -1) would throw ArgumentOutOfRangeException.
                    int indexOfFirstDot = dbName.IndexOf('.');
                    databaseNames.Add(indexOfFirstDot >= 0 ? dbName.Substring(0, indexOfFirstDot) : dbName);
                }
                string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants);

                FinishedWritingFile(outputXMLdbFullNameContaminants, new List<string> {
                    taskId
                });

                MyTaskResults.NewDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true));
                MyTaskResults.AddNiceText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                MyTaskResults.AddNiceText("Mods types and counts:");
                MyTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            return MyTaskResults;
        }
Пример #7
0
        /// <summary>
        /// Searches one spectra file with the given initial tolerances, runs FDR analysis, keeps the
        /// identifications with a notch q-value below 0.001 that are not decoys and have a full sequence,
        /// and passes them together with their MS2 scans to the DataPointAcquisitionEngine.
        /// </summary>
        /// <param name="myMsDataFile">Loaded spectra file.</param>
        /// <param name="currentDataFile">Path of the spectra file; its name without extension labels the nested ids.</param>
        /// <param name="variableModifications">Variable modifications for the search.</param>
        /// <param name="fixedModifications">Fixed modifications for the search.</param>
        /// <param name="proteinList">Proteins to search against.</param>
        /// <param name="taskId">Identifier used as the first element of every nested id list.</param>
        /// <param name="combinedParameters">Parameters used for scan extraction and the classic search.</param>
        /// <param name="initPrecTol">Precursor tolerance; a PpmTolerance selects the ppm search mode, anything else the absolute one.</param>
        /// <param name="initProdTol">Product tolerance; logged here and forwarded to the acquisition engine.</param>
        /// <returns>
        /// The acquisition engine's results, or a result built from a null first argument, empty lists and
        /// zeros when no identification passes the filter.
        /// </returns>
        private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsDataFile, string currentDataFile, List<Modification> variableModifications, List<Modification> fixedModifications, List<Protein> proteinList, string taskId, CommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
        {
            string fileLabel = Path.GetFileNameWithoutExtension(currentDataFile);

            // Each consumer receives its own freshly allocated id list.
            List<string> NestedIds() => new List<string> { taskId, "Individual Spectra Files", fileLabel };

            MassDiffAcceptor searchMode;
            if (initPrecTol is PpmTolerance)
            {
                searchMode = new SinglePpmAroundZeroSearchMode(initPrecTol.Value);
            }
            else
            {
                searchMode = new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);
            }

            var scansByPrecursorMass = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters)
                .OrderBy(scan => scan.PrecursorMass)
                .ToArray();

            // One slot per scan, index-aligned with scansByPrecursorMass; the slots are filled by the search engine.
            var psmPerScan = new PeptideSpectralMatch[scansByPrecursorMass.Length];

            Log("Searching with searchMode: " + searchMode, NestedIds());
            Log("Searching with productMassTolerance: " + initProdTol, NestedIds());

            var searchEngine = new ClassicSearchEngine(psmPerScan, scansByPrecursorMass, variableModifications, fixedModifications, null, null, null, proteinList, searchMode, combinedParameters, FileSpecificParameters, NestedIds());
            searchEngine.Run();

            // Best score first, ties broken by smallest precursor mass error; then keep the first PSM
            // of every (file, scan number, peptide monoisotopic mass) group.
            List<PeptideSpectralMatch> allPsms = psmPerScan
                .Where(psm => psm != null)
                .OrderByDescending(psm => psm.Score)
                .ThenBy(psm => psm.PeptideMonisotopicMass.HasValue ? Math.Abs(psm.ScanPrecursorMass - psm.PeptideMonisotopicMass.Value) : double.MaxValue)
                .GroupBy(psm => (psm.FullFilePath, psm.ScanNumber, psm.PeptideMonisotopicMass))
                .Select(sameScanAndMass => sameScanAndMass.First())
                .ToList();

            var fdrEngine = new FdrAnalysisEngine(allPsms, searchMode.NumNotches, CommonParameters, FileSpecificParameters, NestedIds());
            fdrEngine.Run();

            List<PeptideSpectralMatch> goodIdentifications = allPsms
                .Where(psm => psm.FdrInfo.QValueNotch < 0.001 && !psm.IsDecoy && psm.FullSequence != null)
                .ToList();

            if (goodIdentifications.Count == 0)
            {
                return new DataPointAquisitionResults(null, new List<PeptideSpectralMatch>(), new List<LabeledDataPoint>(), new List<LabeledDataPoint>(), 0, 0, 0, 0);
            }

            // get the deconvoluted ms2scans for the good identifications:
            // a PSM's position in the unfiltered array is also the position of its scan
            List<Ms2ScanWithSpecificMass> goodScans = goodIdentifications
                .Select(psm => scansByPrecursorMass[Array.IndexOf(psmPerScan, psm)])
                .ToList();

            var acquisitionEngine = new DataPointAcquisitionEngine(
                goodIdentifications,
                goodScans,
                myMsDataFile,
                initPrecTol,
                initProdTol,
                CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
                CommonParameters,
                FileSpecificParameters,
                NestedIds());

            return (DataPointAquisitionResults)acquisitionEngine.Run();
        }
Пример #8
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            MyTaskResults = new MyTaskResults(this);
            List <List <CrosslinkSpectralMatch> > ListOfCsmsPerMS2Scan = new List <List <CrosslinkSpectralMatch> >();

            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, XlSearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            var crosslinker = XlSearchParameters.Crosslinker;

            MyFileManager myFileManager = new MyFileManager(true);

            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
            HashSet <DigestionParams> ListOfDigestionParams = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

            int    completedFiles = 0;
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);

            ProseCreatedWhileRunning.Append("The following crosslink discovery were used: ");
            ProseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; ");
            ProseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; ");
            ProseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; ");
            ProseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSites + "; ");

            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif) + "; "));
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");

            ProseCreatedWhileRunning.Append("parent mass tolerance(s) = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                var origDataFile = currentRawFileList[spectraFileIndex];
                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);

                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

                Status("Getting ms2 scans...", thisId);

                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

                List <CrosslinkSpectralMatch>[] newCsmsPerMS2ScanPerFile = new List <CrosslinkSpectralMatch> [arrayOfMs2ScansSortedByMass.Length];
                for (int currentPartition = 0; currentPartition < CommonParameters.TotalPartitions; currentPartition++)
                {
                    List <PeptideWithSetModifications> peptideIndex = null;

                    //When partition, the proteinList will be split for each Thread.
                    List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));

                    Status("Getting fragment dictionary...", new List <string> {
                        taskId
                    });

                    //Only reverse Decoy for crosslink search has been tested and are set as fixed parameter.
                    var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, null, null, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, combinedParams, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List <string> {
                        taskId
                    });

                    List <int>[] fragmentIndex  = null;
                    List <int>[] precursorIndex = null;
                    GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

                    //The second Fragment index is for 'MS1-HCD_MS1-ETD_MS2s' type of data. If LowCID is used for MS1, ion-index is not allowed to use.
                    List <int>[] secondFragmentIndex = null;
                    if (combinedParams.ChildScanDissociationType != DissociationType.LowCID &&
                        !CrosslinkSearchEngine.DissociationTypeGenerateSameTypeOfIons(combinedParams.DissociationType, combinedParams.ChildScanDissociationType))
                    {
                        //Becuase two different type of dissociation methods are used, the parameters are changed with different dissociation type.
                        var secondCombinedParams = CommonParameters.CloneWithNewDissociationType(combinedParams.ChildScanDissociationType);
                        var secondIndexEngine    = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, null, null, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, secondCombinedParams, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List <string> {
                            taskId
                        });
                        GenerateSecondIndexes(indexEngine, secondIndexEngine, dbFilenameList, ref secondFragmentIndex, proteinList, taskId);
                    }

                    Status("Searching files...", taskId);
                    new CrosslinkSearchEngine(newCsmsPerMS2ScanPerFile, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, secondFragmentIndex, currentPartition, combinedParams, crosslinker,
                                              XlSearchParameters.RestrictToTopNHits, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.CrosslinkAtCleavageSite,
                                              XlSearchParameters.XlQuench_H2O, XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + CommonParameters.TotalPartitions + "!", thisId));
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }
                }

                ListOfCsmsPerMS2Scan.AddRange(newCsmsPerMS2ScanPerFile.Where(p => p != null).ToList());

                completedFiles++;
                ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            List <List <CrosslinkSpectralMatch> > ListOfCsmsPerMS2ScanParsimony = new List <List <CrosslinkSpectralMatch> >();

            //For every Ms2Scans, each have a list of candidates psms. The allPsms from CrosslinkSearchEngine is the list (all ms2scans) of list (each ms2scan) of psm (all candidate psm).
            //The allPsmsList is same as allPsms after ResolveAmbiguities.
            foreach (var csmsPerScan in ListOfCsmsPerMS2Scan)
            {
                foreach (var csm in csmsPerScan)
                {
                    csm.ResolveAllAmbiguities();
                    if (csm.BetaPeptide != null)
                    {
                        csm.BetaPeptide.ResolveAllAmbiguities();
                    }
                }
                ListOfCsmsPerMS2ScanParsimony.Add(RemoveDuplicateFromCsmsPerScan(csmsPerScan));
            }

            var filteredAllPsms = new List <CrosslinkSpectralMatch>();

            //For each ms2scan, try to find the best candidate psm from the psms list. Add it into filteredAllPsms
            //This function is for current usage, this can be replaced with PEP value.
            foreach (var csmsPerScan in ListOfCsmsPerMS2ScanParsimony)
            {
                filteredAllPsms.Add(csmsPerScan[0]);
            }

            PostXLSearchAnalysisTask postXLSearchAnalysisTask = new PostXLSearchAnalysisTask();

            return(postXLSearchAnalysisTask.Run(OutputFolder, dbFilenameList, currentRawFileList, taskId, fileSettingsList, filteredAllPsms.OrderByDescending(p => p.XLTotalScore).ToList(), CommonParameters, XlSearchParameters, proteinList, variableModifications, fixedModifications, localizeableModificationTypes, MyTaskResults));
        }
Пример #9
0
        /// <summary>
        /// Runs the calibration task. For each spectra file it searches the file to collect calibration data
        /// points, runs the calibration engine, searches again to measure the result, and writes a calibrated
        /// .mzML plus a file-specific .toml with suggested mass tolerances into <paramref name="OutputFolder"/>.
        /// If an experimental design file sits next to the first spectra file, a copy that refers to the
        /// calibrated file names is written to the output folder as well.
        /// </summary>
        /// <remarks>
        /// If a file does not yield enough data points after four search rounds, warnings are emitted and the
        /// method returns immediately; the remaining files are not processed.
        /// </remarks>
        /// <param name="OutputFolder">Directory that receives the calibrated spectra, the .toml files and the experimental design.</param>
        /// <param name="dbFilenameList">Protein databases handed to LoadProteins.</param>
        /// <param name="currentRawFileList">Paths of the spectra files to calibrate.</param>
        /// <param name="taskId">Task identifier used as the root of every status/progress id.</param>
        /// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>; entries may be null.</param>
        /// <returns>The task results, listing the calibrated spectra files and the .toml files that were written.</returns>
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            // load modifications
            Status("Loading modifications...", new List <string> {
                taskId
            });
            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = GlobalVariables.AllModTypesKnown.ToList();

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the calibration task
            Status("Calibrating...", new List <string> {
                taskId
            });
            MyTaskResults = new MyTaskResults(this)
            {
                NewSpectra           = new List <string>(),
                NewFileSpecificTomls = new List <string>()
            };

            var myFileManager = new MyFileManager(true);

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                // get filename stuff
                var    originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
                var    originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
                string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

                // mark the file as in-progress
                StartingDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                // load the file
                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                MsDataFile myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters.TopNpeaks, CommonParameters.MinRatio, CommonParameters.TrimMs1Peaks, CommonParameters.TrimMsMsPeaks, CommonParameters);

                // get datapoints to fit calibration function to
                Status("Acquiring calibration data points...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                DataPointAquisitionResults acquisitionResults = null;

                // Up to four search rounds: the first three failures widen the tolerances and retry,
                // the fourth failure gives up (the loop bound of 5 is never reached).
                for (int i = 1; i <= 5; i++)
                {
                    acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                    // enough data points to calibrate?
                    // NOTE(review): the MS1/MS2 checks use '>' while the failure warnings below use '<';
                    // a count exactly equal to the requirement fails without a warning - confirm intent.
                    if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
                    {
                        break;
                    }

                    // NOTE(review): the widened tolerances are stored on CommonParameters, while the search
                    // above reads combinedParams - confirm that SetAllFileSpecificCommonParams returns an
                    // object that observes these changes. Nothing in this method resets them afterwards,
                    // so they stay on CommonParameters for the files that follow.
                    if (i == 1) // failed round 1
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(20);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(50);
                    }
                    else if (i == 2) // failed round 2
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(30);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(100);
                    }
                    else if (i == 3) // failed round 3
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(40);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(150);
                    }
                    else // failed round 4
                    {
                        if (acquisitionResults.Psms.Count < NumRequiredPsms)
                        {
                            Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                        }
                        if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                        }
                        if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                        }
                        FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                            taskId, "Individual Spectra Files", originalUncalibratedFilePath
                        });

                        // abandons the whole task: later files and the experimental design are not handled
                        return(MyTaskResults);
                    }

                    Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                         Math.Round(CommonParameters.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                         Math.Round(CommonParameters.ProductMassTolerance.Value, 2) + " ppm product");
                }

                // stats before calibration
                int    prevPsmCount = acquisitionResults.Psms.Count;
                double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double preCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // generate calibration function and shift data points
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }).Run();

                // do another search to evaluate calibration results
                Status("Post-calibration search...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                // stats after calibration
                int    postCalibrationPsmCount          = acquisitionResults.Psms.Count;
                double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double postCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // did the data improve? (not used for anything yet...)
                bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

                // write toml settings for the calibrated file
                var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

                var fileSpecificParams = new FileSpecificParameters();

                // carry over file-specific parameters from the uncalibrated file to the calibrated one
                if (fileSettingsList[spectraFileIndex] != null)
                {
                    fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
                }

                // don't write over ppm tolerances if they've been specified by the user already in the file-specific settings
                // otherwise, suggest 4 * interquartile range plus the absolute median error as the ppm tolerance
                if (fileSpecificParams.PrecursorMassTolerance == null)
                {
                    fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
                }
                if (fileSpecificParams.ProductMassTolerance == null)
                {
                    fileSpecificParams.ProductMassTolerance = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));
                }

                Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

                FinishedWritingFile(newTomlFileName, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });

                // write the calibrated mzML file
                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
                myFileManager.DoneWithFile(originalUncalibratedFilePath);

                // finished calibrating this file
                FinishedWritingFile(calibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                MyTaskResults.NewSpectra.Add(calibratedFilePath);
                MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
                FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }));
            }

            // re-write experimental design (if it has been defined) with new calibrated file names;
            // the design file is looked for next to the first spectra file, so there is nothing to do without one
            if (currentRawFileList.Count > 0)
            {
                string assumedPathToExperDesign = Directory.GetParent(currentRawFileList.First()).FullName;

                assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

                if (File.Exists(assumedPathToExperDesign))
                {
                    List <string> newExperimentalDesignOutput = new List <string>();
                    var lines = File.ReadAllLines(assumedPathToExperDesign);

                    for (int i = 0; i < lines.Length; i++)
                    {
                        // header of experimental design file
                        if (i == 0)
                        {
                            newExperimentalDesignOutput.Add(lines[i]);
                        }
                        else
                        {
                            // first column is the spectra file name: swap it for the calibrated name, keep the rest
                            var    split   = lines[i].Split('\t');
                            string newline = Path.GetFileNameWithoutExtension(split[0]) + CalibSuffix + "\t";
                            for (int j = 1; j < split.Length; j++)
                            {
                                newline += split[j] + "\t";
                            }

                            newExperimentalDesignOutput.Add(newline);
                        }
                    }

                    // only written when a source design exists, so no empty design file ends up in the output folder
                    File.WriteAllLines(Path.Combine(OutputFolder, GlobalVariables.ExperimentalDesignFileName), newExperimentalDesignOutput);
                }
            }

            // finished calibrating all files for the task
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            return(MyTaskResults);
        }
Пример #10
0
        public static void WritePepXml(List <PeptideSpectralMatch> psms, List <DbForTask> database, List <Modification> variableModifications, List <Modification> fixedModifications, CommonParameters CommonParameters, string outputPath, double qValueFilter)
        {
            // TODO: needs a unit test
            psms = psms.Where(p => p.FdrInfo.QValue <= qValueFilter && p.FdrInfo.QValueNotch < qValueFilter).ToList();

            if (!psms.Any())
            {
                return;
            }

            XmlSerializer _indexedSerializer = new XmlSerializer(typeof(pepXML.Generated.msms_pipeline_analysis));
            var           _pepxml            = new pepXML.Generated.msms_pipeline_analysis();

            _pepxml.date        = DateTime.Now;
            _pepxml.summary_xml = psms[0].FullFilePath + ".pep.XML";

            string proteaseNC = string.Join(string.Empty, CommonParameters.DigestionParams.Protease.DigestionMotifs.Select(m => m.InducingCleavage));
            string proteaseC  = string.Join(string.Empty, CommonParameters.DigestionParams.Protease.DigestionMotifs.Select(m => m.InducingCleavage));

            string fileNameNoExtension = Path.GetFileNameWithoutExtension(psms[0].FullFilePath);
            string filePathNoExtension = Path.ChangeExtension(psms[0].FullFilePath, null);

            var para = new List <pepXML.Generated.nameValueType>();

            {
                para.Add(new pepXML.Generated.nameValueType {
                    name = "threads", value = CommonParameters.MaxThreadsToUsePerFile.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "database", value = database.First().FilePath
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "MS_data_file", value = psms[0].FullFilePath
                });

                para.Add(new pepXML.Generated.nameValueType {
                    name = "MaxMissed Cleavages", value = CommonParameters.DigestionParams.MaxMissedCleavages.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Protease", value = CommonParameters.DigestionParams.Protease.Name
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Initiator Methionine", value = CommonParameters.DigestionParams.InitiatorMethionineBehavior.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Max Modification Isoforms", value = CommonParameters.DigestionParams.MaxModificationIsoforms.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Min Peptide Len", value = CommonParameters.DigestionParams.MinPeptideLength.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Max Peptide Len", value = CommonParameters.DigestionParams.MaxPeptideLength.ToString()
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Product Mass Tolerance", value = CommonParameters.ProductMassTolerance.ToString()
                });
                // TODO: check this
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Ions to search", value = string.Join(", ", DissociationTypeCollection.ProductsFromDissociationType[CommonParameters.DissociationType])
                });
                para.Add(new pepXML.Generated.nameValueType {
                    name = "Q-value Filter", value = CommonParameters.QValueOutputFilter.ToString()
                });
                foreach (var item in fixedModifications)
                {
                    para.Add(new pepXML.Generated.nameValueType {
                        name = "Fixed Modifications: " + item.IdWithMotif, value = item.MonoisotopicMass.ToString()
                    });
                }
                foreach (var item in variableModifications)
                {
                    para.Add(new pepXML.Generated.nameValueType {
                        name = "Variable Modifications: " + item.IdWithMotif, value = item.MonoisotopicMass.ToString()
                    });
                }

                para.Add(new pepXML.Generated.nameValueType {
                    name = "Localize All Modifications", value = "true"
                });
            }

            _pepxml.msms_run_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summary[1]
            {
                new pepXML.Generated.msms_pipeline_analysisMsms_run_summary
                {
                    base_name     = filePathNoExtension,
                    raw_data_type = "raw",

                    raw_data      = ".mzML", //TODO: use file format of spectra file used
                    sample_enzyme = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzyme()
                    {
                        name        = CommonParameters.DigestionParams.Protease.Name,
                        specificity = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity[1]
                        {
                            new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySample_enzymeSpecificity
                            {
                                cut    = proteaseC,
                                no_cut = proteaseNC,
                            }
                        }
                    },

                    search_summary = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary[1]
                    {
                        new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summary
                        {
                            base_name = filePathNoExtension,

                            // TODO: get MetaMorpheus recognized as a search engine type
                            //search_engine = pepXML.Generated.engineType.MetaMorpheus
                            search_engine_version = GlobalVariables.MetaMorpheusVersion,
                            precursor_mass_type   = pepXML.Generated.massType.monoisotopic,
                            fragment_mass_type    = pepXML.Generated.massType.monoisotopic,
                            search_id             = 1,
                            //generate database information
                            //TODO: multiple databases
                            search_database = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_database
                            {
                                local_path = database.First().FilePath,
                                type       = pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summarySearch_databaseType.AA,
                            },
                            enzymatic_search_constraint = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySearch_summaryEnzymatic_search_constraint
                            {
                                enzyme = CommonParameters.DigestionParams.Protease.Name,
                                max_num_internal_cleavages = CommonParameters.DigestionParams.MaxMissedCleavages.ToString(),
                            },

                            parameter = para.ToArray()
                        }
                    },
                }
            };

            _pepxml.msms_run_summary[0].spectrum_query = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query[psms.Count];

            var searchHits = new List <pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit>();

            foreach (var psm in psms)
            {
                PeptideWithSetModifications peptide = psm.BestMatchingPeptides.First().Peptide;

                var mods = new List <pepXML.Generated.modInfoDataTypeMod_aminoacid_mass>();
                foreach (var mod in peptide.AllModsOneIsNterminus)
                {
                    var pepXmlMod = new pepXML.Generated.modInfoDataTypeMod_aminoacid_mass
                    {
                        mass     = (double)mod.Value.MonoisotopicMass,
                        position = (mod.Key - 1).ToString()
                    };
                    mods.Add(pepXmlMod);
                }

                var proteinAccessions = psm.BestMatchingPeptides.Select(p => p.Peptide.Protein.Accession).Distinct();

                var searchHit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit
                {
                    // TODO: handle PSM ambiguity if pepXML supports it (base sequence, mod localization, protein)
                    // TODO: add target/decoy/contaminant designation for each PSM
                    // TODO: add amino acid substitution
                    hit_rank              = 1,
                    peptide               = ((psm.BaseSequence != null) ? psm.BaseSequence : "Ambiguous"),
                    peptide_prev_aa       = peptide.PreviousAminoAcid.ToString(),
                    peptide_next_aa       = peptide.NextAminoAcid.ToString(),
                    protein               = ((peptide.Protein.Accession != null) ? peptide.Protein.Accession : string.Join("|", proteinAccessions)),
                    num_tot_proteins      = (uint)proteinAccessions.Count(),
                    calc_neutral_pep_mass = (float)((psm.PeptideMonisotopicMass != null) ? psm.PeptideMonisotopicMass : float.NaN),
                    massdiff              = ((psm.PeptideMonisotopicMass != null) ? (psm.ScanPrecursorMass - psm.PeptideMonisotopicMass.Value).ToString() : "Ambiguous"),
                    modification_info     = (mods.Count == 0 ? new pepXML.Generated.modInfoDataType {
                        mod_aminoacid_mass = mods.ToArray()
                    } : null),
                    search_score          = new pepXML.Generated.nameValueType[]
                    {
                        new pepXML.Generated.nameValueType {
                            name = "Score", value = psm.Score.ToString()
                        },
                        new pepXML.Generated.nameValueType {
                            name = "Qvalue", value = psm.FdrInfo.QValue.ToString()
                        }
                    },
                };
                searchHits.Add(searchHit);
            }

            for (int i = 0; i < psms.Count; i++)
            {
                _pepxml.msms_run_summary[0].spectrum_query[i] = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_query()
                {
                    spectrum               = fileNameNoExtension + "." + psms[i].ScanNumber.ToString(),
                    start_scan             = Convert.ToUInt32(psms[i].ScanNumber),
                    end_scan               = Convert.ToUInt32(psms[i].ScanNumber),
                    precursor_neutral_mass = (float)psms[i].ScanPrecursorMass,
                    assumed_charge         = psms[i].ScanPrecursorCharge.ToString(),
                    index = Convert.ToUInt32(i + 1),
                    retention_time_sec = (float)(psms[i].ScanRetentionTime * 60),
                    search_result      = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result[1]
                    {
                        new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_result
                        {
                            search_hit = new pepXML.Generated.msms_pipeline_analysisMsms_run_summarySpectrum_querySearch_resultSearch_hit[1]
                            {
                                searchHits[i]
                            }
                        }
                    }
                };
            }

            TextWriter writer = new StreamWriter(Path.Combine(outputPath));

            _indexedSerializer.Serialize(writer, _pepxml);
            writer.Close();
        }
Пример #11
0
        /// <summary>
        /// Searches one spectra file with the supplied tolerances, runs FDR analysis on the
        /// resulting PSMs, and feeds the confident target identifications to the
        /// <c>DataPointAcquisitionEngine</c>.
        /// </summary>
        /// <param name="myMsDataFile">Loaded spectra file to search.</param>
        /// <param name="currentDataFile">Path of the spectra file; its extension-less name labels log/progress output.</param>
        /// <param name="variableModifications">Variable modifications passed to the search engines.</param>
        /// <param name="fixedModifications">Fixed modifications passed to the search engines.</param>
        /// <param name="proteinList">Proteins to search against.</param>
        /// <param name="taskId">Identifier of the running task (first element of every nested id list).</param>
        /// <param name="combinedParameters">Parameters used for scan extraction, ion selection and the classic search.</param>
        /// <param name="initPrecTol">Precursor tolerance; a <c>PpmTolerance</c> selects the ppm search mode, anything else the absolute one.</param>
        /// <param name="initProdTol">Product tolerance handed to the acquisition engine.</param>
        /// <returns>
        /// The acquisition engine's results, or a result with no PSMs and no data points when
        /// no identification passes the filter.
        /// </returns>
        private DataPointAquisitionResults GetDataAcquisitionResults(MsDataFile myMsDataFile, string currentDataFile, List<ModificationWithMass> variableModifications, List<ModificationWithMass> fixedModifications, List<Protein> proteinList, string taskId, CommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
        {
            string spectraFileName = Path.GetFileNameWithoutExtension(currentDataFile);

            // Every engine/log call gets its own freshly allocated id list.
            List<string> NestedIds()
            {
                return new List<string> { taskId, "Individual Spectra Files", spectraFileName };
            }

            MassDiffAcceptor searchMode;
            if (initPrecTol is PpmTolerance)
            {
                searchMode = new SinglePpmAroundZeroSearchMode(initPrecTol.Value);
            }
            else
            {
                searchMode = new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);
            }

            // Translate the enabled ion series into both representations used below:
            // a flags value for the acquisition engine and a list for the search engines.
            FragmentTypes fragmentTypesForCalibration = FragmentTypes.None;
            List<ProductType> productTypes = new List<ProductType>();

            if (combinedParameters.BIons)
            {
                fragmentTypesForCalibration |= FragmentTypes.b;
                productTypes.Add(ProductType.B);
            }
            if (combinedParameters.YIons)
            {
                fragmentTypesForCalibration |= FragmentTypes.y;
                productTypes.Add(ProductType.Y);
            }
            if (combinedParameters.CIons)
            {
                fragmentTypesForCalibration |= FragmentTypes.c;
                productTypes.Add(ProductType.C);
            }
            if (combinedParameters.ZdotIons)
            {
                fragmentTypesForCalibration |= FragmentTypes.zdot;
                productTypes.Add(ProductType.Zdot);
            }

            // MS2 scans ordered by ascending precursor mass; the PSM array has one slot per scan.
            var ms2ScansByMass = GetMs2Scans(
                    myMsDataFile,
                    currentDataFile,
                    combinedParameters.DoPrecursorDeconvolution,
                    combinedParameters.UseProvidedPrecursorInfo,
                    combinedParameters.DeconvolutionIntensityRatio,
                    combinedParameters.DeconvolutionMaxAssumedChargeState,
                    combinedParameters.DeconvolutionMassTolerance)
                .OrderBy(scan => scan.PrecursorMass)
                .ToArray();
            PeptideSpectralMatch[] psmPerScan = new PeptideSpectralMatch[ms2ScansByMass.Length];

            Log("Searching with searchMode: " + searchMode, NestedIds());
            Log("Searching with productMassTolerance: " + initProdTol, NestedIds());

            var classicSearch = new ClassicSearchEngine(psmPerScan, ms2ScansByMass, variableModifications, fixedModifications, proteinList, productTypes, searchMode, combinedParameters, NestedIds());
            classicSearch.Run();

            // Slots that are still null after the search are dropped.
            List<PeptideSpectralMatch> allPsms = psmPerScan.Where(psm => psm != null).ToList();

            var matchingEngine = new SequencesToActualProteinPeptidesEngine(
                allPsms,
                proteinList,
                fixedModifications,
                variableModifications,
                productTypes,
                new List<DigestionParams> { combinedParameters.DigestionParams },
                combinedParameters.ReportAllAmbiguity,
                combinedParameters,
                NestedIds());
            var matchingResults = (SequencesToActualProteinPeptidesEngineResults)matchingEngine.Run();
            var compactPeptideToProteinPeptideMatching = matchingResults.CompactPeptideToProteinPeptideMatching;

            foreach (PeptideSpectralMatch psm in allPsms)
            {
                psm.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
            }

            // Best score first, ties broken by smallest absolute precursor mass error
            // (PSMs without a peptide mass sort last); then keep only the first PSM for
            // each (file, scan number, peptide mass) combination.
            var bestFirst = allPsms
                .OrderByDescending(psm => psm.Score)
                .ThenBy(psm => psm.PeptideMonisotopicMass.HasValue ? Math.Abs(psm.ScanPrecursorMass - psm.PeptideMonisotopicMass.Value) : double.MaxValue);
            allPsms = bestFirst
                .GroupBy(psm => (psm.FullFilePath, psm.ScanNumber, psm.PeptideMonisotopicMass))
                .Select(group => group.First())
                .ToList();

            // Note: the FDR and acquisition engines receive the CommonParameters member,
            // not the combinedParameters argument.
            var fdrEngine = new FdrAnalysisEngine(allPsms, searchMode.NumNotches, CommonParameters, NestedIds());
            fdrEngine.Run();

            List<PeptideSpectralMatch> goodIdentifications = allPsms
                .Where(psm => psm.FdrInfo.QValueNotch < 0.01 && !psm.IsDecoy && psm.FullSequence != null)
                .ToList();

            if (goodIdentifications.Count == 0)
            {
                // Nothing usable: report an empty result.
                return new DataPointAquisitionResults(null, new List<PeptideSpectralMatch>(), new List<LabeledDataPoint>(), new List<LabeledDataPoint>(), 0, 0, 0, 0);
            }

            var acquisitionEngine = new DataPointAcquisitionEngine(
                goodIdentifications,
                myMsDataFile,
                initPrecTol,
                initProdTol,
                CalibrationParameters.NumFragmentsNeededForEveryIdentification,
                CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
                CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
                fragmentTypesForCalibration,
                CommonParameters,
                NestedIds());

            return (DataPointAquisitionResults)acquisitionEngine.Run();
        }
Пример #12
0
        /// <summary>
        /// Executes the Neo sub-task selected by <c>NeoType</c>: aggregating target/decoy search
        /// files, aggregating normal/spliced search files, generating spliced peptide candidates,
        /// or (any other value) pointing the database list at the trans-spliced fusion database.
        /// </summary>
        /// <param name="OutputFolder">
        /// Output folder of this task. Sibling result paths are derived from its name, which is
        /// parsed as a four-character prefix, a task number, then '-' (see the parsing below).
        /// </param>
        /// <param name="dbFilenameList">Databases for the task; reassigned locally in several branches.</param>
        /// <param name="currentRawFileList">Spectra files; the first file's name is used to build *_PSMs.psmtsv paths.</param>
        /// <param name="taskId">Identifier used for status reporting.</param>
        /// <param name="fileSettingsList">Per-file settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
        /// <returns>The <c>MyTaskResults</c> instance created at the start of the method.</returns>
        protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            MyTaskResults = new MyTaskResults(this);

            if (NeoType.Equals(NeoTaskType.AggregateTargetDecoyFiles))
            {
                //getfolders
                if (NeoParameters.DecoyFilePath == null)
                {
                    // Default decoy results: the search task numbered one below this task.
                    NeoParameters.DecoyFilePath = new DirectoryInfo(OutputFolder).Name;
                    string taskString = NeoParameters.DecoyFilePath.Split('-')[0];
                    int    taskNum    = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4));
                    taskNum--;
                    NeoParameters.DecoyFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.DecoyFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                    if (NeoParameters.TargetFilePath == null)
                    {
                        // Both defaulted: target results come from the task one further back.
                        NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name;
                        taskNum--;
                        NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                    }
                }
                if (NeoParameters.TargetFilePath == null)
                {
                    // Only the target path is missing: use the task numbered one below this task.
                    NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name;
                    string taskString = NeoParameters.TargetFilePath.Split('-')[0];
                    int    taskNum    = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4));
                    taskNum--;
                    NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                }
                AggregateSearchFiles.Combine(NeoParameters.TargetFilePath, NeoParameters.DecoyFilePath, OutputFolder + "\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]));
            }
            else if (NeoType.Equals(NeoTaskType.AggregateNormalSplicedFiles))
            {
                //reset database
                // NOTE(review): this reassigns only the local parameter; nothing later in this branch reads it.
                dbFilenameList = StoredDatabases;

                string normalPath = "";
                string cisPath    = new DirectoryInfo(OutputFolder).Name;
                string taskString = cisPath.Split('-')[0];
                int    taskNum    = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4));
                taskNum -= 2;
                // Cis results come from the task two below this one, trans results from the task one below.
                string transPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + (taskNum + 1) + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                cisPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                AggregateSearchFiles.RecursiveNeoAggregation(normalPath, cisPath, OutputFolder, "CisResults.psmtsv");
                AggregateSearchFiles.RecursiveNeoAggregation(normalPath, transPath, OutputFolder, "TransResults.psmtsv");
            }
            else if (NeoType.Equals(NeoTaskType.GenerateSplicedPeptides))
            {
                NeoMassCalculator.ImportMasses();

                MyFileManager myFileManager = new MyFileManager(true);

                //Import Spectra
                for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
                {
                    var origDataFile = currentRawFileList[spectraFileIndex];
                    CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                    var thisId = new List<string> {
                        taskId, "Individual Spectra Files", origDataFile
                    };
                    NewCollection(Path.GetFileName(origDataFile), thisId);
                    Status("Loading spectra file...", thisId);
                    MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks, combinedParams);
                    Status("Getting ms2 scans...", thisId);
                    Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

                    //Import Database
                    Status("Loading modifications...", taskId);

                    // NOTE(review): the two modification lists below are built but not read again in this method.
                    List<ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
                    List<ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType<ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
                    List<string> localizeableModificationTypes       = GlobalVariables.AllModTypesKnown.ToList();

                    // load proteins
                    List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.None, localizeableModificationTypes, combinedParams);

                    //Read N and C files
                    string nPath = NeoParameters.NFilePath;
                    string cPath = NeoParameters.CFilePath;

                    // Remember which termini paths were not supplied; only those are
                    // auto-discovered, so a user-supplied path is never overwritten.
                    bool resolveNPath = nPath == null;
                    bool resolveCPath = cPath == null;

                    if (resolveNPath || resolveCPath)
                    {
                        //if no termini input
                        string   taskHeader = "Task";
                        string[] pathArray  = OutputFolder.Split('\\');
                        string   basePath   = "";
                        for (int i = 0; i < pathArray.Length - 1; i++)
                        {
                            basePath += pathArray[i] + '\\';
                        }
                        string currentTaskNumber = pathArray[pathArray.Length - 1].Split('-')[0];
                        currentTaskNumber = currentTaskNumber.Substring(taskHeader.Length, currentTaskNumber.Length - taskHeader.Length);

                        // A header stays empty when the corresponding path was supplied by the user.
                        string NHeader = "";
                        string CHeader = "";
                        if (resolveCPath)
                        {
                            CHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 1);
                            if (resolveNPath)
                            {
                                NHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 2);
                            }
                        }
                        else
                        {
                            NHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 1);
                        }
                        foreach (string s in Directory.GetDirectories(basePath))
                        {
                            // string.Contains("") is true for every string, so an empty header
                            // must be skipped explicitly; otherwise every directory would match
                            // the N branch and the C branch could never be reached.
                            if (NHeader.Length > 0 && s.Contains(NHeader))
                            {
                                nPath = s;
                            }
                            else if (CHeader.Length > 0 && s.Contains(CHeader))
                            {
                                cPath = s;
                            }
                        }

                        // Discovered entries are task folders; append the results file name to
                        // those only, leaving a supplied file path untouched.
                        string fileName = Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                        if (resolveNPath)
                        {
                            nPath += "\\" + fileName;
                        }
                        if (resolveCPath)
                        {
                            cPath += "\\" + fileName;
                        }
                    }

                    Status("Importing Search Results...", taskId);
                    List<NeoPsm> psms = ImportPsmtsv.ImportNeoPsms(nPath, cPath);

                    //Splice
                    Status("Splicing Fragments...", taskId);
                    List<NeoPsm> candidates = NeoSplicePeptides.SplicePeptides(psms);

                    //Find Ambiguity
                    Status("Identifying Ambiguity...", taskId);
                    NeoFindAmbiguity.FindAmbiguity(candidates, proteinList, arrayOfMs2ScansSortedByMass, dbFilenameList[0].FilePath);

                    //Export Results
                    Status("Exporting Results...", taskId);
                    NeoExport.ExportAll(candidates, arrayOfMs2ScansSortedByMass, OutputFolder);

                    //Switch databases
                    // The reassigned list is what the next loop iteration passes to LoadProteins / FindAmbiguity.
                    string outputFolder = NeoExport.path + NeoExport.folder + @"\" + NeoExport.folder + "FusionDatabaseAppendixNC.fasta";
                    dbFilenameList = new List<DbForTask>()
                    {
                        new DbForTask(outputFolder, false)
                    };
                }
            }
            else //if SearchTransDb
            {
                // NOTE(review): this reassigns only the local parameter; nothing later in this method reads it.
                string outputFolder = NeoExport.path + NeoExport.folder + @"\" + NeoExport.folder + "FusionDatabaseAppendixTS.fasta";
                dbFilenameList = new List<DbForTask>()
                {
                    new DbForTask(outputFolder, false)
                };
            }

            return(MyTaskResults);
        }
        /// <summary>
        /// Splits the crosslink spectral matches by crosslink type, computes Q/P values, writes
        /// the CSV outputs, adds 1% FDR target counts to the task summary and, when requested,
        /// writes one pepXML file per spectra file.
        /// </summary>
        /// <param name="outputFolder">Folder that receives the CSV and pepXML files.</param>
        /// <param name="dbFilenameList">Databases of the task; the first entry's path is passed to the pepXML writer.</param>
        /// <param name="currentRawFileList">Spectra files; one pepXML file is written per entry.</param>
        /// <param name="taskId">Identifier used for reporting.</param>
        /// <param name="allPsms">All crosslink spectral matches to post-process.</param>
        /// <param name="commonParameters">Parameters forwarded to the Q/P value computation and the pepXML writer.</param>
        /// <param name="xlSearchParameters">Crosslink search parameters; <c>WritePepXml</c> enables pepXML output.</param>
        /// <param name="proteinList">Proteins forwarded to the pepXML writer.</param>
        /// <param name="variableModifications">Variable modifications forwarded to the pepXML writer.</param>
        /// <param name="fixedModifications">Fixed modifications forwarded to the pepXML writer.</param>
        /// <param name="localizeableModificationTypes">Modification types forwarded to the pepXML writer.</param>
        /// <param name="MyTaskResults">Results object that receives the summary text and is returned.</param>
        /// <returns>The <paramref name="MyTaskResults"/> instance that was passed in.</returns>
        public MyTaskResults Run(string outputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, List<CrosslinkSpectralMatch> allPsms, CommonParameters commonParameters, XlSearchParameters xlSearchParameters, List<Protein> proteinList, List<Modification> variableModifications, List<Modification> fixedModifications, List<string> localizeableModificationTypes, MyTaskResults MyTaskResults)
        {
            // Dead-end matches come in four flavours that are reported together.
            bool IsDeadEnd(CrosslinkSpectralMatch csm)
            {
                return csm.CrossType == PsmCrossType.DeadEnd
                       || csm.CrossType == PsmCrossType.DeadEndH2O
                       || csm.CrossType == PsmCrossType.DeadEndNH2
                       || csm.CrossType == PsmCrossType.DeadEndTris;
            }

            // Summary filters (1% FDR): q-value is checked first, then the decoy flags.
            bool IsTargetAtOnePercent(CrosslinkSpectralMatch csm)
            {
                return csm.FdrInfo.QValue <= 0.01 && !csm.IsDecoy;
            }

            bool IsTargetPairAtOnePercent(CrosslinkSpectralMatch csm)
            {
                return IsTargetAtOnePercent(csm) && !csm.BetaPeptide.IsDecoy;
            }

            // pepXML filters (5% FDR): decoy flags are checked first, then the q-value.
            bool IsTargetForPepXml(CrosslinkSpectralMatch csm)
            {
                return !csm.IsDecoy && csm.FdrInfo.QValue <= 0.05;
            }

            bool IsTargetPairForPepXml(CrosslinkSpectralMatch csm)
            {
                return !csm.IsDecoy && !csm.BetaPeptide.IsDecoy && csm.FdrInfo.QValue <= 0.05;
            }

            // inter-crosslinks; different proteins are linked
            List<CrosslinkSpectralMatch> interCsms =
                (from csm in allPsms
                 where csm.CrossType == PsmCrossType.Inter
                 orderby csm.XLTotalScore descending
                 select csm).ToList();

            // intra-crosslinks; crosslinks within a protein
            List<CrosslinkSpectralMatch> intraCsms =
                (from csm in allPsms
                 where csm.CrossType == PsmCrossType.Intra
                 orderby csm.XLTotalScore descending
                 select csm).ToList();

            // The remaining categories are ranked by Score instead of XLTotalScore.
            List<CrosslinkSpectralMatch> singlePsms =
                (from csm in allPsms
                 where csm.CrossType == PsmCrossType.Single
                 orderby csm.Score descending
                 select csm).ToList();

            List<CrosslinkSpectralMatch> loopPsms =
                (from csm in allPsms
                 where csm.CrossType == PsmCrossType.Loop
                 orderby csm.Score descending
                 select csm).ToList();

            List<CrosslinkSpectralMatch> deadendPsms =
                (from csm in allPsms
                 where IsDeadEnd(csm)
                 orderby csm.Score descending
                 select csm).ToList();

            ComputeXlinkQandPValues(allPsms, intraCsms, interCsms, commonParameters, taskId);

            WriteCsvFiles(outputFolder, interCsms, intraCsms, singlePsms, loopPsms, deadendPsms, taskId, xlSearchParameters);

            MyTaskResults.AddTaskSummaryText("Target inter-crosslinks within 1% FDR: " + interCsms.Count(csm => IsTargetPairAtOnePercent(csm)));
            MyTaskResults.AddTaskSummaryText("Target intra-crosslinks within 1% FDR: " + intraCsms.Count(csm => IsTargetPairAtOnePercent(csm)));
            MyTaskResults.AddTaskSummaryText("Target single peptides within 1% FDR: " + singlePsms.Count(csm => IsTargetAtOnePercent(csm)));
            MyTaskResults.AddTaskSummaryText("Target loop-linked peptides within 1% FDR: " + loopPsms.Count(csm => IsTargetAtOnePercent(csm)));
            MyTaskResults.AddTaskSummaryText("Target deadend peptides within 1% FDR: " + deadendPsms.Count(csm => IsTargetAtOnePercent(csm)));

            if (!xlSearchParameters.WritePepXml)
            {
                return MyTaskResults;
            }

            // write pepXML: pool the filtered categories (intra, inter, single, loop, dead-end)
            // and order the pooled list by scan number.
            List<CrosslinkSpectralMatch> pepXmlCandidates = intraCsms.Where(csm => IsTargetPairForPepXml(csm))
                .Concat(interCsms.Where(csm => IsTargetPairForPepXml(csm)))
                .Concat(singlePsms.Where(csm => IsTargetForPepXml(csm)))
                .Concat(loopPsms.Where(csm => IsTargetForPepXml(csm)))
                .Concat(deadendPsms.Where(csm => IsTargetForPepXml(csm)))
                .OrderBy(csm => csm.ScanNumber)
                .ToList();

            foreach (string fullFilePath in currentRawFileList)
            {
                string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
                List<CrosslinkSpectralMatch> matchesForFile = pepXmlCandidates.Where(csm => csm.FullFilePath == fullFilePath).ToList();

                WriteFile.WritePepXML_xl(matchesForFile, proteinList, dbFilenameList[0].FilePath, variableModifications, fixedModifications, localizeableModificationTypes, outputFolder, fileNameNoExtension, commonParameters, xlSearchParameters);
                FinishedWritingFile(Path.Combine(outputFolder, fileNameNoExtension + ".pep.XML"), new List<string> { taskId });
            }

            return MyTaskResults;
        }
Пример #14
0
        /// <summary>
        /// Creates a search task (base constructor argument <c>MyTask.Search</c>) whose
        /// <c>CommonParameters</c> and <c>SearchParameters</c> are freshly constructed
        /// via their parameterless constructors.
        /// </summary>
        public SearchTask()
            : base(MyTask.Search)
        {
            // General parameters first, then the search-specific ones.
            CommonParameters = new CommonParameters();
            SearchParameters = new SearchParameters();
        }
Пример #15
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following calibration settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the calibration task
            Status("Calibrating...", new List <string> {
                taskId
            });
            MyTaskResults = new MyTaskResults(this)
            {
                NewSpectra           = new List <string>(),
                NewFileSpecificTomls = new List <string>()
            };

            var           myFileManager = new MyFileManager(true);
            List <string> spectraFilesAfterCalibration = new List <string>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                bool couldNotFindEnoughDatapoints = false;

                // get filename stuff
                var    originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
                var    originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);
                string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".mzML");

                // mark the file as in-progress
                StartingDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });

                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                // load the file
                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files"
                });

                var myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters);

                // get datapoints to fit calibration function to
                Status("Acquiring calibration data points...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                DataPointAquisitionResults acquisitionResults = null;

                for (int i = 1; i <= 5; i++)
                {
                    acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                    // enough data points to calibrate?
                    if (acquisitionResults.Psms.Count >= NumRequiredPsms && acquisitionResults.Ms1List.Count > NumRequiredMs1Datapoints && acquisitionResults.Ms2List.Count > NumRequiredMs2Datapoints)
                    {
                        break;
                    }

                    if (i == 1) // failed round 1
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(20);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(50);
                    }
                    else if (i == 2) // failed round 2
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(30);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(100);
                    }
                    else if (i == 3) // failed round 3
                    {
                        CommonParameters.PrecursorMassTolerance = new PpmTolerance(40);
                        CommonParameters.ProductMassTolerance   = new PpmTolerance(150);
                    }
                    else // failed round 4
                    {
                        if (acquisitionResults.Psms.Count < NumRequiredPsms)
                        {
                            Warn("Calibration failure! Could not find enough high-quality PSMs. Required " + NumRequiredPsms + ", saw " + acquisitionResults.Psms.Count);
                        }
                        if (acquisitionResults.Ms1List.Count < NumRequiredMs1Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS1 datapoints. Required " + NumRequiredMs1Datapoints + ", saw " + acquisitionResults.Ms1List.Count);
                        }
                        if (acquisitionResults.Ms2List.Count < NumRequiredMs2Datapoints)
                        {
                            Warn("Calibration failure! Could not find enough MS2 datapoints. Required " + NumRequiredMs2Datapoints + ", saw " + acquisitionResults.Ms2List.Count);
                        }

                        couldNotFindEnoughDatapoints = true;
                        FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                            taskId, "Individual Spectra Files", originalUncalibratedFilePath
                        });
                        break;
                    }

                    Warn("Could not find enough PSMs to calibrate with; opening up tolerances to " +
                         Math.Round(CommonParameters.PrecursorMassTolerance.Value, 2) + " ppm precursor and " +
                         Math.Round(CommonParameters.ProductMassTolerance.Value, 2) + " ppm product");
                }

                if (couldNotFindEnoughDatapoints)
                {
                    spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(currentRawFileList[spectraFileIndex]));
                    ReportProgress(new ProgressEventArgs(100, "Failed to calibrate!", new List <string> {
                        taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                    }));
                    continue;
                }

                // stats before calibration
                int    prevPsmCount = acquisitionResults.Psms.Count;
                double preCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double preCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // generate calibration function and shift data points
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                CalibrationEngine engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                engine.Run();

                //update file
                myMsDataFile = engine.CalibratedDataFile;

                // do another search to evaluate calibration results
                Status("Post-calibration search...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                //generate calibration function and shift data points AGAIN because it's fast and contributes new data
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                engine = new CalibrationEngine(myMsDataFile, acquisitionResults, CommonParameters, FileSpecificParameters, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                engine.Run();

                //update file
                myMsDataFile = engine.CalibratedDataFile;

                // write the calibrated mzML file
                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);
                myFileManager.DoneWithFile(originalUncalibratedFilePath);

                // stats after calibration
                int    postCalibrationPsmCount          = acquisitionResults.Psms.Count;
                double postCalibrationPrecursorErrorIqr = acquisitionResults.PsmPrecursorIqrPpmError;
                double postCalibrationProductErrorIqr   = acquisitionResults.PsmProductIqrPpmError;

                // did the data improve? (not used for anything yet...)
                bool improvement = ImprovGlobal(preCalibrationPrecursorErrorIqr, preCalibrationProductErrorIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorErrorIqr, postCalibrationProductErrorIqr);

                // write toml settings for the calibrated file
                var newTomlFileName = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + CalibSuffix + ".toml");

                var fileSpecificParams = new FileSpecificParameters();

                // carry over file-specific parameters from the uncalibrated file to the calibrated one
                if (fileSettingsList[spectraFileIndex] != null)
                {
                    fileSpecificParams = fileSettingsList[spectraFileIndex].Clone();
                }

                //suggest 4 * interquartile range as the ppm tolerance
                fileSpecificParams.PrecursorMassTolerance = new PpmTolerance((4.0 * postCalibrationPrecursorErrorIqr) + Math.Abs(acquisitionResults.PsmPrecursorMedianPpmError));
                fileSpecificParams.ProductMassTolerance   = new PpmTolerance((4.0 * postCalibrationProductErrorIqr) + Math.Abs(acquisitionResults.PsmProductMedianPpmError));

                Toml.WriteFile(fileSpecificParams, newTomlFileName, tomlConfig);

                FinishedWritingFile(newTomlFileName, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });

                // finished calibrating this file
                spectraFilesAfterCalibration.Add(Path.GetFileNameWithoutExtension(calibratedFilePath));
                FinishedWritingFile(calibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                MyTaskResults.NewSpectra.Add(calibratedFilePath);
                MyTaskResults.NewFileSpecificTomls.Add(newTomlFileName);
                FinishedDataFile(originalUncalibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilePath
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }));
            }

            // re-write experimental design (if it has been defined) with new calibrated file names
            string assumedPathToExperDesign = Directory.GetParent(currentRawFileList.First()).FullName;

            assumedPathToExperDesign = Path.Combine(assumedPathToExperDesign, GlobalVariables.ExperimentalDesignFileName);

            if (File.Exists(assumedPathToExperDesign))
            {
                WriteNewExperimentalDesignFile(assumedPathToExperDesign, OutputFolder, spectraFilesAfterCalibration);
            }

            // finished calibrating all files for the task
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            return(MyTaskResults);
        }
Пример #16
0
        /// <summary>
        /// Runs the search task: prepares SILAC labels, loads modifications and proteins, searches every
        /// spectra file with the configured search type (modern, non-specific or classic) and then hands
        /// the collected PSMs to a <c>PostSearchAnalysisTask</c>.
        /// </summary>
        /// <param name="OutputFolder">Output folder handed to post-search analysis; individual-file results use its "Individual File Results" subfolder.</param>
        /// <param name="dbFilenameList">Databases passed to protein loading and to index generation.</param>
        /// <param name="currentRawFileList">Paths of the spectra files to search, processed in list order.</param>
        /// <param name="taskId">Identifier used as the root of the nested ids in status and progress reports.</param>
        /// <param name="fileSettingsList">File-specific settings, indexed in parallel with <paramref name="currentRawFileList"/>.</param>
        /// <returns>The results returned by running the post-search analysis task.</returns>
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            if (SearchParameters.DoQuantification)
            {
                // disable quantification if a .mgf is being used
                if (currentRawFileList.Any(x => Path.GetExtension(x).Equals(".mgf", StringComparison.OrdinalIgnoreCase)))
                {
                    SearchParameters.DoQuantification = false;
                }
                //if we're doing SILAC, assign and add the silac labels to the residue dictionary
                else if (SearchParameters.SilacLabels != null || SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
                {
                    char heavyLabel = 'a'; //char to assign
                    //add the Turnoverlabels to the silacLabels list. They weren't there before just to prevent duplication in the tomls
                    if (SearchParameters.StartTurnoverLabel != null || SearchParameters.EndTurnoverLabel != null)
                    {
                        //original silacLabels object is null, so we need to initialize it
                        // NOTE(review): this replaces SilacLabels unconditionally; confirm that SilacLabels is
                        // never already populated when a turnover label is set.
                        SearchParameters.SilacLabels = new List <SilacLabel>();
                        if (SearchParameters.StartTurnoverLabel != null)
                        {
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.StartTurnoverLabel, heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            SearchParameters.StartTurnoverLabel = updatedLabel.updatedLabel;
                            SearchParameters.SilacLabels.Add(SearchParameters.StartTurnoverLabel);
                        }
                        if (SearchParameters.EndTurnoverLabel != null)
                        {
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.EndTurnoverLabel, heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            SearchParameters.EndTurnoverLabel = updatedLabel.updatedLabel;
                            SearchParameters.SilacLabels.Add(SearchParameters.EndTurnoverLabel);
                        }
                    }
                    else
                    {
                        //replace every silac label with its updated version, threading the next heavy-label char through each call
                        List <SilacLabel> updatedLabels = new List <SilacLabel>();
                        for (int i = 0; i < SearchParameters.SilacLabels.Count; i++)
                        {
                            var updatedLabel = SilacConversions.UpdateAminoAcidLabel(SearchParameters.SilacLabels[i], heavyLabel);
                            heavyLabel = updatedLabel.nextHeavyLabel;
                            updatedLabels.Add(updatedLabel.updatedLabel);
                        }
                        SearchParameters.SilacLabels = updatedLabels;
                    }
                }
            }
            //if no quant, remove any silac labels that may have been added, because they screw up downstream analysis
            if (!SearchParameters.DoQuantification) //using "if" instead of "else", because DoQuantification can change if it's an mgf
            {
                SearchParameters.SilacLabels = null;
            }

            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, SearchParameters.SearchTarget, SearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            // write prose settings
            ProseCreatedWhileRunning.Append("The following search settings were used: ");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("max mods per peptide = " + CommonParameters.DigestionParams.MaxModsForPeptide + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");
            ProseCreatedWhileRunning.Append("precursor mass tolerance = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("report PSM ambiguity = " + CommonParameters.ReportAllAmbiguity + ". ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy)
                                            + " non-decoy protein entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            // start the search task
            MyTaskResults = new MyTaskResults(this);
            List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>();

            //generate an array to store category specific fdr values (for speedy semi/nonspecific searches)
            int numFdrCategories = (int)(Enum.GetValues(typeof(FdrCategory)).Cast <FdrCategory>().Last() + 1); //+1 because it starts at zero

            List <PeptideSpectralMatch>[] allCategorySpecificPsms = new List <PeptideSpectralMatch> [numFdrCategories];
            for (int i = 0; i < numFdrCategories; i++)
            {
                allCategorySpecificPsms[i] = new List <PeptideSpectralMatch>();
            }

            // no quantification is performed in this method; null is forwarded to post-search analysis
            FlashLfqResults flashLfqResults = null;

            MyFileManager myFileManager = new MyFileManager(SearchParameters.DisposeOfFileWhenDone);

            // deferred query; it is enumerated once, when the digestion-params set is built for post-search analysis
            var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

            int    completedFiles = 0;
            // the file loop below is a sequential for loop; these locks guard index generation and the shared PSM lists
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);
            Status("Searching files...", new List <string> {
                taskId, "Individual Spectra Files"
            });

            // per file (keyed by file name without extension): { number of MS2 scans in the file, number of MS2 scan objects searched }
            Dictionary <string, int[]> numMs2SpectraPerFile = new Dictionary <string, int[]>();

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                if (GlobalVariables.StopLoops)
                {
                    break;
                }

                var origDataFile = currentRawFileList[spectraFileIndex];

                // mark the file as in-progress
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                // merge the task-wide parameters with this file's specific settings
                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                MassDiffAcceptor massDiffAcceptor = GetMassDiffAcceptor(combinedParams.PrecursorMassTolerance, SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);
                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);
                Status("Getting ms2 scans...", thisId);
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
                numMs2SpectraPerFile.Add(Path.GetFileNameWithoutExtension(origDataFile), new int[] { myMsDataFile.GetAllScansList().Count(p => p.MsnOrder == 2), arrayOfMs2ScansSortedByMass.Length });
                myFileManager.DoneWithFile(origDataFile);

                // one slot per MS2 scan object; filled in by the modern and classic engines
                PeptideSpectralMatch[] fileSpecificPsms = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

                // modern search
                if (SearchParameters.SearchType == SearchType.Modern)
                {
                    for (int currentPartition = 0; currentPartition < combinedParams.TotalPartitions; currentPartition++)
                    {
                        List <PeptideWithSetModifications> peptideIndex = null;
                        // slice of the protein list belonging to this partition (integer arithmetic keeps the slices contiguous)
                        List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / combinedParams.TotalPartitions,
                                                                                ((currentPartition + 1) * proteinList.Count / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count / combinedParams.TotalPartitions));

                        Status("Getting fragment dictionary...", new List <string> {
                            taskId
                        });
                        var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                             SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, combinedParams, this.FileSpecificParameters,
                                                             SearchParameters.MaxFragmentSize, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List <string> {
                            taskId
                        });
                        List <int>[] fragmentIndex  = null;
                        List <int>[] precursorIndex = null;

                        lock (indexLock)
                        {
                            GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                        }

                        Status("Searching files...", taskId);

                        new ModernSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition,
                                               combinedParams, this.FileSpecificParameters, massDiffAcceptor, SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                        ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + combinedParams.TotalPartitions + "!", thisId));
                        if (GlobalVariables.StopLoops)
                        {
                            break;
                        }
                    }
                }
                // nonspecific search
                else if (SearchParameters.SearchType == SearchType.NonSpecific)
                {
                    PeptideSpectralMatch[][] fileSpecificPsmsSeparatedByFdrCategory = new PeptideSpectralMatch[numFdrCategories][]; //generate an array of all possible locals
                    for (int i = 0; i < numFdrCategories; i++)                                                                      //only add if we're using for FDR, else ignore it as null.
                    {
                        fileSpecificPsmsSeparatedByFdrCategory[i] = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                    }

                    //create params for N, C, or both if semi
                    List <CommonParameters> paramsToUse = new List <CommonParameters> {
                        combinedParams
                    };
                    if (combinedParams.DigestionParams.SearchModeType == CleavageSpecificity.Semi) //if semi, we need to do both N and C to hit everything
                    {
                        paramsToUse.Clear();
                        List <FragmentationTerminus> terminiToUse = new List <FragmentationTerminus> {
                            FragmentationTerminus.N, FragmentationTerminus.C
                        };
                        foreach (FragmentationTerminus terminus in terminiToUse) //set both termini
                        {
                            paramsToUse.Add(combinedParams.CloneWithNewTerminus(terminus));
                        }
                    }

                    //Compress array of deconvoluted ms2 scans to avoid searching the same ms2 multiple times while still identifying coisolated peptides
                    List <int>[] coisolationIndex = new List <int>[] { new List <int>() };
                    if (arrayOfMs2ScansSortedByMass.Length != 0)
                    {
                        // bucket the scan-array indices by one-based scan number, then drop scan numbers with no entries
                        int maxScanNumber = arrayOfMs2ScansSortedByMass.Max(x => x.OneBasedScanNumber);
                        coisolationIndex = new List <int> [maxScanNumber + 1];
                        for (int i = 0; i < arrayOfMs2ScansSortedByMass.Length; i++)
                        {
                            int scanNumber = arrayOfMs2ScansSortedByMass[i].OneBasedScanNumber;
                            if (coisolationIndex[scanNumber] == null)
                            {
                                coisolationIndex[scanNumber] = new List <int> {
                                    i
                                };
                            }
                            else
                            {
                                coisolationIndex[scanNumber].Add(i);
                            }
                        }
                        coisolationIndex = coisolationIndex.Where(x => x != null).ToArray();
                    }

                    //foreach terminus we're going to look at
                    foreach (CommonParameters paramToUse in paramsToUse)
                    {
                        //foreach database partition
                        for (int currentPartition = 0; currentPartition < paramToUse.TotalPartitions; currentPartition++)
                        {
                            List <PeptideWithSetModifications> peptideIndex = null;

                            List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count / paramToUse.TotalPartitions,
                                                                                    ((currentPartition + 1) * proteinList.Count / paramToUse.TotalPartitions) - (currentPartition * proteinList.Count / paramToUse.TotalPartitions));

                            List <int>[] fragmentIndex  = null;
                            List <int>[] precursorIndex = null;

                            Status("Getting fragment dictionary...", new List <string> {
                                taskId
                            });
                            var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                                                 SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, currentPartition, SearchParameters.DecoyType, paramToUse, this.FileSpecificParameters,
                                                                 SearchParameters.MaxFragmentSize, true, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List <string> {
                                taskId
                            });
                            lock (indexLock)
                            {
                                GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);
                            }

                            Status("Searching files...", taskId);

                            new NonSpecificEnzymeSearchEngine(fileSpecificPsmsSeparatedByFdrCategory, arrayOfMs2ScansSortedByMass, coisolationIndex, peptideIndex, fragmentIndex,
                                                              precursorIndex, currentPartition, paramToUse, this.FileSpecificParameters, variableModifications, massDiffAcceptor,
                                                              SearchParameters.MaximumMassThatFragmentIonScoreIsDoubled, thisId).Run();

                            ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + paramToUse.TotalPartitions + "!", thisId));
                            if (GlobalVariables.StopLoops)
                            {
                                break;
                            }
                        }
                    }
                    // append this file's per-category PSMs to the task-wide per-category lists
                    lock (psmLock)
                    {
                        for (int i = 0; i < allCategorySpecificPsms.Length; i++)
                        {
                            if (allCategorySpecificPsms[i] != null)
                            {
                                allCategorySpecificPsms[i].AddRange(fileSpecificPsmsSeparatedByFdrCategory[i]);
                            }
                        }
                    }
                }
                // classic search
                else
                {
                    Status("Starting search...", thisId);
                    new ClassicSearchEngine(fileSpecificPsms, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, SearchParameters.SilacLabels,
                                            SearchParameters.StartTurnoverLabel, SearchParameters.EndTurnoverLabel, proteinList, massDiffAcceptor, combinedParams, this.FileSpecificParameters, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search!", thisId));
                }

                // for a non-specific search this array is never passed to an engine; allPsms is replaced after the loop in that case
                lock (psmLock)
                {
                    allPsms.AddRange(fileSpecificPsms);
                }

                completedFiles++;
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                // report percent complete; multiply before dividing so that integer division
                // does not truncate every value below 100% down to zero
                ReportProgress(new ProgressEventArgs(100 * completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            int numNotches = GetNumNotches(SearchParameters.MassDiffAcceptorType, SearchParameters.CustomMdac);

            //resolve category specific fdrs (for speedy semi and nonspecific searches)
            if (SearchParameters.SearchType == SearchType.NonSpecific)
            {
                allPsms = NonSpecificEnzymeSearchEngine.ResolveFdrCategorySpecificPsms(allCategorySpecificPsms, numNotches, taskId, CommonParameters, FileSpecificParameters);
            }

            // bundle everything the post-search analysis needs
            PostSearchAnalysisParameters parameters = new PostSearchAnalysisParameters
            {
                SearchTaskResults             = MyTaskResults,
                SearchTaskId                  = taskId,
                SearchParameters              = SearchParameters,
                ProteinList                   = proteinList,
                AllPsms                       = allPsms,
                VariableModifications         = variableModifications,
                FixedModifications            = fixedModifications,
                ListOfDigestionParams         = new HashSet <DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams)),
                CurrentRawFileList            = currentRawFileList,
                MyFileManager                 = myFileManager,
                NumNotches                    = numNotches,
                OutputFolder                  = OutputFolder,
                IndividualResultsOutputFolder = Path.Combine(OutputFolder, "Individual File Results"),
                FlashLfqResults               = flashLfqResults,
                FileSettingsList              = fileSettingsList,
                NumMs2SpectraPerFile          = numMs2SpectraPerFile,
                DatabaseFilenameList          = dbFilenameList
            };
            PostSearchAnalysisTask postProcessing = new PostSearchAnalysisTask
            {
                Parameters             = parameters,
                FileSpecificParameters = this.FileSpecificParameters,
                CommonParameters       = CommonParameters
            };

            return(postProcessing.Run());
        }
Пример #17
0
        /// <summary>
        /// Rewrites the toml file at <paramref name="fileName"/> in place. Each line that mentions one of the
        /// recognised setting names is replaced, either with the text produced by <c>GetCorrectValue</c> for
        /// <paramref name="tomlFileName"/> or, for ion settings excluded by the terminus, with "<c>Name = false</c>".
        /// Every other line is written back unchanged.
        /// </summary>
        /// <param name="tomlFileName">Forwarded to <c>GetCorrectValue</c> as the source of the replacement values.</param>
        /// <param name="fileName">Path of the toml file that is read and then overwritten.</param>
        /// <param name="ye5">Not used by this method.</param>
        /// <param name="terminusType">Decides which ion settings are kept and whether terminus-dependent settings are rewritten.</param>
        /// <param name="spliceSearch">When true, the variable-modification and protease lines are left as they are.</param>
        private static void UpdateTomls(string tomlFileName, string fileName, CommonParameters ye5, TerminusType terminusType, bool spliceSearch)
        {
            // terminus checks are loop-invariant, so evaluate them once up front
            bool noTerminus           = terminusType.Equals(TerminusType.None);
            bool nTerminalIonsAllowed = terminusType.Equals(TerminusType.N) || noTerminus;
            bool cTerminalIonsAllowed = terminusType.Equals(TerminusType.C) || noTerminus;

            List <string> rewrittenLines = new List <string>();

            // the whole file is read before anything is written, so overwriting it afterwards is safe
            foreach (string currentLine in File.ReadAllLines(fileName))
            {
                string replacement;

                // the order of these checks matters: the first matching setting name wins
                if (currentLine.Contains("LocalizeAll") && noTerminus)
                {
                    replacement = GetCorrectValue("LocalizeAll", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("ListOfModsFixed"))
                {
                    replacement = GetCorrectValue("ListOfModsFixed", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("ListOfModsVariable") && noTerminus && !spliceSearch)
                {
                    replacement = GetCorrectValue("ListOfModsVariable", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("BIons"))
                {
                    replacement = nTerminalIonsAllowed ? GetCorrectValue("BIons", tomlFileName, currentLine) : "BIons = false";
                }
                else if (currentLine.Contains("YIons"))
                {
                    replacement = cTerminalIonsAllowed ? GetCorrectValue("YIons", tomlFileName, currentLine) : "YIons = false";
                }
                else if (currentLine.Contains("ZdotIons"))
                {
                    replacement = cTerminalIonsAllowed ? GetCorrectValue("ZdotIons", tomlFileName, currentLine) : "ZdotIons = false";
                }
                else if (currentLine.Contains("CIons"))
                {
                    replacement = nTerminalIonsAllowed ? GetCorrectValue("CIons", tomlFileName, currentLine) : "CIons = false";
                }
                else if (currentLine.Contains("ProductMassTolerance"))
                {
                    replacement = GetCorrectValue("ProductMassTolerance", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("PrecursorMassTolerance"))
                {
                    replacement = GetCorrectValue("PrecursorMassTolerance", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("MaxMissedCleavages"))
                {
                    replacement = GetCorrectValue("MaxMissedCleavages", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("InitiatorMethionineBehavior"))
                {
                    replacement = GetCorrectValue("InitiatorMethionineBehavior", tomlFileName, currentLine);
                }
                // NOTE(review): rewritten only when no terminus is set; confirm that this restriction
                // (rather than its opposite) is what is intended for MinPeptideLength.
                else if (currentLine.Contains("MinPeptideLength") && noTerminus)
                {
                    replacement = GetCorrectValue("MinPeptideLength", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("MaxPeptideLength"))
                {
                    replacement = GetCorrectValue("MaxPeptideLength", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("MaxModificationIsoforms"))
                {
                    replacement = GetCorrectValue("MaxModificationIsoforms", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("MaxModsForPeptide"))
                {
                    replacement = GetCorrectValue("MaxModsForPeptide", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("SemiProteaseDigestion"))
                {
                    replacement = GetCorrectValue("SemiProteaseDigestion", tomlFileName, currentLine);
                }
                else if (currentLine.Contains("TerminusTypeSemiProtease"))
                {
                    replacement = GetCorrectValue("TerminusTypeSemiProtease", tomlFileName, currentLine);
                }
                // "Protease" has to be tested last: other setting names contain it and would otherwise be caught here
                else if (currentLine.Contains("Protease") && noTerminus && !spliceSearch)
                {
                    replacement = GetCorrectValue("Protease", tomlFileName, currentLine);
                }
                else
                {
                    replacement = currentLine;
                }

                rewrittenLines.Add(replacement);
            }

            using (StreamWriter writer = new StreamWriter(fileName))
            {
                foreach (string outputLine in rewrittenLines)
                {
                    writer.WriteLine(outputLine);
                }
            }
        }
Пример #18
0
 /// <summary>
 /// Creates a crosslink search task whose common parameters and crosslink search parameters
 /// are both default-constructed.
 /// </summary>
 public XLSearchTask()
     : base(MyTask.XLSearch)
 {
     this.CommonParameters = new CommonParameters();
     this.XlSearchParameters = new XlSearchParameters();
 }
Пример #19
0
        /// <summary>
        /// Runs the crosslink search over every spectra file, classifies the resulting matches
        /// (inter/intra crosslinks, singles, loops, dead-ends), runs the FDR analyses and writes
        /// the result files into <paramref name="OutputFolder"/>.
        /// </summary>
        /// <param name="OutputFolder">Directory that receives the .tsv, Percolator and pepXML output.</param>
        /// <param name="dbFilenameList">Protein databases to load; the first entry's path is also handed to the pepXML writer.</param>
        /// <param name="currentRawFileList">Spectra files to search; indexed in parallel with <paramref name="fileSettingsList"/>.</param>
        /// <param name="taskId">Identifier attached to status, progress and file-written notifications.</param>
        /// <param name="fileSettingsList">File-specific parameters, one entry per spectra file.</param>
        /// <returns>The <c>MyTaskResults</c> instance created for this run, with the summary lines added.</returns>
        protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            MyTaskResults = new MyTaskResults(this);
            List<CrosslinkSpectralMatch> allPsms = new List<CrosslinkSpectralMatch>();

            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, XlSearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            var crosslinker = XlSearchParameters.Crosslinker;

            MyFileManager myFileManager = new MyFileManager(true);

            int completedFiles = 0;

            Status("Searching files...", taskId);

            // Human-readable description of the search settings.
            ProseCreatedWhileRunning.Append("The following crosslink discovery were used: ");
            ProseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; ");
            ProseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; ");
            ProseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; ");
            ProseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSites + "; ");

            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

            // The "; " suffix belongs outside string.Join; inside, it was concatenated with the
            // enumerable itself and the prose showed a type name instead of the modification ids.
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; ");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; ");

            ProseCreatedWhileRunning.Append("parent mass tolerance(s) = " + CommonParameters.PrecursorMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Count(p => p.IsContaminant) + " contaminant sequences. ");

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                var origDataFile = currentRawFileList[spectraFileIndex];
                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                var thisId = new List<string> { taskId, "Individual Spectra Files", origDataFile };
                NewCollection(Path.GetFileName(origDataFile), thisId);

                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

                Status("Getting ms2 scans...", thisId);

                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();

                // One slot per MS2 scan; slots that remain null are dropped when collecting results below.
                CrosslinkSpectralMatch[] newPsms = new CrosslinkSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

                // The loop bound uses the same partition count as the slice arithmetic below,
                // so the slices always cover the protein list exactly.
                int totalPartitions = combinedParams.TotalPartitions;
                for (int currentPartition = 0; currentPartition < totalPartitions; currentPartition++)
                {
                    List<PeptideWithSetModifications> peptideIndex = null;

                    // Contiguous slice of the protein list belonging to this partition.
                    int partitionStart = currentPartition * proteinList.Count / totalPartitions;
                    int partitionEnd = (currentPartition + 1) * proteinList.Count / totalPartitions;
                    List<Protein> proteinListSubset = proteinList.GetRange(partitionStart, partitionEnd - partitionStart);

                    Status("Getting fragment dictionary...", new List<string> { taskId });
                    var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, combinedParams, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
                    List<int>[] fragmentIndex = null;
                    List<int>[] precursorIndex = null;

                    GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

                    Status("Searching files...", taskId);
                    new CrosslinkSearchEngine(newPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, currentPartition, combinedParams, crosslinker,
                                              XlSearchParameters.RestrictToTopNHits, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.XlQuench_H2O,
                                              XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + totalPartitions + "!", thisId));
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }
                }

                allPsms.AddRange(newPsms.Where(p => p != null));

                completedFiles++;

                // Progress is reported on the same 0-100 scale as the other calls; the plain integer
                // quotient completedFiles / Count was 0 for every file except the last.
                ReportProgress(new ProgressEventArgs(100 * completedFiles / currentRawFileList.Count, "Searching...", new List<string> { taskId, "Individual Spectra Files" }));
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List<string> { taskId, "Individual Spectra Files" }));

            allPsms = allPsms.OrderByDescending(p => p.XLTotalScore).ToList();

            var allPsmsXL = allPsms.Where(p => p.CrossType == PsmCrossType.Cross).ToList();

            // inter-crosslinks; different proteins are linked
            var interCsms = allPsmsXL.Where(p => !p.ProteinAccession.Equals(p.BetaPeptide.ProteinAccession)).ToList();

            foreach (var item in interCsms)
            {
                item.CrossType = PsmCrossType.Inter;
            }

            // intra-crosslinks; crosslinks within a protein
            var intraCsms = allPsmsXL.Where(p => p.ProteinAccession.Equals(p.BetaPeptide.ProteinAccession)).ToList();

            foreach (var item in intraCsms)
            {
                item.CrossType = PsmCrossType.Intra;
            }

            // calculate FDR
            DoCrosslinkFdrAnalysis(interCsms);
            DoCrosslinkFdrAnalysis(intraCsms);
            SingleFDRAnalysis(allPsms, new List<string> { taskId });

            // calculate protein crosslink residue numbers
            foreach (var csm in allPsmsXL)
            {
                // alpha peptide crosslink residue in the protein
                csm.XlProteinPos = csm.OneBasedStartResidueInProtein.Value + csm.LinkPositions[0] - 1;

                // beta crosslink residue in protein
                csm.BetaPeptide.XlProteinPos = csm.BetaPeptide.OneBasedStartResidueInProtein.Value + csm.BetaPeptide.LinkPositions[0] - 1;
            }

            // write interlink CSMs
            if (interCsms.Any())
            {
                string file = Path.Combine(OutputFolder, "XL_Interlinks.tsv");
                WritePsmCrossToTsv(interCsms, file, 2);
                FinishedWritingFile(file, new List<string> { taskId });
            }
            MyTaskResults.AddNiceText("Target inter-crosslinks within 1% FDR: " + interCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

            if (XlSearchParameters.WriteOutputForPercolator)
            {
                var interPsmsXLPercolator = interCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
                WriteCrosslinkToTxtForPercolator(interPsmsXLPercolator, OutputFolder, "XL_Interlinks_Percolator", crosslinker, new List<string> { taskId });
            }

            // write intralink CSMs
            if (intraCsms.Any())
            {
                string file = Path.Combine(OutputFolder, "XL_Intralinks.tsv");
                WritePsmCrossToTsv(intraCsms, file, 2);
                FinishedWritingFile(file, new List<string> { taskId });
            }
            MyTaskResults.AddNiceText("Target intra-crosslinks within 1% FDR: " + intraCsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.BetaPeptide.IsDecoy));

            if (XlSearchParameters.WriteOutputForPercolator)
            {
                var intraPsmsXLPercolator = intraCsms.Where(p => p.Score >= 2 && p.BetaPeptide.Score >= 2).OrderBy(p => p.ScanNumber).ToList();
                WriteCrosslinkToTxtForPercolator(intraPsmsXLPercolator, OutputFolder, "XL_Intralinks_Percolator", crosslinker, new List<string> { taskId });
            }

            // write single peptides
            var singlePsms = allPsms.Where(p => p.CrossType == PsmCrossType.Single).ToList();

            if (singlePsms.Any())
            {
                string writtenFileSingle = Path.Combine(OutputFolder, "SinglePeptides" + ".tsv");
                WritePsmCrossToTsv(singlePsms, writtenFileSingle, 1);
                FinishedWritingFile(writtenFileSingle, new List<string> { taskId });
            }
            MyTaskResults.AddNiceText("Target single peptides within 1% FDR: " + singlePsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

            // write loops
            var loopPsms = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).ToList();

            if (loopPsms.Any())
            {
                string writtenFileLoop = Path.Combine(OutputFolder, "Looplinks" + ".tsv");
                WritePsmCrossToTsv(loopPsms, writtenFileLoop, 1);
                FinishedWritingFile(writtenFileLoop, new List<string> { taskId });
            }
            MyTaskResults.AddNiceText("Target loop-linked peptides within 1% FDR: " + loopPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

            // write deadends
            var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd ||
                                            p.CrossType == PsmCrossType.DeadEndH2O ||
                                            p.CrossType == PsmCrossType.DeadEndNH2 ||
                                            p.CrossType == PsmCrossType.DeadEndTris).ToList();

            if (deadendPsms.Any())
            {
                string writtenFileDeadend = Path.Combine(OutputFolder, "Deadends" + ".tsv");
                WritePsmCrossToTsv(deadendPsms, writtenFileDeadend, 1);
                FinishedWritingFile(writtenFileDeadend, new List<string> { taskId });
            }
            MyTaskResults.AddNiceText("Target deadend peptides within 1% FDR: " + deadendPsms.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy));

            // write pepXML: target matches with q-value <= 0.05, one output per spectra file
            if (XlSearchParameters.WritePepXml)
            {
                List<CrosslinkSpectralMatch> writeToXml = new List<CrosslinkSpectralMatch>();
                writeToXml.AddRange(intraCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
                writeToXml.AddRange(interCsms.Where(p => !p.IsDecoy && !p.BetaPeptide.IsDecoy && p.FdrInfo.QValue <= 0.05));
                writeToXml.AddRange(singlePsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
                writeToXml.AddRange(loopPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
                writeToXml.AddRange(deadendPsms.Where(p => !p.IsDecoy && p.FdrInfo.QValue <= 0.05));
                writeToXml = writeToXml.OrderBy(p => p.ScanNumber).ToList();

                foreach (var fullFilePath in currentRawFileList)
                {
                    string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
                    WritePepXML_xl(writeToXml.Where(p => p.FullFilePath == fullFilePath).ToList(), proteinList, dbFilenameList[0].FilePath, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, new List<string> { taskId });
                }
            }

            return MyTaskResults;
        }
Пример #20
0
 /// <summary>
 /// Creates a GPTMD task (<c>MyTask.Gptmd</c>) with newly constructed common parameters
 /// and GPTMD parameters.
 /// </summary>
 public GptmdTask()
     : base(MyTask.Gptmd)
 {
     // Both parameter sets start from their parameterless constructors.
     CommonParameters = new CommonParameters();
     GptmdParameters = new GptmdParameters();
 }
Пример #21
0
        /// <summary>
        /// Returns the spectra file for <paramref name="origDataFile"/>, loading it and storing it in
        /// <c>MyMsDataFiles</c> when no non-null entry exists yet.
        /// </summary>
        /// <param name="origDataFile">Path of the spectra file; the reader is chosen by its extension (.mzML, .mgf, otherwise the Thermo reader on .NET Framework builds).</param>
        /// <param name="topNpeaks">Forwarded to <c>FilteringParams</c>.</param>
        /// <param name="minRatio">Forwarded to <c>FilteringParams</c>.</param>
        /// <param name="trimMs1Peaks">Forwarded to <c>FilteringParams</c>.</param>
        /// <param name="trimMsMsPeaks">Forwarded to <c>FilteringParams</c>.</param>
        /// <param name="commonParameters">Supplies <c>MaxThreadsToUsePerFile</c> for the mzML reader.</param>
        /// <returns>The cached or newly loaded data file.</returns>
        public MsDataFile LoadFile(string origDataFile, int? topNpeaks, double? minRatio, bool trimMs1Peaks, bool trimMsMsPeaks, CommonParameters commonParameters)
        {
            FilteringParams filter = new FilteringParams(topNpeaks, minRatio, 1, trimMs1Peaks, trimMsMsPeaks);

            if (MyMsDataFiles.TryGetValue(origDataFile, out MsDataFile value) && value != null)
            {
                return value;
            }

            // By now know that need to load this file!!!
            lock (FileLoadingLock) // Lock because reading is sequential
            {
                // Another thread may have loaded this file while we were waiting for the lock;
                // check again so the same file is not read from disk twice.
                if (MyMsDataFiles.TryGetValue(origDataFile, out value) && value != null)
                {
                    return value;
                }

                string extension = Path.GetExtension(origDataFile);

                if (extension.Equals(".mzML", StringComparison.OrdinalIgnoreCase))
                {
                    MyMsDataFiles[origDataFile] = Mzml.LoadAllStaticData(origDataFile, filter, commonParameters.MaxThreadsToUsePerFile);
                }
                else if (extension.Equals(".mgf", StringComparison.OrdinalIgnoreCase))
                {
                    MyMsDataFiles[origDataFile] = Mgf.LoadAllStaticData(origDataFile, filter);
                }
                else
                {
#if NETFRAMEWORK
                    MyMsDataFiles[origDataFile] = ThermoStaticData.LoadAllStaticData(origDataFile, filter);
#else
                    // NOTE(review): nothing is stored for origDataFile on this path, so the lookup
                    // below depends on an entry already existing - confirm the intended behavior.
                    Warn("No capability for reading " + origDataFile);
#endif
                }
                return MyMsDataFiles[origDataFile];
            }
        }
        /// <summary>
        /// Post-processes crosslink search results: resolves protein-position ambiguities, splits
        /// crosslinked matches into inter- and intra-protein sets, runs the FDR analyses, writes the
        /// per-category .tsv files (plus optional Percolator and pepXML output) into
        /// <paramref name="OutputFolder"/> and appends one summary line per category.
        /// </summary>
        /// <param name="OutputFolder">Directory that receives every output file.</param>
        /// <param name="dbFilenameList">Databases used for the search; the first entry's path is handed to the pepXML writer.</param>
        /// <param name="currentRawFileList">Spectra files; one pepXML file is written per entry when enabled.</param>
        /// <param name="taskId">Identifier attached to the FDR analysis and to file-written notifications.</param>
        /// <param name="fileSettingsList">Not read by this method.</param>
        /// <param name="allPsms">All spectral matches to classify and report; their <c>CrossType</c> is updated for crosslinks.</param>
        /// <param name="commonParameters">Forwarded to the single-peptide FDR analysis and the pepXML writer.</param>
        /// <param name="xlSearchParameters">Supplies the Percolator/pepXML switches and the crosslinker.</param>
        /// <param name="proteinList">Forwarded to the pepXML writer.</param>
        /// <param name="variableModifications">Forwarded to the pepXML writer.</param>
        /// <param name="fixedModifications">Forwarded to the pepXML writer.</param>
        /// <param name="localizeableModificationTypes">Forwarded to the pepXML writer.</param>
        /// <param name="MyTaskResults">Results object that receives the summary text and is returned.</param>
        /// <returns>The <paramref name="MyTaskResults"/> instance that was passed in.</returns>
        public MyTaskResults Run(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList, List<CrosslinkSpectralMatch> allPsms, CommonParameters commonParameters, XlSearchParameters xlSearchParameters, List<Protein> proteinList, List<Modification> variableModifications, List<Modification> fixedModifications, List<string> localizeableModificationTypes, MyTaskResults MyTaskResults)
        {
            // Reports a finished output file under this task's id.
            void AnnounceWritten(string path) => FinishedWritingFile(path, new List<string> { taskId });

            // Summary filters: q-value <= 0.01 and not a decoy (both peptides for crosslinks).
            bool CountsAsTarget(CrosslinkSpectralMatch m) => m.FdrInfo.QValue <= 0.01 && !m.IsDecoy;
            bool CountsAsTargetPair(CrosslinkSpectralMatch m) => m.FdrInfo.QValue <= 0.01 && !m.IsDecoy && !m.BetaPeptide.IsDecoy;

            // pepXML filters: not a decoy (both peptides for crosslinks) and q-value <= 0.05.
            bool ExportableTarget(CrosslinkSpectralMatch m) => !m.IsDecoy && m.FdrInfo.QValue <= 0.05;
            bool ExportableTargetPair(CrosslinkSpectralMatch m) => !m.IsDecoy && !m.BetaPeptide.IsDecoy && m.FdrInfo.QValue <= 0.05;

            // Percolator input requires a score of at least 2 on both peptides.
            bool PercolatorCandidate(CrosslinkSpectralMatch m) => m.Score >= 2 && m.BetaPeptide.Score >= 2;

            allPsms.ForEach(match => match.ResolveProteinPosAmbiguitiesForXl());

            List<CrosslinkSpectralMatch> crosslinked = allPsms.Where(m => m.CrossType == PsmCrossType.Cross).ToList();

            // Inter-crosslinks: the two peptides are not from the same protein.
            List<CrosslinkSpectralMatch> interCsms = crosslinked.Where(m => !m.IsIntraCsm()).ToList();
            interCsms.ForEach(m => m.CrossType = PsmCrossType.Inter);

            // Intra-crosslinks: both peptides are within one protein.
            List<CrosslinkSpectralMatch> intraCsms = crosslinked.Where(m => m.IsIntraCsm()).ToList();
            intraCsms.ForEach(m => m.CrossType = PsmCrossType.Intra);

            // FDR: crosslink sets separately, then everything through the single-peptide analysis.
            DoCrosslinkFdrAnalysis(interCsms);
            DoCrosslinkFdrAnalysis(intraCsms);
            SingleFDRAnalysis(allPsms, commonParameters, new List<string> { taskId });

            // ---- inter-crosslinks ----
            if (interCsms.Any())
            {
                string interPath = Path.Combine(OutputFolder, "XL_Interlinks.tsv");
                WriteFile.WritePsmCrossToTsv(interCsms, interPath, 2);
                AnnounceWritten(interPath);
            }
            MyTaskResults.AddTaskSummaryText("Target inter-crosslinks within 1% FDR: " + interCsms.Count(CountsAsTargetPair));

            if (xlSearchParameters.WriteOutputForPercolator)
            {
                List<CrosslinkSpectralMatch> interForPercolator = interCsms.Where(PercolatorCandidate).OrderBy(m => m.ScanNumber).ToList();
                WriteFile.WriteCrosslinkToTxtForPercolator(interForPercolator, OutputFolder, "XL_Interlinks_Percolator", xlSearchParameters.Crosslinker);
                AnnounceWritten(Path.Combine(OutputFolder, "XL_Interlinks_Percolator.txt"));
            }

            // ---- intra-crosslinks ----
            if (intraCsms.Any())
            {
                string intraPath = Path.Combine(OutputFolder, "XL_Intralinks.tsv");
                WriteFile.WritePsmCrossToTsv(intraCsms, intraPath, 2);
                AnnounceWritten(intraPath);
            }
            MyTaskResults.AddTaskSummaryText("Target intra-crosslinks within 1% FDR: " + intraCsms.Count(CountsAsTargetPair));

            if (xlSearchParameters.WriteOutputForPercolator)
            {
                List<CrosslinkSpectralMatch> intraForPercolator = intraCsms.Where(PercolatorCandidate).OrderBy(m => m.ScanNumber).ToList();
                WriteFile.WriteCrosslinkToTxtForPercolator(intraForPercolator, OutputFolder, "XL_Intralinks_Percolator", xlSearchParameters.Crosslinker);
                AnnounceWritten(Path.Combine(OutputFolder, "XL_Intralinks_Percolator.txt"));
            }

            // ---- single peptides ----
            List<CrosslinkSpectralMatch> singlePsms = allPsms.Where(m => m.CrossType == PsmCrossType.Single).ToList();
            if (singlePsms.Any())
            {
                string singlePath = Path.Combine(OutputFolder, "SinglePeptides.tsv");
                WriteFile.WritePsmCrossToTsv(singlePsms, singlePath, 1);
                AnnounceWritten(singlePath);
            }
            MyTaskResults.AddTaskSummaryText("Target single peptides within 1% FDR: " + singlePsms.Count(CountsAsTarget));

            // ---- loop-links ----
            List<CrosslinkSpectralMatch> loopPsms = allPsms.Where(m => m.CrossType == PsmCrossType.Loop).ToList();
            if (loopPsms.Any())
            {
                string loopPath = Path.Combine(OutputFolder, "Looplinks.tsv");
                WriteFile.WritePsmCrossToTsv(loopPsms, loopPath, 1);
                AnnounceWritten(loopPath);
            }
            MyTaskResults.AddTaskSummaryText("Target loop-linked peptides within 1% FDR: " + loopPsms.Count(CountsAsTarget));

            // ---- dead-ends (all four dead-end cross types) ----
            List<CrosslinkSpectralMatch> deadendPsms = allPsms
                .Where(m => m.CrossType == PsmCrossType.DeadEnd
                         || m.CrossType == PsmCrossType.DeadEndH2O
                         || m.CrossType == PsmCrossType.DeadEndNH2
                         || m.CrossType == PsmCrossType.DeadEndTris)
                .ToList();
            if (deadendPsms.Any())
            {
                string deadendPath = Path.Combine(OutputFolder, "Deadends.tsv");
                WriteFile.WritePsmCrossToTsv(deadendPsms, deadendPath, 1);
                AnnounceWritten(deadendPath);
            }
            MyTaskResults.AddTaskSummaryText("Target deadend peptides within 1% FDR: " + deadendPsms.Count(CountsAsTarget));

            // ---- pepXML ----
            if (xlSearchParameters.WritePepXml)
            {
                // Category order (intra, inter, single, loop, dead-end) is kept ahead of the
                // sort by scan number, so ties keep that category order.
                List<CrosslinkSpectralMatch> exportable = intraCsms.Where(ExportableTargetPair)
                    .Concat(interCsms.Where(ExportableTargetPair))
                    .Concat(singlePsms.Where(ExportableTarget))
                    .Concat(loopPsms.Where(ExportableTarget))
                    .Concat(deadendPsms.Where(ExportableTarget))
                    .OrderBy(m => m.ScanNumber)
                    .ToList();

                foreach (string rawFilePath in currentRawFileList)
                {
                    string baseName = Path.GetFileNameWithoutExtension(rawFilePath);
                    List<CrosslinkSpectralMatch> matchesForFile = exportable.Where(m => m.FullFilePath == rawFilePath).ToList();

                    WriteFile.WritePepXML_xl(matchesForFile, proteinList, dbFilenameList[0].FilePath, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, baseName, commonParameters, xlSearchParameters);
                    AnnounceWritten(Path.Combine(OutputFolder, baseName + ".pep.XML"));
                }
            }

            return MyTaskResults;
        }
Пример #23
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
        {
            MyTaskResults = new MyTaskResults(this);
            List <List <GlycoSpectralMatch> > ListOfGsmsPerMS2Scan = new List <List <GlycoSpectralMatch> >();

            LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

            // load proteins
            List <Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, _glycoSearchParameters.DecoyType, localizeableModificationTypes, CommonParameters);

            MyFileManager myFileManager = new MyFileManager(true);

            int completedFiles = 0;

            Status("Searching files...", taskId);
            ProseCreatedWhileRunning.Append("\n");
            ProseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; \n");
            ProseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; \n");
            ProseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; \n");
            ProseCreatedWhileRunning.Append(CommonParameters.DigestionParams.MaxPeptideLength == int.MaxValue ?
                                            "maximum peptide length = unspecified; " :
                                            "maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; \n");
            ProseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; \n");
            ProseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; \n");
            ProseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.IdWithMotif)) + "; \n");
            ProseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.IdWithMotif)) + "; \n");
            ProseCreatedWhileRunning.Append("parent mass tolerance(s) = " + CommonParameters.PrecursorMassTolerance + "; \n");
            ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; \n");
            ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. \n");
            if (_glycoSearchParameters.GlycoSearchType == GlycoSearchType.OGlycanSearch)
            {
                ProseCreatedWhileRunning.Append("The O-glycan database: " + _glycoSearchParameters.OGlycanDatabasefile + "\n");
            }
            else if (_glycoSearchParameters.GlycoSearchType == GlycoSearchType.NGlycanSearch)
            {
                ProseCreatedWhileRunning.Append("The N-glycan database: " + _glycoSearchParameters.OGlycanDatabasefile + "\n");
            }
            else
            {
                ProseCreatedWhileRunning.Append("The O-glycan database: " + _glycoSearchParameters.OGlycanDatabasefile + "\n");
                ProseCreatedWhileRunning.Append("The N-glycan database: " + _glycoSearchParameters.NGlycanDatabasefile + "\n");
            }

            ProseCreatedWhileRunning.Append("\n");

            for (int spectraFileIndex = 0; spectraFileIndex < currentRawFileList.Count; spectraFileIndex++)
            {
                var origDataFile = currentRawFileList[spectraFileIndex];
                CommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);

                Status("Loading spectra file...", thisId);
                MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

                Status("Getting ms2 scans...", thisId);

                Ms2ScanWithSpecificMass[]   arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
                List <GlycoSpectralMatch>[] newCsmsPerMS2ScanPerFile    = new List <GlycoSpectralMatch> [arrayOfMs2ScansSortedByMass.Length];

                for (int currentPartition = 0; currentPartition < CommonParameters.TotalPartitions; currentPartition++)
                {
                    List <PeptideWithSetModifications> peptideIndex = null;

                    //When partition, the proteinList will be split for each Thread.
                    List <Protein> proteinListSubset = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));

                    Status("Getting fragment dictionary...", new List <string> {
                        taskId
                    });

                    //Only reverse Decoy for glyco search has been tested and are set as fixed parameter.
                    var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, null, null, currentPartition, _glycoSearchParameters.DecoyType, combinedParams, this.FileSpecificParameters, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), TargetContaminantAmbiguity.RemoveContaminant, new List <string> {
                        taskId
                    });
                    List <int>[] fragmentIndex  = null;
                    List <int>[] precursorIndex = null;
                    GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, ref precursorIndex, proteinList, taskId);

                    //The second Fragment index is for 'MS1-HCD_MS1-ETD_MS2s' type of data. If LowCID is used for MS1, ion-index is not allowed to use.
                    List <int>[] secondFragmentIndex = null;
                    //if (combinedParams.MS2ChildScanDissociationType != DissociationType.LowCID
                    //&& !CrosslinkSearchEngine.DissociationTypeGenerateSameTypeOfIons(combinedParams.DissociationType, combinedParams.MS2ChildScanDissociationType))
                    //{
                    //    //Becuase two different type of dissociation methods are used, the parameters are changed with different dissociation type.
                    //    var secondCombinedParams = CommonParameters.CloneWithNewDissociationType(combinedParams.MS2ChildScanDissociationType);
                    //    var secondIndexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, null, null, null, currentPartition, _glycoSearchParameters.DecoyType, secondCombinedParams, this.FileSpecificParameters, 30000.0, false, dbFilenameList.Select(p => new FileInfo(p.FilePath)).ToList(), new List<string> { taskId });
                    //    GenerateSecondIndexes(indexEngine, secondIndexEngine, dbFilenameList, ref secondFragmentIndex, proteinList, taskId);
                    //}

                    Status("Searching files...", taskId);
                    new GlycoSearchEngine(newCsmsPerMS2ScanPerFile, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, secondFragmentIndex, currentPartition, combinedParams, this.FileSpecificParameters,
                                          _glycoSearchParameters.OGlycanDatabasefile, _glycoSearchParameters.NGlycanDatabasefile, _glycoSearchParameters.GlycoSearchType, _glycoSearchParameters.GlycoSearchTopNum, _glycoSearchParameters.MaximumOGlycanAllowed, _glycoSearchParameters.OxoniumIonFilt, thisId).Run();

                    ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + CommonParameters.TotalPartitions + "!", thisId));
                    if (GlobalVariables.StopLoops)
                    {
                        break;
                    }
                }

                ListOfGsmsPerMS2Scan.AddRange(newCsmsPerMS2ScanPerFile.Where(p => p != null).ToList());

                completedFiles++;
                ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            }

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            //For every Ms2Scans, each have a list of candidates psms. The allPsms from GlycoSearchEngine is the list (all ms2scans) of list (each ms2scan) of psm (all candidate psm).
            //Currently, only keep the first scan for consideration.
            List <GlycoSpectralMatch> GsmPerScans = ListOfGsmsPerMS2Scan.Select(p => p.First()).ToList();

            var filteredAllPsms = new List <GlycoSpectralMatch>();

            //For each MS2 scan, try to find the best candidate PSMs from the PSM list, do the localization analysis, and add them to filteredAllPsms.
            foreach (var gsmsPerScan in GsmPerScans.GroupBy(p => p.ScanNumber))
            {
                var glycos = RemoveSimilarSequenceDuplicates(gsmsPerScan.OrderByDescending(p => p.Score).ToList());

                foreach (var glycoSpectralMatch in glycos)
                {
                    if (glycoSpectralMatch.LocalizationGraphs != null)
                    {
                        List <Route> localizationCandidates = new List <Route>();

                        for (int i = 0; i < glycoSpectralMatch.LocalizationGraphs.Count; i++)
                        {
                            var allPathWithMaxScore = LocalizationGraph.GetAllHighestScorePaths(glycoSpectralMatch.LocalizationGraphs[i].array, glycoSpectralMatch.LocalizationGraphs[i].ChildModBoxes);

                            foreach (var path in allPathWithMaxScore)
                            {
                                var local = LocalizationGraph.GetLocalizedPath(glycoSpectralMatch.LocalizationGraphs[i], path);
                                local.ModBoxId = glycoSpectralMatch.LocalizationGraphs[i].ModBoxId;
                                localizationCandidates.Add(local);
                            }
                        }

                        glycoSpectralMatch.Routes = localizationCandidates;
                    }

                    if (glycoSpectralMatch.Routes != null)
                    {
                        LocalizationLevel localLevel;
                        glycoSpectralMatch.LocalizedGlycan   = GlycoSpectralMatch.GetLocalizedGlycan(glycoSpectralMatch.Routes, out localLevel);
                        glycoSpectralMatch.LocalizationLevel = localLevel;

                        //Localization PValue.
                        if (localLevel == LocalizationLevel.Level1 || localLevel == LocalizationLevel.Level2)
                        {
                            List <Route> allRoutes = new List <Route>();
                            foreach (var graph in glycoSpectralMatch.LocalizationGraphs)
                            {
                                allRoutes.AddRange(LocalizationGraph.GetAllPaths_CalP(graph, glycoSpectralMatch.ScanInfo_p, glycoSpectralMatch.Thero_n));
                            }
                            glycoSpectralMatch.SiteSpeciLocalProb = LocalizationGraph.CalSiteSpecificLocalizationProbability(allRoutes, glycoSpectralMatch.LocalizationGraphs.First().ModPos);
                        }
                    }

                    filteredAllPsms.Add(glycoSpectralMatch);
                }
            }

            PostGlycoSearchAnalysisTask postGlycoSearchAnalysisTask = new PostGlycoSearchAnalysisTask();

            postGlycoSearchAnalysisTask.FileSpecificParameters = this.FileSpecificParameters;
            return(postGlycoSearchAnalysisTask.Run(OutputFolder, dbFilenameList, currentRawFileList, taskId, fileSettingsList, filteredAllPsms.OrderByDescending(p => p.Score).ToList(), CommonParameters, _glycoSearchParameters, proteinList, variableModifications, fixedModifications, localizeableModificationTypes, MyTaskResults));
        }