예제 #1
0
 public TwoPassCrosslinkSearchEngine(List <PsmCross> globalPsmsCross, Ms2ScanWithSpecificMass[] listOfSortedms2Scans, List <CompactPeptide> peptideIndex, List <int>[] fragmentIndex, List <ProductType> lp, int currentPartition, ICommonParameters CommonParameters, bool addCompIons, Tolerance XLPrecusorMsTl, CrosslinkerTypeClass crosslinker, bool CrosslinkSearchTop, int CrosslinkSearchTopNum, bool quench_H2O, bool quench_NH2, bool quench_Tris, bool charge_2_3, bool charge_2_3_PrimeFragment, List <string> nestedIds) : base(nestedIds)
 {
     this.globalPsmsCross      = globalPsmsCross;
     this.listOfSortedms2Scans = listOfSortedms2Scans;
     this.peptideIndex         = peptideIndex;
     this.fragmentIndex        = fragmentIndex;
     this.lp = lp;
     this.currentPartition = currentPartition + 1;
     this.CommonParameters = CommonParameters;
     this.addCompIons      = addCompIons;
     //Here use LowTheoreticalDiffAcceptor in practice doesn't work in 12/2/2017
     this.massDiffAcceptor  = new OpenSearchMode();
     this.dissociationTypes = DetermineDissociationType(lp);
     this.XLPrecusorMsTl    = XLPrecusorMsTl;
     XLPrecusorSearchMode   = new SinglePpmAroundZeroSearchMode(XLPrecusorMsTl.Value);
     //if (XLBetaPrecusorMsTl.ToString().Contains("Absolute"))
     //{
     //    XLPrecusorSearchMode = new SingleAbsoluteAroundZeroSearchMode(XLPrecusorMsTl.Value);
     //}
     this.crosslinker              = crosslinker;
     this.CrosslinkSearchTop       = CrosslinkSearchTop;
     this.CrosslinkSearchTopNum    = CrosslinkSearchTopNum;
     this.quench_H2O               = quench_H2O;
     this.quench_NH2               = quench_NH2;
     this.quench_Tris              = quench_Tris;
     this.charge_2_3               = charge_2_3;
     this.charge_2_3_PrimeFragment = charge_2_3_PrimeFragment;
 }
예제 #2
0
 private void Text_TextChanged(object sender, TextChangedEventArgs e)
 {
     ICommonParameters.GetCommonData().AdditionalRequirements.Clear();
     foreach (var block in AddParamsBlocks)
     {
         ICommonParameters.GetCommonData().AdditionalRequirements.Add(block.Text.Text);
     }
 }
예제 #3
0
 public IndexingEngine(List <Protein> proteinList, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <ProductType> lp, int currentPartition, DecoyType decoyType, IEnumerable <IDigestionParams> CollectionOfDigestionParams, ICommonParameters commonParams, double maxFragmentSize, List <string> nestedIds) : base(nestedIds)
 {
     this.proteinList           = proteinList;
     this.variableModifications = variableModifications;
     this.fixedModifications    = fixedModifications;
     this.lp = lp;
     this.currentPartition            = currentPartition + 1;
     this.decoyType                   = decoyType;
     this.CollectionOfDigestionParams = CollectionOfDigestionParams;
     this.commonParams                = commonParams;
     this.maxFragmentSize             = maxFragmentSize;
 }
 public CrosslinkAnalysisEngine(List <PsmCross> newPsms, Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatching, List <Protein> proteinList, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <ProductType> lp, string OutputFolder, CrosslinkerTypeClass crosslinker, TerminusType terminusType, ICommonParameters CommonParameters, List <string> nestedIds) : base(nestedIds)
 {
     this.newPsms = newPsms;
     this.compactPeptideToProteinPeptideMatching = compactPeptideToProteinPeptideMatching;
     this.proteinList           = proteinList;
     this.variableModifications = variableModifications;
     this.fixedModifications    = fixedModifications;
     this.lp               = lp;
     this.OutputFolder     = OutputFolder;
     this.crosslinker      = crosslinker;
     this.terminusType     = terminusType;
     this.CommonParameters = CommonParameters;
 }
예제 #5
0
 public ModernSearchEngine(PeptideSpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] listOfSortedms2Scans, List <CompactPeptide> peptideIndex, List <int>[] fragmentIndex, List <ProductType> lp, int currentPartition, ICommonParameters CommonParameters, bool addCompIons, MassDiffAcceptor massDiffAcceptor, double maximumMassThatFragmentIonScoreIsDoubled, List <string> nestedIds) : base(nestedIds)
 {
     this.globalPsms           = globalPsms;
     this.listOfSortedms2Scans = listOfSortedms2Scans;
     this.peptideIndex         = peptideIndex;
     this.fragmentIndex        = fragmentIndex;
     this.lp = lp;
     this.currentPartition  = currentPartition + 1;
     this.CommonParameters  = CommonParameters;
     this.addCompIons       = addCompIons;
     this.massDiffAcceptor  = massDiffAcceptor;
     this.dissociationTypes = DetermineDissociationType(lp);
     this.maximumMassThatFragmentIonScoreIsDoubled = maximumMassThatFragmentIonScoreIsDoubled;
 }
예제 #6
0
 public ClassicSearchEngine(PeptideSpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] arrayOfSortedMS2Scans, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <Protein> proteinList, List <ProductType> lp, MassDiffAcceptor searchMode, bool addCompIons, ICommonParameters CommonParameters, Tolerance productMassTolerance, List <string> nestedIds) : base(nestedIds)
 {
     this.peptideSpectralMatches = globalPsms;
     this.arrayOfSortedMS2Scans  = arrayOfSortedMS2Scans;
     this.myScanPrecursorMasses  = arrayOfSortedMS2Scans.Select(b => b.PrecursorMass).ToArray();
     this.variableModifications  = variableModifications;
     this.fixedModifications     = fixedModifications;
     this.proteins             = proteinList;
     this.searchMode           = searchMode;
     this.lp                   = lp;
     this.addCompIons          = addCompIons;
     this.dissociationTypes    = DetermineDissociationType(lp);
     this.commonParameters     = CommonParameters;
     this.productMassTolerance = productMassTolerance;
 }
 public NonSpecificEnzymeSearchEngine(PeptideSpectralMatch[] globalPsms, Ms2ScanWithSpecificMass[] listOfSortedms2Scans, List <CompactPeptide> peptideIndex, List <int>[] fragmentIndex, List <int>[] fragmentIndexPrecursor, List <ProductType> lp, int currentPartition, ICommonParameters CommonParameters, bool addCompIons, MassDiffAcceptor massDiffAcceptor, double maximumMassThatFragmentIonScoreIsDoubled, List <string> nestedIds) : base(globalPsms, listOfSortedms2Scans, peptideIndex, fragmentIndex, lp, currentPartition, CommonParameters, addCompIons, massDiffAcceptor, maximumMassThatFragmentIonScoreIsDoubled, nestedIds)
 {
     this.fragmentIndexPrecursor = fragmentIndexPrecursor;
 }
예제 #8
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
        {
            myTaskResults = new MyTaskResults(this);
            List <PsmCross> allPsms = new List <PsmCross>();
            var             compactPeptideToProteinPeptideMatch = new Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> >();

            Status("Loading modifications...", taskId);

            #region Load modifications

            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList();

            #endregion Load modifications

            Status("Loading proteins...", new List <string> {
                taskId
            });
            var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, XlSearchParameters.DecoyType, localizeableModificationTypes, b.IsContaminant, out Dictionary <string, Modification> unknownModifications)).ToList();

            List <ProductType> ionTypes = new List <ProductType>();
            if (CommonParameters.BIons)
            {
                ionTypes.Add(ProductType.BnoB1ions);
            }
            if (CommonParameters.YIons)
            {
                ionTypes.Add(ProductType.Y);
            }
            if (CommonParameters.ZdotIons)
            {
                ionTypes.Add(ProductType.Zdot);
            }
            if (CommonParameters.CIons)
            {
                ionTypes.Add(ProductType.C);
            }
            TerminusType terminusType = ProductTypeMethod.IdentifyTerminusType(ionTypes);

            var crosslinker = new CrosslinkerTypeClass();
            crosslinker.SelectCrosslinker(XlSearchParameters.CrosslinkerType);
            if (XlSearchParameters.CrosslinkerType == CrosslinkerType.UserDefined)
            {
                crosslinker.CrosslinkerName    = XlSearchParameters.UdXLkerName;
                crosslinker.Cleavable          = XlSearchParameters.UdXLkerCleavable;
                crosslinker.TotalMass          = XlSearchParameters.UdXLkerTotalMass.HasValue ? (double)XlSearchParameters.UdXLkerTotalMass : 9999;
                crosslinker.CleaveMassShort    = XlSearchParameters.UdXLkerShortMass.HasValue ? (double)XlSearchParameters.UdXLkerShortMass : 9999;
                crosslinker.CleaveMassLong     = XlSearchParameters.UdXLkerShortMass.HasValue ? (double)XlSearchParameters.UdXLkerLongMass : 9999;
                crosslinker.CrosslinkerModSite = XlSearchParameters.UdXLkerResidue;
                crosslinker.LoopMass           = XlSearchParameters.UdXLkerLoopMass.HasValue ? (double)XlSearchParameters.UdXLkerLoopMass : 9999;
                crosslinker.DeadendMassH2O     = XlSearchParameters.UdXLkerDeadendMassH2O.HasValue ? (double)XlSearchParameters.UdXLkerDeadendMassH2O : 9999;
                crosslinker.DeadendMassNH2     = XlSearchParameters.UdXLkerDeadendMassNH2.HasValue ? (double)XlSearchParameters.UdXLkerDeadendMassNH2 : 9999;
                crosslinker.DeadendMassTris    = XlSearchParameters.UdXLkerDeadendMassTris.HasValue ? (double)XlSearchParameters.UdXLkerDeadendMassTris : 9999;
            }

            ParallelOptions parallelOptions = new ParallelOptions();
            if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
            {
                parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
            }
            MyFileManager myFileManager = new MyFileManager(XlSearchParameters.DisposeOfFileWhenDone);

            HashSet <IDigestionParams> ListOfDigestionParams = GetListOfDistinctDigestionParams(CommonParameters, fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b)));

            int    completedFiles = 0;
            object indexLock      = new object();
            object psmLock        = new object();

            Status("Searching files...", taskId);

            #region proseCreatedWhileRunning

            proseCreatedWhileRunning.Append("The following crosslink discovery were used: ");
            proseCreatedWhileRunning.Append("crosslinker name = " + crosslinker.CrosslinkerName + "; ");
            proseCreatedWhileRunning.Append("crosslinker type = " + crosslinker.Cleavable + "; ");
            proseCreatedWhileRunning.Append("crosslinker mass = " + crosslinker.TotalMass + "; ");
            proseCreatedWhileRunning.Append("crosslinker modification site(s) = " + crosslinker.CrosslinkerModSite + "; ");

            proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            if (CommonParameters.DigestionParams.MaxPeptideLength == null)
            {
                proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
            }
            else
            {
                proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            }
            proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

            proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");

            proseCreatedWhileRunning.Append("parent mass tolerance(s) = " + XlSearchParameters.XlPrecusorMsTl + "; ");
            proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + "; ");
            proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");

            #endregion proseCreatedWhileRunning

            Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
            {
                var origDataFile = currentRawFileList[spectraFileIndex];
                ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
                List <PsmCross> newPsms          = new List <PsmCross>();

                var thisId = new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                };
                NewCollection(Path.GetFileName(origDataFile), thisId);
                Status("Loading spectra file...", thisId);
                IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks);
                Status("Getting ms2 scans...", thisId);
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

                //List<Ms2ScanWithSpecificMass> arrayOfMs2ScansSortedByMass = new List<Ms2ScanWithSpecificMass>();
                //arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToList();
                //Code to resolve MS3 data
                //if (XlSearchParameters.FragmentationType == FragmentaionType.MS2_HCD || XlSearchParameters.FragmentationType == FragmentaionType.MS2_EthCD)
                //{
                //    arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToList();
                //}
                //else
                //{
                //    arrayOfMs2ScansSortedByMass = GetCombinedMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToList();
                //}

                for (int currentPartition = 0; currentPartition < CommonParameters.TotalPartitions; currentPartition++)
                {
                    List <CompactPeptide> peptideIndex = null;
                    List <Protein> proteinListSubset   = proteinList.GetRange(currentPartition * proteinList.Count() / combinedParams.TotalPartitions, ((currentPartition + 1) * proteinList.Count() / combinedParams.TotalPartitions) - (currentPartition * proteinList.Count() / combinedParams.TotalPartitions));

                    #region Generate indices for modern search

                    Status("Getting fragment dictionary...", new List <string> {
                        taskId
                    });
                    var indexEngine = new IndexingEngine(proteinListSubset, variableModifications, fixedModifications, ionTypes, currentPartition, UsefulProteomicsDatabases.DecoyType.Reverse, ListOfDigestionParams, combinedParams, 30000.0, new List <string> {
                        taskId
                    });
                    List <int>[] fragmentIndex = null;
                    lock (indexLock)
                        GenerateIndexes(indexEngine, dbFilenameList, ref peptideIndex, ref fragmentIndex, taskId);

                    #endregion Generate indices for modern search

                    Status("Searching files...", taskId);

                    new TwoPassCrosslinkSearchEngine(newPsms, arrayOfMs2ScansSortedByMass, peptideIndex, fragmentIndex, ionTypes, currentPartition, combinedParams, false, XlSearchParameters.XlPrecusorMsTl, crosslinker, XlSearchParameters.CrosslinkSearchTop, XlSearchParameters.CrosslinkSearchTopNum, XlSearchParameters.XlQuench_H2O, XlSearchParameters.XlQuench_NH2, XlSearchParameters.XlQuench_Tris, XlSearchParameters.XlCharge_2_3, XlSearchParameters.XlCharge_2_3_PrimeFragment, thisId).Run();
                    ReportProgress(new ProgressEventArgs(100, "Done with search " + (currentPartition + 1) + "/" + CommonParameters.TotalPartitions + "!", thisId));
                }

                lock (psmLock)
                {
                    allPsms.AddRange(newPsms);
                }

                completedFiles++;
                ReportProgress(new ProgressEventArgs(completedFiles / currentRawFileList.Count, "Searching...", new List <string> {
                    taskId, "Individual Spectra Files"
                }));
            });

            ReportProgress(new ProgressEventArgs(100, "Done with all searches!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            Status("Crosslink analysis engine", taskId);
            MetaMorpheusEngineResults allcrosslinkanalysisResults;
            allcrosslinkanalysisResults = new CrosslinkAnalysisEngine(allPsms, compactPeptideToProteinPeptideMatch, proteinList, variableModifications, fixedModifications, ionTypes, OutputFolder, crosslinker, terminusType, CommonParameters, new List <string> {
                taskId
            }).Run();
            allPsms = allPsms.Where(p => p != null).ToList();
            if (XlSearchParameters.XlOutAll)
            {
                try
                {
                    WriteAllToTsv(allPsms, OutputFolder, "allPsms", new List <string> {
                        taskId
                    });
                }
                catch (Exception)
                {
                    throw;
                }
            }
            var allPsmsXL = allPsms.Where(p => p.CrossType == PsmCrossType.Cross).Where(p => p.XLBestScore >= CommonParameters.ScoreCutoff && p.BetaPsmCross.XLBestScore >= CommonParameters.ScoreCutoff).ToList();
            foreach (var item in allPsmsXL)
            {
                if (item.OneBasedStartResidueInProtein.HasValue)
                {
                    item.XlProteinPos = item.OneBasedStartResidueInProtein.Value + item.XlPos - 1;
                }
                if (item.BetaPsmCross.OneBasedStartResidueInProtein.HasValue)
                {
                    item.BetaPsmCross.XlProteinPos = item.BetaPsmCross.OneBasedStartResidueInProtein.Value + item.BetaPsmCross.XlPos - 1;
                }
            }

            #region Inter Crosslink

            //Write Inter Psms FDR
            var interPsmsXL = allPsmsXL.Where(p => !p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First().Contains(p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First()) &&
                                              !p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First().Contains(p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First())).OrderByDescending(p => p.XLQvalueTotalScore).ToList();
            foreach (var item in interPsmsXL)
            {
                item.CrossType = PsmCrossType.Inter;
            }
            var interPsmsXLFDR = CrosslinkDoFalseDiscoveryRateAnalysis(interPsmsXL).ToList();
            //var interPsmsXLFDR = CrosslinkFDRAnalysis(interPsmsXL).ToList();
            if (XlSearchParameters.XlOutCrosslink)
            {
                WriteCrosslinkToTsv(interPsmsXLFDR, OutputFolder, "xl_inter_fdr", new List <string> {
                    taskId
                });
            }

            if (XlSearchParameters.XlOutPercolator)
            {
                try
                {
                    var interPsmsXLPercolator = interPsmsXL.Where(p => p.XLBestScore >= 2 && p.BetaPsmCross.XLBestScore >= 2).OrderBy(p => p.ScanNumber).ToList();
                    WriteCrosslinkToTxtForPercolator(interPsmsXLPercolator, OutputFolder, "xl_inter_perc", crosslinker, new List <string> {
                        taskId
                    });
                }
                catch (Exception)
                {
                    throw;
                }
            }

            #endregion Inter Crosslink

            #region Intra Cross-link

            //Write Intra Psms FDR
            var intraPsmsXL = allPsmsXL.Where(p => p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First() == p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First() ||
                                              p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First().Contains(p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First()) ||
                                              p.BetaPsmCross.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First().Contains(p.CompactPeptides.First().Value.Item2.Select(b => b.Protein.Accession).First())).OrderByDescending(p => p.XLQvalueTotalScore).ToList();
            foreach (var item in intraPsmsXL)
            {
                item.CrossType = PsmCrossType.Intra;
            }
            var intraPsmsXLFDR = CrosslinkDoFalseDiscoveryRateAnalysis(intraPsmsXL).ToList();
            //var intraPsmsXLFDR = CrosslinkFDRAnalysis(intraPsmsXL).ToList();
            if (XlSearchParameters.XlOutCrosslink)
            {
                WriteCrosslinkToTsv(intraPsmsXLFDR, OutputFolder, "xl_intra_fdr", new List <string> {
                    taskId
                });
            }

            if (XlSearchParameters.XlOutPercolator)
            {
                try
                {
                    var intraPsmsXLPercolator = intraPsmsXL.Where(p => p.XLBestScore >= 2 && p.BetaPsmCross.XLBestScore >= 2).OrderBy(p => p.ScanNumber).ToList();
                    WriteCrosslinkToTxtForPercolator(intraPsmsXLPercolator, OutputFolder, "xl_intra_perc", crosslinker, new List <string> {
                        taskId
                    });
                }
                catch (Exception)
                {
                    throw;
                }
            }

            #endregion Intra Cross-link

            #region Single peptide

            var singlePsms    = allPsms.Where(p => p.CrossType == PsmCrossType.Singe && !p.FullSequence.Contains("Crosslink")).OrderByDescending(p => p.Score).ToList();
            var singlePsmsFDR = SingleFDRAnalysis(singlePsms).ToList();
            if (XlSearchParameters.XlOutAll)
            {
                WriteSingleToTsv(singlePsmsFDR, OutputFolder, "single_fdr", new List <string> {
                    taskId
                });
            }

            #endregion Single peptide

            #region Loop peptide

            var loopPsms    = allPsms.Where(p => p.CrossType == PsmCrossType.Loop).OrderByDescending(p => p.XLTotalScore).ToList();
            var loopPsmsFDR = SingleFDRAnalysis(loopPsms).ToList();
            if (XlSearchParameters.XlOutAll)
            {
                WriteSingleToTsv(loopPsmsFDR, OutputFolder, "loop_fdr", new List <string> {
                    taskId
                });
            }

            #endregion Loop peptide

            #region deadend peptide

            var deadendPsms = allPsms.Where(p => p.CrossType == PsmCrossType.DeadEnd || p.CrossType == PsmCrossType.DeadEndH2O || p.CrossType == PsmCrossType.DeadEndNH2 || p.CrossType == PsmCrossType.DeadEndTris).OrderByDescending(p => p.XLTotalScore).ToList();
            deadendPsms.AddRange(allPsms.Where(p => p.CrossType == PsmCrossType.Singe && p.FullSequence.Contains("Crosslink")).ToList());
            var deadendPsmsFDR = SingleFDRAnalysis(deadendPsms).ToList();
            if (XlSearchParameters.XlOutAll)
            {
                WriteSingleToTsv(deadendPsmsFDR, OutputFolder, "deadend_fdr", new List <string> {
                    taskId
                });
            }

            #endregion deadend peptide


            if (XlSearchParameters.XlOutPepXML)
            {
                List <PsmCross> allPsmsFDR = new List <PsmCross>();
                allPsmsFDR.AddRange(intraPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
                allPsmsFDR.AddRange(interPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
                allPsmsFDR.AddRange(singlePsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
                allPsmsFDR.AddRange(loopPsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
                allPsmsFDR.AddRange(deadendPsmsFDR.Where(p => p.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
                allPsmsFDR = allPsmsFDR.OrderBy(p => p.ScanNumber).ToList();
                foreach (var fullFilePath in currentRawFileList)
                {
                    string fileNameNoExtension = Path.GetFileNameWithoutExtension(fullFilePath);
                    WritePepXML_xl(allPsmsFDR.Where(p => p.FullFilePath == fullFilePath).ToList(), dbFilenameList, variableModifications, fixedModifications, localizeableModificationTypes, OutputFolder, fileNameNoExtension, new List <string> {
                        taskId
                    });
                }
            }

            if (XlSearchParameters.XlOutAll)
            {
                List <PsmCross> allPsmsXLFDR = new List <PsmCross>();
                allPsmsXLFDR.AddRange(intraPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
                allPsmsXLFDR.AddRange(interPsmsXLFDR.Where(p => p.IsDecoy != true && p.BetaPsmCross.IsDecoy != true && p.FdrInfo.QValue <= 0.05).ToList());
                try
                {
                    allPsmsXLFDR = allPsmsXLFDR.OrderByDescending(p => p.XLQvalueTotalScore).ToList();
                    var allPsmsXLFDRGroup = FindCrosslinks(allPsmsXLFDR);
                    WriteCrosslinkToTsv(allPsmsXLFDRGroup, OutputFolder, "allPsmsXLFDRGroup", new List <string> {
                        taskId
                    });
                }
                catch (Exception)
                {
                    throw;
                }
            }

            return(myTaskResults);
        }
예제 #9
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
        {
            myTaskResults = new MyTaskResults(this)
            {
                newSpectra = new List <string>()
            };

            Status("Loading modifications...", new List <string> {
                taskId
            });
            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList();

            Status("Loading proteins...", new List <string> {
                taskId
            });
            var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, UsefulProteomicsDatabases.DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out Dictionary <string, Modification> um)).ToList();

            proseCreatedWhileRunning.Append("The following calibration settings were used: ");
            proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            if (CommonParameters.DigestionParams.MaxPeptideLength == null)
            {
                proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
            }
            else
            {
                proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            }
            proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

            proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. ");
            proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");

            object          lock1           = new object();
            ParallelOptions parallelOptions = new ParallelOptions();

            if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
            {
                parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
            }
            var myFileManager = new MyFileManager(true);

            Status("Calibrating...", new List <string> {
                taskId
            });
            Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
            {
                var originalUncalibratedFilePath = currentRawFileList[spectraFileIndex];
                var originalUncalibratedFilenameWithoutExtension = Path.GetFileNameWithoutExtension(originalUncalibratedFilePath);

                ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                string calibratedFilePath = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + "-calib.mzml");

                IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile;

                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                // only load one file at a time
                lock (lock1)
                {
                    myMsDataFile = myFileManager.LoadFile(originalUncalibratedFilePath, CommonParameters.TopNpeaks, CommonParameters.MinRatio, CommonParameters.TrimMs1Peaks, CommonParameters.TrimMsMsPeaks);
                }

                Status("Acquiring calibration data points...", new List <string> {
                    taskId, "Individual Spectra Files"
                });

                // get datapoints to fit calibration function to
                var acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                // stats before calibration
                int prevPsmCount = acquisitionResults.Item1.Count;

                var preCalibrationPrecursorErrors = acquisitionResults.Item1.Select(p => (p.ScanPrecursorMass - p.PeptideMonisotopicMass.Value) / p.PeptideMonisotopicMass.Value * 1e6).ToList();
                double preCalibrationPrecursorIqr = Statistics.InterquartileRange(preCalibrationPrecursorErrors);

                var preCalibrationProductErrors = acquisitionResults.Item1.SelectMany(p => p.ProductMassErrorPpm.SelectMany(v => v.Value)).ToList();
                double preCalibrationProductIqr = Statistics.InterquartileRange(preCalibrationProductErrors);

                // enough data points to calibrate with?
                if (acquisitionResults.Item2 == null)
                {
                    Warn("Could not find any datapoints to calibrate with!");
                    return;
                }
                if (acquisitionResults.Item2.Ms1List.Count < 4 || acquisitionResults.Item2.Ms2List.Count < 4)
                {
                    Warn("Could not find enough MS1 datapoints to calibrate (" + acquisitionResults.Item2.Ms1List.Count + " found)");
                    Warn("Could not find enough MS2 datapoints to calibrate (" + acquisitionResults.Item2.Ms2List.Count + " found)");
                    return;
                }

                // generate calibration function and shift data points
                Status("Calibrating...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                new CalibrationEngine(myMsDataFile, acquisitionResults.Item2, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }).Run();
                myFileManager.DoneWithFile(originalUncalibratedFilePath);

                // do another search to evaluate calibration results
                Status("Post-calibration search...", new List <string> {
                    taskId, "Individual Spectra Files"
                });
                acquisitionResults = GetDataAcquisitionResults(myMsDataFile, originalUncalibratedFilePath, variableModifications, fixedModifications, proteinList, taskId, combinedParams, combinedParams.PrecursorMassTolerance, combinedParams.ProductMassTolerance);

                // stats after calibration
                int postCalibrationPsmCount = acquisitionResults.Item1.Count;

                var postCalibrationPrecursorErrors = acquisitionResults.Item1.Select(p => (p.ScanPrecursorMass - p.PeptideMonisotopicMass) / p.PeptideMonisotopicMass * 1e6).ToList();
                double postCalibrationPrecursorIqr = Statistics.InterquartileRange(postCalibrationPrecursorErrors);

                var postCalibrationProductErrors = acquisitionResults.Item1.SelectMany(p => p.ProductMassErrorPpm.SelectMany(v => v.Value)).ToList();
                double postCalibrationProductIqr = Statistics.InterquartileRange(postCalibrationProductErrors);

                // did the data improve? (not used for anything yet...)
                bool improvement = ImprovGlobal(preCalibrationPrecursorIqr, preCalibrationProductIqr, prevPsmCount, postCalibrationPsmCount, postCalibrationPrecursorIqr, postCalibrationProductIqr);

                // write suggested tolerances for this file
                var tomlFileName         = Path.Combine(OutputFolder, originalUncalibratedFilenameWithoutExtension + "-calib.toml");
                FileSpecificTolerances f = new FileSpecificTolerances
                {
                    PrecursorMassTolerance = new PpmTolerance(4.0 * postCalibrationPrecursorIqr),
                    ProductMassTolerance   = new PpmTolerance(4.0 * postCalibrationProductIqr)
                };
                Toml.WriteFile(f, tomlFileName, tomlConfig);
                SucessfullyFinishedWritingFile(tomlFileName, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });

                // write the calibrated MZML file
                MzmlMethods.CreateAndWriteMyMzmlWithCalibratedSpectra(myMsDataFile, calibratedFilePath, false);

                // all done
                SucessfullyFinishedWritingFile(calibratedFilePath, new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                });
                myTaskResults.newSpectra.Add(calibratedFilePath);
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", originalUncalibratedFilenameWithoutExtension
                }));
            });
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            return(myTaskResults);
        }
예제 #10
0
        private (List <PeptideSpectralMatch>, DataPointAquisitionResults) GetDataAcquisitionResults(IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile, string currentDataFile, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <Protein> proteinList, string taskId, ICommonParameters combinedParameters, Tolerance initPrecTol, Tolerance initProdTol)
        {
            var fileNameWithoutExtension = Path.GetFileNameWithoutExtension(currentDataFile);
            MassDiffAcceptor searchMode;

            if (initPrecTol is PpmTolerance)
            {
                searchMode = new SinglePpmAroundZeroSearchMode(initPrecTol.Value);
            }
            else
            {
                searchMode = new SingleAbsoluteAroundZeroSearchMode(initPrecTol.Value);
            }

            FragmentTypes fragmentTypesForCalibration = FragmentTypes.None;

            if (combinedParameters.BIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.b;
            }
            if (combinedParameters.YIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.y;
            }
            if (combinedParameters.CIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.c;
            }
            if (combinedParameters.ZdotIons)
            {
                fragmentTypesForCalibration = fragmentTypesForCalibration | FragmentTypes.zdot;
            }

            var listOfSortedms2Scans = GetMs2Scans(myMsDataFile, currentDataFile, combinedParameters.DoPrecursorDeconvolution, combinedParameters.UseProvidedPrecursorInfo, combinedParameters.DeconvolutionIntensityRatio, combinedParameters.DeconvolutionMaxAssumedChargeState, combinedParameters.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

            PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[listOfSortedms2Scans.Length];

            List <ProductType> lp = new List <ProductType>();

            if (combinedParameters.BIons)
            {
                lp.Add(ProductType.B);
            }
            if (combinedParameters.YIons)
            {
                lp.Add(ProductType.Y);
            }
            if (combinedParameters.CIons)
            {
                lp.Add(ProductType.C);
            }
            if (combinedParameters.ZdotIons)
            {
                lp.Add(ProductType.Zdot);
            }

            Log("Searching with searchMode: " + searchMode, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });
            Log("Searching with productMassTolerance: " + initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            });

            new ClassicSearchEngine(allPsmsArray, listOfSortedms2Scans, variableModifications, fixedModifications, proteinList, lp, searchMode, false, combinedParameters, initProdTol, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> allPsms = allPsmsArray.ToList();

            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatching = ((SequencesToActualProteinPeptidesEngineResults) new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, lp, new List <IDigestionParams> {
                combinedParameters.DigestionParams
            }, combinedParameters.ReportAllAmbiguity, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run()).CompactPeptideToProteinPeptideMatching;

            foreach (var huh in allPsms)
            {
                if (huh != null)
                {
                    huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatching);
                }
            }

            allPsms = allPsms.Where(b => b != null).OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => (b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, searchMode.NumNotches, false, new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            List <PeptideSpectralMatch> goodIdentifications = allPsms.Where(b => b.FdrInfo.QValueNotch < 0.01 && !b.IsDecoy && b.FullSequence != null).ToList();

            if (!goodIdentifications.Any())
            {
                Warn("No PSMs below 1% FDR observed!");
                return(new List <PeptideSpectralMatch>(), null);
            }

            var dissociationTypes = MetaMorpheusEngine.DetermineDissociationType(lp);

            foreach (var psm in allPsms)
            {
                var    theScan          = myMsDataFile.GetOneBasedScan(psm.ScanNumber);
                double thePrecursorMass = psm.ScanPrecursorMass;

                foreach (var huh in lp)
                {
                    var ionMasses = psm.CompactPeptides.First().Key.ProductMassesMightHaveDuplicatesAndNaNs(new List <ProductType> {
                        huh
                    });
                    Array.Sort(ionMasses);
                    List <double> matchedIonMassesList    = new List <double>();
                    List <double> productMassErrorDaList  = new List <double>();
                    List <double> productMassErrorPpmList = new List <double>();
                    LocalizationEngine.MatchIons(theScan, initProdTol, ionMasses, matchedIonMassesList, productMassErrorDaList, productMassErrorPpmList, thePrecursorMass, dissociationTypes, false);
                    double[] matchedIonMassesOnlyMatches = matchedIonMassesList.ToArray();
                    psm.MatchedIonDictOnlyMatches.Add(huh, matchedIonMassesOnlyMatches);
                    psm.ProductMassErrorDa.Add(huh, productMassErrorDaList.ToArray());
                    psm.ProductMassErrorPpm.Add(huh, productMassErrorPpmList.ToArray());
                }
            }

            DataPointAquisitionResults currentResult = (DataPointAquisitionResults) new DataPointAcquisitionEngine(
                goodIdentifications,
                myMsDataFile,
                initPrecTol,
                initProdTol,
                CalibrationParameters.NumFragmentsNeededForEveryIdentification,
                CalibrationParameters.MinMS1IsotopicPeaksNeededForConfirmedIdentification,
                CalibrationParameters.MinMS2IsotopicPeaksNeededForConfirmedIdentification,
                fragmentTypesForCalibration,
                new List <string> {
                taskId, "Individual Spectra Files", fileNameWithoutExtension
            }).Run();

            return(goodIdentifications, currentResult);
        }
예제 #11
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
        {
            myTaskResults = new MyTaskResults(this);

            if (NeoType.Equals(NeoTaskType.AggregateTargetDecoyFiles))
            {
                //getfolders
                if (NeoParameters.DecoyFilePath == null)
                {
                    NeoParameters.DecoyFilePath = new DirectoryInfo(OutputFolder).Name;
                    string taskString = NeoParameters.DecoyFilePath.Split('-')[0];
                    int    taskNum    = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4));
                    taskNum--;
                    NeoParameters.DecoyFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.DecoyFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                    if (NeoParameters.TargetFilePath == null)
                    {
                        NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name;
                        taskNum--;
                        NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                    }
                }
                if (NeoParameters.TargetFilePath == null)
                {
                    NeoParameters.TargetFilePath = new DirectoryInfo(OutputFolder).Name;
                    string taskString = NeoParameters.TargetFilePath.Split('-')[0];
                    int    taskNum    = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4));
                    taskNum--;
                    NeoParameters.TargetFilePath = OutputFolder.Substring(0, OutputFolder.Length - NeoParameters.TargetFilePath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                }
                AggregateSearchFiles.Combine(NeoParameters.TargetFilePath, NeoParameters.DecoyFilePath, OutputFolder + "\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]));
            }
            else if (NeoType.Equals(NeoTaskType.AggregateNormalSplicedFiles))
            {
                //reset database
                dbFilenameList = StoredDatabases;

                string normalPath = "";
                string cisPath    = new DirectoryInfo(OutputFolder).Name;
                string taskString = cisPath.Split('-')[0];
                int    taskNum    = Convert.ToInt32(taskString.Substring(4, taskString.Length - 4));
                taskNum -= 2;
                string transPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + (taskNum + 1) + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                cisPath = OutputFolder.Substring(0, OutputFolder.Length - cisPath.Length) + "Task" + taskNum + "-SearchTask\\" + Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                AggregateSearchFiles.RecursiveNeoAggregation(normalPath, cisPath, OutputFolder, "CisResults.psmtsv");
                AggregateSearchFiles.RecursiveNeoAggregation(normalPath, transPath, OutputFolder, "TransResults.psmtsv");
            }
            else if (NeoType.Equals(NeoTaskType.GenerateSplicedPeptides))
            {
                NeoMassCalculator.ImportMasses();

                ParallelOptions parallelOptions = new ParallelOptions();
                if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
                {
                    parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
                }
                MyFileManager myFileManager = new MyFileManager(true);

                //Import Spectra
                Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
                {
                    var origDataFile = currentRawFileList[spectraFileIndex];
                    ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);

                    var thisId = new List <string> {
                        taskId, "Individual Spectra Files", origDataFile
                    };
                    NewCollection(Path.GetFileName(origDataFile), thisId);
                    Status("Loading spectra file...", thisId);
                    IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks);
                    Status("Getting ms2 scans...", thisId);
                    Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();

                    //Import Database
                    Status("Loading modifications...", taskId);

                    #region Load modifications

                    List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
                    List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
                    List <string> localizeableModificationTypes       = CommonParameters.ListOfModTypesLocalize == null ? new List <string>() : CommonParameters.ListOfModTypesLocalize.ToList();
                    if (CommonParameters.LocalizeAll)
                    {
                        localizeableModificationTypes = GlobalVariables.AllModTypesKnown.ToList();
                    }
                    else
                    {
                        localizeableModificationTypes = GlobalVariables.AllModTypesKnown.Where(b => localizeableModificationTypes.Contains(b)).ToList();
                    }

                    #endregion Load modifications

                    var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, DecoyType.None, localizeableModificationTypes, b.IsContaminant, out Dictionary <string, Modification> unknownModifications)).ToList();

                    //Read N and C files
                    string nPath = NeoParameters.NFilePath;
                    string cPath = NeoParameters.CFilePath;
                    //if termini input

                    if (nPath == null || cPath == null)
                    {
                        //if no termini input
                        string taskHeader  = "Task";
                        string[] pathArray = OutputFolder.Split('\\');
                        string basePath    = "";
                        for (int i = 0; i < pathArray.Length - 1; i++)
                        {
                            basePath += pathArray[i] + '\\';
                        }
                        string currentTaskNumber = pathArray[pathArray.Length - 1].Split('-')[0];
                        currentTaskNumber        = currentTaskNumber.Substring(taskHeader.Length, currentTaskNumber.Length - taskHeader.Length);
                        string NHeader           = "";
                        string CHeader           = "";
                        if (cPath == null)
                        {
                            CHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 1);
                            if (nPath == null)
                            {
                                NHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 2);
                            }
                        }
                        else
                        {
                            NHeader = taskHeader + (Convert.ToInt16(currentTaskNumber) - 1);
                        }
                        foreach (string s in Directory.GetDirectories(basePath))
                        {
                            if (s.Contains(NHeader))
                            {
                                nPath = s;
                            }
                            else if (s.Contains(CHeader))
                            {
                                cPath = s;
                            }
                        }
                        string fileName = Path.GetFileNameWithoutExtension(currentRawFileList[0]) + "_PSMs.psmtsv";
                        nPath          += "\\" + fileName;
                        cPath          += "\\" + fileName;
                    }

                    Status("Importing Search Results...", taskId);
                    List <NeoPsm> psms = ImportPsmtsv.ImportNeoPsms(nPath, cPath);

                    //Splice
                    Status("Splicing Fragments...", taskId);
                    List <NeoPsm> candidates = NeoSplicePeptides.SplicePeptides(psms);

                    //Find Ambiguity
                    Status("Identifying Ambiguity...", taskId);
                    NeoFindAmbiguity.FindAmbiguity(candidates, proteinList, arrayOfMs2ScansSortedByMass, dbFilenameList[0].FilePath);

                    //Export Results
                    Status("Exporting Results...", taskId);
                    NeoExport.ExportAll(candidates, arrayOfMs2ScansSortedByMass, OutputFolder);

                    //Switch databases
                    string outputFolder = NeoExport.path + NeoExport.folder + @"\" + NeoExport.folder + "FusionDatabaseAppendixNC.fasta";
                    dbFilenameList      = new List <DbForTask>()
                    {
                        new DbForTask(outputFolder, false)
                    };
                });
예제 #12
0
 public PrecursorIndexingEngine(List <Protein> proteinList, List <ModificationWithMass> variableModifications, List <ModificationWithMass> fixedModifications, List <ProductType> lp, int currentPartition, DecoyType decoyType, IEnumerable <IDigestionParams> CollectionOfDigestionParams, ICommonParameters commonParams, double maxFragmentSize, List <string> nestedIds) : base(proteinList, variableModifications, fixedModifications, lp, currentPartition, decoyType, CollectionOfDigestionParams, commonParams, maxFragmentSize, nestedIds)
 {
 }
예제 #13
0
        protected override MyTaskResults RunSpecific(string OutputFolder, List <DbForTask> dbFilenameList, List <string> currentRawFileList, string taskId, FileSpecificSettings[] fileSettingsList)
        {
            myTaskResults = new MyTaskResults(this)
            {
                newDatabases = new List <DbForTask>()
            };
            Status("Loading modifications...", new List <string> {
                taskId
            });

            List <ModificationWithMass> variableModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsVariable.Contains((b.modificationType, b.id))).ToList();
            List <ModificationWithMass> fixedModifications    = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => CommonParameters.ListOfModsFixed.Contains((b.modificationType, b.id))).ToList();
            List <string> localizeableModificationTypes       = CommonParameters.LocalizeAll ? GlobalVariables.AllModTypesKnown.ToList() : CommonParameters.ListOfModTypesLocalize.ToList();

            List <ModificationWithMass> gptmdModifications = GlobalVariables.AllModsKnown.OfType <ModificationWithMass>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.modificationType, b.id))).ToList();

            IEnumerable <Tuple <double, double> > combos = LoadCombos(gptmdModifications).ToList();

            List <PeptideSpectralMatch> allPsms = new List <PeptideSpectralMatch>();

            List <ProductType> ionTypes = new List <ProductType>();

            if (CommonParameters.BIons)
            {
                ionTypes.Add(ProductType.B);
            }
            if (CommonParameters.YIons)
            {
                ionTypes.Add(ProductType.Y);
            }
            if (CommonParameters.CIons)
            {
                ionTypes.Add(ProductType.C);
            }
            if (CommonParameters.ZdotIons)
            {
                ionTypes.Add(ProductType.Zdot);
            }

            Status("Loading proteins...", new List <string> {
                taskId
            });
            Dictionary <string, Modification> um = null;
            //Decoys are currently not being searched with DecoyType.None
            var proteinList = dbFilenameList.SelectMany(b => LoadProteinDb(b.FilePath, true, DecoyType.Reverse, localizeableModificationTypes, b.IsContaminant, out um)).ToList();

            var numRawFiles = currentRawFileList.Count;

            proseCreatedWhileRunning.Append("The following G-PTM-D settings were used: "); proseCreatedWhileRunning.Append("protease = " + CommonParameters.DigestionParams.Protease + "; ");
            proseCreatedWhileRunning.Append("maximum missed cleavages = " + CommonParameters.DigestionParams.MaxMissedCleavages + "; ");
            proseCreatedWhileRunning.Append("minimum peptide length = " + CommonParameters.DigestionParams.MinPeptideLength + "; ");
            if (CommonParameters.DigestionParams.MaxPeptideLength == null)
            {
                proseCreatedWhileRunning.Append("maximum peptide length = unspecified; ");
            }
            else
            {
                proseCreatedWhileRunning.Append("maximum peptide length = " + CommonParameters.DigestionParams.MaxPeptideLength + "; ");
            }
            proseCreatedWhileRunning.Append("initiator methionine behavior = " + CommonParameters.DigestionParams.InitiatorMethionineBehavior + "; ");
            proseCreatedWhileRunning.Append("max modification isoforms = " + CommonParameters.DigestionParams.MaxModificationIsoforms + "; ");

            proseCreatedWhileRunning.Append("fixed modifications = " + string.Join(", ", fixedModifications.Select(m => m.id)) + "; ");
            proseCreatedWhileRunning.Append("variable modifications = " + string.Join(", ", variableModifications.Select(m => m.id)) + "; ");
            proseCreatedWhileRunning.Append("G-PTM-D modifications count = " + gptmdModifications.Count + "; ");
            //puppet searchmode for writing files. Actual searchmode is filespecific
            MassDiffAcceptor tempSearchMode = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), CommonParameters.PrecursorMassTolerance);

            proseCreatedWhileRunning.Append("parent mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");
            proseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + " Da. ");
            proseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count + " total entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");

            Status("Running G-PTM-D...", new List <string> {
                taskId
            });

            HashSet <IDigestionParams> ListOfDigestionParams = GetListOfDistinctDigestionParams(CommonParameters, fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b)));

            MyFileManager myFileManager = new MyFileManager(true);

            object          lock1           = new object();
            object          lock2           = new object();
            ParallelOptions parallelOptions = new ParallelOptions();

            if (CommonParameters.MaxParallelFilesToAnalyze.HasValue)
            {
                parallelOptions.MaxDegreeOfParallelism = CommonParameters.MaxParallelFilesToAnalyze.Value;
            }
            Parallel.For(0, currentRawFileList.Count, parallelOptions, spectraFileIndex =>
            {
                var origDataFile = currentRawFileList[spectraFileIndex];
                ICommonParameters combinedParams = SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[spectraFileIndex]);
                MassDiffAcceptor searchMode      = new DotMassDiffAcceptor("", GetAcceptableMassShifts(fixedModifications, variableModifications, gptmdModifications, combos), combinedParams.PrecursorMassTolerance);

                NewCollection(Path.GetFileName(origDataFile), new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                StartingDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });

                Status("Loading spectra file...", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                IMsDataFile <IMsDataScan <IMzSpectrum <IMzPeak> > > myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams.TopNpeaks, combinedParams.MinRatio, combinedParams.TrimMs1Peaks, combinedParams.TrimMsMsPeaks);
                Status("Getting ms2 scans...", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams.DoPrecursorDeconvolution, combinedParams.UseProvidedPrecursorInfo, combinedParams.DeconvolutionIntensityRatio, combinedParams.DeconvolutionMaxAssumedChargeState, combinedParams.DeconvolutionMassTolerance).OrderBy(b => b.PrecursorMass).ToArray();
                myFileManager.DoneWithFile(origDataFile);
                PeptideSpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];
                new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, proteinList, ionTypes, searchMode, false, combinedParams, combinedParams.ProductMassTolerance, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                }).Run();
                lock (lock2)
                {
                    allPsms.AddRange(allPsmsArray);
                }
                FinishedDataFile(origDataFile, new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                });
                ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                    taskId, "Individual Spectra Files", origDataFile
                }));
            });
            ReportProgress(new ProgressEventArgs(100, "Done!", new List <string> {
                taskId, "Individual Spectra Files"
            }));

            // Group and order psms

            SequencesToActualProteinPeptidesEngine sequencesToActualProteinPeptidesEngineTest = new SequencesToActualProteinPeptidesEngine(allPsms, proteinList, fixedModifications, variableModifications, ionTypes, ListOfDigestionParams, CommonParameters.ReportAllAmbiguity, new List <string> {
                taskId
            });

            var resTest = (SequencesToActualProteinPeptidesEngineResults)sequencesToActualProteinPeptidesEngineTest.Run();
            Dictionary <CompactPeptideBase, HashSet <PeptideWithSetModifications> > compactPeptideToProteinPeptideMatchingTest = resTest.CompactPeptideToProteinPeptideMatching;

            foreach (var huh in allPsms)
            {
                if (huh != null)
                {
                    huh.MatchToProteinLinkedPeptides(compactPeptideToProteinPeptideMatchingTest);
                }
            }

            allPsms = allPsms.Where(b => b != null).OrderByDescending(b => b.Score).ThenBy(b => b.PeptideMonisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.PeptideMonisotopicMass.Value) : double.MaxValue).GroupBy(b => new Tuple <string, int, double?>(b.FullFilePath, b.ScanNumber, b.PeptideMonisotopicMass)).Select(b => b.First()).ToList();

            new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, false, new List <string> {
                taskId
            }).Run();

            var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");

            WritePsmsToTsv(allPsms, writtenFile, new Dictionary <string, int>());
            SucessfullyFinishedWritingFile(writtenFile, new List <string> {
                taskId
            });

            var gptmdResults = (GptmdResults) new GptmdEngine(allPsms, gptmdModifications, combos, CommonParameters.PrecursorMassTolerance, new List <string> {
                taskId
            }).Run();

            if (dbFilenameList.Any(b => !b.IsContaminant))
            {
                // do NOT use this code (Path.GetFilenameWithoutExtension) because GPTMD on .xml.gz will result in .xml.xml file type being written
                //string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => !b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml");

                List <string> databaseNames = new List <string>();
                foreach (var nonContaminantDb in dbFilenameList.Where(p => !p.IsContaminant))
                {
                    var dbName          = Path.GetFileName(nonContaminantDb.FilePath);
                    int indexOfFirstDot = dbName.IndexOf(".");
                    databaseNames.Add(dbName.Substring(0, indexOfFirstDot));
                }
                string outputXMLdbFullName = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && !b.IsContaminant).ToList(), outputXMLdbFullName);

                SucessfullyFinishedWritingFile(outputXMLdbFullName, new List <string> {
                    taskId
                });

                myTaskResults.newDatabases.Add(new DbForTask(outputXMLdbFullName, false));
                myTaskResults.AddNiceText("Modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                myTaskResults.AddNiceText("Mods types and counts:");
                myTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            if (dbFilenameList.Any(b => b.IsContaminant))
            {
                // do NOT use this code (Path.GetFilenameWithoutExtension) because GPTMD on .xml.gz will result in .xml.xml file type being written
                //string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", dbFilenameList.Where(b => b.IsContaminant).Select(b => Path.GetFileNameWithoutExtension(b.FilePath))) + "GPTMD.xml");
                List <string> databaseNames = new List <string>();
                foreach (var contaminantDb in dbFilenameList.Where(p => p.IsContaminant))
                {
                    var dbName          = Path.GetFileName(contaminantDb.FilePath);
                    int indexOfFirstDot = dbName.IndexOf(".");
                    databaseNames.Add(dbName.Substring(0, indexOfFirstDot));
                }
                string outputXMLdbFullNameContaminants = Path.Combine(OutputFolder, string.Join("-", databaseNames) + "GPTMD.xml");

                var newModsActuallyWritten = ProteinDbWriter.WriteXmlDatabase(gptmdResults.Mods, proteinList.Where(b => !b.IsDecoy && b.IsContaminant).ToList(), outputXMLdbFullNameContaminants);

                SucessfullyFinishedWritingFile(outputXMLdbFullNameContaminants, new List <string> {
                    taskId
                });

                myTaskResults.newDatabases.Add(new DbForTask(outputXMLdbFullNameContaminants, true));
                myTaskResults.AddNiceText("Contaminant modifications added: " + newModsActuallyWritten.Select(b => b.Value).Sum());
                myTaskResults.AddNiceText("Mods types and counts:");
                myTaskResults.AddNiceText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
            }
            return(myTaskResults);
        }
예제 #14
0
        private static void UpdateTomls(string tomlFileName, string fileName, ICommonParameters ye5, TerminusType terminusType, bool spliceSearch)
        {
            string[]      oldTomlLines = File.ReadAllLines(@fileName);
            List <string> newTomlLines = new List <string>();

            foreach (string line in oldTomlLines)
            {
                if (line.Contains("LocalizeAll") && terminusType.Equals(TerminusType.None))
                {
                    newTomlLines.Add(GetCorrectValue("LocalizeAll", tomlFileName, line));
                }
                else if (line.Contains("ListOfModsFixed"))
                {
                    newTomlLines.Add(GetCorrectValue("ListOfModsFixed", tomlFileName, line));
                }
                else if (line.Contains("ListOfModsVariable") && terminusType.Equals(TerminusType.None) && !spliceSearch)
                {
                    newTomlLines.Add(GetCorrectValue("ListOfModsVariable", tomlFileName, line));
                }
                else if (line.Contains("BIons"))
                {
                    if (terminusType.Equals(TerminusType.N) || terminusType.Equals(TerminusType.None))
                    {
                        newTomlLines.Add(GetCorrectValue("BIons", tomlFileName, line));
                    }
                    else
                    {
                        newTomlLines.Add("BIons = false");
                    }
                }
                else if (line.Contains("YIons"))
                {
                    if (terminusType.Equals(TerminusType.C) || terminusType.Equals(TerminusType.None))
                    {
                        newTomlLines.Add(GetCorrectValue("YIons", tomlFileName, line));
                    }
                    else
                    {
                        newTomlLines.Add("YIons = false");
                    }
                }
                else if (line.Contains("ZdotIons"))
                {
                    if (terminusType.Equals(TerminusType.C) || terminusType.Equals(TerminusType.None))
                    {
                        newTomlLines.Add(GetCorrectValue("ZdotIons", tomlFileName, line));
                    }
                    else
                    {
                        newTomlLines.Add("ZdotIons = false");
                    }
                }
                else if (line.Contains("CIons"))
                {
                    if (terminusType.Equals(TerminusType.N) || terminusType.Equals(TerminusType.None))
                    {
                        newTomlLines.Add(GetCorrectValue("CIons", tomlFileName, line));
                    }
                    else
                    {
                        newTomlLines.Add("CIons = false");
                    }
                }
                else if (line.Contains("ProductMassTolerance"))
                {
                    newTomlLines.Add(GetCorrectValue("ProductMassTolerance", tomlFileName, line));
                }
                else if (line.Contains("PrecursorMassTolerance"))
                {
                    newTomlLines.Add(GetCorrectValue("PrecursorMassTolerance", tomlFileName, line));
                }
                else if (line.Contains("MaxMissedCleavages"))
                {
                    newTomlLines.Add(GetCorrectValue("MaxMissedCleavages", tomlFileName, line));
                }
                else if (line.Contains("InitiatorMethionineBehavior"))
                {
                    newTomlLines.Add(GetCorrectValue("InitiatorMethionineBehavior", tomlFileName, line));
                }
                else if (line.Contains("MinPeptideLength") && !!terminusType.Equals(TerminusType.None))
                {
                    newTomlLines.Add(GetCorrectValue("MinPeptideLength", tomlFileName, line));
                }
                else if (line.Contains("MaxPeptideLength"))
                {
                    newTomlLines.Add(GetCorrectValue("MaxPeptideLength", tomlFileName, line));
                }
                else if (line.Contains("MaxModificationIsoforms"))
                {
                    newTomlLines.Add(GetCorrectValue("MaxModificationIsoforms", tomlFileName, line));
                }
                else if (line.Contains("MaxModsForPeptide"))
                {
                    newTomlLines.Add(GetCorrectValue("MaxModsForPeptide", tomlFileName, line));
                }
                else if (line.Contains("SemiProteaseDigestion"))
                {
                    newTomlLines.Add(GetCorrectValue("SemiProteaseDigestion", tomlFileName, line));
                }
                else if (line.Contains("TerminusTypeSemiProtease"))
                {
                    newTomlLines.Add(GetCorrectValue("TerminusTypeSemiProtease", tomlFileName, line));
                }
                else if (line.Contains("Protease") && terminusType.Equals(TerminusType.None) && !spliceSearch) //this must be last, else other names including protease will be overwritten and crash.
                {
                    newTomlLines.Add(GetCorrectValue("Protease", tomlFileName, line));
                }
                else
                {
                    newTomlLines.Add(line);
                }
            }
            using (StreamWriter file = new StreamWriter(fileName))
                foreach (string line in newTomlLines)
                {
                    file.WriteLine(line);
                }
        }