/* * Writes the result database to a file, to make it faster to re-initialize the * software */ //public static void writeResultDatabaseToFile(String file_path, ResultDatabase rd) //{ // log.Debug("Writing Result Database to a file..."); // try // { // StreamWriter writer = new StreamWriter(file_path); // log.Debug("File name: " + file_path); // // Get all IDs // List<IDs> ids = new List<IDs>(rd.getIDs()); // // Sort by scan number // ids.Sort((IDs x, IDs y) => (x.getScanNum()).CompareTo(y.getScanNum())); // // Write header // String[] header = new String[] { "scan", "scan_t", "peptide_mass", "peptide_sequence", "parent_proteins", // "peptide_evidence", "peptide_reference", "database_sequence_id", "xCorr", "deltaCN", "deltaCNStar", // "spscore", "sprank", "evalue" }; // writer.Write(String.Join("\t", header)); // foreach (IDs id in ids) // { // writer.Write("\n" + outputIDToTSVFormat(id)); // writer.Flush(); // } // writer.Flush(); // writer.Close(); // } // catch (Exception e) // { // Console.WriteLine(e.ToString()); // log.Error("Writing file unsuccessful!!!"); // Environment.Exit(0); // } // log.Debug("Writing file successful."); //} /* * Write the identification features used for training the logistic regression * classifier */ //public static void writeIdentificationFeaturesFile(String file_path, // List<IdentificationFeatures> positiveTrainingSet, // List<IdentificationFeatures> negativeTrainingSet) //{ // log.Debug("Writing Identification Features to a file..."); // try // { // StreamWriter writer = new StreamWriter(file_path); // log.Debug("File name: " + file_path); // // Write header TODO remove // String header = "label," + IdentificationFeatures.getHeader(); // writer.Write(header); // // in the first column, 1 indicates positive training set // foreach (IdentificationFeatures i in positiveTrainingSet) // { // writer.Write("\n" + "1," + i.WriteToFile()); // writer.Flush(); // } // // in the first column, 0 indicates negative training set // foreach 
//    (IdentificationFeatures i in negativeTrainingSet)
//        {
//            writer.Write("\n" + "0," + i.WriteToFile());
//            writer.Flush();
//        }
//        writer.Flush();
//        writer.Close();
//    }
//    catch (Exception e)
//    {
//        e.printStackTrace();
//        log.Error("Writing file unsuccessful!!!");
//        System.exit(0);
//    }
//    log.Debug("Writing file successful.");
//}

/*
 * Renders one identification as a single tab-separated row. The columns are
 * emitted in the order expected by writeResultDatabaseToFile: scan number,
 * scan time, peptide mass, peptide sequence, parent protein accessions,
 * peptide evidence, peptide reference, database sequence id, xCorr, deltaCN,
 * deltaCNStar, SP score, SP rank and e-value.
 */
private static String outputIDToTSVFormat(IDs id)
{
    // Getters are evaluated left to right, exactly once each.
    object[] columns =
    {
        id.getScanNum(),
        id.getScanTime(),
        id.getPeptideMass(),
        id.getPeptideSequence(),
        id.getParentProteinAccessions(),
        id.getPepEvid(),
        id.getPepRef(),
        id.getDBSeqID(),
        id.getXCorr(),
        id.getDeltaCN(),
        id.getDeltaCNStar(),
        id.getSPScore(),
        id.getSPRank(),
        id.getEValue()
    };

    System.Text.StringBuilder row = new System.Text.StringBuilder();
    for (int col = 0; col < columns.Length; col++)
    {
        // Separator goes between columns only, never before the first one.
        if (col > 0)
        {
            row.Append("\t");
        }
        // Append(object) uses the value's own ToString(); null contributes nothing.
        row.Append(columns[col]);
    }
    return row.ToString();
}
/*
 * Processes one identification result: records the score on the matched
 * peptide, reports it to the performance evaluator, excludes the peptide when
 * its xCorr exceeds xCorrThreshold, and excludes every parent protein whose
 * numDB count has reached numDBThreshold.
 */
protected void evaluateIdentification(IDs id)
{
    // No identification for this scan: just tally it and stop.
    if (id == null)
    {
        performanceEvaluator.countMS2UnidentifiedAnalyzed();
        return;
    }

    Peptide peptide = getPeptideFromIdentification(id);

    // add decoy or non-existent protein connections
    // database.addProteinFromIdentification(pep, id.getParentProteinAccessions());

    Double xCorrScore = id.getXCorr();
    Double deltaCN = id.getDeltaCN();
    peptide.addScore(xCorrScore, xCorrThreshold, deltaCN);
    performanceEvaluator.evaluateAnalysis(exclusionList, peptide);

    // Peptide-level exclusion, driven by the xCorr threshold.
    bool passesXCorr = xCorrScore > xCorrThreshold;
    if (passesXCorr)
    {
        performanceEvaluator.countPeptidesExcluded();
        log.Debug("xCorrThreshold passed. Peptide added to the exclusion list.");
        exclusionList.addPeptide(peptide);

        // Retention time alignment is only calibrated for peptides that
        // pass the threshold.
        calibrateRetentionTime(peptide);
    }

    // Protein-level exclusion, driven by the numDB threshold.
    foreach (Protein candidate in peptide.getProteins())
    {
        bool reachedNumDB = candidate.getNumDB() >= numDBThreshold;
        if (reachedNumDB && !candidate.IsExcluded())
        {
            candidate.setExcluded(true);
            log.Debug("Parent protein " + candidate.getAccession() + " is identified confidently "
                + candidate.getNumDB() + " times!");
            performanceEvaluator.countProteinsExcluded();
            exclusionList.addProtein(candidate);
        }
        log.Debug(candidate);
    }

    log.Debug(peptide);
}
/*
 * NoExclusion variant: scores the identified peptide and reports it to the
 * performance evaluator, but never adds anything to the exclusion list. It
 * also records an ObservedPeptideRtTrackerObject for the peptide in
 * peptideIDRT, and calibrates the retention time alignment when the xCorr
 * exceeds a fixed threshold.
 */
protected void evaluateIdentification(IDs id)
{
    // xCorr above which the identification is used for retention time calibration.
    const double XCORR_THRESHOLD = 2.5;

    log.Debug("NoExclusion. Scores added, but nothing added to the exclusion list");

    // check if the peptide is identified or not
    if (id == null)
    {
        performanceEvaluator.countMS2UnidentifiedAnalyzed();
        return;
    }

    Peptide pep = getPeptideFromIdentification(id);

    // add decoy or non-existent protein connections
    // database.addProteinFromIdentification(pep, id.getParentProteinAccessions());

    Double xCorr = id.getXCorr();
    Double dCN = id.getDeltaCN();
    pep.addScore(xCorr, 0.0, dCN);
    performanceEvaluator.evaluateAnalysis(exclusionList, pep);

    RetentionTime rt = pep.getRetentionTime();
    var sequence = pep.getSequence();

    // Remember the retention time peak seen the first time this sequence is
    // encountered; later observations reuse the stored value.
    if (!rtCalcPredictedRT.ContainsKey(sequence))
    {
        rtCalcPredictedRT.Add(sequence, rt.getRetentionTimePeak());
    }

    ObservedPeptideRtTrackerObject observedPep = new ObservedPeptideRtTrackerObject(
        sequence,
        id.getScanTime(),
        id.getXCorr(),
        rt.getRetentionTimePeak(),
        rt.getRetentionTimeStart() + GlobalVar.retentionTimeWindowSize,
        RetentionTime.getRetentionTimeOffset(),
        rtCalcPredictedRT[sequence],
        (rt.IsPredicted() ? 1 : 0));

    if (xCorr > XCORR_THRESHOLD)
    {
        // calibrates our retention time alignment if the observed time is different
        // from the predicted only if it passes this threshold
        calibrateRetentionTime(pep);
    }

    // The offset is read again after the optional calibration step
    // (presumably calibration can change it - confirm).
    observedPep.offset = RetentionTime.getRetentionTimeOffset();
    peptideIDRT.Add(observedPep);
}
/*
 * RandomExclusion variant: scores the identified peptide and reports it to
 * the performance evaluator. Nothing is added to the exclusion list here.
 */
protected void evaluateIdentification(IDs id)
{
    log.Debug("RandomExclusion. Scores added, but nothing added to the exclusion list");

    if (id != null)
    {
        Peptide peptide = getPeptideFromIdentification(id);

        Double xCorrScore = id.getXCorr();
        Double deltaCN = id.getDeltaCN();

        // The threshold argument is fixed at 0.0 for this variant.
        peptide.addScore(xCorrScore, 0.0, deltaCN);
        performanceEvaluator.evaluateAnalysis(exclusionList, peptide);
    }
    else
    {
        // The scan produced no identification; only count it.
        performanceEvaluator.countMS2UnidentifiedAnalyzed();
    }
}
/*
 * Writes one peptide-spectrum match to IDWriter as a tab-separated line:
 * scan number, scan time, peptide sequence, peptide mass, xCorr, deltaCN and
 * the comma-joined parent protein accessions.
 */
public static void WritePSM(IDs id)
{
    String[] fields = new String[]
    {
        id.getScanNum().ToString(),
        id.getScanTime().ToString(),
        id.getPeptideSequence().ToString(),
        id.getPeptideMass().ToString(),
        id.getXCorr().ToString(),
        id.getDeltaCN().ToString(),
        // Accessions share one column, separated by commas.
        String.Join(",", id.getParentProteinAccessions())
    };

    IDWriter.WriteLine(String.Join("\t", fields));
}
/*
 * Processes one identification result: records the score on the matched
 * peptide, excludes the peptide when its xCorr exceeds a fixed threshold, and
 * excludes every not-yet-excluded parent protein that the logistic regression
 * classifier (lrAccord) reports as confidently identified.
 */
protected void evaluateIdentification(IDs id)
{
    // No identification for this scan: just tally it and stop.
    if (id == null)
    {
        performanceEvaluator.countMS2UnidentifiedAnalyzed();
        return;
    }

    Peptide peptide = getPeptideFromIdentification(id);

    // add decoy or non-existent protein connections
    // database.AddProteinFromIdentification(pep, id.getParentProteinAccessions());

    Double xCorrScore = id.getXCorr();
    Double deltaCN = id.getDeltaCN();
    peptide.addScore(xCorrScore, deltaCN);

#if (!DONTEVALUATE)
    performanceEvaluator.evaluateAnalysis(exclusionList, peptide);
#endif

    // xCorr above which the peptide itself is excluded from further analysis.
    const double XCORR_THRESHOLD = 2.5;

    if (xCorrScore > XCORR_THRESHOLD)
    {
        performanceEvaluator.countPeptidesExcluded();
        log.Debug("xCorrThreshold passed. Peptide added to the exclusion list.");
        exclusionList.addPeptide(peptide);

        // Retention time alignment is only calibrated for peptides that
        // pass the threshold.
        calibrateRetentionTime(peptide);
    }

    // Classifier verdict per parent protein, looked up by accession below.
    Dictionary<String, Boolean> confidenceByAccession = IdentificationFeatureExtractionUtil
        .assessProteinIdentificationConfidence(peptide.getProteins(), lrAccord);

    List<Protein> newlyExcluded = new List<Protein>();
    foreach (Protein candidate in peptide.getProteins())
    {
        // Skip proteins that were already excluded earlier.
        if (candidate.IsExcluded())
        {
            continue;
        }

        bool isConfident = confidenceByAccession[candidate.getAccession()];
        if (!isConfident)
        {
            continue;
        }

        candidate.setExcluded(true);
        log.Debug("Parent protein " + candidate.getAccession() + " is identified confidently "
            + candidate.getNumDB() + " times!");
        performanceEvaluator.countProteinsExcluded();
        newlyExcluded.Add(candidate);
    }

    // All newly excluded proteins are handed over in one call.
    exclusionList.addProteins(newlyExcluded);
}
/*
 * Processes one identification result: records the score on the matched
 * peptide, excludes the peptide when its xCorr exceeds a fixed threshold, and
 * excludes every not-yet-excluded parent protein that the logistic regression
 * classifier (lrAccord) reports as confidently identified. When
 * TRACKEXCLUDEDPROTEINFEATURE is defined, the feature vector of each newly
 * excluded protein is also stored in excludedProteinFeatureList.
 */
protected void evaluateIdentification(IDs id)
{
    // No identification for this scan: just tally it and stop.
    if (id == null)
    {
        performanceEvaluator.countMS2UnidentifiedAnalyzed();
        return;
    }

    Peptide peptide = getPeptideFromIdentification(id);
    //log.Info("Peptide Observed Time: {0}\tPredicted Time: {1} -----------------", id.getScanTime(),pep.getRetentionTime().getRetentionTimeStart());

    // add decoy or non-existent protein connections
    // database.AddProteinFromIdentification(pep, id.getParentProteinAccessions());

    Double xCorrScore = id.getXCorr();
    double deltaCN = id.getDeltaCN();
    peptide.addScore(xCorrScore, deltaCN);

#if (!DONTEVALUATE)
    performanceEvaluator.evaluateAnalysis(exclusionList, peptide);
#endif

    //RetentionTime rt = pep.getRetentionTime();
    //if (!rtCalcPredictedRT.Keys.Contains(pep.getSequence()))
    //{
    //    rtCalcPredictedRT.Add(pep.getSequence(), rt.getRetentionTimePeak());
    //}
    //double[] values = new double[] { id.getScanTime(), id.getXCorr(), rt.getRetentionTimePeak(), rt.getRetentionTimeStart() + GlobalVar.retentionTimeWindowSize, RetentionTime.getRetentionTimeOffset(), rtCalcPredictedRT[pep.getSequence()], rt.IsPredicted() ? 1 : 0 };

    // xCorr above which the peptide itself is excluded from further analysis.
    const double XCORR_THRESHOLD = 2.5;

    if (xCorrScore > XCORR_THRESHOLD)
    {
        performanceEvaluator.countPeptidesExcluded();
        log.Debug("xCorrThreshold passed. Peptide added to the exclusion list.");
        exclusionList.addPeptide(peptide);

        // Retention time alignment is only calibrated for peptides that
        // pass the threshold.
        calibrateRetentionTime(peptide);
    }

    // Classifier verdict per parent protein, looked up by accession below.
    Dictionary<String, Boolean> confidenceByAccession = IdentificationFeatureExtractionUtil
        .assessProteinIdentificationConfidence(peptide.getProteins(), lrAccord);

    List<Protein> newlyExcluded = new List<Protein>();
    foreach (Protein candidate in peptide.getProteins())
    {
        // Skip proteins that were already excluded earlier.
        if (candidate.IsExcluded())
        {
            continue;
        }

        bool isConfident = confidenceByAccession[candidate.getAccession()];
        if (!isConfident)
        {
            continue;
        }

#if TRACKEXCLUDEDPROTEINFEATURE
        excludedProteinFeatureList.Add(candidate.vectorize().ItemArray);
#endif
        candidate.setExcluded(true);
        log.Debug("Parent protein " + candidate.getAccession() + " is identified confidently "
            + candidate.getNumDB() + " times!");
        performanceEvaluator.countProteinsExcluded();
        newlyExcluded.Add(candidate);
    }

    // All newly excluded proteins are handed over in one call.
    exclusionList.addProteins(newlyExcluded);
}
/*
 * Manual test harness for CometSingleSearch. It initializes Comet with a
 * hard-coded index and parameter file and runs its quality check, then calls
 * Program.ExitProgram(1). The remainder loads an MS2 file, searches the first
 * spectrum, and writes any match to the console and to WriterClass.
 */
public static void CometSingleSearchTest()
{
    String idx = "C:\\Coding\\2019LavalleeLab\\GitProjectRealTimeMS\\TestData\\PreComputedFiles\\uniprot_SwissProt_Human_1_11_2017_decoyConcacenated.fasta.idx";
    //String idx = "C:\\temp\\comet_2019015\\comet_source_2019015\\IDXMake\\uniprot_SwissProt_Human_1_11_2017_decoyConcacenated.fasta.idx";
    String param = "C:\\Coding\\2019LavalleeLab\\temp2\\ExampleDataSet\\2019.comet.params";
    CometSingleSearch.InitializeComet(idx, param);
    CometSingleSearch.QualityCheck();

    // NOTE(review): if ExitProgram terminates the process, nothing below this
    // call ever runs - confirm whether that is intended.
    Program.ExitProgram(1);

    String dataRoot = "C:\\Users\\LavalleeLab\\Documents\\JoshTemp\\MealTimeMS_APITestRun\\Data\\";
    String outputRoot = "C:\\Users\\LavalleeLab\\Documents\\JoshTemp\\MealTimeMS_APITestRun\\Output\\";
    //String mzmlPath = dataRoot+"60minMZMLShrink.csv";
    String mzmlPath = dataRoot + "8001.ms2.txt";
    String dbPath = dataRoot + "tinyDB.fasta.idx";
    // NOTE(review): outputPath is used below but its local declaration is
    // commented out; presumably it resolves to a member declared elsewhere - confirm.
    // String outputPath = outputRoot + "output.txt";
    String paramsPath = dataRoot + "comet.params";

    MZMLFile mzml = Loader.parseMS2File(mzmlPath);
    //MZMLFile mzml = null;
    CometSingleSearch.InitializeComet(dbPath, paramsPath);

    var watch = System.Diagnostics.Stopwatch.StartNew();
    int counter = 0;
    Console.WriteLine("Starting comet search");
    WriterClass.initiateWriter(outputPath);

    try
    {
        // Only the first spectrum is examined (i < 1); the "i % 1" filter is
        // always true and is kept as the place to change the sampling stride.
        for (int i = 0; i < 1; i++)
        {
            if (i % 1 == 0)
            {
                Spectra spec = mzml.getSpectraArray()[i];
                if (spec.getMSLevel() != 2)
                {
                    continue;
                }

                // Fixed: RT previously used placeholder {2}, so the mass was
                // printed twice and the start time never appeared.
                Console.WriteLine("scanNum {0} RT {1} Mass {2} MSLevel {3}", spec.getScanNum(),
                    spec.getStartTime(), spec.getCalculatedPrecursorMass(), spec.getMSLevel());

                IDs id = null;
                if (CometSingleSearch.Search(spec, out id))
                {
                    String outLine = String.Format("{0}\t{1}\txcorr\t{2}\tdcn\t{3}", id.getScanNum(),
                        id.getPeptideSequence(), id.getXCorr(), id.getDeltaCN());
                    Console.WriteLine(outLine);
                    WriterClass.writeln(outLine);
                }
                else
                {
                    Console.WriteLine("Spectrum cannot be matched\n");
                }
                counter++;
            }
        }

        watch.Stop();
        Console.WriteLine("Comet search of " + counter + " spectra took " + watch.ElapsedMilliseconds
            + " milliseconds");
    }
    finally
    {
        // Close the writer even when the search throws, so it is not leaked.
        WriterClass.CloseWriter();
    }
}