/// <summary>
/// One-time setup executed before an experiment starts. Calls, in order:
/// ConstructDecoyFasta, ConstructIDX, the simulation-only baseline preparation,
/// databaseSetUp and CometSingleSearch.InitializeComet.
/// </summary>
static void PreExperimentSetUp()
{
    // ddaNum key under which the baseline comparison is registered.
    const int baselineDdaNum = 12;

    ConstructDecoyFasta();
    ConstructIDX();

    if (GlobalVar.IsSimulation)
    {
        ms2SpectraList = Loader.parseMS2File(InputFileOrganizer.MS2SimulationTestFile).getSpectraArray();
        GlobalVar.ExperimentTotalScans = ms2SpectraList.Count;

        FullPepXMLAndProteinProphetSetup();
        baseLinePpr = ProteinProphetEvaluator.getProteinProphetResult(InputFileOrganizer.OriginalProtXMLFile);

        // In Alex's original code, "original experiment" refers to the experiment without any
        // exclusion or manipulation by this program, while "baseline comparison" refers to the
        // results of a "NoExclusion" run (a top 6 or top 12 DDA run), which is not implemented
        // in this program. The two are therefore the same here.
        PerformanceEvaluator.setBaselineComparison(baseLinePpr, (int)GlobalVar.ExperimentTotalScans, baselineDdaNum);
        PerformanceEvaluator.setOriginalExperiment(baseLinePpr.getNum_proteins_identified());
    }

    log.Debug("Setting up Database");
    database = databaseSetUp(InputFileOrganizer.ExclusionDBFasta);
    log.Debug("Done setting up database.");

    CometSingleSearch.InitializeComet(InputFileOrganizer.IDXDataBase, InputFileOrganizer.CometParamsFile);

    Console.WriteLine("pre-experimental setup finished");
}
/// <summary>
/// Writes a partial pepXML file restricted to the spectra reported by
/// <paramref name="exclusionProfile"/> and runs ProteinProphet on it.
/// </summary>
/// <param name="exclusionProfile">Profile whose getSpectraUsed() result selects the spectra to keep.</param>
/// <param name="experimentName">Prefix of the partial pepXML file name.</param>
/// <param name="keepResults">Forwarded unchanged to RunProteinProphet.</param>
/// <returns>The result returned by RunProteinProphet for the partial pepXML file.</returns>
public static ProteinProphetResult postProcessing(ExclusionProfile exclusionProfile, String experimentName, Boolean keepResults)
{
    String runFolder = InputFileOrganizer.OutputFolderOfTheRun;
    String partialFolder = Path.Combine(runFolder, "PartialCometFile");
    if (!Directory.Exists(partialFolder))
    {
        Directory.CreateDirectory(partialFolder);
    }

    String partialPepXml = Path.Combine(partialFolder, experimentName + "_partial" + InputFileOrganizer.PepXMLSuffix);

    // TODO was using MZML instead of MS2
    PartialPepXMLWriter.writePartialPepXMLFile(
        InputFileOrganizer.OriginalCometOutput,
        exclusionProfile.getSpectraUsed(),
        partialPepXml,
        InputFileOrganizer.MS2SimulationTestFile,
        InputFileOrganizer.FASTA_FILE,
        partialPepXml);

    return RunProteinProphet(partialPepXml, InputFileOrganizer.OutputFolderOfTheRun, keepResults);
}
/// <summary>
/// Stores the baseline values taken from a ProteinProphet result: the identified proteins,
/// their count, the number of MS2 scans and the DDA number.
/// </summary>
/// <param name="ppr">Result whose getProteinsIdentified() list becomes the baseline protein list.</param>
/// <param name="numMS2">Stored as the total resources of the naive experiment.</param>
/// <param name="ddaNum">DDA number this baseline belongs to.</param>
public BaselineComparison(ProteinProphetResult ppr, int numMS2, int ddaNum)
{
    this.ddaNum = ddaNum;
    totalResourcesNaiveExperiment = numMS2;

    proteinsIdentifiedByNoExclusion = ppr.getProteinsIdentified();
    // The count is derived from the list stored on the line above.
    numProteinsIdentifiedNaiveExperiment = proteinsIdentifiedByNoExclusion.Count;
}
/// <summary>
/// Finalizes the performance evaluator with the supplied experiment details and returns
/// the string produced by its outputPerformance() method.
/// </summary>
public String getPerformanceVector(String experimentName, String experimentType, double analysisTime, double totalRunTime, ProteinProphetResult ppr, int ddaNum, ExclusionProfile exclusionProfile)
{
    performanceEvaluator.finalizePerformanceEvaluator(
        experimentName,
        experimentType,
        analysisTime,
        totalRunTime,
        exclusionList,
        ppr,
        ddaNum,
        exclusionProfile);

    String performanceVector = performanceEvaluator.outputPerformance();
    return performanceVector;
}
/// <summary>
/// Registers a baseline comparison for <paramref name="ddaNum"/> unless one is already registered;
/// an existing entry is left untouched.
/// </summary>
public static void setBaselineComparison(ProteinProphetResult ppr, int numMS2, int ddaNum)
{
    if (baselineComparisonSet.ContainsKey(ddaNum))
    {
        // First registration wins.
        return;
    }

    baselineComparisonSet.Add(ddaNum, new BaselineComparison(ppr, numMS2, ddaNum));
}
/// <summary>
/// Runs ProteinProphet on a fixed pep.xml file and prints the result to the console.
/// </summary>
public static void DoJob()
{
    String cometPepXml = "C:\\Users\\LavalleeLab\\Documents\\JoshTemp\\RealTimeMS\\TestData\\MS_QC_120min.pep.xml";
    String outputFolder = "C:\\Users\\LavalleeLab\\Documents\\JoshTemp\\RealTimeMS\\TestData\\";

    ProteinProphetResult result = PostProcessingScripts.RunProteinProphet(cometPepXml, outputFolder, true);
    Console.WriteLine(result.ToString());
}
/// <summary>
/// Records how many proteins <paramref name="ppr"/> identified and how many of them also appear in
/// <paramref name="proteinsIdentifiedByNoExclusion"/>. Does nothing when <paramref name="ppr"/> is null.
/// </summary>
private void setProteinsIdentified(ProteinProphetResult ppr, List<String> proteinsIdentifiedByNoExclusion)
{
    if (ppr == null)
    {
        return;
    }

    List<String> identified = ppr.getProteinsIdentified();
    int sharedWithBaseline = compareProteins(proteinsIdentifiedByNoExclusion, identified);

    ChangeValue(Header.NumProteinsIdentified, identified.Count);
    ChangeValue(Header.ProteinsIdentifiedInLimitedDDA, sharedWithBaseline);
}
/// <summary>
/// Stores the prot.xml file name and the protein-to-peptides map, then builds the
/// ProteinProphetResult from the two thresholds and getProteinNames().
/// </summary>
/// <param name="protXMLFileName">Name of the prot.xml file this object represents.</param>
/// <param name="proteinsToPeptides">Map from protein name to its peptides.</param>
/// <param name="fdr_threshold">Passed through to the ProteinProphetResult constructor.</param>
/// <param name="protein_probablity_threshold">Passed through to the ProteinProphetResult constructor.</param>
public ProteinProphetFile(String protXMLFileName, Dictionary<String, List<String>> proteinsToPeptides, double fdr_threshold, double protein_probablity_threshold)
{
    this.protXMLFileName = protXMLFileName;
    this.proteinsToPeptides = proteinsToPeptides;

    // The fields above must be assigned before getProteinNames() is called.
    // NOTE(review): getProteinNames() presumably reads this.proteinsToPeptides - confirm.
    proteinProphetResult = new ProteinProphetResult(fdr_threshold, protein_probablity_threshold, getProteinNames());
}
/// <summary>
/// Runs ProteinProphetSearch on a Comet output file, parses the resulting file into a
/// ProteinProphetResult and attaches the extracted positive protein groups to it.
/// </summary>
/// <param name="cometFilePath">Path of the Comet output file to process.</param>
/// <param name="outputFolder">Forwarded to ProteinProphetSearch.</param>
/// <param name="keepResults">Forwarded to ProteinProphetSearch.</param>
/// <returns>The parsed result with its protein groups set.</returns>
public static ProteinProphetResult RunProteinProphet(String cometFilePath, String outputFolder, Boolean keepResults)
{
    // Name the file being processed; the message previously ended at the colon.
    Logger.debug("Post processing comet file: " + cometFilePath);
    Console.WriteLine("Runnign protein prophet, if program doesn't respond for a long time, try pressing a typing a few keys into the command line");

    String proteinProphetOutput = ProteinProphetSearch(cometFilePath, outputFolder, keepResults);

    ProteinProphetResult ppr = ProteinProphetEvaluator.getProteinProphetResult(proteinProphetOutput);
    ppr.SetProteinGroup(ProteinProphetEvaluator.ExtractPositiveProteinGroups(proteinProphetOutput));
    return ppr;
}
/// <summary>
/// Sets the FASTA file path, runs ProteinProphet on a fixed partial pep.xml file and prints the
/// result together with the number of filtered protein groups.
/// </summary>
public static void DoJob()
{
    InputFileOrganizer.FASTA_FILE = "C:\\Coding\\2019LavalleeLab\\temp2\\ExampleDataSet\\uniprot_SwissProt_Human_1_11_2017.fasta";

    // Alternative input:
    // "C:\\Coding\\2019LavalleeLab\\temp2\\ModifiedDBSearchFiles\\Result\\ModdedSearch_MLGEGolden_nonCheat_peptideSearchResultIncluded\\PartialCometFile\\1ModdedSearch_MLGE_nonCheat_peptideSearchResultIncluded_partial.pep.xml"
    String partialPepXml = "C:\\Coding\\2019LavalleeLab\\temp2\\ModifiedDBSearchFiles\\Result\\Modded_MLGEGolden_rtCheatpeptideSearchResultIncluded\\PartialCometFile\\1Modded_MLGEGolden_rtCheat_partial.pep.xml";

    // An existing prot.xml could be evaluated directly instead, e.g.
    // ProteinProphetEvaluator.getProteinProphetResult("C:\\Coding\\2019LavalleeLab\\temp2\\Output\\ProteinGroup_Combined_rtCheat\\protein_prophet_output\\ProteinGroup_Combined_rtCheat_partial_interact.prot.xml");
    ProteinProphetResult result = PostProcessingScripts.RunProteinProphet(partialPepXml, InputFileOrganizer.OutputFolderOfTheRun, true);

    Console.WriteLine(result.ToString());
    Console.WriteLine("Protein groups " + result.getFilteredProteinGroups().Count);
}
/// <summary>
/// Computes the post-run metrics (exclusion correctness, resources saved, protein identification
/// ratios, protein overlap and protein group count) and stores each one through ChangeValue.
/// </summary>
/// <param name="ddaNum">Key of the baseline comparison in baselineComparisonSet; the indexer throws if it is missing.</param>
/// <param name="ppr">ProteinProphet result of this experiment; may be null, in which case the ppr-derived values are not updated.</param>
/// <param name="exclusionProfile">Profile whose database supplies the excluded proteins.</param>
private void postProcessingCalculations(int ddaNum, ProteinProphetResult ppr, ExclusionProfile exclusionProfile)
{
    BaselineComparison bc = baselineComparisonSet[ddaNum];
    List<String> proteinsIdentifiedByNoExclusion = bc.getProteinsIdentifiedByNoExclusion();
    int totalResourcesNaiveExperiment = bc.getTotalResourcesNaiveExperiment();
    int numProteinsIdentifiedNaiveExperiment = bc.getNumProteinsIdentifiedNaiveExperiment();

    // Set proteins identified first: the ratios below read Header.NumProteinsIdentified
    // and Header.ProteinsIdentifiedInLimitedDDA from data.
    setProteinsIdentified(ppr, proteinsIdentifiedByNoExclusion);

    int correctlyExcluded = (int)data[Header.EvaluateExclusion_FoundOnCurrentExclusionList]
        + (int)data[Header.EvaluateExclusion_FoundOnCurrentObservedExclusionList];
    int incorrectlyExcluded = (int)data[Header.EvaluateExclusion_NotFoundOnExclusionList];

    /*
     * Found on past observed, found on past exclusion list, and found on future
     * exclusion list are not incorrect exclusions, they are retention time being
     * predicted incorrectly...
     */
    double ratioIncorrectlyExcludedOverCorrectlyExcluded = takeRatio(incorrectlyExcluded, correctlyExcluded);
    ChangeValue(Header.CorrectlyExcluded, correctlyExcluded);
    ChangeValue(Header.IncorrectlyExcluded, incorrectlyExcluded);
    ChangeValue(Header.RatioIncorrectlyExcludedOverCorrectlyExcluded, ratioIncorrectlyExcludedOverCorrectlyExcluded);

    // Resources saved = total # available MS2 - # MS2 used for analysis.
    int resourcesSaved = totalResourcesNaiveExperiment - (int)data[Header.NumMS2Analyzed];
    double percentResourcesSaved = takeRatio(resourcesSaved, totalResourcesNaiveExperiment);
    double percentResourcesUsed = 1 - percentResourcesSaved;
    ChangeValue(Header.PercentResourcesSaved, percentResourcesSaved);
    ChangeValue(Header.PercentResourcesUsed, percentResourcesUsed);

    /*
     * Protein Identification Sensitivity = # proteins identified / # proteins identified in whole experiment
     * Protein Identification Fold Change = # proteins identified / # proteins identified by naive approach
     * Protein Identification Sensitivity Limited DDA = # proteins identified also identified in naive approach
     *                                                  / # proteins identified by naive approach
     */
    ChangeValue(Header.ProteinIdentificationSensitivity,
        takeRatio((int)data[Header.NumProteinsIdentified], numProteinsIdentifiedOriginalExperiment));
    ChangeValue(Header.ProteinIdentificationFoldChange,
        takeRatio((int)data[Header.NumProteinsIdentified], numProteinsIdentifiedNaiveExperiment));
    ChangeValue(Header.ProteinIdentificationSensitivityLimitedDDA,
        takeRatio((int)data[Header.ProteinsIdentifiedInLimitedDDA], numProteinsIdentifiedNaiveExperiment));

    List<String> inProgramExcludedProteins = exclusionProfile.getDatabase().getExcludedProteins();
    int proteinOverlap_inProgramExcluded_vs_NoExclusion = compareProteins(inProgramExcludedProteins, proteinsIdentifiedByNoExclusion);
    ChangeValue(Header.NumProteinOverlap_ExcludedProteinsAgainstNoExclusionProteins, proteinOverlap_inProgramExcluded_vs_NoExclusion);

    // setProteinsIdentified tolerates a null ppr, so this dereference is guarded the same way
    // instead of throwing after every other metric has already been stored.
    if (ppr != null)
    {
        ChangeValue(Header.ProteinGroupsIdentified, ppr.getFilteredProteinGroups().Count);
    }
}
/// <summary>
/// Writes a tab-separated report with one row per protein identified in <paramref name="ppr"/>:
/// accession, number of distinct peptide sequences and number of peptide score entries.
/// </summary>
/// <param name="ppr">Result whose getProteinsIdentified() list drives the report rows.</param>
/// <param name="exclusionProfile">Profile whose database is used to look up each protein.</param>
/// <param name="experimentNum">Number appended to the report file name.</param>
/// <returns>Path of the written report file.</returns>
public static String DoJob(ProteinProphetResult ppr, ExclusionProfile exclusionProfile, int experimentNum)
{
    String outputFile = Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "NumberOfPeptidesPerIdentifiedProtein_" + experimentNum + ".txt");

    // using guarantees the file handle is released even if a lookup below throws.
    using (StreamWriter sw = new StreamWriter(outputFile))
    {
        sw.WriteLine("Accession\tNumberOfPeptides\tSpectralCount");

        List<String> confidentlyIdentifiedProts = ppr.getProteinsIdentified();
        foreach (String accession in confidentlyIdentifiedProts)
        {
            Protein prot = exclusionProfile.getDatabase().getProtein(accession);

            // Distinct peptide sequences observed for this protein.
            HashSet<String> peptidesObserved = new HashSet<String>();
            foreach (PeptideScore pepEvidence in prot.getPeptideScore())
            {
                peptidesObserved.Add(pepEvidence.getPeptideSequence());
            }

            sw.WriteLine("{0}\t{1}\t{2}", accession, peptidesObserved.Count, prot.getPeptideScore().Count);
        }
    }

    return outputFile;
}
/// <summary>
/// Compares the proteins listed in a fixed excluded-protein file against the proteins identified
/// in a fixed prot.xml file and writes both counts and the intersection size to
/// "ExcludedProteinComparison.txt" in the run's output folder.
/// </summary>
public static void DoJob()
{
    // The writer is opened first, as before, so the output file is created before the input is read.
    using (StreamWriter sw = new StreamWriter(Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "ExcludedProteinComparison.txt")))
    {
        String ExcludedProteinFile = "C:\\Coding\\2019LavalleeLab\\temp2\\Output\\Gold_MLGE_nonCheat.txt_output\\ExcludedProteinList.txt";

        // One entry per line of the excluded-protein file.
        List<String> inProgramConfidentlyIdentified = new List<String>();
        using (StreamReader sr = new StreamReader(ExcludedProteinFile))
        {
            String line = sr.ReadLine();
            while (line != null)
            {
                inProgramConfidentlyIdentified.Add(line);
                line = sr.ReadLine();
            }
        }

        String proteinProphetFile = "C:\\Coding\\2019LavalleeLab\\GitProjectRealTimeMS\\TestData\\PreComputedFiles\\MS_QC_120min_interact.prot.xml";
        ProteinProphetResult ppr = ProteinProphetEvaluator.getProteinProphetResult(proteinProphetFile);
        List<String> realConfidentIdentified = ppr.getProteinsIdentified();

        List<String> intersection = ListUtil.FindIntersection(inProgramConfidentlyIdentified, realConfidentIdentified);

        sw.WriteLine("In-Program excluded: {0}", inProgramConfidentlyIdentified.Count);
        sw.WriteLine("Real confidently identified: {0}", realConfidentIdentified.Count);
        sw.WriteLine("Intersection: {0}", intersection.Count);
    }
}
/// <summary>
/// Records the experiment's name, duration, parameters and exclusion list, then runs the
/// post-processing calculations for the given DDA number.
/// </summary>
public void finalizePerformanceEvaluator(
    String experimentName,
    String experimentType,
    double analysisTime,
    double totalRunTime,
    ExclusionList exclusionList,
    ProteinProphetResult ppr,
    int ddaNum,
    ExclusionProfile exclusionProfile)
{
    // Experiment description.
    setExperimentName(experimentName, experimentType);
    setExperimentDuration(analysisTime, totalRunTime);
    setExperimentParams();
    setExclusionList(exclusionList);

    // Derived metrics are computed last, after the setters above have run.
    postProcessingCalculations(ddaNum, ppr, exclusionProfile);
}
// Alternative input: "C:\\Coding\\2019LavalleeLab\\GoldStandardData\\MZML_Files\\MS_QC_120min.mzml"
static String mzml = "C:\\Coding\\2019LavalleeLab\\RealTest_Results_20200219\\MSQC_QE_200ng_HEK_2hr_to_run_200219172225.mzML";

/// <summary>
/// Runs a full Comet + ProteinProphet search on the MS2 file, then repeats ProteinProphet on a
/// partial pepXML containing only the scans not listed in the excluded-spectra file, and reports
/// the percentage of scans used and the percentage of proteins still identified.
/// </summary>
public static void DoJob()
{
    // Comet
    Console.WriteLine("Performing Comet search on full ms2 data");
    String fullCometFile = PostProcessingScripts.CometStandardSearch(ms2File, InputFileOrganizer.OutputFolderOfTheRun, true);
    InputFileOrganizer.OriginalCometOutput = fullCometFile;

    // Protein prophet
    Console.WriteLine("Perform a protein prophet search on full pepxml");
    String fullProteinProphetFile = PostProcessingScripts.ProteinProphetSearch(fullCometFile, InputFileOrganizer.OutputFolderOfTheRun, true);
    InputFileOrganizer.OriginalProtXMLFile = fullProteinProphetFile;
    ProteinProphetResult baseLinePpr = ProteinProphetEvaluator.getProteinProphetResult(InputFileOrganizer.OriginalProtXMLFile);

    // Load spectra
    Console.WriteLine("loading spectra array");
    List<Spectra> ls = Loader.parseMS2File(ms2File).getSpectraArray();

    // A set makes the per-spectrum membership test below constant-time instead of a list scan.
    HashSet<int> excludedSpectra = new HashSet<int>();
    // using releases the reader, which was previously never closed.
    using (StreamReader sr = new StreamReader(excludedSpectraFile))
    {
        String line = sr.ReadLine();
        while (line != null)
        {
            excludedSpectra.Add(int.Parse(line));
            line = sr.ReadLine();
        }
    }

    List<int> includedSpectra = new List<int>();
    foreach (Spectra sp in ls)
    {
        if (!excludedSpectra.Contains(sp.getScanNum()))
        {
            includedSpectra.Add(sp.getScanNum());
        }
    }

    // Alternative output: "C:\\Coding\\2019LavalleeLab\\GoldStandardData\\pepxml\\MS_QC_120min_partial.pep.xml"
    String outputCometFile = Path.Combine(InputFileOrganizer.OutputFolderOfTheRun, "realTestpartialOut.pep.xml");
    // Alternative FASTA: "C:\\Coding\\2019LavalleeLab\\GoldStandardData\\Database\\uniprot_SwissProt_Human_1_11_2017.fasta"
    String fastaFile = InputFileOrganizer.FASTA_FILE;
    PartialPepXMLWriter.writePartialPepXMLFile(fullCometFile, includedSpectra, outputCometFile, mzml, fastaFile, outputCometFile);

    String partialProt = PostProcessingScripts.ProteinProphetSearch(outputCometFile, InputFileOrganizer.OutputFolderOfTheRun, true);
    ProteinProphetResult partialPpr = ProteinProphetEvaluator.getProteinProphetResult(partialProt);

    // Doubles are used so the percentage divisions below are floating-point.
    double partialNum = partialPpr.getNum_proteins_identified();
    double totalNum = baseLinePpr.getNum_proteins_identified();
    double idSens = partialNum / totalNum * 100.0;

    double includedScanNum = includedSpectra.Count;
    double totalScanNum = ls.Count;
    double usedResource = includedScanNum / totalScanNum * 100;

    String line1 = String.Format("includedScans {0} \t totalScanNum {1} \tUsedResources {2}", includedScanNum, totalScanNum, usedResource);
    String line2 = String.Format("partialNum {0} \t totalNum {1} \tidsens {2}", partialNum, totalNum, idSens);
    Console.WriteLine(line1);
    Console.WriteLine(line2);
    WriterClass.writeln(line1);
    WriterClass.writeln(line2);
    WriterClass.CloseWriter();
}