/*
 * Builds a ProteinProphetFile from a prot.xml file. The returned object can be
 * queried to find out whether a protein accession was identified by an
 * experiment, and which specific peptides were identified.
 *
 * protXMLFileName: path of the ProteinProphet prot.xml file to process.
 *
 * Returns the ProteinProphetFile built from the filtered protein data, with
 * the protein group of its ProteinProphetResult set from
 * ProteinProphetEvaluator.ExtractPositiveProteinGroups.
 */
private static ProteinProphetFile processProteinProphetFile(String protXMLFileName)
{
    // 1% false discovery rate
    const double targetFdr = 0.01;
    double appliedFdr = setFDRThreshold(protXMLFileName, targetFdr);

    // Pipeline: protein groups -> filtered proteins -> protein-to-peptides map.
    Dictionary<String, List<String>> peptidesByProtein =
        extractPeptides(
            filterProteinsData(
                extractProteinGroupsData(protXMLFileName)));

    // protein_probablity_threshold is a non-local value; it is read here, only
    // after the steps above have run, so the statement order must be kept.
    ProteinProphetFile parsedFile = new ProteinProphetFile(
        protXMLFileName,
        peptidesByProtein,
        appliedFdr,
        protein_probablity_threshold);

    ProteinProphetResult result = parsedFile.getProteinProphetResult();
    result.SetProteinGroup(ProteinProphetEvaluator.ExtractPositiveProteinGroups(protXMLFileName));

    return parsedFile;
}
/*
 * Extracts the names of proteins that were not identified with high
 * confidence, for use as a negative training set.
 *
 * Selection is driven by a protein probability threshold. An FDR-based
 * cut-off of 0.2 was tried earlier and did not work, because the largest FDR
 * in the data was 0.173.
 *
 * proteinProphetFile: path of the ProteinProphet output file to read.
 * pr_threshold:       value stored into the non-local
 *                     protein_probablity_threshold before extraction runs.
 *                     The value is not restored afterwards.
 *
 * Returns the protein names reported by the resulting ProteinProphetFile.
 */
public static List<String> extractNegativeTrainingSetProteinNames(String proteinProphetFile, double pr_threshold)
{
    // Must happen first: the steps below run with this threshold in place.
    protein_probablity_threshold = pr_threshold;

    // Pipeline: negative protein groups -> filtered proteins -> protein-to-peptides map.
    Dictionary<String, List<String>> peptidesByProtein =
        extractPeptides(
            filterNegativeTrainingSetProteinData(
                extractNegativeProteinGroupsData(proteinProphetFile)));

    // The FDR argument is fixed to 1 here; the threshold is re-read from the
    // non-local value (not from pr_threshold) exactly as before.
    ProteinProphetFile negativeSet = new ProteinProphetFile(
        proteinProphetFile,
        peptidesByProtein,
        1,
        protein_probablity_threshold);

    return negativeSet.getProteinNames();
}
/*
 * Ad-hoc driver: processes two hard-coded ProteinProphet prot.xml files and
 * compares the peptides of the first against the second.
 *
 * args: not used.
 *
 * NOTE(review): both paths are absolute paths on a developer machine; this
 * method will only work where those files exist.
 */
public static void main(String[] args)
{
    String original_protein_prophet_output = "/Users/apell035/workspace/RealTimeMS/data/processed_files/protein_prophet_output/Alex_Mac_MS_QC_60min_interact.prot.xml";
    String testFile = "/Users/apell035/workspace/RealTimeMS/data/processed_files/protein_prophet_output/Alex_Mac_MS_QC_240min_interact.prot.xml";

    ProteinProphetFile ogppf = processProteinProphetFile(original_protein_prophet_output);
    ProteinProphetFile ppf = processProteinProphetFile(testFile);

    // The comparison result (presumably the peptides not identified in the
    // second file - confirm against comparePeptides) is not consumed here.
    // The call itself is kept so that any work done inside comparePeptides
    // still happens.
    ogppf.comparePeptides(ppf);
}
/*
 * Lists the proteins identified in a ProteinProphet experiment.
 *
 * protXMLFileName: path of the ProteinProphet prot.xml file to process.
 *
 * Returns the protein names reported by the ProteinProphetFile that
 * processProteinProphetFile builds for this file.
 */
public static List<String> extractIdentifiedProteinNames(String protXMLFileName)
{
    return processProteinProphetFile(protXMLFileName).getProteinNames();
}
/*
 * Processes a ProteinProphet prot.xml file and returns its result summary,
 * i.e. the identifications obtained at the FDR threshold and protein
 * probability threshold applied by processProteinProphetFile.
 *
 * protXMLFileName: path of the ProteinProphet prot.xml file to process.
 *
 * Returns the ProteinProphetResult held by the processed ProteinProphetFile.
 */
public static ProteinProphetResult getProteinProphetResult(String protXMLFileName)
{
    return processProteinProphetFile(protXMLFileName).getProteinProphetResult();
}