/// <summary>
/// Chooses a set of MS2 scans and writes them to an MGF file in the QC search data directory.
/// </summary>
/// <param name="qcParameters">QC settings; provides the search parameters and the QC search data directory.</param>
/// <param name="rawData">Raw data collection whose MS2 scan enumerator is the pool scans are picked from.</param>
/// <param name="rawFile">Raw file handle passed through to <c>MGF.WriteMGF</c>.</param>
/// <param name="fixedScans">Forwarded unchanged as the <c>fixedScans</c> argument of <c>AdditionalMath.SelectRandomScans</c>.</param>
public static void WriteSearchMGF(QcParameters qcParameters, RawDataCollection rawData, IRawDataPlus rawFile, bool fixedScans = false)
{
    var searchSettings = qcParameters.searchParameters;

    // Candidate pool: every scan listed under the MS2 order type.
    var ms2Scans = rawData.scanIndex.ScanEnumerators[MSOrderType.Ms2];

    // Ask for NumSpectra scans out of that pool.
    int[] selectedScans = AdditionalMath.SelectRandomScans(
        scans: ms2Scans,
        num: searchSettings.NumSpectra,
        fixedScans: fixedScans);

    MGF.WriteMGF(
        rawData,
        rawFile,
        qcParameters.QcSearchDataDirectory,
        searchSettings.MgfMassCutoff,
        selectedScans,
        searchSettings.MgfIntensityCutoff);
}
/// <summary>
/// Loads the search results file for a raw file from the QC search data directory.
/// </summary>
/// <param name="qcParameters">QC settings; provides the QC search data directory.</param>
/// <param name="rawData">Raw data collection; the file name part of its <c>rawFileName</c> plus ".pep.xml" is the results file name.</param>
/// <returns>The root element of the loaded XML document.</returns>
public static XElement LoadSearchResults(QcParameters qcParameters, RawDataCollection rawData)
{
    string searchDirectory = qcParameters.QcSearchDataDirectory;
    string pepXmlName = Path.GetFileName(rawData.rawFileName) + ".pep.xml";

    return XElement.Load(Path.Combine(searchDirectory, pepXmlName));
}
/// <summary>
/// Runs the search engine selected in the search parameters against the data for one raw file.
/// </summary>
/// <remarks>
/// X! Tandem is given the ".mgf" input path and ".pep.xml" output path inside the QC search data directory and is
/// called with <c>genDecoy: true</c>. IdentiPy is given the directory itself and is called with <c>writeMGF: false</c>.
/// Any other algorithm value results in no search being run.
/// </remarks>
/// <param name="qcParameters">QC settings; provides the search parameters and the QC search data directory.</param>
/// <param name="rawData">Raw data collection; the file name part of its <c>rawFileName</c> is the base name of the input/output files.</param>
/// <param name="rawFile">Raw file handle, passed on to IdentiPy only.</param>
public static void RunSearch(QcParameters qcParameters, RawDataCollection rawData, IRawDataPlus rawFile)
{
    // Both file names share the raw file's name and differ only in the appended extension.
    string searchDirectory = qcParameters.QcSearchDataDirectory;
    string baseName = Path.GetFileName(rawData.rawFileName);
    string mgfFile = Path.Combine(searchDirectory, baseName + ".mgf");
    string outputFile = Path.Combine(searchDirectory, baseName + ".pep.xml");

    var searchSettings = qcParameters.searchParameters;
    var algorithm = searchSettings.SearchAlgorithm;

    if (algorithm == SearchAlgorithm.XTandem)
    {
        XTandem.RunXTandem(rawData, searchSettings, mgfFile, outputFile, genDecoy: true);
    }
    else if (algorithm == SearchAlgorithm.IdentiPy)
    {
        Identipy.RunIdentipy(rawData, rawFile, searchDirectory, searchSettings, writeMGF: false);
    }
}
/// <summary>
/// Loads the search results for a raw file, extracts the PSM data from them and feeds it to <c>ParsePSMs</c>.
/// </summary>
/// <param name="qcData">QC data container that receives the derived metrics.</param>
/// <param name="rawData">Raw data collection used to locate the results file.</param>
/// <param name="rawFile">Not used by this method.</param>
/// <param name="qcParameters">QC settings; provides the results location and the search algorithm.</param>
public static void ParseSearchResults(this QcDataContainer qcData, RawDataCollection rawData, IRawDataPlus rawFile, QcParameters qcParameters)
{
    XElement pepXml = LoadSearchResults(qcParameters, rawData);

    // The algorithm is handed to the extractor along with the loaded document.
    PsmDataCollection extractedPsms = ExtractPsmData(
        pepXml,
        qcParameters.searchParameters.SearchAlgorithm);

    qcData.ParsePSMs(extractedPsms, qcParameters);
}
/// <summary>
/// Derives identification metrics from a collection of PSMs, prints them to the console and stores them on
/// <paramref name="qcData"/>.
/// </summary>
/// <remarks>
/// A PSM is treated as "good" when it is not a decoy and its hyperscore is greater than the 95th percentile of the
/// decoy hyperscores. From the good PSMs the method computes:
/// the 3+/2+ and 4+/2+ charge ratios, the fraction of PSMs with zero missed cleavages (digestion efficiency),
/// the mean number of missed cleavages per PSM, the identification rate (good PSMs / <c>NumSpectra</c>) and the
/// median mass drift. All ratios are floating-point divisions, so a zero denominator yields NaN or Infinity
/// instead of throwing.
/// </remarks>
/// <param name="qcData">Container that receives the computed metrics.</param>
/// <param name="psmCollection">PSMs to evaluate; only its <c>Values</c> are read.</param>
/// <param name="qcParameters">QC settings; its search parameters supply <c>NumSpectra</c> and the N/K/X modifications.</param>
public static void ParsePSMs(this QcDataContainer qcData, PsmDataCollection psmCollection, QcParameters qcParameters)
{
    SearchParameters searchParameters = qcParameters.searchParameters;
    int numSearched = searchParameters.NumSpectra;

    // Flatten the collection once; everything below queries this list.
    List<PsmData> psms = psmCollection.Values.ToList();

    // Score threshold: 95th percentile of the decoy hyperscores.
    double topDecoyScore = psms
        .Where(x => x.Decoy)
        .Select(x => x.Hyperscore)
        .ToArray()
        .Percentile(95);

    // Materialize the filtered sets so they are not re-filtered on every use below.
    List<PsmData> nonDecoys = psms.Where(x => !x.Decoy).ToList();
    List<PsmData> goodPsms = nonDecoys.Where(x => x.Hyperscore > topDecoyScore).ToList();
    int numGoodPSMs = goodPsms.Count;

    Console.WriteLine("Total hits: {0}", psms.Count);
    Console.WriteLine("Top decoy score: {0}", topDecoyScore);
    Console.WriteLine("Non-decoy hits: {0}", nonDecoys.Count);
    Console.WriteLine("Non-decoy hits above top decoy score: {0}", numGoodPSMs);

    // Charge state ratios, relative to the number of 2+ PSMs.
    int numCharge2 = goodPsms.Count(x => x.Charge == 2);
    int numCharge3 = goodPsms.Count(x => x.Charge == 3);
    int numCharge4 = goodPsms.Count(x => x.Charge == 4);
    double chargeRatio3to2 = (double)numCharge3 / numCharge2;
    double chargeRatio4to2 = (double)numCharge4 / numCharge2;

    // Digestion efficiency: fraction of good PSMs without any missed cleavage.
    int pepsWithNoMissedCleavages = goodPsms.Count(x => x.MissedCleavages == 0);
    double digestionEfficiency = (double)pepsWithNoMissedCleavages / numGoodPSMs;
    Console.WriteLine("Digestion efficiency: {0}", digestionEfficiency);

    // Missed cleavage rate: total missed cleavages divided by the number of good PSMs.
    double missedCleavageRate = (double)goodPsms.Select(x => x.MissedCleavages).Sum() / numGoodPSMs;
    Console.WriteLine("Missed cleavage rate (/PSM): {0}", missedCleavageRate);

    // Identification rate: good PSMs relative to the number of spectra requested for the search.
    double idRate = (double)numGoodPSMs / numSearched;
    Console.WriteLine("IDrate: {0}", idRate);

    // Modification frequencies are only computed when at least one of the N/K/X modifications is set.
    if (searchParameters.NMod != null || searchParameters.KMod != null || searchParameters.XMod != null)
    {
        qcData.GetModificationFrequency(goodPsms, searchParameters);
    }

    // Median (50th percentile) mass drift of the good PSMs.
    qcData.MedianMassDrift = goodPsms.Select(x => x.MassDrift).ToArray().Percentile(50);

    qcData.IdentificationRate = idRate;
    qcData.MissedCleavageRate = missedCleavageRate;
    qcData.DigestionEfficiency = digestionEfficiency;
    qcData.ChargeRatio3to2 = chargeRatio3to2;
    qcData.ChargeRatio4to2 = chargeRatio4to2;
}
/// <summary>
/// Validates the QC command-line options, builds the QC parameters (including optional search parameters)
/// and runs the QC.
/// </summary>
/// <remarks>
/// Every validation failure is logged, reported on the console and signalled through the return value, so the
/// caller decides how the process ends. When no search algorithm is requested the QC runs without search
/// parameters.
/// </remarks>
/// <param name="opts">Parsed QC options.</param>
/// <returns>0 after <c>QC.QC.DoQc</c> has returned; 1 when option validation fails.</returns>
static int DoStuff(ArgumentParser.QcOptions opts)
{
    Log.Information("Starting QC. Identipy: {Identipy}", opts.Identipy);

    SearchParameters searchParameters;

    QcParameters qcParameters = new QcParameters();
    qcParameters.RawFileDirectory = opts.DirectoryToQc;
    qcParameters.QcDirectory = opts.QcDirectory;
    qcParameters.QcFile = Path.Combine(opts.QcDirectory, "QC.xml");

    // A search algorithm, when given, has to be one of the two supported names (exact match).
    if (opts.SearchAlgorithm != null
        && !(new List<string>() { "identipy", "xtandem" }.Contains(opts.SearchAlgorithm)))
    {
        Log.Error("Invalid search algorithm argument: {Argument}", opts.SearchAlgorithm);
        Console.WriteLine("ERROR: Search algorithm must be one of {identipy, xtandem}");
        return 1;
    }

    // The Identipy flag overrides whatever algorithm name was supplied.
    if (opts.Identipy)
    {
        opts.SearchAlgorithm = "identipy";
    }

    if (opts.SearchAlgorithm != null)
    {
        if (opts.FastaDatabase == null)
        {
            // NOTE(review): both messages name Identipy, but this check is reached for xtandem as well.
            Log.Error("No fasta database provided for Identipy search");
            Console.WriteLine("ERROR: A fasta protein database is required for an Identipy search. Please use the --db parameter to "
                + "provide the path to a database.");
            return 1;
        }

        searchParameters = new SearchParameters
        {
            PythonExecutable = opts.PythonExecutable,
            IdentipyScript = opts.IdentipyScript,
            XTandemDirectory = opts.XTandemDirectory,
            FastaDatabase = opts.FastaDatabase,
            FixedMods = opts.FixedMods,
            NMod = opts.VariableNMod,
            KMod = opts.VariableKMod,
            XMod = opts.VariableXMod,
            NumSpectra = opts.NumberSpectra,
            MgfIntensityCutoff = opts.IntensityCutoff,
            MgfMassCutoff = opts.MassCutOff,
            FixedScans = opts.FixedScans
        };

        if (opts.SearchAlgorithm == "identipy")
        {
            // The python executable and the identipy script must be given together or not at all.
            if ((opts.IdentipyScript == null) != (opts.PythonExecutable == null))
            {
                Log.Error("If providing location of python or identipy, must specify both of them.");
                Console.WriteLine("ERROR: When invoking the -p or -I options, you must supply both of them.");
                return 1;
            }

            Identipy.CheckIdentipyDependencies(searchParameters);
            searchParameters.SearchAlgorithm = SearchAlgorithm.IdentiPy;
        }

        if (opts.SearchAlgorithm == "xtandem")
        {
            if (opts.XTandemDirectory == null)
            {
                Log.Error("Path to XTandem directory was not provided");
                Console.WriteLine("ERROR: You must specify the X! Tandem directory using the -X argument to perform a search using X! Tandem.");
                return 1;
            }

            searchParameters.SearchAlgorithm = SearchAlgorithm.XTandem;
        }
    }
    else
    {
        // No search requested.
        searchParameters = null;
    }

    qcParameters.searchParameters = searchParameters;

    QC.QC.DoQc(qcParameters);

    return 0;
}
/// <summary>
/// Runs QC on every raw file in the raw file directory that is not yet recorded in the QC data and then saves
/// the QC data to "QC.xml" in the QC directory.
/// </summary>
/// <remarks>
/// Existing QC data is loaded from "QC.xml" when that file exists; otherwise the QC directory is created if
/// needed and a new collection is started. Files that cannot be read as raw files, or whose creation date is
/// already a key in the QC data, are skipped. When search parameters are present, an MGF file is written, the
/// search is run and its results are parsed into the new QC entry.
/// The process is terminated through <c>Environment.Exit</c> with code 1 when the directory contains no raw
/// files and with code 0 when there are no new raw files.
/// </remarks>
/// <param name="qcParameters">Raw file directory, QC directory and optional search parameters (null disables the search).</param>
public static void DoQc(QcParameters qcParameters)
{
    QcDataCollection qcDataCollection;
    string dataDirectory = qcParameters.RawFileDirectory;
    string qcDirectory = qcParameters.QcDirectory;
    SearchParameters searchParameters = qcParameters.searchParameters;

    // our qc file
    string qcFile = Path.Combine(qcDirectory, "QC.xml");

    if (File.Exists(qcFile))
    {
        // an existing QC file is loaded and extended
        try
        {
            qcDataCollection = XmlSerialization.ReadFromXmlFile<QcDataCollection>(qcFile);
            Log.Information("QC data file loaded successfully");
        }
        catch (Exception e)
        {
            Log.Error(e, "Failed while loading QC data");
            // rethrow as-is so the original stack trace is kept
            throw;
        }
    }
    else
    {
        // no QC file yet: make sure the directory exists and start a fresh collection
        if (!Directory.Exists(qcDirectory))
        {
            Directory.CreateDirectory(qcDirectory);
        }

        qcDataCollection = new QcDataCollection(dataDirectory, qcDirectory);
        Log.Information("Appears to be a new QC directory. New QC data collection created.");
    }

    // The work queue is every *.raw file (case-insensitive) directly inside the data directory
    // that is not already listed as processed in the QC data.
    var fileList = Directory.GetFiles(dataDirectory, "*.*", SearchOption.TopDirectoryOnly)
        .Where(s => s.EndsWith(".raw", StringComparison.OrdinalIgnoreCase))
        .ToList();

    if (fileList.Count == 0)
    {
        Log.Error("No raw files found in {Directory}", dataDirectory);
        Console.WriteLine("{0} contains no raw files!", dataDirectory);
        Environment.Exit(1);
    }

    fileList.RemoveAll(s => qcDataCollection.ProcessedRawFiles.Contains(Path.GetFileName(s)));

    Log.Information("Raw files in QC queue: {Files}", fileList);

    if (fileList.Count == 0)
    {
        Log.Information("No new files to QC");
        Console.WriteLine("No new files in the directory to QC!");
        Environment.Exit(0);
    }

    Console.WriteLine("{0} file(s) to process", fileList.Count);

    foreach (string fileName in fileList)
    {
        Console.WriteLine("Processing {0}", fileName);

        IFileHeader rawHeader;

        // try to open the raw file header; a failure here means the file is skipped, not that QC aborts
        try
        {
            rawHeader = FileHeaderReaderFactory.ReadFile(fileName);
        }
        catch (Exception)
        {
            Log.Information("{File} is not a valid raw file", fileName);
            Console.WriteLine("{0} is not a valid raw file, continuing to next file.", fileName);
            continue;
        }

        // the header must identify the file as a raw file
        if (rawHeader.FileType == FileType.RawFile)
        {
            Log.Information("{File} is a valid raw file", fileName);
            Log.Information("Creation date: {Date}", rawHeader.CreationDate);
            Log.Information("File description: {Description}", rawHeader.FileDescription);
        }
        else
        {
            Log.Information("{File} is not a valid raw file", fileName);
            Console.WriteLine("{0} is not a valid raw file, continuing to next file.", fileName);
            continue;
        }

        // QC entries are keyed by creation date, so a matching key means the same file is
        // already present, possibly under a different name
        if (qcDataCollection.QcData.Keys.Contains(rawHeader.CreationDate))
        {
            Log.Information("A file with the same creation date and time as {File} already exists in the QC data", fileName);
            Console.WriteLine("{0} appears to already exist in the QC data with the name {1}. Skipping to next file.",
                fileName, qcDataCollection.QcData[rawHeader.CreationDate].RawFile);
            continue;
        }

        using (IRawDataPlus rawFile = RawFileReaderFactory.ReadFile(fileName))
        {
            rawFile.SelectInstrument(Device.MS, 1);

            RawDataCollection rawData = new RawDataCollection(rawFile);
            rawData.ExtractAll(rawFile);

            QcDataContainer newQcData = ProcessQcData(Data: qcDataCollection, rawData: rawData, rawFile: rawFile, qcDirectory: qcDirectory);

            // the database search and its metrics are optional
            if (searchParameters != null)
            {
                Search.WriteSearchMGF(qcParameters, rawData, rawFile, searchParameters.FixedScans);
                Search.RunSearch(qcParameters, rawData, rawFile);
                newQcData.ParseSearchResults(rawData, rawFile, qcParameters);
            }

            qcDataCollection.QcData.Add(rawFile.CreationDate, newQcData);
            qcDataCollection.ProcessedRawFiles.Add(Path.GetFileName(rawData.rawFileName));
            qcDataCollection.WriteQcToTable();
        }

        Log.Information("QC finished: {File}", fileName);
    }

    Log.Information("QC of all files completed");
    Console.WriteLine("QC of all files completed!");

    try
    {
        XmlSerialization.WriteToXmlFile<QcDataCollection>(qcFile, qcDataCollection);
        Log.Information("QC file saved successfully");
        Console.WriteLine("QC file saved successfully");
    }
    catch (Exception e)
    {
        Log.Error(e, "Failed during serialization of QC data");
        // rethrow as-is so the original stack trace is kept
        throw;
    }
}