Example #1
0
        /// <summary>
        /// Picks a set of MS2 scans and writes them to an MGF file in the QC search data directory.
        /// </summary>
        /// <param name="qcParameters">QC settings; supplies the search parameters and the output directory.</param>
        /// <param name="rawData">Extracted raw data whose MS2 scan index is sampled.</param>
        /// <param name="rawFile">Open raw file handle, forwarded to the MGF writer.</param>
        /// <param name="fixedScans">Forwarded unchanged to <c>AdditionalMath.SelectRandomScans</c>.</param>
        public static void WriteSearchMGF(QcParameters qcParameters, RawDataCollection rawData, IRawDataPlus rawFile, bool fixedScans = false)
        {
            var searchPars = qcParameters.searchParameters;

            // choose which MS2 scans go into the search; the count comes from the search parameters
            int[] selectedScans = AdditionalMath.SelectRandomScans(
                scans: rawData.scanIndex.ScanEnumerators[MSOrderType.Ms2],
                num: searchPars.NumSpectra,
                fixedScans: fixedScans);

            MGF.WriteMGF(
                rawData,
                rawFile,
                qcParameters.QcSearchDataDirectory,
                searchPars.MgfMassCutoff,
                selectedScans,
                searchPars.MgfIntensityCutoff);
        }
Example #2
0
        /// <summary>
        /// Loads the pepXML search results that belong to a raw file.
        /// </summary>
        /// <param name="qcParameters">QC settings; supplies the directory holding the search results.</param>
        /// <param name="rawData">Raw data whose file name determines the results file name.</param>
        /// <returns>The root element of "&lt;raw file name&gt;.pep.xml" in the QC search data directory.</returns>
        public static XElement LoadSearchResults(QcParameters qcParameters, RawDataCollection rawData)
        {
            string searchDirectory = qcParameters.QcSearchDataDirectory;
            string pepXmlName      = Path.GetFileName(rawData.rawFileName) + ".pep.xml";
            string pepXmlPath      = Path.Combine(searchDirectory, pepXmlName);

            return XElement.Load(pepXmlPath);
        }
Example #3
0
        /// <summary>
        /// Runs the search engine selected in the search parameters for a single raw file.
        /// </summary>
        /// <param name="qcParameters">QC settings; supplies the search parameters and the search data directory.</param>
        /// <param name="rawData">Raw data whose file name determines the MGF and pepXML file names.</param>
        /// <param name="rawFile">Open raw file handle; only forwarded to the IdentiPy runner.</param>
        public static void RunSearch(QcParameters qcParameters, RawDataCollection rawData, IRawDataPlus rawFile)
        {
            // input and output sit side by side in the search data directory, named after the raw file
            string searchDirectory = qcParameters.QcSearchDataDirectory;
            string rawBaseName     = Path.GetFileName(rawData.rawFileName);
            string mgfPath         = Path.Combine(searchDirectory, rawBaseName + ".mgf");
            string pepXmlPath      = Path.Combine(searchDirectory, rawBaseName + ".pep.xml");

            var searchPars = qcParameters.searchParameters;

            if (searchPars.SearchAlgorithm == SearchAlgorithm.XTandem)
            {
                XTandem.RunXTandem(rawData, searchPars, mgfPath, pepXmlPath, genDecoy: true);
            }

            if (searchPars.SearchAlgorithm == SearchAlgorithm.IdentiPy)
            {
                // writeMGF is false here: the runner is told not to write the MGF itself
                Identipy.RunIdentipy(rawData, rawFile, searchDirectory, searchPars, writeMGF: false);
            }
        }
Example #4
0
        /// <summary>
        /// Loads the search results for a raw file, extracts the PSM data and hands it to
        /// <c>ParsePSMs</c> so the metrics end up on <paramref name="qcData"/>.
        /// </summary>
        /// <param name="qcData">QC container that receives the search-derived metrics.</param>
        /// <param name="rawData">Raw data identifying which results file to load.</param>
        /// <param name="rawFile">Not used by this method.</param>
        /// <param name="qcParameters">QC settings, including the search algorithm that produced the results.</param>
        public static void ParseSearchResults(this QcDataContainer qcData, RawDataCollection rawData, IRawDataPlus rawFile, QcParameters qcParameters)
        {
            PsmDataCollection psmData = ExtractPsmData(
                LoadSearchResults(qcParameters, rawData),
                qcParameters.searchParameters.SearchAlgorithm);

            qcData.ParsePSMs(psmData, qcParameters);
        }
Example #5
0
        /// <summary>
        /// Derives identification metrics from a set of PSMs and stores them on <paramref name="qcData"/>.
        /// </summary>
        /// <remarks>
        /// A PSM is treated as "good" when it is not a decoy and its hyperscore is greater than the value
        /// returned by <c>Percentile(95)</c> over the decoy hyperscores. From the good PSMs this method
        /// computes the 3+/2+ and 4+/2+ charge ratios, the fraction of PSMs without missed cleavages,
        /// the mean number of missed cleavages per PSM, the identification rate relative to
        /// <c>NumSpectra</c>, and the value returned by <c>Percentile(50)</c> over the mass drifts.
        /// All ratios are plain floating-point divisions, so a zero denominator yields NaN or
        /// Infinity instead of throwing.
        /// </remarks>
        /// <param name="qcData">QC container that receives the computed metrics.</param>
        /// <param name="psmCollection">PSMs to evaluate; only its values are read.</param>
        /// <param name="qcParameters">QC settings; supplies the search parameters.</param>
        public static void ParsePSMs(this QcDataContainer qcData, PsmDataCollection psmCollection, QcParameters qcParameters)
        {
            SearchParameters searchParameters = qcParameters.searchParameters;
            int              numSearched      = searchParameters.NumSpectra;

            // flatten the collection to a list for easy querying
            List<PsmData> psms = psmCollection.Values.ToList();

            // score threshold derived from the decoy hits
            double topDecoyScore = psms.Where(x => x.Decoy)
                                   .Select(x => x.Hyperscore)
                                   .ToArray().Percentile(95);

            // non-decoy hits scoring above the decoy threshold; materialized once because
            // the result is consulted many times below
            List<PsmData> goodPsms = psms.Where(x => !x.Decoy && x.Hyperscore > topDecoyScore).ToList();
            int numGoodPSMs        = goodPsms.Count;

            Console.WriteLine("Total hits: {0}", psms.Count);
            Console.WriteLine("Top decoy score: {0}", topDecoyScore);
            Console.WriteLine("Non-decoy hits: {0}", psms.Count(x => !x.Decoy));
            Console.WriteLine("Non-decoy hits above top decoy score: {0}", numGoodPSMs);

            // charge state ratios relative to 2+
            int    numCharge2      = goodPsms.Count(x => x.Charge == 2);
            int    numCharge3      = goodPsms.Count(x => x.Charge == 3);
            int    numCharge4      = goodPsms.Count(x => x.Charge == 4);
            double chargeRatio3to2 = (double)numCharge3 / numCharge2;
            double chargeRatio4to2 = (double)numCharge4 / numCharge2;

            // fraction of good PSMs without any missed cleavage
            int    pepsWithNoMissedCleavages = goodPsms.Count(x => x.MissedCleavages == 0);
            double digestionEfficiency       = (double)pepsWithNoMissedCleavages / numGoodPSMs;
            Console.WriteLine("Digestion efficiency: {0}", digestionEfficiency);

            // missed cleavage rate, i.e. number of missed cleavages per PSM
            double missedCleavageRate = (double)goodPsms.Select(x => x.MissedCleavages).Sum() / numGoodPSMs;
            Console.WriteLine("Missed cleavage rate (/PSM): {0}", missedCleavageRate);

            // identification rate relative to the number of spectra submitted to the search
            double IdRate = (double)numGoodPSMs / numSearched;
            Console.WriteLine("IDrate: {0}", IdRate);

            // labeling efficiency metrics only make sense when a variable modification was searched
            if (searchParameters.NMod != null || searchParameters.KMod != null || searchParameters.XMod != null)
            {
                qcData.GetModificationFrequency(goodPsms, searchParameters);
            }

            // median mass drift of the good PSMs
            qcData.MedianMassDrift = goodPsms.Select(x => x.MassDrift)
                                     .ToArray().Percentile(50);

            qcData.IdentificationRate  = IdRate;
            qcData.MissedCleavageRate  = missedCleavageRate;
            qcData.DigestionEfficiency = digestionEfficiency;
            qcData.ChargeRatio3to2     = chargeRatio3to2;
            qcData.ChargeRatio4to2     = chargeRatio4to2;
        }
Example #6
0
        /// <summary>
        /// Translates the parsed QC command line options into <c>QcParameters</c>, validates the
        /// search-related options and runs the QC workflow.
        /// </summary>
        /// <remarks>
        /// When <c>opts.Identipy</c> is set, <c>opts.SearchAlgorithm</c> is overwritten with "identipy".
        /// When no search algorithm is selected, QC runs without search parameters.
        /// Every validation failure is reported through the return value so the caller decides how
        /// to terminate the process.
        /// </remarks>
        /// <param name="opts">Parsed command line options for the QC verb.</param>
        /// <returns>0 when QC was run; 1 when the options were invalid.</returns>
        static int DoStuff(ArgumentParser.QcOptions opts)
        {
            Log.Information("Starting QC. Identipy: {Identipy}", opts.Identipy);

            QcParameters qcParameters = new QcParameters();

            qcParameters.RawFileDirectory = opts.DirectoryToQc;
            qcParameters.QcDirectory      = opts.QcDirectory;
            qcParameters.QcFile           = Path.Combine(opts.QcDirectory, "QC.xml");

            if (opts.SearchAlgorithm != null && opts.SearchAlgorithm != "identipy" && opts.SearchAlgorithm != "xtandem")
            {
                // the search algorithm is not null but it is also not identipy or xtandem
                Log.Error("Invalid search algorithm argument: {Argument}", opts.SearchAlgorithm);
                Console.WriteLine("ERROR: Search algorithm must be one of {identipy, xtandem}");
                return 1;
            }

            // the dedicated Identipy switch wins over whatever algorithm was named
            if (opts.Identipy)
            {
                opts.SearchAlgorithm = "identipy";
            }

            SearchParameters searchParameters = null;

            if (opts.SearchAlgorithm != null)
            {
                if (opts.FastaDatabase == null)
                {
                    Log.Error("No fasta database provided for {Algorithm} search", opts.SearchAlgorithm);
                    Console.WriteLine("ERROR: A fasta protein database is required for an {0} search. Please use the --db parameter to " +
                                      "provide the path to a database.", opts.SearchAlgorithm);
                    return 1;
                }

                searchParameters = new SearchParameters
                {
                    PythonExecutable   = opts.PythonExecutable,
                    IdentipyScript     = opts.IdentipyScript,
                    XTandemDirectory   = opts.XTandemDirectory,
                    FastaDatabase      = opts.FastaDatabase,
                    FixedMods          = opts.FixedMods,
                    NMod               = opts.VariableNMod,
                    KMod               = opts.VariableKMod,
                    XMod               = opts.VariableXMod,
                    NumSpectra         = opts.NumberSpectra,
                    MgfIntensityCutoff = opts.IntensityCutoff,
                    MgfMassCutoff      = opts.MassCutOff,
                    FixedScans         = opts.FixedScans
                };

                if (opts.SearchAlgorithm == "identipy")
                {
                    // the python executable and the identipy script must be given together or not at all
                    if ((opts.IdentipyScript == null) != (opts.PythonExecutable == null))
                    {
                        Log.Error("If providing location of python or identipy, must specify both of them.");
                        Console.WriteLine("ERROR: When invoking the -p or -I options, you must supply both of them.");
                        return 1;
                    }

                    Identipy.CheckIdentipyDependencies(searchParameters);

                    searchParameters.SearchAlgorithm = SearchAlgorithm.IdentiPy;
                }
                else if (opts.SearchAlgorithm == "xtandem")
                {
                    if (opts.XTandemDirectory == null)
                    {
                        Log.Error("Path to XTandem directory was not provided");
                        Console.WriteLine("ERROR: You must specify the X! Tandem directory using the -X argument to perform a search using X! Tandem.");
                        return 1;
                    }

                    searchParameters.SearchAlgorithm = SearchAlgorithm.XTandem;
                }
            }

            qcParameters.searchParameters = searchParameters;

            QC.QC.DoQc(qcParameters);

            return 0;
        }
Example #7
0
        /// <summary>
        /// Runs QC on every raw file in the raw file directory that has not been processed yet and
        /// persists the accumulated QC data to "QC.xml" in the QC directory.
        /// </summary>
        /// <remarks>
        /// An existing "QC.xml" is loaded and extended; otherwise a new collection is started and the
        /// QC directory is created when missing. Files are skipped when their header cannot be read,
        /// when the header does not identify a raw file, or when an entry with the same creation date
        /// is already present in the QC data. When search parameters are supplied, each file is also
        /// searched and the search-derived metrics are added to its QC entry.
        /// The process is terminated through <c>Environment.Exit</c> when the directory holds no raw
        /// files (exit code 1) or no unprocessed raw files (exit code 0).
        /// </remarks>
        /// <param name="qcParameters">Directories and optional search parameters for the QC run.</param>
        /// <exception cref="Exception">
        /// Any exception raised while loading or saving the QC data file is logged and rethrown.
        /// </exception>
        public static void DoQc(QcParameters qcParameters)
        {
            QcDataCollection qcDataCollection;
            string           dataDirectory    = qcParameters.RawFileDirectory;
            string           qcDirectory      = qcParameters.QcDirectory;
            SearchParameters searchParameters = qcParameters.searchParameters;

            // our qc file
            string qcFile = Path.Combine(qcDirectory, "QC.xml");

            // see if the file exists
            if (File.Exists(qcFile))
            {
                // if so, open it
                try
                {
                    qcDataCollection = XmlSerialization.ReadFromXmlFile<QcDataCollection>(qcFile);
                    Log.Information("QC data file loaded successfully");
                }
                catch (Exception e)
                {
                    Log.Error(e, "Failed while loading QC data");

                    // bare rethrow keeps the original stack trace intact
                    throw;
                }
            }
            else
            {
                // if not, check if the directory exists
                if (!Directory.Exists(qcDirectory))
                {
                    Directory.CreateDirectory(qcDirectory);
                }

                qcDataCollection = new QcDataCollection(dataDirectory, qcDirectory);
                Log.Information("Appears to be a new QC directory. New QC data collection created.");
            }

            // get our list of new raw files. it is every raw file in the directory that is not listed in the qc data
            var fileList = Directory.GetFiles(dataDirectory, "*.*", SearchOption.TopDirectoryOnly)
                           .Where(s => s.EndsWith(".raw", StringComparison.OrdinalIgnoreCase)).ToList();

            if (fileList.Count == 0)
            {
                Log.Error("No raw files found in {Directory}", dataDirectory);
                Console.WriteLine("{0} contains no raw files!", dataDirectory);
                Environment.Exit(1);
            }

            fileList.RemoveAll(s => qcDataCollection.ProcessedRawFiles.Contains(Path.GetFileName(s)));

            Log.Information("Raw files in QC queue: {Files}", fileList);

            if (fileList.Count == 0)
            {
                Log.Information("No new files to QC");
                Console.WriteLine("No new files in the directory to QC!");
                Environment.Exit(0);
            }

            Console.WriteLine("{0} file(s) to process", fileList.Count);

            foreach (string fileName in fileList)
            {
                Console.WriteLine("Processing {0}", fileName);

                IFileHeader rawHeader;

                // try to open the raw file header
                try
                {
                    rawHeader = FileHeaderReaderFactory.ReadFile(fileName);
                }
                catch (Exception)
                {
                    // an unreadable header is treated as "not a raw file" rather than as a fatal error
                    Log.Information("{File} is not a valid raw file", fileName);
                    Console.WriteLine("{0} is not a valid raw file, continuing to next file.", fileName);
                    continue;
                }

                // is it a real raw file?
                if (rawHeader.FileType == FileType.RawFile)
                {
                    Log.Information("{File} is a valid raw file", fileName);
                    Log.Information("Creation date: {Date}", rawHeader.CreationDate);
                    Log.Information("File description: {Description}", rawHeader.FileDescription);
                }
                else
                {
                    Log.Information("{File} is not a valid raw file", fileName);
                    Console.WriteLine("{0} is not a valid raw file, continuing to next file.", fileName);
                    continue;
                }
                // okay, it is probably a real raw file, let's do the QC

                // check if the raw file already exists in the QC data with a different name
                if (qcDataCollection.QcData.Keys.Contains(rawHeader.CreationDate))
                {
                    Log.Information("A file with the same creation date and time as {File} already exists in the QC data", fileName);
                    Console.WriteLine("{0} appears to already exist in the QC data with the name {1}. Skipping to next file.",
                                      fileName, qcDataCollection.QcData[rawHeader.CreationDate].RawFile);
                    continue;
                }

                using (IRawDataPlus rawFile = RawFileReaderFactory.ReadFile(fileName))
                {
                    rawFile.SelectInstrument(Device.MS, 1);
                    RawDataCollection rawData = new RawDataCollection(rawFile);
                    rawData.ExtractAll(rawFile);

                    QcDataContainer newQcData = ProcessQcData(Data: qcDataCollection, rawData: rawData, rawFile: rawFile, qcDirectory: qcDirectory);

                    // the search step is optional and only runs when search parameters were supplied
                    if (searchParameters != null)
                    {
                        Search.WriteSearchMGF(qcParameters, rawData, rawFile, searchParameters.FixedScans);
                        Search.RunSearch(qcParameters, rawData, rawFile);
                        newQcData.ParseSearchResults(rawData, rawFile, qcParameters);
                    }

                    // entries are keyed by creation date, which is also what the duplicate check above looks up
                    qcDataCollection.QcData.Add(rawFile.CreationDate, newQcData);
                    qcDataCollection.ProcessedRawFiles.Add(Path.GetFileName(rawData.rawFileName));
                    qcDataCollection.WriteQcToTable();
                }

                Log.Information("QC finished: {File}", fileName);
            }

            Log.Information("QC of all files completed");
            Console.WriteLine("QC of all files completed!");

            try
            {
                XmlSerialization.WriteToXmlFile<QcDataCollection>(qcFile, qcDataCollection);
                Log.Information("QC file saved successfully");
                Console.WriteLine("QC file saved successfully");
            }
            catch (Exception e)
            {
                Log.Error(e, "Failed during serialization of QC data");

                // bare rethrow keeps the original stack trace intact
                throw;
            }
        }