Esempio n. 1
0
        /// <summary>
        /// Runs spectral clustering, rescues PSMs within the resulting clusters, and updates the idpDB.
        /// </summary>
        private void RescuePSMsByClustering()
        {
            DateTime startTime = DateTime.Now;
            reportProgressDelegate reportProgress = new reportProgressDelegate(setProgress);
            reportStatusDelegate reportStatus = new reportStatusDelegate(setStatus);

            string database = session.Connection.GetDataSource();
            logFile = Path.ChangeExtension(database, ".log.txt");

            string config = string.Format("Parameters:\r\n" +
                                          "PrecursorMZTol: {0} \r\n" +
                                          "FragmentMZTol: {1} \r\n" +
                                          "Similarity Threshold >= {2} \r\n" +
                                          "Rank <= {3} \r\n" +
                                          "Cluster Size >= {4} \r\n" +
                                          "Search Scores: {5}{6}{7};{8}{9}{10};{11}{12}{13} \r\n\r\n",
                                          precursorMzTolerance,
                                          fragmentMzTolerance,
                                          similarityThreshold,
                                          maxRank,
                                          minClusterSize,
                                          searchScore1Name, searchScore1Order, searchScore1Threshold,
                                          searchScore2Name, searchScore2Order, searchScore2Threshold,
                                          searchScore3Name, searchScore3Order, searchScore3Threshold);
            reportStatus(config);

            //if (writeLog)
            //    File.WriteAllText(logFile, config);

            /*
             * back up original idpDB
             */
            if (backupDB)
            {
                string dbBackupFile = Path.ChangeExtension(database, ".backup.idpDB");
                reportStatus(string.Format("Backing up idpDB to {0} ... ", dbBackupFile));
                reportProgress(-1, "Backing up idpDB");
                File.Copy(database, dbBackupFile, true);
                reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            }

            //reportStatus("Dropping filters... \r\n");
            // basicDataFilter.DropFilters(session);  //// this will drop all filtered tables and rename unfiltered tables
            //basicDataFilter.ApplyBasicFilters(session);

            reportStatus("Querying spectra...");
            reportProgress(-1, "Querying spectra...");
            IList<object[]> queryRows;
            lock (session)
                //// SQL query to retrieve spectrum info for unfiltered psm, filter query results by rank1 search score
                //                queryRows = session.CreateSQLQuery(@"SELECT s.Id, source.Name, NativeID, PrecursorMZ
                //                                                        FROM Spectrum s
                //                                                        JOIN SpectrumSource source ON s.Source = source.Id
                //                                                        JOIN UnfilteredPeptideSpectrumMatch psm ON s.Id = psm.Spectrum AND psm.Rank = 1
                //                                                        JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
                //                                                        JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
                //                                                        WHERE (scoreName.Name = " + "'" + searchScore1Name + "'" + " AND psmScore.Value " + searchScore1Order + searchScore1Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore2Name + "'" + " AND psmScore.Value " + searchScore2Order + searchScore2Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore3Name + "'" + " AND psmScore.Value " + searchScore3Order + searchScore3Threshold.ToString() + ")" +
                //                                                        " GROUP BY s.Id"
                //                                                    ).List<object[]>();

                //// SQL query to retrieve spectrum info for unfiltered psm that map to identified peptide, filter by search score 
                queryRows = session.CreateSQLQuery(@"SELECT s.Id, source.Name, NativeID, PrecursorMZ
                                                        FROM UnfilteredSpectrum s
                                                        JOIN SpectrumSource source ON s.Source = source.Id
                                                        JOIN UnfilteredPeptideSpectrumMatch psm ON s.Id = psm.Spectrum
                                                        JOIN Peptide p ON p.Id = psm.Peptide
                                                        JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
                                                        JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
                                                        WHERE (scoreName.Name = " + "'" + searchScore1Name + "'" + " AND psmScore.Value " + searchScore1Order + searchScore1Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore2Name + "'" + " AND psmScore.Value " + searchScore2Order + searchScore2Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore3Name + "'" + " AND psmScore.Value " + searchScore3Order + searchScore3Threshold.ToString() + ")" +
                                                                       " GROUP BY s.Id"
                                                                   ).List<object[]>();
            var foundSpectraList = session.CreateSQLQuery(@"SELECT distinct spectrum FROM PeptideSpectrumMatch").List<object>();
            var foundSpectra = new HashSet<long>();
            {
                long tempLong;
                foreach (var item in foundSpectraList)
                    if (long.TryParse(item.ToString(), out tempLong))
                        foundSpectra.Add(tempLong);
            }

            var spectrumRows = queryRows.Select(o => new SpectrumRow(o)).OrderBy(o => o.SourceName).ToList();
            //// Materialize the IOrderedEnumerable into a List: enumerating the lazy sequence more than once would invoke the SpectrumRow constructor each time, producing a fresh set of objects.

            /*
             * extract peaks for each spectrum, spectrumRows was sorted by SourceName
            */
            string currentSourceName = null;
            string currentSourcePath = null;
            msdata.MSData msd = null;
            int spectrumRowsCount = spectrumRows.Count();
            //Set<long> processedSpectrumIDs = new Set<long>();

            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            reportStatus(string.Format("Extracting peaks for {0} spectra ... ", spectrumRowsCount));
            lock (owner)
                for (int i = 0; i < spectrumRowsCount; ++i)
                {
                    if (_bgWorkerClustering.CancellationPending)
                    {
                        _bgWorkerCancelled = true;
                        return;
                    }

                    var row = spectrumRows.ElementAt(i);

                    reportProgress((int)(((double)(i + 1) / (double)spectrumRowsCount) * 100), string.Format("Extracting peaks ({0}/{1}) from {2}", i + 1, spectrumRowsCount, row.SourceName));

                    //if (processedSpectrumIDs.Contains(row.SpectrumId))
                    //    break;
                    if (row.SourceName != currentSourceName)
                    {
                        currentSourceName = row.SourceName;
                        currentSourcePath = IDPickerForm.LocateSpectrumSource(currentSourceName, session.Connection.GetDataSource());
                        if (msd != null)
                            msd.Dispose();
                        msd = new pwiz.CLI.msdata.MSDataFile(currentSourcePath);

                        SpectrumListFactory.wrap(msd, "threshold count 100 most-intense"); //only keep the top 100 peaks
                        //SpectrumListFactory.wrap(msd, "threshold bpi-relative .5 most-intense"); //keep all peaks that are at least 50% of the intensity of the base peak
                        //SpectrumListFactory.wrap(msd, "threshold tic-cutoff .95 most-intense"); //keep all peaks that count for 95% TIC
                        //threshold <count|count-after-ties|absolute|bpi-relative|tic-relative|tic-cutoff> <threshold> <most-intense|least-intense> [int_set(MS levels)]
                    }

                    var spectrumList = msd.run.spectrumList;
                    var pwizSpectrum = spectrumList.spectrum(spectrumList.find(row.SpectrumNativeID), true); //may create indexoutofrange error if no spectrum nativeID                   
                    row.OriginalMZs = pwizSpectrum.getMZArray().data; //getMZArray().data returns IList<double>
                    row.OriginalIntensities = pwizSpectrum.getIntensityArray().data;
                    //processedSpectrumIDs.Add(row.SpectrumId);

                }

            /* 
             * re-sort spectrumRows by precursorMZ
             * walk through each spectrum. compare similarity to all other spectra within the precursorMZTolerance 
             * (e.g. compare 1 to 2,3,4, then 2 to 3,4,5, then 3 to 4,5 etc), 
             * if above similarityThreshold, add link edge to BOTH spectra
             * merge all connected spectra to a cluster             
            */
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            reportStatus("Computing similarities... ");
            var spectrumRowsOrderByPrecursorMZ = (from randomVar in spectrumRows orderby randomVar.PrecursorMZ select randomVar).ToList();
            LinkMap linkMap = new LinkMap(); //// spectrum Id as key, directly linked spectra as value
            double similarityScore = 0;
            lock (owner)
                for (int i = 0; i < spectrumRowsCount; ++i)
                {
                    if (_bgWorkerClustering.CancellationPending)
                    {
                        _bgWorkerCancelled = true;
                        return;
                    }

                    var row = spectrumRowsOrderByPrecursorMZ.ElementAt(i);

                    reportProgress((int)(((double)(i + 1) / (double)spectrumRowsCount) * 100), "Computing similarities");
                    for (int j = i + 1; j < spectrumRowsCount; ++j)
                    {
                        var nextRow = spectrumRowsOrderByPrecursorMZ.ElementAt(j);

                        if (Math.Abs(row.PrecursorMZ - nextRow.PrecursorMZ) > precursorMzTolerance)
                        {
                            break;
                        }
                        else
                        {
                            ////compare pairwise similarity, link spectra passing threshold to both spectrum
                            Peaks rowPeakList = new Peaks(row.OriginalMZs, row.OriginalIntensities);
                            Peaks nextRowPeakList = new Peaks(nextRow.OriginalMZs, nextRow.OriginalIntensities);
                            //// converting peak intensities to sqrt here is 5-fold slower than doing this in DotProductCompareTo function
                            //Peaks rowPeakList = new Peaks(row.OriginalMZs, row.OriginalIntensities.Select(o => Math.Sqrt(o)).ToList());
                            //Peaks nextRowPeakList = new Peaks(nextRow.OriginalMZs, nextRow.OriginalIntensities.Select(o => Math.Sqrt(o)).ToList());
                            similarityScore = ClusteringAnalysis.DotProductCompareTo(rowPeakList, nextRowPeakList, fragmentMzTolerance);
                            //reportStatus("similarity between " + row.SpectrumNativeID + " and " + nextRow.SpectrumNativeID + " is " + similarityScore.ToString() + "\r\n");
                            if (similarityScore >= similarityThreshold)
                            {
                                linkMap[(long)row.SpectrumId].Add((long)nextRow.SpectrumId);
                                linkMap[(long)nextRow.SpectrumId].Add((long)row.SpectrumId); //// if a -> b, then b -> a  
                            }
                        }
                    }
                }
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));

            reportStatus("Clustering spectra... ");
            reportProgress(-1, "Clustering spectra");
            linkMap.GetMergedLinkList();
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));

            //// print clustered spectra
            //foreach (var cluster in linkMap.MergedLinkList)
            //{
            //    reportStatus("Number of spectra in cluster: " + cluster.Count().ToString() + "\r\n");
            //    foreach (var sID in cluster)
            //    {
            //        var nativeID = (from o in spectrumRows where o.SpectrumId == sID select o.SpectrumNativeID).First();
            //        reportStatus(nativeID.ToString() + "\t");
            //    }
            //    reportStatus("\r\n");
            //}

            ////free some memory
            queryRows.Clear();
            queryRows = null;
            msd.Dispose();
            msd = null;
            spectrumRows.Clear();
            spectrumRows = null;
            spectrumRowsOrderByPrecursorMZ.Clear();
            spectrumRowsOrderByPrecursorMZ = null;

            /* 
             * Go through each cluster, rescue PSMs if spectra in the same cluster were identified as the same peptide (id)
             */
            List<Set<long>> clusterSetList = (from o in linkMap.MergedLinkList where o.Count >= minClusterSize select o).ToList();    //// each element in the list is a set of clustered spectrum Ids, select sets with at least minClusterSize element           
            int clusterSetListCount = clusterSetList.Count();
            var allSpectrumIDs = (from o in clusterSetList from j in o select j).ToList();
            reportStatus(string.Format("Number of clusters: {0} \r\n", clusterSetListCount));
            reportStatus(string.Format("Number of spectra clustered: {0}/{1} ({2:0.0%}) \r\n", allSpectrumIDs.Count, spectrumRowsCount, (double)allSpectrumIDs.Count / spectrumRowsCount));

            IList<object> identPSMQueryRows;
            lock (session)
                identPSMQueryRows = session.CreateSQLQuery(@"SELECT psm.Id FROM PeptideSpectrumMatch psm").List<object>();

            var identPSMIdSet = new Set<long>(identPSMQueryRows.Select(o => (long)o));
            reportStatus(string.Format("Number of PSMs identified: {0} \r\n", identPSMIdSet.Count));

            //// create a temp table to store clustered spectrum IDs
            session.CreateSQLQuery(@"DROP TABLE IF EXISTS TempSpecIds;
                                     CREATE TEMP TABLE TempSpecIds (Id INTEGER PRIMARY KEY)
                                    ").ExecuteUpdate();

            var insertTempSpecIdscmd = session.Connection.CreateCommand();
            insertTempSpecIdscmd.CommandText = "INSERT INTO TempSpecIds VALUES (?)";
            var insertTempSpecIdsParameters = new List<System.Data.IDbDataParameter>();
            for (int i = 0; i < 1; ++i)
            {
                insertTempSpecIdsParameters.Add(insertTempSpecIdscmd.CreateParameter());
                insertTempSpecIdscmd.Parameters.Add(insertTempSpecIdsParameters[i]);
            }
            insertTempSpecIdscmd.Prepare();
            foreach (var id in allSpectrumIDs)
            {
                insertTempSpecIdsParameters[0].Value = id;
                insertTempSpecIdscmd.ExecuteNonQuery();
            }


            IList<object> allPsmIdQueryRows;
            lock (session)
                //// SQL query to retrieve all psm id for clustered spectra with score above a threshold
                allPsmIdQueryRows = session.CreateSQLQuery(@"SELECT GROUP_CONCAT(psm.Id)
                                                        FROM TempSpecIds
                                                        JOIN UnfilteredPeptideSpectrumMatch psm ON TempSpecIds.Id = psm.Spectrum
                                                        JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
                                                        JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
                                                        WHERE psm.Rank <= " + maxRank.ToString() + " AND ((scoreName.Name = " + "'" + searchScore1Name + "'" + " AND psmScore.Value " + searchScore1Order + searchScore1Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore2Name + "'" + " AND psmScore.Value " + searchScore2Order + searchScore2Threshold.ToString() + ") OR (scoreName.Name = " + "'" + searchScore3Name + "'" + " AND psmScore.Value " + searchScore3Order + searchScore3Threshold.ToString() + "))" +
                                                        " GROUP BY TempSpecIds.Id, psm.Charge"
                                                    ).List<object>();

            var allPsmIdsRows = allPsmIdQueryRows.Select(o => new PsmIdRow(o)).ToList();

            Set<long> allPsmIds = new Set<long>();
            foreach (var row in allPsmIdsRows)
            {
                allPsmIds.Union(row.PsmIds);
            }

            session.CreateSQLQuery(@"DROP TABLE IF EXISTS TempSpecIds").ExecuteUpdate();

            reportStatus("Querying PSMs...");
            reportProgress(-1, "Querying PSMs");
            IList<object[]> allClusterQueryRows;

            //// create a temp table to store psm IDs
            session.CreateSQLQuery(@"DROP TABLE IF EXISTS TempPsmIds;
                                     CREATE TEMP TABLE TempPsmIds (Id INTEGER PRIMARY KEY)
                                    ").ExecuteUpdate();

            var cmd = session.Connection.CreateCommand();
            cmd.CommandText = "INSERT INTO TempPsmIds VALUES (?)";
            var parameters = new List<System.Data.IDbDataParameter>();
            for (int i = 0; i < 1; ++i)
            {
                parameters.Add(cmd.CreateParameter());
                cmd.Parameters.Add(parameters[i]);
            }
            cmd.Prepare();
            foreach (var id in allPsmIds)
            {
                parameters[0].Value = id;
                cmd.ExecuteNonQuery();
            }

            //// query string for revision 286, no DecoySequence in Peptide table
            //            string queryCmd = @"SELECT psm.Id as psmId, s.Id, source.Name, s.NativeID, psm.Rank, psm.Charge, psmScore.Value, IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || ':' || mod.MonoMassDelta),''),
            //                                    (SELECT SUBSTR(pro.Sequence, pi.Offset+1, pi.Length)
            //                                                                FROM PeptideInstance pi
            //                                                                JOIN ProteinData pro ON pi.Protein=pro.Id
            //                                                                WHERE pi.Protein=pro.Id AND
            //                                                                  pi.Id=(SELECT MIN(pi2.Id)
            //                                                                         FROM PeptideInstance pi2
            //                                                                         WHERE psm.Peptide=pi2.Peptide))
            //                                    FROM TempIDs tempIDs
            //                                    JOIN Spectrum s ON s.Id = tempIDs.Id
            //                                    JOIN SpectrumSource source ON s.Source = source.Id
            //                                    JOIN PeptideSpectrumMatch psm ON s.Id = psm.Spectrum
            //                                    LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
            //                                    LEFT JOIN Modification mod ON pm.Modification = mod.Id
            //                                    JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
            //                                    JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
            //                                    WHERE scoreName.Name = " + "'" + searchScoreName + "'" + " AND psm.Rank <= 5" +
            //                                " GROUP BY psm.Id";
            //AND s.Id IN ( " + String.Join(",", allSpectrumIDs.Select(o => o.ToString()).ToArray()) + " ) " +

            //// query string for revision 288, added DecoySequence in Peptide table
            //            string queryCmd = @"SELECT psm.Id as psmId, s.Id, source.Name, s.NativeID, psm.Rank, psm.Charge, psmScore.Value, IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || ':' || mod.MonoMassDelta),''),
            //                                    (SELECT IFNULL(SUBSTR(pro.Sequence, pi.Offset+1, pi.Length), (SELECT DecoySequence FROM Peptide p WHERE p.Id = pi.Peptide))
            //                                            FROM PeptideInstance pi
            //                                            LEFT JOIN ProteinData pro ON pi.Protein=pro.Id
            //                                            WHERE pi.Id=(SELECT pi2.Id FROM PeptideInstance pi2 WHERE pi2.Peptide=psm.Peptide LIMIT 1))
            //                                    FROM TempIDs tempIDs
            //                                    JOIN Spectrum s ON s.Id = tempIDs.Id
            //                                    JOIN SpectrumSource source ON s.Source = source.Id
            //                                    JOIN PeptideSpectrumMatch psm ON s.Id = psm.Spectrum
            //                                    LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
            //                                    LEFT JOIN Modification mod ON pm.Modification = mod.Id
            //                                    JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
            //                                    JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
            //                                    WHERE scoreName.Name = " + "'" + searchScoreName + "'" + " AND psm.Rank <= 5" +
            //                                " GROUP BY psm.Id";

            //// query string for revision 291, retrieve by PSM Ids
            //            string queryCmd = @"SELECT psm.Id as psmId, psm.Peptide,s.Id, source.Name, s.NativeID, psm.Charge, IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || ':' || mod.MonoMassDelta),''),
            //                                    (SELECT IFNULL(SUBSTR(pd.Sequence, pi.Offset+1, pi.Length), (SELECT DecoySequence FROM UnfilteredPeptide p WHERE p.Id = pi.Peptide))),
            //                                    GROUP_CONCAT(pro.Accession),psm.QValue, psm.Rank, psmScore.Value, analysis.Id
            //                                    FROM TempPsmIds tempPsmIds
            //                                    JOIN UnfilteredPeptideSpectrumMatch psm ON psm.Id = tempPsmIds.Id 
            //                                    JOIN Analysis analysis ON psm.Analysis = analysis.Id
            //                                    JOIN Spectrum s ON s.Id = psm.Spectrum
            //                                    JOIN SpectrumSource source ON s.Source = source.Id
            //                                    JOIN UnfilteredPeptideInstance pi ON psm.Peptide = pi.Peptide
            //                                    JOIN UnfilteredProtein pro ON pi.Protein = pro.Id
            //                                    LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
            //                                    LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
            //                                    LEFT JOIN Modification mod ON pm.Modification = mod.Id
            //                                    LEFT JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
            //                                    LEFT JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
            //                                    WHERE scoreName.Name = " + "'" + searchScore1Name + "'" +
            //                                    " GROUP BY psm.Id";

            // query for r291, fix no seq for some peptides shared by target and decoy proteins, query seq for target and decoy proteins separately then union
            string queryCmd = @"SELECT psm.Id as psmId, psm.Peptide,s.Id, source.Name, s.NativeID, psm.Charge, 
                                        IFNULL(GROUP_CONCAT(DISTINCT pm.Offset || ':' || mod.MonoMassDelta),''),
                                        IFNULL(IFNULL(SUBSTR(pd.Sequence, pi.Offset+1, pi.Length),(SELECT DecoySequence FROM UnfilteredPeptide p WHERE p.Id = pi.Peptide)),
                                                (SELECT SUBSTR(pd.Sequence, pi.Offset+1, pi.Length)
                                                FROM UnfilteredPeptideInstance pi 
                                                JOIN UnfilteredProtein pro ON pi.Protein = pro.Id AND pro.IsDecoy = 0
                                                LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
                                                WHERE psm.Peptide = pi.Peptide
                                                UNION
                                                SELECT p.DecoySequence
                                                FROM UnfilteredPeptide p
                                                JOIN UnfilteredPeptideInstance pi ON p.Id = pi.Peptide
                                                JOIN UnfilteredProtein pro ON pi.Protein = pro.Id AND pro.IsDecoy = 1
                                                WHERE psm.Peptide = pi.Peptide AND p.DecoySequence is not null)),
                                        GROUP_CONCAT(pro.Accession),
                                        psm.QValue, psm.Rank, psmScore.Value, psm.Analysis
                                        FROM TempPsmIds tempPsmIds
                                        JOIN UnfilteredPeptideSpectrumMatch psm ON psm.Id = tempPsmIds.Id 
                                        JOIN UnfilteredSpectrum s ON s.Id = psm.Spectrum
                                        JOIN SpectrumSource source ON s.Source = source.Id
                                        JOIN UnfilteredPeptideInstance pi ON psm.Peptide = pi.Peptide
                                        JOIN UnfilteredProtein pro ON pi.Protein = pro.Id
                                        LEFT JOIN ProteinData pd ON pi.Protein=pd.Id
                                        LEFT JOIN PeptideModification pm ON psm.Id = pm.PeptideSpectrumMatch
                                        LEFT JOIN Modification mod ON pm.Modification = mod.Id
                                        LEFT JOIN PeptideSpectrumMatchScore psmScore ON psm.Id = psmScore.PsmId
                                        LEFT JOIN PeptideSpectrumMatchScoreName scoreName ON psmScore.ScoreNameId=scoreName.Id
                                        WHERE scoreName.Name in ( " + "'" + searchScore1Name + "','" + searchScore2Name + "','" + searchScore3Name + "')" +
                                        " GROUP BY psm.Id";

            lock (session)
                allClusterQueryRows = session.CreateSQLQuery(queryCmd).List<object[]>();
            var allClusterSpectrumRows = allClusterQueryRows.Select(o => new ClusterSpectrumRow(o)).ToList();

            session.CreateSQLQuery(@"DROP TABLE IF EXISTS TempPsmIds").ExecuteUpdate();
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            reportStatus(string.Format("Number of PSMs retrieved: {0} \r\n", allClusterSpectrumRows.Count));

            reportStatus("Rescuing PSMs... ");
            if (writeLog)
            {
                string logHeader = string.Join("\t", new string[] { "SourceName", "NativeID", "Charge", "RescuedSequence", "Protein", "ScoreName", "SearchScore", "BAScore", "QValue", "Rank", "Rank1Sequence", "Rank1Protein", "Rank1SearchScore", "Rank1BAScore", "Rank1Qvalue", "\r\n" });
                File.WriteAllText(logFile, logHeader);
            }

            Dictionary<long, UpdateValues> updateDict = new Dictionary<long, UpdateValues>();  ////key: Id in unfiltered psm table, value: reassigned Qvalue and reassinged Rank
            Set<long> rescuedDistinctSpectraIds = new Set<long>();

            //// SQL query to retrieve analysis Id and search score order from the QonverterSettings table
            IList<object[]> qonvertSettingsQueryRows;
            lock (session)
                qonvertSettingsQueryRows = session.CreateSQLQuery("SELECT Id, ScoreInfoByName FROM QonverterSettings").List<object[]>();
            var qonvertSettingRows = qonvertSettingsQueryRows.Select(o => new qonvertSettingRows(o)).ToList();
            Dictionary<long, string> analysisScoreOrder = new Dictionary<long, string>();
            Dictionary<long, string> analysisScoreName = new Dictionary<long, string>();
            foreach (var qonvertSettingRow in qonvertSettingRows)
            {
                analysisScoreOrder.Add(qonvertSettingRow.Id, qonvertSettingRow.ScoreOrder);
                analysisScoreName.Add(qonvertSettingRow.Id, qonvertSettingRow.ScoreName);
            }

            ////walk through each cluster to rescue PSMs
            for (int i = 0; i < clusterSetListCount; ++i)
            {
                var clusterSet = clusterSetList.ElementAt(i);

                if (_bgWorkerClustering.CancellationPending)
                {
                    _bgWorkerCancelled = true;
                    return;
                }

                //reportStatus("Clustering set: " + String.Join(",",clusterSet.Select(j => j.ToString()).ToArray()) + "\r\n");
                reportProgress((int)(((double)(i + 1) / (double)clusterSetListCount) * 100), "Rescuing PSMs");
                var clusterSpectrumRows = (from o in allClusterSpectrumRows where clusterSet.Contains(o.SpectrumId) select o).ToList();
                //Map<long, Set<long>> peptideIdDict = new Map<long, Set<long>>(); //key: peptide id, value: psm ids
                //Set<long> unprocessedPSMIds = new Set<long>();
                Set<string> unprocessedSpecChargeAnalysisSet = new Set<string>();  //spectrumId.charge.analysis

                var pepSeqDict = new PepDictionary();  //key: modified peptide sequence, value: spectrumId.charge.analysis, score
                //var peptideIdDict = new PepDictionary(); //key: peptide ID, value: PSM Ids and scores

                foreach (var row in clusterSpectrumRows)
                {
                    //peptideIdDict.Add(row.PeptideId,row.PSMId, row.SearchScore);
                    //peptideIdDict[row.PeptideId].Add(row.PSMId);
                    pepSeqDict.Add(row.ModifiedSequence, row.SpectrumId, row.Charge, row.Analysis, row.SearchScore, row.PSMId);
                    //unprocessedPSMIds.Add(row.PSMId);
                    //unprocessedSpectrumCharge.Add(row.SpectrumId.ToString() + "." + row.Charge.ToString());
                    unprocessedSpecChargeAnalysisSet.Add(row.SpectrumId.ToString() + "." + row.Charge.ToString() + "." + row.Analysis.ToString());
                }


                pepSeqDict.ComputeBayesianAverage(analysisScoreOrder); //replace score from sum of search scores to Bayesian Average

                var sortedPepSeqDictKeys = from k in pepSeqDict.Keys orderby pepSeqDict[k].FinalScore descending, pepSeqDict[k].PsmIdSpecDict.Count() descending select k; // sort by score, if tied, second sort by # of linked psms

                foreach (var pepSeq in sortedPepSeqDictKeys)
                {
                    if (unprocessedSpecChargeAnalysisSet.Count == 0)
                        break;

                    if (pepSeqDict[pepSeq].PsmIdSpecDict.Keys.Any(pId => identPSMIdSet.Contains(pId))) ////at least one psm identified as this peptide in this cluster
                    {
                        foreach (var psmId in pepSeqDict[pepSeq].PsmIdSpecDict.Keys)
                        {
                            var row = (from o in clusterSpectrumRows where o.PSMId == psmId select o).First();
                            string spec = row.SpectrumId.ToString() + "." + row.Charge.ToString() + "." + row.Analysis.ToString();
                            if (unprocessedSpecChargeAnalysisSet.Contains(spec))
                            {
                                if (identPSMIdSet.Contains(psmId) || foundSpectra.Contains(row.SpectrumId))
                                {
                                    //// not process ident PSMs
                                    unprocessedSpecChargeAnalysisSet.Remove(spec);
                                }
                                else
                                {
                                    updateDict.Add(psmId, new UpdateValues(-1, 1)); //// update Qvalue = -1, Rank =1
                                    ++rescuedPSMsCount;
                                    rescuedDistinctSpectraIds.Add(row.SpectrumId);
                                    unprocessedSpecChargeAnalysisSet.Remove(spec);

                                    if (writeLog)
                                    {
                                        string originalRank1Seq = "";
                                        string originalRank1Protein = "";
                                        string originalRank1Score = "";
                                        string originalRank1BAScore = "";
                                        string originalRank1Qvalue = "";

                                        if (row.Rank != 1)
                                        {
                                            var originalRank1Rows = (from o in clusterSpectrumRows where o.SpectrumId == row.SpectrumId && o.Rank == 1 && o.Charge == row.Charge && o.Analysis == row.Analysis select new { o.ModifiedSequence, o.Protein, o.SearchScore, o.QValue }).ToList(); ////may exist more than one rank1 hits
                                            foreach (var originalRank1Row in originalRank1Rows)
                                            {
                                                originalRank1Seq += originalRank1Row.ModifiedSequence + ";";
                                                originalRank1Protein += originalRank1Row.Protein + ";";
                                                originalRank1Score += originalRank1Row.SearchScore.ToString("0.0000") + ";";
                                                originalRank1BAScore += pepSeqDict.ContainsKey(originalRank1Row.ModifiedSequence) ? pepSeqDict[originalRank1Row.ModifiedSequence].FinalScore.ToString("0.0000") + ";" : "";
                                                originalRank1Qvalue += originalRank1Row.QValue.ToString("0.0000") + ";";
                                            }
                                        }
                                        string logLine = string.Join("\t", new string[] { row.SourceName, row.SpectrumNativeID, row.Charge.ToString(), row.ModifiedSequence, row.Protein, analysisScoreName[row.Analysis], row.SearchScore.ToString("0.0000"), pepSeqDict[pepSeq].FinalScore.ToString("0.0000"), row.QValue.ToString("0.0000"), row.Rank.ToString(), originalRank1Seq, originalRank1Protein, originalRank1Score, originalRank1BAScore, originalRank1Qvalue });
                                        using (StreamWriter sw = File.AppendText(logFile))
                                        {
                                            sw.WriteLine(logLine);
                                        }
                                    }
                                }
                            }
                        }
                    }
                } //// end of foreach (var pepSeq in sortedPepSeqDictKeys)

            } //// end of for (int i = 0; i < clusterSetListCount; ++i)
            reportStatus(string.Format("{0} seconds elapsed\r\n", (DateTime.Now - startTime).TotalSeconds));

            /*
             * update the unfiltered PSM table in idpDB
            */
            if (rescuedPSMsCount == 0)
                return;

            reportStatus("Updating idpDB... ");

            session.Transaction.Begin();
            //basicDataFilter.DropFilters(session);  // tables were dropped before querying
            var updateCmd = session.Connection.CreateCommand();
            updateCmd.CommandText = "UPDATE UnfilteredPeptideSpectrumMatch SET QValue = ?, Rank = ? WHERE Id = ?";
            var updateParameters = new List<System.Data.IDbDataParameter>();
            for (int i = 0; i < 3; ++i)
            {
                updateParameters.Add(updateCmd.CreateParameter());
                updateCmd.Parameters.Add(updateParameters[i]);
            }
            updateCmd.Prepare();
            int updateCount = 0;
            int allUpdateCount = updateDict.Count;
            foreach (KeyValuePair<long, UpdateValues> pair in updateDict)
            {
                updateParameters[0].Value = pair.Value.ReassignedQvalue;   //// Qvalue
                updateParameters[1].Value = pair.Value.ReassignedRank;   //// Rank
                updateParameters[2].Value = pair.Key;    //// psm id
                updateCmd.ExecuteNonQuery();
                reportProgress((int)(((double)(updateCount + 1) / (double)allUpdateCount) * 100), "Updating idpDB");
                ++updateCount;
            }
            session.Transaction.Commit();

            //basicDataFilter.ApplyBasicFilters(session);
            reportStatus(reportSecondsElapsed((DateTime.Now - startTime).TotalSeconds));
            reportStatus(string.Format("Rescued {0} PSMs for {1} distinct spectra\r\n", rescuedPSMsCount, rescuedDistinctSpectraIds.Count));
            reportProgress(0, "Ready");
            /*
             * Q values are not recomputed here; reloading idpDB is implemented in _bgWorkerClustering_RunWorkerCompleted
            */

        } //// end of RescuePSMsByClustering