Example #1
0
        /// <summary>
        /// Merges target (primary) and decoy (secondary) PSMs in descending score order and
        /// stamps every target PSM with the cumulative target count, the cumulative decoy
        /// count, and a monotonically non-decreasing q-value (decoys / targets).
        /// </summary>
        /// <param name="primaryLines">Raw PSM lines of the target search, parsed by ImportPsmtsv.ImportLinesToAggregate.</param>
        /// <param name="secondaryLines">Raw PSM lines of the decoy search, parsed the same way.</param>
        /// <returns>The target PSMs sorted by descending score with target, decoy and q filled in.</returns>
        private static List <PsmTsvLine> AssignFDRToTarget(string[] primaryLines, string[] secondaryLines)
        {
            List <PsmTsvLine> primaryPsms = ImportPsmtsv.ImportLinesToAggregate(primaryLines)
                                            .OrderByDescending(x => x.score)
                                            .ToList();
            List <PsmTsvLine> secondaryPsms = ImportPsmtsv.ImportLinesToAggregate(secondaryLines)
                                              .OrderByDescending(x => x.score)
                                              .ToList();

            int    p      = 0; // index of the next target PSM to rank
            int    s      = 0; // index of the next decoy PSM not yet counted
            int    target = 0; // targets emitted so far
            int    decoy  = 0; // decoys that outscored (or tied) the current target
            double qMax   = 0; // running maximum so q never decreases down the list

            while (p < primaryPsms.Count)
            {
                PsmTsvLine targetLine = primaryPsms[p];

                // Check exhaustion first so secondaryPsms[s] is never read out of range
                // (this also makes an empty decoy list legal), and always compare against
                // the CURRENT decoy rather than the first one.
                bool decoysExhausted = s >= secondaryPsms.Count;
                if (decoysExhausted || targetLine.score > secondaryPsms[s].score)
                {
                    target++;
                    targetLine.target = target.ToString();
                    targetLine.decoy  = decoy.ToString();

                    double qValue = 1.0d * decoy / target;
                    if (qValue > qMax)
                    {
                        qMax = qValue;
                    }
                    targetLine.q = qMax.ToString();
                    p++;
                }
                else
                {
                    // The current decoy scores at least as high as this target: count it
                    // and move on to the next decoy before re-testing the same target.
                    decoy++;
                    s++;
                }
            }
            return primaryPsms;
        }
Example #2
0
        /// <summary>
        /// Merges the PSMs of two searches into one list keyed by scan number. When both
        /// searches report the same scan, the higher-scoring PSM is kept; if the two scores
        /// are within 0.001 of each other the PSMs are combined via PsmTsvLine.AggregateLine.
        /// </summary>
        /// <remarks>
        /// The two-pointer merge assumes both imported lists are already in ascending
        /// scanNumber order -- TODO confirm against ImportPsmtsv.ImportLinesToAggregate.
        /// </remarks>
        /// <param name="primaryLines">Raw PSM lines of the first search.</param>
        /// <param name="secondaryLines">Raw PSM lines of the second search.</param>
        /// <returns>One PSM per scan number seen in either input.</returns>
        private static List <PsmTsvLine> AggregateDifferentDatabaseSearches(string[] primaryLines, string[] secondaryLines)
        {
            const double scoreTolerance = 0.001; // scores closer than this are treated as a tie

            List <PsmTsvLine> primaryPsms     = ImportPsmtsv.ImportLinesToAggregate(primaryLines);
            List <PsmTsvLine> secondaryPsms   = ImportPsmtsv.ImportLinesToAggregate(secondaryLines);
            List <PsmTsvLine> aggregatedLines = new List <PsmTsvLine>();
            int p = 0;
            int s = 0;

            while (p < primaryPsms.Count && s < secondaryPsms.Count)
            {
                PsmTsvLine psmP = primaryPsms[p];
                PsmTsvLine psmS = secondaryPsms[s];

                if (psmP.scanNumber < psmS.scanNumber)
                {
                    // Scan only present (so far) in the primary search.
                    aggregatedLines.Add(psmP);
                    p++;
                    continue;
                }
                if (psmP.scanNumber > psmS.scanNumber)
                {
                    // Scan only present (so far) in the secondary search.
                    aggregatedLines.Add(psmS);
                    s++;
                    continue;
                }

                // Same scan in both searches: tie -> combine, otherwise keep the better score.
                bool scoresTie = psmP.score > psmS.score - scoreTolerance
                                 && psmP.score < psmS.score + scoreTolerance;
                if (scoresTie)
                {
                    aggregatedLines.Add(psmP.AggregateLine(psmS));
                }
                else if (psmP.score > psmS.score)
                {
                    aggregatedLines.Add(psmP);
                }
                else
                {
                    aggregatedLines.Add(psmS);
                }
                p++;
                s++;
            }

            // Drain whichever list still has entries; at most one of these loops runs.
            for (; p < primaryPsms.Count; p++)
            {
                aggregatedLines.Add(primaryPsms[p]);
            }
            for (; s < secondaryPsms.Count; s++)
            {
                // Fixed: the leftovers here come from the secondary list, not the primary one.
                aggregatedLines.Add(secondaryPsms[s]);
            }
            return aggregatedLines;
        }
Example #3
0
        /// <summary>
        /// Searches for the q-value threshold and the score-difference threshold that give the
        /// highest count from CalculateNumberOfConfidentSpliced, then writes the PSMs aggregated
        /// with the best thresholds found, plus a small info file recording those thresholds.
        /// </summary>
        /// <param name="standardFilePath">Path of the PSM file read as the primary (normal) search; its first line is reused as the output header.</param>
        /// <param name="neoResultFilePath">Path of the PSM file read as the secondary search.</param>
        /// <param name="outputFolder">Folder that receives both output files.</param>
        /// <param name="identifier">File name of the aggregated output; the info file is named "PercolatorInfo_" + identifier.</param>
        public static void RecursiveNeoAggregation(string standardFilePath, string neoResultFilePath, string outputFolder, string identifier)
        {
            //This method determines the optimum cutoff for gold standard identification and the minimum score difference required for a splice to outscore a normal
            double qThreshold                   = 0;
            double oldQThreshold                = 0;
            double scoreDifferenceThreshold     = 0;
            double oldScoreDifferenceThreshold  = 0;
            int    numSplicedHighScoreQ         = -1; //best spliced count seen so far while tuning qThreshold (original note: counted at a 1% local FDR)
            int    numSplicedHighScoreThreshold = -1; //best spliced count seen so far while tuning scoreDifferenceThreshold (original note: counted at a 1% local FDR)
            int    numSplicedScore              = 0;  //spliced count of the most recent Percolate run (original note: counted at a 1% local FDR)
            bool   increaseQ = true;
            bool   increaseScoreDifference = true;

            string[]          primaryLines   = (System.IO.File.ReadAllLines(@standardFilePath));
            string[]          secondaryLines = (System.IO.File.ReadAllLines(@neoResultFilePath));
            List <PsmTsvLine> primaryPsms    = ImportPsmtsv.ImportLinesToAggregate(primaryLines);

            //every PSM from the standard file is tagged as a normal (non-spliced) identification
            primaryPsms.ForEach(x => x.neoType = PsmTsvLine.NeoType.Normal);
            List <PsmTsvLine> secondaryPsms = ImportPsmtsv.ImportLinesToAggregate(secondaryLines);

            //Outer loop: tunes scoreDifferenceThreshold. It repeats while the spliced count keeps improving or while
            //increaseScoreDifference is still true; the flag is cleared the first time an iteration starts without improvement.
            do                                                       //determine if score difference should be changed
            {
                if (numSplicedScore <= numSplicedHighScoreThreshold) //no improvement: from now on step the score threshold the other way
                {
                    increaseScoreDifference = false;
                }
                else
                {
                    numSplicedHighScoreThreshold = numSplicedScore;                                                           //update highscore
                    oldScoreDifferenceThreshold  = scoreDifferenceThreshold;                                                  //remember the score difference that produced it
                }
                scoreDifferenceThreshold = UpdateScoreDifferenceThreshold(scoreDifferenceThreshold, increaseScoreDifference); //update score threshold (step direction presumably chosen by the flag; helper not shown here)

                //Inner loop: tunes qThreshold for the current scoreDifferenceThreshold using the same improve-then-reverse scheme.
                //NOTE(review): numSplicedHighScoreQ is not reset between outer iterations -- confirm this is intended.
                do                                                                                                            //determine gold standards to use
                {
                    if (numSplicedScore <= numSplicedHighScoreQ)                                                              //no improvement: from now on step the q-value the other way
                    {
                        increaseQ = false;
                    }
                    else
                    {
                        numSplicedHighScoreQ = numSplicedScore; //update highscore
                        oldQThreshold        = qThreshold;      //remember the q threshold that produced it
                    }

                    qThreshold = UpdateQThreshold(primaryPsms, qThreshold, increaseQ); //get next q-value to try
                    List <PsmTsvLine> aggregatedLines = Percolate(primaryPsms, secondaryPsms, qThreshold, scoreDifferenceThreshold);
                    numSplicedScore = CalculateNumberOfConfidentSpliced(aggregatedLines);
                } while (numSplicedScore > numSplicedHighScoreQ || increaseQ); //do again the other way if done increasing
                //re-run with the best q threshold found so the outer loop judges this score threshold by its best result
                List <PsmTsvLine> oldAggregatedLines = Percolate(primaryPsms, secondaryPsms, oldQThreshold, scoreDifferenceThreshold);
                numSplicedScore = CalculateNumberOfConfidentSpliced(oldAggregatedLines);
                increaseQ       = true;          //reset the q search direction for the next outer iteration
                qThreshold      = oldQThreshold; //and restart the q search from the best value found
            } while (numSplicedScore > numSplicedHighScoreThreshold || increaseScoreDifference);
            //final aggregation with the best q threshold and best score difference threshold recorded above
            List <PsmTsvLine> finalAggregatedLines = Percolate(primaryPsms, secondaryPsms, oldQThreshold, oldScoreDifferenceThreshold);

            //aggregated PSMs: header line of the standard file followed by one line per PSM
            using (StreamWriter file = new StreamWriter(Path.Combine(outputFolder, identifier)))
            {
                file.WriteLine(primaryLines[0]); //header
                foreach (PsmTsvLine line in finalAggregatedLines)
                {
                    file.WriteLine(line.ToString());
                }
            }
            //companion file recording the two thresholds that were selected
            using (StreamWriter file = new StreamWriter(Path.Combine(outputFolder, "PercolatorInfo_" + identifier)))
            {
                file.WriteLine("Maxmimum q-Value of Gold Standards: " + oldQThreshold);
                file.WriteLine("Minimum Score Difference for Splice Selection Over Normal: " + oldScoreDifferenceThreshold);
            }
        }