示例#1
0
        /// <summary>
        /// Paired "swap" permutation test for the difference in AUC between two ROC curves.
        ///
        /// The returned p-value estimates the probability of observing an AUC difference at
        /// least as large as the real one if the ROC curves were drawn from the null
        /// hypothesis (which is that one model does not perform better than the other).
        ///
        /// Think of it this way: we want a null distribution which says that there is no
        /// difference between the ROCs. Since an AUC difference cannot arise from any
        /// entries in the ordered lists that match up, we can ignore these (we could include
        /// them as well, but it would fall out in the wash). So instead, we assume that all
        /// the information in the difference in AUCs is contained in the mismatched
        /// positions, and we want to destroy this information for the null, so we swap the
        /// values between the two models. However, we want to keep the number of
        /// positive/negative samples the same, so when we swap one position, we must also
        /// swap another one in the other direction.
        ///
        /// Note that when <paramref name="maxFPR"/> is exactly 1 a fast AUC computation is
        /// used which ignores ties. This is fine for random experiments since the ties will
        /// fall out in the wash.
        /// </summary>
        /// <param name="roc1">First ROC curve; its ordered class labels are permuted against those of <paramref name="roc2"/>.</param>
        /// <param name="roc2">Second ROC curve; must have the same score orientation (<c>_lowerScoreIsMoreLikelyClass1</c>) as <paramref name="roc1"/>.</param>
        /// <param name="numTrial">Number of random permutation trials used to build the null distribution.</param>
        /// <param name="maxFPR">When exactly 1, the fast ordered-list AUC is used for each trial; otherwise a new ROC is built per trial and its <c>_AucAtMaxFpr</c> is used.</param>
        /// <param name="parallelOptions">Options controlling the parallel loop over trials. A null value falls back to default options.</param>
        /// <returns>The permutation p-value, clamped to at most 1. Returns 1 when the two label orderings have no mismatches in one of the two directions.</returns>
        /// <exception cref="ArgumentException">Thrown when the first ROC has no class labels.</exception>
        public static double ROCswapPermTest(ROC roc1, ROC roc2, int numTrial, double maxFPR, ParallelOptions parallelOptions)
        {
            // Fixed seed string: every trial derives its own generator from this seed and the trial
            // index, so results do not depend on how the trials are scheduled across threads.
            string randomStringSeed = "78923";

            Helper.CheckCondition(roc1._lowerScoreIsMoreLikelyClass1 == roc2._lowerScoreIsMoreLikelyClass1);

            // NOTE(review): the observed difference uses _AUC even when maxFPR != 1, while the
            // permuted differences below then use _AucAtMaxFpr. Confirm that _AUC already reflects
            // the truncation in that case.
            double      realAUCdiff = Math.Abs(roc1._AUC - roc2._AUC);
            DoubleArray permDiffs   = new DoubleArray(1, numTrial);

            // Named to match the original MATLAB implementation.
            DoubleArray orderedTargets1 = DoubleArray.From(roc1._classLabels);
            DoubleArray orderedTargets2 = DoubleArray.From(roc2._classLabels);

            if (orderedTargets1.Length == 0)
            {
                throw new ArgumentException("empty ROCs given as input");
            }

            // Positions where the two ordered label lists disagree, split by direction of the mismatch.
            DoubleArray targetDiff = orderedTargets1 - orderedTargets2;
            IntArray    posDiffInd = targetDiff.Find(v => v > 0);
            IntArray    negDiffInd = targetDiff.Find(v => v < 0);

            int numPos = posDiffInd.Length;
            int numNeg = negDiffInd.Length;

            if (numPos == 0 || numNeg == 0)
            {
                // Nothing can be swapped in a balanced way: treat the ROCs as identical.
                return 1;
            }

            // The two counts can differ (e.g. when truncated ROCs are used), so only
            // min(numPos, numNeg) balanced pairs can be formed.
            int numPairs = Math.Min(numPos, numNeg);

            // Honour the caller-supplied options (degree of parallelism, cancellation, scheduler).
            // A null value falls back to defaults so callers that passed null keep working.
            ParallelOptions loopOptions = parallelOptions ?? new ParallelOptions();

            Parallel.For(0, numTrial, loopOptions, t =>
            {
                Random myRand = SpecialFunctions.GetRandFromSeedAndNullIndex(randomStringSeed, t + 1);

                // Randomly pair up each positive mismatch with a negative mismatch.
                IntArray posIndRand = posDiffInd.RandomPermutation(myRand);
                IntArray negIndRand = negDiffInd.RandomPermutation(myRand);

                // Keep numPairs entries from each side (presumably the slice arguments are
                // start, step, inclusive end - TODO confirm against GetColSlice).
                IntArray possiblePosPairs = posIndRand.GetColSlice(0, 1, numPairs - 1);
                IntArray possibleNegPairs = negIndRand.GetColSlice(0, 1, numPairs - 1);
                IntArray possiblePairs    = ShoUtils.CatArrayCol(possiblePosPairs, possibleNegPairs).T;

                // Randomly pick each pair with prob=0.5 to include in the swap.
                DoubleArray randVec = (new DoubleArray(1, numPairs)).FillRandUseSeed(myRand);

                IntArray pairsOfPairsToBothSwapInd = randVec.Find(v => v >= 0.5);
                List<int> listInd = pairsOfPairsToBothSwapInd.T.ToListOrEmpty();

                // Work on per-trial copies so the shared inputs are never modified.
                DoubleArray newTarg1 = DoubleArray.From(orderedTargets1);
                DoubleArray newTarg2 = DoubleArray.From(orderedTargets2);

                if (listInd.Count > 0)
                {
                    List<int> swapThesePairs = possiblePairs.GetRows(pairsOfPairsToBothSwapInd.T.ToList()).ToVector().ToList();

                    // Swap the chosen positions with 1-x. Each chosen pair holds one mismatch of
                    // each direction, so the intent is that the number of positives and negatives
                    // in each list is preserved.
                    newTarg1.SetCols(swapThesePairs, 1 - newTarg1.GetCols(swapThesePairs));
                    newTarg2.SetCols(swapThesePairs, 1 - newTarg2.GetCols(swapThesePairs));
                }

                double AUC1, AUC2;

                if (maxFPR == 1)
                {
                    // Do it the cheap way.
                    AUC1 = ComputeAUCfromOrderedList(newTarg1);
                    AUC2 = ComputeAUCfromOrderedList(newTarg2);
                }
                else
                {
                    // Do it with manual integration, the more expensive way.
                    AUC1 = new ROC(newTarg1, roc1._classProbs, roc1._lowerScoreIsMoreLikelyClass1, maxFPR, true)._AucAtMaxFpr;
                    AUC2 = new ROC(newTarg2, roc2._classProbs, roc2._lowerScoreIsMoreLikelyClass1, maxFPR, true)._AucAtMaxFpr;
                }

                // Each trial writes only its own slot; the absolute value is taken after the loop.
                permDiffs[t] = (AUC1 - AUC2);
            });

            permDiffs = permDiffs.Map(v => Math.Abs(v));

            // The pseudo-count keeps the estimate away from an exact zero.
            // NOTE(review): the conventional permutation estimate is (1 + count) / (1 + numTrial);
            // the denominator here is numTrial, with the clamp below keeping the result <= 1.
            // Kept as-is so existing results do not change.
            double pseudoCount = 1;

            double pVal = (pseudoCount + (double)(permDiffs >= realAUCdiff).Sum()) / (double)numTrial;
            pVal = Math.Min(pVal, 1);

            return pVal;
        }