/// <summary>
/// Swap-based permutation test for the difference in AUC between two ROC curves.
/// The returned p-value estimates the probability of observing an absolute AUC
/// difference at least as large as the real one under the null hypothesis that
/// neither model performs better than the other.
///
/// Think of it this way: we want a null distribution which says that there is no
/// difference between the ROCs. An AUC difference cannot arise from entries of the
/// two ordered label lists that match up, so those are ignored (including them
/// would make no difference). All the information in the AUC difference is
/// contained in the mismatched positions, and we destroy that information for the
/// null by swapping labels between the two models. To keep the number of
/// positive/negative samples of each model unchanged, whenever one mismatch is
/// swapped, a mismatch in the opposite direction is swapped as well.
///
/// Note that when <paramref name="maxFPR"/> is exactly 1 a fast AUC computation
/// which ignores ties is used. This is fine for random experiments since the ties
/// wash out.
/// </summary>
/// <param name="roc1">First ROC; its <c>_classLabels</c>, <c>_classProbs</c>, <c>_AUC</c> and <c>_lowerScoreIsMoreLikelyClass1</c> are read.</param>
/// <param name="roc2">Second ROC; must have the same <c>_lowerScoreIsMoreLikelyClass1</c> setting as <paramref name="roc1"/>.</param>
/// <param name="numTrial">Number of random swap trials used to build the null distribution.</param>
/// <param name="maxFPR">When exactly 1, the fast ordered-list AUC is used for each trial; otherwise a new <c>ROC</c> is built per trial and its <c>_AucAtMaxFpr</c> is used.</param>
/// <param name="parallelOptions">Options applied to the parallel loop over trials. May be null, in which case default options are used.</param>
/// <returns>The p-value, clamped to at most 1. Returns 1 immediately when there are no label mismatches in one of the two directions.</returns>
/// <exception cref="ArgumentException">Thrown when the label list of <paramref name="roc1"/> is empty.</exception>
public static double ROCswapPermTest(ROC roc1, ROC roc2, int numTrial, double maxFPR, ParallelOptions parallelOptions)
{
    // Fixed seed string: each trial derives its own generator from this seed and its
    // trial index, so results do not depend on how trials are scheduled across threads.
    const string randomStringSeed = "78923";

    Helper.CheckCondition(roc1._lowerScoreIsMoreLikelyClass1 == roc2._lowerScoreIsMoreLikelyClass1);

    double realAUCdiff = Math.Abs(roc1._AUC - roc2._AUC);
    DoubleArray permDiffs = new DoubleArray(1, numTrial);

    DoubleArray orderedTargets1 = DoubleArray.From(roc1._classLabels);
    DoubleArray orderedTargets2 = DoubleArray.From(roc2._classLabels);

    if (orderedTargets1.Length == 0)
    {
        throw new ArgumentException("empty ROCs given as input");
    }

    // Positions where the two label lists disagree, split by the sign of the difference.
    DoubleArray targetDiff = orderedTargets1 - orderedTargets2;
    IntArray posDiffInd = targetDiff.Find(v => v > 0);
    IntArray negDiffInd = targetDiff.Find(v => v < 0);
    int numPos = posDiffInd.Length;
    int numNeg = negDiffInd.Length;

    if (numPos == 0 || numNeg == 0)
    {
        // No opposite-direction mismatches to pair up: treat the ROCs as identical.
        return 1;
    }

    // Each swap needs one mismatch of each sign, so the smaller count bounds the pairs.
    int numPairs = Math.Min(numPos, numNeg);

    // Honor the caller's options (degree of parallelism, cancellation). A null argument
    // falls back to default options, which matches a plain Parallel.For.
    ParallelOptions effectiveOptions = parallelOptions ?? new ParallelOptions();

    Parallel.For(0, numTrial, effectiveOptions, t =>
    {
        Random myRand = SpecialFunctions.GetRandFromSeedAndNullIndex(randomStringSeed, t + 1);

        // Randomly pair up positive mismatches with negative mismatches.
        IntArray posIndRand = posDiffInd.RandomPermutation(myRand);
        IntArray negIndRand = negDiffInd.RandomPermutation(myRand);
        // NOTE(review): arguments look like (start, step, end) selecting numPairs entries - confirm.
        IntArray possiblePosPairs = posIndRand.GetColSlice(0, 1, numPairs - 1);
        IntArray possibleNegPairs = negIndRand.GetColSlice(0, 1, numPairs - 1);
        IntArray possiblePairs = ShoUtils.CatArrayCol(possiblePosPairs, possibleNegPairs).T;

        // Randomly pick each pair with prob=0.5 to include in the swap.
        DoubleArray randVec = (new DoubleArray(1, numPairs)).FillRandUseSeed(myRand);
        IntArray pairsOfPairsToBothSwapInd = randVec.Find(v => v >= 0.5);
        List<int> listInd = pairsOfPairsToBothSwapInd.T.ToListOrEmpty();

        // Work on per-trial copies so the shared inputs are never modified.
        DoubleArray newTarg1 = DoubleArray.From(orderedTargets1);
        DoubleArray newTarg2 = DoubleArray.From(orderedTargets2);

        if (listInd.Count > 0)
        {
            List<int> swapThesePairs = possiblePairs.GetRows(pairsOfPairsToBothSwapInd.T.ToList()).ToVector().ToList();

            // The two lists differ at every chosen position, so replacing x with 1 - x
            // in both lists exchanges their labels there.
            newTarg1.SetCols(swapThesePairs, 1 - newTarg1.GetCols(swapThesePairs));
            newTarg2.SetCols(swapThesePairs, 1 - newTarg2.GetCols(swapThesePairs));
        }

        double AUC1, AUC2;
        if (maxFPR == 1)
        {
            // Do it the cheap way.
            AUC1 = ComputeAUCfromOrderedList(newTarg1);
            AUC2 = ComputeAUCfromOrderedList(newTarg2);
        }
        else
        {
            // Do it with manual integration, the more expensive way.
            AUC1 = new ROC(newTarg1, roc1._classProbs, roc1._lowerScoreIsMoreLikelyClass1, maxFPR, true)._AucAtMaxFpr;
            AUC2 = new ROC(newTarg2, roc2._classProbs, roc2._lowerScoreIsMoreLikelyClass1, maxFPR, true)._AucAtMaxFpr;
        }

        // Each trial writes only its own slot of permDiffs.
        permDiffs[t] = Math.Abs(AUC1 - AUC2);
    });

    // NOTE(review): the pseudo-count is added to the numerator only (the denominator is
    // numTrial, not numTrial + pseudoCount); the result is clamped to 1 below. Kept as-is
    // so that reported p-values do not change.
    double pseudoCount = 1;
    double pVal = (pseudoCount + (double)(permDiffs >= realAUCdiff).Sum()) / (double)numTrial;
    pVal = Math.Min(pVal, 1);

    return pVal;
}