Example #1
0
        /// <summary>
        /// Merges <paramref name="x"/> with one additional row, keyed "bias", that is filled with 1's.
        /// Useful for supplying the constant (intercept) term in regression.
        /// </summary>
        /// <param name="x">Matrix to extend; its column keys and missing value are reused for the bias row.</param>
        /// <returns>The view produced by MergeRowsView over <paramref name="x"/> and the one-row bias matrix.</returns>
        public static Matrix <string, string, double> AddBiasRow(this Matrix <string, string, double> x)
        {
            // One row, as many columns as x, labelled with x's own column keys.
            List<string> biasRowKey = new List<string> { "bias" };
            DoubleArray  allOnes    = ShoUtils.DoubleArrayOnes(1, x.ColCount);
            ShoMatrix    biasRow    = new ShoMatrix(allOnes, biasRowKey, x.ColKeys, x.MissingValue);

            return x.MergeRowsView<string, string, double>(true, biasRow);
        }
Example #2
0
        //private static Random _myRand = new Random(123456);

        /// <summary>
        /// Collects the positions of a vector whose values satisfy a predicate
        /// (the analogue of ind = find(condition on x) in Matlab).
        /// </summary>
        /// <param name="x">Array to scan; it must satisfy IsVector().</param>
        /// <param name="testPredicate">Test applied to each element; the element is passed as a double.</param>
        /// <returns>The matching positions, in increasing order, converted with ShoUtils.ToIntArray.</returns>
        /// <exception cref="NotImplementedException">Thrown when <paramref name="x"/> is not a vector.</exception>
        public static IntArray Find(this IntArray x, Predicate <double> testPredicate)
        {
            // Only the vector case is supported.
            if (!x.IsVector())
            {
                throw new NotImplementedException();
            }

            List<int> matches  = new List<int>();
            int       position = 0;

            while (position < x.Length)
            {
                if (testPredicate(x[position]))
                {
                    matches.Add(position);
                }
                position++;
            }

            return ShoUtils.ToIntArray(matches);
        }
Example #3
0
        /// <summary>
        /// Collapses the columns of <paramref name="M"/> into one column per distinct group id.
        /// When <paramref name="repeatNum"/> is negative, each output column is the per-row mean of
        /// every column belonging to that group; otherwise only the first column found for each
        /// group is used.
        /// </summary>
        /// <param name="M">Matrix whose columns are grouped; its column keys are checked against those of <paramref name="groupIds"/>.</param>
        /// <param name="groupIds">Single-row matrix holding the group id of each column.</param>
        /// <param name="numInstancesPerGroup">Receives a one-row matrix (row key "count") with the number of columns used for each group.</param>
        /// <param name="parallelOptions">Not used by this method.</param>
        /// <param name="repeatNum">
        /// Negative to average over the whole group. Any non-negative value selects the first column
        /// of each group; the value itself is not otherwise consulted.
        /// NOTE(review): the name suggests it was meant to pick a specific repeat -- confirm with callers.
        /// </param>
        /// <returns>A matrix with the row keys of <paramref name="M"/> and one column per distinct group id.</returns>
        public static ShoMatrix AverageColSubsetsInSameGroup(this ShoMatrix M, Matrix <string, string, string> groupIds, out ShoMatrix numInstancesPerGroup, ParallelOptions parallelOptions, int repeatNum)
        {
            M.CheckEqualColKeys(groupIds.ColKeys.ToList());
            Helper.CheckCondition(groupIds.RowCount == 1, "group ids should have only one row");

            List<string> uniqueGroupIds            = groupIds.Unique().ToList();
            int          G                         = uniqueGroupIds.Count;
            DoubleArray  averagedResults           = new DoubleArray(M.RowCount, G);
            DoubleArray  numInstancesPerGroupArray = ShoUtils.DoubleArrayZeros(1, G);
            bool         averageWholeGroup         = repeatNum < 0;

            for (int g = 0; g < G; g++)
            {
                string    gId       = uniqueGroupIds[g];
                List<int> theseCols = new List<int>();

                for (int n = 0; n < groupIds.ColCount; n++)
                {
                    if (groupIds[0, n] == gId)
                    {
                        theseCols.Add(n);
                        if (!averageWholeGroup)
                        {
                            // Not averaging: only the first column of the group is wanted.
                            break;
                        }
                    }
                }

                DoubleArray avgValues = M.DoubleArray.GetCols(theseCols).Mean(DimOp.OverCol);
                averagedResults.SetCol(g, avgValues);
                numInstancesPerGroupArray[0, g] = theseCols.Count;
            }

            numInstancesPerGroup = new ShoMatrix(numInstancesPerGroupArray, new List<string> { "count" }, uniqueGroupIds, double.NaN);
            return new ShoMatrix(averagedResults, M.RowKeys, uniqueGroupIds, double.NaN);
        }
Example #4
0
        /// <summary>
        ///  pVal is the probability that we would observe as big an AUC diff as we
        /// did if the ROC curves were drawn from the null hypothesis (which is that
        /// one model does not perform better than the other)
        ///
        /// Think of it this way: we want a null distribution which says that there
        /// is no difference between the ROCs. Since an AUC difference
        /// cannot arise from any entries in the ordered lists that match up, we can
        /// ignore these (though we could include them as well, but it would fall out
        /// in the wash). So instead, we assume (know) that all the information in the
        /// differences in AUCs is contained in the mismatched pairs, and we want to
        /// destroy this info for the null, so we swap the values between the two
        /// models. However, we want to keep the number of positive/negative samples
        /// the same, so when we swap one pair, we must also swap another in the
        /// other direction.
        ///
        /// Note that in this method, we use a fast AUC computation which ignores ties.
        /// This is fine for random experiments since the ties will fall out in the wash.
        /// </summary>
        /// <param name="roc1">First ROC; its _classLabels, _classProbs, _AUC and _lowerScoreIsMoreLikelyClass1 are read.</param>
        /// <param name="roc2">Second ROC; must agree with <paramref name="roc1"/> on _lowerScoreIsMoreLikelyClass1.</param>
        /// <param name="numTrial">Number of permutation trials; also the denominator of the returned p-value.</param>
        /// <param name="maxFPR">
        /// When exactly 1, each trial's AUCs come from ComputeAUCfromOrderedList; otherwise a new ROC is
        /// built per trial with this value and its _AucAtMaxFpr is used.
        /// </param>
        /// <param name="parallelOptions">
        /// Options handed to Parallel.For for the trials. A null value is treated as default ParallelOptions.
        /// </param>
        /// <returns>
        /// min(1, (1 + number of trials whose |AUC1 - AUC2| is at least the observed difference) / numTrial),
        /// or 1 when the two label lists have no positive or no negative mismatches.
        /// </returns>
        /// <exception cref="Exception">Thrown when the label list of <paramref name="roc1"/> is empty.</exception>
        public static double ROCswapPermTest(ROC roc1, ROC roc2, int numTrial, double maxFPR, ParallelOptions parallelOptions)
        {
            string randomStringSeed = "78923";

            Helper.CheckCondition(roc1._lowerScoreIsMoreLikelyClass1 == roc2._lowerScoreIsMoreLikelyClass1);

            // NOTE(review): the observed difference always uses _AUC, while the permuted differences use
            // _AucAtMaxFpr when maxFPR != 1 -- confirm that _AUC already reflects the truncation in that case.
            double      realAUCdiff = Math.Abs(roc1._AUC - roc2._AUC);
            DoubleArray permDiffs   = new DoubleArray(1, numTrial);

            //to make it the same as my matlab code, rename these:
            DoubleArray orderedTargets1 = DoubleArray.From(roc1._classLabels);
            DoubleArray orderedTargets2 = DoubleArray.From(roc2._classLabels);

            if (orderedTargets1.Length == 0)
            {
                throw new Exception("empty ROCs given as input");
            }

            // Positions where the two ordered label lists disagree, split by the sign of the disagreement.
            DoubleArray targetDiff = orderedTargets1 - orderedTargets2;
            IntArray    posDiffInd = targetDiff.Find(v => v > 0);
            IntArray    negDiffInd = targetDiff.Find(v => v < 0);

            int numPos = posDiffInd.Length;
            int numNeg = negDiffInd.Length;

            double pVal;

            if (numPos == 0 || numNeg == 0)
            {//ROCs are identical
                pVal = 1;
                return(pVal);
            }

            int numPairs = Math.Min(numPos, numNeg);

            // Honor the caller's options (previously ignored); fall back to defaults when none are given.
            ParallelOptions trialOptions = parallelOptions ?? new ParallelOptions();

            // Each trial obtains its own Random from the fixed seed string and the trial index, and writes
            // only to its own slot permDiffs[t].
            Parallel.For(0, numTrial, trialOptions, t =>
            {
                Random myRand = SpecialFunctions.GetRandFromSeedAndNullIndex(randomStringSeed, t + 1);

                //randomly pair up each positive mismatch with each negative mismatch
                IntArray posIndRand = posDiffInd.RandomPermutation(myRand);
                IntArray negIndRand = negDiffInd.RandomPermutation(myRand);

                IntArray possiblePosPairs = posIndRand.GetColSlice(0, 1, numPairs - 1);
                IntArray possibleNegPairs = negIndRand.GetColSlice(0, 1, numPairs - 1);
                IntArray possiblePairs    = ShoUtils.CatArrayCol(possiblePosPairs, possibleNegPairs).T;

                //randomly pick each pair with prob=0.5 to include in the swap:
                DoubleArray randVec = (new DoubleArray(1, numPairs)).FillRandUseSeed(myRand);

                IntArray pairsOfPairsToBothSwapInd = randVec.Find(v => v >= 0.5);
                List <int> listInd = pairsOfPairsToBothSwapInd.T.ToListOrEmpty();

                // Work on per-trial copies so the shared label arrays are never modified.
                DoubleArray newTarg1 = DoubleArray.From(orderedTargets1);
                DoubleArray newTarg2 = DoubleArray.From(orderedTargets2);

                if (listInd.Count > 0)
                {
                    List <int> swapThesePairs = possiblePairs.GetRows(pairsOfPairsToBothSwapInd.T.ToList()).ToVector().ToList();

                    //swap the chosen pairs with a 1-x
                    newTarg1.SetCols(swapThesePairs, 1 - newTarg1.GetCols(swapThesePairs));
                    newTarg2.SetCols(swapThesePairs, 1 - newTarg2.GetCols(swapThesePairs));
                }

                double AUC1, AUC2;

                if (maxFPR == 1)
                {
                    //do it the cheap way
                    AUC1 = ComputeAUCfromOrderedList(newTarg1);
                    AUC2 = ComputeAUCfromOrderedList(newTarg2);
                }
                else
                {
                    //do it with manual integration, the more expensive way
                    AUC1 = new ROC(newTarg1, roc1._classProbs, roc1._lowerScoreIsMoreLikelyClass1, maxFPR, true)._AucAtMaxFpr;
                    AUC2 = new ROC(newTarg2, roc2._classProbs, roc2._lowerScoreIsMoreLikelyClass1, maxFPR, true)._AucAtMaxFpr;
                }

                // Signed difference is stored here; the absolute value is taken once after all trials finish.
                permDiffs[t] = (AUC1 - AUC2);
            });

            permDiffs = permDiffs.Map(v => Math.Abs(v));

            // The pseudo-count is added to the numerator only, so the ratio can exceed 1; it is clamped below.
            double pseudoCount = 1;

            pVal = (pseudoCount + (double)(permDiffs >= realAUCdiff).Sum()) / (double)numTrial;
            pVal = Math.Min(pVal, 1);

            return(pVal);
        }
Example #5
0
        /// <summary>
        /// Command-line entry point. Reads a dense input matrix, optionally keeps a random sample of its
        /// rows, converts and standardizes it, builds the psi matrix, and writes that matrix to the
        /// output file.
        /// Optional arguments: help, correl, MaxDegreeOfParallelism, randomSeed, randomRowCount.
        /// Positional arguments, in order: maxValue, inputDenseFile, outputCovFile.
        /// </summary>
        /// <param name="args">Raw command-line arguments.</param>
        /// <exception cref="Exception">
        /// Any failure is reported on the console (message, inner message if any, usage text, stack trace)
        /// and then rethrown wrapped in an Exception with an empty message.
        /// </exception>
        public static void Main(string[] args)
        {
            // Echo the host name and the raw command line before doing any work.
            Console.WriteLine(Environment.MachineName);
            Console.WriteLine(Helper.CreateDelimitedString(" ", args));

            try
            {
                ShoUtils.SetShoDirEnvironmentVariable(1);

                ArgumentCollection parsedArgs = new CommandArguments(args);

                bool helpRequested = parsedArgs.ExtractOptionalFlag("help");
                if (helpRequested)
                {
                    Console.WriteLine("");
                    Console.WriteLine(UsageMessage);
                    return;
                }

                // Optional arguments are extracted first, in this order.
                bool useCorrel           = parsedArgs.ExtractOptionalFlag("correl");
                int  degreeOfParallelism = parsedArgs.ExtractOptional("MaxDegreeOfParallelism", Environment.ProcessorCount);

                ParallelOptions parallelOptions = new ParallelOptions();
                parallelOptions.MaxDegreeOfParallelism = degreeOfParallelism;

                int  defaultSeed = (int)MachineInvariantRandom.GetSeedUInt("Eigenstrat");
                int  seed        = parsedArgs.ExtractOptional<int>("randomSeed", defaultSeed);
                int? sampleSize  = parsedArgs.ExtractOptional<int?>("randomRowCount", null);

                // Exactly three positional arguments are expected after the options.
                parsedArgs.CheckNoMoreOptions(3);
                int    maxValue   = parsedArgs.ExtractNext<int>("maxValue");
                string inputPath  = parsedArgs.ExtractNext<string>("inputDenseFile");
                string outputPath = parsedArgs.ExtractNext<string>("outputCovFile");
                parsedArgs.CheckThatEmpty();

                Console.WriteLine("Reading input file " + inputPath);

                Console.WriteLine("Using 'GetInstanceFromDenseAnsi' How about 'GetInstanceFromRowKeysAnsi', too?");
                using (var sourceMatrix = RowKeysAnsi.GetInstanceFromDenseAnsi(inputPath, parallelOptions))
                {
                    // Either the full matrix or a view over a random selection of its rows.
                    Matrix<string, string, char> workingMatrix;
                    if (sampleSize.HasValue)
                    {
                        Random rng         = new Random(seed);
                        var    sampledKeys = SpecialFunctions.SelectRandom(sourceMatrix.RowKeys, sampleSize.Value, ref rng);
                        workingMatrix = sourceMatrix.SelectRowsView(sampledKeys);
                    }
                    else
                    {
                        workingMatrix = sourceMatrix;
                    }

                    var gMatrix = workingMatrix.ConvertValueView(new CharToDoubleWithLimitsConverter(maxValue), double.NaN);

                    var xMatrix = StandardizeGToCreateX<ShoMatrix>(maxValue, gMatrix, ShoMatrix.CreateDefaultInstance, parallelOptions);

                    var psiMatrix = CreatePsiTheMatrixOfCovarianceValues(useCorrel, xMatrix, /*isOKToDestroyXMatrix*/ true, parallelOptions);

                    Console.WriteLine("Writing output file " + outputPath);
                    psiMatrix.WriteDense(outputPath);
                }
            }
            catch (Exception failure)
            {
                // Report the failure, show the usage text, then propagate the error to the caller.
                Console.WriteLine("");
                Console.WriteLine(failure.Message);

                Exception cause = failure.InnerException;
                if (cause != null)
                {
                    Console.WriteLine(cause.Message);
                }

                Console.WriteLine("");
                Console.WriteLine(UsageMessage);

                Console.WriteLine(failure.StackTrace);
                throw new Exception("", failure);
            }
        }