示例#1
0
        /// <summary>
        /// One-sided Mann-Whitney U test looking for the case where the group labelled with 1 is larger
        /// than the group labelled with 0. See nicer wrapper: MannWhitneyUTestOneSided.
        /// </summary>
        /// <remarks>
        /// One of three strategies is used, tried in this order:
        /// (1) the normal approximation, when both groups have more than 10 rows or
        /// <paramref name="forceAssymptoticApprox"/> is set;
        /// (2) the permutations produced by SpecialFunctions.Permute01Targets, when the approximate log
        /// permutation count does not exceed log(<paramref name="maxNumPermutations"/>) and
        /// <paramref name="neverDoExactPermutations"/> is false;
        /// (3) otherwise, <paramref name="maxNumPermutations"/> random shuffles of the tie-aware ranks.
        /// Strategy (3) is not parallelized and works on U statistics directly, so the statistic it
        /// returns is the U statistic of the label-1 group, not a z score.
        /// </remarks>
        /// <typeparam name="T">Type of a row.</typeparam>
        /// <param name="rowList">The rows to test.</param>
        /// <param name="scoreAccessor">Extracts the numeric score of a row.</param>
        /// <param name="label01Accessor">Extracts the group label of a row; rows labelled 0 and 1 are counted as the two groups.</param>
        /// <param name="maxNumPermutations">Upper bound used to decide whether exact permutations are feasible, and the number of random shuffles otherwise.</param>
        /// <param name="forceAssymptoticApprox">If true, always use the normal approximation, regardless of group sizes.</param>
        /// <param name="neverDoExactPermutations">If true, never use the exact-permutation strategy.</param>
        /// <param name="parallelOptionsOrNullFor1">Parallel options; null means a MaxDegreeOfParallelism of 1.</param>
        /// <returns>
        /// Key: the test statistic (a z score, or the label-1 U statistic when random shuffles are used).
        /// Value: the one-sided p-value.
        /// </returns>
        public static KeyValuePair<double, double> ComputeZ0AndPValue<T>(IList<T> rowList,
                                                                         Func<T, double> scoreAccessor, Func<T, int> label01Accessor, int maxNumPermutations = 10000, bool forceAssymptoticApprox = false, bool neverDoExactPermutations = false,
                                                                         ParallelOptions parallelOptionsOrNullFor1 = null)
        {
            ParallelOptions parallelOptions = parallelOptionsOrNullFor1 ?? new ParallelOptions { MaxDegreeOfParallelism = 1 };

            // Group sizes; n1 is the class we think has larger values for the one-tailed test.
            int n0 = rowList.Count(elt => label01Accessor(elt) == 0);
            int n1 = rowList.Count(elt => label01Accessor(elt) == 1);

            double z0;
            double p;

            if ((n0 > 10 && n1 > 10) || forceAssymptoticApprox)
            {
                // Normal approximation.
                z0 = ComputeZ0<T>(rowList, parallelOptions, n0, n1, scoreAccessor, label01Accessor);
                p = 1.0 - SpecialFunctions.ZScoreToOneTailedPValue(z0, 1e-10);
                SanityCheckP(z0, p);
            }
            else
            {
                // Decide here whether all permutations can be enumerated within the budget.
                double logExactPermutationCount = SpecialFunctions.LogFactorialNMOverFactorialNFactorialMApprox(n0, n1);
                bool useExactPermutations = !neverDoExactPermutations && logExactPermutationCount <= Math.Log(maxNumPermutations);
                List<double> zList;

                if (useExactPermutations)
                {
                    z0 = ComputeZ0<T>(rowList, parallelOptions, n0, n1, scoreAccessor, label01Accessor);

                    // Each permutation is scored serially; the parallelism is across permutations.
                    ParallelOptions serialOptions = new ParallelOptions { MaxDegreeOfParallelism = 1 };

                    // Permuting the ranks (including ties) would be faster than permuting the real data,
                    // but this path is kept for when exact permutations are needed.
                    var permutations = SpecialFunctions.Permute01Targets(rowList, scoreAccessor, label01Accessor, maxNumPermutations).AsParallel();

                    // ParallelOptions uses -1 for "no limit", which WithDegreeOfParallelism rejects with an
                    // ArgumentOutOfRangeException; in that case leave PLINQ at its default.
                    if (parallelOptions.MaxDegreeOfParallelism > 0)
                    {
                        permutations = permutations.WithDegreeOfParallelism(parallelOptions.MaxDegreeOfParallelism);
                    }

                    zList =
                        (from permutation in permutations
                         let z = ComputeZ0(permutation, serialOptions, n0, n1, pair => pair.Key, pair => pair.Value)
                         orderby z
                         select z).ToList();
                }
                else
                {
                    /*--------------------------------------------------------------------------------------------------
                     * NB there is now a dead branch in SpecialFunctions.Permute01Targets(), which formerly did both
                     * 'exact'/'complete' and 'inexact'/'subsampled' permutations. Now it only does the former, and the
                     * 'inexact' case is handled here because shuffling ranks is much faster.
                     * -------------------------------------------------------------------------------------------------*/
                    // Don't bother converting to z; compare U statistics directly.
                    List<double> ranksWithTies = SpecialFunctions.RanksWithTies(rowList.Select(elt => scoreAccessor(elt)).ToList());

                    // Scan every row (not just the first n0 + n1) so that all label-1 rows are found even if
                    // some rows carry a label other than 0 or 1.
                    List<int> indsOfClass1 = Enumerable.Range(0, rowList.Count).Where(i => label01Accessor(rowList[i]) == 1).ToList();
                    double u1 = ComputeUFromRanks(ranksWithTies.SubList(indsOfClass1));

                    // Not parallelized. The generator is built from a constant string, presumably so that
                    // repeated runs give the same shuffles.
                    List<double> uList = new List<double>();
                    Random myRand = new MachineInvariantRandom("123456");
                    for (int perm = 0; perm < maxNumPermutations; perm++)
                    {
                        ranksWithTies.ShuffleInPlace(myRand);

                        // One-sided test: only the slice standing in for the label-1 group is scored.
                        // (A two-sided test would presumably use the minimum of both groups' U scores.)
                        List<double> permutedRanksClass1 = ranksWithTies.SubSequence(n0, n1).ToList();
                        uList.Add(ComputeUFromRanks(permutedRanksClass1));
                    }

                    // Let the shared code below treat the U statistics like z scores.
                    zList = uList;
                    z0 = u1;
                }

                // p is the Freq reported by TwoByOne for "permuted statistic >= observed statistic".
                TwoByOne twoByOne = TwoByOne.GetInstance(zList, permuted => z0 <= permuted);
                p = twoByOne.Freq;
                // Can't SanityCheckP(z0, p) here because ties mean it won't always get the right answer.
            }

            // To get a two-sided p-value ("are they different"), use 2 * ((p < .5) ? p : (1 - p)).
            return new KeyValuePair<double, double>(z0, p);
        }
示例#2
0
        /// <summary>
        /// Command-line entry point. Reads a dense input matrix, optionally restricts it to a random
        /// sample of rows, converts and standardizes the values, builds the matrix returned by
        /// CreatePsiTheMatrixOfCovarianceValues, and writes it to the output file.
        /// </summary>
        /// <remarks>
        /// Optional arguments: "help" (print usage and exit), "correl" (forwarded to
        /// CreatePsiTheMatrixOfCovarianceValues), "MaxDegreeOfParallelism" (defaults to the processor
        /// count), "randomSeed" and "randomRowCount" (row sampling; no sampling when absent).
        /// Required positional arguments, in order: maxValue, inputDenseFile, outputCovFile.
        /// On failure the error, usage message and stack trace are printed and the exception is rethrown.
        /// </remarks>
        /// <param name="args">The raw command-line arguments.</param>
        public static void Main(string[] args)
        {
            Console.WriteLine(Environment.MachineName);
            Console.WriteLine(Helper.CreateDelimitedString(" ", args));

            try
            {
                ShoUtils.SetShoDirEnvironmentVariable(1);

                ArgumentCollection argumentCollection = new CommandArguments(args);

                if (argumentCollection.ExtractOptionalFlag("help"))
                {
                    Console.WriteLine("");
                    Console.WriteLine(UsageMessage);
                    return;
                }

                // Optional settings must be extracted before the positional arguments are checked.
                bool useCorrel = argumentCollection.ExtractOptionalFlag("correl");
                ParallelOptions parallelOptions = new ParallelOptions
                {
                    MaxDegreeOfParallelism = argumentCollection.ExtractOptional("MaxDegreeOfParallelism", Environment.ProcessorCount)
                };
                int randomSeed = argumentCollection.ExtractOptional<int>("randomSeed", (int)MachineInvariantRandom.GetSeedUInt("Eigenstrat"));
                int? randomRowCountOrNull = argumentCollection.ExtractOptional<int?>("randomRowCount", null);

                // Exactly three positional arguments must remain.
                argumentCollection.CheckNoMoreOptions(3);
                int maxValue = argumentCollection.ExtractNext<int>("maxValue");
                string inputDenseFileName = argumentCollection.ExtractNext<string>("inputDenseFile");
                string outputCovFileName = argumentCollection.ExtractNext<string>("outputCovFile");
                argumentCollection.CheckThatEmpty();

                Console.WriteLine("Reading input file " + inputDenseFileName);

                Console.WriteLine("Using 'GetInstanceFromDenseAnsi' How about 'GetInstanceFromRowKeysAnsi', too?");
                using (var originalMatrix = RowKeysAnsi.GetInstanceFromDenseAnsi(inputDenseFileName, parallelOptions))
                {
                    Matrix<string, string, char> matrixOptionallyCutDown;
                    if (randomRowCountOrNull.HasValue)
                    {
                        // Work on a view of a random sample of rows, chosen with the given seed.
                        Random random = new Random(randomSeed);
                        var sampleRowKeys = SpecialFunctions.SelectRandom(originalMatrix.RowKeys, randomRowCountOrNull.Value, ref random);
                        matrixOptionallyCutDown = originalMatrix.SelectRowsView(sampleRowKeys);
                    }
                    else
                    {
                        matrixOptionallyCutDown = originalMatrix;
                    }

                    var gMatrix = matrixOptionallyCutDown.ConvertValueView(new CharToDoubleWithLimitsConverter(maxValue), double.NaN);

                    var xMatrix = StandardizeGToCreateX<ShoMatrix>(maxValue, gMatrix, ShoMatrix.CreateDefaultInstance, parallelOptions);

                    var psiMatrix = CreatePsiTheMatrixOfCovarianceValues(useCorrel, xMatrix, /*isOKToDestroyXMatrix*/ true, parallelOptions);

                    Console.WriteLine("Writing output file " + outputCovFileName);
                    psiMatrix.WriteDense(outputCovFileName);
                }
            }
            catch (Exception exception)
            {
                Console.WriteLine("");
                Console.WriteLine(exception.Message);
                if (exception.InnerException != null)
                {
                    Console.WriteLine(exception.InnerException.Message);
                }

                Console.WriteLine("");
                Console.WriteLine(UsageMessage);

                Console.WriteLine(exception.StackTrace);

                // Rethrow the original exception so its type, message and stack trace are preserved,
                // instead of hiding it inside a generic Exception with an empty message.
                throw;
            }
        }