Beispiel #1
0
        /// <summary>
        /// Trains the click model via <c>Model1</c>, builds a prediction model whose
        /// parameters are drawn from the trained marginals, and prints the inferred latent
        /// score and the per-label probabilities for a fixed set of click/examination counts.
        /// Returns without printing predictions when training yields no marginals.
        /// </summary>
        public void Run()
        {
            // Number of label classes for this example
            int numLabels = 3;

            // Train the model; Model1 returns null when it cannot run
            ClickModelMarginals marginals = Model1(numLabels, false);

            if (marginals == null)
            {
                return;
            }

            //-----------------------------------------------------------------------------
            // The prediction model
            //-----------------------------------------------------------------------------

            // The observations will be in the form of an array of distributions
            Variable<int> numberOfObservations = Variable.New<int>().Named("NumObs");
            Range r = new Range(numberOfObservations).Named("N");
            VariableArray<Gaussian> observationDistribs = Variable.Array<Gaussian>(r).Named("Obs");

            // Use the marginals from the trained model as priors of the prediction model
            Variable<double> scoreMean = Variable.Random(marginals.marginalScoreMean).Named("scoreMean");
            Variable<double> scorePrec = Variable.Random(marginals.marginalScorePrec).Named("scorePrec");
            Variable<double> judgePrec = Variable.Random(marginals.marginalJudgePrec).Named("judgePrec");
            Variable<double> clickPrec = Variable.Random(marginals.marginalClickPrec).Named("clickPrec");

            // One threshold more than there are labels: label j lies between thresholds j and j + 1
            Variable<double>[] thresholds = new Variable<double>[numLabels + 1];

            // Variables for each observation: latent score, judgement score and click score
            VariableArray<double> scores = Variable.Array<double>(r).Named("Scores");
            VariableArray<double> scoresJ = Variable.Array<double>(r).Named("ScoresJ");
            VariableArray<double> scoresC = Variable.Array<double>(r).Named("ScoresC");

            scores[r] = Variable.GaussianFromMeanAndPrecision(scoreMean, scorePrec).ForEach(r);
            scoresJ[r] = Variable.GaussianFromMeanAndPrecision(scores[r], judgePrec);
            scoresC[r] = Variable.GaussianFromMeanAndPrecision(scores[r], clickPrec);

            // Constrain to the click observation
            Variable.ConstrainEqualRandom(scoresC[r], observationDistribs[r]);

            // The threshold variables: the two outermost ones are fixed at -infinity and
            // +infinity, the interior ones come from the trained marginals
            thresholds[0] = Variable.GaussianFromMeanAndVariance(Double.NegativeInfinity, 0.0).Named("thresholds0");
            for (int i = 1; i < thresholds.Length - 1; i++)
            {
                thresholds[i] = Variable.Random(marginals.marginalThresh[i]).Named("thresholds" + i);
            }

            thresholds[thresholds.Length - 1] = Variable.GaussianFromMeanAndVariance(Double.PositiveInfinity, 0.0).Named("thresholds" + (thresholds.Length - 1));

            // Boolean label variables: label j is true when the judgement score falls
            // between thresholds j and j + 1
            VariableArray<bool>[] testLabels = new VariableArray<bool>[numLabels];
            for (int j = 0; j < numLabels; j++)
            {
                testLabels[j] = Variable.Array<bool>(r).Named("TestLabels" + j);
                testLabels[j][r] = Variable.IsBetween(scoresJ[r], thresholds[j], thresholds[j + 1]);
            }

            //--------------------------------------------------------------------
            // Running the prediction model
            //--------------------------------------------------------------------
            int[] clicks = { 10, 100, 1000, 9, 99, 999, 10, 10, 10 };
            int[] exams = { 20, 200, 2000, 10, 100, 1000, 100, 1000, 10000 };
            Gaussian[] obs = new Gaussian[clicks.Length];
            for (int i = 0; i < clicks.Length; i++)
            {
                int nC = clicks[i];   // Number of clicks
                int nE = exams[i];    // Number of examinations
                int nNC = nE - nC;    // Number of non-clicks

                // Beta(1 + clicks, 1 + non-clicks) replaced by a Gaussian with the same
                // mean and variance
                Beta b = new Beta(1.0 + nC, 1.0 + nNC);
                double m, v;
                b.GetMeanAndVariance(out m, out v);
                obs[i] = Gaussian.FromMeanAndVariance(m, v);
            }

            numberOfObservations.ObservedValue = obs.Length;
            observationDistribs.ObservedValue = obs;
            InferenceEngine engine = new InferenceEngine();

            Gaussian[] latentScore = engine.Infer<Gaussian[]>(scores);
            Bernoulli[][] predictedLabels = new Bernoulli[numLabels][];
            for (int j = 0; j < numLabels; j++)
            {
                predictedLabels[j] = engine.Infer<Bernoulli[]>(testLabels[j]);
            }

            Console.WriteLine("\n******   Some Predictions  ******\n");

            // Build the header from numLabels instead of hard-coding three label columns,
            // so the table stays correct (and in bounds) when numLabels is changed
            string header = "Clicks\tExams\t\tScore";
            for (int j = 0; j < numLabels; j++)
            {
                header += "\t\tLabel" + j;
            }

            Console.WriteLine(header);

            for (int i = 0; i < clicks.Length; i++)
            {
                string row = clicks[i] + "\t" + exams[i] + "\t\t" + latentScore[i].GetMean().ToString("F4");
                for (int j = 0; j < numLabels; j++)
                {
                    row += "\t\t" + predictedLabels[j][i].GetProbTrue().ToString("F4");
                }

                Console.WriteLine(row);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Trains the click model on the tutorial data set in chunks, using shared
        /// variables so that the parameter posteriors are carried across chunks and passes.
        /// Marginals are printed after every chunk; training stops after a fixed number of
        /// passes or once the score-mean, judge-precision and click-precision marginals
        /// change by less than the convergence threshold over a pass.
        /// </summary>
        /// <param name="numLabels">Number of label classes.</param>
        /// <param name="allowNoExams">Forwarded unchanged to <c>LoadData</c>.</param>
        /// <returns>
        /// The marginals retrieved after the last processed chunk, or <c>null</c> when the
        /// engine's algorithm is not Expectation Propagation.
        /// </returns>
        static private ClickModelMarginals Model2(int numLabels, bool allowNoExams)
        {
            // The model uses ConstrainBetween, so only Expectation Propagation is accepted
            InferenceEngine ep = new InferenceEngine();
            bool usesExpectationPropagation = ep.Algorithm is Algorithms.ExpectationPropagation;
            if (!usesExpectationPropagation)
            {
                Console.WriteLine("This example only runs with Expectation Propagation");
                return null;
            }

            ep.NumberOfIterations = 10;

            // Tuning values: observations per chunk (the data is partitioned into chunks
            // to improve the schedule), upper bound on passes through the data, and the
            // largest marginal change that still counts as converged
            const int chunkSize = 200;
            const int maxPasses = 5;
            const double convergenceThresh = 0.01;

            // One more threshold than labels (lower and upper bounds are included)
            int thresholdCount = numLabels + 1;

            // Holds the most recent marginals at any stage of training
            ClickModelMarginals current = new ClickModelMarginals(numLabels);

            // Locate the data file: first next to the working directory (or the assembly
            // on NETCORE), then in the samples tree
            int[] labels, clicks, exams;
            string dataPath = Path.Combine(
#if NETCORE
                Path.GetDirectoryName(typeof(ClickModel).Assembly.Location),
#endif
                "TutorialData", "ClickModel.txt");

            if (!File.Exists(dataPath))
            {
                dataPath = Path.Combine(
#if NETCORE
                    Path.GetDirectoryName(typeof(ClickModel).Assembly.Location),
#endif
                    "..", "Samples", "C#", "ExamplesBrowser", "TutorialData", "ClickModel.txt");
            }

            // Human judgement labels, clicks and examinations
            LoadData(dataPath, allowNoExams, out labels, out clicks, out exams);

            // Raw click data as Gaussian observations, indexed [chunk][label][observation]
            Gaussian[][][] chunkedObs = getClickObservations(numLabels, chunkSize, labels, clicks, exams);
            int chunkCount = chunkedObs.Length;

            //-------------------------------------------------------------
            // Priors
            //-------------------------------------------------------------
            Gaussian scoreMeanPrior = Gaussian.FromMeanAndVariance(0.5, 1.0);
            Gamma scorePrecPrior = Gamma.FromMeanAndVariance(2.0, 0.0);
            Gamma judgePrecPrior = Gamma.FromMeanAndVariance(2.0, 1.0);
            Gamma clickPrecPrior = Gamma.FromMeanAndVariance(2.0, 1.0);
            Gaussian[] thresholdPriors;
            CreateThresholdPriors(numLabels, out thresholdPriors);

            //-----------------------------------------------------
            // Variables shared between all chunks
            //-----------------------------------------------------
            Model sharedModel = new Model(chunkCount);
            SharedVariable<double> scoreMean = SharedVariable<double>.Random(scoreMeanPrior).Named("scoreMean");
            SharedVariable<double> scorePrec = SharedVariable<double>.Random(scorePrecPrior).Named("scorePrec");
            SharedVariable<double> judgePrec = SharedVariable<double>.Random(judgePrecPrior).Named("judgePrec");
            SharedVariable<double> clickPrec = SharedVariable<double>.Random(clickPrecPrior).Named("clickPrec");

            SharedVariable<double>[] thresholds = new SharedVariable<double>[thresholdCount];
            for (int k = 0; k < thresholdCount; k++)
            {
                thresholds[k] = SharedVariable<double>.Random(thresholdPriors[k]).Named("threshold" + k);
            }

            //----------------------------------------------------------------------------------
            // Per-chunk model
            //----------------------------------------------------------------------------------

            // For every label class the model receives an array of Gaussian click
            // observations together with its length
            VariableArray<Gaussian>[] observationDistribs = new VariableArray<Gaussian>[numLabels];
            Variable<int>[] numberOfObservations = new Variable<int>[numLabels];

            // Each observation of each label gets a latent score plus a judgement score
            // and a click score derived from it
            for (int label = 0; label < numLabels; label++)
            {
                numberOfObservations[label] = Variable.New<int>().Named("NumObs" + label);
                Range obsRange = new Range(numberOfObservations[label]).Named("N" + label);
                observationDistribs[label] = Variable.Array<Gaussian>(obsRange).Named("Obs" + label);
                VariableArray<double> latent = Variable.Array<double>(obsRange).Named("Scores" + label);
                VariableArray<double> judged = Variable.Array<double>(obsRange).Named("ScoresJ" + label);
                VariableArray<double> clicked = Variable.Array<double>(obsRange).Named("ScoresC" + label);
                latent[obsRange] = Variable.GaussianFromMeanAndPrecision(scoreMean.GetCopyFor(sharedModel), scorePrec.GetCopyFor(sharedModel)).ForEach(obsRange);
                judged[obsRange] = Variable.GaussianFromMeanAndPrecision(latent[obsRange], judgePrec.GetCopyFor(sharedModel));
                clicked[obsRange] = Variable.GaussianFromMeanAndPrecision(latent[obsRange], clickPrec.GetCopyFor(sharedModel));
                Variable.ConstrainEqualRandom(clicked[obsRange], observationDistribs[label][obsRange]);
                Variable.ConstrainBetween(judged[obsRange], thresholds[label].GetCopyFor(sharedModel), thresholds[label + 1].GetCopyFor(sharedModel));
            }

            //----------------------------------------------------------
            // Training: passes over the data, chunk by chunk
            //----------------------------------------------------------
            Console.WriteLine("Training: sample size: {0}\n", labels.Length);
            for (int pass = 0; pass < maxPasses; pass++)
            {
                // Snapshot the marginals at the start of the pass for the convergence test
                Gaussian scoreMeanBefore = current.marginalScoreMean;
                Gamma judgePrecBefore = current.marginalJudgePrec;
                Gamma clickPrecBefore = current.marginalClickPrec;

                for (int chunk = 0; chunk < chunkCount; chunk++)
                {
                    // Feed this chunk's observations, one array per label
                    for (int label = 0; label < numLabels; label++)
                    {
                        Gaussian[] labelObs = chunkedObs[chunk][label];
                        numberOfObservations[label].ObservedValue = labelObs.Length;
                        observationDistribs[label].ObservedValue = labelObs;
                    }

                    sharedModel.InferShared(ep, chunk);

                    // Pull the updated marginals of the shared variables
                    current.marginalScoreMean = scoreMean.Marginal<Gaussian>();
                    current.marginalScorePrec = scorePrec.Marginal<Gamma>();
                    current.marginalJudgePrec = judgePrec.Marginal<Gamma>();
                    current.marginalClickPrec = clickPrec.Marginal<Gamma>();
                    for (int k = 0; k < thresholdCount; k++)
                    {
                        current.marginalThresh[k] = thresholds[k].Marginal<Gaussian>();
                    }

                    // Report progress
                    Console.WriteLine("\n****** Pass {0}, chunk {1} ******", pass, chunk);
                    Console.WriteLine("----- Marginals -----");
                    Console.WriteLine("scoreMean = {0}", current.marginalScoreMean);
                    Console.WriteLine("scorePrec = {0}", current.marginalScorePrec);
                    Console.WriteLine("judgePrec = {0}", current.marginalJudgePrec);
                    Console.WriteLine("clickPrec = {0}", current.marginalClickPrec);
                    for (int k = 0; k < thresholdCount; k++)
                    {
                        Console.WriteLine("threshMean {0} = {1}", k, current.marginalThresh[k]);
                    }
                }

                // Converged when none of the three tracked marginals moved by the
                // threshold or more during this pass
                bool converged =
                    current.marginalScoreMean.MaxDiff(scoreMeanBefore) < convergenceThresh &&
                    current.marginalJudgePrec.MaxDiff(judgePrecBefore) < convergenceThresh &&
                    current.marginalClickPrec.MaxDiff(clickPrecBefore) < convergenceThresh;
                if (converged)
                {
                    Console.WriteLine("\n****** Inference converged ******\n");
                    break;
                }
            }

            return current;
        }
Beispiel #3
0
        /// <summary>
        /// Trains the click model on the tutorial data set in a single batch: builds the
        /// model, loads the data, observes all click observations at once, infers the
        /// parameter marginals and prints them.
        /// </summary>
        /// <param name="numLabels">Number of label classes.</param>
        /// <param name="allowNoExams">Forwarded unchanged to <c>LoadData</c>.</param>
        /// <returns>
        /// The inferred marginals, or <c>null</c> when the engine's algorithm is not
        /// Expectation Propagation.
        /// </returns>
        static private ClickModelMarginals Model1(int numLabels, bool allowNoExams)
        {
            // The model uses ConstrainBetween, so only Expectation Propagation is accepted
            InferenceEngine ep = new InferenceEngine();
            bool usesExpectationPropagation = ep.Algorithm is Algorithms.ExpectationPropagation;
            if (!usesExpectationPropagation)
            {
                Console.WriteLine("This example only runs with Expectation Propagation");
                return null;
            }

            // Cap the number of inference iterations
            ep.NumberOfIterations = 10;

            // One more threshold than labels (lower and upper bounds are included)
            int thresholdCount = numLabels + 1;

            //-------------------------------------------------------------
            // Priors
            //-------------------------------------------------------------
            Gaussian scoreMeanPrior = Gaussian.FromMeanAndVariance(0.5, 1.0);
            Gamma scorePrecPrior = Gamma.FromMeanAndVariance(2.0, 0.0);
            Gamma judgePrecPrior = Gamma.FromMeanAndVariance(2.0, 1.0);
            Gamma clickPrecPrior = Gamma.FromMeanAndVariance(2.0, 1.0);

            Gaussian[] thresholdPriors;
            CreateThresholdPriors(numLabels, out thresholdPriors);

            //-------------------------------------------------------------
            // Parameters to infer
            //-------------------------------------------------------------
            Variable<double> scoreMean = Variable.Random(scoreMeanPrior).Named("scoreMean");
            Variable<double> scorePrec = Variable.Random(scorePrecPrior).Named("scorePrec");
            Variable<double> judgePrec = Variable.Random(judgePrecPrior).Named("judgePrec");
            Variable<double> clickPrec = Variable.Random(clickPrecPrior).Named("clickPrec");

            Variable<double>[] thresholds = new Variable<double>[thresholdCount];
            for (int k = 0; k < thresholdCount; k++)
            {
                thresholds[k] = Variable.Random(thresholdPriors[k]).Named("thresholds" + k);
            }

            //----------------------------------------------------------------------------------
            // The model: per label, an array of Gaussian click observations and, for each
            // observation, a latent score with a judgement score and a click score
            //----------------------------------------------------------------------------------
            VariableArray<Gaussian>[] observationDistribs = new VariableArray<Gaussian>[numLabels];
            Variable<int>[] numberOfObservations = new Variable<int>[numLabels];
            for (int label = 0; label < numLabels; label++)
            {
                numberOfObservations[label] = Variable.New<int>().Named("NumObs" + label);
                Range obsRange = new Range(numberOfObservations[label]).Named("N" + label);
                observationDistribs[label] = Variable.Array<Gaussian>(obsRange).Named("Obs" + label);
                VariableArray<double> latent = Variable.Array<double>(obsRange).Named("Scores" + label);
                VariableArray<double> judged = Variable.Array<double>(obsRange).Named("ScoresJ" + label);
                VariableArray<double> clicked = Variable.Array<double>(obsRange).Named("ScoresC" + label);
                latent[obsRange] = Variable.GaussianFromMeanAndPrecision(scoreMean, scorePrec).ForEach(obsRange);
                judged[obsRange] = Variable.GaussianFromMeanAndPrecision(latent[obsRange], judgePrec);
                clicked[obsRange] = Variable.GaussianFromMeanAndPrecision(latent[obsRange], clickPrec);
                Variable.ConstrainBetween(judged[obsRange], thresholds[label], thresholds[label + 1]);
                Variable.ConstrainEqualRandom(clicked[obsRange], observationDistribs[label][obsRange]);
            }

            // Locate the data file: first next to the working directory (or the assembly
            // on NETCORE), then in the samples tree
            int[] labels, clicks, exams;
            string dataPath = Path.Combine(
#if NETCORE
                Path.GetDirectoryName(typeof(ClickModel).Assembly.Location),
#endif
                "TutorialData", "ClickModel.txt");

            if (!File.Exists(dataPath))
            {
                dataPath = Path.Combine(
#if NETCORE
                    Path.GetDirectoryName(typeof(ClickModel).Assembly.Location),
#endif
                    "..", "Samples", "C#", "ExamplesBrowser", "TutorialData", "ClickModel.txt");
            }

            // Human judgement labels, clicks and examinations
            LoadData(dataPath, allowNoExams, out labels, out clicks, out exams);

            // Raw click data as Gaussian observations, indexed [label][observation]
            Gaussian[][] obsByLabel = getClickObservations(numLabels, labels, clicks, exams);

            // (a) Observe the click distributions and their counts, one array per label
            for (int label = 0; label < numLabels; label++)
            {
                Gaussian[] labelObs = obsByLabel[label];
                numberOfObservations[label].ObservedValue = labelObs.Length;
                observationDistribs[label].ObservedValue = labelObs;
            }

            // (b) Infer the marginals of every parameter
            ClickModelMarginals trained = new ClickModelMarginals(numLabels);
            trained.marginalScoreMean = ep.Infer<Gaussian>(scoreMean);
            trained.marginalScorePrec = ep.Infer<Gamma>(scorePrec);
            trained.marginalJudgePrec = ep.Infer<Gamma>(judgePrec);
            trained.marginalClickPrec = ep.Infer<Gamma>(clickPrec);
            for (int k = 0; k < thresholdCount; k++)
            {
                trained.marginalThresh[k] = ep.Infer<Gaussian>(thresholds[k]);
            }

            // Report the training result
            Console.WriteLine("Training: sample size: {0}\n", labels.Length);
            Console.WriteLine("scoreMean = {0}", trained.marginalScoreMean);
            Console.WriteLine("scorePrec = {0}", trained.marginalScorePrec);
            Console.WriteLine("judgePrec = {0}", trained.marginalJudgePrec);
            Console.WriteLine("clickPrec = {0}", trained.marginalClickPrec);
            for (int k = 0; k < thresholdCount; k++)
            {
                Console.WriteLine("threshMean {0} = {1}", k, trained.marginalThresh[k]);
            }

            return trained;
        }