/// <summary>
/// Trains the click model through <c>Model1</c>, then builds a prediction model
/// from the resulting marginals and prints the inferred latent score and the
/// label probabilities for a fixed set of click/examination counts.
/// Returns without printing predictions when <c>Model1</c> yields null.
/// </summary>
public void Run()
{
    // This example works with three label classes
    int numLabels = 3;

    // Training step; nothing further is done when no marginals come back
    ClickModelMarginals trained = Model1(numLabels, false);
    if (trained == null)
    {
        return;
    }

    //-----------------------------------------------------------------------------
    // Definition of the prediction model
    //-----------------------------------------------------------------------------

    // Click observations enter the model as an array of Gaussian distributions
    Variable<int> obsCount = Variable.New<int>().Named("NumObs");
    Range n = new Range(obsCount).Named("N");
    VariableArray<Gaussian> obsDists = Variable.Array<Gaussian>(n).Named("Obs");

    // Model parameters are drawn from the marginals of the trained model
    Variable<double> scoreMean = Variable.Random(trained.marginalScoreMean).Named("scoreMean");
    Variable<double> scorePrec = Variable.Random(trained.marginalScorePrec).Named("scorePrec");
    Variable<double> judgePrec = Variable.Random(trained.marginalJudgePrec).Named("judgePrec");
    Variable<double> clickPrec = Variable.Random(trained.marginalClickPrec).Named("clickPrec");

    // Per-observation latent score, judgement score and click score
    VariableArray<double> latent = Variable.Array<double>(n).Named("Scores");
    VariableArray<double> judged = Variable.Array<double>(n).Named("ScoresJ");
    VariableArray<double> clicked = Variable.Array<double>(n).Named("ScoresC");
    latent[n] = Variable.GaussianFromMeanAndPrecision(scoreMean, scorePrec).ForEach(n);
    judged[n] = Variable.GaussianFromMeanAndPrecision(latent[n], judgePrec);
    clicked[n] = Variable.GaussianFromMeanAndPrecision(latent[n], clickPrec);

    // Tie each click score to its observed distribution
    Variable.ConstrainEqualRandom(clicked[n], obsDists[n]);

    // Thresholds: the two outer bounds are fixed at -infinity / +infinity,
    // the inner ones come from the trained threshold marginals
    Variable<double>[] bounds = new Variable<double>[numLabels + 1];
    int lastBound = bounds.Length - 1;
    bounds[0] = Variable.GaussianFromMeanAndVariance(double.NegativeInfinity, 0.0).Named("thresholds0");
    for (int t = 1; t < lastBound; t++)
    {
        bounds[t] = Variable.Random(trained.marginalThresh[t]).Named("thresholds" + t);
    }
    bounds[lastBound] = Variable.GaussianFromMeanAndVariance(double.PositiveInfinity, 0.0).Named("thresholds" + lastBound);

    // One boolean array per label: is the judgement score between that label's two thresholds?
    VariableArray<bool>[] labelVars = new VariableArray<bool>[numLabels];
    for (int k = 0; k < numLabels; k++)
    {
        VariableArray<bool> inBand = Variable.Array<bool>(n).Named("TestLabels" + k);
        inBand[n] = Variable.IsBetween(judged[n], bounds[k], bounds[k + 1]);
        labelVars[k] = inBand;
    }

    //--------------------------------------------------------------------
    // Executing the prediction model
    //--------------------------------------------------------------------
    int[] clicks = { 10, 100, 1000, 9, 99, 999, 10, 10, 10 };
    int[] exams = { 20, 200, 2000, 10, 100, 1000, 100, 1000, 10000 };

    // Turn each (clicks, examinations) pair into a Gaussian that matches the
    // mean and variance of a Beta(1 + clicks, 1 + non-clicks) distribution
    Gaussian[] observed = new Gaussian[clicks.Length];
    for (int k = 0; k < observed.Length; k++)
    {
        int nonClicks = exams[k] - clicks[k];
        Beta clickRate = new Beta(1.0 + clicks[k], 1.0 + nonClicks);
        double mean, variance;
        clickRate.GetMeanAndVariance(out mean, out variance);
        observed[k] = Gaussian.FromMeanAndVariance(mean, variance);
    }

    obsCount.ObservedValue = observed.Length;
    obsDists.ObservedValue = observed;

    InferenceEngine engine = new InferenceEngine();
    Gaussian[] scorePosteriors = engine.Infer<Gaussian[]>(latent);
    Bernoulli[][] labelPosteriors = new Bernoulli[numLabels][];
    for (int k = 0; k < numLabels; k++)
    {
        labelPosteriors[k] = engine.Infer<Bernoulli[]>(labelVars[k]);
    }

    Console.WriteLine("\n****** Some Predictions ******\n");
    Console.WriteLine("Clicks\tExams\t\tScore\t\tLabel0\t\tLabel1\t\tLabel2");
    for (int row = 0; row < clicks.Length; row++)
    {
        string score = scorePosteriors[row].GetMean().ToString("F4");
        string label0 = labelPosteriors[0][row].GetProbTrue().ToString("F4");
        string label1 = labelPosteriors[1][row].GetProbTrue().ToString("F4");
        string label2 = labelPosteriors[2][row].GetProbTrue().ToString("F4");
        Console.WriteLine(
            "{0}\t{1}\t\t{2}\t\t{3}\t\t{4}\t\t{5}",
            clicks[row],
            exams[row],
            score,
            label0,
            label1,
            label2);
    }
}
/// <summary>
/// Trains the click model chunk-by-chunk using shared variables, making up to a
/// fixed number of passes over the data and stopping early once the score-mean,
/// judge-precision and click-precision marginals change by less than a threshold
/// between passes.
/// </summary>
/// <param name="numLabels">Number of label classes.</param>
/// <param name="allowNoExams">Forwarded unchanged to <c>LoadData</c>.</param>
/// <returns>
/// The marginals after the last executed pass, or null when the engine's
/// algorithm is not Expectation Propagation.
/// </returns>
static private ClickModelMarginals Model2(int numLabels, bool allowNoExams)
{
    // EP is required because the model uses the ConstrainBetween constraint
    InferenceEngine engine = new InferenceEngine();
    bool usesEp = engine.Algorithm is Algorithms.ExpectationPropagation;
    if (!usesEp)
    {
        Console.WriteLine("This example only runs with Expectation Propagation");
        return null;
    }

    engine.NumberOfIterations = 10;

    // Thresholds include the lower and the upper bound
    int numThresholds = numLabels + 1;

    // The data is partitioned into chunks to improve the schedule
    int chunkSize = 200;

    // Upper limit on the number of passes through the data
    int maxPasses = 5;

    // Holds the current marginals; compared against the previous pass for convergence
    ClickModelMarginals marginals = new ClickModelMarginals(numLabels);
    double convergenceThresh = 0.01;

    // Locate the data file, falling back to the samples folder when it is not found
#if NETCORE
    // On .NET Core the working directory is not the assembly's directory,
    // so the path is anchored at the assembly location
    string assemblyDir = Path.GetDirectoryName(typeof(ClickModel).Assembly.Location);
#endif
    string dataPath = Path.Combine(
#if NETCORE
        assemblyDir,
#endif
        "TutorialData", "ClickModel.txt");
    if (!File.Exists(dataPath))
    {
        dataPath = Path.Combine(
#if NETCORE
            assemblyDir,
#endif
            "..", "Samples", "C#", "ExamplesBrowser", "TutorialData", "ClickModel.txt");
    }

    // Human judgement labels, click counts and examination counts
    int[] labels;
    int[] clicks;
    int[] exams;
    LoadData(dataPath, allowNoExams, out labels, out clicks, out exams);

    // Gaussian click observations, indexed as [chunk][label][observation]
    Gaussian[][][] chunkedObs = getClickObservations(numLabels, chunkSize, labels, clicks, exams);
    int numChunks = chunkedObs.Length;

    //-------------------------------------------------------------
    // Prior distributions
    //-------------------------------------------------------------
    Gaussian priorScoreMean = Gaussian.FromMeanAndVariance(0.5, 1.0);
    Gamma priorScorePrec = Gamma.FromMeanAndVariance(2.0, 0.0);
    Gamma priorJudgePrec = Gamma.FromMeanAndVariance(2.0, 1.0);
    Gamma priorClickPrec = Gamma.FromMeanAndVariance(2.0, 1.0);
    Gaussian[] priorThresholds;
    CreateThresholdPriors(numLabels, out priorThresholds);

    //-----------------------------------------------------
    // Shared variables - common to every chunk
    //-----------------------------------------------------
    Model model = new Model(numChunks);
    SharedVariable<double> scoreMean = SharedVariable<double>.Random(priorScoreMean).Named("scoreMean");
    SharedVariable<double> scorePrec = SharedVariable<double>.Random(priorScorePrec).Named("scorePrec");
    SharedVariable<double> judgePrec = SharedVariable<double>.Random(priorJudgePrec).Named("judgePrec");
    SharedVariable<double> clickPrec = SharedVariable<double>.Random(priorClickPrec).Named("clickPrec");
    SharedVariable<double>[] thresholds = new SharedVariable<double>[numThresholds];
    for (int k = 0; k < thresholds.Length; k++)
    {
        thresholds[k] = SharedVariable<double>.Random(priorThresholds[k]).Named("threshold" + k);
    }

    //----------------------------------------------------------------------------------
    // The model
    //----------------------------------------------------------------------------------

    // The model receives, per label class, an array of Gaussian click observations
    // and the number of those observations
    VariableArray<Gaussian>[] obsDists = new VariableArray<Gaussian>[numLabels];
    Variable<int>[] obsCounts = new Variable<int>[numLabels];

    // Each observation of each label gets a latent score, a judgement score
    // and a click score
    for (int label = 0; label < numLabels; label++)
    {
        obsCounts[label] = Variable.New<int>().Named("NumObs" + label);
        Range n = new Range(obsCounts[label]).Named("N" + label);
        obsDists[label] = Variable.Array<Gaussian>(n).Named("Obs" + label);
        VariableArray<double> latent = Variable.Array<double>(n).Named("Scores" + label);
        VariableArray<double> judged = Variable.Array<double>(n).Named("ScoresJ" + label);
        VariableArray<double> clicked = Variable.Array<double>(n).Named("ScoresC" + label);
        latent[n] = Variable.GaussianFromMeanAndPrecision(scoreMean.GetCopyFor(model), scorePrec.GetCopyFor(model)).ForEach(n);
        judged[n] = Variable.GaussianFromMeanAndPrecision(latent[n], judgePrec.GetCopyFor(model));
        clicked[n] = Variable.GaussianFromMeanAndPrecision(latent[n], clickPrec.GetCopyFor(model));
        Variable.ConstrainEqualRandom(clicked[n], obsDists[label][n]);
        Variable.ConstrainBetween(judged[n], thresholds[label].GetCopyFor(model), thresholds[label + 1].GetCopyFor(model));
    }

    //----------------------------------------------------------
    // Outer loop: passes over the data
    // Inner loop: chunks, each fed label by label
    //----------------------------------------------------------
    Console.WriteLine("Training: sample size: " + labels.Length + "\n");
    for (int pass = 0; pass < maxPasses; pass++)
    {
        // Remember where this pass started so convergence can be judged afterwards
        Gaussian scoreMeanBefore = marginals.marginalScoreMean;
        Gamma judgePrecBefore = marginals.marginalJudgePrec;
        Gamma clickPrecBefore = marginals.marginalClickPrec;

        for (int chunk = 0; chunk < numChunks; chunk++)
        {
            Gaussian[][] chunkObs = chunkedObs[chunk];
            for (int label = 0; label < numLabels; label++)
            {
                obsCounts[label].ObservedValue = chunkObs[label].Length;
                obsDists[label].ObservedValue = chunkObs[label];
            }

            model.InferShared(engine, chunk);

            // Pull the current marginals out of the shared variables
            marginals.marginalScoreMean = scoreMean.Marginal<Gaussian>();
            marginals.marginalScorePrec = scorePrec.Marginal<Gamma>();
            marginals.marginalJudgePrec = judgePrec.Marginal<Gamma>();
            marginals.marginalClickPrec = clickPrec.Marginal<Gamma>();
            for (int k = 0; k < numThresholds; k++)
            {
                marginals.marginalThresh[k] = thresholds[k].Marginal<Gaussian>();
            }

            Console.WriteLine("\n****** Pass {0}, chunk {1} ******", pass, chunk);
            Console.WriteLine("----- Marginals -----");
            Console.WriteLine("scoreMean = {0}", marginals.marginalScoreMean);
            Console.WriteLine("scorePrec = {0}", marginals.marginalScorePrec);
            Console.WriteLine("judgePrec = {0}", marginals.marginalJudgePrec);
            Console.WriteLine("clickPrec = {0}", marginals.marginalClickPrec);
            for (int k = 0; k < numThresholds; k++)
            {
                Console.WriteLine("threshMean {0} = {1}", k, marginals.marginalThresh[k]);
            }
        }

        // Converged when all three tracked marginals moved less than the threshold
        bool converged =
            marginals.marginalScoreMean.MaxDiff(scoreMeanBefore) < convergenceThresh &&
            marginals.marginalJudgePrec.MaxDiff(judgePrecBefore) < convergenceThresh &&
            marginals.marginalClickPrec.MaxDiff(clickPrecBefore) < convergenceThresh;
        if (converged)
        {
            Console.WriteLine("\n****** Inference converged ******\n");
            break;
        }
    }

    return marginals;
}
/// <summary>
/// Trains the click model on the whole data set in a single inference run and
/// prints the resulting marginals.
/// </summary>
/// <param name="numLabels">Number of label classes.</param>
/// <param name="allowNoExams">Forwarded unchanged to <c>LoadData</c>.</param>
/// <returns>
/// The inferred marginals, or null when the engine's algorithm is not
/// Expectation Propagation.
/// </returns>
static private ClickModelMarginals Model1(int numLabels, bool allowNoExams)
{
    // EP is required because the model uses the ConstrainBetween constraint
    InferenceEngine engine = new InferenceEngine();
    bool usesEp = engine.Algorithm is Algorithms.ExpectationPropagation;
    if (!usesEp)
    {
        Console.WriteLine("This example only runs with Expectation Propagation");
        return null;
    }

    // Cap the number of inference iterations
    engine.NumberOfIterations = 10;

    // Thresholds include the lower and the upper bound
    int numThresholds = numLabels + 1;

    //-------------------------------------------------------------
    // Prior distributions
    //-------------------------------------------------------------
    Gaussian priorScoreMean = Gaussian.FromMeanAndVariance(0.5, 1.0);
    Gamma priorScorePrec = Gamma.FromMeanAndVariance(2.0, 0.0);
    Gamma priorJudgePrec = Gamma.FromMeanAndVariance(2.0, 1.0);
    Gamma priorClickPrec = Gamma.FromMeanAndVariance(2.0, 1.0);
    Gaussian[] priorThresholds;
    CreateThresholdPriors(numLabels, out priorThresholds);

    //-------------------------------------------------------------
    // Variables whose posteriors are wanted
    //-------------------------------------------------------------
    Variable<double> scoreMean = Variable.Random(priorScoreMean).Named("scoreMean");
    Variable<double> scorePrec = Variable.Random(priorScorePrec).Named("scorePrec");
    Variable<double> judgePrec = Variable.Random(priorJudgePrec).Named("judgePrec");
    Variable<double> clickPrec = Variable.Random(priorClickPrec).Named("clickPrec");
    Variable<double>[] thresholds = new Variable<double>[numThresholds];
    for (int k = 0; k < numThresholds; k++)
    {
        thresholds[k] = Variable.Random(priorThresholds[k]).Named("thresholds" + k);
    }

    //----------------------------------------------------------------------------------
    // The model
    //----------------------------------------------------------------------------------
    VariableArray<Gaussian>[] obsDists = new VariableArray<Gaussian>[numLabels];
    Variable<int>[] obsCounts = new Variable<int>[numLabels];

    // Each observation of each label gets a latent score, a judgement score
    // constrained between that label's thresholds, and a click score tied to
    // the observed click distribution
    for (int label = 0; label < numLabels; label++)
    {
        obsCounts[label] = Variable.New<int>().Named("NumObs" + label);
        Range n = new Range(obsCounts[label]).Named("N" + label);
        obsDists[label] = Variable.Array<Gaussian>(n).Named("Obs" + label);
        VariableArray<double> latent = Variable.Array<double>(n).Named("Scores" + label);
        VariableArray<double> judged = Variable.Array<double>(n).Named("ScoresJ" + label);
        VariableArray<double> clicked = Variable.Array<double>(n).Named("ScoresC" + label);
        latent[n] = Variable.GaussianFromMeanAndPrecision(scoreMean, scorePrec).ForEach(n);
        judged[n] = Variable.GaussianFromMeanAndPrecision(latent[n], judgePrec);
        clicked[n] = Variable.GaussianFromMeanAndPrecision(latent[n], clickPrec);
        Variable.ConstrainBetween(judged[n], thresholds[label], thresholds[label + 1]);
        Variable.ConstrainEqualRandom(clicked[n], obsDists[label][n]);
    }

    // Locate the data file, falling back to the samples folder when it is not found
#if NETCORE
    // On .NET Core the working directory is not the assembly's directory,
    // so the path is anchored at the assembly location
    string assemblyDir = Path.GetDirectoryName(typeof(ClickModel).Assembly.Location);
#endif
    string dataPath = Path.Combine(
#if NETCORE
        assemblyDir,
#endif
        "TutorialData", "ClickModel.txt");
    if (!File.Exists(dataPath))
    {
        dataPath = Path.Combine(
#if NETCORE
            assemblyDir,
#endif
            "..", "Samples", "C#", "ExamplesBrowser", "TutorialData", "ClickModel.txt");
    }

    // Human judgement labels, click counts and examination counts
    int[] labels;
    int[] clicks;
    int[] exams;
    LoadData(dataPath, allowNoExams, out labels, out clicks, out exams);

    // Gaussian click observations, indexed as [label][observation]
    Gaussian[][] obsByLabel = getClickObservations(numLabels, labels, clicks, exams);

    // (a) Hand the observations and their counts to the model
    for (int label = 0; label < numLabels; label++)
    {
        Gaussian[] labelObs = obsByLabel[label];
        obsCounts[label].ObservedValue = labelObs.Length;
        obsDists[label].ObservedValue = labelObs;
    }

    // (b) Ask the engine for the marginals
    ClickModelMarginals marginals = new ClickModelMarginals(numLabels);
    marginals.marginalScoreMean = engine.Infer<Gaussian>(scoreMean);
    marginals.marginalScorePrec = engine.Infer<Gamma>(scorePrec);
    marginals.marginalJudgePrec = engine.Infer<Gamma>(judgePrec);
    marginals.marginalClickPrec = engine.Infer<Gamma>(clickPrec);
    for (int k = 0; k < numThresholds; k++)
    {
        marginals.marginalThresh[k] = engine.Infer<Gaussian>(thresholds[k]);
    }

    // Report what was learned
    Console.WriteLine("Training: sample size: " + labels.Length + "\n");
    Console.WriteLine("scoreMean = {0}", marginals.marginalScoreMean);
    Console.WriteLine("scorePrec = {0}", marginals.marginalScorePrec);
    Console.WriteLine("judgePrec = {0}", marginals.marginalJudgePrec);
    Console.WriteLine("clickPrec = {0}", marginals.marginalClickPrec);
    for (int k = 0; k < numThresholds; k++)
    {
        Console.WriteLine("threshMean {0} = {1}", k, marginals.marginalThresh[k]);
    }

    return marginals;
}