Esempio n. 1
0
        /// <summary>
        /// Trains the click model chunk-by-chunk using shared variables and returns the
        /// marginals retrieved after the last chunk that was processed.
        /// </summary>
        /// <param name="numLabels">Number of label classes; the model uses numLabels + 1 thresholds.</param>
        /// <param name="allowNoExams">Passed through unchanged to LoadData.</param>
        /// <returns>
        /// The most recently retrieved marginals, or null when the engine's algorithm
        /// is not Expectation Propagation.
        /// </returns>
        private static ClickModelMarginals Model2(int numLabels, bool allowNoExams)
        {
            // Inference engine must be EP because of the ConstrainBetween constraint
            InferenceEngine engine = new InferenceEngine();

            if (!(engine.Algorithm is ExpectationPropagation))
            {
                Console.WriteLine("This example only runs with Expectation Propagation");
                return null;
            }

            engine.NumberOfIterations = 10;

            // Includes lower and upper bounds
            int numThresholds = numLabels + 1;
            // Partition the data into chunks to improve the schedule
            int chunkSize = 200;
            // Maximum number of passes through the data
            int maxPasses = 5;
            // The marginals at any given stage.
            ClickModelMarginals marginals = new ClickModelMarginals(numLabels);
            // Marginals as they stood at the start of the current pass; compared with
            // the marginals at the end of the pass to create a convergence criterion
            Gaussian prevMargScoreMean;
            Gamma prevMargJudgePrec;
            Gamma prevMargClickPrec;
            double convergenceThresh = 0.01;

            // Get the arrays of human judgement labels, clicks, and examinations
            int[] labels;
            int[] clicks;
            int[] exams;

            // Build the paths with Path.Combine so the directory separators are
            // correct on every platform, not only on Windows
            string fileName = Path.Combine("data", "ClickModel.txt");
            if (!File.Exists(fileName))
            {
                fileName = Path.Combine("..", "Samples", "C#", "ExamplesBrowser", "data", "ClickModel.txt");
            }

            LoadData(fileName, allowNoExams, out labels, out clicks, out exams);

            // Convert the raw click data into uncertain Gaussian observations chunk-by-chunk.
            // The result is indexed as [chunk][label][observation].
            Gaussian[][][] allObs = getClickObservations(numLabels, chunkSize, labels, clicks, exams);
            int numChunks = allObs.Length;

            //-------------------------------------------------------------
            // Specify prior distributions
            //-------------------------------------------------------------
            Gaussian priorScoreMean = Gaussian.FromMeanAndVariance(0.5, 1.0);
            Gamma priorScorePrec = Gamma.FromMeanAndVariance(2.0, 0.0);
            Gamma priorJudgePrec = Gamma.FromMeanAndVariance(2.0, 1.0);
            Gamma priorClickPrec = Gamma.FromMeanAndVariance(2.0, 1.0);

            Gaussian[] priorThresholds;
            CreateThresholdPriors(numLabels, out priorThresholds);

            //-----------------------------------------------------
            // Create shared variables - these are the variables
            // which are shared between all chunks
            //-----------------------------------------------------
            Model model = new Model(numChunks);
            SharedVariable<double> scoreMean = SharedVariable<double>.Random(priorScoreMean).Named("scoreMean");
            SharedVariable<double> scorePrec = SharedVariable<double>.Random(priorScorePrec).Named("scorePrec");
            SharedVariable<double> judgePrec = SharedVariable<double>.Random(priorJudgePrec).Named("judgePrec");
            SharedVariable<double> clickPrec = SharedVariable<double>.Random(priorClickPrec).Named("clickPrec");

            SharedVariable<double>[] thresholds = new SharedVariable<double>[numThresholds];
            for (int t = 0; t < numThresholds; t++)
            {
                thresholds[t] = SharedVariable<double>.Random(priorThresholds[t]).Named("threshold" + t);
            }

            //----------------------------------------------------------------------------------
            // The model
            //----------------------------------------------------------------------------------

            // Gaussian click observations are given to the model - one set of observations
            // per label class. Also the number of observations per label class is given to the model
            VariableArray<Gaussian>[] observationDistribs = new VariableArray<Gaussian>[numLabels];
            Variable<int>[] numberOfObservations = new Variable<int>[numLabels];

            // For each label, and each observation (consisting of a human judgement and
            // a Gaussian click observation), there is a latent score variable, a judgement
            // score variable, and a click score variable
            for (int i = 0; i < numLabels; i++)
            {
                numberOfObservations[i] = Variable.New<int>().Named("NumObs" + i);
                Range r = new Range(numberOfObservations[i]).Named("N" + i);
                observationDistribs[i] = Variable.Array<Gaussian>(r).Named("Obs" + i);
                VariableArray<double> scores = Variable.Array<double>(r).Named("Scores" + i);
                VariableArray<double> scoresJ = Variable.Array<double>(r).Named("ScoresJ" + i);
                VariableArray<double> scoresC = Variable.Array<double>(r).Named("ScoresC" + i);
                scores[r] = Variable.GaussianFromMeanAndPrecision(scoreMean.GetCopyFor(model), scorePrec.GetCopyFor(model)).ForEach(r);
                scoresJ[r] = Variable.GaussianFromMeanAndPrecision(scores[r], judgePrec.GetCopyFor(model));
                scoresC[r] = Variable.GaussianFromMeanAndPrecision(scores[r], clickPrec.GetCopyFor(model));
                Variable.ConstrainEqualRandom(scoresC[r], observationDistribs[i][r]);
                // The judgement score for label i must lie between thresholds i and i + 1
                Variable.ConstrainBetween(scoresJ[r], thresholds[i].GetCopyFor(model), thresholds[i + 1].GetCopyFor(model));
            }

            //----------------------------------------------------------
            // Outer loop iterates over a number of passes
            // Inner loop iterates over the chunks; for each chunk the
            // observations of every label are attached before inference
            //----------------------------------------------------------
            Console.WriteLine("Training: sample size: " + labels.Length + "\n");
            for (int pass = 0; pass < maxPasses; pass++)
            {
                // Remember where this pass started so the change over the pass can be measured
                prevMargScoreMean = marginals.marginalScoreMean;
                prevMargJudgePrec = marginals.marginalJudgePrec;
                prevMargClickPrec = marginals.marginalClickPrec;

                for (int c = 0; c < numChunks; c++)
                {
                    for (int i = 0; i < numLabels; i++)
                    {
                        numberOfObservations[i].ObservedValue = allObs[c][i].Length;
                        observationDistribs[i].ObservedValue = allObs[c][i];
                    }

                    model.InferShared(engine, c);

                    // Retrieve marginals
                    marginals.marginalScoreMean = scoreMean.Marginal<Gaussian>();
                    marginals.marginalScorePrec = scorePrec.Marginal<Gamma>();
                    marginals.marginalJudgePrec = judgePrec.Marginal<Gamma>();
                    marginals.marginalClickPrec = clickPrec.Marginal<Gamma>();
                    for (int i = 0; i < numThresholds; i++)
                    {
                        marginals.marginalThresh[i] = thresholds[i].Marginal<Gaussian>();
                    }

                    Console.WriteLine("\n****** Pass {0}, chunk {1} ******", pass, c);
                    Console.WriteLine("----- Marginals -----");
                    Console.WriteLine("scoreMean = {0}", marginals.marginalScoreMean);
                    Console.WriteLine("scorePrec = {0}", marginals.marginalScorePrec);
                    Console.WriteLine("judgePrec = {0}", marginals.marginalJudgePrec);
                    Console.WriteLine("clickPrec = {0}", marginals.marginalClickPrec);
                    for (int t = 0; t < numThresholds; t++)
                    {
                        Console.WriteLine("threshMean {0} = {1}", t, marginals.marginalThresh[t]);
                    }
                }

                // Test for convergence: stop once the score mean, judge precision and
                // click precision each changed by less than the threshold over the pass
                if (marginals.marginalScoreMean.MaxDiff(prevMargScoreMean) < convergenceThresh &&
                    marginals.marginalJudgePrec.MaxDiff(prevMargJudgePrec) < convergenceThresh &&
                    marginals.marginalClickPrec.MaxDiff(prevMargClickPrec) < convergenceThresh)
                {
                    Console.WriteLine("\n****** Inference converged ******\n");
                    break;
                }
            }

            return marginals;
        }
Esempio n. 2
0
        /// <summary>
        /// Fits the click model chunk by chunk using shared variables and returns the
        /// marginals of the shared variables once all passes have run.
        /// </summary>
        /// <param name="numLabels">Number of label classes; numLabels + 1 thresholds are created.</param>
        /// <param name="learnScoreMean">When true the score-mean prior gets variance 1, otherwise variance 0.</param>
        /// <param name="learnScorePrec">When true the score-precision prior gets variance 1, otherwise variance 0.</param>
        /// <param name="learnJudgePrec">When true the judge-precision prior gets variance 1, otherwise variance 0.</param>
        /// <param name="learnClickPrec">When true the click-precision prior gets variance 1, otherwise variance 0.</param>
        /// <param name="learnThresholds">Forwarded to CalculatePriors.</param>
        /// <param name="nominalScoreMean">Mean of the score-mean prior.</param>
        /// <param name="nominalScorePrec">Mean of the score-precision prior.</param>
        /// <param name="nominalJudgePrec">Mean of the judge-precision prior.</param>
        /// <param name="nominalClickPrec">Mean of the click-precision prior.</param>
        /// <param name="labels">Forwarded to getClickObservations.</param>
        /// <param name="clicks">Forwarded to getClickObservations.</param>
        /// <param name="exams">Forwarded to getClickObservations.</param>
        /// <param name="chunkSize">Forwarded to getClickObservations.</param>
        /// <param name="nPasses">Number of passes made over all chunks.</param>
        /// <param name="printToConsole">When true the marginals are written to the console after every chunk.</param>
        /// <param name="margScoreMean">Receives the marginal of the shared scoreMean variable.</param>
        /// <param name="margScorePrec">Receives the marginal of the shared scorePrec variable.</param>
        /// <param name="margJudgePrec">Receives the marginal of the shared judgePrec variable.</param>
        /// <param name="margClickPrec">Receives the marginal of the shared clickPrec variable.</param>
        /// <param name="margThresh">Receives one marginal per threshold (numLabels + 1 entries).</param>
        private void LearnAPIClick5LabelModel(
            int numLabels,
            bool learnScoreMean,
            bool learnScorePrec,
            bool learnJudgePrec,
            bool learnClickPrec,
            bool learnThresholds,
            double nominalScoreMean,
            double nominalScorePrec,
            double nominalJudgePrec,
            double nominalClickPrec,
            int[] labels,
            int[] clicks,
            int[] exams,
            int chunkSize,
            int nPasses,
            bool printToConsole,
            out Gaussian margScoreMean,
            out Gamma margScorePrec,
            out Gamma margJudgePrec,
            out Gamma margClickPrec,
            out Gaussian[] margThresh)
        {
            // Label k is bounded by thresholds k and k + 1, hence one more threshold than labels
            int thresholdCount = numLabels + 1;

            //------------------------------------------------------
            // Observations, indexed as [chunk][label][observation]
            //------------------------------------------------------
            Gaussian[][][] observationChunks = getClickObservations(numLabels, chunkSize, labels, clicks, exams);
            int chunkCount = observationChunks.Length;

            //-------------------------------------------------------------
            // Prior distributions: variance 1 for a quantity that is to be
            // learned, variance 0 for one that is not
            //-------------------------------------------------------------
            double scoreMeanVariance = learnScoreMean ? 1.0 : 0.0;
            double scorePrecVariance = learnScorePrec ? 1.0 : 0.0;
            double judgePrecVariance = learnJudgePrec ? 1.0 : 0.0;
            double clickPrecVariance = learnClickPrec ? 1.0 : 0.0;

            Gaussian scoreMeanPrior = Gaussian.FromMeanAndVariance(nominalScoreMean, scoreMeanVariance);
            Gamma scorePrecPrior = Gamma.FromMeanAndVariance(nominalScorePrec, scorePrecVariance);
            Gamma judgePrecPrior = Gamma.FromMeanAndVariance(nominalJudgePrec, judgePrecVariance);
            Gamma clickPrecPrior = Gamma.FromMeanAndVariance(nominalClickPrec, clickPrecVariance);

            Gaussian[] thresholdPriors;
            CalculatePriors(learnThresholds, numLabels, out thresholdPriors);

            //-----------------------------------------------------
            // Shared variables
            //-----------------------------------------------------
            SharedVariable<double> scoreMean = SharedVariable<double>.Random(scoreMeanPrior).Named("scoreMean");
            SharedVariable<double> scorePrec = SharedVariable<double>.Random(scorePrecPrior).Named("scorePrec");
            SharedVariable<double> judgePrec = SharedVariable<double>.Random(judgePrecPrior).Named("judgePrec");
            SharedVariable<double> clickPrec = SharedVariable<double>.Random(clickPrecPrior).Named("clickPrec");

            SharedVariable<double>[] thresholds = new SharedVariable<double>[thresholdCount];
            for (int k = 0; k < thresholdCount; k++)
            {
                thresholds[k] = SharedVariable<double>.Random(thresholdPriors[k]).Named("threshMeans" + k);
            }

            //----------------------------------------------------------------------------------
            // The model: one sub-model copy per chunk
            //----------------------------------------------------------------------------------
            Model model = new Model(chunkCount);

            VariableArray<Gaussian>[] clickObs = new VariableArray<Gaussian>[numLabels];
            Variable<int>[] clickObsLength = new Variable<int>[numLabels];

            for (int label = 0; label < numLabels; label++)
            {
                Variable<int> obsCount = Variable.New<int>().Named("clickObsLength" + label);
                clickObsLength[label] = obsCount;

                Range obsRange = new Range(obsCount).Named("dataCount" + label);
                VariableArray<Gaussian> obs = Variable.Array<Gaussian>(obsRange).Named("Obs" + label);
                clickObs[label] = obs;

                VariableArray<double> scores = Variable.Array<double>(obsRange).Named("scores" + label);
                VariableArray<double> scoresJ = Variable.Array<double>(obsRange).Named("scoresJ" + label);
                VariableArray<double> scoresC = Variable.Array<double>(obsRange).Named("scoresC" + label);

                // Latent score, then a judgement score and a click score centred on it
                scores[obsRange] = Variable.GaussianFromMeanAndPrecision(
                    scoreMean.GetCopyFor(model),
                    scorePrec.GetCopyFor(model)).ForEach(obsRange);
                scoresJ[obsRange] = Variable.GaussianFromMeanAndPrecision(
                    scores[obsRange],
                    judgePrec.GetCopyFor(model));
                scoresC[obsRange] = Variable.GaussianFromMeanAndPrecision(
                    scores[obsRange],
                    clickPrec.GetCopyFor(model));

                Variable.ConstrainBetween(
                    scoresJ[obsRange],
                    thresholds[label].GetCopyFor(model),
                    thresholds[label + 1].GetCopyFor(model));
                Variable.ConstrainEqualRandom<double, Gaussian>(scoresC[obsRange], obs[obsRange]);
                obsRange.AddAttribute(new Sequential());
            }

            InferenceEngine engine = new InferenceEngine();

            //----------------------------------------------------------
            // Outer loop: passes. Middle loop: chunks. For each chunk
            // the observations of every label are attached first.
            //----------------------------------------------------------
            for (int passIndex = 0; passIndex < nPasses; passIndex++)
            {
                for (int chunk = 0; chunk < chunkCount; chunk++)
                {
                    Gaussian[][] chunkObs = observationChunks[chunk];
                    for (int label = 0; label < numLabels; label++)
                    {
                        clickObsLength[label].ObservedValue = chunkObs[label].Length;
                        clickObs[label].ObservedValue = chunkObs[label];
                    }

                    // Infer the output messages
                    model.InferShared(engine, chunk);

                    if (!printToConsole)
                    {
                        continue;
                    }

                    // Progress report: read the current marginals and print them
                    margScoreMean = scoreMean.Marginal<Gaussian>();
                    margScorePrec = scorePrec.Marginal<Gamma>();
                    margJudgePrec = judgePrec.Marginal<Gamma>();
                    margClickPrec = clickPrec.Marginal<Gamma>();
                    margThresh = new Gaussian[thresholdCount];
                    for (int k = 0; k < thresholdCount; k++)
                    {
                        margThresh[k] = thresholds[k].Marginal<Gaussian>();
                    }

                    Console.WriteLine("****** Pass {0}, chunk {1} ******", passIndex, chunk);
                    Console.WriteLine("----- Marginals -----");
                    Console.WriteLine("scoreMean = {0}", margScoreMean);
                    Console.WriteLine("scorePrec = {0}", margScorePrec);
                    Console.WriteLine("judgePrec = {0}", margJudgePrec);
                    Console.WriteLine("clickPrec = {0}", margClickPrec);
                    for (int k = 0; k < thresholdCount; k++)
                    {
                        Console.WriteLine("threshMean {0} = {1}", k, margThresh[k]);
                    }
                }
            }

            // Final marginals; always assigned so the out parameters are set
            // even when nothing was printed
            margScoreMean = scoreMean.Marginal<Gaussian>();
            margScorePrec = scorePrec.Marginal<Gamma>();
            margJudgePrec = judgePrec.Marginal<Gamma>();
            margClickPrec = clickPrec.Marginal<Gamma>();
            margThresh = new Gaussian[thresholdCount];
            for (int k = 0; k < thresholdCount; k++)
            {
                margThresh[k] = thresholds[k].Marginal<Gaussian>();
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Constructs an LDA model
        /// </summary>
        /// <param name="numBatches">Number of batches; stored in NumBatches and used as the number of copies of the document sub-model</param>
        /// <param name="sizeVocab">Size of vocabulary</param>
        /// <param name="numTopics">Number of topics</param>
        public LDAShared(int numBatches, int sizeVocab, int numTopics)
        {
            SizeVocab = sizeVocab;
            NumTopics = numTopics;
            ThetaSparsity = Sparsity.Dense;
            PhiSparsity = Sparsity.ApproximateWithTolerance(0.00000000001); // Allow for round-off error
            NumDocuments = Variable.New<int>().Named("NumDocuments");
            NumBatches = numBatches;
            IterationsPerPass = new int[] { 1, 3, 5, 7, 9 };

            //---------------------------------------------
            // The model
            //---------------------------------------------
            Range D = new Range(NumDocuments).Named("D");
            Range W = new Range(SizeVocab).Named("W");
            Range T = new Range(NumTopics).Named("T");

            NumWordsInDoc = Variable.Array<int>(D).Named("NumWordsInDoc");
            Range WInD = new Range(NumWordsInDoc[D]).Named("WInD");

            // Evidence is shared so that both sub-models are defined under it
            Evidence = SharedVariable<bool>.Random(new Bernoulli(0.5)).Named("Evidence");
            Evidence.IsEvidenceVariable = true;

            Phi = SharedVariable<Vector>.Random(T, CreateUniformDirichletArray(numTopics, sizeVocab, PhiSparsity)).Named("Phi");

            // Phi definition sub-model - just one copy
            PhiDefModel = new Model(1).Named("PhiDefModel");
            EvidencePhiDef = Evidence.GetCopyFor(PhiDefModel).Named("EvidencePhiDef");

            // 'using' closes the If block even if model construction throws,
            // the same way the ForEach/Repeat/Switch blocks below are handled
            using (Variable.If(EvidencePhiDef))
            {
                PhiDef = Variable.Array<Vector>(T).Named("PhiDef");
                PhiDef.SetSparsity(PhiSparsity);
                PhiDef.SetValueRange(W);
                PhiPrior = Variable.Array<Dirichlet>(T).Named("PhiPrior");
                PhiDef[T] = Variable<Vector>.Random(PhiPrior[T]);

                Phi.SetDefinitionTo(PhiDefModel, PhiDef);
            }

            // Document sub-model - many copies
            DocModel = new Model(numBatches).Named("DocModel");
            EvidenceDoc = Evidence.GetCopyFor(DocModel).Named("EvidenceDoc");

            using (Variable.If(EvidenceDoc))
            {
                Theta = Variable.Array<Vector>(D).Named("Theta");
                Theta.SetSparsity(ThetaSparsity);
                Theta.SetValueRange(T);
                ThetaPrior = Variable.Array<Dirichlet>(D).Named("ThetaPrior");
                Theta[D] = Variable<Vector>.Random(ThetaPrior[D]);

                PhiDoc = Phi.GetCopyFor(DocModel);
                PhiDoc.AddAttribute(new MarginalPrototype(Dirichlet.Uniform(sizeVocab, PhiSparsity)));
                Words = Variable.Array(Variable.Array<int>(WInD), D).Named("Words");
                WordCounts = Variable.Array(Variable.Array<double>(WInD), D).Named("WordCounts");
                using (Variable.ForEach(D))
                {
                    using (Variable.ForEach(WInD))
                    {
                        // Each word entry is repeated according to its count
                        using (Variable.Repeat(WordCounts[D][WInD]))
                        {
                            Variable<int> topic = Variable.Discrete(Theta[D]).Named("topic");
                            using (Variable.Switch(topic))
                            {
                                Words[D][WInD] = Variable.Discrete(PhiDoc[topic]);
                            }
                        }
                    }
                }
            }

            // Initialization to break symmetry
            ThetaInit = Variable.Array<Dirichlet>(D).Named("ThetaInit");
            Theta[D].InitialiseTo(ThetaInit[D]);

            // Engine for the Phi definition sub-model
            EnginePhiDef = new InferenceEngine(new VariationalMessagePassing());
            EnginePhiDef.Compiler.ShowWarnings = false;
            EnginePhiDef.ModelName = "LDASharedPhiDef";

            // Engine for the document sub-model
            Engine = new InferenceEngine(new VariationalMessagePassing());
            Engine.OptimiseForVariables = new IVariable[] { Theta, PhiDoc, EvidenceDoc };

            Engine.Compiler.ShowWarnings = false;
            Engine.ModelName = "LDAShared";
            Engine.Compiler.ReturnCopies = false;
            Engine.Compiler.FreeMemory = true;
        }
Esempio n. 4
0
		/// <summary>
		/// Constructs an LDA model
		/// </summary>
		/// <param name="numBatches">Number of batches; stored in NumBatches and used as the number of copies of the document sub-model</param>
		/// <param name="sizeVocab">Size of vocabulary</param>
		/// <param name="numTopics">Number of topics</param>
		public LDAShared(int numBatches, int sizeVocab, int numTopics)
		{
			SizeVocab = sizeVocab;
			NumTopics = numTopics;
			ThetaSparsity = Sparsity.Dense;
			PhiSparsity = Sparsity.ApproximateWithTolerance(0.00000000001); // Allow for round-off error
			NumDocuments = Variable.New<int>().Named("NumDocuments");
			NumBatches = numBatches;
			IterationsPerPass = new int[] { 1, 3, 5, 7, 9 };

			//---------------------------------------------
			// The model
			//---------------------------------------------
			Range D = new Range(NumDocuments).Named("D");
			Range W = new Range(SizeVocab).Named("W");
			Range T = new Range(NumTopics).Named("T");
			NumWordsInDoc = Variable.Array<int>(D).Named("NumWordsInDoc");
			Range WInD = new Range(NumWordsInDoc[D]).Named("WInD");

			// Evidence is shared so that both sub-models are defined under it
			Evidence = SharedVariable<bool>.Random(new Bernoulli(0.5)).Named("Evidence");
			Evidence.IsEvidenceVariable = true;

			Phi = SharedVariable<Vector>.Random(T, CreateUniformDirichletArray(numTopics, sizeVocab, PhiSparsity)).Named("Phi");

			// Phi definition sub-model - just one copy
			PhiDefModel = new Model(1).Named("PhiDefModel");
			EvidencePhiDef = Evidence.GetCopyFor(PhiDefModel).Named("EvidencePhiDef");

			// 'using' closes the If block even if model construction throws,
			// the same way the ForEach/Repeat/Switch blocks below are handled
			using (Variable.If(EvidencePhiDef))
			{
				PhiDef = Variable.Array<Vector>(T).Named("PhiDef");
				PhiDef.SetSparsity(PhiSparsity);
				PhiDef.SetValueRange(W);
				PhiPrior = Variable.Array<Dirichlet>(T).Named("PhiPrior");
				PhiDef[T] = Variable<Vector>.Random(PhiPrior[T]);
				Phi.SetDefinitionTo(PhiDefModel, PhiDef);
			}

			// Document sub-model - many copies
			DocModel = new Model(numBatches).Named("DocModel");
			EvidenceDoc = Evidence.GetCopyFor(DocModel).Named("EvidenceDoc");

			using (Variable.If(EvidenceDoc))
			{
				Theta = Variable.Array<Vector>(D).Named("Theta");
				Theta.SetSparsity(ThetaSparsity);
				Theta.SetValueRange(T);
				ThetaPrior = Variable.Array<Dirichlet>(D).Named("ThetaPrior");
				Theta[D] = Variable<Vector>.Random(ThetaPrior[D]);
				PhiDoc = Phi.GetCopyFor(DocModel);
				PhiDoc.AddAttribute(new MarginalPrototype(Dirichlet.Uniform(sizeVocab, PhiSparsity)));
				Words = Variable.Array(Variable.Array<int>(WInD), D).Named("Words");
				WordCounts = Variable.Array(Variable.Array<double>(WInD), D).Named("WordCounts");
				using (Variable.ForEach(D))
				{
					using (Variable.ForEach(WInD))
					{
						// Each word entry is repeated according to its count
						using (Variable.Repeat(WordCounts[D][WInD]))
						{
							Variable<int> topic = Variable.Discrete(Theta[D]).Named("topic");
							using (Variable.Switch(topic))
							{
								Words[D][WInD] = Variable.Discrete(PhiDoc[topic]);
							}
						}
					}
				}
			}

			// Initialization to break symmetry
			ThetaInit = Variable.New<IDistribution<Vector[]>>().Named("ThetaInit");
			Theta.InitialiseTo(ThetaInit);

			// Engine for the Phi definition sub-model
			EnginePhiDef = new InferenceEngine(new VariationalMessagePassing());
			EnginePhiDef.Compiler.ShowWarnings = false;
			EnginePhiDef.ModelName = "LDASharedPhiDef";

			// Engine for the document sub-model
			Engine = new InferenceEngine(new VariationalMessagePassing());
			Engine.OptimiseForVariables = new IVariable[] { Theta, PhiDoc, EvidenceDoc };

			Engine.Compiler.ShowWarnings = false;
			Engine.ModelName = "LDAShared";
			Engine.Compiler.ReturnCopies = false;
			Engine.Compiler.FreeMemory = true;
		}