Example #1
0
		/// <summary>
		/// Creates a uniform distribution array over Dirichlets
		/// </summary>
		/// <param name="length">Length of array</param>
		/// <param name="valueLength">Dimension of each Dirichlet</param>
		/// <param name="sparsity">Sparsity of each Dirichlet</param>
		/// <returns>A distribution over an array of Vectors whose marginals are uniform Dirichlets</returns>
		private static DirichletArray CreateUniformDirichletArray(
			int length, int valueLength, Sparsity sparsity)
		{
			Dirichlet[] result = new Dirichlet[length];
			for (int i = 0; i < length; i++)
			{
				result[i] = Dirichlet.Uniform(valueLength, sparsity);
			}

			return (DirichletArray)Distribution<Vector>.Array<Dirichlet>(result);
		}
Example #2
0
 /// <summary>
 /// Creates a uniform Discrete distribution with a specified sparsity, from 0 to dimension-1.
 /// </summary>
 /// <param name="dimension">Number of values the distribution ranges over (0 to dimension-1)</param>
 /// <param name="sparsity">Sparsity specification for the underlying probability vector</param>
 protected Discrete(int dimension, Sparsity sparsity)
 {
     // Allocate the probability vector with the requested sparsity first,
     // then normalise it in place to the uniform distribution.
     prob = Vector.Zero(dimension, sparsity);
     SetToUniform();
 }
Example #3
0
        /// <summary>
        /// Creates a uniform Discrete distribution over 0 to numValues-1, with a specified sparsity.
        /// </summary>
        /// <param name="numValues">Number of values the distribution ranges over</param>
        /// <param name="sparsity">Sparsity specification for the underlying probability vector</param>
        /// <returns>The newly created uniform Discrete distribution</returns>
        public static Discrete Uniform(int numValues, Sparsity sparsity)
        {
            // The protected constructor already initialises the distribution to uniform.
            return new Discrete(numValues, sparsity);
        }
 /// <summary>
 /// Creates a uniform discrete distribution of the given dimension and sparsity,
 /// wrapped by this instance.
 /// </summary>
 /// <param name="dimension">The dimension of the underlying discrete</param>
 /// <param name="sparsity">The sparsity of the underlying discrete</param>
 protected GenericDiscreteBase(int dimension, Sparsity sparsity)
 {
     disc = Discrete.Uniform(dimension, sparsity);
 }
Example #5
0
        /// <summary>
        /// Constructs an LDA model in which the documents are processed in batches,
        /// using shared variables to glue the per-batch sub-models together.
        /// </summary>
        /// <param name="numBatches">Number of batches the documents are divided into</param>
        /// <param name="sizeVocab">Size of vocabulary</param>
        /// <param name="numTopics">Number of topics</param>
        public LDAShared(int numBatches, int sizeVocab, int numTopics)
        {
            SizeVocab         = sizeVocab;
            NumTopics         = numTopics;
            ThetaSparsity     = Sparsity.Dense;
            PhiSparsity       = Sparsity.ApproximateWithTolerance(0.00000000001); // Allow for round-off error
            NumDocuments      = Variable.New <int>().Named("NumDocuments");
            NumBatches        = numBatches;
            // Inference iterations to run in each successive pass over the batches.
            IterationsPerPass = new int[] { 1, 3, 5, 7, 9 };

            //---------------------------------------------
            // The model
            //---------------------------------------------
            Range D = new Range(NumDocuments).Named("D");
            Range W = new Range(SizeVocab).Named("W");
            Range T = new Range(NumTopics).Named("T");

            NumWordsInDoc = Variable.Array <int>(D).Named("NumWordsInDoc");
            Range WInD = new Range(NumWordsInDoc[D]).Named("WInD");

            // Evidence (model-selection) variable, shared across all sub-models.
            Evidence = SharedVariable <bool> .Random(new Bernoulli(0.5)).Named("Evidence");

            Evidence.IsEvidenceVariable = true;

            // Per-topic word distributions, shared between the definition sub-model
            // (which owns Phi's prior) and the per-batch document sub-models.
            Phi = SharedVariable <Vector> .Random(T, CreateUniformDirichletArray(numTopics, sizeVocab, PhiSparsity)).Named("Phi");

            // Phi definition sub-model - just one copy
            PhiDefModel = new Model(1).Named("PhiDefModel");

            IfBlock evidencePhiDefBlock = null;

            // Everything inside the If(Evidence) block contributes to model evidence.
            EvidencePhiDef      = Evidence.GetCopyFor(PhiDefModel).Named("EvidencePhiDef");
            evidencePhiDefBlock = Variable.If(EvidencePhiDef);
            PhiDef = Variable.Array <Vector>(T).Named("PhiDef");
            PhiDef.SetSparsity(PhiSparsity);
            PhiDef.SetValueRange(W);
            PhiPrior  = Variable.Array <Dirichlet>(T).Named("PhiPrior");
            PhiDef[T] = Variable <Vector> .Random(PhiPrior[T]);

            Phi.SetDefinitionTo(PhiDefModel, PhiDef);
            evidencePhiDefBlock.CloseBlock();

            // Document sub-model - many copies
            DocModel = new Model(numBatches).Named("DocModel");

            IfBlock evidenceDocBlock = null;

            EvidenceDoc      = Evidence.GetCopyFor(DocModel).Named("EvidenceDoc");
            evidenceDocBlock = Variable.If(EvidenceDoc);
            // Per-document topic distributions.
            Theta            = Variable.Array <Vector>(D).Named("Theta");
            Theta.SetSparsity(ThetaSparsity);
            Theta.SetValueRange(T);
            ThetaPrior = Variable.Array <Dirichlet>(D).Named("ThetaPrior");
            Theta[D]   = Variable <Vector> .Random(ThetaPrior[D]);

            PhiDoc = Phi.GetCopyFor(DocModel);
            PhiDoc.AddAttribute(new MarginalPrototype(Dirichlet.Uniform(sizeVocab, PhiSparsity)));
            // Words are given as (index, count) pairs per document; the Repeat block
            // below weights each word's contribution by its count.
            Words      = Variable.Array(Variable.Array <int>(WInD), D).Named("Words");
            WordCounts = Variable.Array(Variable.Array <double>(WInD), D).Named("WordCounts");
            using (Variable.ForEach(D))
            {
                using (Variable.ForEach(WInD))
                {
                    using (Variable.Repeat(WordCounts[D][WInD]))
                    {
                        // Standard LDA generative step: draw a topic from the document's
                        // topic distribution, then draw the word from that topic.
                        Variable <int> topic = Variable.Discrete(Theta[D]).Named("topic");
                        using (Variable.Switch(topic))
                        {
                            Words[D][WInD] = Variable.Discrete(PhiDoc[topic]);
                        }
                    }
                }
            }

            evidenceDocBlock.CloseBlock();

            // Initialization to break symmetry
            ThetaInit = Variable.New <IDistribution <Vector[]> >().Named("ThetaInit");
            Theta.InitialiseTo(ThetaInit);
            // Separate engine for the one-off Phi definition sub-model.
            EnginePhiDef = new InferenceEngine(new VariationalMessagePassing());
            EnginePhiDef.Compiler.ShowWarnings = false;
            EnginePhiDef.ModelName             = "LDASharedPhiDef";

            Engine = new InferenceEngine(new VariationalMessagePassing());
            // Restrict inference to the marginals actually consumed per batch.
            Engine.OptimiseForVariables = new IVariable[] { Theta, PhiDoc, EvidenceDoc };

            Engine.Compiler.ShowWarnings = false;
            Engine.ModelName             = "LDAShared";
            Engine.Compiler.ReturnCopies = false;
            Engine.Compiler.FreeMemory   = true;
        }
Example #6
0
        /// <summary>
        /// Creates a random initialisation for Phi, to break symmetry between topics.
        /// Each topic gets a baseline pseudo-count of beta for every word, plus a
        /// random extra pseudo-count on a random subset of words.
        /// </summary>
        /// <param name="initMaxPseudoCount">Maximum extra pseudo-count added to a perturbed word</param>
        /// <param name="initWordsPerTopic">Expected number of perturbed words per topic (Poisson mean)</param>
        /// <param name="sparsity">Sparsity of the Dirichlet pseudo-count vectors</param>
        /// <param name="beta">Baseline pseudo-count for every word</param>
        /// <returns>A distribution over an array of Vectors, one Dirichlet per topic</returns>
        private IDistribution <Vector[]> GetInitialisation(double initMaxPseudoCount, double initWordsPerTopic, Sparsity sparsity, double beta)
        {
            Dirichlet[] initPhi = new Dirichlet[TotalTopics.ObservedValue];
            // Fixed seed so the initialisation (and hence inference) is reproducible.
            Random r = new Random(12347);

            for (int i = 0; i < TotalTopics.ObservedValue; i++)
            {
                Vector v    = Vector.Constant(TotalWords.ObservedValue, beta, sparsity);
                int[]  perm = Rand.Perm(TotalWords.ObservedValue);
                // Clamp the Poisson sample: it is unbounded, and without the clamp a
                // sample larger than the vocabulary would index past the permutation.
                int numWords = Math.Min(Poisson.Sample(initWordsPerTopic), TotalWords.ObservedValue);
                for (int j = 0; j < numWords; j++)
                {
                    v[perm[j]] += initMaxPseudoCount * r.NextDouble();
                }
                initPhi[i] = new Dirichlet(v);
            }
            return Distribution <Vector> .Array(initPhi);
        }
Example #7
0
        public void LDATest()
        {
            int    topicCount = 2;
            int    vocabSize  = 10;
            int    docCount   = 5;
            double alphaPrior = 1.0 / topicCount;
            double betaPrior  = 1.0 / vocabSize;

            // Generate toy data with a fixed seed so the run is reproducible.
            Dirichlet[] truePhi, trueTheta;
            Rand.Restart(12347);
            int[][] docs = GenerateToyLDAData(topicCount, vocabSize, docCount, 100,
                                              out trueTheta, out truePhi);

            // Run the model densely.
            Rand.Restart(12347);
            var denseModel = new LDAModel();
            denseModel.Engine.NumberOfIterations = 15;
            denseModel.Engine.ModelName          = "LdaDense";
            denseModel.Infer(10.0, vocabSize, topicCount, docs, alphaPrior, betaPrior, Sparsity.Dense, Sparsity.Dense, Sparsity.Dense);

            // Run the identical model sparsely, restarting the RNG so both runs see the same randomness.
            Rand.Restart(12347);
            var sparseModel = new LDAModel();
            sparseModel.Engine.NumberOfIterations    = 15;
            sparseModel.Engine.ModelName             = "LdaSparse";
            sparseModel.Engine.Compiler.ReturnCopies = false;
            // previous tolerances appeared to be set to alpha and beta but were in fact set to dense due to a downstream bug.
            sparseModel.Infer(10.0, vocabSize, topicCount, docs, alphaPrior, betaPrior,
                              Sparsity.ApproximateWithTolerance(1e-7), Sparsity.ApproximateWithTolerance(1e-7), Sparsity.ApproximateWithTolerance(1e-6));

            Console.WriteLine(StringUtil.JoinColumns("Phi sparsity = ", SparsityFraction(sparseModel.PostPhi)));
            Console.WriteLine(StringUtil.JoinColumns("Theta sparsity = ", SparsityFraction(sparseModel.PostTheta)));

            // The dense and sparse runs should agree to within the approximation tolerance.
            for (int d = 0; d < docCount; d++)
            {
                Assert.Equal(Sparsity.Dense, denseModel.PostTheta[d].Sparsity);
                //Assert.True(sparseModel.PostTheta[d].Sparsity.IsApproximate);
                Assert.Equal(0.0, denseModel.PostTheta[d].MaxDiff(sparseModel.PostTheta[d]), 1e-3);
            }
            for (int t = 0; t < topicCount; t++)
            {
                Assert.Equal(Sparsity.Dense, denseModel.PostPhi[t].Sparsity);
                // Assert.True(sparseModel.PostPhi[t].Sparsity.IsApproximate);
                Assert.Equal(0.0, denseModel.PostPhi[t].MaxDiff(sparseModel.PostPhi[t]), 1e-3);
            }
        }
Example #8
0
#pragma warning disable 162
#endif

        /// <summary>
        /// Run inference on the LDA model
        /// </summary>
        /// <param name="initMaxPseudoCount">Max pseudo-count for initialisation</param>
        /// <param name="numVocab">Size of the vocabulary</param>
        /// <param name="numTopics">Number of topics</param>
        /// <param name="wordsInDoc">Word indices for each document</param>
        /// <param name="alpha">Pseudo-counts for theta</param>
        /// <param name="beta">Pseudo-counts for phi</param>
        /// <param name="alphaSparsity">Sparsity of the theta prior pseudo-count vector</param>
        /// <param name="betaSparsity">Sparsity of the phi prior pseudo-count vector</param>
        /// <param name="phiSparsity">Sparsity used for the phi initialisation</param>
        public void Infer(double initMaxPseudoCount, int numVocab, int numTopics, int[][] wordsInDoc, double alpha, double beta, Sparsity alphaSparsity, Sparsity betaSparsity,
                          Sparsity phiSparsity)
        {
            // Set up the observed values
            TotalWords.ObservedValue     = numVocab;
            TotalDocuments.ObservedValue = wordsInDoc.Length;
            TotalTopics.ObservedValue    = numTopics;
            int[] numWordsInDoc = new int[wordsInDoc.Length];
            for (int i = 0; i < wordsInDoc.Length; i++)
            {
                numWordsInDoc[i] = wordsInDoc[i].Length;
            }
            NumWordsInDoc.ObservedValue = numWordsInDoc;
            Words.ObservedValue         = wordsInDoc;
            // Symmetric Dirichlet priors: the same pseudo-count for every topic/word.
            Vector alphaVector = Vector.Constant(numTopics, alpha, alphaSparsity);
            Vector betaVector  = Vector.Constant(numVocab, beta, betaSparsity);

            AlphaPrior.ObservedValue = new Dirichlet(alphaVector);
            BetaPrior.ObservedValue  = new Dirichlet(betaVector);
            // This has to occur after the other observed values are set:
            // NOTE(review): numVocab / numTopics is integer division — presumably the
            // intended (truncated) average number of words per topic; confirm.
            PhiInit.ObservedValue = GetInitialisation(initMaxPseudoCount, numVocab / numTopics, phiSparsity, beta);

            // Deliberately disabled debug switch (the pragma above this method suppresses
            // the unreachable-code warning). Flip to true to bake observed values into
            // the generated code for debugging.
            if (false)
            {
                // for debugging, put the observed values in the code.
                TotalWords.IsReadOnly     = true;
                TotalDocuments.IsReadOnly = true;
                TotalTopics.IsReadOnly    = true;
                NumWordsInDoc.IsReadOnly  = true;
                Words.IsReadOnly          = true;
                AlphaPrior.IsReadOnly     = true;
                BetaPrior.IsReadOnly      = true;
                PhiInit.IsReadOnly        = true;
            }

            // Only the Theta and Phi marginals are needed; telling the engine this
            // lets it avoid computing and storing other intermediate messages.
            Engine.OptimiseForVariables = new List <IVariable>()
            {
                Theta, Phi
            };
            PostTheta = Engine.Infer <Dirichlet[]>(Theta);
            PostPhi   = Engine.Infer <Dirichlet[]>(Phi);
        }