/// <summary>
/// Initializes a new instance of the <see cref="BPMShared"/> class.
/// </summary>
/// <param name="numClasses">The number of classes. Must be at least 1.</param>
/// <param name="noisePrecision">The precision of the noise. Must be positive.</param>
/// <param name="numFeatures">The number of features. Must be at least 1.</param>
/// <param name="numChunksTraining">The number of training set chunks. Must be at least 1.</param>
/// <param name="numChunksTesting">The number of test set chunks. Must be at least 1.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if any count is less than 1 or if <paramref name="noisePrecision"/> is not positive.
/// </exception>
public BPMShared(int numClasses, double noisePrecision, int numFeatures, int numChunksTraining, int numChunksTesting)
{
    // Validate up front: invalid sizes would otherwise fail later, deep inside
    // model construction, with much less helpful error messages.
    if (numClasses < 1)
    {
        throw new ArgumentOutOfRangeException("numClasses", "There must be at least one class.");
    }

    if (numFeatures < 1)
    {
        throw new ArgumentOutOfRangeException("numFeatures", "There must be at least one feature.");
    }

    if (numChunksTraining < 1)
    {
        throw new ArgumentOutOfRangeException("numChunksTraining", "There must be at least one training chunk.");
    }

    if (numChunksTesting < 1)
    {
        throw new ArgumentOutOfRangeException("numChunksTesting", "There must be at least one test chunk.");
    }

    if (noisePrecision <= 0.0)
    {
        throw new ArgumentOutOfRangeException("noisePrecision", "A precision must be positive.");
    }

    // Range over classes.
    this.c = new Range(numClasses).Named("c");

    // Set up the shared weight vectors (one per class) and their prior.
    this.weightsPrior = InitializePrior(numClasses, numFeatures);
    this.weights = SharedVariable<Vector>.Random(this.c, this.weightsPrior).Named("w");

    // Configure the train and test submodels; both share the same weights,
    // each processing its own number of data chunks.
    this.trainModel = new Model(this.weights, this.c, numChunksTraining);
    this.testModel = new Model(this.weights, this.c, numChunksTesting);

    // Observe the same fixed noise precision in both submodels.
    this.trainModel.noisePrecision.ObservedValue = noisePrecision;
    this.testModel.noisePrecision.ObservedValue = noisePrecision;
}
/// <summary>
/// Constructs a multi-component sparse Bayes Point Machine using shared variables for chunking data
/// </summary>
/// <param name="nClass">Number of components (classes)</param>
/// <param name="featureCount">Number of features</param>
/// <param name="noisePrec">Noise precision</param>
/// <param name="trainChunkSize">Chunk size for training</param>
/// <param name="testChunkSize">Chunk size for testing</param>
public BPMSparse_Shared(int nClass, int featureCount, double noisePrec, int trainChunkSize, int testChunkSize)
{
    // Record the model dimensions and chunking configuration.
    nComponents = nClass;
    nFeatures = featureCount;
    NoisePrec = noisePrec;
    this.trainChunkSize = trainChunkSize;
    this.testChunkSize = testChunkSize;

    // Range over the feature dimension, shared by every per-class weight array.
    feature = new Range(nFeatures).Named("feature");

    // Two candidate priors over a weight array:
    //  - a point mass at zero, used for component 0 (pins its weights to zero);
    //  - a standard Gaussian, used for every other component.
    IDistribution<double[]> zeroPointMassPrior = Distribution<double>.Array(nFeatures, delegate(int index) { return Gaussian.PointMass(0); });
    IDistribution<double[]> standardGaussianPrior = Distribution<double>.Array(nFeatures, delegate(int index) { return Gaussian.FromMeanAndPrecision(0.0, 1.0); });

    // One shared weight array per component, drawn from the prior chosen above.
    w = new SharedVariableArray<double>[nComponents];
    for (int k = 0; k < nComponents; k++)
    {
        IDistribution<double[]> priorForComponent;
        if (k == 0)
        {
            priorForComponent = zeroPointMassPrior;
        }
        else
        {
            priorForComponent = standardGaussianPrior;
        }

        w[k] = SharedVariable<double>.Random(feature, (DistributionStructArray<Gaussian, double>)priorForComponent).Named("w_" + k);
    }

    // Build the training and testing submodels over their respective chunk sizes.
    trainModel = SpecifyTrainModel("_train", trainChunkSize);
    testModel = SpecifyTestModel("_test", testChunkSize);
}
/// <summary>
/// Constructs an LDA model
/// </summary>
/// <param name="numBatches">Number of batches the document collection is split into</param>
/// <param name="sizeVocab">Size of vocabulary</param>
/// <param name="numTopics">Number of topics</param>
public LDAShared(int numBatches, int sizeVocab, int numTopics)
{
    SizeVocab = sizeVocab;
    NumTopics = numTopics;
    ThetaSparsity = Sparsity.Dense;
    PhiSparsity = Sparsity.ApproximateWithTolerance(0.00000000001); // Allow for round-off error
    NumDocuments = Variable.New<int>().Named("NumDocuments");
    NumBatches = numBatches;
    // Schedule of inner iterations per pass over the data.
    IterationsPerPass = new int[] { 1, 3, 5, 7, 9 };

    //---------------------------------------------
    // The model
    //---------------------------------------------
    // Ranges: documents, vocabulary words, topics, and words within a document.
    Range D = new Range(NumDocuments).Named("D");
    Range W = new Range(SizeVocab).Named("W");
    Range T = new Range(NumTopics).Named("T");
    NumWordsInDoc = Variable.Array<int>(D).Named("NumWordsInDoc");
    Range WInD = new Range(NumWordsInDoc[D]).Named("WInD");

    // Shared evidence variable so that model evidence can be accumulated across batches.
    Evidence = SharedVariable<bool>.Random(new Bernoulli(0.5)).Named("Evidence");
    Evidence.IsEvidenceVariable = true;

    // Shared per-topic word distributions, initialized with uniform Dirichlet priors.
    Phi = SharedVariable<Vector>.Random(T, CreateUniformDirichletArray(numTopics, sizeVocab, PhiSparsity)).Named("Phi");

    // Phi definition sub-model - just one copy
    PhiDefModel = new Model(1).Named("PhiDefModel");
    IfBlock evidencePhiDefBlock = null;
    EvidencePhiDef = Evidence.GetCopyFor(PhiDefModel).Named("EvidencePhiDef");
    // NOTE: everything until CloseBlock() is defined inside the evidence If-block;
    // statement order here is significant.
    evidencePhiDefBlock = Variable.If(EvidencePhiDef);
    PhiDef = Variable.Array<Vector>(T).Named("PhiDef");
    PhiDef.SetSparsity(PhiSparsity);
    PhiDef.SetValueRange(W);
    PhiPrior = Variable.Array<Dirichlet>(T).Named("PhiPrior");
    PhiDef[T] = Variable<Vector>.Random(PhiPrior[T]);
    Phi.SetDefinitionTo(PhiDefModel, PhiDef);
    evidencePhiDefBlock.CloseBlock();

    // Document sub-model - many copies
    DocModel = new Model(numBatches).Named("DocModel");
    IfBlock evidenceDocBlock = null;
    EvidenceDoc = Evidence.GetCopyFor(DocModel).Named("EvidenceDoc");
    evidenceDocBlock = Variable.If(EvidenceDoc);
    // Per-document topic distributions with their Dirichlet priors.
    Theta = Variable.Array<Vector>(D).Named("Theta");
    Theta.SetSparsity(ThetaSparsity);
    Theta.SetValueRange(T);
    ThetaPrior = Variable.Array<Dirichlet>(D).Named("ThetaPrior");
    Theta[D] = Variable<Vector>.Random(ThetaPrior[D]);
    // Local copy of the shared Phi for this sub-model; the attribute supplies a
    // marginal prototype with the configured sparsity.
    PhiDoc = Phi.GetCopyFor(DocModel);
    PhiDoc.AddAttribute(new MarginalPrototype(Dirichlet.Uniform(sizeVocab, PhiSparsity)));

    // Observed words and their (possibly fractional) counts, jagged by document.
    Words = Variable.Array(Variable.Array<int>(WInD), D).Named("Words");
    WordCounts = Variable.Array(Variable.Array<double>(WInD), D).Named("WordCounts");
    using (Variable.ForEach(D))
    {
        using (Variable.ForEach(WInD))
        {
            // Repeat weights each word's factor contribution by its count.
            using (Variable.Repeat(WordCounts[D][WInD]))
            {
                // Draw a topic for this word slot, then the word given the topic.
                Variable<int> topic = Variable.Discrete(Theta[D]).Named("topic");
                using (Variable.Switch(topic))
                {
                    Words[D][WInD] = Variable.Discrete(PhiDoc[topic]);
                }
            }
        }
    }
    evidenceDocBlock.CloseBlock();

    // Initialization to break symmetry
    // (per-document Dirichlet initializers for Theta).
    ThetaInit = Variable.Array<Dirichlet>(D).Named("ThetaInit");
    Theta[D].InitialiseTo(ThetaInit[D]);

    // Separate VMP engine for the Phi-definition sub-model.
    EnginePhiDef = new InferenceEngine(new VariationalMessagePassing());
    EnginePhiDef.Compiler.ShowWarnings = false;
    EnginePhiDef.ModelName = "LDASharedPhiDef";

    // VMP engine for the document sub-model, tuned to save memory.
    Engine = new InferenceEngine(new VariationalMessagePassing());
    Engine.OptimiseForVariables = new IVariable[] { Theta, PhiDoc, EvidenceDoc };
    Engine.Compiler.ShowWarnings = false;
    Engine.ModelName = "LDAShared";
    Engine.Compiler.ReturnCopies = false;
    Engine.Compiler.FreeMemory = true;
}
/// <summary>
/// Initializes a submodel over one set of data chunks, using a copy of the
/// shared class weight vectors.
/// </summary>
/// <param name="w">The shared weight vectors (one per class).</param>
/// <param name="c">The range over classes.</param>
/// <param name="numChunks">The number of data chunks this submodel will process.</param>
public Model(SharedVariableArray<Vector> w, Range c, int numChunks)
{
    // Items.
    numItems = Variable.New<int>().Named("numItems");
    i = new Range(numItems).Named("i");
    // Process items sequentially (serial schedule over the item range).
    i.AddAttribute(new Sequential());

    // The model identifier for the shared variables.
    model = new MicrosoftResearch.Infer.Models.Model(numChunks).Named("model");

    // The weight vector for each submodel.
    wModel = w.GetCopyFor(model).Named("wModel");

    noisePrecision = Variable.New<double>().Named("noisePrecision");

    // Arrays of <see cref="Vector"/>-valued items (feature vectors) and integer labels.
    x = Variable.Array<Vector>(i).Named("x");
    y = Variable.Array<int>(i).Named("y");

    // For all items...
    using (Variable.ForEach(i))
    {
        // ...compute the score of this item across all classes...
        score = BPMUtils.ComputeClassScores(wModel, x[i], noisePrecision);
        y[i] = Variable.DiscreteUniform(c);

        // ... and constrain the output.
        // (The observed label's score is constrained to be the maximum.)
        BPMUtils.ConstrainMaximum(y[i], score);
    }

    // Inference engine settings (EP).
    engine.Compiler.UseSerialSchedules = true;
    engine.ShowProgress = false;
}
/// <summary>
/// Constructs an LDA model
/// </summary>
/// <param name="numBatches">Number of batches the document collection is split into</param>
/// <param name="sizeVocab">Size of vocabulary</param>
/// <param name="numTopics">Number of topics</param>
public LDAShared(int numBatches, int sizeVocab, int numTopics)
{
    SizeVocab = sizeVocab;
    NumTopics = numTopics;
    ThetaSparsity = Sparsity.Dense;
    PhiSparsity = Sparsity.ApproximateWithTolerance(0.00000000001); // Allow for round-off error
    NumDocuments = Variable.New<int>().Named("NumDocuments");
    NumBatches = numBatches;
    // Schedule of inner iterations per pass over the data.
    IterationsPerPass = new int[] { 1, 3, 5, 7, 9 };

    //---------------------------------------------
    // The model
    //---------------------------------------------
    // Ranges: documents, vocabulary words, topics, and words within a document.
    Range D = new Range(NumDocuments).Named("D");
    Range W = new Range(SizeVocab).Named("W");
    Range T = new Range(NumTopics).Named("T");
    NumWordsInDoc = Variable.Array<int>(D).Named("NumWordsInDoc");
    Range WInD = new Range(NumWordsInDoc[D]).Named("WInD");

    // Shared evidence variable so that model evidence can be accumulated across batches.
    Evidence = SharedVariable<bool>.Random(new Bernoulli(0.5)).Named("Evidence");
    Evidence.IsEvidenceVariable = true;

    // Shared per-topic word distributions, initialized with uniform Dirichlet priors.
    Phi = SharedVariable<Vector>.Random(T, CreateUniformDirichletArray(numTopics, sizeVocab, PhiSparsity)).Named("Phi");

    // Phi definition sub-model - just one copy
    PhiDefModel = new Model(1).Named("PhiDefModel");
    IfBlock evidencePhiDefBlock = null;
    EvidencePhiDef = Evidence.GetCopyFor(PhiDefModel).Named("EvidencePhiDef");
    // NOTE: everything until CloseBlock() is defined inside the evidence If-block;
    // statement order here is significant.
    evidencePhiDefBlock = Variable.If(EvidencePhiDef);
    PhiDef = Variable.Array<Vector>(T).Named("PhiDef");
    PhiDef.SetSparsity(PhiSparsity);
    PhiDef.SetValueRange(W);
    PhiPrior = Variable.Array<Dirichlet>(T).Named("PhiPrior");
    PhiDef[T] = Variable<Vector>.Random(PhiPrior[T]);
    Phi.SetDefinitionTo(PhiDefModel, PhiDef);
    evidencePhiDefBlock.CloseBlock();

    // Document sub-model - many copies
    DocModel = new Model(numBatches).Named("DocModel");
    IfBlock evidenceDocBlock = null;
    EvidenceDoc = Evidence.GetCopyFor(DocModel).Named("EvidenceDoc");
    evidenceDocBlock = Variable.If(EvidenceDoc);
    // Per-document topic distributions with their Dirichlet priors.
    Theta = Variable.Array<Vector>(D).Named("Theta");
    Theta.SetSparsity(ThetaSparsity);
    Theta.SetValueRange(T);
    ThetaPrior = Variable.Array<Dirichlet>(D).Named("ThetaPrior");
    Theta[D] = Variable<Vector>.Random(ThetaPrior[D]);
    // Local copy of the shared Phi for this sub-model; the attribute supplies a
    // marginal prototype with the configured sparsity.
    PhiDoc = Phi.GetCopyFor(DocModel);
    PhiDoc.AddAttribute(new MarginalPrototype(Dirichlet.Uniform(sizeVocab, PhiSparsity)));

    // Observed words and their (possibly fractional) counts, jagged by document.
    Words = Variable.Array(Variable.Array<int>(WInD), D).Named("Words");
    WordCounts = Variable.Array(Variable.Array<double>(WInD), D).Named("WordCounts");
    using (Variable.ForEach(D))
    {
        using (Variable.ForEach(WInD))
        {
            // Repeat weights each word's factor contribution by its count.
            using (Variable.Repeat(WordCounts[D][WInD]))
            {
                // Draw a topic for this word slot, then the word given the topic.
                Variable<int> topic = Variable.Discrete(Theta[D]).Named("topic");
                using (Variable.Switch(topic))
                    Words[D][WInD] = Variable.Discrete(PhiDoc[topic]);
            }
        }
    }
    evidenceDocBlock.CloseBlock();

    // Initialization to break symmetry
    // (a single distribution over the whole Theta array, unlike the per-document
    // initializer used by the other LDAShared variant).
    ThetaInit = Variable.New<IDistribution<Vector[]>>().Named("ThetaInit");
    Theta.InitialiseTo(ThetaInit);

    // Separate VMP engine for the Phi-definition sub-model.
    EnginePhiDef = new InferenceEngine(new VariationalMessagePassing());
    EnginePhiDef.Compiler.ShowWarnings = false;
    EnginePhiDef.ModelName = "LDASharedPhiDef";

    // VMP engine for the document sub-model, tuned to save memory.
    Engine = new InferenceEngine(new VariationalMessagePassing());
    Engine.OptimiseForVariables = new IVariable[] { Theta, PhiDoc, EvidenceDoc };
    Engine.Compiler.ShowWarnings = false;
    Engine.ModelName = "LDAShared";
    Engine.Compiler.ReturnCopies = false;
    Engine.Compiler.FreeMemory = true;
}