/// <summary>
/// Checks that <c>factor1.Multiply(factor2)</c> holds, for every assignment of the product, the product of the
/// two input values looked up through <c>SubsetAssignment</c>, and that the product lists no neighbor index twice.
/// </summary>
/// <param name="factor1">left operand of the multiplication</param>
/// <param name="factor2">right operand of the multiplication</param>
public virtual void TestMultiply(TableFactor factor1, TableFactor factor2) {
    TableFactor result = factor1.Multiply(factor2);

    foreach (int[] assignment in result) {
        double factor1Value = factor1.GetAssignmentValue(SubsetAssignment(assignment, result, factor1));
        double factor2Value = factor2.GetAssignmentValue(SubsetAssignment(assignment, result, factor2));
        // NUnit's overload is AreEqual(expected, actual, delta); the tolerance must be the last argument.
        NUnit.Framework.Assert.AreEqual(factor1Value * factor2Value, result.GetAssignmentValue(assignment), 1.0e-5);
    }

    // Check for no duplication: every unordered pair of neighbor indices must differ.
    int[] neighbors = result.neighborIndices;
    for (int i = 0; i < neighbors.Length; i++) {
        for (int j = i + 1; j < neighbors.Length; j++) {
            NUnit.Framework.Assert.AreNotEqual(neighbors[i], neighbors[j]);
        }
    }
}
/// <summary>
/// Compares the MAP assignment returned by <paramref name="inference"/> with one found by brute force:
/// all factors of <paramref name="model"/> are multiplied into a single table, observed variables are
/// observed out, and the highest-valued assignment of the remaining table is taken as the reference.
/// </summary>
/// <param name="model">the graphical model whose factors and variable metadata are read</param>
/// <param name="weights">the weights used to turn each model factor into a table factor</param>
/// <param name="inference">the clique tree whose <c>CalculateMAP()</c> result is verified</param>
public virtual void CheckMAPAgainstBruteForce(GraphicalModel model, ConcatVector weights, CliqueTree inference) {
    int[] map = inference.CalculateMAP();

    // This is the super slow but obviously correct way to get the global table: multiply every factor together.
    TableFactor bruteForce = null;
    foreach (GraphicalModel.Factor factor in model.factors) {
        // NOTE(review): assumes an observation array of all -1 ("unobserved") yields the full, unreduced table
        // for the factor -- confirm against the TableFactor constructor.
        int[] noObservations = new int[factor.neigborIndices.Length];
        for (int k = 0; k < noObservations.Length; k++) {
            noObservations[k] = -1;
        }
        TableFactor tableFactor = new TableFactor(weights, factor, noObservations);
        bruteForce = bruteForce == null ? tableFactor : bruteForce.Multiply(tableFactor);
    }
    NUnit.Framework.Assert.IsNotNull(bruteForce);

    // Observe out all variables that have been registered as observed.
    TableFactor observed = bruteForce;
    foreach (int n in bruteForce.neighborIndices) {
        if (!model.GetVariableMetaDataByReference(n).ContainsKey(CliqueTree.VariableObservedValue)) {
            continue;
        }
        int observation = System.Convert.ToInt32(model.GetVariableMetaDataByReference(n)[CliqueTree.VariableObservedValue]);
        if (observed.neighborIndices.Length > 1) {
            observed = observed.Observe(n, observation);
        } else {
            // If we've observed everything, then just quit
            return;
        }
    }
    bruteForce = observed;

    int largestVariableNum = 0;
    foreach (GraphicalModel.Factor factor in model.factors) {
        foreach (int variable in factor.neigborIndices) {
            if (variable > largestVariableNum) {
                largestVariableNum = variable;
            }
        }
    }

    // This is presented in true order, where 0 corresponds to var 0
    int[] mapValueAssignment = new int[largestVariableNum + 1];
    // This is kept in the order that the factor presents to us
    int[] highestValueAssignment = new int[bruteForce.neighborIndices.Length];
    foreach (int[] assignment in bruteForce) {
        if (bruteForce.GetAssignmentValue(assignment) > bruteForce.GetAssignmentValue(highestValueAssignment)) {
            highestValueAssignment = assignment;
            for (int slot = 0; slot < assignment.Length; slot++) {
                mapValueAssignment[bruteForce.neighborIndices[slot]] = assignment[slot];
            }
        }
    }

    // Observed variables are forced to their observed value in the reference assignment.
    int[] forcedAssignments = new int[largestVariableNum + 1];
    for (int variable = 0; variable < mapValueAssignment.Length; variable++) {
        if (model.GetVariableMetaDataByReference(variable).ContainsKey(CliqueTree.VariableObservedValue)) {
            mapValueAssignment[variable] = System.Convert.ToInt32(model.GetVariableMetaDataByReference(variable)[CliqueTree.VariableObservedValue]);
            forcedAssignments[variable] = mapValueAssignment[variable];
        }
    }

    // Dump diagnostics before the assertions below fail, so the mismatch can be inspected.
    if (!Arrays.Equals(mapValueAssignment, map)) {
        System.Console.Error.WriteLine("---");
        System.Console.Error.WriteLine("Relevant variables: " + Arrays.ToString(bruteForce.neighborIndices));
        System.Console.Error.WriteLine("Var Sizes: " + Arrays.ToString(bruteForce.GetDimensions()));
        System.Console.Error.WriteLine("MAP: " + Arrays.ToString(map));
        System.Console.Error.WriteLine("Brute force map: " + Arrays.ToString(mapValueAssignment));
        System.Console.Error.WriteLine("Forced assignments: " + Arrays.ToString(forcedAssignments));
    }

    foreach (int variable in bruteForce.neighborIndices) {
        // Only check defined variables
        NUnit.Framework.Assert.AreEqual(mapValueAssignment[variable], map[variable]);
    }
}
/// <summary>
/// Compares the output of <c>inference.CalculateMarginals()</c> (per-variable marginals, partition function and
/// per-factor joint marginals) with values obtained by multiplying every factor of the model into one table.
/// </summary>
/// <remarks>
/// When the model has no factors, every marginal is instead expected to be uniform (within 3.0e-2).
/// The method returns early, without further checks, once every variable of the brute-force table is observed.
/// </remarks>
/// <param name="model">the graphical model whose factors and variable metadata are read</param>
/// <param name="weights">the weights used to turn each model factor into a table factor</param>
/// <param name="inference">the clique tree whose marginal results are verified</param>
private void CheckMarginalsAgainstBruteForce(GraphicalModel model, ConcatVector weights, CliqueTree inference) {
    CliqueTree.MarginalResult result = inference.CalculateMarginals();
    double[][] marginals = result.marginals;

    // This is the super slow but obviously correct way to get global marginals: multiply every factor together.
    TableFactor bruteForce = null;
    foreach (GraphicalModel.Factor factor in model.factors) {
        // NOTE(review): assumes an observation array of all -1 ("unobserved") yields the full, unreduced table
        // for the factor -- confirm against the TableFactor constructor.
        int[] noObservations = new int[factor.neigborIndices.Length];
        for (int k = 0; k < noObservations.Length; k++) {
            noObservations[k] = -1;
        }
        TableFactor tableFactor = new TableFactor(weights, factor, noObservations);
        bruteForce = bruteForce == null ? tableFactor : bruteForce.Multiply(tableFactor);
    }

    if (bruteForce == null) {
        // No factors at all: every marginal should be (approximately) uniform.
        foreach (double[] marginal in marginals) {
            foreach (double d in marginal) {
                NUnit.Framework.Assert.AreEqual(1.0 / marginal.Length, d, 3.0e-2);
            }
        }
        return;
    }

    // Observe out all variables that have been registered as observed.
    TableFactor observed = bruteForce;
    for (int i = 0; i < bruteForce.neighborIndices.Length; i++) {
        int n = bruteForce.neighborIndices[i];
        if (!model.GetVariableMetaDataByReference(n).ContainsKey(CliqueTree.VariableObservedValue)) {
            continue;
        }
        int observation = System.Convert.ToInt32(model.GetVariableMetaDataByReference(n)[CliqueTree.VariableObservedValue]);
        // Check that the marginals reflect the observation (one-hot on the observed value)
        for (int j = 0; j < marginals[n].Length; j++) {
            NUnit.Framework.Assert.AreEqual(j == observation ? 1.0 : 0.0, marginals[n][j], 1.0e-9);
        }
        if (observed.neighborIndices.Length > 1) {
            observed = observed.Observe(n, observation);
        } else {
            // If we've observed everything, then just quit
            return;
        }
    }
    bruteForce = observed;

    // Spot check each of the marginals in the brute force calculation
    double[][] bruteMarginals = bruteForce.GetSummedMarginals();
    for (int index = 0; index < bruteForce.neighborIndices.Length; index++) {
        int variable = bruteForce.neighborIndices[index];
        double[] brute = bruteMarginals[index];
        double[] actual = marginals[variable];
        System.Diagnostics.Debug.Assert(brute != null);
        System.Diagnostics.Debug.Assert(actual != null);

        bool isEqual = true;
        for (int j = 0; j < brute.Length; j++) {
            if (double.IsNaN(brute[j]) || Math.Abs(brute[j] - actual[j]) > 3.0e-2) {
                isEqual = false;
                break;
            }
        }
        if (!isEqual) {
            System.Console.Error.WriteLine("Arrays not equal! Variable " + variable);
            System.Console.Error.WriteLine("\tGold: " + Arrays.ToString(brute));
            System.Console.Error.WriteLine("\tResult: " + Arrays.ToString(actual));
        }

        // Element-wise comparison with tolerance (expected = brute force, actual = inference output).
        NUnit.Framework.Assert.AreEqual(brute.Length, actual.Length);
        for (int j = 0; j < brute.Length; j++) {
            NUnit.Framework.Assert.AreEqual(brute[j], actual[j], 3.0e-2);
        }
    }

    // Spot check the partition function
    double goldPartitionFunction = bruteForce.ValueSum();
    // Correct to within 3%
    NUnit.Framework.Assert.AreEqual(goldPartitionFunction, result.partitionFunction, goldPartitionFunction * 3.0e-2);

    // Check the joint marginals
    foreach (GraphicalModel.Factor f in model.factors) {
        NUnit.Framework.Assert.IsTrue(result.jointMarginals.ContainsKey(f));
        TableFactor jointMarginal = result.jointMarginals[f];

        // Sum out of the brute-force table every variable that does not belong to this factor.
        TableFactor bruteForceJointMarginal = bruteForce;
        bool checkedAgainstFixedAssignment = false;
        foreach (int n in bruteForce.neighborIndices) {
            if (System.Array.IndexOf(f.neigborIndices, n) >= 0) {
                continue;
            }
            if (bruteForceJointMarginal.neighborIndices.Length > 1) {
                bruteForceJointMarginal = bruteForceJointMarginal.SumOut(n);
                continue;
            }

            // The last remaining brute-force variable is not part of this factor, so the factor's variables are
            // all read from their observed values: the joint marginal must be one-hot on that assignment.
            int[] fixedAssignment = new int[f.neigborIndices.Length];
            for (int slot = 0; slot < fixedAssignment.Length; slot++) {
                fixedAssignment[slot] = System.Convert.ToInt32(model.GetVariableMetaDataByReference(f.neigborIndices[slot])[CliqueTree.VariableObservedValue]);
            }
            foreach (int[] assn in jointMarginal) {
                if (Arrays.Equals(assn, fixedAssignment)) {
                    NUnit.Framework.Assert.AreEqual(1.0, jointMarginal.GetAssignmentValue(assn), 1.0e-7);
                } else {
                    if (jointMarginal.GetAssignmentValue(assn) != 0) {
                        // Dump the whole table before the assertion below fails.
                        foreach (int[] dumped in jointMarginal) {
                            System.Console.Error.WriteLine(Arrays.ToString(dumped) + ": " + jointMarginal.GetAssignmentValue(dumped));
                        }
                    }
                    NUnit.Framework.Assert.AreEqual(0.0, jointMarginal.GetAssignmentValue(assn), 1.0e-7);
                }
            }
            checkedAgainstFixedAssignment = true;
            break;
        }
        if (checkedAgainstFixedAssignment) {
            continue;
        }

        // Find the correspondence between the brute force joint marginal, which may be missing variables
        // because they were observed out of the table, and the output joint marginals, which are always an exact
        // match for the original factor
        int[] backPointers = new int[f.neigborIndices.Length];
        int[] observedValue = new int[f.neigborIndices.Length];
        for (int slot = 0; slot < backPointers.Length; slot++) {
            backPointers[slot] = -1;
            if (model.GetVariableMetaDataByReference(f.neigborIndices[slot]).ContainsKey(CliqueTree.VariableObservedValue)) {
                observedValue[slot] = System.Convert.ToInt32(model.GetVariableMetaDataByReference(f.neigborIndices[slot])[CliqueTree.VariableObservedValue]);
            } else {
                observedValue[slot] = -1;
                for (int j = 0; j < bruteForceJointMarginal.neighborIndices.Length; j++) {
                    if (bruteForceJointMarginal.neighborIndices[j] == f.neigborIndices[slot]) {
                        backPointers[slot] = j;
                    }
                }
                System.Diagnostics.Debug.Assert(backPointers[slot] != -1);
            }
        }

        double sum = bruteForceJointMarginal.ValueSum();
        if (sum == 0.0) {
            // Avoid dividing by zero when normalizing an all-zero table.
            sum = 1;
        }

        foreach (int[] assignment in jointMarginal) {
            int[] bruteForceMarginalAssignment = new int[bruteForceJointMarginal.neighborIndices.Length];
            bool contradictsObservation = false;
            for (int slot = 0; slot < assignment.Length; slot++) {
                if (backPointers[slot] != -1) {
                    bruteForceMarginalAssignment[backPointers[slot]] = assignment[slot];
                    continue;
                }
                // Make sure all assignments that don't square with observations get 0 weight
                System.Diagnostics.Debug.Assert(observedValue[slot] != -1);
                if (assignment[slot] != observedValue[slot]) {
                    if (jointMarginal.GetAssignmentValue(assignment) != 0) {
                        System.Console.Error.WriteLine("Joint marginals: " + Arrays.ToString(jointMarginal.neighborIndices));
                        System.Console.Error.WriteLine("Assignment: " + Arrays.ToString(assignment));
                        System.Console.Error.WriteLine("Observed Value: " + Arrays.ToString(observedValue));
                        foreach (int[] assn in jointMarginal) {
                            System.Console.Error.WriteLine("\t" + Arrays.ToString(assn) + ":" + jointMarginal.GetAssignmentValue(assn));
                        }
                    }
                    NUnit.Framework.Assert.AreEqual(0.0, jointMarginal.GetAssignmentValue(assignment), 1.0e-7);
                    contradictsObservation = true;
                    break;
                }
            }
            if (contradictsObservation) {
                continue;
            }
            NUnit.Framework.Assert.AreEqual(bruteForceJointMarginal.GetAssignmentValue(bruteForceMarginalAssignment) / sum, jointMarginal.GetAssignmentValue(assignment), 1.0e-3);
        }
    }
}
// OPTIMIZATION:
// the last list of cliques and the last set of messages passed are cached (cachedCliqueList, cachedMessages,
// cachedBackwardPassedMessages), in case we can recycle some
/// <summary>Does tree shaped message passing.</summary>
/// <remarks>
/// The cliques are arranged into a BFS forest; messages are then passed from the leaves up to each root
/// (backward pass) and from each root back down to the leaves (forward pass). Fully observed factors get no
/// clique; when partition and joint marginals are requested they only contribute to the partition function.
/// If any observation is impossible (a clique with no positive entry, or an infinite score for a fully observed
/// factor), uniform marginals are returned for the clique variables together with all-zero joint marginals.
/// </remarks>
/// <param name="marginalize">the method for marginalization, controls MAP or marginals</param>
/// <param name="includeJointMarginalsAndPartition">
/// whether to also compute the partition function and, for sum marginalization, the joint marginal of each factor
/// </param>
/// <returns>the marginal messages</returns>
/// <exception cref="InvalidOperationException">
/// if an observed value lies outside the dimension of its variable
/// </exception>
private CliqueTree.MarginalResult MessagePassing(CliqueTree.MarginalizationMethod marginalize, bool includeJointMarginalsAndPartition) {
    // Using the behavior of brute force factor multiplication as ground truth, the desired
    // outcome of marginal calculation with an impossible factor is a uniform probability dist.,
    // since we have a resulting factor of all 0s. That is of course assuming that normalizing
    // all 0s gives you uniform, which is not real math, but that's a useful tolerance to include, so we do.
    bool impossibleObservationMade = false;

    // Message passing will look at fully observed cliques as non-entities, but their
    // log-likelihood (the log-likelihood of the single observed value) is still relevant for the
    // partition function.
    double partitionFunction = 1.0;
    if (includeJointMarginalsAndPartition) {
        foreach (GraphicalModel.Factor factor in model.factors) {
            if (!IsFullyObservedFactor(factor)) {
                continue;
            }
            int[] observedAssignment = new int[factor.neigborIndices.Length];
            for (int slot = 0; slot < observedAssignment.Length; slot++) {
                observedAssignment[slot] = ReadObservedValue(factor.neigborIndices[slot]);
            }
            double assignmentValue = factor.featuresTable.GetAssignmentValue(observedAssignment).Get().DotProduct(weights);
            if (double.IsInfinite(assignmentValue)) {
                impossibleObservationMade = true;
            } else {
                partitionFunction *= Math.Exp(assignmentValue);
            }
        }
    }

    // Create the cliques by multiplying out table factors
    // TODO:OPT This could be made more efficient by observing first, then dot product
    List<TableFactor> cliquesList = new List<TableFactor>();
    IDictionary<int, GraphicalModel.Factor> cliqueToFactor = new Dictionary<int, GraphicalModel.Factor>();
    int numFactorsCached = 0;
    foreach (GraphicalModel.Factor factor in model.factors) {
        if (IsFullyObservedFactor(factor)) {
            continue;
        }

        TableFactor clique = null;

        // Retrieve cache if exists and none of the observations have changed
        if (cachedFactors.ContainsKey(factor)) {
            CliqueTree.CachedFactorWithObservations cached = cachedFactors[factor];
            bool allConsistent = true;
            for (int slot = 0; slot < factor.neigborIndices.Length; slot++) {
                int variable = factor.neigborIndices[slot];
                bool observedNow = HasObservedValue(variable);
                if (observedNow && (cached.observations[slot] == -1 || ReadObservedValue(variable) != cached.observations[slot])) {
                    allConsistent = false;
                    break;
                }
                // NOTE: This disqualifies lots of stuff for some reason...
                if (!observedNow && cached.observations[slot] != -1) {
                    allConsistent = false;
                    break;
                }
            }
            if (allConsistent) {
                clique = cached.cachedFactor;
                numFactorsCached++;
                if (cached.impossibleObservation) {
                    impossibleObservationMade = true;
                }
            }
        }

        // Otherwise make a new cache
        if (clique == null) {
            int[] observations = new int[factor.neigborIndices.Length];
            for (int slot = 0; slot < observations.Length; slot++) {
                int variable = factor.neigborIndices[slot];
                observations[slot] = HasObservedValue(variable) ? ReadObservedValue(variable) : -1;
            }
            clique = new TableFactor(weights, factor, observations);
            CliqueTree.CachedFactorWithObservations cache = new CliqueTree.CachedFactorWithObservations();
            cache.cachedFactor = clique;
            cache.observations = observations;

            // Check for an impossible observation: a clique without a single positive entry
            bool nonZeroValue = false;
            foreach (int[] assignment in clique) {
                if (clique.GetAssignmentValue(assignment) > 0) {
                    nonZeroValue = true;
                    break;
                }
            }
            if (!nonZeroValue) {
                impossibleObservationMade = true;
                cache.impossibleObservation = true;
            }
            cachedFactors[factor] = cache;
        }

        cliqueToFactor[cliquesList.Count] = factor;
        cliquesList.Add(clique);
    }
    TableFactor[] cliques = cliquesList.ToArray();

    // If we made any impossible observations, we can just return a uniform distribution for all the variables that
    // weren't observed, since that's the semantically correct thing to do (our 'probability' is broken at this
    // point).
    if (impossibleObservationMade) {
        int maxCliqueVar = 0;
        foreach (TableFactor clique in cliques) {
            foreach (int variable in clique.neighborIndices) {
                if (variable > maxCliqueVar) {
                    maxCliqueVar = variable;
                }
            }
        }
        double[][] uniformMarginals = new double[maxCliqueVar + 1][];
        foreach (TableFactor clique in cliques) {
            for (int slot = 0; slot < clique.neighborIndices.Length; slot++) {
                double[] uniform = new double[clique.GetDimensions()[slot]];
                for (int v = 0; v < uniform.Length; v++) {
                    uniform[v] = 1.0 / uniform.Length;
                }
                uniformMarginals[clique.neighborIndices[slot]] = uniform;
            }
        }

        // Create a bunch of all-zero joint marginals and fill up the joint marginals with them
        IDictionary<GraphicalModel.Factor, TableFactor> zeroJointMarginals = new IdentityHashMap<GraphicalModel.Factor, TableFactor>();
        if (includeJointMarginalsAndPartition) {
            foreach (GraphicalModel.Factor factor in model.factors) {
                TableFactor uniformZero = new TableFactor(factor.neigborIndices, factor.featuresTable.GetDimensions());
                foreach (int[] assignment in uniformZero) {
                    uniformZero.SetAssignmentValue(assignment, 0.0);
                }
                zeroJointMarginals[factor] = uniformZero;
            }
        }
        return new CliqueTree.MarginalResult(uniformMarginals, 1.0, zeroJointMarginals);
    }

    // Find the largest contained variable, so that we can size arrays appropriately
    int maxVar = 0;
    foreach (GraphicalModel.Factor factor in model.factors) {
        foreach (int variable in factor.neigborIndices) {
            if (variable > maxVar) {
                maxVar = variable;
            }
        }
    }

    // Indexed by (start-clique, end-clique), this array will remain mostly null in most graphs.
    // Jagged arrays need every row allocated explicitly before they can be indexed.
    TableFactor[][] messages = new TableFactor[cliques.Length][];
    bool[][] backwardPassedMessages = new bool[cliques.Length][];
    for (int row = 0; row < cliques.Length; row++) {
        messages[row] = new TableFactor[cliques.Length];
        backwardPassedMessages[row] = new bool[cliques.Length];
    }

    // OPTIMIZATION:
    // check if we've only added one factor since the last time we ran marginal inference. If that's the case, we
    // can use the new factor as the root, all the messages passed in from the leaves will not have changed. That
    // means we can cut message passing computation in half.
    int forceRootForCachedMessagePassing = -1;
    int[] cachedCliquesBackPointers = null;
    // cachedCliqueList is only assigned at the end of a complete run, so it can still be null here if every
    // earlier run returned early; in that case there is nothing to recycle.
    if (CacheMessages && cachedCliqueList != null && (numFactorsCached == cliques.Length - 1) && (numFactorsCached > 0)) {
        cachedCliquesBackPointers = new int[cliques.Length];
        // Sometimes we'll have cached versions of the factors, but they're from inference steps a long time ago, so we
        // don't get consistent backpointers to our cache of factors. This is a flag to indicate if this happens.
        bool backPointersConsistent = true;
        // Calculate the correspondence between the old cliques list and the new cliques list
        for (int c = 0; c < cliques.Length; c++) {
            cachedCliquesBackPointers[c] = -1;
            for (int old = 0; old < cachedCliqueList.Length; old++) {
                if (object.ReferenceEquals(cliques[c], cachedCliqueList[old])) {
                    cachedCliquesBackPointers[c] = old;
                    break;
                }
            }
            if (cachedCliquesBackPointers[c] == -1) {
                if (forceRootForCachedMessagePassing != -1) {
                    // More than one clique is missing from the cache
                    backPointersConsistent = false;
                    break;
                }
                forceRootForCachedMessagePassing = c;
            }
        }
        if (!backPointersConsistent) {
            forceRootForCachedMessagePassing = -1;
        }
    }

    // Create the data structures to hold the tree pattern
    bool[] visited = new bool[cliques.Length];
    int numVisited = 0;
    int[] visitedOrder = new int[cliques.Length];
    int[] parent = new int[cliques.Length];
    for (int c = 0; c < parent.Length; c++) {
        parent[c] = -1;
    }

    // Figure out which cliques are connected to which trees. This is important for calculating the partition
    // function later, since each tree will converge to its own partition function by multiplication, and we will
    // need to multiply the partition function of each of the trees to get the global one.
    int[] trees = new int[cliques.Length];

    // Forward pass, record a BFS forest pattern that we can use for message passing
    int treeIndex = -1;
    bool[] seenVariable = new bool[maxVar + 1];
    while (numVisited < cliques.Length) {
        treeIndex++;

        // Pick the largest (most neighbors) unvisited clique as the root for message passing
        int root = -1;
        // OPTIMIZATION: if there's a forced root for message passing (a node that we just added) then make it the
        // root
        if (CacheMessages && forceRootForCachedMessagePassing != -1 && !visited[forceRootForCachedMessagePassing]) {
            root = forceRootForCachedMessagePassing;
        } else {
            for (int c = 0; c < cliques.Length; c++) {
                if (!visited[c] && (root == -1 || cliques[c].neighborIndices.Length > cliques[root].neighborIndices.Length)) {
                    root = c;
                }
            }
        }
        System.Diagnostics.Debug.Assert(root != -1);

        Queue<int> toVisit = new Queue<int>();
        toVisit.Enqueue(root);
        bool[] toVisitArray = new bool[cliques.Length];
        toVisitArray[root] = true;

        while (toVisit.Count > 0) {
            int cursor = toVisit.Dequeue();
            trees[cursor] = treeIndex;
            if (visited[cursor]) {
                log.Info("Visited contains: " + cursor);
                log.Info("Visited: " + Arrays.ToString(visited));
                log.Info("To visit: [" + string.Join(", ", toVisit) + "]");
            }
            System.Diagnostics.Debug.Assert(!visited[cursor]);
            visited[cursor] = true;
            visitedOrder[numVisited] = cursor;
            foreach (int variable in cliques[cursor].neighborIndices) {
                seenVariable[variable] = true;
            }
            numVisited++;

            for (int candidate = 0; candidate < cliques.Length; candidate++) {
                if (candidate == cursor || candidate == parent[cursor]) {
                    continue;
                }
                if (!DomainsOverlap(cliques[cursor], cliques[candidate])) {
                    continue;
                }
                // Make sure that for every variable that we've already seen somewhere in the graph, if it's
                // in the child, it's in the parent. Otherwise we'll break the property of continuous
                // transmission of information about variables through messages.
                if (!ParentCoversSeenVariables(cliques[cursor], cliques[candidate], seenVariable)) {
                    // This clique is not good as a child, since we can't pass it all the information it needs
                    // from other elements of the tree
                    continue;
                }
                if (parent[candidate] == -1 && !visited[candidate]) {
                    if (!toVisitArray[candidate]) {
                        toVisit.Enqueue(candidate);
                        toVisitArray[candidate] = true;
                        foreach (int variable in cliques[candidate].neighborIndices) {
                            seenVariable[variable] = true;
                        }
                    }
                    parent[candidate] = cursor;
                }
            }
        }

        // No cycles in the tree
        System.Diagnostics.Debug.Assert(parent[root] == -1);
    }
    System.Diagnostics.Debug.Assert(numVisited == cliques.Length);

    // Backward pass, run the visited list in reverse
    for (int order = numVisited - 1; order >= 0; order--) {
        int cursor = visitedOrder[order];
        int up = parent[cursor];
        if (up == -1) {
            continue;
        }
        backwardPassedMessages[cursor][up] = true;

        // OPTIMIZATION:
        // if these conditions are met we can avoid calculating the message, and instead retrieve from the cache,
        // since they should be the same
        if (CacheMessages && forceRootForCachedMessagePassing != -1
            && cachedCliquesBackPointers[cursor] != -1
            && cachedCliquesBackPointers[up] != -1
            && cachedMessages[cachedCliquesBackPointers[cursor]][cachedCliquesBackPointers[up]] != null
            && cachedBackwardPassedMessages[cachedCliquesBackPointers[cursor]][cachedCliquesBackPointers[up]]) {
            messages[cursor][up] = cachedMessages[cachedCliquesBackPointers[cursor]][cachedCliquesBackPointers[up]];
        } else {
            // Calculate the message to the clique's parent, given all incoming messages so far
            TableFactor message = MultiplyIncomingMessages(cliques, messages, cursor, up);
            messages[cursor][up] = MarginalizeMessage(message, cliques[up].neighborIndices, marginalize);

            // Invalidate any cached outgoing messages
            if (CacheMessages && forceRootForCachedMessagePassing != -1 && cachedCliquesBackPointers[up] != -1) {
                for (int old = 0; old < cachedCliqueList.Length; old++) {
                    cachedMessages[cachedCliquesBackPointers[up]][old] = null;
                }
            }
        }
    }

    // Forward pass, run the visited list forward
    for (int order = 0; order < numVisited; order++) {
        int cursor = visitedOrder[order];
        for (int child = 0; child < cliques.Length; child++) {
            if (parent[child] != cursor) {
                continue;
            }
            TableFactor message = MultiplyIncomingMessages(cliques, messages, cursor, child);
            messages[cursor][child] = MarginalizeMessage(message, cliques[child].neighborIndices, marginalize);
        }
    }

    // OPTIMIZATION:
    // cache the messages, and the current list of cliques
    cachedCliqueList = cliques;
    cachedMessages = messages;
    cachedBackwardPassedMessages = backwardPassedMessages;

    // Calculate final marginals for each variable
    double[][] marginals = new double[maxVar + 1][];

    // Include observed variables as deterministic
    foreach (GraphicalModel.Factor factor in model.factors) {
        for (int slot = 0; slot < factor.neigborIndices.Length; slot++) {
            int n = factor.neigborIndices[slot];
            if (!HasObservedValue(n)) {
                continue;
            }
            double[] deterministic = new double[factor.featuresTable.GetDimensions()[slot]];
            int assignment = ReadObservedValue(n);
            // Valid indices are 0 .. Length - 1, so Length itself is already out of bounds.
            if (assignment < 0 || assignment >= deterministic.Length) {
                throw new InvalidOperationException("Variable " + n + ": Can't have as assignment (" + assignment + ") that is out of bounds for dimension size (" + deterministic.Length + ")");
            }
            deterministic[assignment] = 1.0;
            marginals[n] = deterministic;
        }
    }

    IDictionary<GraphicalModel.Factor, TableFactor> jointMarginals = new IdentityHashMap<GraphicalModel.Factor, TableFactor>();
    bool wantJointMarginals = marginalize == CliqueTree.MarginalizationMethod.Sum && includeJointMarginalsAndPartition;

    if (wantJointMarginals) {
        bool[] partitionIncludesTrees = new bool[treeIndex + 1];
        double[] treePartitionFunctions = new double[treeIndex + 1];

        for (int c = 0; c < cliques.Length; c++) {
            TableFactor convergedClique = MultiplyIncomingMessages(cliques, messages, c, c);
            int tree = trees[c];

            // Calculate the partition function when we're calculating marginals
            // We need one contribution per tree in our forest graph
            if (!partitionIncludesTrees[tree]) {
                partitionIncludesTrees[tree] = true;
                treePartitionFunctions[tree] = convergedClique.ValueSum();
                partitionFunction *= treePartitionFunctions[tree];
            } else if (AssertsEnabled() && !TableFactor.UseExpApprox) {
                // This is all just an elaborate assert
                // Check that our partition function is the same as the trees we're attached to, or with %.1, for numerical reasons.
                // Sometimes the partition function will explode in value, which can make a non-%-based assert worthless here
                double valueSum = convergedClique.ValueSum();
                if (double.IsFinite(valueSum) && double.IsFinite(treePartitionFunctions[tree])) {
                    bool withinTolerance = Math.Abs(treePartitionFunctions[tree] - valueSum) < 1.0e-3 * treePartitionFunctions[tree];
                    if (!withinTolerance) {
                        log.Info("Different partition functions for tree " + tree + ": ");
                        log.Info("Pre-existing for tree: " + treePartitionFunctions[tree]);
                        log.Info("This clique for tree: " + valueSum);
                    }
                    System.Diagnostics.Debug.Assert(withinTolerance);
                }
            }

            // Calculate the factor this clique corresponds to, and put in an entry for joint marginals
            GraphicalModel.Factor cliqueFactor = cliqueToFactor[c];
            System.Diagnostics.Debug.Assert(cliqueFactor != null);
            if (!jointMarginals.ContainsKey(cliqueFactor)) {
                int[] observedAssignments = GetObservedAssignments(cliqueFactor);

                // Collect back pointers from each factor slot to its slot in the clique (observed slots have none)
                int[] backPointers = new int[observedAssignments.Length];
                int nextCliqueSlot = 0;
                for (int slot = 0; slot < observedAssignments.Length; slot++) {
                    if (observedAssignments[slot] == -1) {
                        backPointers[slot] = nextCliqueSlot;
                        nextCliqueSlot++;
                    } else {
                        // This is not strictly necessary but will trigger array OOB exception if things go wrong, so is nice
                        backPointers[slot] = -1;
                    }
                }

                double sum = convergedClique.ValueSum();
                TableFactor jointMarginal = new TableFactor(cliqueFactor.neigborIndices, cliqueFactor.featuresTable.GetDimensions());

                // OPTIMIZATION:
                // Rather than use the standard iterator, which creates lots of int[] arrays on the heap, which need to be GC'd,
                // we use the fast version that just mutates one array. Since this is read once for us here, this is ideal.
                // The enumerator is advanced with MoveNext() before Current is read, as the .NET contract requires.
                using (IEnumerator<int[]> fastAssignments = convergedClique.FastPassByReferenceIterator()) {
                    while (fastAssignments.MoveNext()) {
                        int[] assignment = fastAssignments.Current;
                        double normalizedValue = convergedClique.GetAssignmentValue(assignment) / sum;
                        if (backPointers.Length == assignment.Length) {
                            jointMarginal.SetAssignmentValue(assignment, normalizedValue);
                        } else {
                            // Re-insert the observed values at the slots the clique dropped
                            int[] jointAssignment = new int[backPointers.Length];
                            for (int slot = 0; slot < jointAssignment.Length; slot++) {
                                jointAssignment[slot] = observedAssignments[slot] != -1
                                    ? observedAssignments[slot]
                                    : assignment[backPointers[slot]];
                            }
                            jointMarginal.SetAssignmentValue(jointAssignment, normalizedValue);
                        }
                    }
                }
                jointMarginals[cliqueFactor] = jointMarginal;
            }

            FillMissingMarginals(marginals, convergedClique, marginalize);
        }
    } else {
        // If we don't care about joint marginals, we can be careful about not calculating more cliques than we need to,
        // by explicitly sorting by which cliques are most profitable to calculate over. In this way we can avoid, in
        // the case of a chain CRF, calculating almost half the joint factors.

        // First do a pass where we only calculate all-null neighbors
        for (int c = 0; c < cliques.Length; c++) {
            bool allNull = true;
            foreach (int variable in cliques[c].neighborIndices) {
                if (marginals[variable] != null) {
                    allNull = false;
                }
            }
            if (allNull) {
                FillMissingMarginals(marginals, MultiplyIncomingMessages(cliques, messages, c, c), marginalize);
            }
        }

        // Now we calculate any remaining cliques that still have a variable without marginals
        for (int c = 0; c < cliques.Length; c++) {
            bool anyNull = false;
            foreach (int variable in cliques[c].neighborIndices) {
                if (marginals[variable] == null) {
                    anyNull = true;
                }
            }
            if (anyNull) {
                FillMissingMarginals(marginals, MultiplyIncomingMessages(cliques, messages, c, c), marginalize);
            }
        }
    }

    // Add any factors to the joint marginal map that were fully observed and so didn't get cliques
    if (wantJointMarginals) {
        foreach (GraphicalModel.Factor factor in model.factors) {
            if (jointMarginals.ContainsKey(factor)) {
                continue;
            }
            // This implies that every variable in the factor is observed. If that's the case, we need to construct
            // a one hot TableFactor representing the deterministic distribution.
            TableFactor deterministicJointMarginal = new TableFactor(factor.neigborIndices, factor.featuresTable.GetDimensions());
            int[] observedAssignment = GetObservedAssignments(factor);
            foreach (int observedValue in observedAssignment) {
                System.Diagnostics.Debug.Assert(observedValue != -1);
            }
            deterministicJointMarginal.SetAssignmentValue(observedAssignment, 1.0);
            jointMarginals[factor] = deterministicJointMarginal;
        }
    }

    return new CliqueTree.MarginalResult(marginals, partitionFunction, jointMarginals);
}

/// <summary>Returns true when the metadata of <paramref name="variable"/> carries an observed value.</summary>
private bool HasObservedValue(int variable) {
    return model.GetVariableMetaDataByReference(variable).ContainsKey(VariableObservedValue);
}

/// <summary>Parses the observed value stored in the metadata of <paramref name="variable"/>.</summary>
private int ReadObservedValue(int variable) {
    return System.Convert.ToInt32(model.GetVariableMetaDataByReference(variable)[VariableObservedValue]);
}

/// <summary>Returns true when every neighbor variable of <paramref name="factor"/> has an observed value.</summary>
private bool IsFullyObservedFactor(GraphicalModel.Factor factor) {
    foreach (int variable in factor.neigborIndices) {
        if (!HasObservedValue(variable)) {
            return false;
        }
    }
    return true;
}

/// <summary>
/// Returns true when every already-seen variable of <paramref name="childClique"/> also appears in
/// <paramref name="parentClique"/>, i.e. the parent can pass the child everything it needs about those variables.
/// </summary>
private static bool ParentCoversSeenVariables(TableFactor parentClique, TableFactor childClique, bool[] seenVariable) {
    foreach (int variable in childClique.neighborIndices) {
        if (seenVariable[variable] && Array.IndexOf(parentClique.neighborIndices, variable) < 0) {
            return false;
        }
    }
    return true;
}

/// <summary>
/// Multiplies <c>cliques[target]</c> by every non-null message addressed to it, skipping the message sent by
/// <paramref name="excludedSender"/>.
/// </summary>
private static TableFactor MultiplyIncomingMessages(TableFactor[] cliques, TableFactor[][] messages, int target, int excludedSender) {
    TableFactor product = cliques[target];
    for (int sender = 0; sender < cliques.Length; sender++) {
        if (sender == excludedSender) {
            continue;
        }
        if (messages[sender][target] != null) {
            product = product.Multiply(messages[sender][target]);
        }
    }
    return product;
}

/// <summary>
/// Stores the per-variable marginals of <paramref name="convergedClique"/> for each of its variables that has no
/// entry in <paramref name="marginals"/> yet; does nothing when all of them already have one.
/// </summary>
private static void FillMissingMarginals(double[][] marginals, TableFactor convergedClique, CliqueTree.MarginalizationMethod marginalize) {
    bool anyMissing = false;
    foreach (int variable in convergedClique.neighborIndices) {
        if (marginals[variable] == null) {
            anyMissing = true;
            break;
        }
    }
    if (!anyMissing) {
        return;
    }

    double[][] cliqueMarginals;
    switch (marginalize) {
        case CliqueTree.MarginalizationMethod.Sum: {
            cliqueMarginals = convergedClique.GetSummedMarginals();
            break;
        }
        case CliqueTree.MarginalizationMethod.Max: {
            cliqueMarginals = convergedClique.GetMaxedMarginals();
            break;
        }
        default: {
            throw new InvalidOperationException("Unsupported marginalization method: " + marginalize);
        }
    }

    for (int slot = 0; slot < convergedClique.neighborIndices.Length; slot++) {
        int variable = convergedClique.neighborIndices[slot];
        if (marginals[variable] == null) {
            marginals[variable] = cliqueMarginals[slot];
        }
    }
}