Ejemplo n.º 1
0
        /// <summary>Product two factors, taking the multiplication at the intersections.</summary>
        /// <remarks>
        /// The two tables are combined by adding their stored entries. The accessors used here are named
        /// <c>*LogValue</c>, so the entries are presumably log-space values, where addition corresponds to a product.
        /// Two fast paths handle a 2-variable factor combined with a 1-variable factor over one of its variables;
        /// every other combination goes through the general join over the union of both variable sets.
        /// </remarks>
        /// <param name="other">the other factor to be multiplied</param>
        /// <returns>a factor containing the union of both variable sets</returns>
        public virtual Edu.Stanford.Nlp.Loglinear.Inference.TableFactor Multiply(Edu.Stanford.Nlp.Loglinear.Inference.TableFactor other)
        {
            // Calculate the result domain: our variables first, then the other factor's variables we do not have yet
            IList<int> domain       = new List<int>();
            IList<int> otherDomain  = new List<int>();
            IList<int> resultDomain = new List<int>();

            foreach (int n in neighborIndices)
            {
                domain.Add(n);
                resultDomain.Add(n);
            }
            foreach (int n in other.neighborIndices)
            {
                otherDomain.Add(n);
                if (!resultDomain.Contains(n))
                {
                    resultDomain.Add(n);
                }
            }
            // OPTIMIZATION:
            // The special case where we're a message of size 1, and the other factor is receiving the message, and of size 2.
            // Swapping the operands lets the fast path below handle it. This is checked before the result table is
            // allocated, because the swapped call builds its own result.
            if (domain.Count == 1 && resultDomain.Count == otherDomain.Count && resultDomain.Count == 2)
            {
                return other.Multiply(this);
            }
            // Create result TableFactor
            int[] resultNeighborIndices = new int[resultDomain.Count];
            int[] resultDimensions      = new int[resultNeighborIndices.Length];
            for (int i = 0; i < resultDomain.Count; i++)
            {
                int var = resultDomain[i];
                resultNeighborIndices[i] = var;
                // assert consistency about variable size, we can't have the same variable with two different sizes
                // (a size of 0 is accepted on either side, i.e. when only one factor reports a size for the variable)
                System.Diagnostics.Debug.Assert(((GetVariableSize(var) == 0 && other.GetVariableSize(var) > 0) || (GetVariableSize(var) > 0 && other.GetVariableSize(var) == 0) || (GetVariableSize(var) == other.GetVariableSize(var))));
                resultDimensions[i] = Math.Max(GetVariableSize(var), other.GetVariableSize(var));
            }
            Edu.Stanford.Nlp.Loglinear.Inference.TableFactor result = new Edu.Stanford.Nlp.Loglinear.Inference.TableFactor(resultNeighborIndices, resultDimensions);
            // OPTIMIZATION:
            // If we're a factor of size 2 receiving a message of size 1, then we can optimize that pretty heavily
            // We could just use the general algorithm at the end of this set of special cases, but this is the fastest way
            if (otherDomain.Count == 1 && (resultDomain.Count == domain.Count) && domain.Count == 2)
            {
                int msgVar   = otherDomain[0];
                int msgIndex = resultDomain.IndexOf(msgVar);
                // The flat index of cell (i, j) is i * resultDimensions[1] + j, so the message value is picked
                // by i when it is over the first variable and by j when it is over the second.
                if (msgIndex == 0)
                {
                    for (int i = 0; i < resultDimensions[0]; i++)
                    {
                        double d = other.values[i];
                        int    k = i * resultDimensions[1];
                        for (int j = 0; j < resultDimensions[1]; j++)
                        {
                            int index = k + j;
                            result.values[index] = values[index] + d;
                        }
                    }
                }
                else if (msgIndex == 1)
                {
                    for (int i = 0; i < resultDimensions[0]; i++)
                    {
                        int k = i * resultDimensions[1];
                        for (int j = 0; j < resultDimensions[1]; j++)
                        {
                            int index = k + j;
                            result.values[index] = values[index] + other.values[j];
                        }
                    }
                }
                return result;
            }
            // Otherwise we follow the big comprehensive, slow general purpose algorithm
            // Calculate back-pointers from the result domain indices to original indices (-1 = variable not in that factor)
            int[] mapping      = new int[result.neighborIndices.Length];
            int[] otherMapping = new int[result.neighborIndices.Length];
            for (int i = 0; i < result.neighborIndices.Length; i++)
            {
                mapping[i]      = domain.IndexOf(result.neighborIndices[i]);
                otherMapping[i] = otherDomain.IndexOf(result.neighborIndices[i]);
            }
            // Do the actual joining operation between the two tables, applying 'join' for each result element.
            int[] assignment      = new int[neighborIndices.Length];
            int[] otherAssignment = new int[other.neighborIndices.Length];
            // OPTIMIZATION:
            // Rather than use the standard iterator, which creates lots of int[] arrays on the heap, which need to be GC'd,
            // we use the fast version that just mutates one array. Since this is read once for us here, this is ideal.
            // The enumerator is advanced with MoveNext() before every read of Current, as IEnumerator<T> requires.
            using (IEnumerator<int[]> resultAssignments = result.FastPassByReferenceIterator())
            {
                while (resultAssignments.MoveNext())
                {
                    int[] resultAssignment = resultAssignments.Current;
                    // Project the result assignment onto each operand's own variable order
                    for (int i = 0; i < resultAssignment.Length; i++)
                    {
                        if (mapping[i] != -1)
                        {
                            assignment[mapping[i]] = resultAssignment[i];
                        }
                        if (otherMapping[i] != -1)
                        {
                            otherAssignment[otherMapping[i]] = resultAssignment[i];
                        }
                    }
                    result.SetAssignmentLogValue(resultAssignment, GetAssignmentLogValue(assignment) + other.GetAssignmentLogValue(otherAssignment));
                }
            }
            return result;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Marginalizes out a variable by applying an associative join operation for each possible assignment to the
        /// marginalized variable.
        /// </summary>
        /// <remarks>
        /// Every cell of the result is first set to <paramref name="startingValue"/>. Then each assignment of this
        /// factor is visited once and folded into the result cell obtained by dropping the marginalized variable.
        /// </remarks>
        /// <param name="variable">the variable (by 'name', not offset into neighborIndices)</param>
        /// <param name="startingValue">associativeJoin is basically a foldr over a table, and this is the initialization</param>
        /// <param name="curriedFoldr">
        /// the associative function to use when applying the join operation, taking first the
        /// assignment to the value being marginalized (plus the result assignment), and then a foldr operation
        /// that receives the current result value and this factor's value. Must not be null.
        /// </param>
        /// <returns>
        /// a new TableFactor that doesn't contain 'variable', where values were gotten through associative
        /// marginalization.
        /// </returns>
        private Edu.Stanford.Nlp.Loglinear.Inference.TableFactor Marginalize(int variable, double startingValue, IBiFunction <int, int[], IBiFunction <double, double, double> > curriedFoldr)
        {
            // Can't marginalize the last variable
            System.Diagnostics.Debug.Assert((GetDimensions().Length > 1));
            // Calculate the result domain: every neighbor except the marginalized variable, in the original order
            IList<int> resultDomain = new List<int>();

            foreach (int n in neighborIndices)
            {
                if (n != variable)
                {
                    resultDomain.Add(n);
                }
            }
            // Create result TableFactor
            int[] resultNeighborIndices = new int[resultDomain.Count];
            int[] resultDimensions      = new int[resultNeighborIndices.Length];
            for (int i = 0; i < resultDomain.Count; i++)
            {
                int var = resultDomain[i];
                resultNeighborIndices[i] = var;
                resultDimensions[i]      = GetVariableSize(var);
            }
            Edu.Stanford.Nlp.Loglinear.Inference.TableFactor result = new Edu.Stanford.Nlp.Loglinear.Inference.TableFactor(resultNeighborIndices, resultDimensions);
            // Calculate forward-pointers from the old domain to new domain (-1 marks the marginalized variable)
            int[] mapping = new int[neighborIndices.Length];
            for (int i = 0; i < neighborIndices.Length; i++)
            {
                mapping[i] = resultDomain.IndexOf(neighborIndices[i]);
            }
            // Initialize
            foreach (int[] assignment in result)
            {
                result.SetAssignmentLogValue(assignment, startingValue);
            }
            // Do the actual fold into the result
            int[] resultAssignment          = new int[result.neighborIndices.Length];
            int   marginalizedVariableValue = 0;
            // OPTIMIZATION:
            // Rather than use the standard iterator, which creates lots of int[] arrays on the heap, which need to be GC'd,
            // we use the fast version that just mutates one array. Since this is read once for us here, this is ideal.
            // The enumerator is advanced with MoveNext() before every read of Current, as IEnumerator<T> requires.
            using (IEnumerator<int[]> assignments = FastPassByReferenceIterator())
            {
                while (assignments.MoveNext())
                {
                    int[] current = assignments.Current;
                    // Split the assignment into the result assignment and the marginalized variable's value
                    for (int i = 0; i < current.Length; i++)
                    {
                        if (mapping[i] != -1)
                        {
                            resultAssignment[mapping[i]] = current[i];
                        }
                        else
                        {
                            marginalizedVariableValue = current[i];
                        }
                    }
                    result.SetAssignmentLogValue(resultAssignment, curriedFoldr.Apply(marginalizedVariableValue, resultAssignment).Apply(result.GetAssignmentLogValue(resultAssignment), GetAssignmentLogValue(current)));
                }
            }
            return result;
        }
Ejemplo n.º 3
0
 /// <summary>Marginalize out a variable by taking a sum.</summary>
 /// <remarks>
 /// The sum is computed with the stable log-sum-exp trick: for every remaining assignment the maximum over the
 /// summed-out variable is used as a pivot, the exponentials of (value - pivot) are added up, and the result
 /// is pivot + log(sum). When the pivot is not finite it is returned as-is, so no NaN is produced.
 /// Factors with exactly two variables take a closed-loop fast path; all others use the general fallback.
 /// </remarks>
 /// <param name="variable">the variable to be summed out</param>
 /// <returns>a factor with variable removed</returns>
 public virtual Edu.Stanford.Nlp.Loglinear.Inference.TableFactor SumOut(int variable)
 {
     int[] dims = GetDimensions();
     if (dims.Length != 2)
     {
         // If the special case below doesn't trip, we fall back to the standard (but slower) algorithm
         return SumOutGeneralCase(variable);
     }
     // OPTIMIZATION: This is by far the most common case, for linear chain inference, and is worth making fast
     // We can use closed loops, and not bother with using the basic iterator to loop through indices.
     bool sumFirstAxis = neighborIndices[0] == variable;
     if (!sumFirstAxis)
     {
         System.Diagnostics.Debug.Assert((neighborIndices[1] == variable));
     }
     int rows     = dims[0];
     int cols     = dims[1];
     int keptAxis = sumFirstAxis ? 1 : 0;
     int keptSize = dims[keptAxis];
     Edu.Stanford.Nlp.Loglinear.Inference.TableFactor marginalized = new Edu.Stanford.Nlp.Loglinear.Inference.TableFactor(new int[] { neighborIndices[keptAxis] }, new int[] { keptSize });
     // We use the stable log-sum-exp trick here, so first we calculate the max per kept value.
     // The flat index of cell (r, c) is r * cols + c; the kept slot is c when the first variable is summed out, else r.
     double[] max = new double[keptSize];
     for (int slot = 0; slot < keptSize; slot++)
     {
         max[slot] = double.NegativeInfinity;
     }
     for (int r = 0; r < rows; r++)
     {
         int rowStart = r * cols;
         for (int c = 0; c < cols; c++)
         {
             int slot = sumFirstAxis ? c : r;
             double value = values[rowStart + c];
             if (value > max[slot])
             {
                 max[slot] = value;
             }
         }
     }
     // Then we take the sum, minus the max
     double[] sums = new double[keptSize];
     for (int r = 0; r < rows; r++)
     {
         int rowStart = r * cols;
         for (int c = 0; c < cols; c++)
         {
             int slot = sumFirstAxis ? c : r;
             if (double.IsFinite(max[slot]))
             {
                 sums[slot] += Math.Exp(values[rowStart + c] - max[slot]);
             }
         }
     }
     // And now we take the log, and add back in the max values
     for (int slot = 0; slot < keptSize; slot++)
     {
         if (double.IsFinite(max[slot]))
         {
             marginalized.values[slot] = max[slot] + Math.Log(sums[slot]);
         }
         else
         {
             marginalized.values[slot] = max[slot];
         }
     }
     return marginalized;
 }

 /// <summary>
 /// General-case implementation of <see cref="SumOut"/> for factors that do not have exactly two variables.
 /// </summary>
 /// <remarks>
 /// Uses the result of <c>MaxOut(variable)</c> as log-sum-exp pivots. This assumes that MaxOut returns a factor
 /// over the remaining variables in their original order, so the same assignment array addresses both tables.
 /// </remarks>
 /// <param name="variable">the variable to be summed out</param>
 /// <returns>a factor with variable removed</returns>
 private Edu.Stanford.Nlp.Loglinear.Inference.TableFactor SumOutGeneralCase(int variable)
 {
     // Can't marginalize the last variable
     System.Diagnostics.Debug.Assert((GetDimensions().Length > 1));
     // First we calculate all the max values to use as pivots to prevent overflow
     Edu.Stanford.Nlp.Loglinear.Inference.TableFactor maxValues = MaxOut(variable);
     // Calculate the result domain: every neighbor except the summed-out variable, in the original order
     IList<int> resultDomain = new List<int>();
     foreach (int n in neighborIndices)
     {
         if (n != variable)
         {
             resultDomain.Add(n);
         }
     }
     int[] resultNeighborIndices = new int[resultDomain.Count];
     int[] resultDimensions      = new int[resultDomain.Count];
     for (int i = 0; i < resultDomain.Count; i++)
     {
         resultNeighborIndices[i] = resultDomain[i];
         resultDimensions[i]      = GetVariableSize(resultDomain[i]);
     }
     Edu.Stanford.Nlp.Loglinear.Inference.TableFactor marginalized = new Edu.Stanford.Nlp.Loglinear.Inference.TableFactor(resultNeighborIndices, resultDimensions);
     // Forward-pointers from our variable positions to result positions (-1 marks the summed-out variable)
     int[] mapping = new int[neighborIndices.Length];
     for (int i = 0; i < neighborIndices.Length; i++)
     {
         mapping[i] = resultDomain.IndexOf(neighborIndices[i]);
     }
     // Start every sum at zero
     foreach (int[] cell in marginalized)
     {
         marginalized.SetAssignmentLogValue(cell, 0);
     }
     // Then we do the sum against an offset from the pivots
     int[] resultAssignment = new int[resultDomain.Count];
     foreach (int[] assignment in this)
     {
         for (int i = 0; i < assignment.Length; i++)
         {
             if (mapping[i] != -1)
             {
                 resultAssignment[mapping[i]] = assignment[i];
             }
         }
         double pivot = maxValues.GetAssignmentLogValue(resultAssignment);
         // A non-finite pivot would turn (value - pivot) into NaN; such cells are resolved to the pivot below
         if (double.IsFinite(pivot))
         {
             marginalized.SetAssignmentLogValue(resultAssignment, marginalized.GetAssignmentLogValue(resultAssignment) + Math.Exp(GetAssignmentLogValue(assignment) - pivot));
         }
     }
     // Then we factor the max values back in, and take the log of the accumulated sums
     foreach (int[] cell in marginalized)
     {
         double pivot = maxValues.GetAssignmentLogValue(cell);
         if (double.IsFinite(pivot))
         {
             marginalized.SetAssignmentLogValue(cell, pivot + Math.Log(marginalized.GetAssignmentLogValue(cell)));
         }
         else
         {
             marginalized.SetAssignmentLogValue(cell, pivot);
         }
     }
     return marginalized;
 }