Beispiel #1
0
        /// <summary>
        ///   Initializes a split-set validation for a data set of the given size.
        /// </summary>
        ///
        /// <param name="size">The total number of available samples.</param>
        /// <param name="proportion">The desired proportion of samples in the training
        ///   set in comparison with the testing set.</param>
        ///
        /// <remarks>
        ///   The split is marked as non-stratified. One group label per sample is taken
        ///   from <c>Categorical.Random(size, proportion)</c>; label 0 selects the
        ///   validation set and label 1 selects the training set.
        /// </remarks>
        ///
        public SplitSetValidation(int size, double proportion)
        {
            Proportion = proportion;
            IsStratified = false;

            // Obtain the group label of every sample.
            Indices = Categorical.Random(size, proportion);

            // NOTE(review): confirm that Categorical.Random labels the training
            // portion with 1 (and not 0) for the given proportion.
            ValidationSet = Indices.Find(label => label == 0);
            TrainingSet = Indices.Find(label => label == 1);
        }
Beispiel #2
0
        /// <summary>
        ///   Applies this filter to the given table.
        /// </summary>
        ///
        /// <param name="data">The table to be processed.</param>
        ///
        /// <returns>The table returned by <c>apply</c> for <paramref name="data"/>.</returns>
        ///
        /// <remarks>
        ///   Unless the groups are locked, the group indices are regenerated on every
        ///   call: from a balancing step when columns have been specified, or from
        ///   <c>Categorical.Random</c> otherwise.
        /// </remarks>
        ///
        protected override DataTable ProcessFilter(DataTable data)
        {
            // Locked groups are reused exactly as they currently are.
            if (lockGroups)
            {
                return apply(data);
            }

            if (Columns.Count != 0)
            {
                // Columns were given: balance the occurrences found in the data.
                groupIndices = balancedGroups(data);
            }
            else
            {
                // No columns to balance: assign one group index per row.
                groupIndices = Categorical.Random(data.Rows.Count, Proportion);
            }

            return apply(data);
        }
        /// <summary>
        ///   Runs the learning algorithm.
        /// </summary>
        ///
        /// <param name="observations">The training observation sequences.</param>
        ///
        /// <returns>The log-likelihood computed in the last iteration.</returns>
        ///
        /// <remarks>
        ///   Learning problem. Given some training observation sequences O = {o1, o2, ..., oK}
        ///   and general structure of HMM (numbers of hidden and visible states), determine
        ///   HMM parameters M = (A, B, pi) that best fit training data.
        /// </remarks>
        ///
        public double Run(params T[] observations)
        {
            convergence.Clear();

            double logLikelihood = Double.NegativeInfinity;

            // One path slot for each observation sequence.
            int[][] paths = new int[observations.Length][];

            // Iterate until the convergence criterion reports completion.
            do
            {
                if (batches != 1)
                {
                    // Assign every sequence a batch index.
                    int[] assignment = Categorical.Random(observations.Length, batches);

                    // Run one epoch on the members of each batch in turn.
                    for (int batch = 0; batch < batches; batch++)
                    {
                        var members      = assignment.Find(g => g == batch);
                        var batchInputs  = observations.Submatrix(members);
                        var batchOutputs = paths.Submatrix(members);
                        RunEpoch(batchInputs, batchOutputs);
                    }
                }
                else
                {
                    // A single batch: run the epoch over everything at once.
                    RunEpoch(observations, paths);
                }

                // Evaluate the model on the complete set of observations
                logLikelihood = ComputeLogLikelihood(observations);

                // and hand the new value to the convergence criterion.
                convergence.NewValue = logLikelihood;
            }
            while (!convergence.HasConverged);

            return logLikelihood;
        }
 /// <summary>
 ///   Creates cross-validation folds by generating a vector of random fold indices,
 ///   making sure class labels get equally distributed among the folds.
 /// </summary>
 ///
 /// <param name="labels">A vector containing class labels.</param>
 /// <param name="classes">The number of different classes in <paramref name="labels"/>.</param>
 /// <param name="folds">The number of folds in the cross-validation.</param>
 ///
 /// <returns>A vector of indices defining the fold for each point in the data set.</returns>
 ///
 public static int[] Splittings(int[] labels, int classes, int folds)
 {
     // Delegates entirely to the label-aware overload of Categorical.Random.
     int[] foldIndices = Categorical.Random(labels, classes, folds);

     return foldIndices;
 }
 /// <summary>
 ///   Creates cross-validation folds by generating a vector of random fold indices.
 /// </summary>
 ///
 /// <param name="size">The number of points in the data set.</param>
 /// <param name="folds">The number of folds in the cross-validation.</param>
 ///
 /// <returns>A vector of indices defining the fold for each point in the data set.</returns>
 ///
 public static int[] Splittings(int size, int folds)
 {
     // Delegates entirely to the size-based overload of Categorical.Random.
     int[] foldIndices = Categorical.Random(size, folds);

     return foldIndices;
 }