/// <summary>
///   Creates a new split-set validation algorithm.
/// </summary>
///
/// <param name="size">The total number of available samples.</param>
/// <param name="proportion">The desired proportion of samples in the training
///   set in comparison with the testing set.</param>
///
public SplitSetValidation(int size, double proportion)
{
    this.IsStratified = false;
    this.Proportion = proportion;

    // Randomly assign every sample to one of the two subsets according
    // to the given proportion: 0 marks validation, 1 marks training.
    this.Indices = Categorical.Random(size, proportion);

    this.ValidationSet = this.Indices.Find(x => x == 0);
    this.TrainingSet = this.Indices.Find(x => x == 1);
}
/// <summary>
///   Processes the current filter.
/// </summary>
///
protected override DataTable ProcessFilter(DataTable data)
{
    if (!lockGroups)
    {
        // Regenerate the group assignments: purely at random when no
        // column was selected, or balancing the label occurrences in
        // the selected data column otherwise.
        groupIndices = (Columns.Count == 0)
            ? Categorical.Random(data.Rows.Count, Proportion)
            : balancedGroups(data);
    }

    return apply(data);
}
/// <summary>
///   Runs the learning algorithm.
/// </summary>
///
/// <remarks>
///   Learning problem. Given some training observation sequences O = {o1, o2, ..., oK}
///   and general structure of HMM (numbers of hidden and visible states), determine
///   HMM parameters M = (A, B, pi) that best fit training data.
/// </remarks>
///
public double Run(params T[] observations)
{
    convergence.Clear();

    double logLikelihood = Double.NegativeInfinity;
    int[][] stateSequences = new int[observations.Length][];

    do // repeat until convergence (or the maximum number of iterations)
    {
        if (batches == 1)
        {
            // Learn from the entire observation set at once
            RunEpoch(observations, stateSequences);
        }
        else
        {
            // Randomly partition the observations into the requested
            // number of mini-batches, then learn from each in turn.
            int[] assignments = Categorical.Random(observations.Length, batches);

            for (int b = 0; b < batches; b++)
            {
                var selected = assignments.Find(x => x == b);

                RunEpoch(observations.Submatrix(selected),
                         stateSequences.Submatrix(selected));
            }
        }

        // Measure progress and feed it to the convergence detector
        logLikelihood = ComputeLogLikelihood(observations);
        convergence.NewValue = logLikelihood;

    } while (!convergence.HasConverged);

    return logLikelihood;
}
/// <summary>
///   Create cross-validation folds by generating a vector of random fold indices,
///   making sure class labels get equally distributed among the folds.
/// </summary>
///
/// <param name="labels">A vector containing class labels.</param>
/// <param name="classes">The number of different classes in <paramref name="labels"/>.</param>
/// <param name="folds">The number of folds in the cross-validation.</param>
///
/// <returns>A vector of indices defining a fold for each point in the data set.</returns>
///
public static int[] Splittings(int[] labels, int classes, int folds)
{
    // Delegate to the stratified fold generator, which keeps the
    // class proportions balanced across the generated folds.
    int[] foldIndices = Categorical.Random(labels, classes, folds);
    return foldIndices;
}
/// <summary>
///   Create cross-validation folds by generating a vector of random fold indices.
/// </summary>
///
/// <param name="size">The number of points in the data set.</param>
/// <param name="folds">The number of folds in the cross-validation.</param>
///
/// <returns>A vector of indices defining a fold for each point in the data set.</returns>
///
public static int[] Splittings(int size, int folds)
{
    // Assign each of the 'size' points a uniformly random fold index.
    int[] foldIndices = Categorical.Random(size, folds);
    return foldIndices;
}