/// <summary>
/// Trains the classifier by optimizing parameters based on the training data using the specified solver options.
/// Only one training session may run at a time per Trainer.
/// </summary>
/// <param name="data">The training data.
/// WARNING: In order to save memory, this data is altered in place (instead of copying a new object).</param>
/// <param name="ops">Sets the member variable <see cref="Classifier.Options"/>. If a value is not provided, default options are used.</param>
/// <param name="analysis">The pre-optimization analysis <see cref="Classifier.Analysis"/>. If a value is not provided, the analysis is computed from the training data.</param>
public async Task<Trainer> Train(CategorizedData data, SolverOptions ops = null, PreOptimizationAnalysis analysis = null)
{
	lock (this)
	{
		if (this.trainerIsRunning)
		{
			throw new ApplicationException("The Train method can only be called once at a time. You might consider creating multiple Trainer objects.");
		}
		this.trainerIsRunning = true;
	}
	try
	{
		if (data.Ncats != this.Classifier.Instance.Ncats)
		{
			throw new ArgumentException("The number of categories in the classifier must be equal to the number of categories in the training set.");
		}

		//-----------------------
		// Set the solver options.
		//-----------------------
		if (ops == null)
		{
			ops = new SolverOptions();
		}
		this.options = ops;

		//-----------------------
		// Compute CatWeights.
		//-----------------------
		double cwTotal = 0.0;
		this.totalWeight = 0.0; // Total weight across all training data.
		if (this.options.WeightingRule == WeightingRule.EqualPriors)
		{
			// Each category receives equal total weight, regardless of its sample count.
			for (int iCat = 0; iCat < data.Ncats; iCat++)
			{
				double w = (double)data.Ntotal / (double)data.Neach[iCat] / (double)data.Ncats;
				this.CatWeights[iCat] = w;
				cwTotal += w;
				this.totalWeight += w * (double)data.Neach[iCat];
			}
		}
		else if (this.options.WeightingRule == WeightingRule.ObservedPriors)
		{
			// Each datum receives equal weight, so category weight follows the observed frequencies.
			for (int iCat = 0; iCat < data.Ncats; iCat++)
			{
				this.totalWeight += (float)data.Neach[iCat];
				this.CatWeights[iCat] = 1.0f;
				cwTotal += 1.0f;
			}
		}
		else
		{
			throw new ApplicationException("Unhandled weighting rule.");
		}

		// [iDatum] Used for identifying the category label for each datum.
		byte[] catVec = new byte[data.Ntotal];
		// [iPoly][iDatum] Used for storing the polynomial calculation for each datum.
		float[][] yVec = Static.NewArrays<float>(this.Classifier.Instance.Npoly, data.Ntotal);
		// [iPoly][iDatum] Used for sorting data rows. Sort order is preserved for each polynomial.
		int[][] idxVec = Static.NewArrays<int>(this.Classifier.Instance.Npoly, data.Ntotal);

		//-----------------------
		// Prepare category labels for each datum.
		//-----------------------
		int iDatum = 0;
		for (int iCat = 0; iCat < data.Ncats; iCat++)
		{
			int nRows = data.X[iCat].Length;
			for (int iRow = 0; iRow < nRows; iRow++)
			{
				catVec[iDatum++] = (byte)iCat;
			}
		}
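		// Illustration only (not executed): with Neach = {3, 2}, the loop above
		// yields catVec = {0, 0, 0, 1, 1}. Every per-datum array (catVec,
		// yVec[iPoly], idxVec[iPoly]) shares this category-blocked ordering, so a
		// single iDatum cursor indexes corresponding entries in all of them.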
		if (analysis != null)
		{
			this.Analysis = analysis;
		}
		else
		{
			//-----------------------
			// Condition the data and perform a polynomial expansion.
			//-----------------------
			this.Analysis = new PreOptimizationAnalysis();
			this.Analysis.ConditionMeasurer = SpatialConditionMeasurer.Measure(data);
			this.Analysis.Conditioner = this.Analysis.ConditionMeasurer.Conditioner();
			this.Analysis.Conditioner.Condition(data);
			data.Expand(this.Classifier.Instance.Coeffs);
			this.Analysis.ParamScale = new float[this.Classifier.Instance.Coeffs.Ncoeffs];
			this.Analysis.ParamInit = Static.NewArrays<float>(this.Classifier.Instance.Ncats, this.Classifier.Instance.Coeffs.Ncoeffs);
			//if (ops.InitializeParams)
			this.Analysis.Crits = Static.NewArrays<UniCrit>(this.Classifier.Instance.Ncats, this.Classifier.Instance.Coeffs.Ncoeffs);

			//-----------------------
			// Get the parameter scale.
			//-----------------------
			float scale;
			int i15 = (int)(0.5 + 0.15 * (double)(data.Ntotal - 1));
			int i50 = (int)(0.5 + 0.50 * (double)(data.Ntotal - 1));
			int i85 = (int)(0.5 + 0.85 * (double)(data.Ntotal - 1));
			float[] xVec = yVec[0];
			for (int iCoeff = 0; iCoeff < this.Classifier.Instance.Coeffs.Ncoeffs; iCoeff++)
			{
				// Quantiles determine the parameter scale.
				Static.FillSeries(idxVec[0]);
				iDatum = 0;
				for (int iCat = 0; iCat < data.Ncats; iCat++)
				{
					int nSamp = data.Neach[iCat];
					for (int iSamp = 0; iSamp < nSamp; iSamp++)
					{
						xVec[iDatum++] = data.X[iCat][iSamp][iCoeff];
					}
				}
				Static.QuickSortIndex(idxVec[0], xVec, 0, xVec.Length - 1);
				scale = xVec[idxVec[0][i85]] - xVec[idxVec[0][i15]];
				this.Analysis.ParamScale[iCoeff] = (float)(1.0 / scale);

				//if (ops.InitializeParams)
				//{
				// Get univariate classification criteria to get a first-order clue about the saliency of each feature.
				for (int iCat = 0; iCat < data.Ncats; iCat++)
				{
					this.Analysis.Crits[iCat][iCoeff] = UniCrit.MaximumAccuracy(iCat, catVec, xVec, idxVec[0], this.CatWeights);
				}
				//}
			}

			//-----------------------
			// Compute initial params.
			//-----------------------
			// Compute the expected minimum value for the univariate 2-category classification accuracy.
			double[] accMin = new double[data.Ncats];
			for (int iCat = 0; iCat < data.Ncats; iCat++)
			{
				accMin[iCat] = (this.totalWeight - this.CatWeights[iCat] * (double)data.Neach[iCat]) / this.totalWeight;
				if (accMin[iCat] < 0.5)
				{
					accMin[iCat] = 1.0 - accMin[iCat];
				}
			}
			// The magnitude of each parameter is a function of univariate classification accuracy for the corresponding spatial dimension.
			double invNtotal = 1.0 / data.Ntotal;
			for (int iCoeff = 0; iCoeff < this.Classifier.Instance.Coeffs.Ncoeffs; iCoeff++)
			{
				if (this.Classifier.Instance.Npoly == 1)
				{
					double tAcc = 0.5 * (
						Math.Max(0.0, this.Analysis.Crits[0][iCoeff].Accuracy - accMin[0])
						+ Math.Max(0.0, this.Analysis.Crits[1][iCoeff].Accuracy - accMin[1]));
					tAcc /= (1.0 - 0.5 * (accMin[0] + accMin[1]) + invNtotal);
					if (this.Analysis.Crits[0][iCoeff].TargetUpper)
					{
						this.Analysis.ParamInit[0][iCoeff] = (float)tAcc * this.Analysis.ParamScale[iCoeff];
					}
					else
					{
						this.Analysis.ParamInit[0][iCoeff] = -(float)tAcc * this.Analysis.ParamScale[iCoeff];
					}
				}
				else
				{
					for (int iPoly = 0; iPoly < this.Classifier.Instance.Npoly; iPoly++)
					{
						double tAcc = Math.Max(0.0, this.Analysis.Crits[iPoly][iCoeff].Accuracy - accMin[iPoly]);
						tAcc /= (1.0 - accMin[iPoly] + invNtotal);
						if (this.Analysis.Crits[iPoly][iCoeff].TargetUpper)
						{
							this.Analysis.ParamInit[iPoly][iCoeff] = (float)tAcc * this.Analysis.ParamScale[iCoeff];
						}
						else
						{
							this.Analysis.ParamInit[iPoly][iCoeff] = -(float)tAcc * this.Analysis.ParamScale[iCoeff];
						}
					}
				}
			}
		}
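		// Restating the initialization above as one formula (no new behavior):
		//
		//   ParamInit[iPoly][iCoeff] = ±ParamScale[iCoeff]
		//       * max(0, Crits[iPoly][iCoeff].Accuracy - accMin[iPoly])
		//       / (1 - accMin[iPoly] + 1/Ntotal)
		//
		// where ParamScale is the reciprocal of the 15th-85th interquantile range,
		// the sign follows TargetUpper, and the Npoly == 1 case averages the two
		// categories' accuracy gains. Features that separate the categories well
		// therefore start with proportionally larger coefficients.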
		//-----------------------
		// Set classifier parameters.
		//-----------------------
		if (ops.InitializeParams)
		{
			Static.Copy<float>(this.Analysis.ParamInit, this.Classifier.Instance.Params);
		}
		else
		{
			// Inherit parameters passed in by the classifier. We assume the classifier was already initialized with parameters.
			Static.Copy<float>(this.Classifier.Instance.Params, this.Analysis.ParamInit);
		}

		//-----------------------
		// Perform dual optimizations.
		//-----------------------
		if (this.Classifier.Instance.Npoly > 2 && this.options.InitializeParams)
		{
			SolverOptions dualOps = this.options.Copy();
			dualOps.WeightingRule = WeightingRule.EqualPriors;
			Task<Trainer>[] dualTasks = new Task<Trainer>[this.Classifier.Instance.Npoly];
			for (int iPoly = 0; iPoly < this.Classifier.Instance.Npoly; iPoly++)
			{
				Trainer t = new Trainer(this.Classifier.Instance.GetDual(iPoly));
				dualTasks[iPoly] = t.Train(data.GetDual(iPoly), dualOps);
			}
			for (int iPoly = 0; iPoly < this.Classifier.Instance.Npoly; iPoly++)
			{
				Trainer t = await dualTasks[iPoly];
				Array.Copy(t.Classifier.Instance.Params, this.Analysis.ParamInit[iPoly], this.Classifier.Instance.Coeffs.Ncoeffs);
			}
			Static.Copy<float>(this.Analysis.ParamInit, this.Classifier.Instance.Params);
		}

		//-----------------------
		// Finish constructing the classifier for the first time.
		//-----------------------
		MonotonicRegressor regressor = new MonotonicRegressor();
		for (int iPoly = 0; iPoly < this.Classifier.Instance.Npoly; iPoly++)
		{
			Static.FillSeries(idxVec[iPoly]);
		}
		for (int iPoly = 0; iPoly < this.Classifier.Instance.Npoly; iPoly++)
		{
			// Set the quantized probability limits.
			double nPerQuantile = (double)Math.Min(data.Neach[iPoly], data.Ntotal - data.Neach[iPoly]) / (double)this.Classifier.Instance.Quant[iPoly].Nquantiles;
			this.Classifier.Instance.Quant[iPoly].Pmin = 1.0 / nPerQuantile;
			this.Classifier.Instance.Quant[iPoly].Pmax = 1.0 - this.Classifier.Instance.Quant[iPoly].Pmin;

			// Evaluate the polynomial expression for each datum.
			iDatum = 0;
			for (int iCat = 0; iCat < data.Ncats; iCat++)
			{
				int nSamp = data.Neach[iCat];
				for (int iSamp = 0; iSamp < nSamp; iSamp++)
				{
					yVec[iPoly][iDatum++] = (float)this.Classifier.Instance.EvalPolyFromExpanded(iPoly, data.X[iCat][iSamp]);
				}
			}

			// Sort the output. Indexes are preserved to speed up subsequent sorts.
			Static.QuickSortIndex(idxVec[iPoly], yVec[iPoly], 0, data.Ntotal - 1);

			// Quantize the output.
			this.Classifier.Instance.Quant[iPoly].Measure(idxVec[iPoly], yVec[iPoly], catVec, (byte)iPoly, this.CatWeights, totalWeight, regressor);
		}

		//-----------------------
		// Measure the conditional entropy.
		//-----------------------
		float[] y = new float[this.Classifier.Instance.Npoly];
		double[] p;
		double fit = 0.0;
		byte c;
		for (iDatum = 0; iDatum < data.Ntotal; iDatum++)
		{
			for (int iPoly = 0; iPoly < this.Classifier.Instance.Npoly; iPoly++)
			{
				y[iPoly] = yVec[iPoly][iDatum];
			}
			p = this.Classifier.Instance.ClassifyPolynomialOutputs(y);
			c = catVec[iDatum];
			fit += Math.Log(p[c]) * this.CatWeights[c];
		}
		// Change logarithm base and normalize by total weight.
		this.Classifier.Fit = -fit / totalWeight / Math.Log((double)data.Ncats); // <-- The conditional entropy... we want to minimize it.
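		// Restating the fit measure: with c_i the true category of datum i,
		//
		//   Fit = -Σ_i CatWeights[c_i] · ln(p_i[c_i]) / (totalWeight · ln(Ncats))
		//
		// i.e. the weighted conditional entropy in base Ncats: 0 for perfectly
		// confident correct classification, 1 when every p_i is uniform (chance).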
		//-----------------------
		// Prepare optimization memory.
		//-----------------------
		// Initialize an orthonormal basis if the parameter space is small enough.
		float[][][] ortho = null;
		int nParams = this.Classifier.Instance.Npoly * this.Classifier.Instance.Coeffs.Ncoeffs;
		if (nParams <= 100)
		{
			ortho = this.randomDeviates();
		}
		int iOrtho = 0;
		int ctOrtho = 0;
		int ctSteps = 0;
		// Every `modOrtho` steps through the orthonormal basis, we try a gradient search.
		int modOrtho = Math.Min(10, nParams);
		// For a trip through `modOrtho` bases, changes in entropy are partialled across the parameter space.
		float[][] dhOrtho = Static.NewArrays<float>(this.Classifier.Instance.Npoly, this.Classifier.Instance.Coeffs.Ncoeffs);

		//-----------------------
		// Optimize.
		//-----------------------
		// The attempted classifier.
		Classifier cTry = this.Classifier.Instance.Copy();
		// The initial step size.
		float stepSize = this.Options.ParamDiffMax;
		// The optimization mode. We start by iterating through the orthogonal bases.
		OptimizationMode mode = OptimizationMode.Ortho;

		throw new NotImplementedException("TO DO");

		bool keepOptimizing = true;
		while (keepOptimizing)
		{
			if (mode == OptimizationMode.Ortho)
			{
				if (iOrtho >= modOrtho)
				{
					iOrtho = 0;
				}
			}
			else if (mode == OptimizationMode.Gradient)
			{
			}
			else
			{
				throw new ApplicationException("Unhandled optimization mode.");
			}
		}
	}
	finally
	{
		this.trainerIsRunning = false;
	}
}
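// A minimal usage sketch (hypothetical setup; a Trainer is constructed from a
// classifier as in the dual optimizations above, and `data` is a prepared
// CategorizedData):
//
//   Trainer trainer = new Trainer(classifier);
//   SolverOptions ops = new SolverOptions();
//   ops.WeightingRule = WeightingRule.EqualPriors; // upweight rare categories
//   Trainer trained = await trainer.Train(data, ops);
//   double h = trained.Classifier.Fit; // normalized conditional entropy; lower is better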
/// <summary>
/// Measures a space conditioner for the training data.
/// </summary>
/// <param name="data">The training data.</param>
/// <returns>The space conditioner which has been measured for the training data.</returns>
public static SpatialConditionMeasurer Measure(CategorizedData data)
{
	if (data == null)
	{
		return null;
	}
	SpatialConditionMeasurer output = new SpatialConditionMeasurer(data.Ncats, data.Ndims);
	float temp;
	int[] idxVec = null;
	for (int iCat = 0; iCat < data.Ncats; iCat++)
	{
		if (idxVec == null || idxVec.Length != data.Neach[iCat])
		{
			idxVec = new int[data.Neach[iCat]];
		}
		bool isOdd = data.Neach[iCat] % 2 == 1;
		int iMed = data.Neach[iCat] / 2;
		for (int iCol = 0; iCol < data.Ndims; iCol++)
		{
			Static.FillSeries(idxVec);
			Static.QuickSortIndex(idxVec, data.X[iCat], iCol, 0, data.Neach[iCat] - 1);
			// Read the median through the sorted index: QuickSortIndex orders idxVec
			// rather than the rows themselves (cf. its use in Trainer.Train).
			if (isOdd)
			{
				output.Medians[iCat][iCol] = temp = data.X[iCat][idxVec[iMed]][iCol];
			}
			else
			{
				output.Medians[iCat][iCol] = temp = (data.X[iCat][idxVec[iMed - 1]][iCol] + data.X[iCat][idxVec[iMed]][iCol]) / 2.0f;
			}
			output.AvgMedian[iCol] += temp;
		}
	}
	for (int iCol = 0; iCol < data.Ndims; iCol++)
	{
		output.Spread[iCol] = 0.0f;
		output.AvgMedian[iCol] /= (float)data.Ncats;
		for (int iCat = 0; iCat < data.Ncats; iCat++)
		{
			double ssMedian = 0.0;
			double ssOrigin = 0.0;
			double x, dx;
			int nRows = data.Neach[iCat];
			for (int iRow = 0; iRow < nRows; iRow++)
			{
				x = data.X[iCat][iRow][iCol];
				dx = x - output.Medians[iCat][iCol];
				ssMedian += dx * dx;
				dx = x - output.AvgMedian[iCol];
				ssOrigin += dx * dx;
			}
			dx = 1.0 / (double)(nRows - 1);
			output.Spreads[iCat][iCol] = (float)Math.Sqrt(dx * ssMedian);
			output.Spread[iCol] += (float)Math.Sqrt(dx * ssOrigin) / (float)data.Ncats;
		}
	}
	return output;
}
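// A minimal sketch of how Measure feeds the training pipeline (mirroring the
// calls in Trainer.Train; the exact conditioning semantics are assumed):
//
//   SpatialConditionMeasurer m = SpatialConditionMeasurer.Measure(data);
//   var conditioner = m.Conditioner(); // presumably centers on AvgMedian and rescales by Spread
//   conditioner.Condition(data);       // WARNING: alters `data` in place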