/// <summary>
/// Scores a single datum against this classifier's weights, returning a counter
/// that maps every label in <c>labelIndex</c> to its sigmoid score.
/// </summary>
/// <param name="example">The datum to score; may be a plain (binary-feature) datum or an RVFDatum with real feature values.</param>
/// <returns>A counter over all labels with one sigmoid score per class.</returns>
public virtual ICounter<L> ProbabilityOf(IDatum<L, F> example)
{
    // Map the datum's features to indices in this classifier's feature index.
    int[] featureIndices = LogisticUtils.IndicesOf(example.AsFeatures(), featureIndex);
    double[] featureValues;
    // BUG FIX: this previously tested "example is RVFDatum<object, object>" — a Java
    // type-erasure artifact. Unlike Java, C# generics are reified, so an IDatum<L, F>
    // is never an RVFDatum<object, object> unless L and F are literally object; the
    // real-valued branch was dead and RVF datums silently got all-ones values.
    if (example is RVFDatum<L, F>)
    {
        ICollection<double> featureValuesCollection = ((RVFDatum<L, F>)example).AsFeaturesCounter().Values();
        featureValues = LogisticUtils.ConvertToArray(featureValuesCollection);
    }
    else
    {
        // Binary-feature datum: every present feature counts with value 1.0.
        featureValues = new double[example.AsFeatures().Count];
        Arrays.Fill(featureValues, 1.0);
    }
    // Score each class and accumulate the results into a counter keyed by label.
    ICounter<L> result = new ClassicCounter<L>();
    int numClasses = labelIndex.Size();
    double[] sigmoids = LogisticUtils.CalculateSigmoids(weights, featureIndices, featureValues);
    for (int c = 0; c < numClasses; c++)
    {
        L label = labelIndex.Get(c);
        result.IncrementCount(label, sigmoids[c]);
    }
    return result;
}
/// <summary>
/// Evaluates the objective (accumulated into the inherited <c>value</c>) and its
/// gradient (accumulated into the inherited <c>derivative</c>) at the given flat
/// parameter vector, then adds a quadratic prior term over the first
/// <c>numL2Parameters</c> entries of each class block unless the prior is Null.
/// Class 0 is the reference class and carries no parameters of its own.
/// </summary>
/// <param name="thetasArray">Flat parameter vector of length (numClasses - 1) * numFeatures.</param>
protected internal override void Calculate(double[] thetasArray)
{
    ClearResults();
    // BUG FIX: this was "double[][] thetas = new double[][] { };" — a zero-length
    // jagged array (a broken Java-to-C# conversion of "new double[a][b]"), so
    // LogisticUtils.Unflatten had no rows to fill and the sums below were computed
    // from an empty weight matrix. Allocate (numClasses - 1) rows of numFeatures,
    // matching the (c - 1) * numFeatures flat offsets used throughout this method.
    double[][] thetas = new double[numClasses - 1][];
    for (int r = 0; r < thetas.Length; r++)
    {
        thetas[r] = new double[numFeatures];
    }
    LogisticUtils.Unflatten(thetasArray, thetas);
    for (int i = 0; i < data.Length; i++)
    {
        int[] featureIndices = data[i];
        double[] featureValues = dataValues[i];
        // One (log-domain) score per class for this datum.
        double[] sums = LogisticUtils.CalculateSums(thetas, featureIndices, featureValues);
        for (int c = 0; c < numClasses; c++)
        {
            double sum = sums[c];
            value -= sum * labels[i][c];
            // Class 0 is the reference class: no parameters, so no gradient block.
            if (c == 0)
            {
                continue;
            }
            int offset = (c - 1) * numFeatures;
            double error = Math.Exp(sum) - labels[i][c];
            for (int f = 0; f < featureIndices.Length; f++)
            {
                int index = featureIndices[f];
                double x = featureValues[f];
                derivative[offset + index] -= error * x;
            }
        }
    }
    // Incorporate the Gaussian (L2) prior, if any.
    // NOTE(review): this relies on LogPrior declaring its own GetType() that returns a
    // LogPriorType enum value; object.GetType() (System.Type) would never Equals an
    // enum member and this guard would always be false — verify against LogPrior.
    if (prior.GetType().Equals(LogPrior.LogPriorType.Null))
    {
        return;
    }
    double sigma = prior.GetSigma();
    for (int c_1 = 0; c_1 < numClasses; c_1++)
    {
        if (c_1 == 0)
        {
            continue;
        }
        int offset = (c_1 - 1) * numFeatures;
        // Only the first numL2Parameters of each class block are L2-regularized
        // (the remaining shift parameters are handled by OWL-QN's L1 penalty).
        for (int j = 0; j < numL2Parameters; j++)
        {
            double theta = thetasArray[offset + j];
            value += theta * theta / (sigma * 2.0);
            derivative[offset + j] += theta / sigma;
        }
    }
}
/// <summary>
/// Trains a multinomial logistic classifier on the given dataset: caches the
/// dataset's dimensions, data, values, and labels into this factory's fields,
/// augments the feature matrix with per-datum shift features, and runs the
/// weight optimization.
/// </summary>
/// <param name="dataset">The training dataset; real feature values are used when it is an RVFDataset.</param>
/// <returns>A classifier wrapping the trained weights and the dataset's feature and label indices.</returns>
public virtual MultinomialLogisticClassifier<L, F> TrainClassifier(GeneralDataset<L, F> dataset)
{
    numClasses = dataset.NumClasses();
    numFeatures = dataset.NumFeatures();
    data = dataset.GetDataArray();
    // BUG FIX: this previously tested "dataset is RVFDataset<object, object>" — a Java
    // type-erasure artifact. C# generics are reified, so a GeneralDataset<L, F> is never
    // an RVFDataset<object, object> unless L and F are literally object; real-valued
    // datasets silently fell through to the all-ones initialization.
    if (dataset is RVFDataset<L, F>)
    {
        dataValues = dataset.GetValuesArray();
    }
    else
    {
        // Binary-feature dataset: initialize every present feature's value to 1.0.
        dataValues = LogisticUtils.InitializeDataValues(data);
    }
    AugmentFeatureMatrix(data, dataValues);
    labels = dataset.GetLabelsArray();
    return new MultinomialLogisticClassifier<L, F>(TrainWeights(), dataset.featureIndex, dataset.labelIndex);
}
/// <summary>
/// Optimizes the augmented parameter vector with OWL-QN (L1 on the per-datum
/// shift parameters via lambda) and unpacks the flat result into a jagged
/// per-class weight matrix.
/// </summary>
/// <returns>Weights as (numClasses - 1) rows of (numFeatures + data.Length) parameters each.</returns>
private double[][] TrainWeights()
{
    QNMinimizer minimizer = new QNMinimizer(15, true);
    minimizer.UseOWLQN(true, lambda);
    // The objective works in an augmented space: the original numFeatures plus one
    // shift parameter per training datum, hence numFeatures + data.Length below.
    IDiffFunction objective = new ShiftParamsLogisticObjectiveFunction(data, dataValues, ConvertLabels(labels), numClasses, numFeatures + data.Length, numFeatures, prior);
    double[] augmentedThetas = new double[(numClasses - 1) * (numFeatures + data.Length)];
    augmentedThetas = minimizer.Minimize(objective, 1e-4, augmentedThetas);
    // Count non-zero shift parameters, for debugging (OWL-QN's L1 penalty drives
    // many of them to exactly zero).
    int count = 0;
    for (int j = numFeatures; j < augmentedThetas.Length; j++)
    {
        if (augmentedThetas[j] != 0)
        {
            count++;
        }
    }
    Redwood.Log("NUM NONZERO PARAMETERS: " + count);
    // BUG FIX: this was "double[][] thetas = new double[][] { };" — a zero-length
    // jagged array (broken Java-to-C# conversion of "new double[a][b]"), so
    // LogisticUtils.Unflatten had nothing to fill and all-empty weights were returned.
    // Allocate (numClasses - 1) rows whose combined length equals
    // augmentedThetas.Length so the flat vector unpacks exactly.
    int rowLength = numFeatures + data.Length;
    double[][] thetas = new double[numClasses - 1][];
    for (int r = 0; r < thetas.Length; r++)
    {
        thetas[r] = new double[rowLength];
    }
    LogisticUtils.Unflatten(augmentedThetas, thetas);
    return thetas;
}