/// <summary>
/// Checks the k-fold index sets produced for 15 targets split into 4 folds,
/// where the target count is not evenly divisible by the fold count.
/// </summary>
public void CrossValidationUtilities_GetKFoldCrossValidationIndexSets_Handle_Remainder()
{
    double[] targets = { 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3 };
    var stratifiedSampler = new StratifiedIndexSampler<double>(seed: 242);

    var actualFolds = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
        stratifiedSampler, foldCount: 4, targets: targets);

    var expectedFolds = new List<(int[] trainingIndices, int[] validationIndices)>
    {
        (new[] { 0, 1, 2, 3, 5, 6, 7, 9, 11, 12, 13, 14 }, new[] { 10, 4, 8 }),
        (new[] { 0, 1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13 }, new[] { 2, 7, 14 }),
        (new[] { 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14 }, new[] { 5, 13, 0 }),
        // The remainder of targets.Length / foldCount is expected to end up
        // in the last set, which therefore has a larger validation part.
        (new[] { 0, 2, 4, 5, 7, 8, 10, 13, 14 }, new[] { 1, 3, 6, 9, 11, 12 }),
    };

    Assert.AreEqual(expectedFolds.Count, actualFolds.Count);

    // Compare fold by fold; element order inside each index array matters.
    for (var fold = 0; fold < expectedFolds.Count; fold++)
    {
        var (expectedTraining, expectedValidation) = expectedFolds[fold];
        var actualFold = actualFolds[fold];

        CollectionAssert.AreEqual(expectedTraining, actualFold.trainingIndices);
        CollectionAssert.AreEqual(expectedValidation, actualFold.validationIndices);
    }
}
/// <summary>
/// Checks the k-fold index sets produced when an explicit indices array
/// restricts which of the 12 targets take part in the 2-fold split.
/// </summary>
public void CrossValidationUtilities_GetKFoldCrossValidationIndexSets_Indices()
{
    double[] targets = { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };
    int[] indices = { 0, 1, 2, 3, 4, 5, 6 };
    var stratifiedSampler = new StratifiedIndexSampler<double>(seed: 242);

    var actualFolds = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
        stratifiedSampler, foldCount: 2, targets: targets, indices: indices);

    TraceIndexSets(actualFolds);

    var expectedFolds = new List<(int[] trainingIndices, int[] validationIndices)>
    {
        // Every expected value below comes from the indices array only.
        (new[] { 1, 3, 4, 5 }, new[] { 2, 6, 0 }),
        (new[] { 0, 2, 6 }, new[] { 1, 3, 4, 5 }),
    };

    Assert.AreEqual(expectedFolds.Count, actualFolds.Count);

    // Compare fold by fold; element order inside each index array matters.
    for (var fold = 0; fold < expectedFolds.Count; fold++)
    {
        var (expectedTraining, expectedValidation) = expectedFolds[fold];
        var actualFold = actualFolds[fold];

        CollectionAssert.AreEqual(expectedTraining, actualFold.trainingIndices);
        CollectionAssert.AreEqual(expectedValidation, actualFold.validationIndices);
    }
}
/// <summary>
/// Checks the k-fold index sets produced for 12 targets split into 4 folds
/// with a seeded stratified sampler.
/// </summary>
public void CrossValidationUtilities_GetKFoldCrossValidationIndexSets()
{
    double[] targets = { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };
    var stratifiedSampler = new StratifiedIndexSampler<double>(seed: 242);

    var actualFolds = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
        stratifiedSampler, foldCount: 4, targets: targets);

    TraceIndexSets(actualFolds);

    var expectedFolds = new List<(int[] trainingIndices, int[] validationIndices)>
    {
        (new[] { 0, 1, 3, 4, 5, 7, 9, 10, 11 }, new[] { 6, 8, 2 }),
        (new[] { 0, 2, 3, 4, 6, 7, 8, 9, 10 }, new[] { 1, 11, 5 }),
        (new[] { 0, 1, 2, 4, 5, 6, 8, 9, 11 }, new[] { 7, 3, 10 }),
        (new[] { 1, 2, 3, 5, 6, 7, 8, 10, 11 }, new[] { 0, 4, 9 }),
    };

    Assert.AreEqual(expectedFolds.Count, actualFolds.Count);

    // Compare fold by fold; element order inside each index array matters.
    for (var fold = 0; fold < expectedFolds.Count; fold++)
    {
        var (expectedTraining, expectedValidation) = expectedFolds[fold];
        var actualFold = actualFolds[fold];

        CollectionAssert.AreEqual(expectedTraining, actualFold.trainingIndices);
        CollectionAssert.AreEqual(expectedValidation, actualFold.validationIndices);
    }
}
/// <summary>
/// Cross validated predictions.
/// Only cross validates within the provided indices.
/// The predictions are returned in the predictions array.
/// </summary>
/// <param name="learner">Learner used to train one model per fold on that fold's training indices.</param>
/// <param name="observations">Observation matrix; rows are addressed by the values in
/// <paramref name="crossValidationIndices"/>.</param>
/// <param name="targets">Targets handed to the learner and to the fold sampler.</param>
/// <param name="crossValidationIndices">Row indices to cross validate within. Must hold exactly one
/// entry per element of <paramref name="crossValidatedPredictions"/>; the index at position i
/// receives its prediction in slot i of the output array. The array itself is not modified.</param>
/// <param name="crossValidatedPredictions">Output array that receives one prediction per provided index.</param>
/// <exception cref="ArgumentException">Thrown when there are fewer prediction slots than cross validation
/// folds, or when the number of indices differs from the number of prediction slots.</exception>
public void CrossValidate(IIndexedLearner<TPrediction> learner,
    F64Matrix observations,
    double[] targets,
    int[] crossValidationIndices,
    TPrediction[] crossValidatedPredictions)
{
    var rows = crossValidatedPredictions.Length;
    if (m_crossValidationFolds > rows)
    {
        throw new ArgumentException(
            $"Too few observations: {rows} for number of cross validation folds: {m_crossValidationFolds}");
    }

    // Work on a copy so the caller's index array is never touched.
    var indices = crossValidationIndices.ToArray();

    // The map below pairs indices[i] with prediction slot i, so both arrays must have
    // the same length. Fail fast with a descriptive error instead of an index/key
    // failure surfacing later (possibly after folds have already been trained).
    if (indices.Length != rows)
    {
        throw new ArgumentException(
            $"Number of cross validation indices: {indices.Length} must match number of " +
            $"cross validated predictions: {rows}",
            nameof(crossValidationIndices));
    }

    // Map the provided crossValidationIndices to crossValidatedPredictions.
    // Index values can be larger than the crossValidatedPredictions length, since
    // crossValidatedPredictions might be a subset of the provided observations and targets.
    var cvPredictionIndiceMap = Enumerable.Range(0, rows)
        .ToDictionary(i => indices[i], i => i);

    var crossValidationIndexSets = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
        m_indexedSampler, m_crossValidationFolds, targets, indices);

    // Single row buffer reused for every prediction.
    var observation = new double[observations.ColumnCount];

    foreach (var (trainingIndices, validationIndices) in crossValidationIndexSets)
    {
        var model = learner.Learn(observations, targets, trainingIndices);
        try
        {
            // Predict the whole fold first, so the output array is only written
            // once every prediction of the fold has succeeded.
            var predictions = new TPrediction[validationIndices.Length];
            for (int l = 0; l < predictions.Length; l++)
            {
                observations.Row(validationIndices[l], observation);
                predictions[l] = model.Predict(observation);
            }

            for (int j = 0; j < validationIndices.Length; j++)
            {
                crossValidatedPredictions[cvPredictionIndiceMap[validationIndices[j]]] = predictions[j];
            }
        }
        finally
        {
            // Release the fold's model even when prediction fails.
            ModelDisposer.DisposeIfDisposable(model);
        }
    }
}
/// <summary>
/// Example: 10-fold cross validation of a CNTK model on the white wine quality data,
/// using SharpLearning for data loading, scaling and fold generation.
/// The formatted error of the cross validated predictions is written to the trace output.
/// </summary>
public void SharpLearning_With_Cntk_Example()
{
    // Load the data set.
    var (observations, targets) = DataSetUtilities.LoadWinequalityWhite();

    // Scale the observations in place to the range used for the neural net.
    var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);
    minMaxTransformer.Transform(observations, observations);

    var columnCount = observations.ColumnCount;
    var rowCount = observations.RowCount;
    var outputCount = 1;

    var inputShape = new int[] { columnCount, 1 };
    var outputShape = new int[] { outputCount };

    // Convert to float and wrap both observations and targets as minibatch data.
    var observationValues = observations.Data().Select(value => (float)value).ToArray();
    var observationsData = new MemoryMinibatchData(observationValues, inputShape, rowCount);

    var targetValues = targets.Select(value => (float)value).ToArray();
    var targetsData = new MemoryMinibatchData(targetValues, outputShape, rowCount);

    var dataType = DataType.Float;
    var device = DeviceDescriptor.CPUDevice;

    // Network input and target variables.
    var inputVariable = Layers.Input(inputShape, dataType);
    var targetVariable = Variable.InputVariable(outputShape, dataType);

    // Stream name to variable lookup shared by all minibatch sources.
    var nameToVariable = new Dictionary<string, Variable>
    {
        ["observations"] = inputVariable,
        ["targets"] = targetVariable,
    };

    // Create the cross validation folds.
    var indexSampler = new RandomIndexSampler<double>(seed: 24);
    var folds = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
        indexSampler, foldCount: 10, targets: targets);

    var predictions = new double[rowCount];

    // Cross validation loop: train on the training part, predict the validation part.
    foreach (var (trainingIndices, validationIndices) in folds)
    {
        // Slice the data for this fold.
        var trainingNameToData = new Dictionary<string, MemoryMinibatchData>
        {
            ["observations"] = observationsData.GetSamples(trainingIndices),
            ["targets"] = targetsData.GetSamples(trainingIndices),
        };

        var validationNameToData = new Dictionary<string, MemoryMinibatchData>
        {
            ["observations"] = observationsData.GetSamples(validationIndices),
            ["targets"] = targetsData.GetSamples(validationIndices),
        };

        var trainSource = new MemoryMinibatchSource(
            nameToVariable, trainingNameToData, seed: 232, randomize: true);

        // Not randomized, so predictions can be matched to validationIndices by position below.
        var validationSource = new MemoryMinibatchSource(
            nameToVariable, validationNameToData, seed: 232, randomize: false);

        // Build a fresh model for the fold and fit it.
        var model = CreateModel(inputVariable, targetVariable, outputCount, dataType, device);
        model.Fit(trainSource, batchSize: 128, epochs: 10);

        // Predict the validation part; each raw prediction holds a single value.
        var rawPredictions = model.Predict(validationSource);
        var foldPredictions = rawPredictions.Select(raw => (double)raw.Single()).ToArray();

        // Store each prediction at the position of its original observation.
        for (var position = 0; position < validationIndices.Length; position++)
        {
            predictions[validationIndices[position]] = foldPredictions[position];
        }
    }

    Trace.WriteLine(FormatErrorString(targets, predictions));
}