Пример #1
0
        /// <summary>
        /// Splits 15 stratified targets into 4 folds and checks the resulting index sets.
        /// 15 is not divisible by 4, so the expectation is that the first three
        /// validation sets hold 3 indices each and the last one holds the remaining 6.
        /// </summary>
        public void CrossValidationUtilities_GetKFoldCrossValidationIndexSets_Handle_Remainder()
        {
            var targets = new double[] { 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3 };
            var stratifiedSampler = new StratifiedIndexSampler<double>(seed: 242);

            var actualFolds = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
                stratifiedSampler, foldCount: 4, targets: targets);

            var expectedFolds = new (int[] trainingIndices, int[] validationIndices)[]
            {
                (new[] { 0, 1, 2, 3, 5, 6, 7, 9, 11, 12, 13, 14 }, new[] { 10, 4, 8 }),
                (new[] { 0, 1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13 }, new[] { 2, 7, 14 }),
                (new[] { 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14 }, new[] { 5, 13, 0 }),
                // The remainder of targets.Length / foldCount ends up in the last set,
                // which therefore has a larger validation part than the others.
                (new[] { 0, 2, 4, 5, 7, 8, 10, 13, 14 }, new[] { 1, 3, 6, 9, 11, 12 }),
            };

            Assert.AreEqual(expectedFolds.Length, actualFolds.Count);

            // Compare fold by fold; element order inside each index array matters.
            for (var fold = 0; fold < expectedFolds.Length; fold++)
            {
                var (expectedTraining, expectedValidation) = expectedFolds[fold];
                var (actualTraining, actualValidation) = actualFolds[fold];

                CollectionAssert.AreEqual(expectedTraining, actualTraining);
                CollectionAssert.AreEqual(expectedValidation, actualValidation);
            }
        }
Пример #2
0
        /// <summary>
        /// Splits a subset of the data (indices 0..6 out of 12 targets) into 2 folds
        /// and checks that the resulting training and validation sets only contain
        /// values taken from the provided indices array.
        /// </summary>
        public void CrossValidationUtilities_GetKFoldCrossValidationIndexSets_Indices()
        {
            var targets = new double[] { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };
            var indices = new[] { 0, 1, 2, 3, 4, 5, 6 };
            var stratifiedSampler = new StratifiedIndexSampler<double>(seed: 242);

            var actualFolds = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
                stratifiedSampler, foldCount: 2, targets: targets, indices: indices);

            TraceIndexSets(actualFolds);

            // Every expected value below comes from the indices array only.
            var expectedFolds = new (int[] trainingIndices, int[] validationIndices)[]
            {
                (new[] { 1, 3, 4, 5 }, new[] { 2, 6, 0 }),
                (new[] { 0, 2, 6 }, new[] { 1, 3, 4, 5 }),
            };

            Assert.AreEqual(expectedFolds.Length, actualFolds.Count);

            // Compare fold by fold; element order inside each index array matters.
            for (var fold = 0; fold < expectedFolds.Length; fold++)
            {
                var (expectedTraining, expectedValidation) = expectedFolds[fold];
                var (actualTraining, actualValidation) = actualFolds[fold];

                CollectionAssert.AreEqual(expectedTraining, actualTraining);
                CollectionAssert.AreEqual(expectedValidation, actualValidation);
            }
        }
Пример #3
0
        /// <summary>
        /// Splits 12 stratified targets into 4 folds and checks the resulting index sets.
        /// Each expected fold has 9 training indices and 3 validation indices.
        /// </summary>
        public void CrossValidationUtilities_GetKFoldCrossValidationIndexSets()
        {
            var targets = new double[] { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };
            var stratifiedSampler = new StratifiedIndexSampler<double>(seed: 242);

            var actualFolds = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
                stratifiedSampler, foldCount: 4, targets: targets);

            TraceIndexSets(actualFolds);

            var expectedFolds = new (int[] trainingIndices, int[] validationIndices)[]
            {
                (new[] { 0, 1, 3, 4, 5, 7, 9, 10, 11 }, new[] { 6, 8, 2 }),
                (new[] { 0, 2, 3, 4, 6, 7, 8, 9, 10 }, new[] { 1, 11, 5 }),
                (new[] { 0, 1, 2, 4, 5, 6, 8, 9, 11 }, new[] { 7, 3, 10 }),
                (new[] { 1, 2, 3, 5, 6, 7, 8, 10, 11 }, new[] { 0, 4, 9 }),
            };

            Assert.AreEqual(expectedFolds.Length, actualFolds.Count);

            // Compare fold by fold; element order inside each index array matters.
            for (var fold = 0; fold < expectedFolds.Length; fold++)
            {
                var (expectedTraining, expectedValidation) = expectedFolds[fold];
                var (actualTraining, actualValidation) = actualFolds[fold];

                CollectionAssert.AreEqual(expectedTraining, actualTraining);
                CollectionAssert.AreEqual(expectedValidation, actualValidation);
            }
        }
        /// <summary>
        /// Cross validated predictions.
        /// Only cross validates within the provided <paramref name="crossValidationIndices"/>.
        /// For every fold a model is learned on the fold's training indices and used to predict
        /// the fold's validation indices. The predictions are written to
        /// <paramref name="crossValidatedPredictions"/>: the prediction for the observation at
        /// <c>crossValidationIndices[i]</c> is stored at position <c>i</c>.
        /// </summary>
        /// <param name="learner">Learner used to train one model per cross validation fold.</param>
        /// <param name="observations">Observation matrix; rows are addressed by the values in
        /// <paramref name="crossValidationIndices"/>.</param>
        /// <param name="targets">Targets passed to the learner and to the fold sampler.</param>
        /// <param name="crossValidationIndices">Row indices to cross validate over. Must have the
        /// same length as <paramref name="crossValidatedPredictions"/> and contain no duplicates.</param>
        /// <param name="crossValidatedPredictions">Output array receiving one prediction per
        /// entry of <paramref name="crossValidationIndices"/>.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when there are fewer predictions than cross validation folds, or when
        /// <paramref name="crossValidationIndices"/> and <paramref name="crossValidatedPredictions"/>
        /// differ in length.
        /// </exception>
        public void CrossValidate(IIndexedLearner<TPrediction> learner,
                                  F64Matrix observations,
                                  double[] targets,
                                  int[] crossValidationIndices,
                                  TPrediction[] crossValidatedPredictions)
        {
            var rows = crossValidatedPredictions.Length;

            if (m_crossValidationFolds > rows)
            {
                throw new ArgumentException("Too few observations: " + rows +
                                            " for number of cross validation folds: " + m_crossValidationFolds);
            }

            // Fail fast on a length mismatch. Without this check a shorter index array fails
            // with an IndexOutOfRangeException while building the map below, and a longer one
            // fails with a KeyNotFoundException only after models have already been trained.
            if (crossValidationIndices.Length != rows)
            {
                throw new ArgumentException("Number of cross validation indices: " + crossValidationIndices.Length +
                                            " must match number of cross validated predictions: " + rows);
            }

            // Work on a copy of the caller's index array.
            var indices = crossValidationIndices.ToArray();

            // Map the provided crossValidationIndices to crossValidatedPredictions
            // Indices from crossValidationIndices can be larger than crossValidatedPredictions length
            // since crossValidatedPredictions might be a subset of the provided observations and targets
            var cvPredictionIndiceMap = Enumerable.Range(0, rows)
                                        .ToDictionary(i => indices[i], i => i);

            var crossValidationIndexSets = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
                m_indexedSampler, m_crossValidationFolds, targets, indices);

            // Single row buffer reused for every prediction.
            var observation = new double[observations.ColumnCount];

            foreach (var (trainingIndices, validationIndices) in crossValidationIndexSets)
            {
                var model = learner.Learn(observations, targets, trainingIndices);

                try
                {
                    for (int i = 0; i < validationIndices.Length; i++)
                    {
                        var validationIndex = validationIndices[i];

                        // Load the validation row into the shared buffer and store the
                        // prediction at the position that belongs to this index.
                        observations.Row(validationIndex, observation);
                        crossValidatedPredictions[cvPredictionIndiceMap[validationIndex]] = model.Predict(observation);
                    }
                }
                finally
                {
                    // Hand the model to the disposer even when prediction throws,
                    // so a failing fold does not leak the model.
                    ModelDisposer.DisposeIfDisposable(model);
                }
            }
        }
        /// <summary>
        /// Example of combining SharpLearning utilities with a CNTK model:
        /// loads the white wine quality data, wraps it as in-memory minibatch data,
        /// runs a 10-fold cross validation where a fresh model is created and fitted per fold,
        /// and finally traces the formatted error between targets and cross validated predictions.
        /// </summary>
        public void SharpLearning_With_Cntk_Example()
        {
            // Load data
            var (observations, targets) = DataSetUtilities.LoadWinequalityWhite();

            // Transform data for the neural net; the same matrix is passed as input and output.
            var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);
            minMaxTransformer.Transform(observations, observations);

            var featureCount = observations.ColumnCount;
            var observationCount = observations.RowCount;
            var targetCount = 1;

            var inputShape = new int[] { featureCount, 1 };
            var outputShape = new int[] { targetCount };

            // Convert data to float, and wrap as minibatch data.
            var observationValues = observations.Data().Select(value => (float)value).ToArray();
            var observationsData = new MemoryMinibatchData(observationValues, inputShape, observationCount);
            var targetValues = targets.Select(value => (float)value).ToArray();
            var targetsData = new MemoryMinibatchData(targetValues, outputShape, observationCount);

            var dataType = DataType.Float;
            var device = DeviceDescriptor.CPUDevice;

            // Setup input and target variables.
            var inputVariable = Layers.Input(inputShape, dataType);
            var targetVariable = Variable.InputVariable(outputShape, dataType);

            // Stream name to variable; the same names key the per-fold data below.
            var nameToVariable = new Dictionary<string, Variable>
            {
                ["observations"] = inputVariable,
                ["targets"] = targetVariable,
            };

            // Get cross validation folds.
            var indexSampler = new RandomIndexSampler<double>(seed: 24);
            var folds = CrossValidationUtilities.GetKFoldCrossValidationIndexSets(
                indexSampler, foldCount: 10, targets: targets);

            var predictions = new double[observationCount];

            // Run cross validation loop.
            foreach (var (trainingIndices, validationIndices) in folds)
            {
                // Select the training and validation samples for this fold.
                var trainingNameToData = new Dictionary<string, MemoryMinibatchData>
                {
                    ["observations"] = observationsData.GetSamples(trainingIndices),
                    ["targets"] = targetsData.GetSamples(trainingIndices),
                };

                var validationNameToData = new Dictionary<string, MemoryMinibatchData>
                {
                    ["observations"] = observationsData.GetSamples(validationIndices),
                    ["targets"] = targetsData.GetSamples(validationIndices),
                };

                // Training source is randomized, validation source is not.
                var trainSource = new MemoryMinibatchSource(
                    nameToVariable, trainingNameToData, seed: 232, randomize: true);
                var validationSource = new MemoryMinibatchSource(
                    nameToVariable, validationNameToData, seed: 232, randomize: false);

                // Create model and fit.
                var model = CreateModel(inputVariable, targetVariable, targetCount, dataType, device);
                model.Fit(trainSource, batchSize: 128, epochs: 10);

                // Predict; each raw prediction is expected to hold exactly one value.
                var rawPredictions = model.Predict(validationSource);
                var foldPredictions = rawPredictions.Select(raw => (double)raw.Single()).ToArray();

                // Store the fold predictions at their original observation positions.
                for (var position = 0; position < validationIndices.Length; position++)
                {
                    predictions[validationIndices[position]] = foldPredictions[position];
                }
            }

            Trace.WriteLine(FormatErrorString(targets, predictions));
        }