Esempio n. 1
0
        /// <summary>
        /// Trains <paramref name="trainer"/> on a fixed number of minibatches pulled from
        /// <paramref name="minibatchSource"/>, calling <c>PrintTrainingProgress</c> after each one.
        /// </summary>
        /// <param name="trainer">Trainer that receives every minibatch.</param>
        /// <param name="minibatchSource">Source queried once per iteration via <c>GetNextRandomMinibatch</c>.</param>
        /// <param name="numMinibatchesToTrain">Number of minibatches to train on; zero or negative trains nothing.</param>
        /// <param name="device">Device passed to <c>TrainMinibatch</c>.</param>
        private void RunTraining(Trainer trainer, GenericMinibatchSource minibatchSource, int numMinibatchesToTrain, DeviceDescriptor device)
        {
            // Written once before training; presumably the column header for the lines
            // emitted by PrintTrainingProgress (defined elsewhere) - confirm.
            Debug.WriteLine("Minibatch;CrossEntropyLoss;EvaluationCriterion;");

            for (int minibatchCount = 0; minibatchCount < numMinibatchesToTrain; minibatchCount++)
            {
                IDictionary <Variable, MinibatchData> data = minibatchSource.GetNextRandomMinibatch();
                trainer.TrainMinibatch(data, device);
                PrintTrainingProgress(trainer, minibatchCount);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Trains <paramref name="trainer"/> on a fixed number of minibatches pulled from
        /// <paramref name="minibatchSource"/> and, after each one, writes the running
        /// sample-weighted average of the evaluation metric to the debug output.
        /// </summary>
        /// <param name="trainer">Trainer that receives every minibatch.</param>
        /// <param name="minibatchSource">Source queried once per iteration via <c>GetNextRandomMinibatch</c>.</param>
        /// <param name="numMinibatchesToTrain">Number of minibatches to train on; zero or negative trains nothing.</param>
        /// <param name="device">Device passed to <c>TrainMinibatch</c>.</param>
        private void RunTraining(Trainer trainer, GenericMinibatchSequenceSource minibatchSource, int numMinibatchesToTrain, DeviceDescriptor device)
        {
            // Sum over all minibatches so far of (evaluation average * sample count).
            double weightedErrorSum = 0;
            int    minibatchCount   = 0;

            while (minibatchCount < numMinibatchesToTrain)
            {
                IDictionary <Variable, MinibatchData> batch = minibatchSource.GetNextRandomMinibatch();
                trainer.TrainMinibatch(batch, device);

                // Weight this minibatch's average by its size so batches of different sizes
                // contribute proportionally to the running figure.
                double batchSamples = trainer.PreviousMinibatchSampleCount();
                double batchAverage = trainer.PreviousMinibatchEvaluationAverage();
                weightedErrorSum += batchAverage * batchSamples;

                double totalSamples = trainer.TotalNumberOfSamplesSeen();
                double runningError = weightedErrorSum / totalSamples;
                Debug.WriteLine($"{minibatchCount} Average training error: {runningError:p2}");

                minibatchCount++;
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Trains a two-class linear model (<c>weights * input + bias</c>) on samples returned by
        /// <c>GenerateData</c> using an FSAdaGrad learner, then scores it on a fresh batch of 100
        /// samples and writes the misclassification count to the debug output.
        /// </summary>
        public void Run()
        {
            var device = DeviceDescriptor.UseDefaultDevice();

            // 1. Generate data
            int sampleSize        = 32;
            int nbDimensionsInput = 2; // 2 dimensions (age & tumor size)
            int nbLabels          = 2; // the output is meant to be a probability vector that sums to 1; with a single output dimension it would always be 1.
            // So two dimensions are used, one 'true' and one 'false'. The output looks like 0.25 true and 0.75 false => total weight = 1.
            // first label = false, second = true

            IEnumerable <DataPoint> data = GenerateData(sampleSize);

            Variable inputVariables = Variable.InputVariable(NDShape.CreateNDShape(new[] { nbDimensionsInput }), DataType.Double, "input");
            Variable expectedOutput = Variable.InputVariable(new int[] { nbLabels }, DataType.Double, "output");

            Parameter bias    = new Parameter(NDShape.CreateNDShape(new[] { nbLabels }), DataType.Double, 0);                    // one offset per output dimension
            Parameter weights = new Parameter(NDShape.CreateNDShape(new[] { nbDimensionsInput, nbLabels }), DataType.Double, 0); // the coefficients to learn
            // 2 input variables, 2 output estimates (probability of true and probability of false)

            Function predictionFunction = CNTKLib.Plus(CNTKLib.Times(weights, inputVariables), bias);

            Function lossFunction      = CNTKLib.CrossEntropyWithSoftmax(predictionFunction, expectedOutput);
            Function evalErrorFunction = CNTKLib.ClassificationError(predictionFunction, expectedOutput);
            uint minibatchSize = 25;

            // NOTE(review): the learning-rate schedule is sized from data.Count() while the momentum
            // schedule uses minibatchSize - confirm this asymmetry is intended.
            TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble(0.3, (uint)data.Count());
            TrainingParameterScheduleDouble momentumSchedule      = new TrainingParameterScheduleDouble(0.9126265014311797, minibatchSize);

            var parameters = new ParameterVector();

            foreach (var p in predictionFunction.Parameters())
            {
                parameters.Add(p);
            }

            List <Learner> parameterLearners = new List <Learner>()
            {
                CNTKLib.FSAdaGradLearner(parameters, learningRatePerSample, momentumSchedule, true)
            };

            Trainer trainer = Trainer.CreateTrainer(predictionFunction, lossFunction, evalErrorFunction, parameterLearners);

            double nbSamplesToUseForTraining = 20000;
            int    numMinibatchesToTrain     = (int)(nbSamplesToUseForTraining / (int)minibatchSize);

            // 2. Train the model: every iteration uses a freshly generated minibatch.
            for (int minibatchCount = 0; minibatchCount < numMinibatchesToTrain; minibatchCount++)
            {
                IEnumerable <DataPoint> trainingData = GenerateData((int)minibatchSize);

                // Flatten the samples: two features per row, and a one-hot label pair
                // laid out as [false, true].
                List <double> minibatchInput  = new List <double>();
                List <double> minibatchOutput = new List <double>();
                foreach (DataPoint row in trainingData)
                {
                    minibatchInput.Add(row.Age);
                    minibatchInput.Add(row.TumorSize);
                    minibatchOutput.Add(row.HasCancer ? 0d : 1d);
                    minibatchOutput.Add(row.HasCancer ? 1d : 0d);
                }

                Value inputData  = Value.CreateBatch <double>(NDShape.CreateNDShape(new int[] { nbDimensionsInput }), minibatchInput, device);
                Value outputData = Value.CreateBatch <double>(NDShape.CreateNDShape(new int[] { nbLabels }), minibatchOutput, device);

                trainer.TrainMinibatch(new Dictionary <Variable, Value>()
                {
                    { inputVariables, inputData }, { expectedOutput, outputData }
                }, device);

                PrintTrainingProgress(trainer, minibatchCount);
            }

            // 3. Test on a fresh batch.
            {
                int testSize = 100;
                IEnumerable <DataPoint> testData = GenerateData(testSize);

                List <double> testInput  = new List <double>();
                List <double> testOutput = new List <double>();
                foreach (DataPoint row in testData)
                {
                    testInput.Add(row.Age);
                    testInput.Add(row.TumorSize);
                    testOutput.Add(row.HasCancer ? 0d : 1d);
                    testOutput.Add(row.HasCancer ? 1d : 0d);
                }

                Value inputData  = Value.CreateBatch <double>(NDShape.CreateNDShape(new int[] { nbDimensionsInput }), testInput, device);
                Value outputData = Value.CreateBatch <double>(NDShape.CreateNDShape(new int[] { nbLabels }), testOutput, device);

                // Decode the expected labels against the label variable they were built for,
                // not against the model output that merely shares its shape.
                IList <IList <double> > expectedOneHot = outputData.GetDenseData <double>(expectedOutput);
                IList <int>             expectedLabels = expectedOneHot.Select(l => l.IndexOf(1.0d)).ToList();

                // A null entry asks Evaluate to allocate the output value itself.
                var outputDataMap = new Dictionary <Variable, Value>()
                {
                    { predictionFunction.Output, null }
                };
                predictionFunction.Evaluate(
                    new Dictionary <Variable, Value>()
                {
                    { inputVariables, inputData }
                },
                    outputDataMap,
                    device);

                Value outputValue = outputDataMap[predictionFunction.Output];

                // These are the raw model outputs (softmax is only applied inside the loss);
                // the predicted class is the index of the largest one.
                IList <IList <double> > predictedScores = outputValue.GetDenseData <double>(predictionFunction.Output);
                var actualLabels = predictedScores.Select((IList <double> l) => l.IndexOf(l.Max())).ToList();
                int misMatches   = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum();

                Debug.WriteLine($"Validating Model: Total Samples = {testSize}, Misclassify Count = {misMatches}");
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Trains the network returned by <c>DefineNetwork</c> (one hidden layer, sigmoid activation
        /// passed in) on samples returned by <c>GenerateData</c> using an SGD learner, then scores it
        /// on a fresh batch of 100 samples and writes the misclassification count to the debug output.
        /// </summary>
        public void Run()
        {
            var device = DeviceDescriptor.UseDefaultDevice();

            int nbDimensionsInput = 2; // 2 dimensions (age & tumor size)
            int nbLabels          = 2; // the output is meant to be a probability vector that sums to 1; with a single output dimension it would always be 1.
            // So two dimensions are used, one 'true' and one 'false'. The output looks like 0.25 true and 0.75 false => total weight = 1.
            // first label = false, second = true

            Variable inputVariables = Variable.InputVariable(NDShape.CreateNDShape(new[] { nbDimensionsInput }), DataType.Double, "input");
            Variable expectedOutput = Variable.InputVariable(new int[] { nbLabels }, DataType.Double, "output");

            int nbHiddenLayers = 1;

            Function lastLayer = DefineNetwork(inputVariables, nbLabels, nbHiddenLayers, CNTKLib.Sigmoid);

            Function lossFunction      = CNTKLib.CrossEntropyWithSoftmax(lastLayer, expectedOutput);
            Function evalErrorFunction = CNTKLib.ClassificationError(lastLayer, expectedOutput);

            uint   minibatchSize = 25;
            double learningRate  = 0.5;
            TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble(learningRate, minibatchSize);

            IList <Learner> parameterLearners = new List <Learner>()
            {
                Learner.SGDLearner(lastLayer.Parameters(), learningRatePerSample)
            };
            Trainer trainer = Trainer.CreateTrainer(lastLayer, lossFunction, evalErrorFunction, parameterLearners);

            double nbSamplesToUseForTraining = 20000;
            int    numMinibatchesToTrain     = (int)(nbSamplesToUseForTraining / (int)minibatchSize);

            // 1. Train the model: every iteration uses a freshly generated minibatch.
            for (int minibatchCount = 0; minibatchCount < numMinibatchesToTrain; minibatchCount++)
            {
                IEnumerable <DataPoint> trainingData = GenerateData((int)minibatchSize);

                // Flatten the samples: two features per row, and a one-hot label pair
                // laid out as [false, true].
                List <double> minibatchInput  = new List <double>();
                List <double> minibatchOutput = new List <double>();
                foreach (DataPoint row in trainingData)
                {
                    minibatchInput.Add(row.Age);
                    minibatchInput.Add(row.TumorSize);
                    minibatchOutput.Add(row.HasCancer ? 0d : 1d);
                    minibatchOutput.Add(row.HasCancer ? 1d : 0d);
                }

                Value inputData  = Value.CreateBatch <double>(NDShape.CreateNDShape(new int[] { nbDimensionsInput }), minibatchInput, device);
                Value outputData = Value.CreateBatch <double>(NDShape.CreateNDShape(new int[] { nbLabels }), minibatchOutput, device);

                trainer.TrainMinibatch(new Dictionary <Variable, Value>()
                {
                    { inputVariables, inputData }, { expectedOutput, outputData }
                }, false, device);

                PrintTrainingProgress(trainer, minibatchCount);
            }

            // 2. Test on a fresh batch.
            {
                int testSize = 100;
                IEnumerable <DataPoint> testData = GenerateData(testSize);

                List <double> testInput  = new List <double>();
                List <double> testOutput = new List <double>();
                foreach (DataPoint row in testData)
                {
                    testInput.Add(row.Age);
                    testInput.Add(row.TumorSize);
                    testOutput.Add(row.HasCancer ? 0d : 1d);
                    testOutput.Add(row.HasCancer ? 1d : 0d);
                }

                Value inputData  = Value.CreateBatch <double>(NDShape.CreateNDShape(new int[] { nbDimensionsInput }), testInput, device);
                Value outputData = Value.CreateBatch <double>(NDShape.CreateNDShape(new int[] { nbLabels }), testOutput, device);

                // Decode the expected labels against the label variable they were built for,
                // not against the model output that merely shares its shape.
                IList <IList <double> > expectedOneHot = outputData.GetDenseData <double>(expectedOutput);
                IList <int>             expectedLabels = expectedOneHot.Select(l => l.IndexOf(1.0d)).ToList();

                // A null entry asks Evaluate to allocate the output value itself.
                var outputDataMap = new Dictionary <Variable, Value>()
                {
                    { lastLayer.Output, null }
                };
                lastLayer.Evaluate(
                    new Dictionary <Variable, Value>()
                {
                    { inputVariables, inputData }
                },
                    outputDataMap,
                    device);

                Value outputValue = outputDataMap[lastLayer.Output];

                // The predicted class is the index of the largest network output.
                IList <IList <double> > predictedScores = outputValue.GetDenseData <double>(lastLayer.Output);
                var actualLabels = predictedScores.Select((IList <double> l) => l.IndexOf(l.Max())).ToList();
                int misMatches   = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum();

                Debug.WriteLine($"Validating Model: Total Samples = {testSize}, Misclassify Count = {misMatches}");
            }
        }