Example #1
 /// <inheritdoc />
 internal override Function ToFunction(Variable inputFunction)
 {
     return(CNTKLib.Softplus(inputFunction));
 }
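For context: CNTKLib.Softplus applies the elementwise function softplus(x) = ln(1 + e^x), a smooth approximation of ReLU.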
Example #2
        internal static void TrainSimpleFeedForwardClassifier(DeviceDescriptor device)
        {
            int inputDim         = 2;
            int numOutputClasses = 2;
            int hiddenLayerDim   = 50;
            int numHiddenLayers  = 2;

            int minibatchSize         = 50;
            int numSamplesPerSweep    = 10000;
            int numSweepsToTrainWith  = 2;
            int numMinibatchesToTrain = (numSamplesPerSweep * numSweepsToTrainWith) / minibatchSize;

            var featureStreamName = "features";
            var labelsStreamName  = "labels";
            var input             = Variable.InputVariable(new int[] { inputDim }, DataType.Float, "features");
            var labels            = Variable.InputVariable(new int[] { numOutputClasses }, DataType.Float, "labels");

            Function classifierOutput;
            Function trainingLoss;
            Function prediction;

            IList <StreamConfiguration> streamConfigurations = new StreamConfiguration[]
            { new StreamConfiguration(featureStreamName, inputDim), new StreamConfiguration(labelsStreamName, numOutputClasses) };

            using (var minibatchSource = MinibatchSource.TextFormatMinibatchSource("SimpleDataTrain_cntk_text.txt",
                                                                                   streamConfigurations, MinibatchSource.FullDataSweep, true, MinibatchSource.DefaultRandomizationWindowInChunks))
            {
                var featureStreamInfo = minibatchSource.StreamInfo(featureStreamName);
                var labelStreamInfo   = minibatchSource.StreamInfo(labelsStreamName);

                IDictionary <StreamInformation, Tuple <NDArrayView, NDArrayView> > inputMeansAndInvStdDevs =
                    new Dictionary <StreamInformation, Tuple <NDArrayView, NDArrayView> >
                {
                    { featureStreamInfo, new Tuple <NDArrayView, NDArrayView>(null, null) }
                };
                MinibatchSource.ComputeInputPerDimMeansAndInvStdDevs(minibatchSource, inputMeansAndInvStdDevs, device);

                var normalizedinput = CNTKLib.PerDimMeanVarianceNormalize(input,
                                                                          inputMeansAndInvStdDevs[featureStreamInfo].Item1, inputMeansAndInvStdDevs[featureStreamInfo].Item2);
                Function fullyConnected = TestHelper.FullyConnectedLinearLayer(normalizedinput, hiddenLayerDim, device, "");
                classifierOutput = CNTKLib.Sigmoid(fullyConnected, "");

                for (int i = 1; i < numHiddenLayers; ++i)
                {
                    fullyConnected   = TestHelper.FullyConnectedLinearLayer(classifierOutput, hiddenLayerDim, device, "");
                    classifierOutput = CNTKLib.Sigmoid(fullyConnected, "");
                }

                var outputTimesParam = new Parameter(NDArrayView.RandomUniform <float>(
                                                         new int[] { numOutputClasses, hiddenLayerDim }, -0.05, 0.05, 1, device));
                var outputBiasParam = new Parameter(NDArrayView.RandomUniform <float>(
                                                        new int[] { numOutputClasses }, -0.05, 0.05, 1, device));
                classifierOutput = CNTKLib.Plus(outputBiasParam, outputTimesParam * classifierOutput, "classifierOutput");

                trainingLoss = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labels, "lossFunction");
                prediction   = CNTKLib.ClassificationError(classifierOutput, labels, "classificationError");

                // Test save and reload of model
                {
                    Variable classifierOutputVar = classifierOutput;
                    Variable trainingLossVar     = trainingLoss;
                    Variable predictionVar       = prediction;
                    var      combinedNet         = Function.Combine(new List <Variable>()
                    {
                        trainingLoss, prediction, classifierOutput
                    },
                                                                    "feedForwardClassifier");
                    TestHelper.SaveAndReloadModel(ref combinedNet,
                                                  new List <Variable>()
                    {
                        input, labels, trainingLossVar, predictionVar, classifierOutputVar
                    }, device);

                    classifierOutput = classifierOutputVar;
                    trainingLoss     = trainingLossVar;
                    prediction       = predictionVar;
                }
            }

            CNTK.TrainingParameterScheduleDouble learningRatePerSample = new CNTK.TrainingParameterScheduleDouble(
                0.02, TrainingParameterScheduleDouble.UnitType.Sample);

            using (var minibatchSource = MinibatchSource.TextFormatMinibatchSource("SimpleDataTrain_cntk_text.txt", streamConfigurations))
            {
                var featureStreamInfo = minibatchSource.StreamInfo(featureStreamName);
                var labelStreamInfo   = minibatchSource.StreamInfo(labelsStreamName);

                streamConfigurations = new StreamConfiguration[]
                { new StreamConfiguration("features", inputDim), new StreamConfiguration("labels", numOutputClasses) };

                IList <Learner> parameterLearners =
                    new List <Learner>()
                {
                    CNTKLib.SGDLearner(classifierOutput.Parameters(), learningRatePerSample)
                };
                var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, parameterLearners);

                int outputFrequencyInMinibatches = 20;
                int trainingCheckpointFrequency  = 100;
                for (int i = 0; i < numMinibatchesToTrain; ++i)
                {
                    var minibatchData = minibatchSource.GetNextMinibatch((uint)minibatchSize, device);
                    var arguments     = new Dictionary <Variable, MinibatchData>
                    {
                        { input, minibatchData[featureStreamInfo] },
                        { labels, minibatchData[labelStreamInfo] }
                    };
                    trainer.TrainMinibatch(arguments, device);
                    TestHelper.PrintTrainingProgress(trainer, i, outputFrequencyInMinibatches);

                    if ((i % trainingCheckpointFrequency) == (trainingCheckpointFrequency - 1))
                    {
                        string ckpName = "feedForward.net";
                        trainer.SaveCheckpoint(ckpName);
                        trainer.RestoreFromCheckpoint(ckpName);
                    }
                }
            }
        }
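A minimal call-site sketch for the routine above, assuming SimpleDataTrain_cntk_text.txt sits in the working directory as the method expects:

        // Hypothetical driver: run the feed-forward trainer on the CPU.
        TrainSimpleFeedForwardClassifier(DeviceDescriptor.CPUDevice);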
Example #3
 /// <summary>
 /// Max pooling operation for temporal data.
 /// </summary>
 /// <param name="layer">The output of the last layer.</param>
 /// <param name="poolSize">Integer, size of the max pooling windows.</param>
 /// <param name="strides">Factor by which to downscale. E.g. 2 will halve the input. If None, it will default to pool_size.</param>
 /// <param name="padding">Boolean, if true results in padding the input such that the output has the same length as the original input.</param>
 /// <returns></returns>
 public static Function MaxPool1D(Variable layer, int poolSize, int strides, bool padding = true)
 {
     return(CNTKLib.Pooling(layer, PoolingType.Max, new int[] { poolSize }, new int[] { strides }, new BoolVector(new bool[] { padding, false, false })));
 }
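A hedged usage sketch, assuming a univariate sequence of length 100 laid out as (steps, features); the exact shape convention depends on the surrounding library:

 var sequence = Variable.InputVariable(new int[] { 100, 1 }, DataType.Float, "sequence");
 // Pool with window 2 and stride 2, halving the temporal dimension; no padding.
 Function pooled = MaxPool1D(sequence, 2, 2, padding: false);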
Example #4
 /// <summary>
 /// Dropout consists of randomly setting a fraction (rate) of the input units to 0 at each update during training, which helps prevent overfitting.
 /// </summary>
 /// <param name="layer">The output of the last layer.</param>
 /// <param name="rate">A float value between 0 and 1. Fraction of the input units to drop.</param>
 /// <returns></returns>
 public static Function Dropout(Variable layer, double rate)
 {
     return(CNTKLib.Dropout(layer, rate));
 }
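A usage sketch; hiddenLayer stands in for any preceding layer output:

 // Drop half of the input units at each training update (sketch; hiddenLayer is hypothetical).
 Function dropped = Dropout(hiddenLayer, 0.5);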
Example #5
 /// <inheritdoc />
 internal override Function ToFunction(Variable inputFunction)
 {
     return(CNTKLib.Tanh(inputFunction));
 }
Example #6
 protected override Variable BuildNetwork(string name)
 {
     InputVariable = CNTKLib.InputVariable(new int[] { Size }, DataType.Float, name);
     return(InputVariable);
 }
Example #7
        Tuple <Function, Function> LSTMPCellWithSelfStabilization <ElementType>(
            Variable input, Variable prevOutput, Variable prevCellState)
        {
            int outputDim = prevOutput.Shape[0];
            int cellDim   = prevCellState.Shape[0];

            bool     isFloatType = typeof(ElementType).Equals(typeof(float));
            DataType dataType    = isFloatType ? DataType.Float : DataType.Double;

            Func <int, Parameter> createBiasParam;

            if (isFloatType)
            {
                createBiasParam = (dim) => new Parameter(new int[] { dim }, 0.01f, device, "");
            }
            else
            {
                createBiasParam = (dim) => new Parameter(new int[] { dim }, 0.01, device, "");
            }

            uint seed2 = 1;
            Func <int, Parameter> createProjectionParam = (oDim) => new Parameter(new int[] { oDim, NDShape.InferredDimension },
                                                                                  dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed2++), device);

            Func <int, Parameter> createDiagWeightParam = (dim) =>
                                                          new Parameter(new int[] { dim }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed2++), device);

            Function stabilizedPrevOutput    = Stabilize <ElementType>(prevOutput);
            Function stabilizedPrevCellState = Stabilize <ElementType>(prevCellState);

            Func <Variable> projectInput = () =>
                                           createBiasParam(cellDim) + (createProjectionParam(cellDim) * input);

            // Input gate
            Function it =
                CNTKLib.Sigmoid(
                    (Variable)(projectInput() + (createProjectionParam(cellDim) * stabilizedPrevOutput)) +
                    CNTKLib.ElementTimes(createDiagWeightParam(cellDim), stabilizedPrevCellState));
            Function bit = CNTKLib.ElementTimes(
                it,
                CNTKLib.Tanh(projectInput() + (createProjectionParam(cellDim) * stabilizedPrevOutput)));

            // Forget-me-not gate
            Function ft = CNTKLib.Sigmoid(
                (Variable)(
                    projectInput() + (createProjectionParam(cellDim) * stabilizedPrevOutput)) +
                CNTKLib.ElementTimes(createDiagWeightParam(cellDim), stabilizedPrevCellState));
            Function bft = CNTKLib.ElementTimes(ft, prevCellState);

            Function ct = (Variable)bft + bit;

            // Output gate
            Function ot = CNTKLib.Sigmoid(
                (Variable)(projectInput() + (createProjectionParam(cellDim) * stabilizedPrevOutput)) +
                CNTKLib.ElementTimes(createDiagWeightParam(cellDim), Stabilize <ElementType>(ct)));
            Function ht = CNTKLib.ElementTimes(ot, CNTKLib.Tanh(ct));

            Function c = ct;
            Function h = (outputDim != cellDim) ? (createProjectionParam(outputDim) * Stabilize <ElementType>(ht)) : ht;

            return(new Tuple <Function, Function>(h, c));
        }
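To place the cell in context, here is a hedged sketch of wiring it into a recurrence, following the placeholder pattern of Example #23; outputDim, cellDim, and input are assumed to be in scope inside the same class:

            // Sketch: close the recurrence over the cell (all names are assumptions).
            var dh = Variable.PlaceholderVariable(new int[] { outputDim }, input.DynamicAxes);
            var dc = Variable.PlaceholderVariable(new int[] { cellDim }, input.DynamicAxes);
            var cell = LSTMPCellWithSelfStabilization<float>(input, dh, dc);
            // Feed the one-step-delayed output and cell state back into the placeholders.
            var lstm = Function.Combine(new List<Variable>() { cell.Item1, cell.Item2 });
            lstm.ReplacePlaceholders(new Dictionary<Variable, Variable>()
            {
                { dh, CNTKLib.PastValue(cell.Item1) }, { dc, CNTKLib.PastValue(cell.Item2) }
            });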
Example #8
 /// <summary>
 /// Reshapes an output to a certain shape.
 /// </summary>
 /// <param name="layer">The input layer to be reshaped.</param>
 /// <param name="targetShape">List of integers. Does not include the batch axis.</param>
 /// <returns></returns>
 public static Function Reshape(Variable layer, int[] targetShape)
 {
     return(CNTKLib.Reshape(layer, targetShape));
 }
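For instance, a flattening sketch:

 // Sketch: flatten a 28x28x1 image tensor into a 784-vector.
 var image = Variable.InputVariable(new int[] { 28, 28, 1 }, DataType.Float);
 Function flat = Reshape(image, new int[] { 784 });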
Example #9
 /// <summary>
 /// Reshapes an output to a certain shape.
 /// </summary>
 /// <param name="shape">The input shape of the data.</param>
 /// <param name="targetShape">List of integers. Does not include the batch axis.</param>
 /// <returns></returns>
 public static Function Reshape(int[] shape, int[] targetShape)
 {
     return(CNTKLib.Reshape(Variable.InputVariable(shape, DataType.Float), targetShape));
 }
Example #10
        //Network functions

        //Convolution block
        private static Function ConvBlock(Variable input_var, int[] kernel_size, int in_channels, int out_channels, float[] W = null, float[] B = null,
                                          string wpath = null, string[] layer_names = null, bool padding = true, bool use_relu = true, bool use_bn = true)
        {
            Function output_function;

            if (W != null && wpath != null)
            {
                throw new System.ArgumentException("Only one of W and wpath may be provided; the other must be null.", "W, wpath");
            }
            //If a weight path is given, load the weights from disk. Otherwise use the weights from the float array W or initialize new weights.
            if (wpath != null)
            {
                W = weight_fromDisk(wpath, layer_names[0]);
            }
            //Initialize weights
            Parameter weight = make_weight(kernel_size, in_channels, out_channels, W, layer_names[0]);

            //Generate convolution function
            Function convW = CNTKLib.Convolution(
                /*kernel and input*/ weight, input_var,
                /*strides*/ new int[] { 1, 1, in_channels },
                /*sharing*/ new CNTK.BoolVector {
                true
            },
                /*padding*/ new CNTK.BoolVector {
                padding
            });

            //Initialize bias
            if (wpath != null)
            {
                B = make_copies(weight_fromDisk(wpath, layer_names[1]), convW.Output.Shape);
            }
            Parameter bias = make_bias(convW.Output.Shape, B, layer_names[1]);

            //Add bias
            Function add = CNTKLib.Plus(convW, bias);

            //Sigmoid
            Function sig = CNTKLib.Sigmoid(add);

            if (use_bn == true)
            {
                //Initialize batch normalization
                int[]     bns = new int[] { 1, 1 };
                Parameter scale;
                Parameter bnbias;
                Parameter rm;
                Parameter rv;
                var       n = Constant.Scalar(0.0f, DeviceDescriptor.GPUDevice(0));

                make_bn_pars(out_channels, add.Output.Shape, out scale, out bnbias, out rm, out rv, wpath, layer_names[0]);

                //Batch normalization
                Function bn = CNTKLib.BatchNormalization(add, scale, bnbias, rm, rv, n, true);

                //ReLU
                Function relu = CNTKLib.ReLU(bn);
                output_function = relu;
            }
            else
            {
                if (use_relu == true)
                {
                    //ReLU
                    Function relu = CNTKLib.ReLU(add);
                    output_function = relu;
                }
                else
                {
                    output_function = sig;
                }
            }

            return(output_function);
        }
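A hedged call-site sketch with freshly initialized weights (no W array and no weight file) and hypothetical layer names; make_weight and make_bias are assumed to initialize new parameters when given null:

            // Sketch: 3x3 convolution from 1 to 32 channels with batch norm + ReLU.
            var images = Variable.InputVariable(new int[] { 256, 256, 1 }, DataType.Float, "images");
            Function block = ConvBlock(images, new int[] { 3, 3 }, 1, 32,
                                       layer_names: new string[] { "conv1", "conv1_bias" });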
Example #11
        /// <summary>
        /// Build and train an RNN model.
        /// </summary>
        /// <param name="device">CPU or GPU device to train and run the model</param>
        public static void Train(DeviceDescriptor device)
        {
            const int inputDim         = 2000;
            const int cellDim          = 25;
            const int hiddenDim        = 25;
            const int embeddingDim     = 50;
            const int numOutputClasses = 5;

            // build the model
            var featuresName = "features";
            var features     = Variable.InputVariable(new int[] { inputDim }, DataType.Float, featuresName, null, true /*isSparse*/);
            var labelsName   = "labels";
            var labels       = Variable.InputVariable(new int[] { numOutputClasses }, DataType.Float, labelsName,
                                                      new List <Axis>()
            {
                Axis.DefaultBatchAxis()
            }, true);

            var      classifierOutput = LSTMSequenceClassifierNet(features, numOutputClasses, embeddingDim, hiddenDim, cellDim, device, "classifierOutput");
            Function trainingLoss     = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labels, "lossFunction");
            Function prediction       = CNTKLib.ClassificationError(classifierOutput, labels, "classificationError");

            // prepare training data
            IList <StreamConfiguration> streamConfigurations = new StreamConfiguration[]
            { new StreamConfiguration(featuresName, inputDim, true, "x"), new StreamConfiguration(labelsName, numOutputClasses, false, "y") };
            var minibatchSource = MinibatchSource.TextFormatMinibatchSource(
                Path.Combine(DataFolder, "Train.ctf"), streamConfigurations,
                MinibatchSource.InfinitelyRepeat, true);
            var featureStreamInfo = minibatchSource.StreamInfo(featuresName);
            var labelStreamInfo   = minibatchSource.StreamInfo(labelsName);

            // prepare for training
            TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble(
                0.0005, 1);
            TrainingParameterScheduleDouble momentumTimeConstant = CNTKLib.MomentumAsTimeConstantSchedule(256);
            IList <Learner> parameterLearners = new List <Learner>()
            {
                Learner.MomentumSGDLearner(classifierOutput.Parameters(), learningRatePerSample, momentumTimeConstant, /*unitGainMomentum = */ true)
            };
            var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, parameterLearners);

            // train the model
            uint minibatchSize = 200;
            int  outputFrequencyInMinibatches = 20;
            int  miniBatchCount = 0;
            int  numEpochs      = 5;

            while (numEpochs > 0)
            {
                var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device);

                var arguments = new Dictionary <Variable, MinibatchData>
                {
                    { features, minibatchData[featureStreamInfo] },
                    { labels, minibatchData[labelStreamInfo] }
                };

                trainer.TrainMinibatch(arguments, device);
                TestHelper.PrintTrainingProgress(trainer, miniBatchCount++, outputFrequencyInMinibatches);

                // Because minibatchSource is created with MinibatchSource.InfinitelyRepeat,
                // batching will not end. Each time minibatchSource completes a sweep (epoch),
                // the last minibatch is marked as the end of a sweep. We use this flag
                // to count the number of epochs.
                if (TestHelper.MiniBatchDataIsSweepEnd(minibatchData.Values))
                {
                    numEpochs--;
                }
            }
        }
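A minimal driver sketch; the training data under DataFolder is assumed to exist:

            // Hypothetical call: train the sequence classifier on the default device.
            Train(DeviceDescriptor.UseDefaultDevice());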
Example #12
        public Dictionary <string, List <double> > Train(object trainData, object validationData, int epoches, int batchSize, On_Epoch_Start OnEpochStart, On_Epoch_End OnEpochEnd, On_Batch_Start onBatchStart, On_Batch_End OnBatchEnd, bool shuffle = false)
        {
            XYFrame train      = (XYFrame)trainData;
            XYFrame validation = validationData != null ? (XYFrame)validationData : null;
            Dictionary <string, List <double> > result = new Dictionary <string, List <double> >();
            var trainer      = Trainer.CreateTrainer(Model, lossFunc, metricFunc, learners);
            int currentEpoch = 1;
            Dictionary <string, double> metricsList = new Dictionary <string, double>();

            while (currentEpoch <= epoches)
            {
                if (shuffle)
                {
                    train.Shuffle();
                }

                metricsList = new Dictionary <string, double>();
                OnEpochStart(currentEpoch);
                int miniBatchCount = 1;
                while (train.NextBatch(miniBatchCount, batchSize))
                {
                    onBatchStart(currentEpoch, miniBatchCount);
                    Value features = DataFrameUtil.GetValueBatch(train.CurrentBatch.XFrame);
                    Value labels   = DataFrameUtil.GetValueBatch(train.CurrentBatch.YFrame);

                    trainer.TrainMinibatch(new Dictionary <Variable, Value>()
                    {
                        { featureVariable, features }, { labelVariable, labels }
                    }, GlobalParameters.Device);
                    OnBatchEnd(currentEpoch, miniBatchCount, trainer.TotalNumberOfSamplesSeen(), trainer.PreviousMinibatchLossAverage(), new Dictionary <string, double>()
                    {
                        { metricName, trainer.PreviousMinibatchEvaluationAverage() }
                    });
                    miniBatchCount++;
                }

                if (!result.ContainsKey("loss"))
                {
                    result.Add("loss", new List <double>());
                }

                if (!result.ContainsKey(metricName))
                {
                    result.Add(metricName, new List <double>());
                }

                double lossValue   = trainer.PreviousMinibatchLossAverage();
                double metricValue = trainer.PreviousMinibatchEvaluationAverage();
                result["loss"].Add(lossValue);
                result[metricName].Add(metricValue);
                metricsList.Add(metricName, metricValue);
                if (validation != null)
                {
                    if (!result.ContainsKey("val_loss"))
                    {
                        result.Add("val_loss", new List <double>());
                    }

                    if (!result.ContainsKey("val_" + metricName))
                    {
                        result.Add("val_" + metricName, new List <double>());
                    }

                    int           evalMiniBatchCount       = 1;
                    List <double> totalEvalBatchLossList   = new List <double>();
                    List <double> totalEvalMetricValueList = new List <double>();
                    while (validation.NextBatch(evalMiniBatchCount, batchSize))
                    {
                        Variable actualVariable = CNTKLib.InputVariable(labelVariable.Shape, DataType.Float);
                        var      evalLossFunc   = Losses.Get(lossName, labelVariable, actualVariable);
                        var      evalMetricFunc = Metrics.Get(metricName, labelVariable, actualVariable);
                        Value    actual         = EvaluateInternal(validation.XFrame);
                        Value    expected       = DataFrameUtil.GetValueBatch(validation.YFrame);
                        var      inputDataMap   = new Dictionary <Variable, Value>()
                        {
                            { labelVariable, expected }, { actualVariable, actual }
                        };
                        var outputDataMap = new Dictionary <Variable, Value>()
                        {
                            { evalLossFunc.Output, null }
                        };

                        evalLossFunc.Evaluate(inputDataMap, outputDataMap, GlobalParameters.Device);
                        var evalLoss = outputDataMap[evalLossFunc.Output].GetDenseData <float>(evalLossFunc.Output).Select(x => x.First()).ToList();
                        totalEvalBatchLossList.Add(evalLoss.Average());

                        inputDataMap = new Dictionary <Variable, Value>()
                        {
                            { labelVariable, expected }, { actualVariable, actual }
                        };
                        outputDataMap = new Dictionary <Variable, Value>()
                        {
                            { evalMetricFunc.Output, null }
                        };
                        evalMetricFunc.Evaluate(inputDataMap, outputDataMap, GlobalParameters.Device);
                        var evalMetric = outputDataMap[evalMetricFunc.Output].GetDenseData <float>(evalMetricFunc.Output).Select(x => x.First()).ToList();
                        totalEvalMetricValueList.Add(evalMetric.Average());

                        evalMiniBatchCount++;
                    }

                    result["val_loss"].Add(totalEvalBatchLossList.Average());
                    metricsList.Add("val_loss", totalEvalBatchLossList.Average());
                    result["val_" + metricName].Add(totalEvalMetricValueList.Average());
                    metricsList.Add("val_" + metricName, totalEvalMetricValueList.Average());
                }

                OnEpochEnd(currentEpoch, trainer.TotalNumberOfSamplesSeen(), lossValue, metricsList);
                currentEpoch++;
            }

            return(result);
        }
Example #13
        /// <summary>
        /// Train and evaluate a image classifier for MNIST data.
        /// </summary>
        /// <param name="device">CPU or GPU device to run training and evaluation</param>
        /// <param name="useConvolution">option to use convolution network or to use multilayer perceptron</param>
        /// <param name="forceRetrain">whether to override an existing model.
        /// if true, any existing model will be overridden and the new one evaluated.
        /// if false and there is an existing model, the existing model is evaluated.</param>
        public static void TrainAndEvaluate(DeviceDescriptor device, bool useConvolution, bool forceRetrain)
        {
            var featureStreamName = "features";
            var labelsStreamName  = "labels";
            var classifierName    = "classifierOutput";

            Function classifierOutput;

            int[] imageDim   = useConvolution ? new int[] { 28, 28, 1 } : new int[] { 784 };
            int   imageSize  = 28 * 28;
            int   numClasses = 10;

            IList <StreamConfiguration> streamConfigurations = new StreamConfiguration[]
            { new StreamConfiguration(featureStreamName, imageSize), new StreamConfiguration(labelsStreamName, numClasses) };

            string modelFile = useConvolution ? "MNISTConvolution.model" : "MNISTMLP.model";

            // If a model already exists and not set to force retrain, validate the model and return.
            //prepare vars to accept results
            List <List <float> > X = new List <List <float> >();
            List <float>         Y = new List <float>();

            if (File.Exists(modelFile) && !forceRetrain)
            {
                var minibatchSourceExistModel = MinibatchSource.TextFormatMinibatchSource(
                    Path.Combine(ImageDataFolder, "MINST-TestData.txt"), streamConfigurations);

                //Model validation
                ValidateModel(modelFile, minibatchSourceExistModel, imageDim, numClasses, featureStreamName, labelsStreamName,
                              classifierName, device, 1000, X, Y, useConvolution);

                //show image classification result
                showResult(X, Y);
                return;
            }

            // build the network
            var input = CNTKLib.InputVariable(imageDim, DataType.Float, featureStreamName);

            if (useConvolution)
            {
                var scaledInput = CNTKLib.ElementTimes(Constant.Scalar <float>(0.00390625f, device), input);
                classifierOutput = CreateConvolutionalNeuralNetwork(scaledInput, numClasses, device, classifierName);
            }
            else
            {
                // For the MLP, we want the middle layer to have a certain number of states.
                int hiddenLayerDim = 200;
                var scaledInput    = CNTKLib.ElementTimes(Constant.Scalar <float>(0.00390625f, device), input);
                classifierOutput = CreateMLPClassifier(device, numClasses, hiddenLayerDim, scaledInput, classifierName);
            }

            var labels = CNTKLib.InputVariable(new int[] { numClasses }, DataType.Float, labelsStreamName);
            //Loss and Eval functions
            var trainingLoss = CNTKLib.CrossEntropyWithSoftmax(new Variable(classifierOutput), labels, "lossFunction");
            var prediction   = CNTKLib.ClassificationError(new Variable(classifierOutput), labels, "classificationError");

            // prepare training data
            var minibatchSource = MinibatchSource.TextFormatMinibatchSource(
                Path.Combine(ImageDataFolder, "MINST-TrainData.txt"), streamConfigurations, MinibatchSource.InfinitelyRepeat);

            var featureStreamInfo = minibatchSource.StreamInfo(featureStreamName);
            var labelStreamInfo   = minibatchSource.StreamInfo(labelsStreamName);

            // set per sample learning rate
            var learningRatePerSample = new CNTK.TrainingParameterScheduleDouble(0.003125, 1);

            IList <Learner> parameterLearners = new List <Learner>()
            {
                Learner.SGDLearner(classifierOutput.Parameters(), learningRatePerSample)
            };

            var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, parameterLearners);

            //
            const uint minibatchSize = 64;
            int        outputFrequencyInMinibatches = 100, i = 0;
            int        epochs = 3;

            while (epochs > 0)
            {
                var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device);
                var arguments     = new Dictionary <Variable, MinibatchData>
                {
                    { input, minibatchData[featureStreamInfo] },
                    { labels, minibatchData[labelStreamInfo] }
                };

                trainer.TrainMinibatch(arguments, device);
                //
                TestHelper.PrintTrainingProgress(trainer, i++, outputFrequencyInMinibatches);

                // MinibatchSource is created with MinibatchSource.InfinitelyRepeat,
                // so batching will not end. Each time minibatchSource completes a sweep (epoch),
                // the last minibatch is marked as the end of a sweep. We use this flag
                // to count the number of epochs.
                if (TestHelper.MiniBatchDataIsSweepEnd(minibatchData.Values))
                {
                    epochs--;
                }
            }

            // save the trained model
            classifierOutput.Save(modelFile);

            // validate the model
            var minibatchSourceNewModel = MinibatchSource.TextFormatMinibatchSource(
                Path.Combine(ImageDataFolder, "MINST-TestData.txt"), streamConfigurations, MinibatchSource.InfinitelyRepeat);

            //Model validation
            ValidateModel(modelFile, minibatchSourceNewModel, imageDim, numClasses, featureStreamName, labelsStreamName,
                          classifierName, device, 1000, X, Y, useConvolution);

            //show image classification result
            showResult(X, Y);
        }
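A hedged driver sketch for the routine above (the MINST-*.txt data files are assumed to be present under ImageDataFolder):

            // Sketch: retrain a convolutional MNIST classifier from scratch on the CPU.
            TrainAndEvaluate(DeviceDescriptor.CPUDevice, useConvolution: true, forceRetrain: true);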
Example #14
        public static float ValidateModel(string modelFile, MinibatchSource testMinibatchSource, int[] imageDim, int numClasses,
                                          string featureInputName, string labelInputName, string outputName, DeviceDescriptor device,
                                          int maxCount = 1000, List <List <float> > X = null, List <float> Y = null, bool useConvolution = true)
        {
            Function model      = Function.Load(modelFile, device);
            var      imageInput = model.Arguments[0];

            var labelOutput = model.Outputs.Single(o => o.Name == outputName);

            var featureStreamInfo = testMinibatchSource.StreamInfo(featureInputName);
            var labelStreamInfo   = testMinibatchSource.StreamInfo(labelInputName);

            int batchSize = 50;
            int miscountTotal = 0, totalCount = 0;

            while (true)
            {
                var minibatchData = testMinibatchSource.GetNextMinibatch((uint)batchSize, device);
                if (minibatchData == null || minibatchData.Count == 0)
                {
                    break;
                }
                totalCount += (int)minibatchData[featureStreamInfo].numberOfSamples;

                // expected labels are in the minibatch data.

                var labelData      = minibatchData[labelStreamInfo].data.GetDenseData <float>(labelOutput);
                var expectedLabels = labelData.Select(l => l.IndexOf(l.Max())).ToList();

                var inputDataMap = new Dictionary <Variable, Value>()
                {
                    { imageInput, minibatchData[featureStreamInfo].data }
                };

                var outputDataMap = new Dictionary <Variable, Value>()
                {
                    { labelOutput, null }
                };

                model.Evaluate(inputDataMap, outputDataMap, device);

                var featureData = minibatchData[featureStreamInfo].data.GetDenseData <float>(CNTKLib.InputVariable(minibatchData[featureStreamInfo].data.Shape, DataType.Float, model.Arguments[0].Name));

                var outputData   = outputDataMap[labelOutput].GetDenseData <float>(labelOutput);
                var actualLabels = outputData.Select(l => l.IndexOf(l.Max())).ToList();

                int misMatches = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum();

                miscountTotal += misMatches;
                Console.WriteLine($"Validating Model: Total Samples = {totalCount}, Misclassify Count = {miscountTotal}");

                if (totalCount > maxCount)
                {
                    //writes some result in to array

                    for (int i = 0; i < outputData.Count && X != null && Y != null; i++)
                    {
                        var imgDim      = imageDim.Aggregate(1, (acc, val) => acc * val);
                        var inputVector = featureData[0].Skip(imgDim * i).Take(imgDim).Select(x => (float)x).ToList();
                        X.Add(inputVector);
                        var currLabel = actualLabels[i];
                        Y.Add(currLabel);
                    }
                    break;
                }
            }

            float errorRate = 1.0F * miscountTotal / totalCount;

            Console.WriteLine($"Model Validation Error = {errorRate}");
            return(errorRate);
        }
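A hedged call-site sketch, reusing the stream configurations and file names from Example #13 (streamConfigurations is assumed to be in scope):

            // Sketch: validate a saved MNIST model against the test set.
            var testSource = MinibatchSource.TextFormatMinibatchSource("MINST-TestData.txt", streamConfigurations);
            float error = ValidateModel("MNISTConvolution.model", testSource, new int[] { 28, 28, 1 }, 10,
                                        "features", "labels", "classifierOutput", DeviceDescriptor.CPUDevice);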
Example #15
 public override Function ApplyActivationFunction(Function variable, DeviceDescriptor device)
 {
     return(CNTKLib.Tanh(variable));
 }
Example #16
        /// <summary>
        /// Dense implements the operation: output = activation(dot(input, kernel) + bias), where activation is the element-wise activation function passed as the activation argument, kernel is a weights matrix created by the layer, and bias is a bias vector created by the layer (only applicable if useBias is true).
        /// </summary>
        /// <param name="shape">The input shape.</param>
        /// <param name="dim">Positive integer, dimensionality of the output space..</param>
        /// <param name="act">Activation function to use. If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x). <see cref="SiaNet.Common.OptActivations"/></param>
        /// <param name="useBias">Boolean, whether the layer uses a bias vector.</param>
        /// <param name="weightInitializer">Initializer for the kernel weights matrix. <see cref="SiaNet.Common.OptInitializers"/></param>
        /// <param name="biasInitializer">Initializer for the bias vector. <see cref="SiaNet.Common.OptInitializers"/></param>
        /// <returns></returns>
        public static Function Dense(int shape, int dim, string activation = OptActivations.None, bool useBias = false, Initializer weightInitializer = null, Initializer biasInitializer = null)
        {
            var input = CNTKLib.InputVariable(new int[] { shape }, DataType.Float);

            return(Dense(input, dim, activation, useBias, weightInitializer, biasInitializer));
        }
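A usage sketch, assuming OptActivations defines a ReLU constant alongside the members referenced above:

            // Sketch: a dense layer mapping a 784-dim input to 128 units with ReLU and a bias.
            Function dense = Dense(784, 128, OptActivations.ReLU, useBias: true);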
Example #17
        /// <summary>
        /// Creates the learner based on learning parameters.
        /// ToDo: not all learner parameters are defined
        /// </summary>
        /// <param name="network">Network model being trained</param>
        /// <param name="lrParams">Learning parameters.</param>
        /// <returns></returns>
        private List <Learner> createLearners(Function network, LearningParameters lrParams)
        {
            //learning rate and momentum values
            var lr       = new TrainingParameterScheduleDouble(lrParams.LearningRate);
            var mm       = CNTKLib.MomentumAsTimeConstantSchedule(lrParams.Momentum);
            var addParam = new AdditionalLearningOptions();

            //
            if (lrParams.L1Regularizer > 0)
            {
                addParam.l1RegularizationWeight = lrParams.L1Regularizer;
            }
            if (lrParams.L2Regularizer > 0)
            {
                addParam.l2RegularizationWeight = lrParams.L2Regularizer;
            }

            //SGD Momentum learner
            if (lrParams.LearnerType == LearnerType.MomentumSGDLearner)
            {
                //
                var llr  = new List <Learner>();
                var msgd = CNTKLib.MomentumSGDLearner(new ParameterVector(network.Parameters().ToList()), lr, mm, true, addParam);
                llr.Add(msgd);
                return(llr);
            }
            //SGDLearner - rate and regulars
            else if (lrParams.LearnerType == LearnerType.SGDLearner)
            {
                //
                var llr  = new List <Learner>();
                var msgd = CNTKLib.SGDLearner(new ParameterVector(network.Parameters().ToList()), lr, addParam);
                llr.Add(msgd);
                return(llr);
            }
            //FSAdaGradLearner learner - rate, moment regulars
            else if (lrParams.LearnerType == LearnerType.FSAdaGradLearner)
            {
                //
                var llr  = new List <Learner>();
                var msgd = CNTKLib.FSAdaGradLearner(new ParameterVector(network.Parameters().ToList()), lr, mm);
                llr.Add(msgd);
                return(llr);
            }
            //AdamLearner learner
            else if (lrParams.LearnerType == LearnerType.AdamLearner)
            {
                //
                var llr  = new List <Learner>();
                var msgd = CNTKLib.AdamLearner(new ParameterVector(network.Parameters().ToList()), lr, mm);
                llr.Add(msgd);
                return(llr);
            }
            //AdaGradLearner learner - Learning rate and regularizers
            else if (lrParams.LearnerType == LearnerType.AdaGradLearner)
            {
                //
                var llr  = new List <Learner>();
                var msgd = CNTKLib.AdaGradLearner(new ParameterVector(network.Parameters().ToList()), lr, false, addParam);
                llr.Add(msgd);
                return(llr);
            }
            else
            {
                throw new Exception("Learner type is not supported!");
            }
        }
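A hedged sketch of using this factory from inside the owning class; network, loss, and evalError stand for the model and its criterion functions, and the object-initializer form assumes the LearningParameters properties are settable:

            // Sketch: momentum SGD with L2 regularization (hypothetical values).
            var lrParams = new LearningParameters()
            {
                LearnerType   = LearnerType.MomentumSGDLearner,
                LearningRate  = 0.005,
                Momentum      = 0.9,
                L2Regularizer = 0.001
            };
            var learners = createLearners(network, lrParams);
            var trainer  = Trainer.CreateTrainer(network, loss, evalError, learners);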
Example #18
        /// <summary>
        /// The main program entry point.
        /// </summary>
        /// <param name="args">The command line parameters.</param>
        static void Main(string[] args)
        {
            // check the compute device
            Console.WriteLine("Checking compute device...");
            Console.WriteLine($"  Using: {NetUtil.CurrentDevice.AsString()}");

            // unpack archive
            Console.WriteLine("Unpacking archive...");
            if (!File.Exists("x_train_imdb.bin"))
            {
                ZipFile.ExtractToDirectory("imdb_data.zip", ".");
            }

            // load training and test data
            Console.WriteLine("Loading data files...");
            var sequenceLength  = 500;
            var training_data   = DataUtil.LoadBinary <float>("x_train_imdb.bin", 25000, sequenceLength);
            var training_labels = DataUtil.LoadBinary <float>("y_train_imdb.bin", 25000);
            var testing_data    = DataUtil.LoadBinary <float>("x_test_imdb.bin", 25000, sequenceLength);
            var testing_labels  = DataUtil.LoadBinary <float>("y_test_imdb.bin", 25000);

            Console.WriteLine($"  Records for training: {training_data.Length}");
            Console.WriteLine($"  Records for testing:  {testing_data.Length}");

            // build features and labels
            var features = NetUtil.Var(new int[] { 1 }, CNTK.DataType.Float);
            var labels   = NetUtil.Var(new int[] { 1 }, CNTK.DataType.Float,
                                       dynamicAxes: new List <CNTK.Axis>()
            {
                CNTK.Axis.DefaultBatchAxis()
            });

            // build the network
            var lstmUnits = 32;
            var network   = features
                            .OneHotOp(10000, true)
                            .Embedding(32)
                            .LSTM(lstmUnits, lstmUnits)
                            .Dense(1, CNTKLib.Sigmoid)
                            .ToNetwork();

            Console.WriteLine("Model architecture:");
            Console.WriteLine(network.ToSummary());

            // set up the loss function and the classification error function
            var lossFunc  = CNTKLib.BinaryCrossEntropy(network.Output, labels);
            var errorFunc = NetUtil.BinaryClassificationError(network.Output, labels);

            // use the Adam learning algorithm
            var learner = network.GetAdamLearner(
                learningRateSchedule: (0.001, 1),
                momentumSchedule: (0.9, 1),
                unitGain: true);

            // set up a trainer and an evaluator
            var trainer   = network.GetTrainer(learner, lossFunc, errorFunc);
            var evaluator = network.GetEvaluator(errorFunc);

            // train the model
            Console.WriteLine("Epoch\tTrain\tTrain\tTest");
            Console.WriteLine("\tLoss\tError\tError");
            Console.WriteLine("-----------------------------");

            var maxEpochs     = 10;
            var batchSize     = 128;
            var loss          = new double[maxEpochs];
            var trainingError = new double[maxEpochs];
            var testingError  = new double[maxEpochs];
            var batchCount    = 0;

            for (int epoch = 0; epoch < maxEpochs; epoch++)
            {
                // train one epoch on batches
                loss[epoch]          = 0.0;
                trainingError[epoch] = 0.0;
                batchCount           = 0;
                training_data.Batch(batchSize, (data, begin, end) =>
                {
                    // get the current batch
                    var featureBatch = features.GetSequenceBatch(sequenceLength, training_data, begin, end);
                    var labelBatch   = labels.GetBatch(training_labels, begin, end);

                    // train the network on the batch
                    var result = trainer.TrainBatch(
                        new[] {
                        (features, featureBatch),
                        (labels, labelBatch)
                    },
Example #19
        /// <summary>
        /// Build and train an RNN model.
        /// </summary>
        /// <param name="device">CPU or GPU device to train and run the model</param>
        public void Train_predict(int M, int numEpochs = 1500, int inDim = 30, int cellDim = 25, int hiDim = 5)
        {
            string featuresName = "features";
            string labelsName   = "label";

            const int ouDim = 1;

            Dictionary <string, Set> dataSet = loadData(inDim, featuresName, labelsName, fun);


            var featureSet = dataSet[featuresName];
            var labelSet   = dataSet[labelsName];


            ///// Debug data
            //int q = 0;
            //using (StreamWriter file = new StreamWriter("0.txt"))
            //{
            //    file.WriteLine("Train");
            //    for (int i = 0; i < featureSet.train.Length; i++)
            //    {
            //        file.Write(q + ": ");
            //        for (int j = 0; j < featureSet.train[i].Length; j++)
            //            file.Write(featureSet.train[i][j] + " ");
            //        file.Write(labelSet.train[i][0]);
            //        file.WriteLine();
            //        q++;
            //    }

            //    file.WriteLine("Valid");
            //    for (int i = 0; i < featureSet.valid.Length; i++)
            //    {
            //        file.Write(q + ": ");
            //        for (int j = 0; j < featureSet.valid[i].Length; j++)
            //            file.Write(featureSet.valid[i][j] + " ");
            //        file.Write(labelSet.valid[i][0]);
            //        file.WriteLine();
            //        q++;
            //    }

            //    file.WriteLine("Test");
            //    for (int i = 0; i < featureSet.test.Length; i++)
            //    {
            //        file.Write(q + ": ");
            //        for (int j = 0; j < featureSet.test[i].Length; j++)
            //            file.Write(featureSet.test[i][j] + " ");
            //        file.Write(labelSet.test[i][0]);
            //        file.WriteLine();
            //        q++;
            //    }

            //}

            // build the model

            var feature = Variable.InputVariable(new int[] { inDim + (advanced_input ? 2 : 0) }, DataType.Float, featuresName, null, false /*isSparse*/);
            var label   = Variable.InputVariable(new int[] { ouDim }, DataType.Float, labelsName, new List <CNTK.Axis>()
            {
                CNTK.Axis.DefaultBatchAxis()
            }, false);

            var lstmModel = CreateModel(feature, ouDim, hiDim, cellDim, "timeSeriesOutput");

            Function trainingLoss = CNTKLib.SquaredError(lstmModel, label, "squarederrorLoss");
            Function prediction   = CNTKLib.SquaredError(lstmModel, label, "squarederrorEval");

            // prepare for training
            TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble(0.0005, 1);
            TrainingParameterScheduleDouble momentumTimeConstant  = CNTKLib.MomentumAsTimeConstantSchedule(256);

            IList <Learner> parameterLearners = new List <Learner>()
            {
                Learner.MomentumSGDLearner(lstmModel.Parameters(), learningRatePerSample, momentumTimeConstant, /*unitGainMomentum = */ true)
            };


            var trainer = Trainer.CreateTrainer(lstmModel, trainingLoss, prediction, parameterLearners);

            // train the model
            int batchSize = 20;
            int outputFrequencyInMinibatches = 50;
            int miniBatchCount = 0;

            for (int i = 1; i <= numEpochs; i++)
            {
                //get the next minibatch of data
                foreach (var miniBatchData in LSTMSequence.nextBatch(featureSet.train, labelSet.train, batchSize))
                {
                    var xValues = Value.CreateBatch <float>(new NDShape(1, inDim + (advanced_input ? 2 : 0)), miniBatchData.X, device);
                    var yValues = Value.CreateBatch <float>(new NDShape(1, ouDim), miniBatchData.Y, device);

                    //Combine variables and data into a Dictionary for the training
                    var batchData = new Dictionary <Variable, Value>();
                    batchData.Add(feature, xValues);
                    batchData.Add(label, yValues);

                    //train minibatch data
                    trainer.TrainMinibatch(batchData, device);

                    TestHelper.PrintTrainingProgress(trainer, miniBatchCount++, outputFrequencyInMinibatches);
                }
            }
            predict_test(dataSet, trainer.Model(), inDim, ouDim, batchSize, featuresName, labelsName, M);
            predict(dataSet, trainer.Model(), inDim, ouDim, batchSize, featuresName, labelsName, M);
        }
Example #20
        /// <summary>
        /// Train and evaluate an image classifier with CIFAR-10 data.
        /// The classification model is saved after training.
        /// For repeated runs, the caller may choose whether to retrain a model or
        /// just validate an existing one.
        /// </summary>
        /// <param name="device">CPU or GPU device to run</param>
        /// <param name="forceRetrain">whether to override an existing model.
        /// if true, any existing model will be overridden and the new one evaluated.
        /// if false and there is an existing model, the existing model is evaluated.</param>
        public static void TrainAndEvaluate(DeviceDescriptor device, bool forceRetrain)
        {
            string modelFile = "Cifar10Rest.model";

            // If a model already exists and not set to force retrain, validate the model and return.
            if (File.Exists(modelFile) && !forceRetrain)
            {
                ValidateModel(device, modelFile);
                return;
            }

            // prepare training data
            var minibatchSource = CreateMinibatchSource(Path.Combine(CifarDataFolder, "train_map.txt"),
                                                        Path.Combine(CifarDataFolder, "CIFAR-10_mean.xml"), imageDim, numClasses, MaxEpochs);
            var imageStreamInfo = minibatchSource.StreamInfo("features");
            var labelStreamInfo = minibatchSource.StreamInfo("labels");

            // build a model
            var imageInput       = CNTKLib.InputVariable(imageDim, imageStreamInfo.m_elementType, "Images");
            var labelsVar        = CNTKLib.InputVariable(new int[] { numClasses }, labelStreamInfo.m_elementType, "Labels");
            var classifierOutput = ResNetClassifier(imageInput, numClasses, device, "classifierOutput");

            // prepare for training
            var trainingLoss = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labelsVar, "lossFunction");
            var prediction   = CNTKLib.ClassificationError(classifierOutput, labelsVar, 5, "predictionError");

            var learningRatePerSample = new TrainingParameterPerSampleScheduleDouble(0.0078125);
            var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction,
                                                new List <Learner> {
                Learner.SGDLearner(classifierOutput.Parameters(), learningRatePerSample)
            });

            uint minibatchSize = 64;
            int  outputFrequencyInMinibatches = 20, miniBatchCount = 0;

            // Feed data to the trainer for number of epochs.
            while (true)
            {
                var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device);

                // Stop training once max epochs is reached.
                if (minibatchData.empty())
                {
                    break;
                }

                trainer.TrainMinibatch(new Dictionary <Variable, MinibatchData>()
                {
                    { imageInput, minibatchData[imageStreamInfo] }, { labelsVar, minibatchData[labelStreamInfo] }
                }, device);
                TestHelper.PrintTrainingProgress(trainer, miniBatchCount++, outputFrequencyInMinibatches);
            }

            // save the model
            var imageClassifier = Function.Combine(new List <Variable>()
            {
                trainingLoss, prediction, classifierOutput
            }, "ImageClassifier");

            imageClassifier.Save(modelFile);

            // validate the model
            ValidateModel(device, modelFile);
        }
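A hedged driver sketch (GPU index 0 is an assumption):

            // Sketch: force a fresh training run of the CIFAR-10 ResNet classifier.
            TrainAndEvaluate(DeviceDescriptor.GPUDevice(0), forceRetrain: true);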
Example #21
        public void TrainAndEvaluateRegression(DeviceDescriptor device)
        {
            // build a logistic regression model
            Variable featureVariable  = Variable.InputVariable(new int[] { inputDim }, DataType.Float);
            Variable labelVariable    = Variable.InputVariable(new int[] { numOutputClasses }, DataType.Float);
            var      classifierOutput = CreateLinearModel(featureVariable, numOutputClasses, device);
            var      loss             = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labelVariable);
            var      evalError        = CNTKLib.ClassificationError(classifierOutput, labelVariable);

            // prepare for training
            TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble(0.02, 1);
            IList <Learner> parameterLearners =
                new List <Learner>()
            {
                Learner.SGDLearner(classifierOutput.Parameters(), learningRatePerSample)
            };
            var trainer = Trainer.CreateTrainer(classifierOutput, loss, evalError, parameterLearners);

            int minibatchSize         = 64;
            int numMinibatchesToTrain = 1000;
            int updatePerMinibatches  = 50;

            // train the model
            for (int minibatchCount = 0; minibatchCount < numMinibatchesToTrain; minibatchCount++)
            {
                Value features, labels;
                GenerateValueData(minibatchSize, inputDim, numOutputClasses, out features, out labels, device);
                //TODO: sweepEnd should be set properly instead of false.
#pragma warning disable 618
                trainer.TrainMinibatch(
                    new Dictionary <Variable, Value>()
                {
                    { featureVariable, features }, { labelVariable, labels }
                }, device);
#pragma warning restore 618
                PrintTrainingProgress(trainer, minibatchCount, updatePerMinibatches);
            }

            // test and validate the model
            int   testSize = 100;
            Value testFeatureValue, expectedLabelValue;
            GenerateValueData(testSize, inputDim, numOutputClasses, out testFeatureValue, out expectedLabelValue, device);

            // GetDenseData just needs the variable's shape
            IList <IList <float> > expectedOneHot = expectedLabelValue.GetDenseData <float>(labelVariable);
            IList <int>            expectedLabels = expectedOneHot.Select(l => l.IndexOf(1.0F)).ToList();

            var inputDataMap = new Dictionary <Variable, Value>()
            {
                { featureVariable, testFeatureValue }
            };
            var outputDataMap = new Dictionary <Variable, Value>()
            {
                { classifierOutput.Output, null }
            };
            classifierOutput.Evaluate(inputDataMap, outputDataMap, device);
            var outputValue = outputDataMap[classifierOutput.Output];
            IList <IList <float> > actualLabelSoftMax = outputValue.GetDenseData <float>(classifierOutput.Output);
            var actualLabels = actualLabelSoftMax.Select((IList <float> l) => l.IndexOf(l.Max())).ToList();
            int misMatches   = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum();

            Console.WriteLine($"Validating Model: Total Samples = {testSize}, Misclassify Count = {misMatches}");
        }
Example #22
        public override Function Create(Function input, DeviceDescriptor device)
        {
            int newDim = input.Output.Shape.Dimensions.Aggregate((d1, d2) => d1 * d2);

            return(CNTKLib.Reshape(input, new int[] { newDim }));
        }
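In effect this is a Flatten layer. A hedged sketch, where flatten stands for an instance of the enclosing layer class:

            // Sketch: an input of shape { 4, 4, 8 } is reshaped to a flat { 128 } vector.
            Function conv = CNTKLib.ReLU(CNTKLib.InputVariable(new int[] { 4, 4, 8 }, DataType.Float));
            Function flat = flatten.Create(conv, DeviceDescriptor.CPUDevice);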
Example #23
        public static Function LSTM(Variable layer, int dim, int? cellDim = null, string activation = OptActivations.Tanh, string recurrentActivation = OptActivations.Sigmoid, string weightInitializer = OptInitializers.GlorotUniform, string recurrentInitializer = OptInitializers.GlorotUniform, bool useBias = true, string biasInitializer = OptInitializers.Zeros, bool returnSequence = false)
        {
            cellDim = cellDim.HasValue ? cellDim : dim;
            Variable prevOutput    = Variable.PlaceholderVariable(new int[] { dim }, layer.DynamicAxes);
            Variable prevCellState = cellDim.HasValue ? Variable.PlaceholderVariable(new int[] { cellDim.Value }, layer.DynamicAxes) : null;

            Func <int, Parameter> createBiasParam = (d) => new Parameter(new int[] { d }, DataType.Float, Initializers.Get(biasInitializer), GlobalParameters.Device);

            Func <int, Parameter> createProjectionParam = (oDim) => new Parameter(new int[] { oDim, NDShape.InferredDimension },
                                                                                  DataType.Float, Initializers.Get(weightInitializer), GlobalParameters.Device);

            Func <int, Parameter> createDiagWeightParam = (d) =>
                                                          new Parameter(new int[] { d }, DataType.Float, Initializers.Get(recurrentInitializer), GlobalParameters.Device);

            Function stabilizedPrevOutput    = Stabilize <float>(prevOutput, GlobalParameters.Device);
            Function stabilizedPrevCellState = prevCellState != null ? Stabilize <float>(prevCellState, GlobalParameters.Device) : null;

            Func <Variable> projectInput = null;

            if (cellDim.HasValue)
            {
                projectInput = () => createBiasParam(cellDim.Value) + (createProjectionParam(cellDim.Value) * layer);
            }
            else
            {
                projectInput = () => layer;
            }

            // Input gate
            Function it = null;

            if (cellDim.HasValue)
            {
                it = Basic.Activation((Variable)(projectInput() + (createProjectionParam(cellDim.Value) * stabilizedPrevOutput)) + CNTKLib.ElementTimes(createDiagWeightParam(cellDim.Value), stabilizedPrevCellState), recurrentActivation);
            }
            else
            {
                it = Basic.Activation((Variable)(projectInput()), recurrentActivation);
            }

            Function bit = null;

            if (cellDim.HasValue)
            {
                bit = CNTKLib.ElementTimes(it, Basic.Activation(projectInput() + (createProjectionParam(cellDim.Value) * stabilizedPrevOutput), activation));
            }
            else
            {
                bit = CNTKLib.ElementTimes(it, Basic.Activation(projectInput(), activation));
            }

            // Forget gate
            Function ft = null;

            if (cellDim.HasValue)
            {
                ft = Basic.Activation((Variable)(projectInput() + (createProjectionParam(cellDim.Value) * stabilizedPrevOutput)) + CNTKLib.ElementTimes(createDiagWeightParam(cellDim.Value), stabilizedPrevCellState), recurrentActivation);
            }
            else
            {
                ft = Basic.Activation(projectInput(), recurrentActivation);
            }

            Function bft = prevCellState != null ? CNTKLib.ElementTimes(ft, prevCellState) : ft;

            Function ct = (Variable)bft + bit;

            // Output gate
            Function ot = null;

            if (cellDim.HasValue)
            {
                ot = Basic.Activation((Variable)(projectInput() + (createProjectionParam(cellDim.Value) * stabilizedPrevOutput)) + CNTKLib.ElementTimes(createDiagWeightParam(cellDim.Value), Stabilize <float>(ct, GlobalParameters.Device)), recurrentActivation);
            }
            else
            {
                ot = Basic.Activation((Variable)(projectInput()) + Stabilize <float>(ct, GlobalParameters.Device), recurrentActivation);
            }

            Function ht = CNTKLib.ElementTimes(ot, CNTKLib.Tanh(ct));
            Function c  = ct;
            Function h  = (dim != cellDim) ? (createProjectionParam(dim) * Stabilize <float>(ht, GlobalParameters.Device)) : ht;

            Func <Variable, Function> recurrenceHookH = (x) => CNTKLib.PastValue(x);
            Func <Variable, Function> recurrenceHookC = (x) => CNTKLib.PastValue(x);

            var actualDh = recurrenceHookH(h);
            var actualDc = recurrenceHookC(c);

            if (prevCellState != null)
            {
                h.ReplacePlaceholders(new Dictionary <Variable, Variable> {
                    { prevOutput, actualDh }, { prevCellState, actualDc }
                });
            }
            else
            {
                h.ReplacePlaceholders(new Dictionary <Variable, Variable> {
                    { prevOutput, actualDh }
                });
            }

            if (returnSequence)
            {
                return(h);
            }

            return(CNTKLib.SequenceLast(h));
        }
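A minimal usage sketch for the LSTM layer above, assuming a SiaNet-style setup where GlobalParameters.Device is already configured; the input dimension and unit count are invented for illustration:

    // hypothetical wiring: a 1D sequence input fed through the LSTM,
    // keeping only the last step's output (returnSequence: false)
    Variable sequenceInput = CNTKLib.InputVariable(new int[] { 80 }, DataType.Float, "tokens");
    Function lstmOut = LSTM(sequenceInput, dim: 128, returnSequence: false);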
 /// <inheritdoc />
 internal override Function ToFunction(Variable inputFunction)
 {
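     // global max pooling: the window spans the full first dimension of the input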
     return(CNTKLib.Pooling(inputFunction, PoolingType.Max, new[] { inputFunction.Shape[0] }));
 }
        static void Main(string[] args)
        {
            Console.WriteLine("Loading data....");

            // unzip archive
            if (!System.IO.File.Exists("train_images.bin"))
            {
                DataUtil.Unzip(@"mnist_data.zip", ".");
            }

            // load training and test data
            var training_data   = DataUtil.LoadBinary <float>("train_images.bin", 60000, 28 * 28);
            var test_data       = DataUtil.LoadBinary <float>("test_images.bin", 10000, 28 * 28);
            var training_labels = DataUtil.LoadBinary <float>("train_labels.bin", 60000, 10);
            var test_labels     = DataUtil.LoadBinary <float>("test_labels.bin", 10000, 10);

            // report results
            Console.WriteLine($"{training_data.GetLength(0)} training digits loaded");
            Console.WriteLine($"{test_data.GetLength(0)} test digits loaded");

            // build features and labels
            var features = NetUtil.Var(new int[] { 28, 28 }, DataType.Float);
            var labels   = NetUtil.Var(new int[] { 10 }, DataType.Float);

            // build the network
            var network = features
                          .Dense(512, CNTKLib.ReLU)
                          .Dense(10)
                          .ToNetwork();

            // set up the loss function and the classification error function
            var lossFunc  = CNTKLib.CrossEntropyWithSoftmax(network.Output, labels);
            var errorFunc = CNTKLib.ClassificationError(network.Output, labels);

            // set up a trainer that uses the RMSProp algorithm
            var learner = network.GetRMSPropLearner(
                learningRateSchedule: 0.99,
                gamma: 0.95,
                inc: 2.0,
                dec: 0.5,
                max: 2.0,
                min: 0.5
                );

            // set up a trainer and an evaluator
            var trainer   = network.GetTrainer(learner, lossFunc, errorFunc);
            var evaluator = network.GetEvaluator(errorFunc);

            // declare some variables
            var maxEpochs  = 20;
            var batchSize  = 128;
            var loss       = 0.0;
            var error      = 0.0;
            var batchCount = 0;

            // train the network during several epochs
            Console.WriteLine("Training the neural network....");
            for (int epoch = 0; epoch < maxEpochs; epoch++)
            {
                Console.Write($"Training epoch {epoch + 1}/{maxEpochs}... ");

                // train the network using random batches
                loss       = 0.0;
                error      = 0.0;
                batchCount = 0;
                training_data.Index().Shuffle().Batch(batchSize, (indices, begin, end) =>
                {
                    // get the current batch
                    var featureBatch = features.GetBatch(training_data, indices, begin, end);
                    var labelBatch   = labels.GetBatch(training_labels, indices, begin, end);

                    // train the network on the batch; the original snippet breaks off
                    // mid-call here, so the sweep-end flag and the metric bookkeeping
                    // below are a hedged reconstruction of the usual pattern
                    var result = trainer.TrainBatch(
                        new[] {
                            (features, featureBatch),
                            (labels, labelBatch)
                        },
                        false);

                    // accumulate the running totals declared above
                    // (assumes the result exposes Loss and Evaluation values)
                    loss += result.Loss;
                    error += result.Evaluation;
                    batchCount++;
                });

                // report the averaged loss and error for this epoch
                Console.WriteLine($"loss: {loss / batchCount:F4}, error: {error / batchCount:F4}");
            }
        }
 /// <summary>
 /// construct a parameter of double values
 /// </summary>
 /// <param name="shape">shape of the parameter</param>
 /// <param name="initValue">initial value of the parameter</param>
 /// <param name="device">device</param>
 /// <param name="name">name</param>
 public Parameter(NDShape shape, double initValue, DeviceDescriptor device, string name) :
     this(shape, DataType.Double, CNTKLib.ConstantInitializer(initValue), device, name)
 {
 }
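A quick usage sketch for this constructor; the shape helper (NDShape.CreateNDShape), the initial value, and the device choice are illustrative:

    // a 10-element bias vector on the CPU, initialized to zero
    var bias = new Parameter(NDShape.CreateNDShape(new int[] { 10 }), 0.0,
                             DeviceDescriptor.CPUDevice, "bias");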
Example #27
        /// <summary>
        /// 3D convolution layer (e.g. spatial convolution over volumes). This layer creates a convolution kernel that is convolved with the layer input to produce a tensor of outputs. If useBias is true, a bias vector is created and added to the outputs. If an activation is specified, it is applied to the outputs as well.
        /// </summary>
        /// <param name="shape">The 3D input shape.</param>
        /// <param name="channels">Integer, the dimensionality of the output space.</param>
        /// <param name="kernalSize">A tuple of 3 integers, specifying the depth, height and width of the 3D convolution window. Can be a single integer to specify the same value for all spatial dimensions.</param>
        /// <param name="strides">A tuple of 3 integers, specifying the strides of the convolution along each spatial dimension. Can be a single integer to specify the same value for all spatial dimensions. Specifying any stride value != 1 is incompatible with specifying any dilation_rate value != 1.</param>
        /// <param name="padding">Boolean, if true results in padding the input such that the output has the same length as the original input.</param>
        /// <param name="dialation">A tuple of 3 integers, specifying the dilation rate to use for dilated convolution. Can be a single integer to specify the same value for all spatial dimensions. Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride value != 1.</param>
        /// <param name="activation">Activation function to use. If you don't specify anything, no activation is applied (ie. "linear" activation: a(x) = x). <see cref="SiaNet.Common.OptActivations"/></param>
        /// <param name="useBias">Boolean, whether the layer uses a bias vector.</param>
        /// <param name="weightInitializer">Initializer for the kernel weights matrix. <see cref="SiaNet.Common.OptInitializers"/></param>
        /// <param name="biasInitializer">Initializer for the bias vector. <see cref="SiaNet.Common.OptInitializers"/></param>
        /// <returns></returns>
        public static Function Conv3D(Tuple <int, int, int, int> shape, int channels, Tuple <int, int, int> kernalSize, Tuple <int, int, int> strides, bool padding = true, Tuple <int, int, int> dialation = null, string activation = OptActivations.None, bool useBias = false, string weightInitializer = OptInitializers.Xavier, string biasInitializer = OptInitializers.Zeros)
        {
            Variable input = CNTKLib.InputVariable(new int[] { shape.Item1, shape.Item2, shape.Item3 }, DataType.Float);

            return(Conv3D(input, channels, kernalSize, strides, padding, dialation, activation, useBias, weightInitializer, biasInitializer));
        }
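A hedged usage sketch for this overload; the volume shape and hyperparameters are invented for illustration, and OptActivations.ReLU is assumed to be one of the available activation constants:

    // a 16x16x16 single-channel volume, 32 output channels, 3x3x3 kernels, stride 1
    Function conv = Conv3D(
        shape: Tuple.Create(16, 16, 16, 1),
        channels: 32,
        kernalSize: Tuple.Create(3, 3, 3),
        strides: Tuple.Create(1, 1, 1),
        activation: OptActivations.ReLU);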
        public static Function Embedding(Variable input, int embeddingDim, DeviceDescriptor device)
        {
            System.Diagnostics.Debug.Assert(input.Shape.Rank == 1);
            int inputDim            = input.Shape[0];
            var embeddingParameters = new Parameter(new int[] { embeddingDim, inputDim }, DataType.Float, CNTKLib.GlorotUniformInitializer(), device);

            return(CNTKLib.Times(embeddingParameters, input));
        }
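For example, to embed a 10,000-word one-hot vocabulary into 300 dimensions (a sketch; the vocabulary size and embedding width are illustrative):

    Variable oneHotWord = CNTKLib.InputVariable(new int[] { 10000 }, DataType.Float, "word");
    Function embedded = Embedding(oneHotWord, 300, DeviceDescriptor.UseDefaultDevice());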
Example #29
 /// <summary>
 /// Max pooling operation for 3D data (spatial or spatio-temporal).
 /// </summary>
 /// <param name="layer">The output of the last layer.</param>
 /// <param name="poolSize">Tuple of 3 integers, factors by which to downscale (dim1, dim2, dim3). (2, 2, 2) will halve the size of the 3D input in each dimension.</param>
 /// <param name="strides">Tuple of 3 integers, or None. Strides values.</param>
 /// <param name="padding">Boolean, if true results in padding the input such that the output has the same length as the original input.</param>
 /// <returns></returns>
 public static Function MaxPool3D(Variable layer, Tuple <int, int, int> poolSize, Tuple <int, int, int> strides, bool padding = true)
 {
     return(CNTKLib.Pooling(layer, PoolingType.Max, new int[] { poolSize.Item1, poolSize.Item2, poolSize.Item3 }, new int[] { strides.Item1, strides.Item2, strides.Item3 }, new BoolVector(new bool[] { padding, padding, padding })));
 }
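A usage sketch, assuming conv is the output of a 3D convolution layer such as the Conv3D example above:

    // halve each spatial dimension of the 3D feature volume
    Function pooled = MaxPool3D(conv, Tuple.Create(2, 2, 2), Tuple.Create(2, 2, 2), padding: false);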
Example #30
 public override Function Create(Function input, DeviceDescriptor device)
 {
     return(CNTKLib.Dropout(input, _dropoutRate, _seed, _name));
 }