/// <inheritdoc />
internal override Function ToFunction(Variable inputFunction)
{
    return(CNTKLib.Softplus(inputFunction));
}
internal static void TrainSimpleFeedForwardClassifier(DeviceDescriptor device) { int inputDim = 2; int numOutputClasses = 2; int hiddenLayerDim = 50; int numHiddenLayers = 2; int minibatchSize = 50; int numSamplesPerSweep = 10000; int numSweepsToTrainWith = 2; int numMinibatchesToTrain = (numSamplesPerSweep * numSweepsToTrainWith) / minibatchSize; var featureStreamName = "features"; var labelsStreamName = "labels"; var input = Variable.InputVariable(new int[] { inputDim }, DataType.Float, "features"); var labels = Variable.InputVariable(new int[] { numOutputClasses }, DataType.Float, "labels"); Function classifierOutput; Function trainingLoss; Function prediction; IList <StreamConfiguration> streamConfigurations = new StreamConfiguration[] { new StreamConfiguration(featureStreamName, inputDim), new StreamConfiguration(labelsStreamName, numOutputClasses) }; using (var minibatchSource = MinibatchSource.TextFormatMinibatchSource("SimpleDataTrain_cntk_text.txt", streamConfigurations, MinibatchSource.FullDataSweep, true, MinibatchSource.DefaultRandomizationWindowInChunks)) { var featureStreamInfo = minibatchSource.StreamInfo(featureStreamName); var labelStreamInfo = minibatchSource.StreamInfo(labelsStreamName); IDictionary <StreamInformation, Tuple <NDArrayView, NDArrayView> > inputMeansAndInvStdDevs = new Dictionary <StreamInformation, Tuple <NDArrayView, NDArrayView> > { { featureStreamInfo, new Tuple <NDArrayView, NDArrayView>(null, null) } }; MinibatchSource.ComputeInputPerDimMeansAndInvStdDevs(minibatchSource, inputMeansAndInvStdDevs, device); var normalizedinput = CNTKLib.PerDimMeanVarianceNormalize(input, inputMeansAndInvStdDevs[featureStreamInfo].Item1, inputMeansAndInvStdDevs[featureStreamInfo].Item2); Function fullyConnected = TestHelper.FullyConnectedLinearLayer(normalizedinput, hiddenLayerDim, device, ""); classifierOutput = CNTKLib.Sigmoid(fullyConnected, ""); for (int i = 1; i < numHiddenLayers; ++i) { fullyConnected = TestHelper.FullyConnectedLinearLayer(classifierOutput, hiddenLayerDim, device, ""); classifierOutput = CNTKLib.Sigmoid(fullyConnected, ""); } var outputTimesParam = new Parameter(NDArrayView.RandomUniform <float>( new int[] { numOutputClasses, hiddenLayerDim }, -0.05, 0.05, 1, device)); var outputBiasParam = new Parameter(NDArrayView.RandomUniform <float>( new int[] { numOutputClasses }, -0.05, 0.05, 1, device)); classifierOutput = CNTKLib.Plus(outputBiasParam, outputTimesParam * classifierOutput, "classifierOutput"); trainingLoss = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labels, "lossFunction");; prediction = CNTKLib.ClassificationError(classifierOutput, labels, "classificationError"); // Test save and reload of model { Variable classifierOutputVar = classifierOutput; Variable trainingLossVar = trainingLoss; Variable predictionVar = prediction; var combinedNet = Function.Combine(new List <Variable>() { trainingLoss, prediction, classifierOutput }, "feedForwardClassifier"); TestHelper.SaveAndReloadModel(ref combinedNet, new List <Variable>() { input, labels, trainingLossVar, predictionVar, classifierOutputVar }, device); classifierOutput = classifierOutputVar; trainingLoss = trainingLossVar; prediction = predictionVar; } } CNTK.TrainingParameterScheduleDouble learningRatePerSample = new CNTK.TrainingParameterScheduleDouble( 0.02, TrainingParameterScheduleDouble.UnitType.Sample); using (var minibatchSource = MinibatchSource.TextFormatMinibatchSource("SimpleDataTrain_cntk_text.txt", streamConfigurations)) { var featureStreamInfo = 
minibatchSource.StreamInfo(featureStreamName); var labelStreamInfo = minibatchSource.StreamInfo(labelsStreamName); streamConfigurations = new StreamConfiguration[] { new StreamConfiguration("features", inputDim), new StreamConfiguration("labels", numOutputClasses) }; IList <Learner> parameterLearners = new List <Learner>() { CNTKLib.SGDLearner(classifierOutput.Parameters(), learningRatePerSample) }; var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, parameterLearners); int outputFrequencyInMinibatches = 20; int trainingCheckpointFrequency = 100; for (int i = 0; i < numMinibatchesToTrain; ++i) { var minibatchData = minibatchSource.GetNextMinibatch((uint)minibatchSize, device); var arguments = new Dictionary <Variable, MinibatchData> { { input, minibatchData[featureStreamInfo] }, { labels, minibatchData[labelStreamInfo] } }; trainer.TrainMinibatch(arguments, device); TestHelper.PrintTrainingProgress(trainer, i, outputFrequencyInMinibatches); if ((i % trainingCheckpointFrequency) == (trainingCheckpointFrequency - 1)) { string ckpName = "feedForward.net"; trainer.SaveCheckpoint(ckpName); trainer.RestoreFromCheckpoint(ckpName); } } } }
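// The hidden layers above are built with TestHelper.FullyConnectedLinearLayer, which is not shown in this
// section. A minimal sketch of such a helper, assuming a Glorot-uniform weight matrix and a zero-initialized
// bias (an illustration only, not the original TestHelper implementation):
public static Function FullyConnectedLinearLayer(Variable input, int outputDim, DeviceDescriptor device, string outputName = "")
{
    System.Diagnostics.Debug.Assert(input.Shape.Rank == 1);
    int inputDim = input.Shape[0];

    // Weight matrix of shape [outputDim, inputDim], multiplied with the input.
    var timesParam = new Parameter(new int[] { outputDim, inputDim }, DataType.Float, CNTKLib.GlorotUniformInitializer(), device, "timesParam");
    var timesFunction = CNTKLib.Times(timesParam, input, "times");

    // Bias vector added to the projection.
    var biasParam = new Parameter(new int[] { outputDim }, 0.0f, device, "biasParam");
    return CNTKLib.Plus(biasParam, timesFunction, outputName);
}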
/// <summary>
/// Max pooling operation for temporal data.
/// </summary>
/// <param name="layer">The output of the last layer.</param>
/// <param name="poolSize">Integer, size of the max pooling windows.</param>
/// <param name="strides">Factor by which to downscale the temporal axis, e.g. 2 will halve the input length.</param>
/// <param name="padding">Boolean, if true results in padding the input such that the output has the same length as the original input.</param>
/// <returns></returns>
public static Function MaxPool1D(Variable layer, int poolSize, int strides, bool padding = true)
{
    return(CNTKLib.Pooling(layer, PoolingType.Max, new int[] { poolSize }, new int[] { strides },
                           new BoolVector(new bool[] { padding, false, false })));
}
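// A minimal usage sketch of the helper above (illustrative shape, not from the original source):
// downsample the temporal axis of a 100-step, 16-channel sequence by a factor of 2.
public static Function BuildMaxPool1DExample()
{
    Variable sequence = CNTKLib.InputVariable(new int[] { 100, 16 }, DataType.Float, "sequence");
    return MaxPool1D(sequence, poolSize: 2, strides: 2);
}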
/// <summary>
/// Dropout consists of randomly setting a fraction rate of input units to 0 at each update during training time, which helps prevent overfitting.
/// </summary>
/// <param name="layer">The output of the last layer.</param>
/// <param name="rate">A float value between 0 and 1. Fraction of the input units to drop.</param>
/// <returns></returns>
public static Function Dropout(Variable layer, double rate)
{
    return(CNTKLib.Dropout(layer, rate));
}
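// A minimal usage sketch (illustrative shape): drop 20% of the activations of a 128-unit layer during training.
public static Function BuildDropoutExample()
{
    Variable previousLayer = CNTKLib.InputVariable(new int[] { 128 }, DataType.Float, "activations");
    return Dropout(previousLayer, 0.2);
}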
/// <inheritdoc />
internal override Function ToFunction(Variable inputFunction)
{
    return(CNTKLib.Tanh(inputFunction));
}
protected override Variable BuildNetwork(string name)
{
    InputVariable = CNTKLib.InputVariable(new int[] { Size }, DataType.Float, name);
    return(InputVariable);
}
Tuple <Function, Function> LSTMPCellWithSelfStabilization <ElementType>( Variable input, Variable prevOutput, Variable prevCellState) { int outputDim = prevOutput.Shape[0]; int cellDim = prevCellState.Shape[0]; bool isFloatType = typeof(ElementType).Equals(typeof(float)); DataType dataType = isFloatType ? DataType.Float : DataType.Double; Func <int, Parameter> createBiasParam; if (isFloatType) { createBiasParam = (dim) => new Parameter(new int[] { dim }, 0.01f, device, ""); } else { createBiasParam = (dim) => new Parameter(new int[] { dim }, 0.01, device, ""); } uint seed2 = 1; Func <int, Parameter> createProjectionParam = (oDim) => new Parameter(new int[] { oDim, NDShape.InferredDimension }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed2++), device); Func <int, Parameter> createDiagWeightParam = (dim) => new Parameter(new int[] { dim }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed2++), device); Function stabilizedPrevOutput = Stabilize <ElementType>(prevOutput); Function stabilizedPrevCellState = Stabilize <ElementType>(prevCellState); Func <Variable> projectInput = () => createBiasParam(cellDim) + (createProjectionParam(cellDim) * input); // Input gate Function it = CNTKLib.Sigmoid( (Variable)(projectInput() + (createProjectionParam(cellDim) * stabilizedPrevOutput)) + CNTKLib.ElementTimes(createDiagWeightParam(cellDim), stabilizedPrevCellState)); Function bit = CNTKLib.ElementTimes( it, CNTKLib.Tanh(projectInput() + (createProjectionParam(cellDim) * stabilizedPrevOutput))); // Forget-me-not gate Function ft = CNTKLib.Sigmoid( (Variable)( projectInput() + (createProjectionParam(cellDim) * stabilizedPrevOutput)) + CNTKLib.ElementTimes(createDiagWeightParam(cellDim), stabilizedPrevCellState)); Function bft = CNTKLib.ElementTimes(ft, prevCellState); Function ct = (Variable)bft + bit; // Output gate Function ot = CNTKLib.Sigmoid( (Variable)(projectInput() + (createProjectionParam(cellDim) * stabilizedPrevOutput)) + CNTKLib.ElementTimes(createDiagWeightParam(cellDim), Stabilize <ElementType>(ct))); Function ht = CNTKLib.ElementTimes(ot, CNTKLib.Tanh(ct)); Function c = ct; Function h = (outputDim != cellDim) ? (createProjectionParam(outputDim) * Stabilize <ElementType>(ht)) : ht; return(new Tuple <Function, Function>(h, c)); }
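// A hedged sketch (not part of the original source) of how a cell like the one above is typically wired
// into a recurrence: placeholders stand in for the previous output and cell state, and CNTKLib.PastValue
// closes the loop one time step back. The wrapper name and parameters are assumptions.
Tuple<Function, Function> LSTMPComponentWithSelfStabilization<ElementType>(Variable input, int outputDim, int cellDim)
{
    var dh = Variable.PlaceholderVariable(new int[] { outputDim }, input.DynamicAxes);
    var dc = Variable.PlaceholderVariable(new int[] { cellDim }, input.DynamicAxes);

    // Build a single step of the cell against the placeholders.
    var lstmCell = LSTMPCellWithSelfStabilization<ElementType>(input, dh, dc);

    // Feed the previous time step's output (h) and cell state (c) back into the placeholders.
    var actualDh = CNTKLib.PastValue(lstmCell.Item1);
    var actualDc = CNTKLib.PastValue(lstmCell.Item2);
    lstmCell.Item1.ReplacePlaceholders(new Dictionary<Variable, Variable> { { dh, actualDh }, { dc, actualDc } });

    return new Tuple<Function, Function>(lstmCell.Item1, lstmCell.Item2);
}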
/// <summary>
/// Reshapes an output to a certain shape.
/// </summary>
/// <param name="layer">The input layer to be reshaped.</param>
/// <param name="targetShape">List of integers. Does not include the batch axis.</param>
/// <returns></returns>
public static Function Reshape(Variable layer, int[] targetShape)
{
    return(CNTKLib.Reshape(layer, targetShape));
}
/// <summary>
/// Reshapes an input of the given shape to a target shape.
/// </summary>
/// <param name="shape">The input shape of the data.</param>
/// <param name="targetShape">List of integers. Does not include the batch axis.</param>
/// <returns></returns>
public static Function Reshape(int[] shape, int[] targetShape)
{
    return(CNTKLib.Reshape(Variable.InputVariable(shape, DataType.Float), targetShape));
}
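// A minimal usage sketch of the overload above: flatten a 28x28 input into a 784-element vector.
public static Function BuildReshapeExample()
{
    return Reshape(new int[] { 28, 28 }, new int[] { 784 });
}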
//Network functions //Convolution block private static Function ConvBlock(Variable input_var, int[] kernel_size, int in_channels, int out_channels, float[] W = null, float[] B = null, string wpath = null, string[] layer_names = null, bool padding = true, bool use_relu = true, bool use_bn = true) { Function output_function; if (W != null & wpath != null) { throw new System.ArgumentException("1 parameter must be null!", "W, wpath"); } //If weight path is given, load weight from path. Otherwise use weight from float array W or, initialize new weights. if (wpath != null) { W = weight_fromDisk(wpath, layer_names[0]); } //Initialize weights Parameter weight = make_weight(kernel_size, in_channels, out_channels, W, layer_names[0]); //Generate convolution function Function convW = CNTKLib.Convolution( /*kernel and input*/ weight, input_var, /*strides*/ new int[] { 1, 1, in_channels }, /*sharing*/ new CNTK.BoolVector { true }, /*padding*/ new CNTK.BoolVector { padding }); //Initialize bias if (wpath != null) { B = make_copies(weight_fromDisk(wpath, layer_names[1]), convW.Output.Shape); } Parameter bias = make_bias(convW.Output.Shape, B, layer_names[1]); //Add bias Function add = CNTKLib.Plus(convW, bias); //Sigmoid Function sig = CNTKLib.Sigmoid(add); if (use_bn == true) { //Initialize batch normalization int[] bns = new int[] { 1, 1 }; Parameter scale; Parameter bnbias; Parameter rm; Parameter rv; var n = Constant.Scalar(0.0f, DeviceDescriptor.GPUDevice(0)); make_bn_pars(out_channels, add.Output.Shape, out scale, out bnbias, out rm, out rv, wpath, layer_names[0]); //Batch normalization Function bn = CNTKLib.BatchNormalization(add, scale, bnbias, rm, rv, n, true); //ReLU Function relu = CNTKLib.ReLU(bn); output_function = relu; } else { if (use_relu == true) { //ReLU Function relu = CNTKLib.ReLU(add); output_function = relu; } else { output_function = sig; } } return(output_function); }
/// <summary> /// Build and train a RNN model. /// </summary> /// <param name="device">CPU or GPU device to train and run the model</param> public static void Train(DeviceDescriptor device) { const int inputDim = 2000; const int cellDim = 25; const int hiddenDim = 25; const int embeddingDim = 50; const int numOutputClasses = 5; // build the model var featuresName = "features"; var features = Variable.InputVariable(new int[] { inputDim }, DataType.Float, featuresName, null, true /*isSparse*/); var labelsName = "labels"; var labels = Variable.InputVariable(new int[] { numOutputClasses }, DataType.Float, labelsName, new List <Axis>() { Axis.DefaultBatchAxis() }, true); var classifierOutput = LSTMSequenceClassifierNet(features, numOutputClasses, embeddingDim, hiddenDim, cellDim, device, "classifierOutput"); Function trainingLoss = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labels, "lossFunction"); Function prediction = CNTKLib.ClassificationError(classifierOutput, labels, "classificationError"); // prepare training data IList <StreamConfiguration> streamConfigurations = new StreamConfiguration[] { new StreamConfiguration(featuresName, inputDim, true, "x"), new StreamConfiguration(labelsName, numOutputClasses, false, "y") }; var minibatchSource = MinibatchSource.TextFormatMinibatchSource( Path.Combine(DataFolder, "Train.ctf"), streamConfigurations, MinibatchSource.InfinitelyRepeat, true); var featureStreamInfo = minibatchSource.StreamInfo(featuresName); var labelStreamInfo = minibatchSource.StreamInfo(labelsName); // prepare for training TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble( 0.0005, 1); TrainingParameterScheduleDouble momentumTimeConstant = CNTKLib.MomentumAsTimeConstantSchedule(256); IList <Learner> parameterLearners = new List <Learner>() { Learner.MomentumSGDLearner(classifierOutput.Parameters(), learningRatePerSample, momentumTimeConstant, /*unitGainMomentum = */ true) }; var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, parameterLearners); // train the model uint minibatchSize = 200; int outputFrequencyInMinibatches = 20; int miniBatchCount = 0; int numEpochs = 5; while (numEpochs > 0) { var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device); var arguments = new Dictionary <Variable, MinibatchData> { { features, minibatchData[featureStreamInfo] }, { labels, minibatchData[labelStreamInfo] } }; trainer.TrainMinibatch(arguments, device); TestHelper.PrintTrainingProgress(trainer, miniBatchCount++, outputFrequencyInMinibatches); // Because minibatchSource is created with MinibatchSource.InfinitelyRepeat, // batching will not end. Each time minibatchSource completes an sweep (epoch), // the last minibatch data will be marked as end of a sweep. We use this flag // to count number of epochs. if (TestHelper.MiniBatchDataIsSweepEnd(minibatchData.Values)) { numEpochs--; } } }
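// The epoch-counting loop above relies on TestHelper.MiniBatchDataIsSweepEnd, which is not shown in this
// section. A minimal sketch of such a helper, assuming the sweepEnd flag exposed by MinibatchData in the
// CNTK C# API (an illustration only, not the original TestHelper implementation):
public static bool MiniBatchDataIsSweepEnd(IEnumerable<MinibatchData> minibatchValues)
{
    // A minibatch is the last one of a sweep if any of its stream values carries the sweep-end marker.
    foreach (var value in minibatchValues)
    {
        if (value.sweepEnd)
        {
            return true;
        }
    }
    return false;
}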
public Dictionary <string, List <double> > Train(object trainData, object validationData, int epoches, int batchSize, On_Epoch_Start OnEpochStart, On_Epoch_End OnEpochEnd, On_Batch_Start onBatchStart, On_Batch_End OnBatchEnd, bool shuffle = false) { XYFrame train = (XYFrame)trainData; XYFrame validation = validationData != null ? (XYFrame)validationData : null; Dictionary <string, List <double> > result = new Dictionary <string, List <double> >(); var trainer = Trainer.CreateTrainer(Model, lossFunc, metricFunc, learners); int currentEpoch = 1; Dictionary <string, double> metricsList = new Dictionary <string, double>(); while (currentEpoch <= epoches) { if (shuffle) { train.Shuffle(); } metricsList = new Dictionary <string, double>(); OnEpochStart(currentEpoch); int miniBatchCount = 1; while (train.NextBatch(miniBatchCount, batchSize)) { onBatchStart(currentEpoch, miniBatchCount); Value features = DataFrameUtil.GetValueBatch(train.CurrentBatch.XFrame); Value labels = DataFrameUtil.GetValueBatch(train.CurrentBatch.YFrame); trainer.TrainMinibatch(new Dictionary <Variable, Value>() { { featureVariable, features }, { labelVariable, labels } }, GlobalParameters.Device); OnBatchEnd(currentEpoch, miniBatchCount, trainer.TotalNumberOfSamplesSeen(), trainer.PreviousMinibatchLossAverage(), new Dictionary <string, double>() { { metricName, trainer.PreviousMinibatchEvaluationAverage() } }); miniBatchCount++; } if (!result.ContainsKey("loss")) { result.Add("loss", new List <double>()); } if (!result.ContainsKey(metricName)) { result.Add(metricName, new List <double>()); } double lossValue = trainer.PreviousMinibatchLossAverage(); double metricValue = trainer.PreviousMinibatchEvaluationAverage(); result["loss"].Add(lossValue); result[metricName].Add(metricValue); metricsList.Add(metricName, metricValue); if (validation != null) { if (!result.ContainsKey("val_loss")) { result.Add("val_loss", new List <double>()); } if (!result.ContainsKey("val_" + metricName)) { result.Add("val_" + metricName, new List <double>()); } int evalMiniBatchCount = 1; List <double> totalEvalBatchLossList = new List <double>(); List <double> totalEvalMetricValueList = new List <double>(); while (validation.NextBatch(evalMiniBatchCount, batchSize)) { Variable actualVariable = CNTKLib.InputVariable(labelVariable.Shape, DataType.Float); var evalLossFunc = Losses.Get(lossName, labelVariable, actualVariable); var evalMetricFunc = Metrics.Get(metricName, labelVariable, actualVariable); Value actual = EvaluateInternal(validation.XFrame); Value expected = DataFrameUtil.GetValueBatch(validation.YFrame); var inputDataMap = new Dictionary <Variable, Value>() { { labelVariable, expected }, { actualVariable, actual } }; var outputDataMap = new Dictionary <Variable, Value>() { { evalLossFunc.Output, null } }; evalLossFunc.Evaluate(inputDataMap, outputDataMap, GlobalParameters.Device); var evalLoss = outputDataMap[evalLossFunc.Output].GetDenseData <float>(evalLossFunc.Output).Select(x => x.First()).ToList(); totalEvalBatchLossList.Add(evalLoss.Average()); inputDataMap = new Dictionary <Variable, Value>() { { labelVariable, expected }, { actualVariable, actual } }; outputDataMap = new Dictionary <Variable, Value>() { { evalMetricFunc.Output, null } }; evalMetricFunc.Evaluate(inputDataMap, outputDataMap, GlobalParameters.Device); var evalMetric = outputDataMap[evalMetricFunc.Output].GetDenseData <float>(evalMetricFunc.Output).Select(x => x.First()).ToList(); totalEvalMetricValueList.Add(evalMetric.Average()); evalMiniBatchCount++; } 
result["val_loss"].Add(totalEvalBatchLossList.Average()); metricsList.Add("val_loss", totalEvalBatchLossList.Average()); result["val_" + metricName].Add(totalEvalMetricValueList.Average()); metricsList.Add("val_" + metricName, totalEvalMetricValueList.Average()); } OnEpochEnd(currentEpoch, trainer.TotalNumberOfSamplesSeen(), lossValue, metricsList); currentEpoch++; } return(result); }
/// <summary> /// Train and evaluate a image classifier for MNIST data. /// </summary> /// <param name="device">CPU or GPU device to run training and evaluation</param> /// <param name="useConvolution">option to use convolution network or to use multilayer perceptron</param> /// <param name="forceRetrain">whether to override an existing model. /// if true, any existing model will be overridden and the new one evaluated. /// if false and there is an existing model, the existing model is evaluated.</param> public static void TrainAndEvaluate(DeviceDescriptor device, bool useConvolution, bool forceRetrain) { var featureStreamName = "features"; var labelsStreamName = "labels"; var classifierName = "classifierOutput"; Function classifierOutput; int[] imageDim = useConvolution ? new int[] { 28, 28, 1 } : new int[] { 784 }; int imageSize = 28 * 28; int numClasses = 10; IList <StreamConfiguration> streamConfigurations = new StreamConfiguration[] { new StreamConfiguration(featureStreamName, imageSize), new StreamConfiguration(labelsStreamName, numClasses) }; string modelFile = useConvolution ? "MNISTConvolution.model" : "MNISTMLP.model"; // If a model already exists and not set to force retrain, validate the model and return. //prepare vars to accept results List <List <float> > X = new List <List <float> >(); List <float> Y = new List <float>(); if (File.Exists(modelFile) && !forceRetrain) { var minibatchSourceExistModel = MinibatchSource.TextFormatMinibatchSource( Path.Combine(ImageDataFolder, "MINST-TestData.txt"), streamConfigurations); //Model validation ValidateModel(modelFile, minibatchSourceExistModel, imageDim, numClasses, featureStreamName, labelsStreamName, classifierName, device, 1000, X, Y, useConvolution); //show image classification result showResult(X, Y); return; } // build the network var input = CNTKLib.InputVariable(imageDim, DataType.Float, featureStreamName); if (useConvolution) { var scaledInput = CNTKLib.ElementTimes(Constant.Scalar <float>(0.00390625f, device), input); classifierOutput = CreateConvolutionalNeuralNetwork(scaledInput, numClasses, device, classifierName); } else { // For MLP, we like to have the middle layer to have certain amount of states. 
int hiddenLayerDim = 200; var scaledInput = CNTKLib.ElementTimes(Constant.Scalar <float>(0.00390625f, device), input); classifierOutput = CreateMLPClassifier(device, numClasses, hiddenLayerDim, scaledInput, classifierName); } var labels = CNTKLib.InputVariable(new int[] { numClasses }, DataType.Float, labelsStreamName); //LOss and Eval functions var trainingLoss = CNTKLib.CrossEntropyWithSoftmax(new Variable(classifierOutput), labels, "lossFunction"); var prediction = CNTKLib.ClassificationError(new Variable(classifierOutput), labels, "classificationError"); // prepare training data var minibatchSource = MinibatchSource.TextFormatMinibatchSource( Path.Combine(ImageDataFolder, "MINST-TrainData.txt"), streamConfigurations, MinibatchSource.InfinitelyRepeat); var featureStreamInfo = minibatchSource.StreamInfo(featureStreamName); var labelStreamInfo = minibatchSource.StreamInfo(labelsStreamName); // set per sample learning rate var learningRatePerSample = new CNTK.TrainingParameterScheduleDouble(0.003125, 1); IList <Learner> parameterLearners = new List <Learner>() { Learner.SGDLearner(classifierOutput.Parameters(), learningRatePerSample) }; var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, parameterLearners); // const uint minibatchSize = 64; int outputFrequencyInMinibatches = 100, i = 0; int epochs = 3; while (epochs > 0) { var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device); var arguments = new Dictionary <Variable, MinibatchData> { { input, minibatchData[featureStreamInfo] }, { labels, minibatchData[labelStreamInfo] } }; trainer.TrainMinibatch(arguments, device); // TestHelper.PrintTrainingProgress(trainer, i++, outputFrequencyInMinibatches); // MinibatchSource is created with MinibatchSource.InfinitelyRepeat. // Batching will not end. Each time minibatchSource completes an sweep (epoch), // the last minibatch data will be marked as end of a sweep. We use this flag // to count number of epochs. if (TestHelper.MiniBatchDataIsSweepEnd(minibatchData.Values)) { epochs--; } } // save the trained model classifierOutput.Save(modelFile); // validate the model var minibatchSourceNewModel = MinibatchSource.TextFormatMinibatchSource( Path.Combine(ImageDataFolder, "MINST-TestData.txt"), streamConfigurations, MinibatchSource.InfinitelyRepeat); //Model validation ValidateModel(modelFile, minibatchSourceNewModel, imageDim, numClasses, featureStreamName, labelsStreamName, classifierName, device, 1000, X, Y, useConvolution); //show image classification result showResult(X, Y); }
public static float ValidateModel(string modelFile, MinibatchSource testMinibatchSource, int[] imageDim, int numClasses, string featureInputName, string labelInputName, string outputName, DeviceDescriptor device, int maxCount = 1000, List <List <float> > X = null, List <float> Y = null, bool useConvolution = true) { Function model = Function.Load(modelFile, device); var imageInput = model.Arguments[0]; var labelOutput = model.Outputs.Single(o => o.Name == outputName); var featureStreamInfo = testMinibatchSource.StreamInfo(featureInputName); var labelStreamInfo = testMinibatchSource.StreamInfo(labelInputName); int batchSize = 50; int miscountTotal = 0, totalCount = 0; while (true) { var minibatchData = testMinibatchSource.GetNextMinibatch((uint)batchSize, device); if (minibatchData == null || minibatchData.Count == 0) { break; } totalCount += (int)minibatchData[featureStreamInfo].numberOfSamples; // expected labels are in the minibatch data. var labelData = minibatchData[labelStreamInfo].data.GetDenseData <float>(labelOutput); var expectedLabels = labelData.Select(l => l.IndexOf(l.Max())).ToList(); var inputDataMap = new Dictionary <Variable, Value>() { { imageInput, minibatchData[featureStreamInfo].data } }; var outputDataMap = new Dictionary <Variable, Value>() { { labelOutput, null } }; model.Evaluate(inputDataMap, outputDataMap, device); var faetureData = minibatchData[featureStreamInfo].data.GetDenseData <float>(CNTKLib.InputVariable(minibatchData[featureStreamInfo].data.Shape, DataType.Float, model.Arguments[0].Name)); var outputData = outputDataMap[labelOutput].GetDenseData <float>(labelOutput); var actualLabels = outputData.Select(l => l.IndexOf(l.Max())).ToList(); int misMatches = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum(); miscountTotal += misMatches; Console.WriteLine($"Validating Model: Total Samples = {totalCount}, Misclassify Count = {miscountTotal}"); if (totalCount > maxCount) { //writes some result in to array for (int i = 0; i < outputData.Count && X != null && Y != null; i++) { var imgDIm = imageDim.Aggregate(1, (acc, val) => acc * val); var inputVector = faetureData[0].Skip(imgDIm * i).Take(imgDIm).Select(x => (float)x).ToList(); X.Add(inputVector); var currLabel = actualLabels[i]; Y.Add(currLabel); } ; break; } } float errorRate = 1.0F * miscountTotal / totalCount; Console.WriteLine($"Model Validation Error = {errorRate}"); return(errorRate); }
public override Function ApplyActivationFunction(Function variable, DeviceDescriptor device)
{
    return(CNTKLib.Tanh(variable));
}
/// <summary>
/// Dense implements the operation: output = activation(dot(input, kernel) + bias), where activation is the element-wise activation function passed as the activation argument, kernel is a weights matrix created by the layer, and bias is a bias vector created by the layer (only applicable if useBias is true).
/// </summary>
/// <param name="shape">The input shape.</param>
/// <param name="dim">Positive integer, dimensionality of the output space.</param>
/// <param name="activation">Activation function to use. If you don't specify anything, no activation is applied (i.e. "linear" activation: a(x) = x). <see cref="SiaNet.Common.OptActivations"/></param>
/// <param name="useBias">Boolean, whether the layer uses a bias vector.</param>
/// <param name="weightInitializer">Initializer for the kernel weights matrix. <see cref="SiaNet.Common.OptInitializers"/></param>
/// <param name="biasInitializer">Initializer for the bias vector. <see cref="SiaNet.Common.OptInitializers"/></param>
/// <returns></returns>
public static Function Dense(int shape, int dim, string activation = OptActivations.None, bool useBias = false, Initializer weightInitializer = null, Initializer biasInitializer = null)
{
    var input = CNTKLib.InputVariable(new int[] { shape }, DataType.Float);
    return(Dense(input, dim, activation, useBias, weightInitializer, biasInitializer));
}
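// A minimal usage sketch of the overload above: a 128-unit layer over a 784-dimensional input.
// OptActivations.Sigmoid is used purely as an illustrative choice.
public static Function BuildDenseExample()
{
    return Dense(784, 128, OptActivations.Sigmoid, useBias: true);
}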
/// <summary> /// Creates the learner based on learning parameters. /// ToDo: Not all learners parameters defined /// </summary> /// <param name="network">Network model being trained</param> /// <param name="lrParams">Learning parameters.</param> /// <returns></returns> private List <Learner> createLearners(Function network, LearningParameters lrParams) { //learning rate and momentum values var lr = new TrainingParameterScheduleDouble(lrParams.LearningRate); var mm = CNTKLib.MomentumAsTimeConstantSchedule(lrParams.Momentum); var addParam = new AdditionalLearningOptions(); // if (lrParams.L1Regularizer > 0) { addParam.l1RegularizationWeight = lrParams.L1Regularizer; } if (lrParams.L2Regularizer > 0) { addParam.l2RegularizationWeight = lrParams.L2Regularizer; } //SGD Momentum learner if (lrParams.LearnerType == LearnerType.MomentumSGDLearner) { // var llr = new List <Learner>(); var msgd = CNTKLib.MomentumSGDLearner(new ParameterVector(network.Parameters().ToList()), lr, mm, true, addParam); llr.Add(msgd); return(llr); } //SGDLearner - rate and regulars else if (lrParams.LearnerType == LearnerType.SGDLearner) { // var llr = new List <Learner>(); var msgd = CNTKLib.SGDLearner(new ParameterVector(network.Parameters().ToList()), lr, addParam); llr.Add(msgd); return(llr); } //FSAdaGradLearner learner - rate, moment regulars else if (lrParams.LearnerType == LearnerType.FSAdaGradLearner) { // var llr = new List <Learner>(); var msgd = CNTKLib.FSAdaGradLearner(new ParameterVector(network.Parameters().ToList()), lr, mm); llr.Add(msgd); return(llr); } //AdamLearner learner else if (lrParams.LearnerType == LearnerType.AdamLearner) { // var llr = new List <Learner>(); var msgd = CNTKLib.AdamLearner(new ParameterVector(network.Parameters().ToList()), lr, mm); llr.Add(msgd); return(llr); } //AdaGradLearner learner - Learning rate and regularizers else if (lrParams.LearnerType == LearnerType.AdaGradLearner) { // var llr = new List <Learner>(); var msgd = CNTKLib.AdaGradLearner(new ParameterVector(network.Parameters().ToList()), lr, false, addParam); llr.Add(msgd); return(llr); } else { throw new Exception("Learner type is not supported!"); } }
/// <summary> /// The main program entry point. /// </summary> /// <param name="args">The command line parameters.</param> static void Main(string[] args) { // check the compute device Console.WriteLine("Checking compute device..."); Console.WriteLine($" Using: {NetUtil.CurrentDevice.AsString()}"); // unpack archive Console.WriteLine("Unpacking archive..."); if (!File.Exists("x_train_imdb.bin")) { ZipFile.ExtractToDirectory("imdb_data.zip", "."); } // load training and test data Console.WriteLine("Loading data files..."); var sequenceLength = 500; var training_data = DataUtil.LoadBinary <float>("x_train_imdb.bin", 25000, sequenceLength); var training_labels = DataUtil.LoadBinary <float>("y_train_imdb.bin", 25000); var testing_data = DataUtil.LoadBinary <float>("x_test_imdb.bin", 25000, sequenceLength); var testing_labels = DataUtil.LoadBinary <float>("y_test_imdb.bin", 25000); Console.WriteLine($" Records for training: {training_data.Length}"); Console.WriteLine($" Records for testing: {testing_data.Length}"); // build features and labels var features = NetUtil.Var(new int[] { 1 }, CNTK.DataType.Float); var labels = NetUtil.Var(new int[] { 1 }, CNTK.DataType.Float, dynamicAxes: new List <CNTK.Axis>() { CNTK.Axis.DefaultBatchAxis() }); // build the network var lstmUnits = 32; var network = features .OneHotOp(10000, true) .Embedding(32) .LSTM(lstmUnits, lstmUnits) .Dense(1, CNTKLib.Sigmoid) .ToNetwork(); Console.WriteLine("Model architecture:"); Console.WriteLine(network.ToSummary()); // set up the loss function and the classification error function var lossFunc = CNTKLib.BinaryCrossEntropy(network.Output, labels); var errorFunc = NetUtil.BinaryClassificationError(network.Output, labels); // use the Adam learning algorithm var learner = network.GetAdamLearner( learningRateSchedule: (0.001, 1), momentumSchedule: (0.9, 1), unitGain: true); // set up a trainer and an evaluator var trainer = network.GetTrainer(learner, lossFunc, errorFunc); var evaluator = network.GetEvaluator(errorFunc); // train the model Console.WriteLine("Epoch\tTrain\tTrain\tTest"); Console.WriteLine("\tLoss\tError\tError"); Console.WriteLine("-----------------------------"); var maxEpochs = 10; var batchSize = 128; var loss = new double[maxEpochs]; var trainingError = new double[maxEpochs]; var testingError = new double[maxEpochs]; var batchCount = 0; for (int epoch = 0; epoch < maxEpochs; epoch++) { // train one epoch on batches loss[epoch] = 0.0; trainingError[epoch] = 0.0; batchCount = 0; training_data.Batch(batchSize, (data, begin, end) => { // get the current batch var featureBatch = features.GetSequenceBatch(sequenceLength, training_data, begin, end); var labelBatch = labels.GetBatch(training_labels, begin, end); // train the network on the batch var result = trainer.TrainBatch( new[] { (features, featureBatch), (labels, labelBatch) },
/// <summary> /// Build and train a RNN model. /// </summary> /// <param name="device">CPU or GPU device to train and run the model</param> public void Train_predict(int M, int numEpochs = 1500, int inDim = 30, int cellDim = 25, int hiDim = 5) { string featuresName = "features"; string labelsName = "label"; const int ouDim = 1; Dictionary <string, Set> dataSet = loadData(inDim, featuresName, labelsName, fun); var featureSet = dataSet[featuresName]; var labelSet = dataSet[labelsName]; ///// Debug data //int q = 0; //using (StreamWriter file = new StreamWriter("0.txt")) //{ // file.WriteLine("Train"); // for (int i = 0; i < featureSet.train.Length; i++) // { // file.Write(q + ": "); // for (int j = 0; j < featureSet.train[i].Length; j++) // file.Write(featureSet.train[i][j] + " "); // file.Write(labelSet.train[i][0]); // file.WriteLine(); // q++; // } // file.WriteLine("Valid"); // for (int i = 0; i < featureSet.valid.Length; i++) // { // file.Write(q + ": "); // for (int j = 0; j < featureSet.valid[i].Length; j++) // file.Write(featureSet.valid[i][j] + " "); // file.Write(labelSet.valid[i][0]); // file.WriteLine(); // q++; // } // file.WriteLine("Test"); // for (int i = 0; i < featureSet.test.Length; i++) // { // file.Write(q + ": "); // for (int j = 0; j < featureSet.test[i].Length; j++) // file.Write(featureSet.test[i][j] + " "); // file.Write(labelSet.test[i][0]); // file.WriteLine(); // q++; // } //} // build the model var feature = Variable.InputVariable(new int[] { inDim + (advanced_input ? 2 : 0) }, DataType.Float, featuresName, null, false /*isSparse*/); var label = Variable.InputVariable(new int[] { ouDim }, DataType.Float, labelsName, new List <CNTK.Axis>() { CNTK.Axis.DefaultBatchAxis() }, false); var lstmModel = CreateModel(feature, ouDim, hiDim, cellDim, "timeSeriesOutput"); Function trainingLoss = CNTKLib.SquaredError(lstmModel, label, "squarederrorLoss"); Function prediction = CNTKLib.SquaredError(lstmModel, label, "squarederrorEval"); // prepare for training TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble(0.0005, 1); TrainingParameterScheduleDouble momentumTimeConstant = CNTKLib.MomentumAsTimeConstantSchedule(256); IList <Learner> parameterLearners = new List <Learner>() { Learner.MomentumSGDLearner(lstmModel.Parameters(), learningRatePerSample, momentumTimeConstant, /*unitGainMomentum = */ true) }; var trainer = Trainer.CreateTrainer(lstmModel, trainingLoss, prediction, parameterLearners); // train the model int batchSize = 20; int outputFrequencyInMinibatches = 50; int miniBatchCount = 0; for (int i = 1; i <= numEpochs; i++) { //get the next minibatch amount of data foreach (var miniBatchData in LSTMSequence.nextBatch(featureSet.train, labelSet.train, batchSize)) { var xValues = Value.CreateBatch <float>(new NDShape(1, inDim + (advanced_input ? 2 : 0)), miniBatchData.X, device); var yValues = Value.CreateBatch <float>(new NDShape(1, ouDim), miniBatchData.Y, device); //Combine variables and data in to Dictionary for the training var batchData = new Dictionary <Variable, Value>(); batchData.Add(feature, xValues); batchData.Add(label, yValues); //train minibarch data trainer.TrainMinibatch(batchData, device); TestHelper.PrintTrainingProgress(trainer, miniBatchCount++, outputFrequencyInMinibatches); } } predict_test(dataSet, trainer.Model(), inDim, ouDim, batchSize, featuresName, labelsName, M); predict(dataSet, trainer.Model(), inDim, ouDim, batchSize, featuresName, labelsName, M); }
/// <summary> /// Train and evaluate an image classifier with CIFAR-10 data. /// The classification model is saved after training. /// For repeated runs, the caller may choose whether to retrain a model or /// just validate an existing one. /// </summary> /// <param name="device">CPU or GPU device to run</param> /// <param name="forceRetrain">whether to override an existing model. /// if true, any existing model will be overridden and the new one evaluated. /// if false and there is an existing model, the existing model is evaluated.</param> public static void TrainAndEvaluate(DeviceDescriptor device, bool forceRetrain) { string modelFile = "Cifar10Rest.model"; // If a model already exists and not set to force retrain, validate the model and return. if (File.Exists(modelFile) && !forceRetrain) { ValidateModel(device, modelFile); return; } // prepare training data var minibatchSource = CreateMinibatchSource(Path.Combine(CifarDataFolder, "train_map.txt"), Path.Combine(CifarDataFolder, "CIFAR-10_mean.xml"), imageDim, numClasses, MaxEpochs); var imageStreamInfo = minibatchSource.StreamInfo("features"); var labelStreamInfo = minibatchSource.StreamInfo("labels"); // build a model var imageInput = CNTKLib.InputVariable(imageDim, imageStreamInfo.m_elementType, "Images"); var labelsVar = CNTKLib.InputVariable(new int[] { numClasses }, labelStreamInfo.m_elementType, "Labels"); var classifierOutput = ResNetClassifier(imageInput, numClasses, device, "classifierOutput"); // prepare for training var trainingLoss = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labelsVar, "lossFunction"); var prediction = CNTKLib.ClassificationError(classifierOutput, labelsVar, 5, "predictionError"); var learningRatePerSample = new TrainingParameterPerSampleScheduleDouble(0.0078125); var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, new List <Learner> { Learner.SGDLearner(classifierOutput.Parameters(), learningRatePerSample) }); uint minibatchSize = 64; int outputFrequencyInMinibatches = 20, miniBatchCount = 0; // Feed data to the trainer for number of epochs. while (true) { var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device); // Stop training once max epochs is reached. if (minibatchData.empty()) { break; } trainer.TrainMinibatch(new Dictionary <Variable, MinibatchData>() { { imageInput, minibatchData[imageStreamInfo] }, { labelsVar, minibatchData[labelStreamInfo] } }, device); TestHelper.PrintTrainingProgress(trainer, miniBatchCount++, outputFrequencyInMinibatches); } // save the model var imageClassifier = Function.Combine(new List <Variable>() { trainingLoss, prediction, classifierOutput }, "ImageClassifier"); imageClassifier.Save(modelFile); // validate the model ValidateModel(device, modelFile); }
public void TrainAndEvaluateRegression(DeviceDescriptor device) { // build a logistic regression model Variable featureVariable = Variable.InputVariable(new int[] { inputDim }, DataType.Float); Variable labelVariable = Variable.InputVariable(new int[] { numOutputClasses }, DataType.Float); var classifierOutput = CreateLinearModel(featureVariable, numOutputClasses, device); var loss = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labelVariable); var evalError = CNTKLib.ClassificationError(classifierOutput, labelVariable); // prepare for training TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble(0.02, 1); IList <Learner> parameterLearners = new List <Learner>() { Learner.SGDLearner(classifierOutput.Parameters(), learningRatePerSample) }; var trainer = Trainer.CreateTrainer(classifierOutput, loss, evalError, parameterLearners); int minibatchSize = 64; int numMinibatchesToTrain = 1000; int updatePerMinibatches = 50; // train the model for (int minibatchCount = 0; minibatchCount < numMinibatchesToTrain; minibatchCount++) { Value features, labels; GenerateValueData(minibatchSize, inputDim, numOutputClasses, out features, out labels, device); //TODO: sweepEnd should be set properly instead of false. #pragma warning disable 618 trainer.TrainMinibatch( new Dictionary <Variable, Value>() { { featureVariable, features }, { labelVariable, labels } }, device); #pragma warning restore 618 PrintTrainingProgress(trainer, minibatchCount, updatePerMinibatches); } // test and validate the model int testSize = 100; Value testFeatureValue, expectedLabelValue; GenerateValueData(testSize, inputDim, numOutputClasses, out testFeatureValue, out expectedLabelValue, device); // GetDenseData just needs the variable's shape IList <IList <float> > expectedOneHot = expectedLabelValue.GetDenseData <float>(labelVariable); IList <int> expectedLabels = expectedOneHot.Select(l => l.IndexOf(1.0F)).ToList(); var inputDataMap = new Dictionary <Variable, Value>() { { featureVariable, testFeatureValue } }; var outputDataMap = new Dictionary <Variable, Value>() { { classifierOutput.Output, null } }; classifierOutput.Evaluate(inputDataMap, outputDataMap, device); var outputValue = outputDataMap[classifierOutput.Output]; IList <IList <float> > actualLabelSoftMax = outputValue.GetDenseData <float>(classifierOutput.Output); var actualLabels = actualLabelSoftMax.Select((IList <float> l) => l.IndexOf(l.Max())).ToList(); int misMatches = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum(); Console.WriteLine($"Validating Model: Total Samples = {testSize}, Misclassify Count = {misMatches}"); }
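// TrainAndEvaluateRegression depends on a GenerateValueData helper that is not shown in this section.
// A minimal sketch of one possible implementation, assuming randomly generated, roughly separable features
// and one-hot labels packed into CNTK Value batches (the real helper may generate different data):
private static void GenerateValueData(int sampleCount, int inputDim, int numOutputClasses, out Value featureValue, out Value labelValue, DeviceDescriptor device)
{
    var random = new Random(0);
    var features = new float[sampleCount * inputDim];
    var oneHotLabels = new float[sampleCount * numOutputClasses];
    for (int s = 0; s < sampleCount; s++)
    {
        int label = random.Next(numOutputClasses);
        oneHotLabels[s * numOutputClasses + label] = 1;
        for (int d = 0; d < inputDim; d++)
        {
            // Offset each class so the classes are (roughly) linearly separable.
            features[s * inputDim + d] = (float)random.NextDouble() + label * 3.0f;
        }
    }
    featureValue = Value.CreateBatch<float>(NDShape.CreateNDShape(new int[] { inputDim }), features, device);
    labelValue = Value.CreateBatch<float>(NDShape.CreateNDShape(new int[] { numOutputClasses }), oneHotLabels, device);
}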
public override Function Create(Function input, DeviceDescriptor device)
{
    int newDim = input.Output.Shape.Dimensions.Aggregate((d1, d2) => d1 * d2);
    return(CNTKLib.Reshape(input, new int[] { newDim }));
}
public static Function LSTM(Variable layer, int dim, int?cellDim = null, string activation = OptActivations.Tanh, string recurrentActivation = OptActivations.Sigmoid, string weightInitializer = OptInitializers.GlorotUniform, string recurrentInitializer = OptInitializers.GlorotUniform, bool useBias = true, string biasInitializer = OptInitializers.Zeros, bool returnSequence = false) { cellDim = cellDim.HasValue ? cellDim : dim; Variable prevOutput = Variable.PlaceholderVariable(new int[] { dim }, layer.DynamicAxes); Variable prevCellState = cellDim.HasValue ? Variable.PlaceholderVariable(new int[] { cellDim.Value }, layer.DynamicAxes) : null; Func <int, Parameter> createBiasParam = (d) => new Parameter(new int[] { d }, DataType.Float, Initializers.Get(biasInitializer), GlobalParameters.Device); Func <int, Parameter> createProjectionParam = (oDim) => new Parameter(new int[] { oDim, NDShape.InferredDimension }, DataType.Float, Initializers.Get(weightInitializer), GlobalParameters.Device); Func <int, Parameter> createDiagWeightParam = (d) => new Parameter(new int[] { d }, DataType.Float, Initializers.Get(recurrentInitializer), GlobalParameters.Device); Function stabilizedPrevOutput = Stabilize <float>(prevOutput, GlobalParameters.Device); Function stabilizedPrevCellState = prevCellState != null?Stabilize <float>(prevCellState, GlobalParameters.Device) : null; Func <Variable> projectInput = null; if (cellDim.HasValue) { projectInput = () => createBiasParam(cellDim.Value) + (createProjectionParam(cellDim.Value) * layer); } else { projectInput = () => layer; } //Input gate Function it = null; if (cellDim.HasValue) { it = Basic.Activation((Variable)(projectInput() + (createProjectionParam(cellDim.Value) * stabilizedPrevOutput)) + CNTKLib.ElementTimes(createDiagWeightParam(cellDim.Value), stabilizedPrevCellState), recurrentActivation); } else { it = Basic.Activation((Variable)(projectInput()), recurrentActivation); } Function bit = null; if (cellDim.HasValue) { bit = CNTKLib.ElementTimes(it, Basic.Activation(projectInput() + (createProjectionParam(cellDim.Value) * stabilizedPrevOutput), activation)); } else { bit = CNTKLib.ElementTimes(it, Basic.Activation(projectInput(), activation)); } // Forget-me-not gate Function ft = null; if (cellDim.HasValue) { ft = Basic.Activation((Variable)(projectInput() + (createProjectionParam(cellDim.Value) * stabilizedPrevOutput)) + CNTKLib.ElementTimes(createDiagWeightParam(cellDim.Value), stabilizedPrevCellState), recurrentActivation); } else { ft = Basic.Activation(projectInput(), recurrentActivation); } Function bft = prevCellState != null?CNTKLib.ElementTimes(ft, prevCellState) : ft; Function ct = (Variable)bft + bit; //Output gate Function ot = null; if (cellDim.HasValue) { ot = Basic.Activation((Variable)(projectInput() + (createProjectionParam(cellDim.Value) * stabilizedPrevOutput)) + CNTKLib.ElementTimes(createDiagWeightParam(cellDim.Value), Stabilize <float>(ct, GlobalParameters.Device)), recurrentActivation); } else { ot = Basic.Activation((Variable)(projectInput()) + Stabilize <float>(ct, GlobalParameters.Device), recurrentActivation); } Function ht = CNTKLib.ElementTimes(ot, CNTKLib.Tanh(ct)); Function c = ct; Function h = (dim != cellDim) ? 
(createProjectionParam(dim) * Stabilize <float>(ht, GlobalParameters.Device)) : ht; Func <Variable, Function> recurrenceHookH = (x) => CNTKLib.PastValue(x); Func <Variable, Function> recurrenceHookC = (x) => CNTKLib.PastValue(x); var actualDh = recurrenceHookH(h); var actualDc = recurrenceHookC(c); if (prevCellState != null) { h.ReplacePlaceholders(new Dictionary <Variable, Variable> { { prevOutput, actualDh }, { prevCellState, actualDc } }); } else { h.ReplacePlaceholders(new Dictionary <Variable, Variable> { { prevOutput, actualDh } }); } if (returnSequence) { return(h); } return(CNTKLib.SequenceLast(h)); }
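// A minimal usage sketch of the LSTM layer completed above (illustrative sizes): a 32-unit LSTM over a
// sequence of 100-dimensional feature vectors, returning only the final step because returnSequence is false.
public static Function BuildLstmExample()
{
    Variable sequenceInput = CNTKLib.InputVariable(new int[] { 100 }, DataType.Float, "sequenceInput");
    return LSTM(sequenceInput, 32);
}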
/// <inheritdoc />
internal override Function ToFunction(Variable inputFunction)
{
    return(CNTKLib.Pooling(inputFunction, PoolingType.Max, new[] { inputFunction.Shape[0] }));
}
static void Main(string[] args) { Console.WriteLine("Loading data...."); // unzip archive if (!System.IO.File.Exists("train_images.bin")) { DataUtil.Unzip(@"mnist_data.zip", "."); } // load training and test data var training_data = DataUtil.LoadBinary <float>("train_images.bin", 60000, 28 * 28); var test_data = DataUtil.LoadBinary <float>("test_images.bin", 10000, 28 * 28); var training_labels = DataUtil.LoadBinary <float>("train_labels.bin", 60000, 10); var test_labels = DataUtil.LoadBinary <float>("test_labels.bin", 10000, 10); // report results Console.WriteLine($"{training_data.GetLength(0)} training digits loaded"); Console.WriteLine($"{test_data.GetLength(0)} test digits loaded"); // build features and labels var features = NetUtil.Var(new int[] { 28, 28 }, DataType.Float); var labels = NetUtil.Var(new int[] { 10 }, DataType.Float); // build the network var network = features .Dense(512, CNTKLib.ReLU) .Dense(10) .ToNetwork(); // set up the loss function and the classification error function var lossFunc = CNTKLib.CrossEntropyWithSoftmax(network.Output, labels); var errorFunc = CNTKLib.ClassificationError(network.Output, labels); // set up a trainer that uses the RMSProp algorithm var learner = network.GetRMSPropLearner( learningRateSchedule: 0.99, gamma: 0.95, inc: 2.0, dec: 0.5, max: 2.0, min: 0.5 ); // set up a trainer and an evaluator var trainer = network.GetTrainer(learner, lossFunc, errorFunc); var evaluator = network.GetEvaluator(errorFunc); // declare some variables var maxEpochs = 20; var batchSize = 128; var loss = 0.0; var error = 0.0; var batchCount = 0; // train the network during several epochs Console.WriteLine("Training the neural network...."); for (int epoch = 0; epoch < maxEpochs; epoch++) { Console.Write($"Training epoch {epoch + 1}/{maxEpochs}... "); // train the network using random batches loss = 0.0; error = 0.0; batchCount = 0; training_data.Index().Shuffle().Batch(batchSize, (indices, begin, end) => { // get the current batch var featureBatch = features.GetBatch(training_data, indices, begin, end); var labelBatch = labels.GetBatch(training_labels, indices, begin, end); // train the network on the batch var result = trainer.TrainBatch( new[] { (features, featureBatch), (labels, labelBatch) },
/// <summary>
/// construct a parameter of double values
/// </summary>
/// <param name="shape">shape of the parameter</param>
/// <param name="initValue">initial value of the parameter</param>
/// <param name="device">device</param>
/// <param name="name">name</param>
public Parameter(NDShape shape, double initValue, DeviceDescriptor device, string name)
    : this(shape, DataType.Double, CNTKLib.ConstantInitializer(initValue), device, name)
{
}
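// A minimal usage sketch of the constructor above (illustrative values, not from the original source):
// a 3x4 parameter of doubles, every element initialized to 0.1, placed on the CPU.
public static Parameter CreateExampleParameter()
{
    return new Parameter(NDShape.CreateNDShape(new int[] { 3, 4 }), 0.1, DeviceDescriptor.CPUDevice, "exampleParam");
}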
/// <summary>
/// 3D convolution layer (e.g. spatial convolution over volumes). This layer creates a convolution kernel that is convolved with the layer input to produce a tensor of outputs. If useBias is true, a bias vector is created and added to the outputs. Finally, if activation is not None, it is applied to the outputs as well.
/// </summary>
/// <param name="shape">The 3D input shape.</param>
/// <param name="channels">Integer, the dimensionality of the output space.</param>
/// <param name="kernelSize">A tuple of 3 integers, specifying the depth, height and width of the 3D convolution window.</param>
/// <param name="strides">A tuple of 3 integers, specifying the strides of the convolution along each spatial dimension. Specifying any stride value != 1 is incompatible with specifying any dilation value != 1.</param>
/// <param name="padding">Boolean, if true results in padding the input such that the output has the same length as the original input.</param>
/// <param name="dilation">A tuple of 3 integers, specifying the dilation rate to use for dilated convolution. Currently, specifying any dilation value != 1 is incompatible with specifying any stride value != 1.</param>
/// <param name="activation">Activation function to use. If you don't specify anything, no activation is applied (i.e. "linear" activation: a(x) = x). <see cref="SiaNet.Common.OptActivations"/></param>
/// <param name="useBias">Boolean, whether the layer uses a bias vector.</param>
/// <param name="weightInitializer">Initializer for the kernel weights matrix. <see cref="SiaNet.Common.OptInitializers"/></param>
/// <param name="biasInitializer">Initializer for the bias vector. <see cref="SiaNet.Common.OptInitializers"/></param>
/// <returns></returns>
public static Function Conv3D(Tuple<int, int, int, int> shape, int channels, Tuple<int, int, int> kernelSize, Tuple<int, int, int> strides, bool padding = true, Tuple<int, int, int> dilation = null, string activation = OptActivations.None, bool useBias = false, string weightInitializer = OptInitializers.Xavier, string biasInitializer = OptInitializers.Zeros)
{
    Variable input = CNTKLib.InputVariable(new int[] { shape.Item1, shape.Item2, shape.Item3 }, DataType.Float);
    return(Conv3D(input, channels, kernelSize, strides, padding, dilation, activation, useBias, weightInitializer, biasInitializer));
}
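// A minimal usage sketch of the overload above (illustrative sizes, not from the original source):
// 8 filters of size 3x3x3 over a 16x16x16 single-channel volume, stride 1 in every dimension, default settings otherwise.
public static Function BuildConv3DExample()
{
    return Conv3D(Tuple.Create(16, 16, 16, 1), 8, Tuple.Create(3, 3, 3), Tuple.Create(1, 1, 1));
}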
static public Function Embedding(Variable input, int embeddingDim, DeviceDescriptor device)
{
    System.Diagnostics.Debug.Assert(input.Shape.Rank == 1);
    int inputDim = input.Shape[0];
    var embeddingParameters = new Parameter(new int[] { embeddingDim, inputDim }, DataType.Float, CNTKLib.GlorotUniformInitializer(), device);
    return(CNTKLib.Times(embeddingParameters, input));
}
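// A minimal usage sketch of the helper above (illustrative sizes): embed a 10000-way sparse one-hot input
// into 50 dimensions on the given device.
public static Function BuildEmbeddingExample(DeviceDescriptor device)
{
    var tokens = Variable.InputVariable(new int[] { 10000 }, DataType.Float, "tokens", null, true /*isSparse*/);
    return Embedding(tokens, 50, device);
}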
/// <summary>
/// Max pooling operation for 3D data (spatial or spatio-temporal).
/// </summary>
/// <param name="layer">The output of the last layer.</param>
/// <param name="poolSize">Tuple of 3 integers, factors by which to downscale (dim1, dim2, dim3). (2, 2, 2) will halve the size of the 3D input in each dimension.</param>
/// <param name="strides">Tuple of 3 integers. Strides values.</param>
/// <param name="padding">Boolean, if true results in padding the input such that the output has the same length as the original input.</param>
/// <returns></returns>
public static Function MaxPool3D(Variable layer, Tuple<int, int, int> poolSize, Tuple<int, int, int> strides, bool padding = true)
{
    return(CNTKLib.Pooling(layer, PoolingType.Max,
                           new int[] { poolSize.Item1, poolSize.Item2, poolSize.Item3 },
                           new int[] { strides.Item1, strides.Item2, strides.Item3 },
                           new BoolVector(new bool[] { padding, padding, padding })));
}
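// A minimal usage sketch of the helper above (illustrative shape): halve each spatial dimension of a
// 16x16x16 volume with 8 channels.
public static Function BuildMaxPool3DExample()
{
    Variable volume = CNTKLib.InputVariable(new int[] { 16, 16, 16, 8 }, DataType.Float, "volume");
    return MaxPool3D(volume, Tuple.Create(2, 2, 2), Tuple.Create(2, 2, 2));
}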
public override Function Create(Function input, DeviceDescriptor device)
{
    return(CNTKLib.Dropout(input, _dropoutRate, _seed, _name));
}