/// <summary>
/// Runs the trainer over one full sweep (epoch) of the minibatch source.
/// Returns true when the sweep completed, false when training threw.
/// </summary>
private static bool TrainSingleEpoch(Trainer trainer, DeviceDescriptor device, MinibatchSource miniBatchSource, uint miniBatchSize, Dictionary<Variable, StreamInformation> streamInfos)
{
    try
    {
        // Keep pulling minibatches until the source marks the end of a sweep.
        for (var batch = miniBatchSource.GetNextMinibatch(miniBatchSize, device);
             !MiniBatchDataIsSweepEnd(batch.Values);
             batch = miniBatchSource.GetNextMinibatch(miniBatchSize, device))
        {
            var currentBatch = batch;
            // Map each model variable to the data of its associated stream.
            var feed = currentBatch == null
                ? null
                : streamInfos.ToDictionary(pair => pair.Key, pair => currentBatch[pair.Value]);
            trainer.TrainMinibatch(feed, device);
        }
        return true;
    }
    catch (Exception ex)
    {
        // Best-effort epoch: report the failure and signal it via the return value.
        Console.WriteLine(ex);
        return false;
    }
}
/// <summary>
/// Creates a sampler over a CNTK text-format (CTF) file. Stream names and
/// dimensions are guessed from the first few lines of the file; sparse
/// streams are rejected.
/// </summary>
/// <param name="path">Path to the CTF data file.</param>
/// <param name="minibatchSize">Number of samples per minibatch.</param>
/// <param name="randomize">Whether the underlying source shuffles samples.</param>
public CTFSampler(string path, int minibatchSize, bool randomize = true)
{
    _minibatchSize = minibatchSize;

    // One stream configuration per element discovered in the file.
    _streamConfigurations = new List<StreamConfiguration>();
    var elements = GuessDataFormat(path, 10);
    foreach (var element in elements)
    {
        // GuessDataFormat signals sparse data with a dimension of -1.
        if (element.Value == -1)
        {
            throw new ArgumentException("CTF file contains sparse data");
        }
        _streamConfigurations.Add(new StreamConfiguration(element.Key, element.Value, false));
    }

    _minibatchSource = MinibatchSource.TextFormatMinibatchSource(path, _streamConfigurations, MinibatchSource.InfinitelyRepeat, randomize);

    // Cache the stream information for every discovered stream name.
    _streamInfos = new Dictionary<string, StreamInformation>();
    foreach (var name in elements.Keys)
    {
        _streamInfos.Add(name, _minibatchSource.StreamInfo(name));
    }
}
/// <summary>
/// Creates a minibatch source of the requested type: a CNTK text-format source
/// (Default), an image deserializer with optional crop augmentation (Image),
/// or a plain StreamReader over the training file (Custom).
/// </summary>
/// <param name="type">Kind of minibatch source to create.</param>
/// <param name="streamConfigurations">Stream metadata; for image sources the first entry is taken as the feature stream and the last as the label stream.</param>
/// <param name="inputVar">Input variables; for image sources the first one supplies width/height/channels.</param>
/// <param name="outputVar">Output variables (not referenced in this constructor).</param>
/// <param name="trainFilePath">Path to the training data (or image map) file.</param>
/// <param name="validFilePath">Path to the validation data file (stored only).</param>
/// <param name="epochSize">Epoch size passed to the text-format source.</param>
/// <param name="randomizeBatch">Whether the text-format source shuffles samples.</param>
/// <param name="useImgAugm">1 enables the random-side crop augmentation for image sources.</param>
public MinibatchSourceEx(MinibatchType type, StreamConfiguration[] streamConfigurations, List<Variable> inputVar, List<Variable> outputVar, string trainFilePath, string validFilePath, ulong epochSize, bool randomizeBatch, int useImgAugm)
{
    this.StreamConfigurations = streamConfigurations;
    this.TrainingDataFile = trainFilePath;
    this.ValidationDataFile = validFilePath;
    Type = type;
    if (Type == MinibatchType.Default)
    {
        // prepare the training data
        defaultmb = MinibatchSource.TextFormatMinibatchSource(trainFilePath, StreamConfigurations, epochSize, randomizeBatch);
    }
    else if (Type == MinibatchType.Image)
    {
        var featVar = inputVar.First();
        // Image geometry is taken from the first input variable's shape:
        // NOTE(review): assumes dimension order (width, height, channels) — confirm against callers.
        int image_width = featVar.Shape.Dimensions[0];
        int image_height = featVar.Shape.Dimensions[1];
        int num_channels = featVar.Shape.Dimensions[2];

        // make transformation and scaling
        var transforms = new List<CNTKDictionary>();
        var randomSideTransform = CNTKLib.ReaderCrop("RandomSide",
            new Tuple<int, int>(0, 0),
            new Tuple<float, float>(0.8f, 1.0f),
            new Tuple<float, float>(0.0f, 0.0f),
            new Tuple<float, float>(1.0f, 1.0f),
            "uniRatio");
        if (useImgAugm == 1)
        {
            transforms.Add(randomSideTransform);
        }

        // scaling image comes at the end of image transformation
        var scaleTransform = CNTKLib.ReaderScale(image_width, image_height, num_channels);
        transforms.Add(scaleTransform);

        // NOTE(review): assumes label stream is last and feature stream is first
        // in streamConfigurations — confirm against callers.
        var labelName = streamConfigurations.Last().m_streamName;
        var labelDimension = streamConfigurations.Last().m_dim;
        var featureName = streamConfigurations.First().m_streamName;
        var imagemb = CNTKLib.ImageDeserializer(trainFilePath, labelName, (uint)labelDimension, featureName, transforms);
        var mmsConfig = new CNTK.MinibatchSourceConfig(new CNTK.DictionaryVector() { imagemb });

        defaultmb = CNTKLib.CreateCompositeMinibatchSource(mmsConfig);
    }
    else if (Type == MinibatchType.Custom)
    {
        custommb = new StreamReader(trainFilePath);
    }
    else
    {
        throw new Exception("Minibatchsource type is unknown!");
    }
}
/// <summary>
/// Evaluate how good (or bad) training went on test data.
/// Loads the persisted model, streams the test set in batches, compares
/// predicted class indices with the expected ones, and prints the error rate.
/// </summary>
/// <param name="datasetDefinition">Dataset definition used to build the stream configurations.</param>
/// <param name="persistedTrainingModelPath">Path to the trained model file.</param>
/// <param name="howManySamplesToUseFromTestDataset">Stop evaluating once this many samples have been consumed.</param>
protected virtual void EvaluateModel(ITrainingDatasetDefinition datasetDefinition, string persistedTrainingModelPath, int howManySamplesToUseFromTestDataset)
{
    using (var evaluationMinibatchSourceModel = MinibatchSource.TextFormatMinibatchSource(
        TrainingDataset.TestingDatasetPath,
        GetStreamConfigFrom(datasetDefinition),
        MinibatchSource.FullDataSweep))
    {
        Function model = Function.Load(persistedTrainingModelPath, Device);
        var imageInput = model.Arguments[0];
        var labelOutput = model.Outputs.Single(o => o.Name == ClassifierName);

        var featureStreamInfo = evaluationMinibatchSourceModel.StreamInfo(FeatureStreamName);
        var labelStreamInfo = evaluationMinibatchSourceModel.StreamInfo(LabelsStreamName);

        int batchSize = 50;
        int miscountTotal = 0, totalCount = 0;
        while (true)
        {
            var minibatchData = evaluationMinibatchSourceModel.GetNextMinibatch((uint)batchSize, Device);
            if (minibatchData == null || minibatchData.Count == 0)
            {
                break;
            }
            totalCount += (int)minibatchData[featureStreamInfo].numberOfSamples;

            // expected labels are one-hot encoded in the minibatch data; argmax recovers the class index.
            var labelData = minibatchData[labelStreamInfo].data.GetDenseData<float>(labelOutput);
            var expectedLabels = labelData.Select(l => l.IndexOf(l.Max())).ToList();

            var inputDataMap = new Dictionary<Variable, Value>() { { imageInput, minibatchData[featureStreamInfo].data } };
            // null Value lets CNTK allocate the output buffer.
            var outputDataMap = new Dictionary<Variable, Value>() { { labelOutput, null } };

            model.Evaluate(inputDataMap, outputDataMap, Device);
            var outputData = outputDataMap[labelOutput].GetDenseData<float>(labelOutput);
            var actualLabels = outputData.Select(l => l.IndexOf(l.Max())).ToList();

            int misMatches = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum();
            miscountTotal += misMatches;
            MessagePrinter.PrintMessage($"Validating Model: Total Samples = {totalCount}, Misclassify Count = {miscountTotal}");

            if (totalCount > howManySamplesToUseFromTestDataset)
            {
                break;
            }
        }

        // Guard against an empty test set: 0/0 would yield NaN.
        float errorRate = totalCount == 0 ? 0.0F : 1.0F * miscountTotal / totalCount;
        MessagePrinter.PrintMessage($"Model Validation Error = {errorRate}");
    }
}
/// <summary>
/// Builds the minibatch source for already-prepared data and caches the
/// feature and label stream information for later lookups.
/// </summary>
public PreparedDataInfo(string preparedDataPath, string featureStreamName, string labelsStreamName, int imageSize, int numClasses, ulong epochSize)
{
    var source = GetMinibatchSource(preparedDataPath, featureStreamName, labelsStreamName, imageSize, numClasses, epochSize);
    MinibatchSource = source;
    FeatureStreamInfo = source.StreamInfo(featureStreamName);
    LabelStreamInfo = source.StreamInfo(labelsStreamName);
}
/// <summary>
/// Creates the infinitely-repeating text-format minibatch source for the
/// training dataset and caches its feature/label stream information.
/// </summary>
protected override void PrepareTrainingData(ITrainingDatasetDefinition datasetDefinition)
{
    var source = MinibatchSource.TextFormatMinibatchSource(
        TrainingDataset.TrainingDatasetPath,
        GetStreamConfigFrom(datasetDefinition),
        MinibatchSource.InfinitelyRepeat);

    _minibatchSource = source;
    _featureStreamInfo = source.StreamInfo(FeatureStreamName);
    _labelStreamInfo = source.StreamInfo(LabelsStreamName);
}
/// <summary>
/// Validates a trained CIFAR-10 model against the test map file.
/// </summary>
/// <param name="device">Device used for evaluation.</param>
/// <param name="modelFile">Path to the persisted model.</param>
private static void ValidateModel(DeviceDescriptor device, string modelFile)
{
    // Consistency fix: the map file previously used a hard-coded absolute
    // "D:/..." path while the mean file (and the sibling float-returning
    // overload) already resolve against CifarDataFolder.
    MinibatchSource testMinibatchSource = CreateMinibatchSource(
        Path.Combine(CifarDataFolder, "test_map.txt"),
        Path.Combine(CifarDataFolder, "CIFAR-10_mean.xml"),
        imageDim, numClasses, 1);
    TestHelper.ValidateModelWithMinibatchSource(modelFile, testMinibatchSource,
        imageDim, numClasses, "features", "labels", "classifierOutput", device);
}
/// <summary>
/// Evaluates a saved model against a minibatch source and returns the
/// classification error rate (misclassified / total samples seen).
/// </summary>
/// <param name="modelFile">Path to the persisted CNTK model.</param>
/// <param name="testMinibatchSource">Source supplying the test data.</param>
/// <param name="imageDim">Input dimensions (not read by this method; kept for signature compatibility).</param>
/// <param name="numClasses">Class count (not read by this method; kept for signature compatibility).</param>
/// <param name="featureInputName">Stream name of the features.</param>
/// <param name="labelInputName">Stream name of the labels.</param>
/// <param name="outputName">Name of the model's classifier output node.</param>
/// <param name="device">Device used for evaluation.</param>
/// <param name="maxCount">Stop once this many samples have been consumed.</param>
/// <returns>The observed error rate; 0 when the source yields no samples.</returns>
public static float ValidateModelWithMinibatchSource(
    string modelFile, MinibatchSource testMinibatchSource,
    int[] imageDim, int numClasses, string featureInputName, string labelInputName,
    string outputName, DeviceDescriptor device, int maxCount = 1000)
{
    Function model = Function.Load(modelFile, device);
    var imageInput = model.Arguments[0];
    var labelOutput = model.Outputs.Single(o => o.Name == outputName);

    var featureStreamInfo = testMinibatchSource.StreamInfo(featureInputName);
    var labelStreamInfo = testMinibatchSource.StreamInfo(labelInputName);

    int batchSize = 50;
    int miscountTotal = 0, totalCount = 0;
    while (true)
    {
        var minibatchData = testMinibatchSource.GetNextMinibatch((uint)batchSize, device);
        if (minibatchData == null || minibatchData.Count == 0)
        {
            break;
        }
        totalCount += (int)minibatchData[featureStreamInfo].numberOfSamples;

        // expected labels are one-hot encoded in the minibatch data; argmax recovers the class index.
        var labelData = minibatchData[labelStreamInfo].data.GetDenseData<float>(labelOutput);
        var expectedLabels = labelData.Select(l => l.IndexOf(l.Max())).ToList();

        var inputDataMap = new Dictionary<Variable, Value>() { { imageInput, minibatchData[featureStreamInfo].data } };
        // null Value lets CNTK allocate the output buffer.
        var outputDataMap = new Dictionary<Variable, Value>() { { labelOutput, null } };

        model.Evaluate(inputDataMap, outputDataMap, device);
        var outputData = outputDataMap[labelOutput].GetDenseData<float>(labelOutput);
        var actualLabels = outputData.Select(l => l.IndexOf(l.Max())).ToList();

        int misMatches = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum();
        miscountTotal += misMatches;
        Console.WriteLine($"Validating Model: Total Samples = {totalCount}, Misclassify Count = {miscountTotal}");

        if (totalCount > maxCount)
        {
            break;
        }
    }

    // Guard against an empty source: 0/0 would yield NaN.
    float errorRate = totalCount == 0 ? 0.0F : 1.0F * miscountTotal / totalCount;
    Console.WriteLine($"Model Validation Error = {errorRate}");
    return errorRate;
}
/// <summary>
/// Validates a trained CIFAR-10 model and returns its classification error rate.
/// </summary>
private static float ValidateModel(DeviceDescriptor device, string modelFile)
{
    var mapFile = Path.Combine(CifarDataFolder, "test_map.txt");
    var meanFile = Path.Combine(CifarDataFolder, "CIFAR-10_mean.xml");
    MinibatchSource testMinibatchSource = CreateMinibatchSource(mapFile, meanFile, imageDim, numClasses, 1);
    return TestHelper.ValidateModelWithMinibatchSource(modelFile, testMinibatchSource,
        imageDim, numClasses, "features", "labels", "classifierOutput", device);
}
/// <summary>
/// Evaluates a saved model against a minibatch source and returns the
/// classification error rate. Progress is reported through Unity's Debug.Log.
/// </summary>
/// <param name="_model">Path to the persisted CNTK model.</param>
/// <param name="testMinibatchSource">Source supplying the test data.</param>
/// <param name="featureInputName">Stream name of the features.</param>
/// <param name="labelInputName">Stream name of the labels.</param>
/// <param name="outputName">Name of the model's classifier output node.</param>
/// <param name="device">Device used for evaluation.</param>
/// <param name="batchSize">Samples per evaluation minibatch.</param>
/// <param name="maxCount">Stop once this many samples have been consumed.</param>
/// <returns>The observed error rate; 0 when the source yields no samples.</returns>
public static float ValidateModelWithMinibatchSource(
    string _model, MinibatchSource testMinibatchSource,
    string featureInputName, string labelInputName, string outputName,
    DeviceDescriptor device, int batchSize, int maxCount = 1000)
{
    Function model = Function.Load(_model, device);
    var input = model.Arguments[0];
    var labelOutput = model.Outputs.Single(o => o.Name == outputName);

    var featureStreamInfo = testMinibatchSource.StreamInfo(featureInputName);
    var labelStreamInfo = testMinibatchSource.StreamInfo(labelInputName);

    int miscountTotal = 0, totalCount = 0;
    while (true)
    {
        var minibatchData = testMinibatchSource.GetNextMinibatch((uint)batchSize, device);
        if (minibatchData == null || minibatchData.Count == 0)
        {
            break;
        }
        totalCount += (int)minibatchData[featureStreamInfo].numberOfSamples;

        // expected labels are one-hot encoded in the minibatch data; argmax recovers the class index.
        var labelData = minibatchData[labelStreamInfo].data.GetDenseData<float>(labelOutput);
        var expectedLabels = labelData.Select(l => l.IndexOf(l.Max())).ToList();

        var inputDataMap = new Dictionary<Variable, Value>() { { input, minibatchData[featureStreamInfo].data } };

        // Create output data map. Using null as Value to indicate using system allocated memory.
        var outputDataMap = new Dictionary<Variable, Value>();
        outputDataMap.Add(labelOutput, null);

        model.Evaluate(inputDataMap, outputDataMap, device);
        var outputData = outputDataMap[labelOutput].GetDenseData<float>(labelOutput);
        var actualLabels = outputData.Select(l => l.IndexOf(l.Max())).ToList();

        int misMatches = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum();
        miscountTotal += misMatches;
        Debug.Log($"Validating Model: Total Samples = {totalCount}, Misclassify Count = {miscountTotal}");

        if (totalCount > maxCount)
        {
            break;
        }
    }

    // Guard against an empty source: 0/0 would yield NaN.
    float errorRate = totalCount == 0 ? 0.0F : 1.0F * miscountTotal / totalCount;
    Debug.Log($"Average test error: {errorRate:P2}");
    return errorRate;
}
/// <summary>
/// SWIG-generated P/Invoke wrapper: creates a text-format minibatch source from
/// a data file and stream configurations using native defaults for all other options.
/// </summary>
/// <returns>The wrapped native source, or null when the native call returned a null pointer.</returns>
public static MinibatchSource TextFormatMinibatchSourceInternal(string dataFilePath, StreamConfigurationVector streamConfigs)
{
    global::System.IntPtr cPtr = CNTKLibPINVOKE.MinibatchSource_TextFormatMinibatchSourceInternal__SWIG_4(dataFilePath, StreamConfigurationVector.getCPtr(streamConfigs));
    // Wrap the native pointer; 'true' transfers ownership to the managed wrapper.
    MinibatchSource ret = (cPtr == global::System.IntPtr.Zero) ? null : new MinibatchSource(cPtr, true);
    // Surface any exception raised on the native side.
    if (CNTKLibPINVOKE.SWIGPendingException.Pending)
    {
        throw CNTKLibPINVOKE.SWIGPendingException.Retrieve();
    }
    return (ret);
}
/// <summary>
/// SWIG-generated P/Invoke wrapper: creates a text-format minibatch source with
/// explicit epoch size and randomization settings.
/// </summary>
/// <returns>The wrapped native source, or null when the native call returned a null pointer.</returns>
public static MinibatchSource TextFormatMinibatchSourceInternal(string dataFilePath, StreamConfigurationVector streamConfigs, ulong epochSize, bool randomize, ulong randomizationWindow, bool sampleBasedRandomizationWindow)
{
    global::System.IntPtr cPtr = CNTKLibPINVOKE.MinibatchSource_TextFormatMinibatchSourceInternal__SWIG_0(dataFilePath, StreamConfigurationVector.getCPtr(streamConfigs), epochSize, randomize, randomizationWindow, sampleBasedRandomizationWindow);
    // Wrap the native pointer; 'true' transfers ownership to the managed wrapper.
    MinibatchSource ret = (cPtr == global::System.IntPtr.Zero) ? null : new MinibatchSource(cPtr, true);
    // Surface any exception raised on the native side.
    if (CNTKLibPINVOKE.SWIGPendingException.Pending)
    {
        throw CNTKLibPINVOKE.SWIGPendingException.Retrieve();
    }
    return (ret);
}
/// <summary>
/// Builds a text-format minibatch source with one feature stream and one label stream.
/// </summary>
private MinibatchSource GetMinibatchSource(string preparedDataPath, string featureStreamName, string labelsStreamName, int imageSize, int numClasses, ulong epochSize)
{
    var streamConfigs = new StreamConfiguration[]
    {
        new StreamConfiguration(featureStreamName, imageSize),
        new StreamConfiguration(labelsStreamName, numClasses)
    };
    return MinibatchSource.TextFormatMinibatchSource(preparedDataPath, streamConfigs, epochSize);
}
/// <summary>
/// Wraps an infinitely-repeating text-format source over the given dataset
/// file as a CntkDataSource with the standard feature/label stream names.
/// </summary>
private CntkDataSource CreateDataSource(string datasetFile)
{
    var streams = new StreamConfiguration[]
    {
        new StreamConfiguration(FEATURE_STREAM_NAME, _modelWrapper.InputLength),
        new StreamConfiguration(LABEL_STREAM_NAME, _modelWrapper.OutputLength)
    };
    var source = MinibatchSource.TextFormatMinibatchSource(datasetFile, streams, MinibatchSource.InfinitelyRepeat);
    return new CntkDataSource(source, FEATURE_STREAM_NAME, LABEL_STREAM_NAME);
}
/// <summary>
/// Creates the text-format minibatch source for the given feature/label
/// variables and caches the variables together with their stream information.
/// </summary>
internal void LoadTextData(CNTK.Variable feature, CNTK.Variable label)
{
    // Flatten the feature shape: rank-1 stays as-is, otherwise dim0 * dim1 * dim2.
    var featureShape = feature.Shape;
    int imageSize = featureShape.Rank == 1
        ? featureShape[0]
        : featureShape[0] * featureShape[1] * featureShape[2];
    int numClasses = label.Shape[0];

    IList<StreamConfiguration> streamConfigurations = new StreamConfiguration[]
    {
        new StreamConfiguration(featureStreamName, imageSize),
        new StreamConfiguration(labelsStreamName, numClasses)
    };

    miniBatchSource = MinibatchSource.TextFormatMinibatchSource(FileName, streamConfigurations, MinibatchSource.InfinitelyRepeat);
    featureVariable = feature;
    labelVariable = label;
    featureStreamInfo = miniBatchSource.StreamInfo(featureStreamName);
    labelStreamInfo = miniBatchSource.StreamInfo(labelsStreamName);
}
/// <summary>
/// When using Normalization of the input variables, before training process and network creation,
/// we must calculate mean and standard deviation in order to prepare the normalization layer during network creation.
/// </summary>
/// <param name="inputVars">Input variables to normalize; each name must match a stream in the default minibatch source.</param>
/// <param name="device">Device used to compute the per-dimension statistics.</param>
/// <returns>One PerDimMeanVarianceNormalize Function per input variable.</returns>
public List<Function> NormalizeInput(List<Variable> inputVars, DeviceDescriptor device)
{
    // Statistics can only be computed from the default (text-format) source.
    if (inputVars.Count > 0 && Type != MinibatchType.Default)
    {
        throw new Exception("Input normalization is supported for default minibatch source only!");
    }

    // Per-stream (mean, inverse std-dev) pairs collected for all inputs.
    var globalMeanStd = new Dictionary<StreamInformation, Tuple<NDArrayView, NDArrayView>>();
    foreach (var var in inputVars)
    {
        var inputMeansAndInvStdDevs = new Dictionary<StreamInformation, Tuple<NDArrayView, NDArrayView>>();
        var featureStreamInfo = defaultmb.StreamInfo(var.Name);
        // Null tuple entries are placeholders filled in by the compute call below.
        inputMeansAndInvStdDevs.Add(featureStreamInfo, new Tuple<NDArrayView, NDArrayView>(null, null));

        // compute mean and standard deviation of the population for inputs variables
        MinibatchSource.ComputeInputPerDimMeansAndInvStdDevs(defaultmb, inputMeansAndInvStdDevs, device);

        // add to global variable
        var v = inputMeansAndInvStdDevs.First();
        //var avg = (new Value(v.Value.Item1)).GetDenseData<float>(var);
        //var std = (new Value(v.Value.Item2)).GetDenseData<float>(var);
        globalMeanStd.Add(v.Key, v.Value);
    }

    // Build a normalization function for each input from the collected statistics.
    var normalizedInputs = new List<Function>();
    foreach (var input in inputVars)
    {
        var z = globalMeanStd.Where(x => x.Key.m_name == input.Name).Select(x => x.Value).FirstOrDefault();
        // NOTE(review): featureStreamInfo is never read in this loop — candidate for removal.
        var featureStreamInfo = defaultmb.StreamInfo(input.Name);

        var mean = new Constant(z.Item1, "mean");
        var std = new Constant(z.Item2, "std");

        var normalizedinput = CNTKLib.PerDimMeanVarianceNormalize(input, mean, std, input.Name + m_NormalizedSufixName);

        normalizedInputs.Add(normalizedinput);
    }
    return (normalizedInputs);
}
/// <summary>
/// Creates a minibatch source of the requested type: a CNTK text-format source
/// (Default) or a plain StreamReader over the training file (Custom). Stream
/// metadata and file paths are stored for later use.
/// </summary>
/// <param name="type">Kind of minibatch source to create.</param>
/// <param name="streamConfigurations">Stream metadata for the text-format source.</param>
/// <param name="trainFilePath">Path to the training data file.</param>
/// <param name="validFilePath">Path to the validation data file (stored only).</param>
/// <param name="epochSize">Epoch size passed to the text-format source.</param>
/// <param name="randomizeBatch">Whether the text-format source shuffles samples.</param>
/// <exception cref="Exception">Thrown for types this overload does not support (e.g. Image).</exception>
public MinibatchSourceEx(MinibatchType type, StreamConfiguration[] streamConfigurations, string trainFilePath, string validFilePath, ulong epochSize, bool randomizeBatch)
{
    this.StreamConfigurations = streamConfigurations;
    this.TrainingDataFile = trainFilePath;
    this.ValidationDataFile = validFilePath;
    Type = type;
    if (Type == MinibatchType.Default)
    {
        // prepare the training data
        defaultmb = MinibatchSource.TextFormatMinibatchSource(trainFilePath, StreamConfigurations, epochSize, randomizeBatch);
    }
    else if (Type == MinibatchType.Custom)
    {
        custommb = new StreamReader(trainFilePath);
    }
    else
    {
        // Previously unsupported types were silently ignored, leaving the source
        // uninitialized (later NullReferenceException); fail fast instead,
        // consistent with the image-aware overload.
        throw new Exception("Minibatchsource type is unknown!");
    }
}
/// <summary>
/// Builds an infinitely-repeating text-format minibatch source whose streams
/// mirror the given name-to-variable mapping (one stream per variable, sized
/// by the variable's total shape size).
/// </summary>
CntkMinibatchSource CreateMinibatchSource(string mapFilePath, Dictionary<string, Variable> nameToVariable, bool randomize)
{
    var streamConfigurations = nameToVariable
        .Select(kvp => new StreamConfiguration(kvp.Key, kvp.Value.Shape.TotalSize))
        .ToList();

    var minibatchSource = MinibatchSource.TextFormatMinibatchSource(
        mapFilePath,
        streamConfigurations,
        MinibatchSource.InfinitelyRepeat,
        randomize);

    return new CntkMinibatchSource(minibatchSource, nameToVariable);
}
/// <summary>
/// Sets up the compute device, the data/model file paths, the stream
/// configurations, and loads the training and test data.
/// </summary>
private void Init()
{
    // Pick the compute target up front; everything else is device-independent.
    device = useGPU ? DeviceDescriptor.GPUDevice(0) : DeviceDescriptor.CPUDevice;

    var baseDir = Environment.CurrentDirectory;
    trainPath = System.IO.Path.Combine(baseDir, @"Assets\CNTK\Data\train.txt");
    testPath = System.IO.Path.Combine(baseDir, @"Assets\CNTK\Data\test.txt");
    modelPath = System.IO.Path.Combine(baseDir, @"Assets\CNTK\Models\mymodel.model");

    streamConfigurations = new StreamConfiguration[]
    {
        new StreamConfiguration("features", inputDim),
        new StreamConfiguration("labels", numOutputClasses)
    };

    trainingData = ReadData(trainPath, true);
    testData = ReadData(testPath, false);
}
/// <summary>
/// The method is called during Evaluation of the model for a specific data set,
/// returning the whole data set as a single minibatch.
/// </summary>
/// <param name="type">Minibatch source type (Default or Custom).</param>
/// <param name="strFilePath">dataset file path</param>
/// <param name="streamConfigurations">stream configuration which provides meta-data information</param>
/// <param name="device">Device the minibatch data is created on.</param>
/// <returns>Map of stream information to the full-sweep minibatch data.</returns>
public static UnorderedMapStreamInformationMinibatchData GetFullBatch(MinibatchType type, string strFilePath, StreamConfiguration[] streamConfigurations, DeviceDescriptor device)
{
    if (type == MinibatchType.Default)
    {
        // FullDataSweep + int.MaxValue pulls the entire file in one batch.
        var mbs = MinibatchSource.TextFormatMinibatchSource(strFilePath, streamConfigurations, MinibatchSource.FullDataSweep, false);

        var minibatchData = mbs.GetNextMinibatch(int.MaxValue, device);

        return (minibatchData);
    }
    else if (type == MinibatchType.Custom)
    {
        using (var mbreader = new StreamReader(strFilePath))
        {
            // -1 / 1: read everything as a single batch.
            var retVal = nextBatch(mbreader, streamConfigurations, -1, 1, device);
            var mb = new UnorderedMapStreamInformationMinibatchData();
            for (int i = 0; i < retVal.Count; i++)
            {
                var k = retVal.ElementAt(i);
                // Hoisted: the matching configuration is read twice below.
                // (Unused locals 'key' and 'stream' from the original removed.)
                var config = streamConfigurations[i];

                // Rebuild stream information from the configuration and produced data.
                var si = new StreamInformation();
                si.m_definesMbSize = config.m_definesMbSize;
                si.m_storageFormat = k.Value.data.StorageFormat;
                si.m_name = config.m_streamName;

                mb.Add(si, k.Value);
            }
            return (mb);
        }
    }
    else
    {
        throw new Exception("Minibatch is not supported.");
    }
}
// (Removed: a large commented-out copy of CreateConvolutionalNeuralNetwork that
// duplicated live code elsewhere — dead code kept only in version control.)

/// <summary>
/// Creates a composite image minibatch source over the given map file.
/// Images are scaled to ImageDim and exposed as the "features" stream,
/// with one-hot labels exposed as the "labels" stream.
/// </summary>
/// <param name="MapFilePath">Path to the image map file (image path + label per line).</param>
private void ImageLoader(string MapFilePath)
{
    // Scale every image to the expected input dimensions (width, height, channels).
    List<CNTKDictionary> transforms = new List<CNTKDictionary>
    {
        CNTKLib.ReaderScale(ImageDim[0], ImageDim[1], ImageDim[2])
        // CNTKLib.ReaderMean(meanFilePath) // optional mean subtraction, currently disabled
    };

    var deserializerConfiguration = CNTKLib.ImageDeserializer(MapFilePath, "labels", (uint)NumClasses, "features", transforms);

    MinibatchSourceConfig config = new MinibatchSourceConfig(new List<CNTKDictionary> { deserializerConfiguration })
    {
        MaxSweeps = 50 // stop producing data after 50 sweeps over the map file
    };

    minibatchSource = CNTKLib.CreateCompositeMinibatchSource(config);
    imageStreamInfo = minibatchSource.StreamInfo("features");
    labelStreamInfo = minibatchSource.StreamInfo("labels");
}
/// <summary>
/// Trains the network from the "XORdataset.txt" minibatch file for the
/// requested number of epochs (full sweeps), printing progress periodically.
/// </summary>
/// <param name="trainer">Trainer driving the parameter updates.</param>
/// <param name="inputs">Feature input variable (its shape sizes the feature stream).</param>
/// <param name="labels">Label input variable (its shape sizes the label stream).</param>
/// <param name="device">Device used for training.</param>
/// <param name="epochs">Number of sweeps to train for.</param>
/// <param name="outputFrequencyInMinibatches">How often progress is printed.</param>
static void TrainFromMiniBatchFile(Trainer trainer, Variable inputs, Variable labels, DeviceDescriptor device, int epochs = 1000, int outputFrequencyInMinibatches = 50)
{
    int i = 0;
    IList<StreamConfiguration> streamConfigurations = new StreamConfiguration[]
    {
        new StreamConfiguration("features", inputs.Shape[0]),
        new StreamConfiguration("labels", labels.Shape[0])
    };
    var minibatchSource = MinibatchSource.TextFormatMinibatchSource("XORdataset.txt", streamConfigurations, MinibatchSource.InfinitelyRepeat, true);

    // Hoisted out of the loop: stream information never changes between minibatches.
    var featureStreamInfo = minibatchSource.StreamInfo("features");
    var labelStreamInfo = minibatchSource.StreamInfo("labels");

    while (epochs >= 0)
    {
        var minibatchData = minibatchSource.GetNextMinibatch(4, device);
        var arguments = new Dictionary<Variable, MinibatchData>
        {
            { inputs, minibatchData[featureStreamInfo] },
            { labels, minibatchData[labelStreamInfo] }
        };
        trainer.TrainMinibatch(arguments, device);
        PrintTrainingProgress(trainer, i++, outputFrequencyInMinibatches);

        // A sweep-end marker means one full pass over the dataset finished.
        if (minibatchData.Values.Any(a => a.sweepEnd))
        {
            epochs--;
        }
    }
}
/// <summary>
/// Wraps a CNTK MinibatchSource together with the mapping from stream names
/// to the variables they feed. Both arguments are required.
/// </summary>
/// <exception cref="ArgumentNullException">Thrown when either argument is null.</exception>
public CntkMinibatchSource(MinibatchSource minibatchSource, IDictionary<string, Variable> nameToVariable)
{
    if (nameToVariable == null)
    {
        throw new ArgumentNullException(nameof(nameToVariable));
    }
    if (minibatchSource == null)
    {
        throw new ArgumentNullException(nameof(minibatchSource));
    }
    m_nameToVariable = nameToVariable;
    m_minibatchSource = minibatchSource;
}
/// <summary>
/// Build and train a RNN model.
/// </summary>
/// <param name="device">CPU or GPU device to train and run the model</param>
public static void Train(DeviceDescriptor device)
{
    // network dimensions
    const int inputDim = 2000;
    const int cellDim = 25;
    const int hiddenDim = 25;
    const int embeddingDim = 50;
    const int numOutputClasses = 5;

    // build the model: sparse sequence input -> LSTM sequence classifier
    var featuresName = "features";
    var features = Variable.InputVariable(new int[] { inputDim }, DataType.Float, featuresName, null, true /*isSparse*/);
    var labelsName = "labels";
    var labels = Variable.InputVariable(new int[] { numOutputClasses }, DataType.Float, labelsName,
        new List<Axis>() { Axis.DefaultBatchAxis() }, true);

    var classifierOutput = LSTMSequenceClassifierNet(features, numOutputClasses, embeddingDim, hiddenDim, cellDim, device, "classifierOutput");

    Function trainingLoss = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labels, "lossFunction");
    Function prediction = CNTKLib.ClassificationError(classifierOutput, labels, "classificationError");

    // prepare training data; CTF aliases: "x" = sparse features, "y" = dense labels
    IList<StreamConfiguration> streamConfigurations = new StreamConfiguration[]
    {
        new StreamConfiguration(featuresName, inputDim, true, "x"),
        new StreamConfiguration(labelsName, numOutputClasses, false, "y")
    };
    var minibatchSource = MinibatchSource.TextFormatMinibatchSource(
        Path.Combine(DataFolder, "Train.ctf"), streamConfigurations,
        MinibatchSource.InfinitelyRepeat, true);
    var featureStreamInfo = minibatchSource.StreamInfo(featuresName);
    var labelStreamInfo = minibatchSource.StreamInfo(labelsName);

    // prepare for training: momentum SGD with a per-sample learning rate
    TrainingParameterScheduleDouble learningRatePerSample = new TrainingParameterScheduleDouble(0.0005, 1);
    TrainingParameterScheduleDouble momentumTimeConstant = CNTKLib.MomentumAsTimeConstantSchedule(256);

    IList<Learner> parameterLearners = new List<Learner>()
    {
        Learner.MomentumSGDLearner(classifierOutput.Parameters(), learningRatePerSample, momentumTimeConstant, /*unitGainMomentum = */ true)
    };
    var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, parameterLearners);

    // train the model
    uint minibatchSize = 200;
    int outputFrequencyInMinibatches = 20;
    int miniBatchCount = 0;
    int numEpochs = 5;
    while (numEpochs > 0)
    {
        var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device);
        var arguments = new Dictionary<Variable, MinibatchData>
        {
            { features, minibatchData[featureStreamInfo] },
            { labels, minibatchData[labelStreamInfo] }
        };
        trainer.TrainMinibatch(arguments, device);
        TestHelper.PrintTrainingProgress(trainer, miniBatchCount++, outputFrequencyInMinibatches);

        // Because minibatchSource is created with MinibatchSource.InfinitelyRepeat,
        // batching will not end. Each time minibatchSource completes a sweep (epoch),
        // the last minibatch data will be marked as end of a sweep. We use this flag
        // to count the number of epochs.
        if (TestHelper.MiniBatchDataIsSweepEnd(minibatchData.Values))
        {
            numEpochs--;
        }
    }
}
/// <summary>
/// Train and evaluate an image classifier for MNIST data.
/// </summary>
/// <param name="device">CPU or GPU device to run training and evaluation</param>
/// <param name="useConvolution">option to use convolution network or to use multilayer perceptron</param>
/// <param name="forceRetrain">whether to override an existing model.
/// if true, any existing model will be overridden and the new one evaluated.
/// if false and there is an existing model, the existing model is evaluated.</param>
public static void TrainAndEvaluate(DeviceDescriptor device, bool useConvolution, bool forceRetrain)
{
    var featureStreamName = "features";
    var labelsStreamName = "labels";
    var classifierName = "classifierOutput";
    Function classifierOutput;
    // 28x28x1 image tensor for the CNN, flat 784 vector for the MLP.
    int[] imageDim = useConvolution ? new int[] { 28, 28, 1 } : new int[] { 784 };
    int imageSize = 28 * 28;
    int numClasses = 10;

    IList<StreamConfiguration> streamConfigurations = new StreamConfiguration[]
    {
        new StreamConfiguration(featureStreamName, imageSize),
        new StreamConfiguration(labelsStreamName, numClasses)
    };

    string modelFile = useConvolution ? "MNISTConvolution.model" : "MNISTMLP.model";

    // If a model already exists and not set to force retrain, validate the model and return.
    if (File.Exists(modelFile) && !forceRetrain)
    {
        var minibatchSourceExistModel = MinibatchSource.TextFormatMinibatchSource(
            Path.Combine(ImageDataFolder, "Test_cntk_text.txt"), streamConfigurations);
        TestHelper.ValidateModelWithMinibatchSource(modelFile, minibatchSourceExistModel,
            imageDim, numClasses, featureStreamName, labelsStreamName, classifierName, device);
        return;
    }

    // build the network
    var input = CNTKLib.InputVariable(imageDim, DataType.Float, featureStreamName);
    if (useConvolution)
    {
        // scale pixel values from [0, 255] to [0, 1] (1/256)
        var scaledInput = CNTKLib.ElementTimes(Constant.Scalar<float>(0.00390625f, device), input);
        classifierOutput = CreateConvolutionalNeuralNetwork(scaledInput, numClasses, device, classifierName);
    }
    else
    {
        // For MLP, we like to have the middle layer to have certain amount of states.
        int hiddenLayerDim = 200;
        var scaledInput = CNTKLib.ElementTimes(Constant.Scalar<float>(0.00390625f, device), input);
        classifierOutput = CreateMLPClassifier(device, numClasses, hiddenLayerDim, scaledInput, classifierName);
    }

    var labels = CNTKLib.InputVariable(new int[] { numClasses }, DataType.Float, labelsStreamName);
    var trainingLoss = CNTKLib.CrossEntropyWithSoftmax(new Variable(classifierOutput), labels, "lossFunction");
    var prediction = CNTKLib.ClassificationError(new Variable(classifierOutput), labels, "classificationError");

    // prepare training data
    var minibatchSource = MinibatchSource.TextFormatMinibatchSource(
        Path.Combine(ImageDataFolder, "Train_cntk_text.txt"), streamConfigurations, MinibatchSource.InfinitelyRepeat);
    var featureStreamInfo = minibatchSource.StreamInfo(featureStreamName);
    var labelStreamInfo = minibatchSource.StreamInfo(labelsStreamName);

    // set per sample learning rate
    CNTK.TrainingParameterScheduleDouble learningRatePerSample = new CNTK.TrainingParameterScheduleDouble(
        0.003125, TrainingParameterScheduleDouble.UnitType.Sample);

    IList<Learner> parameterLearners = new List<Learner>() { Learner.SGDLearner(classifierOutput.Parameters(), learningRatePerSample) };
    var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, parameterLearners);

    const uint minibatchSize = 64;
    int outputFrequencyInMinibatches = 20, i = 0;
    int epochs = 5;
    while (epochs > 0)
    {
        var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device);
        var arguments = new Dictionary<Variable, MinibatchData>
        {
            { input, minibatchData[featureStreamInfo] },
            { labels, minibatchData[labelStreamInfo] }
        };
        trainer.TrainMinibatch(arguments, device);
        TestHelper.PrintTrainingProgress(trainer, i++, outputFrequencyInMinibatches);

        // MinibatchSource is created with MinibatchSource.InfinitelyRepeat.
        // Batching will not end. Each time minibatchSource completes a sweep (epoch),
        // the last minibatch data will be marked as end of a sweep. We use this flag
        // to count the number of epochs.
        if (TestHelper.MiniBatchDataIsSweepEnd(minibatchData.Values))
        {
            epochs--;
        }
    }

    // save the trained model
    classifierOutput.Save(modelFile);

    // validate the model
    var minibatchSourceNewModel = MinibatchSource.TextFormatMinibatchSource(
        Path.Combine(ImageDataFolder, "Test_cntk_text.txt"), streamConfigurations, MinibatchSource.FullDataSweep);
    TestHelper.ValidateModelWithMinibatchSource(modelFile, minibatchSourceNewModel,
        imageDim, numClasses, featureStreamName, labelsStreamName, classifierName, device);
}
/// <summary>
/// Trains a simple multi-layer feed-forward classifier on the SimpleDataTrain
/// dataset: computes input normalization statistics, builds the network,
/// round-trips it through save/reload, then trains with SGD and periodic
/// checkpointing. Throws when the final loss or error rate is unexpectedly high.
/// </summary>
/// <param name="device">CPU or GPU device used for training.</param>
internal static void TrainSimpleFeedForwardClassifier(DeviceDescriptor device)
{
    int inputDim = 2;
    int numOutputClasses = 2;
    int hiddenLayerDim = 50;
    int numHiddenLayers = 2;

    int minibatchSize = 50;
    int numSamplesPerSweep = 10000;
    int numSweepsToTrainWith = 2;
    int numMinibatchesToTrain = (numSamplesPerSweep * numSweepsToTrainWith) / minibatchSize;

    var featureStreamName = "features";
    var labelsStreamName = "labels";
    var input = Variable.InputVariable(new int[] { inputDim }, DataType.Float, "features");
    var labels = Variable.InputVariable(new int[] { numOutputClasses }, DataType.Float, "labels");

    Function classifierOutput;
    Function trainingLoss;
    Function prediction;

    IList<StreamConfiguration> streamConfigurations = new StreamConfiguration[]
    {
        new StreamConfiguration(featureStreamName, inputDim),
        new StreamConfiguration(labelsStreamName, numOutputClasses)
    };

    // First pass over the data: compute per-dimension input statistics and build the network.
    using (var minibatchSource = MinibatchSource.TextFormatMinibatchSource(
        Path.Combine(DataFolder, "SimpleDataTrain_cntk_text.txt"),
        streamConfigurations, MinibatchSource.FullDataSweep, true, MinibatchSource.DefaultRandomizationWindowInChunks))
    {
        var featureStreamInfo = minibatchSource.StreamInfo(featureStreamName);
        var labelStreamInfo = minibatchSource.StreamInfo(labelsStreamName);

        // Null tuple entries are placeholders filled in by the compute call below.
        IDictionary<StreamInformation, Tuple<NDArrayView, NDArrayView>> inputMeansAndInvStdDevs =
            new Dictionary<StreamInformation, Tuple<NDArrayView, NDArrayView>>
            {
                { featureStreamInfo, new Tuple<NDArrayView, NDArrayView>(null, null) }
            };
        MinibatchSource.ComputeInputPerDimMeansAndInvStdDevs(minibatchSource, inputMeansAndInvStdDevs, device);

        var normalizedinput = CNTKLib.PerDimMeanVarianceNormalize(input,
            inputMeansAndInvStdDevs[featureStreamInfo].Item1, inputMeansAndInvStdDevs[featureStreamInfo].Item2);

        // Hidden layers: fully-connected + sigmoid, repeated numHiddenLayers times.
        Function fullyConnected = TestHelper.FullyConnectedLinearLayer(normalizedinput, hiddenLayerDim, device, "");
        classifierOutput = CNTKLib.Sigmoid(fullyConnected, "");
        for (int i = 1; i < numHiddenLayers; ++i)
        {
            fullyConnected = TestHelper.FullyConnectedLinearLayer(classifierOutput, hiddenLayerDim, device, "");
            classifierOutput = CNTKLib.Sigmoid(fullyConnected, "");
        }

        // Output layer: affine projection to the class dimension.
        var outputTimesParam = new Parameter(NDArrayView.RandomUniform<float>(
            new int[] { numOutputClasses, hiddenLayerDim }, -0.05, 0.05, 1, device));
        var outputBiasParam = new Parameter(NDArrayView.RandomUniform<float>(
            new int[] { numOutputClasses }, -0.05, 0.05, 1, device));
        classifierOutput = CNTKLib.Plus(outputBiasParam, outputTimesParam * classifierOutput, "classifierOutput");

        // (fixed: removed a stray empty statement ';;' that followed this line)
        trainingLoss = CNTKLib.CrossEntropyWithSoftmax(classifierOutput, labels, "lossFunction");
        prediction = CNTKLib.ClassificationError(classifierOutput, labels, "classificationError");

        // Test save and reload of model
        {
            Variable classifierOutputVar = classifierOutput;
            Variable trainingLossVar = trainingLoss;
            Variable predictionVar = prediction;
            var combinedNet = Function.Combine(new List<Variable>() { trainingLoss, prediction, classifierOutput },
                "feedForwardClassifier");
            TestHelper.SaveAndReloadModel(ref combinedNet,
                new List<Variable>() { input, labels, trainingLossVar, predictionVar, classifierOutputVar }, device);

            classifierOutput = classifierOutputVar;
            trainingLoss = trainingLossVar;
            prediction = predictionVar;
        }
    }

    CNTK.TrainingParameterScheduleDouble learningRatePerSample = new CNTK.TrainingParameterScheduleDouble(
        0.02, TrainingParameterScheduleDouble.UnitType.Sample);

    // Second pass: the actual training run.
    using (var minibatchSource = MinibatchSource.TextFormatMinibatchSource(
        Path.Combine(DataFolder, "SimpleDataTrain_cntk_text.txt"), streamConfigurations))
    {
        var featureStreamInfo = minibatchSource.StreamInfo(featureStreamName);
        var labelStreamInfo = minibatchSource.StreamInfo(labelsStreamName);

        // (fixed: removed a dead reassignment of streamConfigurations here —
        // the value was never read again after this point)

        IList<Learner> parameterLearners = new List<Learner>() { Learner.SGDLearner(classifierOutput.Parameters(), learningRatePerSample) };
        var trainer = Trainer.CreateTrainer(classifierOutput, trainingLoss, prediction, parameterLearners);

        int outputFrequencyInMinibatches = 20;
        int trainingCheckpointFrequency = 100;
        for (int i = 0; i < numMinibatchesToTrain; ++i)
        {
            var minibatchData = minibatchSource.GetNextMinibatch((uint)minibatchSize, device);
            var arguments = new Dictionary<Variable, MinibatchData>
            {
                { input, minibatchData[featureStreamInfo] },
                { labels, minibatchData[labelStreamInfo] }
            };
            trainer.TrainMinibatch(arguments, device);
            TestHelper.PrintTrainingProgress(trainer, i, outputFrequencyInMinibatches);

            // Periodically exercise checkpoint save/restore.
            if ((i % trainingCheckpointFrequency) == (trainingCheckpointFrequency - 1))
            {
                string ckpName = "feedForward.net";
                trainer.SaveCheckpoint(ckpName);
                trainer.RestoreFromCheckpoint(ckpName);
            }
        }

        // Sanity-check the final metrics.
        double trainLossValue = trainer.PreviousMinibatchLossAverage();
        double evaluationValue = trainer.PreviousMinibatchEvaluationAverage();
        if (trainLossValue > 0.3 || evaluationValue > 0.2)
        {
            throw new Exception($"TrainSimpleFeedForwardClassifier resulted in unusual high training loss (= {trainLossValue}) or error rate (= {evaluationValue})");
        }
    }
}
/// <summary>
/// TrainAndEvaluateWithFlowerData shows how to do transfer learning with a MinibatchSource. The MinibatchSource is
/// constructed from a map file that contains image file paths and labels; it handles data loading, image
/// preprocessing, and batch randomization.
/// </summary>
/// <param name="device">CPU or GPU device to run on.</param>
/// <param name="forceReTrain">Force training of the model if true. If false, the method only
/// validates an already-saved model when one exists.</param>
public static void TrainAndEvaluateWithFlowerData(DeviceDescriptor device, bool forceReTrain = false)
{
    // Locate the flower data set and the file where the trained model is persisted.
    string flowersRoot = Path.Combine(ExampleImageFoler, "Flowers");
    string trainingMapFile = Path.Combine(flowersRoot, "1k_img_map.txt");
    string validationMapFile = Path.Combine(flowersRoot, "val_map.txt");
    int numClasses = 102;
    string modelFile = Path.Combine(CurrentFolder, "FlowersTransferLearning.model");

    // A previously trained model is only re-validated unless retraining is forced.
    if (File.Exists(modelFile) && !forceReTrain)
    {
        ValidateModelWithMinibatchSource(modelFile, validationMapFile, imageDims, numClasses, device);
        return;
    }

    // Prepare the training data.
    MinibatchSource minibatchSource = CreateMinibatchSource(trainingMapFile, imageDims, numClasses);
    var featureStreamInfo = minibatchSource.StreamInfo("image");
    var labelStreamInfo = minibatchSource.StreamInfo("labels");

    // Build the transfer-learning network on top of the base ResNet model.
    string predictionNodeName = "prediction";
    Variable imageInput, labelInput;
    Function trainingLoss, predictionError;
    Function transferLearningModel = CreateTransferLearningModel(BaseResnetModelFile, featureNodeName,
        predictionNodeName, lastHiddenNodeName, numClasses, device,
        out imageInput, out labelInput, out trainingLoss, out predictionError);

    // Training hyper-parameters.
    int numMinibatches = 100;
    int minibatchSize = 50;
    float learningRatePerMinibatch = 0.2F;
    float momentumPerMinibatch = 0.9F;
    float l2RegularizationWeight = 0.05F;

    var learnerOptions = new AdditionalLearningOptions() { l2RegularizationWeight = l2RegularizationWeight };
    IList<Learner> parameterLearners = new List<Learner>()
    {
        Learner.MomentumSGDLearner(transferLearningModel.Parameters(),
            new TrainingParameterScheduleDouble(learningRatePerMinibatch, TrainingParameterScheduleDouble.UnitType.Minibatch),
            new TrainingParameterScheduleDouble(momentumPerMinibatch, TrainingParameterScheduleDouble.UnitType.Minibatch),
            true, learnerOptions)
    };
    var trainer = Trainer.CreateTrainer(transferLearningModel, trainingLoss, predictionError, parameterLearners);

    // Train the model, reporting progress after every minibatch.
    int outputFrequencyInMinibatches = 1;
    for (int minibatchCount = 0; minibatchCount < numMinibatches; ++minibatchCount)
    {
        var minibatchData = minibatchSource.GetNextMinibatch((uint)minibatchSize, device);
        var arguments = new Dictionary<Variable, MinibatchData>()
        {
            { imageInput, minibatchData[featureStreamInfo] },
            { labelInput, minibatchData[labelStreamInfo] }
        };
        trainer.TrainMinibatch(arguments, device);
        TestHelper.PrintTrainingProgress(trainer, minibatchCount, outputFrequencyInMinibatches);
    }

    // Persist and then validate the freshly trained model.
    transferLearningModel.Save(modelFile);
    ValidateModelWithMinibatchSource(modelFile, validationMapFile, imageDims, numClasses, device);
}
/// <summary>
/// Evaluates a saved model against a test MinibatchSource and returns the classification error rate.
/// Optionally copies a slice of the evaluated inputs and predicted labels into <paramref name="X"/> and
/// <paramref name="Y"/> for later visualization.
/// </summary>
/// <param name="modelFile">Path of the saved CNTK model to load.</param>
/// <param name="testMinibatchSource">Source supplying the test minibatches.</param>
/// <param name="imageDim">Dimensions of one input sample; their product is the flattened sample size.</param>
/// <param name="numClasses">Number of output classes. NOTE(review): not referenced by this body — confirm intent.</param>
/// <param name="featureInputName">Name of the feature stream in the minibatch source.</param>
/// <param name="labelInputName">Name of the label stream in the minibatch source.</param>
/// <param name="device">CPU or GPU device to evaluate on.</param>
/// <param name="maxCount">NOTE(review): not referenced by this body; the sample cap is hard-coded below — confirm intent.</param>
/// <param name="X">Optional sink for flattened input vectors of evaluated samples.</param>
/// <param name="Y">Optional sink for the predicted class indices of those samples.</param>
/// <param name="useConvolution">NOTE(review): not referenced by this body — confirm intent.</param>
/// <returns>The fraction of misclassified samples, or 0 when the source yielded no data.</returns>
public static float ValidateModel(string modelFile, MinibatchSource testMinibatchSource, int[] imageDim, int numClasses,
    string featureInputName, string labelInputName, DeviceDescriptor device,
    int maxCount = 1000, List<List<float>> X = null, List<float> Y = null, bool useConvolution = true)
{
    Function model = Function.Load(modelFile, device);
    var imageInput = model.Arguments[0];
    var labelOutput = model.Output;

    var featureStreamInfo = testMinibatchSource.StreamInfo(featureInputName);
    var labelStreamInfo = testMinibatchSource.StreamInfo(labelInputName);

    int batchSize = 1000;
    int miscountTotal = 0, totalCount = 0;

    // Flattened size of one input sample; loop-invariant, so compute it once.
    int sampleSize = imageDim.Aggregate(1, (acc, val) => acc * val);

    while (true)
    {
        var minibatchData = testMinibatchSource.GetNextMinibatch((uint)batchSize, device);
        // Stop when the source is exhausted.
        if (minibatchData == null || minibatchData.Count == 0)
        {
            break;
        }
        totalCount += (int)minibatchData[featureStreamInfo].numberOfSamples;

        // Expected labels are one-hot encoded in the minibatch data; arg-max recovers the class index.
        var labelData = minibatchData[labelStreamInfo].data.GetDenseData<float>(labelOutput);
        var expectedLabels = labelData.Select(l => l.IndexOf(l.Max())).ToList();

        var inputDataMap = new Dictionary<Variable, Value>() { { imageInput, minibatchData[featureStreamInfo].data } };
        var outputDataMap = new Dictionary<Variable, Value>() { { labelOutput, null } };
        model.Evaluate(inputDataMap, outputDataMap, device);

        // Raw feature values of this minibatch, flattened into one dense buffer (was misspelled "faetureData").
        var featureData = minibatchData[featureStreamInfo].data.GetDenseData<float>(
            CNTKLib.InputVariable(minibatchData[featureStreamInfo].data.Shape, DataType.Float, model.Arguments[0].Name));

        var outputData = outputDataMap[labelOutput].GetDenseData<float>(labelOutput);
        var actualLabels = outputData.Select(l => l.IndexOf(l.Max())).ToList();

        int misMatches = actualLabels.Zip(expectedLabels, (a, b) => a.Equals(b) ? 0 : 1).Sum();
        miscountTotal += misMatches;
        Console.WriteLine($"Validating Model: Total Samples = {totalCount}, Mis-classify Count = {miscountTotal}");

        // NOTE(review): the cap below is hard-coded and the maxCount parameter is ignored; wiring maxCount
        // in would change how much data is validated for existing callers, so it is left as-is and flagged.
        if (totalCount > 10001)
        {
            // Copy a slice of the evaluated samples and their predicted labels out for the caller.
            if (X != null && Y != null)
            {
                for (int i = 0; i < outputData.Count; i++)
                {
                    var inputVector = featureData[0].Skip(sampleSize * i).Take(sampleSize).Select(x => (float)x).ToList();
                    X.Add(inputVector);
                    Y.Add(actualLabels[i]);
                }
            }
            break;
        }
    }

    // Guard against division by zero when the source yielded no data (previously produced NaN).
    float errorRate = totalCount == 0 ? 0F : 1.0F * miscountTotal / totalCount;
    Console.WriteLine($"Model Validation Error = {errorRate}");
    return errorRate;
}
/// <summary>
/// Trains a simple MNIST digit classifier from a CTF-format text file, saves it as "mnist_classifier",
/// validates it against a separate test file, and visualizes some classification results.
/// </summary>
public static void Run_MNIST_Test()
{
    var device = DeviceDescriptor.UseDefaultDevice();

    // Network dimensions: MNIST images are 28x28 = 784 pixels, classified into 10 digit classes.
    var inDim = 784;
    var outDim = 10;
    var input = CNTKLib.InputVariable(new NDShape(1, inDim), DataType.Float, "features");
    var labels = CNTKLib.InputVariable(new NDShape(1, outDim), DataType.Float, "labels");

    // Create the network.
    var nnModel = createModel(input, outDim, 1, device);

    // Loss and evaluation functions.
    var trainingLoss = CNTKLib.CrossEntropyWithSoftmax(nnModel, labels, "lossFunction");
    var prediction = CNTKLib.ClassificationError(nnModel, labels, "classificationError");

    // Learners and trainer: per-sample learning rate and momentum schedules.
    var learningRatePerSample = new CNTK.TrainingParameterScheduleDouble(0.001, 1);
    var momentumPerSample = new CNTK.TrainingParameterScheduleDouble(0.9, 1);
    var parameterLearners = new List<Learner>()
    {
        CNTKLib.AdamLearner(new ParameterVector(nnModel.Parameters().ToList()), learningRatePerSample, momentumPerSample)
    };
    var trainer = Trainer.CreateTrainer(nnModel, trainingLoss, prediction, parameterLearners);

    // Minibatch source over the CTF-formatted training data.
    var sConfigs = new StreamConfiguration[]
    {
        new StreamConfiguration("features", inDim),
        new StreamConfiguration("labels", outDim)
    };
    // The training file is huge and cannot be uploaded to GitHub; it can be downloaded from:
    // https://github.com/Microsoft/CNTK/tree/987b22a8350211cb4c44278951857af1289c3666/Examples/Image/DataSets/MNIST
    var minibatchSource = MinibatchSource.TextFormatMinibatchSource("..\\..\\..\\Data\\MNIST-TrainData.txt",
        sConfigs, MinibatchSource.InfinitelyRepeat);
    var minibatchSize = (uint)754; // NOTE(review): unusual batch size — possibly a typo for 784; confirm.
    var featureStreamInfo = minibatchSource.StreamInfo("features");
    var labelStreamInfo = minibatchSource.StreamInfo("labels");

    // Train for maxIt sweeps over the data; curIt counts completed sweeps (epochs).
    var maxIt = 250;
    var curIt = 1;
    while (true)
    {
        var minibatchData = minibatchSource.GetNextMinibatch(minibatchSize, device);
        var arguments = new Dictionary<Variable, MinibatchData>
        {
            { input, minibatchData[featureStreamInfo] },
            { labels, minibatchData[labelStreamInfo] }
        };
        trainer.TrainMinibatch(arguments, device);

        // A sweep end marks one full pass over the training data; progress is printed
        // on the first epoch and every 50th epoch thereafter.
        if (minibatchData[featureStreamInfo].sweepEnd)
        {
            if (curIt % 50 == 0 || curIt == 1)
            {
                printProgress(trainer, curIt);
            }
            curIt++;
        }
        if (maxIt <= curIt)
        {
            break;
        }
    }

    // Save the trained model.
    nnModel.Save("mnist_classifier");

    // Validate the model against the test data.
    // NOTE(review): this path uses "data" while the training path uses "Data" — matters on case-sensitive file systems.
    var minibatchSourceNewModel = MinibatchSource.TextFormatMinibatchSource("../../../data/MNIST-TestData.txt",
        sConfigs, MinibatchSource.InfinitelyRepeat);

    // Containers that receive sample inputs and predicted labels for visualization.
    List<List<float>> X = new List<List<float>>();
    List<float> Y = new List<float>();

    // Model validation.
    ValidateModel("mnist_classifier", minibatchSourceNewModel, new int[] { 28, 28 }, 10, "features", "labels", device, 1000, X, Y, false);

    // Show the image classification results.
    showResult(X, Y);
}
/// <summary>
/// Wraps a CNTK MinibatchSource together with the stream information for its feature and label streams.
/// </summary>
/// <param name="source">The minibatch source to wrap.</param>
/// <param name="featureStreamName">Name of the feature stream within the source.</param>
/// <param name="labelStreamName">Name of the label stream within the source.</param>
public CntkDataSource(MinibatchSource source, string featureStreamName, string labelStreamName)
{
    MinibatchSource = source;
    // Resolve the stream handles once so consumers can index minibatches directly.
    FeatureStreamInfo = source.StreamInfo(featureStreamName);
    LabelStreamInfo = source.StreamInfo(labelStreamName);
}