/// <summary>
/// Retrieves the next mini-batch from the underlying source and maps it to model variables.
/// </summary>
/// <param name="minibatchSizeInSamples">Requested mini-batch size in samples.</param>
/// <param name="sweepEnd">Set to true when the current data sweep has been fully consumed.</param>
/// <param name="vars">Model variables the streams are matched to (by name).</param>
/// <param name="device">Device the Value objects are created on.</param>
/// <returns>Dictionary mapping each model variable to its mini-batch Value.</returns>
public Dictionary<Variable, Value> GetNextMinibatch(uint minibatchSizeInSamples, ref bool sweepEnd, List<Variable> vars, DeviceDescriptor device)
{
    if (Type == MinibatchType.Default || Type == MinibatchType.Image)
    {
        // built-in CNTK mini-batch source handles both default and image types
        var args = defaultmb.GetNextMinibatch(minibatchSizeInSamples, device);

        // sweep end is reached as soon as any stream reports it
        sweepEnd = args.Any(x => x.Value.sweepEnd);

        var arguments = MinibatchSourceEx.ToMinibatchValueData(args, vars);
        return arguments;
    }
    else if (Type == MinibatchType.Custom)
    {
        var retVal = nextBatch(custommb, StreamConfigurations, (int)minibatchSizeInSamples);
        var mb = new Dictionary<Variable, Value>();
        sweepEnd = custommb.EndOfStream;

        // create the mini-batch: one Value per stream, matched to its variable by name
        foreach (var d in retVal)
        {
            var v = Value.CreateBatchOfSequences<float>(new NDShape(1, d.Key.m_dim), d.Value, device);

            // was: 'var var = ...' — renamed; a local named 'var' is legal but highly confusing
            var variable = vars.FirstOrDefault(x => x.Name == d.Key.m_streamName);
            if (variable == null)
                throw new InvalidOperationException("Variable cannot be null!");

            mb.Add(variable, v);
        }
        return mb;
    }
    else
    {
        throw new NotSupportedException("Unsupported Mini-batch-source type!");
    }
}
/// <summary>
/// Performs normalization against input features and creates a Normalization Layer prior to
/// neural network creation. This way data normalization is included in the network itself
/// and no additional normalization is required.
/// </summary>
/// <param name="trData">Training parameters carrying the normalization column list.</param>
/// <param name="f">Factory holding input variables and stream configuration.</param>
/// <param name="strTrainFile">Path to the training data file.</param>
/// <param name="strValidFile">Path to the validation data file.</param>
/// <param name="device">Device used for the normalization pass.</param>
/// <returns>Network input variables with normalized variables substituted where configured.</returns>
public static List<Variable> NormalizeInputLayer(TrainingParameters trData, MLFactory f, string strTrainFile, string strValidFile, DeviceDescriptor device)
{
    var networkInput = new List<Variable>();
    if (trData.Normalization != null)
    {
        using (var mbs1 = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), strTrainFile, strValidFile, MinibatchSource.FullDataSweep, trData.RandomizeBatch))
        {
            // select variables which are marked for normalization; Train Data contains this information
            var vars = f.InputVariables.Where(x => trData.Normalization.Contains(x.Name)).ToList();

            // the normalized-variable suffix is reserved; no existing variable may already end with it
            if (f.InputVariables.Any(x => x.Name.EndsWith(MinibatchSourceEx.m_NormalizedSufixName)))
                throw new InvalidOperationException($"Name of Variable cannot ends with '{MinibatchSourceEx.m_NormalizedSufixName}'");

            // create normalized layer variables
            var norVars = mbs1.NormalizeInput(vars, device);

            // all other variables which are not normalized
            var nonNorm = f.InputVariables.Where(x => !trData.Normalization.Contains(x.Name));

            // join normalized and unnormalized input variables, preserving the order of
            // f.InputVariables since the mini-batch follows this variable order
            foreach (var iv in f.InputVariables)
            {
                var v = nonNorm.FirstOrDefault(x => x.Name.Equals(iv.Name));
                if (v != null)
                {
                    // non-normalized variable is taken as-is
                    networkInput.Add(v);
                }
                else
                {
                    // normalized variable is matched by original name + reserved suffix
                    var vn = norVars.FirstOrDefault(x => x.Name.Equals(iv.Name + MinibatchSourceEx.m_NormalizedSufixName));
                    if (vn == null)
                        throw new InvalidOperationException("Error in normalization group of features. Check Features in dataset.");
                    networkInput.Add(vn);
                }
            }
        }
    }
    else
    {
        // no normalization configured — the raw input variables feed the network directly
        networkInput = f.InputVariables;
    }
    return networkInput;
}
// NOTE(review): the return type and access modifiers of this method are not visible in this
// chunk — judging by the return statement it returns a tuple
// (MLFactory, LearningParameters, TrainingParameters, Function, MinibatchSourceEx); confirm
// against the full source.
// Prepares all components needed for training: factory, learning/training parameters,
// network model and mini-batch source, resolved from the ml-config dictionary.
PrepareNNData(Dictionary<string, string> dicMParameters, CreateCustomModel customModel, DeviceDescriptor device)
{
    try
    {
        // create factory object from the configuration
        MLFactory f = CreateMLFactory(dicMParameters);

        // create learning params
        var strLearning = dicMParameters["learning"];
        LearningParameters lrData = MLFactory.CreateLearningParameters(strLearning);

        // create training params
        var strTraining = dicMParameters["training"];
        TrainingParameters trData = MLFactory.CreateTrainingParameters(strTraining);

        // set model component locations relative to the configured root
        var dicPath = MLFactory.GetMLConfigComponentPaths(dicMParameters["paths"]);
        trData.ModelTempLocation = $"{dicMParameters["root"]}\\{dicPath["TempModels"]}";
        trData.ModelFinalLocation = $"{dicMParameters["root"]}\\{dicPath["Models"]}";
        var strTrainPath = $"{dicMParameters["root"]}\\{dicPath["Training"]}";
        // validation path may be empty or a single blank — both mean "no validation set"
        var strValidPath = (string.IsNullOrEmpty(dicPath["Validation"]) || dicPath["Validation"] == " ") ? "" : $"{dicMParameters["root"]}\\{dicPath["Validation"]}";

        // data normalization in case the option is enabled:
        // check if network contains Normalization layer and assign value to normalization parameter
        if (dicMParameters["network"].Contains("Normalization"))
        {
            trData.Normalization = new string[] { MLFactory.m_NumFeaturesGroupName }
        }
        // NOTE(review): the ';' below appears misplaced (likely belongs after the array
        // initializer inside the block above) — confirm against the original source.
        ;

        // perform data normalization according to the normalization parameter
        List<Variable> networkInput = NormalizeInputLayer(trData, f, strTrainPath, strValidPath, device);

        // create network model from the textual network definition
        Function nnModel = CreateNetworkModel(dicMParameters["network"], networkInput, f.OutputVariables, customModel, device);

        // create mini-batch source for training (infinite repeat)
        var mbs = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), strTrainPath, strValidPath, MinibatchSource.InfinitelyRepeat, trData.RandomizeBatch);

        // return ml parameters
        return (f, lrData, trData, nnModel, mbs);
    }
    catch (Exception)
    {
        throw;
    }
}
/// <summary>
/// Callback from the training loop in order to inform the user about training progress.
/// Optionally evaluates the current model on the full training set, evaluates on the
/// validation set, saves the model to a temp location when it improves, and reports progress.
/// </summary>
/// <param name="trParams">Training parameters (epochs, save/progress options).</param>
/// <param name="trainer">CNTK trainer holding the current model and metrics.</param>
/// <param name="network">Network function saved when the model improves.</param>
/// <param name="mbs">Mini-batch source describing the data files and stream configuration.</param>
/// <param name="epoch">Current epoch number.</param>
/// <param name="progress">Progress callback; invoked on the configured frequency.</param>
/// <param name="device">Evaluation device.</param>
/// <returns>Progress data for the current epoch.</returns>
protected virtual ProgressData progressTraining(TrainingParameters trParams, Trainer trainer, Function network, MinibatchSourceEx mbs, int epoch, TrainingProgress progress, DeviceDescriptor device)
{
    // calculate average training loss and evaluation of the last mini-batch
    var mbAvgLoss = trainer.PreviousMinibatchLossAverage();
    var mbAvgEval = trainer.PreviousMinibatchEvaluationAverage();
    var vars = InputVariables.Union(OutputVariables).ToList();

    // default: report the last mini-batch evaluation as training evaluation
    double trainEval = mbAvgEval;

    // sometimes, when the data set is huge, validating the model against the full training
    // dataset takes time, so it can be skipped via the 'FullTrainingSetEval' parameter
    if (trParams.FullTrainingSetEval)
    {
        // cached full-batch data is reloaded only when missing or invalidated
        if (m_TrainData == null || m_TrainData.Values.Any(x => x.data.IsValid == false))
        {
            using (var streamDatat = MinibatchSourceEx.GetFullBatch(mbs.Type, mbs.TrainingDataFile, mbs.StreamConfigurations, device))
            {
                // get full training dataset
                m_TrainData = MinibatchSourceEx.ToMinibatchData(streamDatat, vars, mbs.Type);
            }
            // perform evaluation of the current model on the whole training dataset
            // NOTE(review): this call sits INSIDE the reload branch, so when cached data is
            // still valid the full-set evaluation is skipped (unlike the validation branch
            // below, which always re-evaluates) — confirm this asymmetry is intentional.
            trainEval = trainer.TestMinibatch(m_TrainData, device);
        }
    }

    string bestModelPath = m_bestModelPath;
    double validEval = 0;

    // in case the validation data set is empty, don't perform test-minibatch
    if (!string.IsNullOrEmpty(mbs.ValidationDataFile))
    {
        if (m_ValidationData == null || m_ValidationData.Values.Any(x => x.data.IsValid == false))
        {
            // get validation dataset
            using (var streamData = MinibatchSourceEx.GetFullBatch(mbs.Type, mbs.ValidationDataFile, mbs.StreamConfigurations, device))
            {
                // store validation data for future testing
                m_ValidationData = MinibatchSourceEx.ToMinibatchData(streamData, vars, mbs.Type);
            }
        }
        // perform evaluation of the current model with the validation dataset
        validEval = trainer.TestMinibatch(m_ValidationData, device);
    }

    // decide whether the current model is worth saving into the temp location; depending on
    // the evaluation function, "better" can mean greater than previous (e.g. ClassificationAccuracy)
    if (isBetterThanPrevious(trainEval, validEval, StatMetrics.IsGoalToMinimize(trainer.EvaluationFunction())) && trParams.SaveModelWhileTraining)
    {
        // save model under a unique, timestamped file name
        var strFilePath = $"{trParams.ModelTempLocation}\\model_at_{epoch}of{trParams.Epochs}_epochs_TimeSpan_{DateTime.Now.Ticks}";
        if (!Directory.Exists(trParams.ModelTempLocation))
        {
            Directory.CreateDirectory(trParams.ModelTempLocation);
        }
        // save temp model
        network.Save(strFilePath);

        // remember training and validation evaluation as the new "previous" state
        m_PrevTrainingEval = trainEval;
        m_PrevValidationEval = validEval;
        bestModelPath = strFilePath;

        var tpl = Tuple.Create<double, double, string>(trainEval, validEval, strFilePath);
        m_ModelEvaluations.Add(tpl);
    }
    m_bestModelPath = bestModelPath;

    // create progressData object
    var prData = new ProgressData();
    prData.EpochTotal = trParams.Epochs;
    prData.EpochCurrent = epoch;
    prData.EvaluationFunName = trainer.EvaluationFunction().Name;
    prData.TrainEval = trainEval;
    prData.ValidationEval = validEval;
    prData.MinibatchAverageEval = mbAvgEval;
    prData.MinibatchAverageLoss = mbAvgLoss;

    // the progress is only reported on the configured frequency, the first and the last epoch
    if (progress != null && (epoch % trParams.ProgressFrequency == 0 || epoch == 1 || epoch == trParams.Epochs))
    {
        // add info to the history
        m_trainingHistory.Add(new Tuple<int, float, float, float, float>(epoch, (float)mbAvgLoss, (float)mbAvgEval, (float)trainEval, (float)validEval));

        // send progress
        progress(prData);
        //Console.WriteLine($"Epoch={epoch} of {trParams.Epochs} processed.");
    }

    // return progress data
    return (prData);
}
/// <summary>
/// Main method for training. Runs the mini-batch training loop until the configured number
/// of epochs completes or cancellation is requested, checkpointing and reporting progress
/// at sweep boundaries.
/// </summary>
/// <param name="trainer">CNTK trainer driving the optimization.</param>
/// <param name="network">Network being trained (saved on improvement).</param>
/// <param name="trParams">Training parameters (batch size, epochs, progress options).</param>
/// <param name="miniBatchSource">Source of training mini-batches.</param>
/// <param name="device">Training device.</param>
/// <param name="token">Cancellation token; stopping saves a checkpoint and history.</param>
/// <param name="progress">Progress callback.</param>
/// <param name="modelCheckPoint">Path for the trainer checkpoint, or empty to skip.</param>
/// <param name="historyPath">Path for the training history file, or empty to skip.</param>
/// <returns>Training result describing how the process ended and the best model path.</returns>
public override TrainResult Train(Trainer trainer, Function network, TrainingParameters trParams, MinibatchSourceEx miniBatchSource, DeviceDescriptor device, CancellationToken token, TrainingProgress progress, string modelCheckPoint, string historyPath)
{
    // the result indicates how the training process ended: completed, stopped, crashed
    var trainResult = new TrainResult();
    var historyFile = "";

    // training process evaluation collection: for each iteration the evaluation value for
    // the training and validation set is stored together with the model path
    m_ModelEvaluations = new List<Tuple<double, double, string>>();

    // determine the optimization direction (minimize error vs. maximize accuracy)
    bool isMinimize = StatMetrics.IsGoalToMinimize(trainer.EvaluationFunction());

    // setup first iteration
    if (m_trainingHistory == null)
        m_trainingHistory = new List<Tuple<int, float, float, float, float>>();

    // in case of continued training, the iteration must start after the last recorded epoch
    int epoch = (m_trainingHistory.Count > 0) ? m_trainingHistory.Last().Item1 + 1 : 1;

    ProgressData prData = null;
    var vars = InputVariables.Union(OutputVariables).ToList();

    // training process
    while (true)
    {
        // get mini-batch data and train on it
        var args = miniBatchSource.GetNextMinibatch(trParams.BatchSize, device);
        var arguments = MinibatchSourceEx.ToMinibatchData(args, vars, miniBatchSource.Type);
        trainer.TrainMinibatch(arguments, device);

        // a sweep boundary marks the end of an epoch
        if (args.Any(a => a.Value.sweepEnd))
        {
            // check the progress of the training process
            prData = progressTraining(trParams, trainer, network, miniBatchSource, epoch, progress, device);

            // check if the training process ends
            if (epoch >= trParams.Epochs)
            {
                // save training checkpoint state
                if (!string.IsNullOrEmpty(modelCheckPoint))
                    trainer.SaveCheckpoint(modelCheckPoint);

                // save training history
                if (!string.IsNullOrEmpty(historyPath))
                {
                    string header = $"{trainer.LossFunction().Name};{trainer.EvaluationFunction().Name};";
                    saveTrainingHistory(m_trainingHistory, header, historyPath);
                }

                // save best or last trained model and send report one last time
                var bestModelPath = saveBestModel(trParams, trainer.Model(), epoch, isMinimize);
                progress?.Invoke(prData);

                trainResult.Iteration = epoch;
                trainResult.ProcessState = ProcessState.Compleated;
                trainResult.BestModelFile = bestModelPath;
                trainResult.TrainingHistoryFile = historyFile;
                break;
            }
            else
            {
                epoch++;
            }
        }

        // stop in case the user requests it
        if (token.IsCancellationRequested)
        {
            if (!string.IsNullOrEmpty(modelCheckPoint))
                trainer.SaveCheckpoint(modelCheckPoint);

            // save training history
            if (!string.IsNullOrEmpty(historyPath))
            {
                string header = $"{trainer.LossFunction().Name};{trainer.EvaluationFunction().Name};";
                saveTrainingHistory(m_trainingHistory, header, historyPath);
            }

            // stopping can happen before the first epoch passed, so make an incomplete progress
            if (prData == null)
                prData = progressTraining(trParams, trainer, network, miniBatchSource, epoch, progress, device);

            // save best or last trained model and send report one last time
            var bestModelPath = saveBestModel(trParams, trainer.Model(), epoch, isMinimize);
            progress?.Invoke(prData);

            // setup training result
            trainResult.Iteration = prData.EpochCurrent;
            trainResult.ProcessState = ProcessState.Stopped;
            trainResult.BestModelFile = bestModelPath;
            trainResult.TrainingHistoryFile = historyFile;
            break;
        }
    }
    return trainResult;
}
/// <summary>
/// Returns part of the ml-dataset with feature and label columns; this is needed in case
/// Excel export is performed. Label columns with more than one dimension are collapsed to
/// the index of the maximum value (arg-max); single-value labels keep their first value.
/// </summary>
/// <param name="fun">Model function (unused by the extraction itself; kept for interface compatibility).</param>
/// <param name="evParam">Evaluation parameters holding the mini-batch source and variables.</param>
/// <param name="device">Device used to fetch mini-batches.</param>
/// <returns>Dictionary mapping variable name to its rows of float values.</returns>
public static Dictionary<string, List<List<float>>> FeaturesAndLabels(Function fun, EvaluationParameters evParam, DeviceDescriptor device)
{
    // declare return var
    var featDic = new Dictionary<string, List<List<float>>>();

    // appends rows under 'name', creating the entry on first use (single lookup via TryGetValue)
    void append(string name, List<List<float>> rows)
    {
        if (featDic.TryGetValue(name, out var existing))
            existing.AddRange(rows);
        else
            featDic.Add(name, rows);
    }

    while (true)
    {
        // get one mini-batch of data
        var mbData = evParam.MBSource.GetNextMinibatch(evParam.MinibatchSize, device);
        var mdDataEx = MinibatchSourceEx.ToMinibatchValueData(mbData, evParam.Input.Union(evParam.Ouptut).ToList());

        // input features: copied through unchanged
        foreach (var vv in evParam.Input)
        {
            var d = mdDataEx.FirstOrDefault(x => x.Key.Name.Equals(vv.Name));
            var fv = MLValue.GetValues(d.Key, d.Value);
            append(d.Key.Name, fv);
        }

        // output labels
        foreach (var vv in evParam.Ouptut)
        {
            var d = mdDataEx.FirstOrDefault(x => x.Key.Name.Equals(vv.Name));
            var fv = MLValue.GetValues(d.Key, d.Value);

            if (vv.Shape.Dimensions.Last() == 1)
            {
                // single-value output: keep the first value of each row
                append(d.Key.Name, fv.Select(l => new List<float>() { l.First() }).ToList());
            }
            else
            {
                // one-hot style output: store the index of the maximum value
                append(d.Key.Name, fv.Select(l => new List<float>() { l.IndexOf(l.Max()) }).ToList());
            }
        }

        // stop once the sweep end is reached
        if (mbData.Any(x => x.Value.sweepEnd))
            break;
    }
    return featDic;
}
/// <summary>
/// Evaluates the model over the full data sweep and returns, per row, the actual (label)
/// and predicted values.
/// </summary>
/// <param name="fun">Model function to evaluate.</param>
/// <param name="evParam">Evaluation parameters holding the mini-batch source and variables.</param>
/// <param name="device">Evaluation device.</param>
/// <returns>Pair of row lists: actual label values and model predictions.</returns>
public static (List<List<float>> actual, List<List<float>> predicted) EvaluateFunctionEx(Function fun, EvaluationParameters evParam, DeviceDescriptor device)
{
    // declare return vars
    var actualLst = new List<List<float>>();
    var predictedLst = new List<List<float>>();

    while (true)
    {
        // get one mini-batch of data
        var mbData = evParam.MBSource.GetNextMinibatch(evParam.MinibatchSize, device);
        var mbDataEx = MinibatchSourceEx.ToMinibatchValueData(mbData, evParam.Input.Union(evParam.Ouptut).ToList());

        // build the input map: only argument (input) variables are fed to the model
        var inMap = new Dictionary<Variable, Value>();
        var vars = fun.Arguments.Union(fun.Outputs);
        for (int i = 0; i < vars.Count(); i++)
        {
            var d = mbDataEx.ElementAt(i);
            var v = vars.First(x => x.Name.Equals(d.Key.Name));

            // skip output data
            if (!evParam.Ouptut.Any(x => x.Name.Equals(v.Name)))
                inMap.Add(v, d.Value);
        }

        // actual data, if available
        var actualVar = mbDataEx.Keys.FirstOrDefault(x => x.Name.Equals(evParam.Ouptut.First().Name));
        var act = mbDataEx[actualVar].GetDenseData<float>(actualVar).Select(l => l.ToList());
        actualLst.AddRange(act);

        // predicted data: map variables and evaluate the model
        var predictedDataMap = new Dictionary<Variable, Value>() { { fun, null } };
        fun.Evaluate(inMap, predictedDataMap, device);
        var predicted = predictedDataMap.Values.First();

        // materialize BEFORE disposing 'predicted' — the original enumerated this lazy
        // Select after Erase/Dispose, risking reading from a released Value
        var pred = predicted.GetDenseData<float>(fun).Select(l => l.ToList()).ToList();
        predicted.Erase();
        predicted.Dispose();
        predictedLst.AddRange(pred);

        // stop once the sweep end is reached
        if (mbData.Any(x => x.Value.sweepEnd))
            break;
    }
    return (actualLst, predictedLst);
}
/// <summary>
/// Tests a cntk model stored at 'modelPath' against an array of image paths and returns the
/// predicted class index for each image.
/// </summary>
/// <param name="modelPath">Path to the serialized model file.</param>
/// <param name="imagePaths">Image files to classify.</param>
/// <param name="device">Evaluation device.</param>
/// <returns>Predicted class index per image.</returns>
public static List<int> TestModel(string modelPath, string[] imagePaths, DeviceDescriptor device)
{
    var fi = new FileInfo(modelPath);
    if (!fi.Exists)
        throw new FileNotFoundException($"The '{fi.FullName}' does not exist. Make sure the model is places at this location.");

    // load the model from disk
    var model = Function.Load(fi.FullName, device);

    // get input features and output labels
    var features = model.Arguments.ToList();
    var labels = model.Outputs.ToList();
    var stremsConfig = MLFactory.CreateStreamConfiguration(features, labels);

    // write a temporary map file referencing the images (label column is a dummy 0)
    var mapFile = "testMapFile";
    File.WriteAllLines(mapFile, imagePaths.Select(x => $"{x}\t0"));
    try
    {
        // dispose the mini-batch source when done (MinibatchSourceEx is IDisposable —
        // the original leaked it)
        using (var testMB = new MinibatchSourceEx(MinibatchType.Image, stremsConfig.ToArray(), features, labels, mapFile, null, 30, false, 0))
        {
            var vars = features.Union(labels).ToList();
            var retVal = new List<int>();

            // cap the mini-batch size at 30 images
            var mbSize = Math.Min(imagePaths.Count(), 30);

            while (true)
            {
                bool isSweepEnd = false;
                var inputMap = testMB.GetNextMinibatch((uint)mbSize, ref isSweepEnd, vars, device);

                // prepare output map and evaluate the model
                var outputMap = new Dictionary<Variable, Value> { { labels.First(), null } };
                model.Evaluate(inputMap, outputMap, device);
                var result = outputMap[labels.First()].GetDenseData<float>(labels.First());

                // extract the predicted class per image
                foreach (var r in result)
                {
                    var l = MLValue.GetResult(r);
                    retVal.Add((int)l);
                }

                if (isSweepEnd)
                    break;
            }
            return retVal;
        }
    }
    finally
    {
        // clean up the temporary map file
        if (File.Exists(mapFile))
            File.Delete(mapFile);
    }
}
/// <summary>
/// Main training entry point implemented by concrete trainer classes.
/// </summary>
/// <param name="trainer">CNTK trainer driving the optimization.</param>
/// <param name="network">Network being trained.</param>
/// <param name="trParams">Training parameters.</param>
/// <param name="miniBatchSource">Source of training mini-batches.</param>
/// <param name="device">Training device.</param>
/// <param name="token">Cancellation token for stopping the training.</param>
/// <param name="progressstring">Progress callback.
/// NOTE(review): parameter name looks like a typo ('progress' + 'string' run together);
/// the override in this file names it 'progress' — renaming here would change the public
/// signature for named-argument callers, so it is left as-is; confirm and rename in a
/// coordinated change.</param>
/// <param name="modelCheckPoint">Path for the trainer checkpoint, or empty to skip.</param>
/// <param name="historyPath">Path for the training history file, or empty to skip.</param>
/// <returns>Training result describing how the process ended.</returns>
public abstract TrainResult Train(Trainer trainer, Function network, TrainingParameters trParams, MinibatchSourceEx miniBatchSource, DeviceDescriptor device, CancellationToken token, TrainingProgress progressstring, string modelCheckPoint, string historyPath);
/// <summary>
/// Callback from the training loop to inform the user about training progress. Evaluates the
/// current model against the full training set (optional) and the validation set via
/// MLEvaluator, saves the model to a temp location when it improves, and reports progress.
/// </summary>
/// <param name="trParams">Training parameters (epochs, save/progress options).</param>
/// <param name="trainer">CNTK trainer holding the current model and metrics.</param>
/// <param name="network">Network function saved when the model improves.</param>
/// <param name="mbs">Mini-batch source describing the data files.</param>
/// <param name="epoch">Current epoch number.</param>
/// <param name="progress">Progress callback; invoked on the configured frequency.</param>
/// <param name="device">Evaluation device.</param>
/// <returns>Progress data for the current epoch.</returns>
protected virtual ProgressData progressTraining(TrainingParameters trParams, Trainer trainer, Function network, MinibatchSourceEx mbs, int epoch, TrainingProgress progress, DeviceDescriptor device)
{
    // calculate average training loss and evaluation of the last mini-batch
    var mbAvgLoss = trainer.PreviousMinibatchLossAverage();
    var mbAvgEval = trainer.PreviousMinibatchEvaluationAverage();

    // default: report the last mini-batch evaluation as training evaluation
    double trainEval = mbAvgEval;

    // validating the model against a huge full training dataset can take time,
    // so it can be skipped via the 'FullTrainingSetEval' parameter
    if (trParams.FullTrainingSetEval)
    {
        // NOTE(review): evParams.MBSource is never disposed here — confirm ownership
        // semantics of MinibatchSourceEx before adding a using block
        var evParams = new EvaluationParameters()
        {
            MinibatchSize = trParams.BatchSize,
            MBSource = new MinibatchSourceEx(mbs.Type, this.StreamConfigurations.ToArray(), this.InputVariables, this.OutputVariables, mbs.TrainingDataFile, null, MinibatchSource.FullDataSweep, false, 0),
            Ouptut = OutputVariables,
            Input = InputVariables,
        };
        var result = MLEvaluator.EvaluateFunction(trainer.Model(), evParams, device);
        trainEval = MLEvaluator.CalculateMetrics(trainer.EvaluationFunction().Name, result.actual, result.predicted, device);
    }

    string bestModelPath = m_bestModelPath;
    double validEval = 0;

    // in case the validation data set is empty, don't perform test-minibatch
    if (!string.IsNullOrEmpty(mbs.ValidationDataFile))
    {
        var evParams = new EvaluationParameters()
        {
            MinibatchSize = trParams.BatchSize,
            MBSource = new MinibatchSourceEx(mbs.Type, this.StreamConfigurations.ToArray(), this.InputVariables, this.OutputVariables, mbs.ValidationDataFile, null, MinibatchSource.FullDataSweep, false, 0),
            Ouptut = OutputVariables,
            Input = InputVariables,
        };
        var result = MLEvaluator.EvaluateFunction(trainer.Model(), evParams, device);
        validEval = MLEvaluator.CalculateMetrics(trainer.EvaluationFunction().Name, result.actual, result.predicted, device);
    }

    // decide whether the current model is worth saving into the temp location; depending on
    // the evaluation function, "better" can mean greater than previous (e.g. ClassificationAccuracy)
    if (isBetterThanPrevious(trainEval, validEval, StatMetrics.IsGoalToMinimize(trainer.EvaluationFunction())) && trParams.SaveModelWhileTraining)
    {
        // save model under a unique, timestamped file name
        var strFilePath = $"{trParams.ModelTempLocation}\\model_at_{epoch}of{trParams.Epochs}_epochs_TimeSpan_{DateTime.Now.Ticks}";
        if (!Directory.Exists(trParams.ModelTempLocation))
            Directory.CreateDirectory(trParams.ModelTempLocation);

        // save temp model
        network.Save(strFilePath);

        // remember training and validation evaluation as the new "previous" state
        m_PrevTrainingEval = trainEval;
        m_PrevValidationEval = validEval;
        bestModelPath = strFilePath;

        m_ModelEvaluations.Add(Tuple.Create(trainEval, validEval, strFilePath));
    }
    m_bestModelPath = bestModelPath;

    // create progressData object
    var prData = new ProgressData
    {
        EpochTotal = trParams.Epochs,
        EpochCurrent = epoch,
        EvaluationFunName = trainer.EvaluationFunction().Name,
        TrainEval = trainEval,
        ValidationEval = validEval,
        MinibatchAverageEval = mbAvgEval,
        MinibatchAverageLoss = mbAvgLoss,
    };

    // the progress is only reported on the configured frequency, the first and the last epoch
    if (progress != null && (epoch % trParams.ProgressFrequency == 0 || epoch == 1 || epoch == trParams.Epochs))
    {
        // add info to the history
        m_trainingHistory.Add(new Tuple<int, float, float, float, float>(epoch, (float)mbAvgLoss, (float)mbAvgEval, (float)trainEval, (float)validEval));

        // send progress
        progress(prData);
    }

    // return progress data
    return prData;
}