// Checks that the normalization functions returned by MinibatchSourceEx.NormalizeInput
// reproduce the pre-computed values stored in the reference files, first for the
// training data and then for the validation data.
//
// Values are compared as text: the invariant-culture representation of each computed
// value and the matching tab-separated cell of the reference file are both truncated
// to at most five characters before the comparison.
//
// NOTE(review): all data files are referenced through absolute, machine-specific paths.
// The original code carried commented-out relative variants ("../../../../data/<file>");
// confirm which root is correct before switching.
public void gaussNormalization_test01()
{
    DeviceDescriptor device = DeviceDescriptor.UseDefaultDevice();

    //create factory object
    MLFactory f = new MLFactory();

    //create config streams
    f.CreateIOVariables("feature 4 0", "flower 3 0", DataType.Float);
    var trData = MLFactory.CreateTrainingParameters("|Type: default |BatchSize: 130 |Epochs:5 |Normalization: 0 |SaveWhileTraining: 0 |RandomizeBatch: 0 |ProgressFrequency: 1");

    //raw datasets
    string trainingPath = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\iris_with_hot_vector.txt";
    string validationPath = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\iris_with_hot_vector_test.txt";

    //reference files holding the expected normalized values
    string trainingNormalizedPathh = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\iris_train_normalized.txt";
    var strNormalizedLine = System.IO.File.ReadAllLines(trainingNormalizedPathh);

    string validationNormalizedPath = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\iris_valid_normalized.txt";
    var strValidNormalizedLine = System.IO.File.ReadAllLines(validationNormalizedPath);

    //build the normalization functions from the training data
    List<Function> normalizedInputs = null;
    using (var mbs1 = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), trainingPath, validationPath, MinibatchSource.FullDataSweep, trData.RandomizeBatch))
    {
        normalizedInputs = mbs1.NormalizeInput(f.InputVariables, device);
    }

    //normalization test for train dataset
    using (var mbs1 = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), trainingPath, validationPath, MinibatchSource.FullDataSweep, trData.RandomizeBatch))
    {
        var data = mbs1.GetNextMinibatch(130, device);

        //go through all functions and perform the calculation
        foreach (var fun in normalizedInputs)
        {
            var input = new Dictionary<Variable, Value>() { { f.InputVariables.First(), data.First().Value.data } };
            var output = new Dictionary<Variable, Value>() { { fun, null } };

            fun.Evaluate(input, output, device);
            var normalizedValues = output[fun].GetDenseData<float>(fun);

            for (int i = 0; i < normalizedValues.Count; i++)
            {
                var expectedCells = strNormalizedLine[i].Split('\t');

                //bound the column loop by the row actually being read
                for (int j = 0; j < normalizedValues[i].Count(); j++)
                {
                    var actualText = normalizedValues[i][j].ToString(CultureInfo.InvariantCulture);
                    var expectedText = expectedCells[j];

                    //truncate each side on its own so short values (e.g. "0.5") do not throw
                    var actualPrefix = actualText.Length > 5 ? actualText.Substring(0, 5) : actualText;
                    var expectedPrefix = expectedText.Length > 5 ? expectedText.Substring(0, 5) : expectedText;

                    //expected value goes first so failure messages are labeled correctly
                    Assert.Equal(expectedPrefix, actualPrefix);
                }
            }
        }
    }

    //normalization test for validation dataset
    using (var mbs1 = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), trainingPath, validationPath, MinibatchSource.FullDataSweep, trData.RandomizeBatch))
    {
        var data = MinibatchSourceEx.GetFullBatch(mbs1.Type, mbs1.ValidationDataFile, mbs1.StreamConfigurations, device);

        //go through all functions and perform the calculation
        foreach (var fun in normalizedInputs)
        {
            var input = new Dictionary<Variable, Value>() { { f.InputVariables.First(), data.First().Value.data } };
            var output = new Dictionary<Variable, Value>() { { fun, null } };

            fun.Evaluate(input, output, device);
            var normalizedValues = output[fun].GetDenseData<float>(fun);

            for (int i = 0; i < normalizedValues.Count; i++)
            {
                var expectedCells = strValidNormalizedLine[i].Split('\t');

                //bound the column loop by the row actually being read
                for (int j = 0; j < normalizedValues[i].Count(); j++)
                {
                    var actualText = normalizedValues[i][j].ToString(CultureInfo.InvariantCulture);
                    var expectedText = expectedCells[j];

                    //truncate each side on its own so short values (e.g. "0.5") do not throw
                    var actualPrefix = actualText.Length > 5 ? actualText.Substring(0, 5) : actualText;
                    var expectedPrefix = expectedText.Length > 5 ? expectedText.Substring(0, 5) : expectedText;

                    //expected value goes first so failure messages are labeled correctly
                    Assert.Equal(expectedPrefix, actualPrefix);
                }
            }
        }
    }
}
// Checks the input layer produced by MLFactory.NormalizeInputLayer when the training
// parameters request normalization of the "Sales" feature group only
// ("|Normalization:Sales"). Each input variable is evaluated on a 10-sample minibatch and
// the first component of every sample is compared with a line of the reference result file.
//
// Values are compared as text: the invariant-culture representation of the computed value
// and the reference cell are both truncated to at most five characters before comparison.
//
// NOTE(review): the data files are referenced through absolute, machine-specific paths.
// The original code carried commented-out relative variants ("../../../../data/<file>");
// confirm which root is correct before switching.
public void NormalizationfeatureGroup_test03()
{
    DeviceDescriptor device = DeviceDescriptor.UseDefaultDevice();

    //create factory object
    MLFactory f = new MLFactory();

    //create config streams
    f.CreateIOVariables("|Itemid 1 0 |Sales 4 0 |Color 1 0", "|Label 1 0", DataType.Float);
    var trData = MLFactory.CreateTrainingParameters("|Type: default |BatchSize: 130 |Epochs:5 |Normalization:Sales |SaveWhileTraining: 0 |RandomizeBatch: 0 |ProgressFrequency: 1");

    string trainingPath = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\cntk_dataset_for_normalization_test.txt";
    string trainingNormalizedPathh = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\cntk_dataset_for_normalization_test_result.txt";

    //expected values, one line per compared component
    var normalizedResult = System.IO.File.ReadAllLines(trainingNormalizedPathh);

    //the training file is passed for both the training and the validation path
    var inputVars = MLFactory.NormalizeInputLayer(trData, f, trainingPath, trainingPath, device);

    //normalization test for train dataset
    using (var mbs1 = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), trainingPath, trainingPath, MinibatchSource.FullDataSweep, trData.RandomizeBatch))
    {
        var data = mbs1.GetNextMinibatch(10, device);

        //go through all functions and perform the calculation
        for (int i = 0; i < inputVars.Count; i++)
        {
            var fun = (Function)inputVars[i];
            var inputVariable = f.InputVariables[i];

            //stream of the minibatch that feeds this input variable; fails explicitly when absent
            var stream = data.Keys.First(x => x.m_name.Equals(inputVariable.Name));

            var input = new Dictionary<Variable, Value>() { { inputVariable, data[stream].data } };
            var output = new Dictionary<Variable, Value>() { { fun, null } };

            fun.Evaluate(input, output, device);

            //only the first component of each sample is checked
            var normalizedValues = output[fun].GetDenseData<float>(fun).Select(x => x[0]).ToList();

            //inputs 0 and 1 map to reference lines 0 and 1, later inputs are shifted by 3
            //(presumably because the 4-dimensional Sales group occupies lines 1-4 - TODO confirm)
            int index = i < 2 ? i : i + 3;

            var expectedCells = normalizedResult[index].Split(new char[] { '\t', ' ' });
            for (int j = 0; j < normalizedValues.Count; j++)
            {
                var actualText = normalizedValues[j].ToString(CultureInfo.InvariantCulture);
                var expectedText = expectedCells[j];

                //truncate each side on its own so values of any length (e.g. "1", "10", "0.5") are
                //compared without Substring throwing
                var actualPrefix = actualText.Length > 5 ? actualText.Substring(0, 5) : actualText;
                var expectedPrefix = expectedText.Length > 5 ? expectedText.Substring(0, 5) : expectedText;

                //expected value goes first so failure messages are labeled correctly
                Assert.Equal(expectedPrefix, actualPrefix);
            }
        }
    }
}
/// <summary>
/// Evaluates a trained model against the validation dataset referenced by an ML configuration file
/// and exports the actual and predicted values to the result file configured in that file.
/// </summary>
/// <param name="mlconfigPath">Path of the ML configuration file; its folder is used as the root for all relative paths.</param>
/// <param name="bestTrainedModelPath">Path of the trained model file, relative to the folder of the configuration file.</param>
/// <param name="device">Device used for loading and evaluating the model.</param>
public static void EvaluateModel(string mlconfigPath, string bestTrainedModelPath, DeviceDescriptor device)
{
    //Load ML model configuration file
    var dicMParameters = MLFactory.LoadMLConfiguration(mlconfigPath);

    //add full path of model folder since the configuration doesn't contain any absolute path
    dicMParameters.Add("root", MLFactory.GetMLConfigFolder(mlconfigPath));

    //get model data paths
    var dicPath = MLFactory.GetMLConfigComponentPaths(dicMParameters["paths"]);

    //parse training parameters
    var projectValues = dicMParameters["training"].Split(MLFactory.m_cntkSpearator, StringSplitOptions.RemoveEmptyEntries);

    //Minibatch type (parsed case-insensitively)
    var mbTypestr = MLFactory.GetParameterValue(projectValues, "Type");
    MinibatchType mbType = (MinibatchType)Enum.Parse(typeof(MinibatchType), mbTypestr, true);

    //prepare MLFactory
    var f = MLFactory.CreateMLFactory(dicMParameters);

    //prepare the paths needed for evaluation; only the validation set, the result file
    //and the model file are used here
    var root = dicMParameters["root"];
    var strValidPath = $"{root}\\{dicPath["Validation"]}";
    var strResult = $"{root}\\{dicPath["Result"]}";
    var bestModelFullPath = $"{root}\\{bestTrainedModelPath}";

    //decide what data to evaluate
    var dataPath = strValidPath;

    //load model
    var model = Function.Load(bestModelFullPath, device);

    //get data for evaluation by calling GetFullBatch
    var minibatchData = MinibatchSourceEx.GetFullBatch(mbType, dataPath, f.StreamConfigurations.ToArray(), device);

    //input map creation for model evaluation: streams are matched to model arguments by name,
    //streams without a matching argument are skipped
    var inputMap = new Dictionary<Variable, Value>();
    foreach (var v in minibatchData)
    {
        var argument = model.Arguments.FirstOrDefault(x => x.Name == v.Key.m_name);
        if (argument != null)
        {
            inputMap.Add(argument, v.Value.data);
        }
    }

    //output map: null values are filled in by Evaluate
    var predictedDataMap = new Dictionary<Variable, Value>();
    foreach (var outp in model.Outputs)
    {
        predictedDataMap.Add(outp, null);
    }

    //model evaluation
    model.Evaluate(inputMap, predictedDataMap, device);

    //retrieve actual and predicted values from model; row i collects the values of
    //sample i across all outputs
    List<List<float>> actual = new List<List<float>>();
    List<List<float>> predict = new List<List<float>>();
    foreach (var output in model.Outputs)
    {
        //label stream, matched to the output by name
        var labelStream = minibatchData.Keys.First(x => x.m_name == output.Name);

        //actual values
        List<List<float>> av = MLValue.GetValues(output, minibatchData[labelStream].data);

        //predicted values
        List<List<float>> pv = MLValue.GetValues(output, predictedDataMap[output]);

        for (int i = 0; i < av.Count; i++)
        {
            //actual
            if (actual.Count <= i)
            {
                actual.Add(new List<float>());
            }
            actual[i].AddRange(av[i]);

            //prediction
            if (predict.Count <= i)
            {
                predict.Add(new List<float>());
            }
            predict[i].AddRange(pv[i]);
        }
    }

    //export result
    MLValue.ValueToFile(actual, predict, strResult);

    Console.WriteLine(Environment.NewLine);
    Console.WriteLine("*******************Model Evaluation**************");
    Console.WriteLine(Environment.NewLine);
    Console.WriteLine($"Model Evaluation successfully exported result into file {strResult}!");
    Console.WriteLine(Environment.NewLine);
}