Example #1
        public void gaussNormalization_test01()
        {
            DeviceDescriptor device = DeviceDescriptor.UseDefaultDevice();
            //create factory object
            MLFactory f = new MLFactory();

            //create config streams
            f.CreateIOVariables("feature 4 0", "flower 3 0", DataType.Float);
            var trData = MLFactory.CreateTrainingParameters("|Type: default |BatchSize: 130 |Epochs:5 |Normalization: 0 |SaveWhileTraining: 0 |RandomizeBatch: 0 |ProgressFrequency: 1");

            string trainingPath   = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\iris_with_hot_vector.txt";
            string validationPath = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\iris_with_hot_vector_test.txt";
            //string trainingPath = "../../../../data/iris_with_hot_vector.txt";
            //string validationPath = "../../../../data/iris_with_hot_vector_test.txt";

            //string trainingNormalizedPathh = "../../../../data/iris_train_normalized.txt";
            string trainingNormalizedPathh = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\iris_train_normalized.txt";
            var    strNormalizedLine       = System.IO.File.ReadAllLines(trainingNormalizedPathh);

            string validationNormalizedPath = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\iris_valid_normalized.txt";
            //string validationNormalizedPath = "../../../../data/iris_valid_normalized.txt";
            var strValidNormalizedLine = System.IO.File.ReadAllLines(validationNormalizedPath);
            //
            List <Function> normalizedInputs = null;

            using (var mbs1 = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), trainingPath, validationPath, MinibatchSource.FullDataSweep, trData.RandomizeBatch))
            {
                normalizedInputs = mbs1.NormalizeInput(f.InputVariables, device);
            }

            //normalization test for train dataset
            using (var mbs1 = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), trainingPath, validationPath, MinibatchSource.FullDataSweep, trData.RandomizeBatch))
            {
                var data = mbs1.GetNextMinibatch(130, device);

                //go through all functions and perform the calculation
                foreach (var fun in normalizedInputs)
                {
                    //
                    var input = new Dictionary <Variable, Value>()
                    {
                        { f.InputVariables.First(), data.First().Value.data }
                    };

                    var output = new Dictionary <Variable, Value>()
                    {
                        { fun, null }
                    };
                    //
                    fun.Evaluate(input, output, device);

                    var normalizedValues = output[fun].GetDenseData <float>(fun);

                    for (int i = 0; i < normalizedValues.Count; i++)
                    {
                        var currNorLine = strNormalizedLine[i].Split('\t').ToList();
                        for (int j = 0; j < normalizedValues[0].Count(); j++)
                        {
                            var n1 = normalizedValues[i][j].ToString(CultureInfo.InvariantCulture).Substring(0, 5);
                            var n2 = currNorLine[j].Substring(0, 5);
                            Assert.Equal(n1, n2);
                        }
                    }
                }
            }

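            //normalization test for validation dataset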
            using (var mbs1 = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), trainingPath, validationPath, MinibatchSource.FullDataSweep, trData.RandomizeBatch))
            {
                var data = MinibatchSourceEx.GetFullBatch(mbs1.Type, mbs1.ValidationDataFile, mbs1.StreamConfigurations, device);

                //go through all functions and perform the calculation
                foreach (var fun in normalizedInputs)
                {
                    //
                    var input = new Dictionary <Variable, Value>()
                    {
                        { f.InputVariables.First(), data.First().Value.data }
                    };

                    var output = new Dictionary <Variable, Value>()
                    {
                        { fun, null }
                    };
                    //
                    fun.Evaluate(input, output, device);

                    var normalizedValues = output[fun].GetDenseData <float>(fun);

                    for (int i = 0; i < normalizedValues.Count; i++)
                    {
                        var currNorLine = strValidNormalizedLine[i].Split('\t').ToList();
                        for (int j = 0; j < normalizedValues[0].Count(); j++)
                        {
                            var n1 = normalizedValues[i][j].ToString(CultureInfo.InvariantCulture).Substring(0, 5);
                            var n2 = currNorLine[j].Substring(0, 5);
                            Assert.Equal(n1, n2);
                        }
                    }
                }
            }
        }
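The assertions in this test compare only the first five characters of each value's invariant-culture string, which acts as a crude tolerance. A minimal alternative sketch, assuming the test project references xUnit (the test already uses Assert.Equal); FloatAssert and NearlyEqual are hypothetical names, not part of ANNdotNET:

        public static class FloatAssert
        {
            //hypothetical helper: compares two floats within an absolute tolerance
            //instead of comparing the first five characters of their string forms
            public static void NearlyEqual(float expected, float actual, float tolerance = 1e-3f)
            {
                Xunit.Assert.True(System.Math.Abs(expected - actual) <= tolerance,
                    $"Expected {expected} but got {actual} (tolerance {tolerance}).");
            }
        }

With such a helper, the inner assertion above could become FloatAssert.NearlyEqual(float.Parse(currNorLine[j], CultureInfo.InvariantCulture), normalizedValues[i][j]);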
Example #2
        public void NormalizationfeatureGroup_test03()
        {
            DeviceDescriptor device = DeviceDescriptor.UseDefaultDevice();
            //create factory object
            MLFactory f = new MLFactory();

            //create config streams
            f.CreateIOVariables("|Itemid 1 0 |Sales 4 0 |Color 1 0", "|Label 1 0", DataType.Float);
            var trData = MLFactory.CreateTrainingParameters("|Type: default |BatchSize: 130 |Epochs:5 |Normalization:Sales |SaveWhileTraining: 0 |RandomizeBatch: 0 |ProgressFrequency: 1");

            string trainingPath            = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\cntk_dataset_for_normalization_test.txt";
            string trainingNormalizedPathh = "C:\\sc\\github\\anndotnet\\test\\anndotnet.unit\\data\\cntk_dataset_for_normalization_test_result.txt";

            //string trainingPath = "../../../../data/cntk_dataset_for_normalization_test.txt";
            //string trainingNormalizedPathh = "../../../../data/cntk_dataset_for_normalization_test_result.txt";

            var strTrainData     = System.IO.File.ReadAllLines(trainingPath);
            var normalizedResult = System.IO.File.ReadAllLines(trainingNormalizedPathh);
            var inputVars        = MLFactory.NormalizeInputLayer(trData, f, trainingPath, trainingPath, device);

            //normalization test for train dataset
            using (var mbs1 = new MinibatchSourceEx(trData.Type, f.StreamConfigurations.ToArray(), trainingPath, trainingPath, MinibatchSource.FullDataSweep, trData.RandomizeBatch))
            {
                var data = mbs1.GetNextMinibatch(10, device);

                //go through all functions and perform the calculation
                for (int i = 0; i < inputVars.Count; i++)
                {
                    //
                    var fun     = (Function)inputVars[i];
                    var strName = data.Keys.Where(x => x.m_name.Equals(f.InputVariables[i].Name)).FirstOrDefault();
                    var input   = new Dictionary <Variable, Value>()
                    {
                        { f.InputVariables[i], data[strName].data }
                    };

                    var output = new Dictionary <Variable, Value>()
                    {
                        { fun, null }
                    };
                    //
                    fun.Evaluate(input, output, device);
                    var inputValues      = data[strName].data.GetDenseData <float>(fun).Select(x => x[0]).ToList();
                    var normalizedValues = output[fun].GetDenseData <float>(fun).Select(x => x[0]).ToList();
                    int index            = 0;
                    if (i < 2)
                    {
                        index = i;
                    }
                    else
                    {
                        index = i + 3;
                    }
                    var currNorLine = normalizedResult[index].Split(new char[] { '\t', ' ' }).ToList();

                    for (int j = 0; j < normalizedValues.Count; j++)
                    {
                        var n1 = normalizedValues[j].ToString(CultureInfo.InvariantCulture);
                        var n2 = currNorLine[j];
                        if (n1.Length < 2)
                        {
                            Assert.Equal(n1, n2);
                        }
                        else
                        {
                            Assert.Equal(n1.Substring(0, 5), n2.Substring(0, 5));
                        }
                    }
                }
            }
        }
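Both tests configure training through a pipe-delimited parameter string passed to MLFactory.CreateTrainingParameters, e.g. "|Type: default |BatchSize: 130 |Epochs:5 |Normalization:Sales". As an illustration of that format only (this is not the MLFactory parser), a minimal sketch that splits such a string into key/value pairs could look like this; ConfigSketch and Parse are hypothetical names:

        public static class ConfigSketch
        {
            //illustrative parser for the pipe-delimited parameter strings used above,
            //e.g. "|Type: default |BatchSize: 130 |Epochs:5"; not the MLFactory implementation
            public static System.Collections.Generic.Dictionary<string, string> Parse(string line)
            {
                var result = new System.Collections.Generic.Dictionary<string, string>();
                foreach (var part in line.Split(new[] { '|' }, System.StringSplitOptions.RemoveEmptyEntries))
                {
                    var kv = part.Split(new[] { ':' }, 2);
                    if (kv.Length == 2)
                    {
                        result[kv[0].Trim()] = kv[1].Trim();
                    }
                }
                return result;
            }
        }

For example, ConfigSketch.Parse("|Type: default |BatchSize: 130")["BatchSize"] returns "130".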
Example #3
        /// <summary>
        /// Evaluates the model against the dataset defined in the ML configuration file and exports the result in CSV format for further analysis.
        /// </summary>
        /// <param name="mlconfigPath"> path of the ML configuration (mlconfig) file</param>
        /// <param name="bestTrainedModelPath"> relative path of the trained model to be evaluated</param>
        /// <param name="device"> device for computation</param>
        public static void EvaluateModel(string mlconfigPath, string bestTrainedModelPath, DeviceDescriptor device)
        {
            //Load ML model configuration file
            var dicMParameters = MLFactory.LoadMLConfiguration(mlconfigPath);

            //add full path of the model folder, since the mlconfig file doesn't contain any absolute paths
            dicMParameters.Add("root", MLFactory.GetMLConfigFolder(mlconfigPath));

            //get model data paths
            var dicPath = MLFactory.GetMLConfigComponentPaths(dicMParameters["paths"]);

            //parse training parameters
            var projectValues            = dicMParameters["training"].Split(MLFactory.m_cntkSpearator, StringSplitOptions.RemoveEmptyEntries);
            var trainedModelRelativePath = MLFactory.GetParameterValue(projectValues, "TrainedModel");


            //Minibatch type
            var           mbTypestr = MLFactory.GetParameterValue(projectValues, "Type");
            MinibatchType mbType    = (MinibatchType)Enum.Parse(typeof(MinibatchType), mbTypestr, true);
            //prepare MLFactory
            var f = MLFactory.CreateMLFactory(dicMParameters);

            //prepare data paths for mini-batch source
            var strTrainPath = $"{dicMParameters["root"]}\\{dicPath["Training"]}";
            var strValidPath = $"{dicMParameters["root"]}\\{dicPath["Validation"]}";
            var strResult    = $"{dicMParameters["root"]}\\{dicPath["Result"]}";

            var bestModelFullPath = $"{dicMParameters["root"]}\\{bestTrainedModelPath}";
            //decide what data to evaluate
            var dataPath = strValidPath;

            //load model
            var model = Function.Load(bestModelFullPath, device);

            //get data for evaluation by calling GetFullBatch
            var minibatchData = MinibatchSourceEx.GetFullBatch(mbType, dataPath, f.StreamConfigurations.ToArray(), device);
            //input map creation for model evaluation
            var inputMap = new Dictionary <Variable, Value>();

            foreach (var v in minibatchData)
            {
                var vv         = model.Arguments.Where(x => x.Name == v.Key.m_name).FirstOrDefault();
                var streamInfo = v.Key;
                if (vv != null)
                {
                    inputMap.Add(vv, minibatchData[streamInfo].data);
                }
            }

            //output map
            var predictedDataMap = new Dictionary <Variable, Value>();

            foreach (var outp in model.Outputs)
            {
                predictedDataMap.Add(outp, null);
            }

            //model evaluation
            model.Evaluate(inputMap, predictedDataMap, device);

            //retrieve actual and predicted values from model
            List <List <float> > actual  = new List <List <float> >();
            List <List <float> > predict = new List <List <float> >();

            foreach (var output in model.Outputs)
            {
                //label stream
                var labelStream = minibatchData.Keys.Where(x => x.m_name == output.Name).First();

                //actual values
                List <List <float> > av = MLValue.GetValues(output, minibatchData[labelStream].data);
                //predicted values
                List <List <float> > pv = MLValue.GetValues(output, predictedDataMap[output]);

                for (int i = 0; i < av.Count; i++)
                {
                    //actual
                    var act = av[i];
                    if (actual.Count <= i)
                    {
                        actual.Add(new List <float>());
                    }
                    actual[i].AddRange(act);
                    //prediction
                    var prd = pv[i];
                    if (predict.Count <= i)
                    {
                        predict.Add(new List <float>());
                    }
                    predict[i].AddRange(prd);
                }
            }


            //export result
            MLValue.ValueToFile(actual, predict, strResult);

            //
            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("*******************Model Evaluation**************");
            Console.WriteLine(Environment.NewLine);
            Console.WriteLine($"Model Evaluation successfully exported result into file {strResult}!");
            Console.WriteLine(Environment.NewLine);
        }
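A minimal usage sketch for the method above; the mlconfig path and the trained model sub-path are placeholders, not files shipped with the project:

            //usage sketch; paths are placeholders
            var device = DeviceDescriptor.UseDefaultDevice();
            EvaluateModel("C:\\models\\iris\\iris.mlconfig", "models\\best_model.model", device);

As a hedged follow-up, assuming the outputs are one-hot class vectors and the same using directives as the method above, a simple accuracy could be computed from lists shaped like the actual and predict collections built during evaluation; Accuracy and ArgMax are hypothetical helpers, not part of the source:

        //hypothetical helper: assuming one-hot class outputs, report how often the
        //predicted arg-max matches the actual arg-max
        public static double Accuracy(List<List<float>> actual, List<List<float>> predict)
        {
            int hits = 0;
            for (int i = 0; i < actual.Count; i++)
            {
                if (ArgMax(actual[i]) == ArgMax(predict[i]))
                {
                    hits++;
                }
            }
            return actual.Count == 0 ? 0.0 : (double)hits / actual.Count;
        }

        //index of the maximum value in a row
        private static int ArgMax(List<float> row)
        {
            int best = 0;
            for (int i = 1; i < row.Count; i++)
            {
                if (row[i] > row[best])
                {
                    best = i;
                }
            }
            return best;
        }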