static void Main(string[] args)
{
    if (Directory.Exists("./Source/Network") == false)
    {
        Directory.CreateDirectory("./Source/Network");
    }

    cfg = new Config("Classifier.config");
    log("info", "Config: " + cfg.ToString());
    //File.WriteAllText("Classifier.config", JSonParser.Save(cfg, typeof(Config)));

    // Use "." as the decimal separator regardless of the system locale
    System.Globalization.CultureInfo customCulture =
        (System.Globalization.CultureInfo)System.Threading.Thread.CurrentThread.CurrentCulture.Clone();
    customCulture.NumberFormat.NumberDecimalSeparator = ".";
    System.Threading.Thread.CurrentThread.CurrentCulture = customCulture;

    // Convert each input CSV file to input and output vectors
    foreach (var file in cfg.Input)
    {
        log("info", "Config: " + cfg.ToString());

        List<Tuple<double[], double>> dataset = new List<Tuple<double[], double>>();
        string root = Path.GetFileNameWithoutExtension(file);

        using (CSVParser parser = new CSVParser().Load(file))
        {
            string[] buff;
            double[] inputBuff;
            double outputBuff;

            while ((buff = parser.Next()) != null)
            {
                // All columns except the last are features; the last column is the label
                inputBuff = buff.Take(buff.Length - 1).ToArray().ToDouble();
                outputBuff = Double.Parse(buff.Skip(buff.Length - 1).ToArray()[0]);
                dataset.Add(new Tuple<double[], double>(inputBuff, outputBuff));
            }
        }

        dataset = dataset
            .Where(x => cfg.Filter.IsInside(x.Item2))
            .Take(cfg.Filter.Max)
            .ToList();
        log("info", "Final dataset size: " + dataset.Count);

        if (cfg.Network.Shuffle)
        {
            dataset.Shuffle();
        }

        // Split into training and validation sets
        var trainData = dataset
            .Take((int)(dataset.Count * (1 - cfg.Network.ValidationPercent)))
            .ToArray();
        var validData = dataset
            .Skip((int)(dataset.Count * (1 - cfg.Network.ValidationPercent)))
            .ToArray();

        var trainInput = trainData.Select(x => x.Item1).ToArray();
        var trainOutput = trainData.Select(x => new double[] { x.Item2 }).ToArray();
        var validInput = validData.Select(x => x.Item1).ToArray();
        var validOutput = validData.Select(x => new double[] { x.Item2 }).ToArray();

        // Hidden layers from the config plus a single output neuron
        var topology = new List<int>(cfg.Network.Layers) { 1 };
        var network = new ActivationNetwork(
            new SigmoidFunction(),
            trainInput[0].Length,
            topology.ToArray());
        var teacher = new ParallelResilientBackpropagationLearning(network);

        LogInfo current = new LogInfo()
        {
            error = double.PositiveInfinity,
            iteration = 0,
            percent = 0,
            validError = double.PositiveInfinity
        };
        LogInfo better = current;
        double previous;

        do
        {
            previous = current.error;
            current.error = teacher.RunEpoch(trainInput, trainOutput);

            if (cfg.MoreInfoLog)
            {
                // Accuracy on the validation set
                int[] answers = validInput.Apply(network.Compute).GetColumn(0)
                    .Apply(x => x > 0.5 ? 1 : 0);
                current.validError = teacher.ComputeError(validInput, validOutput);
                int[] outputs = validOutput.Apply(x => x[0] > 0.5 ? 1 : 0);
                int pos = 0;
                for (int j = 0; j < answers.Length; j++)
                {
                    if (answers[j] == outputs[j])
                    {
                        pos++;
                    }
                }
                current.validPercent = (double)pos / (double)answers.Length;

                // Accuracy on the training set
                answers = trainInput.Apply(network.Compute).GetColumn(0)
                    .Apply(x => x > 0.5 ? 1 : 0);
                outputs = trainOutput.Apply(x => x[0] > 0.5 ? 1 : 0);
                pos = 0;
                for (int j = 0; j < answers.Length; j++)
                {
                    if (answers[j] == outputs[j])
                    {
                        pos++;
                    }
                }
                current.percent = (double)pos / (double)answers.Length;

                log(current.iteration, current.error, current.validError, current.percent, current.validPercent);
            }
            else
            {
                smalllog(current.iteration, current.error);
            }

            // Stopping criteria
            if (current.error < cfg.Cancelation.Error) { break; }
            if (Math.Abs(previous - current.error) < cfg.Cancelation.Step) { break; }
            if (current.iteration == cfg.Cancelation.MaxEpoch) { break; }
            if (current.percent >= cfg.Validation.Percent) { break; }

            current.iteration++;

            // Keep the network with the best validation accuracy seen so far
            if (better.validPercent < current.validPercent)
            {
                better = current;
                SaveNetwork($"Best_{root}", validInput, validOutput, network, better, root);
            }
            better.WriteTop();
        } while (true);

        SaveNetwork(root, trainInput, trainOutput, network, current, root);
    }
}
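The SaveNetwork helper, the LogInfo type, and the log/smalllog functions are not shown in this snippet. Below is a minimal sketch of what SaveNetwork and LogInfo might look like, assuming Accord.NET's built-in Network.Save binary serialization; the bodies are assumptions for illustration, not the original project's code.

// Hypothetical sketch: LogInfo and SaveNetwork are assumed, not part of the original snippet.
struct LogInfo
{
    public double error, validError, percent, validPercent;
    public int iteration;

    public void WriteTop()
    {
        // Assumed logging behaviour: report the best result seen so far
        Console.WriteLine($"Best so far: epoch {iteration}, train error {error}, valid acc {validPercent:P1}");
    }
}

static void SaveNetwork(string name, double[][] inputs, double[][] outputs,
    ActivationNetwork network, LogInfo info, string root)
{
    // Accord.NET / AForge.NET networks can be persisted with Network.Save (binary serialization)
    network.Save(Path.Combine("./Source/Network", name + ".bin"));
    Console.WriteLine($"Saved {name}: error {info.error}, accuracy {info.percent:P1}");
}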
/// <summary>
/// The main application entry point.
/// </summary>
/// <param name="args">Command line arguments.</param>
public static void Main(string[] args)
{
    // get data
    Console.WriteLine("Loading data....");
    var path = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\..\california_housing.csv"));
    var housing = Frame.ReadCsv(path, separators: ",");
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // shuffle the frame
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());
    housing = housing.IndexRowsWith(indices).SortRowsByKey();

    // convert the house value range to thousands
    housing["median_house_value"] /= 1000;

    // create training, validation, and test partitions
    var training = housing.Rows[Enumerable.Range(0, 12000)];
    var validation = housing.Rows[Enumerable.Range(12000, 2500)];
    var test = housing.Rows[Enumerable.Range(14500, 2500)];

    // set up model columns
    var columns = new string[] {
        "latitude", "longitude", "housing_median_age", "total_rooms",
        "total_bedrooms", "population", "households", "median_income"
    };

    // build a neural network
    var network = new ActivationNetwork(
        new RectifiedLinearFunction(),  // ReLU activation
        8,                              // number of input features
        8,                              // hidden layer with 8 nodes
        1);                             // output layer with 1 node

    // set up a backpropagation learner
    var learner = new ParallelResilientBackpropagationLearning(network);

    // prep training feature and label arrays
    var features = training.Columns[columns].ToArray2D<double>().ToJagged();
    var labels = (from v in training["median_house_value"].Values
                  select new double[] { v }).ToArray();

    // prep validation feature and label arrays
    var features_v = validation.Columns[columns].ToArray2D<double>().ToJagged();
    var labels_v = (from v in validation["median_house_value"].Values
                    select new double[] { v }).ToArray();

    // randomize the network weights
    new GaussianWeights(network, 0.1).Randomize();

    // train the neural network
    var errors = new List<double>();
    var errors_v = new List<double>();
    for (var epoch = 0; epoch < 100; epoch++)
    {
        learner.RunEpoch(features, labels);
        var rmse = Math.Sqrt(learner.ComputeError(features, labels) / labels.GetLength(0));
        var rmse_v = Math.Sqrt(learner.ComputeError(features_v, labels_v) / labels_v.GetLength(0));
        errors.Add(rmse);
        errors_v.Add(rmse_v);
        Console.WriteLine($"Epoch: {epoch}, Training RMSE: {rmse}, Validation RMSE: {rmse_v}");
    }

    // plot the training curve
    Plot(errors, "Training", "Epoch", "RMSE");

    // plot the training and validation curves
    Plot(errors, errors_v, "Training and validation", "Epoch", "RMSE");

    Console.ReadLine();
}
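The test partition created above is never scored in this example. A minimal sketch of evaluating it with the trained network could look like the following; it reuses the same Deedle and Accord.NET calls as above, and the names features_t, labels_t, and rmse_t are assumptions introduced here for illustration.

// Sketch (assumption, not in the original): score the held-out test partition
// with the trained network and report its RMSE.
var features_t = test.Columns[columns].ToArray2D<double>().ToJagged();
var labels_t = (from v in test["median_house_value"].Values
                select new double[] { v }).ToArray();

double sumSquaredError = 0;
for (var i = 0; i < features_t.Length; i++)
{
    // network.Compute returns the output vector; this model has a single output neuron
    var predicted = network.Compute(features_t[i])[0];
    var actual = labels_t[i][0];
    sumSquaredError += (predicted - actual) * (predicted - actual);
}
var rmse_t = Math.Sqrt(sumSquaredError / features_t.Length);
Console.WriteLine($"Test RMSE: {rmse_t}");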
public void Learn()
{
    var stopWatch = new Stopwatch();
    stopWatch.Start();

    var trainingInputs = LearningData.TrainingData.Select(data => data.ToVectorArray(Metadata, PropertiesToSkip)).ToArray();
    var trainingOutputs = LearningData.TrainingData.Select(data => new[] { data.PercentMatch }).ToArray();
    var testInputs = LearningData.TestData.Select(data => data.ToVectorArray(Metadata, PropertiesToSkip)).ToArray();
    var testOutputs = LearningData.TestData.Select(data => new[] { data.PercentMatch }).ToArray();

    if (testInputs.Length != testOutputs.Length || trainingInputs.Length != trainingOutputs.Length)
    {
        throw new ArgumentException("Input and output data are not the same size");
    }

    var vectorSize = trainingInputs.First().Length;
    if (trainingInputs.Any(input => input.Length != vectorSize))
    {
        throw new ArgumentException("Not all training inputs have the same vector size");
    }
    if (testInputs.Any(input => input.Length != vectorSize))
    {
        throw new ArgumentException("Not all test inputs have the same vector size");
    }

    var testMatcher = new LoggingNeuralNetworkMatcher(LearningData.TestData);
    var trainingMatcher = new LoggingNeuralNetworkMatcher(LearningData.TrainingData);
    var results = new List<Tuple<int[], double, double>>();

    // Try different hidden-layer sizes in parallel
    Parallel.For(Range.Min, Range.Max + 1, i =>
    {
        var parameters = i > 0 ? new[] { i, 1 } : new[] { 1 };
        var network = new ActivationNetwork(new BipolarSigmoidFunction(), trainingInputs[0].Length, parameters); //new DeepBeliefNetwork();
        var teacher = new ParallelResilientBackpropagationLearning(network);
        var random = new Random();
        var error = double.MaxValue;
        var iteration = 0;

        while (error > 0.0005 && iteration < 1000)
        {
            iteration++;

            // Train on a single randomly chosen sample, then re-evaluate the combined
            // accuracy/recall/precision error
            var pair = random.Next(0, trainingInputs.Length - 1);
            teacher.Run(trainingInputs[pair], trainingOutputs[pair]);
            var accuracyRecallPrecision = trainingMatcher.MatchCount(network, Metadata, PropertiesToSkip);
            error = 3 - accuracyRecallPrecision.Item1 - accuracyRecallPrecision.Item2 - accuracyRecallPrecision.Item3;

            if (iteration % 100 == 0)
            {
                Logger.DebugFormat("NeuralNetwork: Iteration {0} Error {1}", iteration, error);
            }
        }

        var inSampleError = teacher.ComputeError(trainingInputs, trainingOutputs);
        var outOfSampleError = teacher.ComputeError(testInputs, testOutputs);

        lock (results)
        {
            results.Add(new Tuple<int[], double, double>(parameters, inSampleError, outOfSampleError));
            if (error < BestError)
            {
                BestNetwork = network;
                BestParameter = i;
                BestError = error;
            }
        }

        testMatcher.LogMatchCount(string.Format("{0}: {1}", Name, string.Join("-", parameters)), network, Metadata, PropertiesToSkip);
    });

    Logger.DebugFormat("Results ({0}):\n{1}", Name,
        string.Join(", ", results.Select(result => $"{string.Join("-", result.Item1)}: In: {result.Item2} Out: {result.Item3}")));
    Logger.InfoFormat("Best {0}: {1}-1 Error {2}", Name, BestParameter, BestError);

    stopWatch.Stop();
    Logger.InfoFormat("Neural Network learning ({0}) took {1}", Name, stopWatch.Elapsed);
}
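Learn() keeps the best-performing model in BestNetwork but never persists it. One possible way to save and reload it is sketched below, assuming Accord.NET's Network.Save and Network.Load binary serialization; the method names SaveBestNetwork and LoadNetwork are hypothetical additions, not part of the original class.

// Sketch (assumption): persist the best network after Learn() has run, and reload it later.
public void SaveBestNetwork(string path)
{
    if (BestNetwork == null)
        throw new InvalidOperationException("Call Learn() before saving the network");

    // Accord.NET / AForge.NET networks support binary (de)serialization
    BestNetwork.Save(path);
}

public static ActivationNetwork LoadNetwork(string path)
{
    return (ActivationNetwork)Network.Load(path);
}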
private static void NeuralNetworkLearningSingleAttributes(LearningData learningData)
{
    var stopWatch = new Stopwatch();
    stopWatch.Start();

    var testMatcher = new LoggingNeuralNetworkMatcher(learningData.TestData);
    var trainingMatcher = new LoggingNeuralNetworkMatcher(learningData.TrainingData);

    // Train a separate network for each single attribute
    Parallel.ForEach(learningData.ActualMetadata.Keys, metadataKey =>
    {
        var metadata = new Dictionary<string, IndexableAttributeMetadata>
        {
            { metadataKey, learningData.ActualMetadata[metadataKey] }
        };

        var trainingInputs = learningData.TrainingData.Select(data => data.ToVectorArray(metadata)).ToArray();
        var trainingOutputs = learningData.TrainingData.Select(data => new[] { data.PercentMatch }).ToArray();
        var testInputs = learningData.TestData.Select(data => data.ToVectorArray(metadata)).ToArray();
        var testOutputs = learningData.TestData.Select(data => new[] { data.PercentMatch }).ToArray();

        if (testInputs.Length != testOutputs.Length || trainingInputs.Length != trainingOutputs.Length)
        {
            throw new ArgumentException("Input and output data are not the same size");
        }

        var vectorSize = trainingInputs.First().Length;
        if (trainingInputs.Any(input => input.Length != vectorSize))
        {
            throw new ArgumentException("Not all training inputs have the same vector size");
        }
        if (testInputs.Any(input => input.Length != vectorSize))
        {
            throw new ArgumentException("Not all test inputs have the same vector size");
        }

        var results = new List<Tuple<int[], double, double>>();

        // Try hidden-layer sizes 0..15 in parallel
        Parallel.For(0, 16, i =>
        {
            var parameters = new[] { i, 1 };
            var network = new ActivationNetwork(new BipolarSigmoidFunction(), trainingInputs[0].Length, parameters); //new DeepBeliefNetwork();
            var teacher = new ParallelResilientBackpropagationLearning(network);
            var random = new Random();
            var error = double.MaxValue;
            var iteration = 0;

            while (error > 0.0005 && iteration < 200)
            {
                iteration++;

                // Train on a single randomly chosen sample
                // (alternative: error = teacher.RunEpoch(trainingInputs, trainingOutputs);)
                var pair = random.Next(0, trainingInputs.Length - 1);
                error = teacher.Run(trainingInputs[pair], trainingOutputs[pair]);

                // Replace the raw error with the combined accuracy/recall/precision error
                var accuracyRecallPrecision = trainingMatcher.MatchCount(network, metadata, new List<string>());
                error = 3 - accuracyRecallPrecision.Item1 - accuracyRecallPrecision.Item2 - accuracyRecallPrecision.Item3;

                if (iteration % 100 == 0)
                {
                    Logger.DebugFormat("NeuralNetwork: Iteration {0} Error {1}", iteration, error);
                }
            }

            var inSampleError = teacher.ComputeError(trainingInputs, trainingOutputs);
            var outOfSampleError = teacher.ComputeError(testInputs, testOutputs);

            lock (results)
            {
                results.Add(new Tuple<int[], double, double>(parameters, inSampleError, outOfSampleError));
            }

            testMatcher.LogMatchCount(
                string.Format("{0} ({1})", metadataKey, learningData.ActualMetadata[metadataKey].Attribute.GetType().FullName),
                network, metadata, new List<string>());
        });

        Logger.InfoFormat("Results for {1} ({2}):\n{0}",
            string.Join(", ", results.Select(result => $"{string.Join("-", result.Item1)}: In: {result.Item2} Out: {result.Item3}")),
            metadataKey,
            learningData.ActualMetadata[metadataKey].Attribute.GetType().FullName);
    });

    stopWatch.Stop();
    Logger.InfoFormat("Neural Network learning (single attribute) took {0}", stopWatch.Elapsed);
}
//train the classifier networks
public double Train(List<CategoryNetwork> networks, List<TrainingSet> trainingSetList, List<TrainingSet> validationSetList = null)
{
    var ng = new NetworkGraph();

    //open a new training progress display in a new thread
    new Thread(() =>
    {
        ng.Load += (sender, e) => (sender as NetworkGraph).Visible = true;
        ng.ShowDialog();
    }).Start();

    //start fresh, reset all networks
    foreach (var net in networks)
    {
        net.ClearData();
    }

    //walk the category hierarchy and add each image to every ancestor network to generate epochs
    for (int i = 0; i < trainingSetList.Count; i++)
    {
        var cat = trainingSetList[i].Category;
        var imageData = ImageExtractor.ExtractImageFeatures(trainingSetList[i].ImageFile);
        while (cat.Parent != null)
        {
            var n = GetNetwork(networks, cat.Parent);
            n.AddData(cat, ref imageData);
            cat = cat.Parent;
        }
    }

    //train each network with parallel resilient backpropagation (RProp)
    foreach (var net in networks)
    {
        var teacher = new ParallelResilientBackpropagationLearning(net.Network);

        //display the category being trained
        ng.AddTitle(net.Category.Name);
        ng.ResetData();
        // teacher.UpdateUpperBound = 500;

        var inputs = net.ImageData.ToArray();
        var outputs = new double[net.Catetgories.Count][];

        //determine which images belong to the validation set for the network currently being trained
        var thisValSet = new List<TrainingSet>();
        if (validationSetList != null)
        {
            foreach (var td in validationSetList)
            {
                var cat = td.Category;
                while (cat != null)
                {
                    if (cat.Parent == net.Category)
                    {
                        thisValSet.Add(new TrainingSet(td.ImageFile, cat));
                        break;
                    }
                    cat = cat.Parent;
                }
            }
        }

        //vectorize these images and their respective categorical classifications
        var valOuts = thisValSet.Select(x => CategoryExtractor.ExtractCategoryFeature(x.Category)).ToArray();
        var valIns = thisValSet.Select(x => ImageExtractor.ExtractImageFeatures(x.ImageFile)).ToArray();

        for (int i = 0; i < net.Catetgories.Count; i++)
        {
            outputs[i] = CategoryExtractor.ExtractCategoryFeature(net.Catetgories[i]);
        }

        int k = 0;

        //find the current batch error before training starts
        double localError = teacher.ComputeError(inputs, outputs) / inputs.Length;
        double validationError = 1;
        DateTime start = DateTime.Now;
        ng.AddPoint(localError, localError, DateTime.Now - start);

        //train until either the validation error or the training error drops below its threshold
        while (validationError > .05 && localError > .04)
        {
            localError = teacher.RunEpoch(inputs, outputs) / inputs.Length;
            if (valIns.Length > 0)
            {
                //find the validation error by running the validation set through the network
                validationError = teacher.ComputeError(valIns, valOuts) / valIns.Length;
            }
            ng.AddPoint(localError, validationError, DateTime.Now - start);
            k++;
        }
    }

    return 0;
}
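Train only fits the category networks. The hypothetical Classify helper below (not part of the original code) sketches how a trained CategoryNetwork might be applied to a new image, assuming the output neurons correspond one-to-one with net.Catetgories as in the training loop above.

// Sketch (assumption): classify a new image with one trained category network
// by picking the output neuron with the highest activation.
public Category Classify(CategoryNetwork net, string imageFile)
{
    var features = ImageExtractor.ExtractImageFeatures(imageFile);
    var output = net.Network.Compute(features);

    // The index of the strongest activation is assumed to map back to the child category
    int best = 0;
    for (int i = 1; i < output.Length; i++)
    {
        if (output[i] > output[best])
        {
            best = i;
        }
    }
    return net.Catetgories[best];
}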