public void MinMaxTransformer_Transform_Vector()
{
    var sut = new MinMaxTransformer(-1.0, 1.0);
    var matrix = new F64Matrix(new double[] { -10, 0, 10, 10, 0, -10, -10, 0, 10 }, 3, 3);

    // Create transformation
    sut.Transform(matrix);

    // use vector transform on each row
    var actual = new F64Matrix(3, 3);
    for (int i = 0; i < actual.RowCount; i++)
    {
        var row = sut.Transform(matrix.Row(i));
        for (int j = 0; j < actual.ColumnCount; j++)
        {
            actual[i, j] = row[j];
        }
    }

    var expected = new F64Matrix(new double[] { -1, -1, 1, 1, -1, -1, -1, -1, 1 }, 3, 3);
    Assert.AreEqual(expected, actual);
}
public void FeatureNormalization_Normalize()
{
    // Use StreamReader(filepath) when running from filesystem
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix (all columns different from the targetName)
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // create minmax normalizer (normalizes each feature from 0.0 to 1.0)
    var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);

    // transforms features using the feature normalization transform
    minMaxTransformer.Transform(observations, observations);

    // read targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // create learner
    // neural nets require features to be normalized.
    // This makes convergence much faster.
    var net = new NeuralNet();
    net.Add(new InputLayer(observations.ColumnCount));
    net.Add(new SoftMaxLayer(targets.Distinct().Count()));

    // no hidden layer and softmax output corresponds to logistic regression
    var learner = new ClassificationNeuralNetLearner(net, new LogLoss());

    // learns a logistic regression classifier
    var model = learner.Learn(observations, targets);
}
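Because the net above has no hidden layer, it behaves like logistic regression. A minimal sketch of how a hidden layer could be inserted between the input and softmax layers, reusing the layer types shown in the other examples here (the 32-unit size is an illustrative choice, not taken from the original):

var net = new NeuralNet();
net.Add(new InputLayer(observations.ColumnCount));
net.Add(new DenseLayer(32, Activation.Relu)); // hidden layer; unit count is arbitrary for illustration
net.Add(new SoftMaxLayer(targets.Distinct().Count()));

var learner = new ClassificationNeuralNetLearner(net, new LogLoss());
var model = learner.Learn(observations, targets);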
public void SharpLearning_Example()
{
    // Load data
    var (observations, targets) = DataSetUtilities.LoadWinequalityWhite();

    // transform data for neural net
    var transform = new MinMaxTransformer(0.0, 1.0);
    transform.Transform(observations, observations);

    var featureCount = observations.ColumnCount;

    // define the neural net.
    var net = new NeuralNet();
    net.Add(new InputLayer(inputUnits: featureCount));
    net.Add(new DenseLayer(32, Activation.Relu));
    net.Add(new DenseLayer(32, Activation.Relu));
    net.Add(new SquaredErrorRegressionLayer());

    // using only 10 iterations to make the example run faster.
    // using squared error as the error metric. This is only used for reporting progress.
    var learner = new RegressionNeuralNetLearner(net, iterations: 10, loss: new SquareLoss(),
        optimizerMethod: OptimizerMethod.Adam);

    var cv = new RandomCrossValidation<double>(10, seed: 232);
    var predictions = cv.CrossValidate(learner, observations, targets);

    Trace.WriteLine(FormatErrorString(targets, predictions));
}
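FormatErrorString is a helper defined elsewhere in the source and not shown here. A minimal sketch of what such a helper could look like, assuming it reports the mean squared error using the MeanSquaredErrorRegressionMetric already used in the other regression examples (the exact wording of the returned string is hypothetical):

static string FormatErrorString(double[] targets, double[] predictions)
{
    // hypothetical helper: compute and format the mean squared error
    var metric = new MeanSquaredErrorRegressionMetric();
    var error = metric.Error(targets, predictions);
    return $"MeanSquaredError: {error:0.0000}";
}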
public void FeatureNormalization_Normalize()
{
    // Use StreamReader(filepath) when running from filesystem
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix (all columns different from the targetName)
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // create minmax normalizer (normalizes each feature from 0.0 to 1.0)
    var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);

    // transforms features using the feature normalization transform
    minMaxTransformer.Transform(observations, observations);

    // read targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();

    // Create neural net.
    var net = new NeuralNet();
    net.Add(new InputLayer(observations.ColumnCount));
    net.Add(new SquaredErrorRegressionLayer());

    // Create regression learner.
    var learner = new RegressionNeuralNetLearner(net, new SquareLoss());

    // learns a neural net regression model.
    var model = learner.Learn(observations, targets);

    // serializer for saving the MinMaxTransformer
    var serializer = new GenericXmlDataContractSerializer();

    // Serialize transform for use with the model.
    // Replace this with StreamWriter for use with file system.
    var data = new StringBuilder();
    var writer = new StringWriter(data);
    serializer.Serialize(minMaxTransformer, () => writer);

    // Deserialize transform for use with the model.
    // Replace this with StreamReader for use with file system.
    var reader = new StringReader(data.ToString());
    var deserializedMinMaxTransform = serializer.Deserialize<MinMaxTransformer>(() => reader);

    // Normalize observation and predict using the model.
    var normalizedObservation = deserializedMinMaxTransform.Transform(observations.Row(0));
    var prediction = model.Predict(normalizedObservation);

    Trace.WriteLine($"Prediction: {prediction}");
}
public void MinMaxTransformer_Transform()
{
    var sut = new MinMaxTransformer(-1.0, 1.0);
    var matrix = new F64Matrix(new double[] { -10, 0, 10, 10, 0, -10, -10, 0, 10 }, 3, 3);

    var actual = sut.Transform(matrix);

    var expected = new F64Matrix(new double[] { -1, -1, 1, 1, -1, -1, -1, -1, 1 }, 3, 3);
    Assert.AreEqual(expected, actual);
}
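Both tests above imply a per-column mapping from the observed column range to the requested interval. A minimal sketch of that mapping, written independently of the library internals (the constant-column behaviour of mapping to the new minimum is an assumption read off the expected values above):

// Per-column min-max scaling: maps a value x from [colMin, colMax] to [newMin, newMax].
static double MinMaxScale(double x, double colMin, double colMax, double newMin, double newMax)
{
    if (colMax == colMin)
    {
        // constant column: the expected values above suggest it maps to newMin
        return newMin;
    }

    return newMin + (x - colMin) * (newMax - newMin) / (colMax - colMin);
}

// Example: the first column above has min -10 and max 10, so -10 maps to -1 and 10 maps to 1.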
public void Regression_Standard_Neural_Net_FeatureTransform_Normalization()
{
    #region Read Data
    // Use StreamReader(filepath) when running from filesystem
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // read regression targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();
    #endregion

    // transform feature values to be between 0 and 1
    // and shift each feature to have a mean value of zero.
    var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);
    var meanZeroTransformer = new MeanZeroFeatureTransformer();

    minMaxTransformer.Transform(observations, observations);
    meanZeroTransformer.Transform(observations, observations);

    var numberOfFeatures = observations.ColumnCount;

    // define the neural net.
    var net = new NeuralNet();
    net.Add(new InputLayer(inputUnits: numberOfFeatures));
    net.Add(new DropoutLayer(0.2));
    net.Add(new DenseLayer(800, Activation.Relu));
    net.Add(new DropoutLayer(0.5));
    net.Add(new DenseLayer(800, Activation.Relu));
    net.Add(new DropoutLayer(0.5));
    net.Add(new SquaredErrorRegressionLayer());

    // using only 10 iterations to make the example run faster.
    // using squared error as the error metric. This is only used for reporting progress.
    var learner = new RegressionNeuralNetLearner(net, iterations: 10, loss: new SquareLoss());

    var model = learner.Learn(observations, targets);

    var metric = new MeanSquaredErrorRegressionMetric();
    var predictions = model.Predict(observations);

    Trace.WriteLine("Training Error: " + metric.Error(targets, predictions));
}
public void Predict(int iterations = DefaultNNIterations, int targetOffset = 1, string targetName = DefaultTargetName, bool pauseAtEnd = false)
{
    _iterations = iterations;
    _targetName = targetName;
    _targetOffset = targetOffset;

    Program.StatusLogger.Info($"Iterations: {_iterations}");
    Program.StatusLogger.Info($"Target: {_targetName}");
    Program.StatusLogger.Info($"Offset: {_targetOffset}");

    var data = new ConcurrentDictionary<int, ModelData>();

    if (File.Exists(Path()))
    {
        data = JsonConvert.DeserializeObject<ConcurrentDictionary<int, ModelData>>(File.ReadAllText(Path()));
        //data = TypeSerializer.DeserializeFromReader<ConcurrentDictionary<int, ModelData>>(new StreamReader(Path()));
        Program.StatusLogger.Info("Cached data was loaded.");
    }
    else
    {
        //http://publicdata.landregistry.gov.uk/market-trend-data/house-price-index-data/UK-HPI-full-file-2019-07.csv
        var header = File.ReadLines("UK-HPI-full-file-2019-07.csv").First();
        var columnNames = header.Split(",");
        var parser = new CsvParser(() => new StringReader(File.ReadAllText("UK-HPI-full-file-2019-07.csv")), ',', false, true);

        var creditData = _creditDataExtractor.Extract();
        var populationData = _populationDataExtractor.Extract();
        var otherPopulationData = _otherPopulationDataExtractor.Extract();
        var densityData = _londonDensityDataExtractor.Extract();
        var gvaData = _gvaDataExtractor.Extract();

        var featureRows = parser.EnumerateRows().ToArray();
        var targets = parser.EnumerateRows(_targetName).ToArray();

        string previousKey = null;

        for (int i = 0; i < featureRows.Length; i++)
        {
            var item = featureRows[i];
            var key = item.GetValue("RegionName");
            var date = DateTime.ParseExact(item.GetValue("Date"), "dd/MM/yyyy", new CultureInfo("en-GB"), DateTimeStyles.AssumeLocal);

            if (key != previousKey)
            {
                Program.StatusLogger.Info($"Processing {key}");
            }
            previousKey = key;

            var regionFeatures = item.GetValues(columnNames.Except(excludeColumns).ToArray()).Select(s => ParseRowValue(s));

            var creditDataKey = _creditDataExtractor.GetKey(date, creditData.Keys.ToArray());
            if (!creditData.ContainsKey(creditDataKey))
            {
                regionFeatures = regionFeatures.Concat(Enumerable.Repeat(-1d, creditData.Values.First().Length));
                Trace.WriteLine($"Credit data not found: {creditDataKey}");
            }
            else
            {
                regionFeatures = regionFeatures.Concat(creditData[creditDataKey]);
            }

            var modelData = new ModelData
            {
                Name = key,
                Code = item.GetValue("AreaCode"),
                Date = date,
                Observations = regionFeatures.ToArray(),
                OriginalTarget = ParseTarget(item.GetValue(_targetName))
            };

            modelData.Observations = modelData.Observations
                .Concat(_populationDataExtractor.Get(populationData, modelData))
                .Concat(_londonDensityDataExtractor.Get(densityData, modelData))
                .Concat(_otherPopulationDataExtractor.Get(otherPopulationData, modelData))
                .Concat(_gvaDataExtractor.Get(gvaData, modelData))
                .ToArray();

            data.TryAdd(i, modelData);
        }

        _targetCalculator.Calculate(data, _targetOffset);

        //TypeSerializer.SerializeToWriter<ConcurrentDictionary<int, ModelData>>(data, new StreamWriter(Path()));
        var json = JsonConvert.SerializeObject(data, Formatting.Indented);
        File.WriteAllText(Path(), json);
    }

    var itemCount = 0;

    Parallel.ForEach(data.OrderBy(o => o.Value.Date).GroupBy(g => g.Value.Name).AsParallel(),
        new ParallelOptions { MaxDegreeOfParallelism = -1 },
        (grouping) =>
    {
        var lastDate = grouping.Last().Value.Date;
        var dataWithTarget = grouping.Where(s => s.Value.OriginalTarget.HasValue && s.Value.Target != -1);

        if (dataWithTarget.Any())
        {
            var allObservations = dataWithTarget.Select(s => s.Value.Observations).ToArray();
            var allTargets = dataWithTarget.Select(s => s.Value.Target).ToArray();

            //var validation = new TimeSeriesCrossValidation<double>((int)(allObservationsExceptLast.RowCount * 0.8), 0, 1);
            //var validationPredictions = validation.Validate((IIndexedLearner<double>)learner, allObservationsExceptLast, allTargetsExceptLast);
            //var crossMetric = new MeanSquaredErrorRegressionMetric();
            //var crossError = crossMetric.Error(validation.GetValidationTargets(allTargetsExceptLast), validationPredictions);
            //_totalCrossError += crossError;

            var meanZeroTransformer = new MeanZeroFeatureTransformer();
            var minMaxTransformer = new MinMaxTransformer(0d, 1d);

            var lastObservations = grouping.Last().Value.Observations;
            F64Matrix allTransformed = minMaxTransformer.Transform(meanZeroTransformer.Transform(allObservations.Append(lastObservations).ToArray()));

            var transformed = new F64Matrix(allTransformed.Rows(Enumerable.Range(0, allTransformed.RowCount - 1).ToArray()).Data(),
                allTransformed.RowCount - 1, allTransformed.ColumnCount);

            var splitter = new RandomTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);
            var trainingTestSplit = splitter.SplitSet(transformed, allTargets);
            transformed = trainingTestSplit.TrainingSet.Observations;
            var testSet = trainingTestSplit.TestSet;

            //var learner = GetRandomForest();
            //var learner = GetAda();
            //var learner = GetNeuralNet(grouping.First().Value.Observations.Length, transformed.RowCount);
            var learner = GetEnsemble(grouping.First().Value.Observations.Length, transformed.RowCount);

            Program.StatusLogger.Info("Learning commenced " + grouping.First().Value.Name);
            var model = learner.Learn(transformed, trainingTestSplit.TrainingSet.Targets);
            Program.StatusLogger.Info("Learning completed " + grouping.First().Value.Name);

            if (model.GetRawVariableImportance().Any(a => a > 0))
            {
                var importanceSummary = string.Join(",\r\n", model.GetRawVariableImportance().Select((d, i) => i.ToString() + ":" + d.ToString()));
                Program.StatusLogger.Info("Raw variable importance:\r\n" + importanceSummary);
            }

            var lastTransformed = allTransformed.Row(transformed.RowCount);
            var prediction = model.Predict(lastTransformed);

            //var before = item.Value.Item2[transformed.RowCount - _targetOffset - 1];
            var change = -1; //Math.Round(prediction / before, 2);

            var testPrediction = model.Predict(testSet.Observations);
            var metric = new MeanSquaredErrorRegressionMetric();
            var error = metric.Error(testSet.Targets, testPrediction);

            var averageError = 0d;
            lock (Locker)
            {
                _totalError += error;
                itemCount++;
                averageError = Math.Round(_totalError / itemCount, 3);
            }

            var isLondon = London.Contains(grouping.First().Value.Name);

            var message = $"TotalError: {Math.Round(_totalError, 3)}, AverageError: {averageError}, Target: {_targetName}, Offset: {_targetOffset}, Region: {grouping.First().Value.Name}, London: {isLondon}, Error: {Math.Round(error, 3)}, Next: {Math.Round(prediction, 3)}, Change: {change}";
            Program.Logger.Info(message);
        }
    });

    if (pauseAtEnd)
    {
        Console.WriteLine("Press any key to continue");
        Console.ReadKey();
    }
}
public void Classification_Find_Best_Model_With_Default_Parameters()
{
    #region Read and Transform Data
    var parser = new CsvParser(() => new StringReader(Resources.winequality_white));
    var targetName = "quality";

    // read feature matrix (all columns different from the targetName)
    var observations = parser.EnumerateRows(c => c != targetName)
        .ToF64Matrix();

    // create minmax normalizer (normalizes each feature from 0.0 to 1.0)
    var minMaxTransformer = new MinMaxTransformer(0.0, 1.0);

    // transforms features using the feature normalization transform
    minMaxTransformer.Transform(observations, observations);

    // read targets
    var targets = parser.EnumerateRows(targetName)
        .ToF64Vector();
    #endregion

    // split data
    // creates training test splitter; training and test sets are split
    // to have an equal distribution of classes in both sets.
    var splitter = new StratifiedTrainingTestIndexSplitter<double>(trainingPercentage: 0.7, seed: 24);

    var trainingTestSplit = splitter.SplitSet(observations, targets);
    var trainingSet = trainingTestSplit.TrainingSet;
    var testSet = trainingTestSplit.TestSet;

    // Create list of all classification learners (with default parameters)
    var learners = new List<ILearner<double>>
    {
        new ClassificationDecisionTreeLearner(),
        new ClassificationRandomForestLearner(),
        new ClassificationExtremelyRandomizedTreesLearner(),
        new ClassificationAdaBoostLearner(),
        new ClassificationBinomialGradientBoostLearner(),
    };

    // metric for measuring the error
    var metric = new TotalErrorClassificationMetric<double>();

    // try all learners
    var testPredictions = new double[testSet.Targets.Length];
    var testObservation = new double[trainingSet.Observations.ColumnCount];

    foreach (var learner in learners)
    {
        // train model
        var model = learner.Learn(trainingSet.Observations, trainingSet.Targets);

        // iterate over test set and predict each observation
        for (int i = 0; i < testSet.Targets.Length; i++)
        {
            testSet.Observations.Row(i, testObservation);
            testPredictions[i] = model.Predict(testObservation);
        }

        // measure error on test set
        var error = metric.Error(testSet.Targets, testPredictions);

        // Trace learner type and error to output window
        Trace.WriteLine(string.Format("{0}: {1:0.0000}", learner.GetType().Name, error));
    }
}
public void SharpLearning_With_Cntk_Example()
{
    // Load data
    var (observations, targets) = DataSetUtilities.LoadWinequalityWhite();

    // transform data for neural net
    var transform = new MinMaxTransformer(0.0, 1.0);
    transform.Transform(observations, observations);

    var featureCount = observations.ColumnCount;
    var observationCount = observations.RowCount;
    var targetCount = 1;

    var inputShape = new int[] { featureCount, 1 };
    var outputShape = new int[] { targetCount };

    // Convert data to float, and wrap as minibatch data.
    var observationsFloat = observations.Data().Select(v => (float)v).ToArray();
    var observationsData = new MemoryMinibatchData(observationsFloat, inputShape, observationCount);
    var targetsFloat = targets.Select(v => (float)v).ToArray();
    var targetsData = new MemoryMinibatchData(targetsFloat, outputShape, observationCount);

    var dataType = DataType.Float;
    var device = DeviceDescriptor.CPUDevice;

    // setup input and target variables.
    var inputVariable = Layers.Input(inputShape, dataType);
    var targetVariable = Variable.InputVariable(outputShape, dataType);

    // setup name to variable
    var nameToVariable = new Dictionary<string, Variable>
    {
        { "observations", inputVariable },
        { "targets", targetVariable },
    };

    // Get cross validation folds.
    var sampler = new RandomIndexSampler<double>(seed: 24);
    var crossValidationIndexSets = CrossValidationUtilities
        .GetKFoldCrossValidationIndexSets(sampler, foldCount: 10, targets: targets);

    var predictions = new double[observationCount];

    // Run cross validation loop.
    foreach (var set in crossValidationIndexSets)
    {
        // setup data.
        var trainingNameToData = new Dictionary<string, MemoryMinibatchData>
        {
            { "observations", observationsData.GetSamples(set.trainingIndices) },
            { "targets", targetsData.GetSamples(set.trainingIndices) }
        };

        var validationNameToData = new Dictionary<string, MemoryMinibatchData>
        {
            { "observations", observationsData.GetSamples(set.validationIndices) },
            { "targets", targetsData.GetSamples(set.validationIndices) }
        };

        var trainSource = new MemoryMinibatchSource(nameToVariable, trainingNameToData, seed: 232, randomize: true);
        var validationSource = new MemoryMinibatchSource(nameToVariable, validationNameToData, seed: 232, randomize: false);

        // Create model and fit.
        var model = CreateModel(inputVariable, targetVariable, targetCount, dataType, device);
        model.Fit(trainSource, batchSize: 128, epochs: 10);

        // Predict.
        var predictionsRaw = model.Predict(validationSource);
        var currentPredictions = predictionsRaw.Select(v => (double)v.Single()).ToArray();

        // set cross-validation predictions
        var validationIndices = set.validationIndices;
        for (int i = 0; i < validationIndices.Length; i++)
        {
            predictions[validationIndices[i]] = currentPredictions[i];
        }
    }

    Trace.WriteLine(FormatErrorString(targets, predictions));
}