/// <summary>
/// One hot encodes the REBER strings
/// </summary>
/// <param name="strList">A list of REBER sequences</param>
/// <returns>A data table with matrices to represent the sequences of vectors and their corresponding outputs</returns>
public static IDataTable GetOneHot(IEnumerable<string> strList)
{
    var sequences = strList.ToList();

    // first pass: for every observed prefix, record the set of character indices that followed it
    var followingByPrefix = new Dictionary<string, HashSet<int>>();
    foreach (var sequence in sequences) {
        for (var i = 1; i < sequence.Length; i++) {
            var prefix = sequence.Substring(0, i);
            if (!followingByPrefix.TryGetValue(prefix, out var validNext)) {
                validNext = new HashSet<int>();
                followingByPrefix.Add(prefix, validNext);
            }
            validNext.Add(_ch[sequence[i]]);
        }
    }

    // second pass: one hot encode each character, with the target marking
    // every character that was observed to follow the current prefix
    var builder = DataTableBuilder.CreateTwoColumnMatrix();
    foreach (var sequence in sequences) {
        var inputRows = new FloatVector[sequence.Length];
        var outputRows = new FloatVector[sequence.Length];
        for (var i = 0; i < sequence.Length; i++) {
            var input = new float[_ch.Count];
            var output = new float[_ch.Count];
            input[_ch[sequence[i]]] = 1f;
            if (followingByPrefix.TryGetValue(sequence.Substring(0, i + 1), out var validNext)) {
                foreach (var index in validNext)
                    output[index] = 1f;
            }
            inputRows[i] = new FloatVector { Data = input };
            outputRows[i] = new FloatVector { Data = output };
        }
        builder.Add(FloatMatrix.Create(inputRows), FloatMatrix.Create(outputRows));
    }
    return builder.Build();
}
/// <summary>
/// Creates random integers added together as feature vectors
/// The input feature contains two features, one for each bit at that position
/// The output feature contains a single feature: 1 or 0 if that bit is set in the result
/// </summary>
/// <param name="sampleCount">How many samples to generate</param>
/// <param name="stochastic">True to generate random integers</param>
/// <returns>A list of sequences</returns>
public static IDataTable Addition(int sampleCount, bool stochastic)
{
    // a fixed seed gives reproducible data when not stochastic
    Random rand = stochastic ? new Random() : new Random(0);
    var builder = DataTableBuilder.CreateTwoColumnMatrix();

    for (var sample = 0; sample < sampleCount; sample++) {
        // generate some random numbers (sized to prevent overflow)
        var first = rand.Next(int.MaxValue / 2);
        var second = rand.Next(int.MaxValue / 2);

        var firstBits = _GetBitArray(first);
        var secondBits = _GetBitArray(second);
        var sumBits = _GetBitArray(first + second);

        // one sequence step per bit position: two input features (one bit from
        // each addend) and a single output feature (that bit of the sum)
        var inputs = new FloatVector[sumBits.Length];
        var outputs = new FloatVector[sumBits.Length];
        for (var bit = 0; bit < sumBits.Length; bit++) {
            inputs[bit] = new FloatVector { Data = new[] { firstBits[bit], secondBits[bit] } };
            outputs[bit] = new FloatVector { Data = new[] { sumBits[bit] } };
        }
        builder.Add(FloatMatrix.Create(inputs), FloatMatrix.Create(outputs));
    }
    return builder.Build();
}
/// <summary>
/// Trains a GRU recurrent network that reads a sequence of encoded characters (many steps)
/// and produces a single summary vector marking which characters occurred (one output)
/// </summary>
static void ManyToOne()
{
    var grammar = new SequenceGenerator(dictionarySize: 16, minSize: 3, maxSize: 6, noRepeat: true, isStochastic: false);
    var sequences = grammar.GenerateSequences().Take(1000).ToList();

    // each row pairs the encoded sequence (matrix) with its summary vector
    var builder = BrightWireProvider.CreateDataTableBuilder();
    builder.AddColumn(ColumnType.Matrix, "Sequence");
    builder.AddColumn(ColumnType.Vector, "Summary");

    foreach (var sequence in sequences) {
        var encodedSteps = new List<FloatVector>();
        var distinctChars = new HashSet<char>();
        foreach (var ch in sequence) {
            distinctChars.Add(ch);
            encodedSteps.Add(grammar.Encode(ch));
        }
        // the target marks each distinct character that occurred in the sequence
        var summary = grammar.Encode(distinctChars.Select(ch2 => (ch2, 1f)));
        builder.Add(FloatMatrix.Create(encodedSteps.ToArray()), summary);
    }
    var data = builder.Build().Split(0);

    using (var lap = BrightWireProvider.CreateLinearAlgebra(false)) {
        var graph = new GraphFactory(lap);
        var errorMetric = graph.ErrorMetric.BinaryClassification;

        // create the property set
        var propertySet = graph.CurrentPropertySet
            .Use(graph.GradientDescent.RmsProp)
            .Use(graph.WeightInitialisation.Xavier);

        // create the engine
        var trainingData = graph.CreateDataSource(data.Training);
        var testData = trainingData.CloneWith(data.Test);
        var engine = graph.CreateTrainingEngine(trainingData, 0.03f, 8);

        // build the network
        const int HIDDEN_LAYER_SIZE = 128;
        var memory = new float[HIDDEN_LAYER_SIZE];
        var network = graph.Connect(engine)
            .AddGru(memory)
            .AddFeedForward(engine.DataSource.OutputSize)
            .Add(graph.SigmoidActivation())
            .AddBackpropagationThroughTime(errorMetric);

        engine.Train(20, testData, errorMetric);

        // report the average error over the test set
        var networkGraph = engine.Graph;
        var executionEngine = graph.CreateEngine(networkGraph);
        var output = executionEngine.Execute(testData);
        Console.WriteLine(output.Where(o => o.Target != null).Average(o => o.CalculateError(errorMetric)));
    }
}
/// <summary>
/// Trains an LSTM recurrent network that expands a single summary vector (one input)
/// into a sequence of encoded character steps (many outputs)
/// </summary>
static void OneToMany()
{
    var grammar = new SequenceGenerator(dictionarySize: 10, minSize: 5, maxSize: 5, noRepeat: true, isStochastic: false);
    var sequences = grammar.GenerateSequences().Take(1000).ToList();

    // each row pairs a single summary vector with the encoded sequence (matrix) it describes
    var builder = BrightWireProvider.CreateDataTableBuilder();
    builder.AddColumn(ColumnType.Vector, "Summary");
    builder.AddColumn(ColumnType.Matrix, "Sequence");

    foreach (var sequence in sequences) {
        // count how often each character occurs within the sequence
        var charCounts = sequence
            .GroupBy(ch => ch)
            .Select(g => (g.Key, g.Count()))
            .ToDictionary(d => d.Item1, d => (float)d.Item2);
        var summary = grammar.Encode(charCounts.Select(kv => (kv.Key, kv.Value)));

        // the expected output is one encoded step per distinct character, in character order
        var steps = charCounts
            .OrderBy(kv => kv.Key)
            .Select(kv => grammar.Encode(kv.Key, kv.Value))
            .ToArray();
        builder.Add(summary, FloatMatrix.Create(steps));
    }
    var data = builder.Build().Split(0);

    using var lap = BrightWireProvider.CreateLinearAlgebra(false);
    var graph = new GraphFactory(lap);
    var errorMetric = graph.ErrorMetric.BinaryClassification;

    // create the property set
    var propertySet = graph.CurrentPropertySet
        .Use(graph.GradientDescent.RmsProp)
        .Use(graph.WeightInitialisation.Xavier);

    // create the engine, dropping the learning rate to a third after 30 epochs
    const float TRAINING_RATE = 0.1f;
    var trainingData = graph.CreateDataSource(data.Training);
    var testData = trainingData.CloneWith(data.Test);
    var engine = graph.CreateTrainingEngine(trainingData, TRAINING_RATE, 8);
    engine.LearningContext.ScheduleLearningRate(30, TRAINING_RATE / 3);

    // build the network
    const int HIDDEN_LAYER_SIZE = 128;
    graph.Connect(engine)
        .AddLstm(HIDDEN_LAYER_SIZE)
        .AddFeedForward(engine.DataSource.OutputSize)
        .Add(graph.SigmoidActivation())
        .AddBackpropagation(errorMetric);

    engine.Train(40, testData, errorMetric);

    // report the average error over the test set
    var networkGraph = engine.Graph;
    var executionEngine = graph.CreateEngine(networkGraph);
    var output = executionEngine.Execute(testData);
    Console.WriteLine(output.Average(o => o.CalculateError(errorMetric)));
}
/// <summary>
/// Checks that a matrix-based data source yields a mini batch with the expected
/// dimensions, values and (absent) target
/// </summary>
public void MatrixDataSource()
{
    // build ten 10x10 matrices; each matrix's row i is created from GetArray(i, 10)
    var matrices = new List<FloatMatrix>();
    for (var j = 0; j < 10; j++) {
        var rows = new FloatVector[10];
        for (var i = 0; i < 10; i++)
            rows[i] = FloatVector.Create(GetArray(i, 10));
        matrices.Add(FloatMatrix.Create(rows));
    }

    // request a mini batch built from the first three matrices
    var dataSource = _factory.CreateDataSource(matrices);
    var miniBatch = dataSource.Get(null, new[] { 0, 1, 2 });
    var currentSequence = miniBatch.CurrentSequence;
    var batchMatrix = currentSequence.Input[0].GetMatrix();

    // no output column was supplied, so the sequence should carry no target
    Assert.IsNull(currentSequence.Target);
    Assert.IsTrue(batchMatrix.RowCount == 3);
    Assert.IsTrue(batchMatrix.ColumnCount == 10);
    Assert.AreEqual(batchMatrix.Row(0).GetAt(0), 0f);
}
/// <summary>
/// Uses a recurrent LSTM neural network to predict stock price movements
/// Data can be downloaded from https://raw.githubusercontent.com/plotly/datasets/master/stockdata.csv
/// </summary>
/// <param name="dataFilePath">Path to the downloaded stockdata.csv file</param>
static void StockData(string dataFilePath)
{
    // load and normalise the data - the reader is now disposed so the file handle is released
    using var reader = new StreamReader(dataFilePath);
    var dataSet = reader.ParseCSV(',', true);
    var normalised = dataSet.Normalise(NormalisationType.FeatureScale);
    var rows = normalised.GetNumericRows(dataSet.Columns.Where(c => c.Name != "Date").Select(c => c.Index));

    // build the data table with a window of input data and the prediction as the following value
    var builder = BrightWireProvider.CreateDataTableBuilder();
    builder.AddColumn(ColumnType.Matrix, "Past");
    builder.AddColumn(ColumnType.Vector, "Future");
    const int LAST_X_DAYS = 14;
    for (var i = 0; i < rows.Count - LAST_X_DAYS - 1; i++) {
        var inputVector = new List<FloatVector>();
        for (var j = 0; j < LAST_X_DAYS; j++)
            inputVector.Add(FloatVector.Create(rows[i + j]));
        var input = FloatMatrix.Create(inputVector.ToArray());
        // NOTE(review): this targets the row two steps after the window (i + LAST_X_DAYS + 1);
        // the value immediately following the window would be i + LAST_X_DAYS - confirm which is intended
        var target = FloatVector.Create(rows[i + LAST_X_DAYS + 1]);
        builder.Add(input, target);
    }
    // NOTE(review): only 20% of the rows become training data (the remaining 80% are test data) - confirm this is intended
    var data = builder.Build().Split(trainingPercentage: 0.2);

    using (var lap = BrightWireProvider.CreateLinearAlgebra()) {
        var graph = new GraphFactory(lap);
        var errorMetric = graph.ErrorMetric.Quadratic;

        // create the property set
        graph.CurrentPropertySet
            .Use(graph.GradientDescent.Adam)
            .Use(graph.WeightInitialisation.Xavier);

        // create the engine
        var trainingData = graph.CreateDataSource(data.Training);
        var testData = trainingData.CloneWith(data.Test);
        var engine = graph.CreateTrainingEngine(trainingData, learningRate: 0.03f, batchSize: 128);

        // build the network
        const int HIDDEN_LAYER_SIZE = 256;
        graph.Connect(engine)
            .AddLstm(HIDDEN_LAYER_SIZE)
            .AddFeedForward(engine.DataSource.OutputSize)
            .Add(graph.TanhActivation())
            .AddBackpropagationThroughTime(errorMetric);

        // train the network, capturing the best model found during training
        GraphModel bestNetwork = null;
        engine.Train(50, testData, errorMetric, model => bestNetwork = model);
        if (bestNetwork != null) {
            // execute each row of the test data on an execution engine
            var executionEngine = graph.CreateEngine(bestNetwork.Graph);
            var results = executionEngine.Execute(testData).OrderSequentialOutput();
            var expectedOutput = data.Test.GetColumn<FloatVector>(1);
            var score = results.Select((r, i) => errorMetric.Compute(r.Last(), expectedOutput[i])).Average();
            Console.WriteLine(score);
        }
    }
}