/// <summary>
/// Runs one training pass over a sequence: a forward phase through per-step machine
/// instances, a gradient reset, a backward phase in reverse step order, and a weight update.
/// </summary>
/// <param name="input">Input vectors, one per step.</param>
/// <param name="knownOutput">Target vectors, indexed the same way as <paramref name="input"/>.</param>
/// <returns>The per-step machines created during the forward phase.</returns>
private NeuralTuringMachine[] TrainInternal(double[][] input, double[][] knownOutput)
{
    int stepCount = input.Length;
    NeuralTuringMachine[] unrolled = new NeuralTuringMachine[stepCount];

    // FORWARD phase: step 0 is constructed from _machine, every later step from its predecessor.
    _machine.InitializeMemoryState();
    NeuralTuringMachine current = new NeuralTuringMachine(_machine);
    unrolled[0] = current;
    current.Process(input[0]);
    for (int step = 1; step < stepCount; step++)
    {
        current = new NeuralTuringMachine(current);
        unrolled[step] = current;
        current.Process(input[step]);
    }

    // Gradient reset
    _gradientResetter.Reset();
    _machine.UpdateWeights(_gradientResetter);

    // BACKWARD phase: last step first, then the base machine.
    for (int remaining = stepCount; remaining > 0; remaining--)
    {
        int step = remaining - 1;
        unrolled[step].BackwardErrorPropagation(knownOutput[step]);
    }
    _machine.BackwardErrorPropagation();

    // Weight updates
    _weightUpdater.Reset();
    _machine.UpdateWeights(_weightUpdater);

    return unrolled;
}
/// <summary>
/// Starts parallel training for the given configuration. When the configuration's directory
/// already contains files, training starts from the most recently created one; otherwise
/// it starts from freshly created machines.
/// </summary>
private static void ParallelIterations(int controllerSize, int headCount, int memoryN, int memoryM, Random rand, List<double[][]> inputs, List<double[][]> outputs)
{
    string directoryName = string.Format("{0}_{1}_{2}_{3}", controllerSize, headCount, memoryM, memoryN);

    string[] filenames = Directory.Exists(directoryName)
        ? Directory.GetFiles(directoryName)
        : new string[0];

    // No directory, or an empty one: nothing to resume from.
    if (filenames.Length == 0)
    {
        ParallelTasks.Run(
            () => MachineFactory(headCount, controllerSize, memoryN, memoryM, rand),
            () => ExampleFactory(inputs, outputs, rand),
            directoryName);
        return;
    }

    // Pick the file with the latest creation time (the first one wins on ties).
    string newestFileName = null;
    DateTime newestDateTime = DateTime.MinValue;
    foreach (string candidate in filenames)
    {
        DateTime created = File.GetCreationTime(candidate);
        if (created > newestDateTime)
        {
            newestDateTime = created;
            newestFileName = candidate;
        }
    }

    // The returned machine is not used; the call supplies weightsCount for this configuration.
    int weightsCount;
    GetRandomMachine(out weightsCount, controllerSize, headCount, memoryN, memoryM, rand);

    ParallelTasks.Run(
        () => new Tuple<NeuralTuringMachine, int>(NeuralTuringMachine.Load(newestFileName), weightsCount),
        () => ExampleFactory(inputs, outputs, rand),
        directoryName);
}
/// <summary>
/// Creates a randomly initialised machine with the fixed configuration below and reports
/// the number of weights computed for that configuration.
/// </summary>
/// <param name="weightsCount">Receives the weight count computed from the configuration constants.</param>
/// <returns>The newly constructed machine.</returns>
private static NeuralTuringMachine GetRandomMachine(out int weightsCount)
{
    const int vectorSize = 8;
    const int controllerSize = 100;
    const int headsCount = 1;
    const int memoryN = 128;
    const int memoryM = 20;
    const int inputSize = vectorSize + 2;
    const int outputSize = vectorSize;

    // Seed from a fresh GUID rather than the clock's millisecond component: the millisecond
    // value has only 1000 possible seeds and repeats when this method is called several
    // times in quick succession, which would produce machines with identical weights.
    Random rand = new Random(Guid.NewGuid().GetHashCode());

    int headUnitSize = Head.GetUnitSize(memoryM);
    weightsCount =
        (headsCount * memoryN) +
        (memoryN * memoryM) +
        (controllerSize * headsCount * memoryM) +
        (controllerSize * inputSize) +
        (controllerSize) +
        (outputSize * (controllerSize + 1)) +
        (headsCount * headUnitSize * (controllerSize + 1));

    //TODO remove rand
    return new NeuralTuringMachine(inputSize, outputSize, controllerSize, headsCount, memoryN, memoryM, new RandomWeightInitializer(rand));
}
/// <summary>
/// Builds a random machine for the given sizes and pairs it with its weight count.
/// </summary>
/// <returns>A tuple of the new machine and the weight count reported by GetRandomMachine.</returns>
private static Tuple<NeuralTuringMachine, int> MachineFactory(int headCount, int controllerSize, int memoryN, int memoryM, Random rand)
{
    int count;
    NeuralTuringMachine created = GetRandomMachine(out count, controllerSize, headCount, memoryN, memoryM, rand);
    Tuple<NeuralTuringMachine, int> pair = new Tuple<NeuralTuringMachine, int>(created, count);
    return pair;
}
/// <summary>
/// Performs a single training pass: forward through a chain of per-step machines,
/// gradient reset, backward pass from the last step to the first, then a weight update.
/// </summary>
/// <param name="input">One input vector per step.</param>
/// <param name="knownOutput">One target vector per step, aligned with <paramref name="input"/>.</param>
/// <returns>The machines produced by the forward phase, in step order.</returns>
private NeuralTuringMachine[] TrainInternal(double[][] input, double[][] knownOutput)
{
    int length = input.Length;
    NeuralTuringMachine[] chain = new NeuralTuringMachine[length];

    // FORWARD phase
    _machine.InitializeMemoryState();
    NeuralTuringMachine previous = new NeuralTuringMachine(_machine);
    chain[0] = previous;
    previous.Process(input[0]);
    for (int t = 1; t < length; t++)
    {
        // Each step's machine is constructed from the machine of the step before it.
        NeuralTuringMachine next = new NeuralTuringMachine(previous);
        chain[t] = next;
        next.Process(input[t]);
        previous = next;
    }

    // Gradient reset
    _gradientResetter.Reset();
    _machine.UpdateWeights(_gradientResetter);

    // BACKWARD phase (reverse step order, base machine last)
    int t2 = length - 1;
    while (t2 >= 0)
    {
        chain[t2].BackwardErrorPropagation(knownOutput[t2]);
        t2--;
    }
    _machine.BackwardErrorPropagation();

    // Weight updates
    _weightUpdater.Reset();
    _machine.UpdateWeights(_weightUpdater);

    return chain;
}
/// <summary>
/// Creates a BPTT teacher for <paramref name="machine"/> backed by a new RMSProp weight updater.
/// </summary>
/// <param name="weightsCount">Weight count passed to the updater.</param>
/// <param name="machine">Machine the teacher will train.</param>
private static BPTTTeacher GetTeacher(int weightsCount, NeuralTuringMachine machine)
{
    return new BPTTTeacher(
        machine,
        new RMSPropWeightUpdater(weightsCount, 0.95, 0.5, 0.001, 0.001));
}
/// <summary>
/// Allocates weight and gradient buffers of <paramref name="weightsCount"/> elements and
/// passes a WeightsExtractor over them to <paramref name="machine"/>.
/// </summary>
public CopyMachine(int weightsCount, NeuralTuringMachine machine)
{
    _gradients = new double[weightsCount];
    _weights = new double[weightsCount];

    machine.UpdateWeights(new WeightsExtractor(_weights, _gradients));
}
/// <summary>
/// Creates a learning task for <paramref name="machine"/> with its own BPTT teacher.
/// </summary>
/// <param name="machine">Machine trained by this task.</param>
/// <param name="weightUpdater">Updater stored on the task and handed to the teacher.</param>
/// <param name="id">Identifier of this task.</param>
public LearningTask(NeuralTuringMachine machine, RMSPropWeightUpdater weightUpdater, int id)
{
    _id = id;
    _machine = machine;
    _weightUpdater = weightUpdater;
    _teacher = new BPTTTeacher(_machine, weightUpdater);

    _iterations = 0;
    _longTermAverageErrors = new List<double>();
    Priority = 10;
}
/// <summary>
/// Collects the output of every machine, preserving array order.
/// </summary>
/// <param name="machines">Machines to query.</param>
/// <returns>An array whose i-th entry is the result of GetOutput() on the i-th machine.</returns>
private double[][] GetMachineOutputs(NeuralTuringMachine[] machines)
{
    double[][] collected = new double[machines.Length][];
    int slot = 0;
    foreach (NeuralTuringMachine stepMachine in machines)
    {
        collected[slot] = stepMachine.GetOutput();
        slot++;
    }
    return collected;
}
/// <summary>
/// Collects the head addressings of every machine, preserving array order.
/// </summary>
/// <param name="machines">Machines to query.</param>
/// <returns>An array whose i-th entry is the result of GetHeadAdressings() on the i-th machine.</returns>
private double[][] GetHeadAddressings(NeuralTuringMachine[] machines)
{
    int count = machines.Length;
    double[][] addressings = new double[count][];
    int position = 0;
    while (position < count)
    {
        addressings[position] = machines[position].GetHeadAdressings();
        position++;
    }
    return addressings;
}
/// <summary>
/// Gathers GetOutput() from each machine into a jagged array, in the same order as the input.
/// </summary>
/// <param name="machines">Machines to read outputs from.</param>
/// <returns>One output vector per machine.</returns>
private double[][] GetMachineOutputs(NeuralTuringMachine[] machines)
{
    int total = machines.Length;
    double[][] outputs = new double[total][];
    for (int index = 0; index != total; ++index)
    {
        outputs[index] = machines[index].GetOutput();
    }
    return outputs;
}
/// <summary>
/// Creates a learning task for <paramref name="machine"/> with its own BPTT teacher,
/// an example generator and a directory name.
/// </summary>
/// <param name="machine">Machine trained by this task.</param>
/// <param name="weightUpdater">Updater stored on the task and handed to the teacher.</param>
/// <param name="exampleGenerator">Delegate producing a pair of jagged arrays per call.</param>
/// <param name="directoryName">Directory name stored on the task.</param>
/// <param name="id">Identifier of this task.</param>
public LearningTask(NeuralTuringMachine machine, RMSPropWeightUpdater weightUpdater, Func<Tuple<double[][], double[][]>> exampleGenerator, string directoryName, int id)
{
    _id = id;
    _directoryName = directoryName;
    _exampleGenerator = exampleGenerator;

    _machine = machine;
    _weightUpdater = weightUpdater;
    _teacher = new BPTTTeacher(_machine, weightUpdater);

    _iterations = 0;
    _longTermAverageErrors = new List<double>();

    // Integer division: 100 / 32 evaluates to 3.
    Priority = 100 / 32;
}
/// <summary>
/// Creates a randomly initialised machine with 8 inputs and 1 output for the given sizes,
/// and reports the weight count computed for that configuration.
/// </summary>
/// <param name="weightsCount">Receives the computed weight count.</param>
/// <param name="controllerSize">Controller size passed to the machine.</param>
/// <param name="headsCount">Number of heads passed to the machine.</param>
/// <param name="memoryN">First memory dimension passed to the machine.</param>
/// <param name="memoryM">Second memory dimension passed to the machine; also determines the head unit size.</param>
/// <param name="rand">Random source for the weight initializer.</param>
/// <returns>The newly constructed machine.</returns>
private static NeuralTuringMachine GetRandomMachine(out int weightsCount, int controllerSize, int headsCount, int memoryN, int memoryM, Random rand)
{
    const int inputSize = 8;
    const int outputSize = 1;

    int headUnitSize = Head.GetUnitSize(memoryM);

    // Accumulate the terms one at a time, in a fixed order.
    weightsCount = headsCount * memoryN;
    weightsCount += memoryN * memoryM;
    weightsCount += controllerSize * headsCount * memoryM;
    weightsCount += controllerSize * inputSize;
    weightsCount += controllerSize;
    weightsCount += outputSize * (controllerSize + 1);
    weightsCount += headsCount * headUnitSize * (controllerSize + 1);

    RandomWeightInitializer initializer = new RandomWeightInitializer(rand);
    return new NeuralTuringMachine(inputSize, outputSize, controllerSize, headsCount, memoryN, memoryM, initializer);
}
/// <summary>
/// Entry point for the NGram training task: trains a single machine in a long loop,
/// reports every 10th iteration to the data stream when one is registered, prints timing
/// every 100th iteration, and every 1000th iteration prints a sampled loss and saves the machine.
/// </summary>
static void Main()
{
    // Reporting is best-effort: on any failure the message is printed and training runs without a stream.
    DataStream reportStream = null;
    try
    {
        YoVisionClientHelper client = new YoVisionClientHelper();
        client.Connect(EndpointType.NetTcp, 8081, "localhost", "YoVisionServer");
        reportStream = client.RegisterDataStream(
            "NGram task training",
            new Int32DataType("Iteration"),
            new DoubleDataType("Average data loss"),
            new Double2DArrayType("Input"),
            new Double2DArrayType("Known output"),
            new Double2DArrayType("Real output"),
            new Double2DArrayType("Head addressings"));
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }

    const int controllerSize = 100;
    const int headsCount = 1;
    const int memoryN = 128;
    const int memoryM = 20;
    const int inputSize = 1;
    const int outputSize = 1;

    Random rand = new Random(42);
    NeuralTuringMachine machine = new NeuralTuringMachine(inputSize, outputSize, controllerSize, headsCount, memoryN, memoryM, new RandomWeightInitializer(rand));

    int headUnitSize = Head.GetUnitSize(memoryM);
    int weightsCount =
        (headsCount * memoryN) +
        (memoryN * memoryM) +
        (controllerSize * headsCount * memoryM) +
        (controllerSize * inputSize) +
        (controllerSize) +
        (outputSize * (controllerSize + 1)) +
        (headsCount * headUnitSize * (controllerSize + 1));
    Console.WriteLine(weightsCount);

    BPTTTeacher teacher = new BPTTTeacher(machine, new RMSPropWeightUpdater(weightsCount, 0.95, 0.5, 0.001, 0.001));

    // Training times in milliseconds for the most recent 100 iterations.
    long[] times = new long[100];

    for (int i = 1; i < 10000000; i++)
    {
        Tuple<double[][], double[][]> data = SequenceGenerator.GenerateSequence(SequenceGenerator.GeneratePropabilities());

        Stopwatch stopwatch = new Stopwatch();
        stopwatch.Start();
        double[][] headAddressings;
        double[][] output = teacher.TrainVerbose(data.Item1, data.Item2, out headAddressings);
        stopwatch.Stop();
        times[i % 100] = stopwatch.ElapsedMilliseconds;

        if (i % 10 == 0)
        {
            double loss = CalculateLogLoss(output, data.Item2);
            if (reportStream != null)
            {
                reportStream.Set("Iteration", i);
                reportStream.Set("Average data loss", loss);
                reportStream.Set("Input", data.Item1);
                reportStream.Set("Known output", data.Item2);
                reportStream.Set("Real output", output);
                reportStream.Set("Head addressings", headAddressings);
                reportStream.SendData();
            }
        }

        if (i % 100 == 0)
        {
            Console.WriteLine("Iteration: {0}, iterations per second: {1:0.0}", i, 1000 / times.Average());
        }

        if (i % 1000 == 0)
        {
            // Sample 100 sequences from one set of probabilities; each call to Train runs a training pass.
            double[] props = SequenceGenerator.GeneratePropabilities();
            const int sampleCount = 100;
            double[] losses = new double[sampleCount];
            for (int j = 0; j < sampleCount; j++)
            {
                Tuple<double[][], double[][]> sequence = SequenceGenerator.GenerateSequence(props);
                double[][] machineOutput = teacher.Train(sequence.Item1, sequence.Item2);
                double sampleLoss = CalculateLogLoss(machineOutput, sequence.Item2);
                losses[j] = -sampleLoss;
            }
            Console.WriteLine("Loss [bits per sequence]: {0}", losses.Average());

            machine.Save("NTM_" + i + DateTime.Now.ToString("s").Replace(":", ""));
        }
    }
}
/// <summary>
/// Trains 32 learning tasks in rounds on a pool of 8 worker threads. After each round the
/// long-term error averages are compared; when the best one improves on the best seen so far,
/// the task with the lowest average is copied over the task with the highest average.
/// </summary>
/// <param name="machineFactory">Creates a machine together with its weight count; called once per task.</param>
/// <param name="exampleFactory">Example generator handed to every task.</param>
/// <param name="directoryName">Directory name handed to every task.</param>
/// <remarks>
/// Rounds continue until <c>End</c> is observed at a round boundary or the lowest and highest
/// averages belong to the same task. On exit the work queue is completed, the workers are joined
/// and the queue and semaphores are disposed, so no worker thread stays blocked on the queue.
/// </remarks>
public static void Run(
    Func<Tuple<NeuralTuringMachine, int>> machineFactory,
    Func<Tuple<double[][], double[][]>> exampleFactory,
    string directoryName)
{
    const int numberOfThreads = 8;
    const int numberOfParallelTasks = 32;

    List<LearningTask> tasks = new List<LearningTask>();
    BlockingCollection<Tuple<Action<int>, int>> work = new BlockingCollection<Tuple<Action<int>, int>>();
    Thread[] threads = new Thread[numberOfThreads];
    SemaphoreSlim[] semaphores = new SemaphoreSlim[numberOfParallelTasks];
    for (int i = 0; i < numberOfParallelTasks; i++)
    {
        semaphores[i] = new SemaphoreSlim(0);
    }

    for (int i = 0; i < numberOfThreads; i++)
    {
        threads[i] = new Thread(
            () =>
            {
                // Workers drain the queue until it is marked complete. They do not test End
                // themselves, so a round that is in flight always finishes and the main
                // thread is never left waiting on a semaphore that will not be released.
                foreach (Tuple<Action<int>, int> item in work.GetConsumingEnumerable())
                {
                    item.Item1(item.Item2);
                    semaphores[item.Item2].Release();
                }
            });
        threads[i].Start();
    }

    try
    {
        int weightsCount = 0;
        for (int i = 0; i < numberOfParallelTasks; i++)
        {
            Tuple<NeuralTuringMachine, int> factory = machineFactory();
            NeuralTuringMachine machine = factory.Item1;
            weightsCount = factory.Item2;
            RMSPropWeightUpdater updater = new RMSPropWeightUpdater(weightsCount, 0.95, 0.5, 0.001);
            tasks.Add(new LearningTask(machine, updater, exampleFactory, directoryName, i));
        }

        int k = 1;
        double bestLongTermError = double.MaxValue;
        while (!End)
        {
            Stopwatch stopwatch = new Stopwatch();
            stopwatch.Start();

            // Queue one run per task, then wait for every task to signal completion.
            for (int i = 0; i < tasks.Count; i++)
            {
                work.Add(new Tuple<Action<int>, int>(id => { tasks[id].Run(); }, i));
            }
            for (int i = 0; i < tasks.Count; i++)
            {
                semaphores[i].Wait();
            }

            Console.WriteLine("Best copy");
            Console.WriteLine("Iteration: {0}", k);
            double[] longAverages = tasks.Select(task => task.GetLongTermErrorAverage()).ToArray();
            Console.WriteLine("Long averages");
            WriteSorted(tasks);
            Console.WriteLine("Current averages");
            WriteCurrentSorted(tasks);

            double min = longAverages.Min();
            Console.WriteLine("Long term average min < best long term error: {0}", min < bestLongTermError);
            if (min < bestLongTermError)
            {
                // Locate the tasks with the highest and the lowest long-term average.
                double maxError = double.MinValue;
                double minError = double.MaxValue;
                int maxIndex = 0;
                int minIndex = 0;
                for (int i = 0; i < longAverages.Length; i++)
                {
                    if (longAverages[i] > maxError)
                    {
                        maxError = longAverages[i];
                        maxIndex = i;
                    }
                    if (longAverages[i] < minError)
                    {
                        minError = longAverages[i];
                        minIndex = i;
                    }
                }

                // Best and worst are the same task: there is nothing to copy, so stop.
                if (minIndex == maxIndex)
                {
                    break;
                }

                Console.WriteLine("Copying {0} to {1}", tasks[minIndex].ID, tasks[maxIndex].ID);
                tasks[maxIndex].CopyFrom(tasks[minIndex], weightsCount);
                bestLongTermError = min;
            }

            ResetPriorities(tasks);
            Console.WriteLine("Remaining tasks count: {0}", tasks.Count);
            Console.WriteLine("Minimum long term error: {0}", min);
            Console.WriteLine("Best long term error: {0}", bestLongTermError);
            k++;

            stopwatch.Stop();
            double seconds = stopwatch.ElapsedMilliseconds / (double)1000;
            Console.WriteLine("Time: {0}[s] per task: {1}[s]", seconds, seconds / tasks.Count);
        }
    }
    finally
    {
        // Let the workers run to completion, then release the synchronisation objects.
        work.CompleteAdding();
        foreach (Thread thread in threads)
        {
            thread.Join();
        }
        foreach (SemaphoreSlim semaphore in semaphores)
        {
            semaphore.Dispose();
        }
        work.Dispose();
    }
}
/// <summary>
/// Trains 16 machines in rounds on a single worker thread. After every round each machine
/// is passed through an average weight updater built over all machines, and a new teacher
/// is created for it.
/// </summary>
/// <remarks>
/// The local <c>end</c> flag is never set to true inside this method, so neither the worker
/// loop nor the round loop terminates on its own.
/// </remarks>
private static void MultipleSimultaniousAvgCopyTasks()
{
    const int numberOfThreads = 1;
    const int numberOfParallelTasks = 16;
    bool end = false;

    BlockingCollection<Tuple<Action<int>, int>> work = new BlockingCollection<Tuple<Action<int>, int>>();
    SemaphoreSlim[] semaphores = new SemaphoreSlim[numberOfParallelTasks];
    for (int s = 0; s < numberOfParallelTasks; s++)
    {
        semaphores[s] = new SemaphoreSlim(0);
    }

    Thread[] threads = new Thread[numberOfThreads];
    for (int t = 0; t < numberOfThreads; t++)
    {
        Thread worker = new Thread(
            () =>
            {
                while (!end)
                {
                    // Take a queued item, run it with its id, then signal that id's semaphore.
                    Tuple<Action<int>, int> action = work.Take();
                    action.Item1(action.Item2);
                    semaphores[action.Item2].Release();
                }
            });
        threads[t] = worker;
        worker.Start();
    }

    double[][] errorss = new double[numberOfParallelTasks][];
    long[][] timess = new long[numberOfParallelTasks][];
    NeuralTuringMachine[] machines = new NeuralTuringMachine[numberOfParallelTasks];
    BPTTTeacher[] teachers = new BPTTTeacher[numberOfParallelTasks];
    int weightsCount = 0;

    for (int slot = 0; slot < numberOfParallelTasks; slot++)
    {
        // Every error history starts filled with 1.
        double[] history = new double[100];
        for (int j = 0; j < 100; j++)
        {
            history[j] = 1;
        }
        errorss[slot] = history;
        timess[slot] = new long[100];

        machines[slot] = GetRandomMachine(out weightsCount);
        teachers[slot] = GetTeacher(weightsCount, machines[slot]);
    }

    int k = 1;
    while (!end)
    {
        Stopwatch stopwatch = new Stopwatch();
        stopwatch.Start();

        // Queue one iteration per machine; the arrays are indexed when the work item runs.
        for (int i = 0; i < numberOfParallelTasks; i++)
        {
            int index = i;
            Action<int> iteration = id => Iterate(teachers[index], errorss[index], timess[index], id);
            work.Add(new Tuple<Action<int>, int>(iteration, index));
        }
        for (int i = 0; i < numberOfParallelTasks; i++)
        {
            semaphores[i].Wait();
        }

        Console.WriteLine("Average NTMs");
        double[] errors = errorss.Select(doubles => doubles.Average()).ToArray();

        AverageMachineWeightUpdater averageWeightUpdater = new AverageMachineWeightUpdater(weightsCount, machines);
        foreach (NeuralTuringMachine machine in machines)
        {
            machine.UpdateWeights(averageWeightUpdater);
            averageWeightUpdater.Reset();
        }

        for (int i = 0; i < numberOfParallelTasks; i++)
        {
            teachers[i] = GetTeacher(weightsCount, machines[i]);
        }

        Console.WriteLine("Iteration: {0}", k);
        Console.WriteLine("Average error: {0}", errors.Average());
        Console.WriteLine("Best error: {0}", errors.Min());
        k++;

        stopwatch.Stop();
        double seconds = stopwatch.ElapsedMilliseconds / 1000.0;
        Console.WriteLine("Time: {0}[s] per task: {1}[s]", seconds, seconds / numberOfParallelTasks);
    }
}
/// <summary>
/// Entry point for the copy task: trains a single machine for 9999 iterations on sequences
/// of random length, reports every iteration to the data stream when one is registered,
/// prints a summary every 100 iterations, and saves the machine at the end.
/// </summary>
static void Main()
{
    // Reporting is best-effort: on any failure the message is printed and training runs without a stream.
    DataStream reportStream = null;
    try
    {
        YoVisionClientHelper client = new YoVisionClientHelper();
        client.Connect(EndpointType.NetTcp, 8081, "localhost", "YoVisionServer");
        reportStream = client.RegisterDataStream(
            "Copy task training",
            new Int32DataType("Iteration"),
            new DoubleDataType("Average data loss"),
            new Int32DataType("Training time"),
            new Int32DataType("Sequence length"));
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }

    // Histories over the most recent 100 iterations; errors start filled with 1.
    long[] times = new long[100];
    double[] errors = new double[100];
    for (int e = 0; e < 100; e++)
    {
        errors[e] = 1;
    }

    const int seed = 32702;
    Console.WriteLine(seed);
    //TODO parse command-line arguments
    Random rand = new Random(seed);

    const int vectorSize = 8;
    const int controllerSize = 100;
    const int headsCount = 1;
    const int memoryN = 128;
    const int memoryM = 20;
    const int inputSize = vectorSize + 2;
    const int outputSize = vectorSize;

    //TODO remove rand
    NeuralTuringMachine machine = new NeuralTuringMachine(inputSize, outputSize, controllerSize, headsCount, memoryN, memoryM, new RandomWeightInitializer(rand));

    //TODO extract weight count calculation
    int headUnitSize = Head.GetUnitSize(memoryM);
    int weightsCount =
        (headsCount * memoryN) +
        (memoryN * memoryM) +
        (controllerSize * headsCount * memoryM) +
        (controllerSize * inputSize) +
        (controllerSize) +
        (outputSize * (controllerSize + 1)) +
        (headsCount * headUnitSize * (controllerSize + 1));
    Console.WriteLine(weightsCount);

    BPTTTeacher teacher = new BPTTTeacher(machine, new RMSPropWeightUpdater(weightsCount, 0.95, 0.5, 0.001, 0.001));

    for (int i = 1; i < 10000; i++)
    {
        Tuple<double[][], double[][]> sequence = SequenceGenerator.GenerateSequence(rand.Next(20) + 1, vectorSize);
        double[][] input = sequence.Item1;
        double[][] target = sequence.Item2;

        Stopwatch stopwatch = new Stopwatch();
        stopwatch.Start();
        double[][] machinesOutput = teacher.Train(input, target);
        stopwatch.Stop();

        long elapsedMs = stopwatch.ElapsedMilliseconds;
        times[i % 100] = elapsedMs;

        // Divide the loss by the number of elements in the target.
        double error = CalculateLogLoss(target, machinesOutput);
        double averageError = error / (target.Length * target[0].Length);
        errors[i % 100] = averageError;

        if (reportStream != null)
        {
            reportStream.Set("Iteration", i);
            reportStream.Set("Average data loss", averageError);
            reportStream.Set("Training time", elapsedMs);
            reportStream.Set("Sequence length", (input.Length - 2) / 2);
            reportStream.SendData();
        }

        if (i % 100 == 0)
        {
            Console.WriteLine("Iteration: {0}, average error: {1}, iterations per second: {2:0.0}", i, errors.Average(), 1000 / times.Average());
        }
    }

    machine.Save("NTM" + DateTime.Now.ToString("s").Replace(":", ""));
}
/// <summary>
/// Trains a single machine on the copy task, reporting every iteration to
/// <paramref name="reportStream"/> when it is not null. Every 100 iterations the windowed
/// error is printed; when it drops below the saving threshold the machine is saved, the
/// threshold is halved and both sequence-length bounds are incremented. A checkpoint is
/// also saved every 100000 iterations.
/// </summary>
/// <param name="reportStream">Stream receiving per-iteration data; may be null.</param>
private static void StandardCopyTask(DataStream reportStream)
{
    // Histories over the most recent 100 iterations; errors start filled with 1.
    long[] times = new long[100];
    double[] errors = new double[100];
    for (int e = 0; e < 100; e++)
    {
        errors[e] = 1;
    }

    const int seed = 32702;
    Console.WriteLine(seed);
    //TODO parse command-line arguments
    Random rand = new Random(seed);

    const int vectorSize = 8;
    const int controllerSize = 100;
    const int headsCount = 1;
    const int memoryN = 128;
    const int memoryM = 20;
    const int inputSize = vectorSize + 2;
    const int outputSize = vectorSize;

    //TODO remove rand
    NeuralTuringMachine machine = new NeuralTuringMachine(inputSize, outputSize, controllerSize, headsCount, memoryN, memoryM, new RandomWeightInitializer(rand));

    //TODO extract weight count calculation
    int headUnitSize = Head.GetUnitSize(memoryM);
    int weightsCount =
        (headsCount * memoryN) +
        (memoryN * memoryM) +
        (controllerSize * headsCount * memoryM) +
        (controllerSize * inputSize) +
        (controllerSize) +
        (outputSize * (controllerSize + 1)) +
        (headsCount * headUnitSize * (controllerSize + 1));
    Console.WriteLine(weightsCount);

    BPTTTeacher teacher = new BPTTTeacher(machine, new RMSPropWeightUpdater(weightsCount, 0.95, 0.5, 0.001, 0.001));

    int minSeqLen = 200;
    int maxSeqLen = 200;
    double savingThreshold = 0.0005;

    for (int i = 1; i < 10000000; i++)
    {
        Tuple<double[][], double[][]> sequence = SequenceGenerator.GenerateSequence(rand.Next(minSeqLen, maxSeqLen), vectorSize);
        double[][] input = sequence.Item1;
        double[][] target = sequence.Item2;

        Stopwatch stopwatch = new Stopwatch();
        stopwatch.Start();
        double[][] headAddressings;
        double[][] machinesOutput = teacher.TrainVerbose(input, target, out headAddressings);
        stopwatch.Stop();

        long elapsedMs = stopwatch.ElapsedMilliseconds;
        times[i % 100] = elapsedMs;

        double error = CalculateLoss(target, machinesOutput);
        double averageError = error / (target.Length * target[0].Length);
        // The history keeps the undivided error; the divided value goes only to the report stream.
        errors[i % 100] = error;

        if (reportStream != null)
        {
            reportStream.Set("Iteration", i);
            reportStream.Set("Average data loss", averageError);
            reportStream.Set("Training time", elapsedMs);
            reportStream.Set("Sequence length", (input.Length - 2) / 2);
            reportStream.Set("Input", input);
            reportStream.Set("Known output", target);
            reportStream.Set("Real output", machinesOutput);
            reportStream.Set("Head addressings", headAddressings);
            reportStream.SendData();
        }

        if (i % 100 == 0)
        {
            double windowError = errors.Average();
            Console.WriteLine(
                "Iteration: {0}, error: {1}, iterations per second: {2:0.0} MinSeqLen: {3} MaxSeqLen: {4}",
                i, windowError, 1000 / times.Average(), minSeqLen, maxSeqLen);

            if (windowError < savingThreshold)
            {
                savingThreshold /= 2;
                machine.Save("NTM_" + windowError + "_" + DateTime.Now.ToString("s").Replace(":", ""));
                maxSeqLen++;
                minSeqLen++;
            }
        }

        if (i % 100000 == 0)
        {
            machine.Save("NTM_" + i + DateTime.Now.ToString("s").Replace(":", ""));
        }
    }
}
/// <summary>
/// Creates a teacher for <paramref name="machine"/> that uses <paramref name="weightUpdater"/>
/// and a newly created gradient resetter.
/// </summary>
/// <param name="machine">Machine to train.</param>
/// <param name="weightUpdater">Weight updater stored on the teacher.</param>
public BPTTTeacher(NeuralTuringMachine machine, IWeightUpdater weightUpdater)
{
    _gradientResetter = new GradientResetter();
    _weightUpdater = weightUpdater;
    _machine = machine;
}
/// <summary>
/// Stores the machine and the weight updater, and creates the gradient resetter
/// owned by this teacher.
/// </summary>
/// <param name="machine">Machine this teacher operates on.</param>
/// <param name="weightUpdater">Updater stored for later weight updates.</param>
public BPTTTeacher(NeuralTuringMachine machine, IWeightUpdater weightUpdater)
{
    this._machine = machine;
    this._weightUpdater = weightUpdater;

    GradientResetter resetter = new GradientResetter();
    this._gradientResetter = resetter;
}