/// <summary>
/// Computes the perplexity of <paramref name="model"/> on <paramref name="dataset"/>.
/// </summary>
/// <remarks>
/// Evaluation runs on a clone of the model whose state is reset first. For every
/// start offset <c>i</c>, the inputs are the BATCH_SIZE consecutive entries beginning
/// at <c>dataset[i]</c> and the targets are the same window shifted by one position.
/// </remarks>
/// <param name="model">Model to evaluate; it is cloned before use.</param>
/// <param name="dataset">Sequence of integer ids to evaluate on.</param>
/// <returns>
/// <c>exp(mean loss)</c> over all evaluated windows, or <see cref="double.NaN"/> when
/// <paramref name="dataset"/> is too short to hold a single window plus its target.
/// </returns>
static double Evaluate(FunctionStack model, int[] dataset)
{
    FunctionStack predictModel = (FunctionStack)model.Clone();
    predictModel.ResetState();

    Real totalLoss = 0;
    long totalLossCount = 0;

    // The window starting at i reads dataset[i] .. dataset[i + BATCH_SIZE] (targets are
    // shifted by one), so the last valid start is dataset.Length - BATCH_SIZE - 1.
    // The previous bound (dataset.Length - 1) ran past the array whenever BATCH_SIZE > 1.
    for (int i = 0; i < dataset.Length - BATCH_SIZE; i++)
    {
        NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE);
        NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE);

        for (int j = 0; j < BATCH_SIZE; j++)
        {
            x.Data[j] = dataset[j + i];
            t.Data[j] = dataset[j + i + 1];
        }

        Real sumLoss = new SoftmaxCrossEntropy().Evaluate(predictModel.Forward(x), t);
        totalLoss += sumLoss;
        totalLossCount++;
    }

    // Nothing was evaluated, so the mean loss (and therefore the perplexity) is undefined.
    if (totalLossCount == 0)
    {
        return double.NaN;
    }

    //calc perplexity: exp of the mean loss. The divisor is the number of accumulated
    //losses (it used to be count - 1, which skewed the mean and divided by zero for a
    //single window).
    return Math.Exp(totalLoss / totalLossCount);
}
/// <summary>
/// Computes the perplexity of <paramref name="model"/> on <paramref name="dataset"/>.
/// </summary>
/// <remarks>
/// Evaluation runs on a deep copy of the model whose state is reset first. For every
/// start offset <c>i</c>, the inputs are the BATCH_SIZE consecutive entries beginning
/// at <c>dataset[i]</c> and the targets are the same window shifted by one position.
/// Only the first array returned by <c>Forward</c> is scored.
/// </remarks>
/// <param name="model">Model to evaluate; it is deep-copied before use.</param>
/// <param name="dataset">Sequence of integer ids to evaluate on.</param>
/// <returns>
/// <c>exp(mean loss)</c> over all evaluated windows, or NaN when
/// <paramref name="dataset"/> is too short to hold a single window plus its target.
/// </returns>
static Real Evaluate(FunctionStack<Real> model, int[] dataset)
{
    FunctionStack<Real> predictModel = DeepCopyHelper<Real>.DeepCopy(model);
    predictModel.ResetState();

    Real totalLoss = 0;
    long totalLossCount = 0;

    // The window starting at i reads dataset[i] .. dataset[i + BATCH_SIZE] (targets are
    // shifted by one), so the last valid start is dataset.Length - BATCH_SIZE - 1.
    // The previous bound (dataset.Length - 1) ran past the array whenever BATCH_SIZE > 1.
    for (int i = 0; i < dataset.Length - BATCH_SIZE; i++)
    {
        NdArray<Real> x = new NdArray<Real>(new[] { 1 }, BATCH_SIZE);
        NdArray<int> t = new NdArray<int>(new[] { 1 }, BATCH_SIZE);

        for (int j = 0; j < BATCH_SIZE; j++)
        {
            x.Data[j] = dataset[j + i];
            t.Data[j] = dataset[j + i + 1];
        }

        NdArray<Real> result = predictModel.Forward(x)[0];
        Real sumLoss = new SoftmaxCrossEntropy<Real>().Evaluate(result, t);
        totalLoss += sumLoss;
        totalLossCount++;
    }

    // Nothing was evaluated, so the mean loss (and therefore the perplexity) is undefined.
    if (totalLossCount == 0)
    {
        return double.NaN;
    }

    //calc perplexity: exp of the mean loss. The divisor is the number of accumulated
    //losses (it used to be count - 1, which skewed the mean and divided by zero for a
    //single window).
    return Math.Exp(totalLoss / totalLossCount);
}
/// <summary>
/// Trains a Linear -> LSTM -> Linear stack with Adam on mini-batches produced by
/// <c>DataMaker</c>, reporting the loss through <c>RILogManager</c>, then runs
/// <c>predict</c> on one sequence of a freshly generated mini-batch.
/// </summary>
public static void Run()
{
    DataMaker maker = new DataMaker(STEPS_PER_CYCLE, NUMBER_OF_CYCLES);
    NdArray samples = maker.Make();

    FunctionStack net = new FunctionStack("Test8",
        new Linear(true, 1, 5, name: "Linear l1"),
        new LSTM(true, 5, 5, name: "LSTM l2"),
        new Linear(true, 5, 1, name: "Linear l3")
    );
    net.SetOptimizer(new Adam());

    RILogManager.Default?.SendDebug("Training...");

    int epoch = 0;
    while (epoch < TRAINING_EPOCHS)
    {
        NdArray[] batch = maker.MakeMiniBatch(samples, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);
        Real loss = ComputeLoss(net, batch);

        net.Update();
        net.ResetState();

        // Report every DISPLAY_EPOCH epochs, skipping the very first one.
        bool shouldReport = epoch != 0 && epoch % DISPLAY_EPOCH == 0;
        if (shouldReport)
        {
            RILogManager.Default?.SendDebug("[{0}]training loss:\t{1}", epoch, loss);
        }

        epoch++;
    }

    RILogManager.Default?.SendDebug("Testing...");

    NdArray[] testBatch = maker.MakeMiniBatch(samples, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

    // Fixed index of the sequence used as the prediction seed.
    int sampleIndex = 45;
    predict(testBatch[sampleIndex], net, PREDICTION_LENGTH);
}
/// <summary>
/// Resets the model state, feeds every value of <paramref name="input_seq"/> through
/// <c>model.Predict</c> in order, and returns the first data element of the first
/// output produced for the last value.
/// </summary>
/// <param name="model">Model used for prediction; its state is reset first.</param>
/// <param name="input_seq">Values fed to the model one at a time.</param>
/// <returns>
/// <c>Data[0]</c> of the last prediction, or of the initial zero-valued array when
/// <paramref name="input_seq"/> is empty.
/// </returns>
static Real predict_sequence(FunctionStack model, List<Real> input_seq)
{
    model.ResetState();

    NdArray latest = 0;
    foreach (Real value in input_seq)
    {
        // Only the output for the final value is kept; earlier calls just advance the model.
        latest = model.Predict(value)[0];
    }

    return latest.Data[0];
}
/// <summary>
/// Resets the model state, feeds every value of <paramref name="input_seq"/> through
/// <c>model.Predict</c> in order, and returns the first data element of the first
/// output produced for the last value.
/// </summary>
/// <param name="model">Model used for prediction; must not be null. Its state is reset first.</param>
/// <param name="input_seq">Values fed to the model one at a time; must not be null.</param>
/// <returns>
/// <c>Data[0]</c> of the last prediction, or of the initial zero-valued array when
/// <paramref name="input_seq"/> is empty.
/// </returns>
static Real predict_sequence(FunctionStack model, List<Real> input_seq)
{
    // Validate once, before either argument is touched. The checks used to be repeated
    // after model.ResetState(), where they could no longer protect anything.
    Ensure.Argument(model).NotNull();
    Ensure.Argument(input_seq).NotNull();

    model.ResetState();

    NdArray result = 0;
    foreach (var t in input_seq)
    {
        // Only the output for the final value is kept; earlier calls just advance the model.
        result = model.Predict(true, t)[0];
    }

    return result.Data[0];
}
/// <summary>
/// Trains a Linear -> LSTM -> Linear stack with Adam on mini-batches produced by
/// <c>DataMaker</c>, printing the loss to the console, then runs <c>predict</c> on one
/// sequence of a freshly generated mini-batch.
/// </summary>
public static void Run()
{
    DataMaker maker = new DataMaker(STEPS_PER_CYCLE, NUMBER_OF_CYCLES);
    NdArray<Real> samples = maker.Make();

    // The network layout is listed out in the FunctionStack
    FunctionStack<Real> net = new FunctionStack<Real>(
        new Linear<Real>(1, 5, name: "Linear l1"),
        new LSTM<Real>(5, 5, name: "LSTM l2"),
        new Linear<Real>(5, 1, name: "Linear l3")
    );

    // Declare the optimizer
    Adam<Real> optimizer = new Adam<Real>();
    optimizer.SetUp(net);

    // Training loop
    Console.WriteLine("Training...");

    int epoch = 0;
    while (epoch < TRAINING_EPOCHS)
    {
        NdArray<Real>[] batch = maker.MakeMiniBatch(samples, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);
        Real loss = ComputeLoss(net, batch);

        optimizer.Update();
        net.ResetState();

        // Report every DISPLAY_EPOCH epochs, skipping the very first one.
        bool shouldReport = epoch != 0 && epoch % DISPLAY_EPOCH == 0;
        if (shouldReport)
        {
            Console.WriteLine("[{0}]training loss:\t{1}", epoch, loss);
        }

        epoch++;
    }

    Console.WriteLine("Testing...");

    NdArray<Real>[] testBatch = maker.MakeMiniBatch(samples, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

    // Fixed index of the sequence used as the prediction seed.
    int sampleIndex = 45;
    predict(testBatch[sampleIndex], net, PREDICTION_LENGTH);
}
/// <summary>
/// Trains a Linear -> LSTM -> Linear stack with Adam on mini-batches produced by
/// <c>DataMaker</c>, printing the loss to the console, then runs <c>predict</c> on one
/// sequence of a freshly generated mini-batch.
/// </summary>
public static void Run()
{
    DataMaker maker = new DataMaker(STEPS_PER_CYCLE, NUMBER_OF_CYCLES);
    NdArray samples = maker.Make();

    // The layers of the network are listed in the FunctionStack
    FunctionStack net = new FunctionStack(
        new Linear(1, 5, name: "Linear l1"),
        new LSTM(5, 5, name: "LSTM l2"),
        new Linear(5, 1, name: "Linear l3")
    );

    // Attach the optimizer
    net.SetOptimizer(new Adam());

    // Training loop
    Console.WriteLine("Training...");

    int epoch = 0;
    while (epoch < TRAINING_EPOCHS)
    {
        NdArray[] batch = maker.MakeMiniBatch(samples, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);
        Real loss = ComputeLoss(net, batch);

        net.Update();
        net.ResetState();

        // Report every DISPLAY_EPOCH epochs, skipping the very first one.
        bool shouldReport = epoch != 0 && epoch % DISPLAY_EPOCH == 0;
        if (shouldReport)
        {
            Console.WriteLine("[{0}]training loss:\t{1}", epoch, loss);
        }

        epoch++;
    }

    Console.WriteLine("Testing...");

    NdArray[] testBatch = maker.MakeMiniBatch(samples, MINI_BATCH_SIZE, LENGTH_OF_SEQUENCE);

    // Fixed index of the sequence used as the prediction seed.
    int sampleIndex = 45;
    predict(testBatch[sampleIndex], net, PREDICTION_LENGTH);
}
/// <summary>
/// Downloads the train/validation/test files, builds the vocabulary, and trains an
/// EmbedID -> (Dropout, LSTM) x 2 -> Dropout -> Linear stack with gradient clipping
/// and SGD. Backward passes are run in bursts of BPROP_LEN steps; validation
/// perplexity is printed after every <c>jump</c> steps and test perplexity at the end.
/// </summary>
public static void Run()
{
    Console.WriteLine("Build Vocabulary.");

    Vocabulary vocab = new Vocabulary();

    string trainFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
    string validFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
    string testFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE);

    int[] trainTokens = vocab.LoadData(trainFile);
    int[] validTokens = vocab.LoadData(validFile);
    int[] testTokens = vocab.LoadData(testFile);

    int vocabSize = vocab.Length;

    Console.WriteLine("Network Initilizing.");
    FunctionStack net = new FunctionStack(
        new EmbedID(vocabSize, N_UNITS, name: "l1 EmbedID"),
        new Dropout(),
        new LSTM(N_UNITS, N_UNITS, name: "l2 LSTM"),
        new Dropout(),
        new LSTM(N_UNITS, N_UNITS, name: "l3 LSTM"),
        new Dropout(),
        new Linear(N_UNITS, vocabSize, name: "l4 Linear")
    );

    // Rather than capping at the given threshold, the correction rate is taken from
    // the L2 norm of all parameters.
    GradientClipping clipper = new GradientClipping(threshold: GRAD_CLIP);
    SGD sgd = new SGD(learningRate: 1);
    net.SetOptimizer(clipper, sgd);

    Real wholeLen = trainTokens.Length;
    int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
    int epoch = 0;

    // Forward results waiting for their backward pass.
    Stack<NdArray[]> pending = new Stack<NdArray[]>();

    Console.WriteLine("Train Start.");

    int totalSteps = jump * N_EPOCH;
    for (int i = 0; i < totalSteps; i++)
    {
        int step = i + 1;

        NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE);
        NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE);

        // Batch row j reads from its own region of the data, offset by jump * j;
        // the target is the entry that follows the input, wrapping at the end.
        for (int j = 0; j < BATCH_SIZE; j++)
        {
            int offset = jump * j + i;
            x.Data[j] = trainTokens[(int)(offset % wholeLen)];
            t.Data[j] = trainTokens[(int)((offset + 1) % wholeLen)];
        }

        NdArray[] output = net.Forward(x);
        Real sumLoss = new SoftmaxCrossEntropy().Evaluate(output, t);
        pending.Push(output);
        Console.WriteLine("[{0}/{1}] Loss: {2}", step, jump, sumLoss);

        //Run truncated BPTT
        if (step % BPROP_LEN == 0)
        {
            while (pending.Count > 0)
            {
                Console.WriteLine("backward" + pending.Count);
                net.Backward(pending.Pop());
            }

            net.Update();
            net.ResetState();
        }

        if (step % jump == 0)
        {
            epoch++;
            Console.WriteLine("evaluate");

            var validPerplexity = Evaluate(net, validTokens);
            Console.WriteLine("validation perplexity: {0}", validPerplexity);

            // From the sixth epoch on, shrink the learning rate after each epoch.
            if (epoch >= 6)
            {
                sgd.LearningRate /= 1.2;
                Console.WriteLine("learning rate =" + sgd.LearningRate);
            }
        }
    }

    Console.WriteLine("test start");
    var testPerplexity = Evaluate(net, testTokens);
    Console.WriteLine("test perplexity:" + testPerplexity);
}
/// <summary>
/// Downloads the train/validation/test files, builds the vocabulary, and trains an
/// EmbedID -> (Dropout, LSTM) x 2 -> Dropout -> Linear stack with gradient clipping
/// and SGD, calling <c>DocumentResults</c> after each stage and each training step.
/// Progress is reported through <c>RILogManager</c>; the histogram log written to
/// <c>LogPath</c> and a percentile distribution are reported at the end.
/// </summary>
/// <remarks>
/// The log stream and the histogram log writer are released in <c>finally</c> blocks,
/// so the log file is closed even when downloading, training or evaluation throws.
/// </remarks>
public static void Run()
{
    LongHistogram accumulatingHistogram;

    _outputStream = File.Create(LogPath);
    try
    {
        _logWriter = new HistogramLogWriter(_outputStream);
        try
        {
            _logWriter.Write(DateTime.Now);

            var recorder = HistogramFactory
                .With64BitBucketSize()
                ?.WithValuesFrom(1)
                ?.WithValuesUpTo(2345678912345)
                ?.WithPrecisionOf(3)
                ?.WithThreadSafeWrites()
                ?.WithThreadSafeReads()
                ?.Create();

            accumulatingHistogram = new LongHistogram(2345678912345, 3);

            var size = accumulatingHistogram.GetEstimatedFootprintInBytes();
            RILogManager.Default?.SendDebug("Histogram size = {0} bytes ({1:F2} MB)", size, size / 1024.0 / 1024.0);

            RILogManager.Default?.SendDebug("Recorded latencies [in system clock ticks]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.None, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in usec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMicroseconds, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in msec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);
            Console.WriteLine();

            RILogManager.Default?.SendDebug("Recorded latencies [in sec]");
            accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToSeconds, useCsvFormat: true);

            DocumentResults(accumulatingHistogram, recorder);
            RILogManager.Default?.SendDebug("Build Vocabulary.");
            DocumentResults(accumulatingHistogram, recorder);

            Vocabulary vocabulary = new Vocabulary();
            DocumentResults(accumulatingHistogram, recorder);

            string trainPath = InternetFileDownloader.Download(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
            DocumentResults(accumulatingHistogram, recorder);

            string validPath = InternetFileDownloader.Download(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
            DocumentResults(accumulatingHistogram, recorder);

            string testPath = InternetFileDownloader.Download(DOWNLOAD_URL + TEST_FILE, TEST_FILE);
            DocumentResults(accumulatingHistogram, recorder);

            int[] trainData = vocabulary.LoadData(trainPath);
            DocumentResults(accumulatingHistogram, recorder);

            int[] validData = vocabulary.LoadData(validPath);
            DocumentResults(accumulatingHistogram, recorder);

            int[] testData = vocabulary.LoadData(testPath);
            DocumentResults(accumulatingHistogram, recorder);

            int nVocab = vocabulary.Length;

            RILogManager.Default?.SendDebug("Network Initializing.");
            FunctionStack model = new FunctionStack("Test10",
                new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
                new Dropout(),
                new LSTM(true, N_UNITS, N_UNITS, name: "l2 LSTM"),
                new Dropout(),
                new LSTM(true, N_UNITS, N_UNITS, name: "l3 LSTM"),
                new Dropout(),
                new Linear(true, N_UNITS, nVocab, name: "l4 Linear")
            );
            DocumentResults(accumulatingHistogram, recorder);

            // Do not cease at the given threshold, correct the rate by taking the rate from L2Norm of all parameters
            GradientClipping gradientClipping = new GradientClipping(threshold: GRAD_CLIP);
            SGD sgd = new SGD(learningRate: 1);
            model.SetOptimizer(gradientClipping, sgd);
            DocumentResults(accumulatingHistogram, recorder);

            Real wholeLen = trainData.Length;
            int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
            int epoch = 0;

            // Forward results waiting for their backward pass.
            Stack<NdArray[]> backNdArrays = new Stack<NdArray[]>();

            RILogManager.Default?.SendDebug("Train Start.");
            double dVal;

            // The input/target buffers are allocated once and refilled on every step.
            NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);
            NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);

            for (int i = 0; i < jump * N_EPOCH; i++)
            {
                // Batch row j reads from its own region of the data, offset by jump * j;
                // the target is the entry that follows the input, wrapping at the end.
                for (int j = 0; j < BATCH_SIZE; j++)
                {
                    x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)];
                    t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)];
                }

                NdArray[] result = model.Forward(true, x);
                Real sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
                backNdArrays.Push(result);
                RILogManager.Default?.SendDebug("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss);

                //Run truncated BPTT
                if ((i + 1) % BPROP_LEN == 0)
                {
                    while (backNdArrays.Count > 0)
                    {
                        RILogManager.Default?.SendDebug("backward" + backNdArrays.Count);
                        model.Backward(true, backNdArrays.Pop());
                    }

                    model.Update();
                    model.ResetState();
                }

                if ((i + 1) % jump == 0)
                {
                    epoch++;
                    RILogManager.Default?.SendDebug("evaluate");
                    dVal = Evaluate(model, validData);
                    RILogManager.Default?.SendDebug($"validation perplexity: {dVal}");

                    // From the sixth epoch on, shrink the learning rate after each epoch.
                    if (epoch >= 6)
                    {
                        sgd.LearningRate /= 1.2;
                        RILogManager.Default?.SendDebug("learning rate =" + sgd.LearningRate);
                    }
                }

                DocumentResults(accumulatingHistogram, recorder);
            }

            RILogManager.Default?.SendDebug("test start");
            dVal = Evaluate(model, testData);
            RILogManager.Default?.SendDebug("test perplexity:" + dVal);
            DocumentResults(accumulatingHistogram, recorder);
        }
        finally
        {
            // Release the writer first, then the stream it writes to.
            _logWriter.Dispose();
        }
    }
    finally
    {
        _outputStream.Dispose();
    }

    // The log file is closed at this point, so it can be read back.
    RILogManager.Default?.SendDebug("Log contents");
    RILogManager.Default?.SendDebug(File.ReadAllText(LogPath));
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Percentile distribution (values reported in milliseconds)");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);

    RILogManager.Default?.SendDebug("Mean: " + BytesToString(accumulatingHistogram.GetMean()) + ", StdDev: " + BytesToString(accumulatingHistogram.GetStdDeviation()));
}
/// <summary>
/// Downloads the train/validation/test files, builds the vocabulary, and trains an
/// EmbedID -> (Dropout, LSTM) x 2 -> Dropout -> Linear stack with gradient clipping
/// and SGD. A backward pass runs on every step; the optimizers are applied and the
/// state is reset every BPROP_LEN steps. Validation perplexity is printed after every
/// <c>jump</c> steps and test perplexity at the end.
/// </summary>
public static void Run()
{
    Console.WriteLine("Build Vocabulary.");

    Vocabulary vocab = new Vocabulary();

    string trainFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE, TRAIN_FILE_HASH);
    string validFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + VALID_FILE, VALID_FILE, VALID_FILE_HASH);
    string testFile = InternetFileDownloader.Donwload(DOWNLOAD_URL + TEST_FILE, TEST_FILE, TEST_FILE_HASH);

    int[] trainTokens = vocab.LoadData(trainFile);
    int[] validTokens = vocab.LoadData(validFile);
    int[] testTokens = vocab.LoadData(testFile);

    int vocabSize = vocab.Length;

    Console.WriteLine("Network Initilizing.");
    FunctionStack<Real> net = new FunctionStack<Real>(
        new EmbedID<Real>(vocabSize, N_UNITS, name: "l1 EmbedID"),
        new Dropout<Real>(),
        new LSTM<Real>(N_UNITS, N_UNITS, name: "l2 LSTM"),
        new Dropout<Real>(),
        new LSTM<Real>(N_UNITS, N_UNITS, name: "l3 LSTM"),
        new Dropout<Real>(),
        new Linear<Real>(N_UNITS, vocabSize, name: "l4 Linear")
    );

    // Overwrite every parameter value with a scaled draw from Mother.Dice
    // (assuming NextDouble() yields [0, 1), values land in [-0.1, 0.1)).
    foreach (var function in net.Functions)
    {
        foreach (var parameter in function.Parameters)
        {
            for (int k = 0; k < parameter.Data.Length; k++)
            {
                parameter.Data[k] = ((Real)Mother.Dice.NextDouble() * 2.0f - 1.0f) / 10.0f;
            }
        }
    }

    // Rather than capping at the given threshold, the correction rate is taken from
    // the L2 norm of all parameters.
    GradientClipping<Real> clipper = new GradientClipping<Real>(threshold: GRAD_CLIP);
    SGD<Real> sgd = new SGD<Real>(learningRate: 0.1f);
    clipper.SetUp(net);
    sgd.SetUp(net);

    Real wholeLen = trainTokens.Length;
    int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
    int epoch = 0;

    Console.WriteLine("Train Start.");

    int totalSteps = jump * N_EPOCH;
    for (int i = 0; i < totalSteps; i++)
    {
        int step = i + 1;

        NdArray<Real> x = new NdArray<Real>(new[] { 1 }, BATCH_SIZE);
        NdArray<int> t = new NdArray<int>(new[] { 1 }, BATCH_SIZE);

        // Batch row j reads from its own region of the data, offset by jump * j;
        // the target is the entry that follows the input, wrapping at the end.
        for (int j = 0; j < BATCH_SIZE; j++)
        {
            int offset = jump * j + i;
            x.Data[j] = trainTokens[(int)(offset % wholeLen)];
            t.Data[j] = trainTokens[(int)((offset + 1) % wholeLen)];
        }

        NdArray<Real> output = net.Forward(x)[0];
        Real sumLoss = new SoftmaxCrossEntropy<Real>().Evaluate(output, t);
        Console.WriteLine("[{0}/{1}] Loss: {2}", step, jump, sumLoss);
        net.Backward(output);

        //Run truncated BPTT
        if (step % BPROP_LEN == 0)
        {
            clipper.Update();
            sgd.Update();
            net.ResetState();
        }

        if (step % jump == 0)
        {
            epoch++;
            Console.WriteLine("evaluate");

            var validPerplexity = Evaluate(net, validTokens);
            Console.WriteLine("validation perplexity: {0}", validPerplexity);

            // From the sixth epoch on, shrink the learning rate after each epoch.
            if (epoch >= 6)
            {
                sgd.LearningRate /= 1.2f;
                Console.WriteLine("learning rate =" + sgd.LearningRate);
            }
        }
    }

    Console.WriteLine("test start");
    var testPerplexity = Evaluate(net, testTokens);
    Console.WriteLine("test perplexity:" + testPerplexity);
}