/// <summary>
/// Verifies that <c>RawHistogram</c> and <c>LongHistogram</c> report a mean
/// within 0.1% of the analytically expected value.
/// </summary>
public void TestGetMean()
{
    // The mean is expected to be accurate to ~3 decimal points (~0.1%).
    const double relativeTolerance = 0.001;

    // Direct average of the raw results.
    var rawMean = ((10000.0 * 1000) + (1.0 * 100000000)) / 10001;

    // Average of 1 msec for half the time and 50 sec for the other half.
    var correctedMean = (1000.0 + 50000000.0) / 2;

    Assert.AreEqual(
        rawMean,
        RawHistogram.GetMean(),
        rawMean * relativeTolerance,
        "Raw mean is " + rawMean + " +/- 0.1%");

    Assert.AreEqual(
        correctedMean,
        LongHistogram.GetMean(),
        correctedMean * relativeTolerance,
        "Mean is " + correctedMean + " +/- 0.1%");
}
/// <summary>
/// Records the current <see cref="GC.GetTotalMemory(bool)"/> reading into
/// <paramref name="recorder"/>, adds the recorder's latest interval histogram to
/// <paramref name="accumulatingHistogram"/>, appends that interval to the log
/// writer, and reports the running count, mean and standard deviation.
/// </summary>
/// <param name="accumulatingHistogram">
/// Histogram that accumulates every interval; when <c>null</c> nothing is
/// accumulated and no statistics are reported.
/// </param>
/// <param name="recorder">
/// Source of interval histograms; when <c>null</c> nothing is recorded,
/// accumulated or appended to the log.
/// </param>
static void DocumentResults(LongHistogram accumulatingHistogram, Recorder recorder)
{
    recorder?.RecordValue(GC.GetTotalMemory(false));
    var histogram = recorder?.GetIntervalHistogram();

    // Without a recorder there is no interval to accumulate or log; passing
    // null on to Add/Append would only fail further down.
    if (histogram != null)
    {
        accumulatingHistogram?.Add(histogram);
        _logWriter?.Append(histogram);
    }

    // The statistics below dereference the accumulating histogram directly,
    // so bail out here instead of throwing a NullReferenceException.
    if (accumulatingHistogram == null)
    {
        return;
    }

    RILogManager.Default?.SendDebug($"Accumulated.TotalCount = {accumulatingHistogram.TotalCount,10:G}.");
    RILogManager.Default?.SendDebug(
        "Mean: " + BytesToString(accumulatingHistogram.GetMean()) +
        ", StdDev: " + BytesToString(accumulatingHistogram.GetStdDeviation()));
}
/// <summary>
/// Verifies that a histogram of values scaled by 512 is equivalent to the
/// unscaled histogram: mean (within a relative tolerance of 1e-6), total
/// count, 99th percentile and maximum. The same four checks are then repeated
/// for the post-corrected pair of histograms.
/// </summary>
public void TestScalingEquivalence()
{
    Assert.AreEqual(
        LongHistogram.GetMean() * 512,
        ScaledHistogram.GetMean(),
        ScaledHistogram.GetMean() * 0.000001,
        "averages should be equivalent");
    Assert.AreEqual(
        LongHistogram.TotalCount,
        ScaledHistogram.TotalCount,
        "total count should be the same");
    Assert.AreEqual(
        LongHistogram.LowestEquivalentValue(LongHistogram.GetValueAtPercentile(99.0)) * 512,
        ScaledHistogram.LowestEquivalentValue(ScaledHistogram.GetValueAtPercentile(99.0)),
        "99%'iles should be equivalent");
    Assert.AreEqual(
        ScaledHistogram.HighestEquivalentValue(LongHistogram.GetMaxValue() * 512),
        ScaledHistogram.GetMaxValue(),
        "Max should be equivalent for scaled data");

    // Same for post-corrected.
    // The mean check must use the post-corrected pair; comparing
    // LongHistogram/ScaledHistogram again would only repeat the first
    // assertion and leave the post-corrected mean unverified.
    Assert.AreEqual(
        PostCorrectedHistogram.GetMean() * 512,
        PostCorrectedScaledHistogram.GetMean(),
        PostCorrectedScaledHistogram.GetMean() * 0.000001,
        "averages should be equivalent");
    Assert.AreEqual(
        PostCorrectedHistogram.TotalCount,
        PostCorrectedScaledHistogram.TotalCount,
        "total count should be the same");
    Assert.AreEqual(
        PostCorrectedHistogram.LowestEquivalentValue(PostCorrectedHistogram.GetValueAtPercentile(99.0)) * 512,
        PostCorrectedScaledHistogram.LowestEquivalentValue(PostCorrectedScaledHistogram.GetValueAtPercentile(99.0)),
        "99%'iles should be equivalent");
    Assert.AreEqual(
        PostCorrectedScaledHistogram.HighestEquivalentValue(PostCorrectedHistogram.GetMaxValue() * 512),
        PostCorrectedScaledHistogram.GetMaxValue(),
        "Max should be equivalent for post-corrected data");
}
/// <summary>
/// Snapshots the mean, a fixed set of percentiles and the total count of
/// <paramref name="underlying"/> into fields of this instance.
/// </summary>
/// <remarks>
/// A single thread calculates a variety of commonly-accessed quantities here,
/// so that all threads can read the cached values without synchronization.
/// Synchronization is only required for values that are not cached.
/// </remarks>
/// <param name="underlying">Histogram the cached values are read from.</param>
private CachedValuesHistogram(LongHistogram underlying)
{
    // Nothing recorded: leave every cached field at its current value.
    if (underlying.TotalCount <= 0)
    {
        return;
    }

    // Reads one percentile and narrows it to int exactly like the cached fields expect.
    int At(double percentile) => (int)underlying.GetValueAtPercentile(percentile);

    mean = (int)underlying.GetMean();

    p0 = At(0);
    p5 = At(5);
    p10 = At(10);
    p15 = At(15);
    p20 = At(20);
    p25 = At(25);
    p30 = At(30);
    p35 = At(35);
    p40 = At(40);
    p45 = At(45);
    p50 = At(50);
    p55 = At(55);
    p60 = At(60);
    p65 = At(65);
    p70 = At(70);
    p75 = At(75);
    p80 = At(80);
    p85 = At(85);
    p90 = At(90);
    p95 = At(95);
    p99 = At(99);
    p99_5 = At(99.5);
    p99_9 = At(99.9);
    p99_95 = At(99.95);
    p99_99 = At(99.99);
    p100 = At(100);

    totalCount = underlying.TotalCount;
}
/// <summary>
/// Builds a vocabulary from the downloaded train/validation/test files, trains a
/// two-layer LSTM language model with truncated BPTT, and evaluates it, calling
/// <c>DocumentResults</c> after each major step and after every training iteration.
/// The interval histograms are written to <c>LogPath</c>; once the log is closed its
/// contents and the accumulated percentile distribution are reported.
/// </summary>
public static void Run()
{
    _outputStream = File.Create(LogPath);

    LongHistogram accumulatingHistogram;

    // The writer and the stream are static fields shared with DocumentResults,
    // so a using declaration cannot own them. try/finally guarantees the log file
    // is released even when downloading, loading or training throws.
    try
    {
        _logWriter = new HistogramLogWriter(_outputStream);
        _logWriter.Write(DateTime.Now);

        var recorder = HistogramFactory
            .With64BitBucketSize()
            ?.WithValuesFrom(1)
            ?.WithValuesUpTo(2345678912345)
            ?.WithPrecisionOf(3)
            ?.WithThreadSafeWrites()
            ?.WithThreadSafeReads()
            ?.Create();

        accumulatingHistogram = new LongHistogram(2345678912345, 3);

        var size = accumulatingHistogram.GetEstimatedFootprintInBytes();
        RILogManager.Default?.SendDebug("Histogram size = {0} bytes ({1:F2} MB)", size, size / 1024.0 / 1024.0);

        RILogManager.Default?.SendDebug("Recorded latencies [in system clock ticks]");
        accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.None, useCsvFormat: true);
        Console.WriteLine();

        RILogManager.Default?.SendDebug("Recorded latencies [in usec]");
        accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMicroseconds, useCsvFormat: true);
        Console.WriteLine();

        RILogManager.Default?.SendDebug("Recorded latencies [in msec]");
        accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);
        Console.WriteLine();

        RILogManager.Default?.SendDebug("Recorded latencies [in sec]");
        accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToSeconds, useCsvFormat: true);

        DocumentResults(accumulatingHistogram, recorder);
        RILogManager.Default?.SendDebug("Build Vocabulary.");
        DocumentResults(accumulatingHistogram, recorder);

        Vocabulary vocabulary = new Vocabulary();
        DocumentResults(accumulatingHistogram, recorder);

        string trainPath = InternetFileDownloader.Download(DOWNLOAD_URL + TRAIN_FILE, TRAIN_FILE);
        DocumentResults(accumulatingHistogram, recorder);

        string validPath = InternetFileDownloader.Download(DOWNLOAD_URL + VALID_FILE, VALID_FILE);
        DocumentResults(accumulatingHistogram, recorder);

        string testPath = InternetFileDownloader.Download(DOWNLOAD_URL + TEST_FILE, TEST_FILE);
        DocumentResults(accumulatingHistogram, recorder);

        int[] trainData = vocabulary.LoadData(trainPath);
        DocumentResults(accumulatingHistogram, recorder);

        int[] validData = vocabulary.LoadData(validPath);
        DocumentResults(accumulatingHistogram, recorder);

        int[] testData = vocabulary.LoadData(testPath);
        DocumentResults(accumulatingHistogram, recorder);

        int nVocab = vocabulary.Length;

        RILogManager.Default?.SendDebug("Network Initializing.");
        FunctionStack model = new FunctionStack("Test10",
            new EmbedID(nVocab, N_UNITS, name: "l1 EmbedID"),
            new Dropout(),
            new LSTM(true, N_UNITS, N_UNITS, name: "l2 LSTM"),
            new Dropout(),
            new LSTM(true, N_UNITS, N_UNITS, name: "l3 LSTM"),
            new Dropout(),
            new Linear(true, N_UNITS, nVocab, name: "l4 Linear")
        );
        DocumentResults(accumulatingHistogram, recorder);

        // Do not cut off at the given threshold; correct the rate using the L2 norm of all parameters.
        GradientClipping gradientClipping = new GradientClipping(threshold: GRAD_CLIP);
        SGD sgd = new SGD(learningRate: 1);
        model.SetOptimizer(gradientClipping, sgd);
        DocumentResults(accumulatingHistogram, recorder);

        Real wholeLen = trainData.Length;
        int jump = (int)Math.Floor(wholeLen / BATCH_SIZE);
        int epoch = 0;

        // Forward results waiting to be back-propagated at the next BPTT boundary.
        Stack<NdArray[]> backNdArrays = new Stack<NdArray[]>();

        RILogManager.Default?.SendDebug("Train Start.");
        double dVal;
        NdArray x = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);
        NdArray t = new NdArray(new[] { 1 }, BATCH_SIZE, (Function)null);

        for (int i = 0; i < jump * N_EPOCH; i++)
        {
            // Each batch slot reads from its own offset (jump * j) in the training data;
            // the target is the element that follows the input.
            for (int j = 0; j < BATCH_SIZE; j++)
            {
                x.Data[j] = trainData[(int)((jump * j + i) % wholeLen)];
                t.Data[j] = trainData[(int)((jump * j + i + 1) % wholeLen)];
            }

            NdArray[] result = model.Forward(true, x);
            Real sumLoss = new SoftmaxCrossEntropy().Evaluate(result, t);
            backNdArrays.Push(result);
            RILogManager.Default?.SendDebug("[{0}/{1}] Loss: {2}", i + 1, jump, sumLoss);

            // Run truncated BPTT
            if ((i + 1) % BPROP_LEN == 0)
            {
                while (backNdArrays.Count > 0)
                {
                    RILogManager.Default?.SendDebug("backward" + backNdArrays.Count);
                    model.Backward(true, backNdArrays.Pop());
                }

                model.Update();
                model.ResetState();
            }

            // One pass over the data (jump iterations) completed.
            if ((i + 1) % jump == 0)
            {
                epoch++;
                RILogManager.Default?.SendDebug("evaluate");
                dVal = Evaluate(model, validData);
                RILogManager.Default?.SendDebug($"validation perplexity: {dVal}");

                if (epoch >= 6)
                {
                    sgd.LearningRate /= 1.2;
                    RILogManager.Default?.SendDebug("learning rate =" + sgd.LearningRate);
                }
            }

            DocumentResults(accumulatingHistogram, recorder);
        }

        RILogManager.Default?.SendDebug("test start");
        dVal = Evaluate(model, testData);
        RILogManager.Default?.SendDebug("test perplexity:" + dVal);
        DocumentResults(accumulatingHistogram, recorder);
    }
    finally
    {
        // Close the log before it is read back below, and on every failure path.
        _logWriter?.Dispose();
        _outputStream.Dispose();
    }

    RILogManager.Default?.SendDebug("Log contents");
    RILogManager.Default?.SendDebug(File.ReadAllText(LogPath));
    Console.WriteLine();

    RILogManager.Default?.SendDebug("Percentile distribution (values reported in milliseconds)");
    accumulatingHistogram.OutputPercentileDistribution(Console.Out, outputValueUnitScalingRatio: OutputScalingFactor.TimeStampToMilliseconds, useCsvFormat: true);
    RILogManager.Default?.SendDebug("Mean: " + BytesToString(accumulatingHistogram.GetMean()) + ", StdDev: " + BytesToString(accumulatingHistogram.GetStdDeviation()));
}