public RobustHistogram MergeResults(int samples)
{
    var merged = new RobustHistogram(this.Scale);
    foreach (var histo in this.Histos.Reverse<RobustHistogram>().Take(samples))
    {
        merged.Merge(histo);
    }
    return merged;
}
public void Merge(RobustHistogram other)
{
    // widen the bounds to cover both distributions
    if (other.Min < this.Min) this.Min = other.Min;
    if (other.Max > this.Max) this.Max = other.Max;
    // accumulate the running totals used to compute the mean and standard deviation
    this.Count += other.Count;
    this.InternalSum += other.InternalSum;
    this.InternalSumSquares += other.InternalSumSquares;
    // add the per-bucket sample counts
    for (int b = 0; b < NumBuckets; b++)
    {
        this.Buckets[b] += other.Buckets[b];
    }
}
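// A minimal sketch (not part of the original source) of how Merge() is meant to be used:
// each worker fills its own histogram without locking, and the results are folded
// together at the end. The helper name and parameters below are hypothetical.
public static RobustHistogram CombineWorkerHistograms(IEnumerable<RobustHistogram> workerHistograms, RobustHistogram.TimeScale scale)
{
    var combined = new RobustHistogram(scale);
    foreach (var histo in workerHistograms)
    {
        combined.Merge(histo); // widens Min/Max and sums the counters and buckets
    }
    return combined;
}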
private RobustHistogram GetFrame(TimeSpan elapsed)
{
    int index = GetGraphIndex(elapsed);
    if (index != this.LastIndex && this.Completed != null && HasFrame(this.LastIndex))
    {
        // notify the caller that the previous frame is complete; a 'true' result requests a reset
        if (this.Completed(this.Histos[this.LastIndex - this.Offset], this.LastIndex))
        { // reset!
            this.Histos.Clear();
            this.Offset = this.LastIndex;
        }
        this.LastIndex = index;
    }
    // lazily allocate any missing frames up to 'index'
    while (!HasFrame(index))
    {
        var histo = new RobustHistogram(this.Scale);
        this.Histos.Add(histo);
    }
    return this.Histos[index - this.Offset];
}
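// GetFrame() relies on HasFrame(), which is not shown here. A plausible implementation,
// consistent with the 'index - this.Offset' indexing used above (an assumption, not the
// original code), would be:
private bool HasFrame(int index)
{
    // a frame exists if it falls after the last reset point (Offset) and has already been allocated
    return index >= this.Offset && (index - this.Offset) < this.Histos.Count;
}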
public static async Task Map(string[] path, IVarTuple extras, IFdbDatabase db, TextWriter log, CancellationToken ct)
{
    // we want to merge the map of shards with the map of directories from the Directory Layer,
    // and count, for each directory, how many shards intersect it
    bool progress = log == Console.Out;

    var folder = await TryOpenCurrentDirectoryAsync(path, db, ct);
    if (folder == null)
    {
        Program.Error(log, "# Directory not found");
        return;
    }

    Program.StdOut(log, "Listing all shards...");

    // note: this may break in future versions of the DL! Maybe we need a custom API to get a flat list of all directories in a DL that span a specific range?
    var span = folder.DirectoryLayer.ContentSubspace.Keys.ToRange();
    var shards = await Fdb.System.GetChunksAsync(db, span, ct);
    int totalShards = shards.Count;
    Program.StdOut(log, $"> Found {totalShards} shard(s) in partition /{folder.DirectoryLayer.FullName}", ConsoleColor.Gray);

    Program.StdOut(log, "Listing all directories...");
    var map = new Dictionary<string, int>(StringComparer.Ordinal);

    // credit 'c' shards to the directory at path 'p' and to all of its ancestors
    void Account(string[] p, int c)
    {
        for (int i = 1; i <= p.Length; i++)
        {
            var s = "/" + string.Join("/", p, 0, i);
            map[s] = map.TryGetValue(s, out int x) ? (x + c) : c;
        }
    }

    var work = new Stack<IFdbDirectory>();
    work.Push(folder);

    var dirs = new List<IFdbDirectory>();
    int n = 0;
    while (work.Count > 0)
    {
        var cur = work.Pop();
        var names = await cur.ListAsync(db, ct);
        foreach (var name in names)
        {
            var sub = await cur.TryOpenAsync(db, name, ct);
            if (sub != null)
            {
                var p = sub.FullName;
                if (sub is FdbDirectoryPartition)
                { // skip sub-partitions
                    if (progress) log.Write("\r");
                    Program.StdOut(log, $"! Skipping partition {sub.Name}     ", ConsoleColor.DarkRed);
                    n = 0;
                    continue;
                }
                if (progress) log.Write($"\r/{p}{(p.Length > n ? String.Empty : new string(' ', n - p.Length))}");
                n = p.Length;
                work.Push(sub);
                dirs.Add(sub);
            }
        }
    }
    if (progress) log.Write("\r" + new string(' ', n + 2) + "\r");
    Program.StdOut(log, $"> Found {dirs.Count} sub-directories", ConsoleColor.Gray);

    log.WriteLine();
    Program.StdOut(log, "Estimating size of each directory...");
    int foundShards = 0;
    n = 0;
    int max = 0;
    IFdbDirectory bigBad = null;
    foreach (var dir in dirs)
    {
        if (progress) log.Write($"\r> {dir.Name}{(dir.Name.Length > n ? String.Empty : new string(' ', n - dir.Name.Length))}");
        n = dir.Name.Length;

        var p = dir.Path.ToArray();
        var key = ((KeySubspace)dir).GetPrefix();

        // verify that the subspace has at least one key inside
        var bounds = await db.ReadAsync(async (tr) =>
        {
            var kvs = await Task.WhenAll(
                tr.GetRange(KeyRange.StartsWith(key)).FirstOrDefaultAsync(),
                tr.GetRange(KeyRange.StartsWith(key)).LastOrDefaultAsync()
            );
            return new { Min = kvs[0].Key, Max = kvs[1].Key };
        }, ct);

        if (bounds.Min.HasValue)
        { // folder is not empty
            shards = await Fdb.System.GetChunksAsync(db, KeyRange.StartsWith(key), ct);
            //TODO: we still need to check if the first and last shards really intersect the subspace,
            // i.e. if the shards actually contain data
            //Console.WriteLine("/{0} under {1} with {2} shard(s)", string.Join("/", p), FdbKey.Dump(key), shards.Count);
            foundShards += shards.Count;
            Account(p, shards.Count);
            if (shards.Count > max)
            {
                max = shards.Count;
                bigBad = dir;
            }
        }
        else
        {
            Account(p, 0);
        }
    }
    if (progress) log.Write("\r" + new string(' ', n + 2) + "\r");
    Program.StdOut(log, $"> Found a total of {foundShards:N0} shard(s) in {dirs.Count:N0} folder(s)", ConsoleColor.Gray);
    log.WriteLine();

    Program.StdOut(log, "Shards %Total Path");
    foreach (var kvp in map.OrderBy(x => x.Key))
    {
        Program.StdOut(log, $"{kvp.Value,6} {RobustHistogram.FormatHistoBar((double)kvp.Value / foundShards, 20),-20} {kvp.Key}", ConsoleColor.Gray);
    }
    log.WriteLine();

    if (bigBad != null)
    {
        Program.StdOut(log, $"Biggest folder is /{bigBad.FullName} with {max} shards ({100.0 * max / totalShards:N1}% total, {100.0 * max / foundShards:N1}% subtree)");
        log.WriteLine();
    }
}
public static async Task Sampling(string[] path, IVarTuple extras, IFdbDatabase db, TextWriter log, CancellationToken ct)
{
    double ratio = 0.1d;
    bool auto = true;
    if (extras.Count > 0)
    {
        double x = extras.Get<double>(0);
        if (x > 0 && x <= 1) ratio = x;
        auto = false;
    }

    var folder = await TryOpenCurrentDirectoryAsync(path, db, ct);
    KeyRange span;
    if (folder is FdbDirectorySubspace)
    {
        span = KeyRange.StartsWith((folder as FdbDirectorySubspace).Copy().GetPrefix());
        log.WriteLine($"Reading list of shards for /{String.Join("/", path)} under {FdbKey.Dump(span.Begin)} ...");
    }
    else
    {
        log.WriteLine("Reading list of shards for the whole cluster ...");
        span = KeyRange.All;
    }

    // dump keyServers
    var ranges = await Fdb.System.GetChunksAsync(db, span, ct);
    log.WriteLine($"> Found {ranges.Count:N0} shard(s)");

    // take a sample
    var samples = new List<KeyRange>();
    if (ranges.Count <= 32)
    { // small enough to scan it all
        samples.AddRange(ranges);
        log.WriteLine($"Sampling all {samples.Count:N0} shards ...");
    }
    else
    { // need to take a random subset
        var rnd = new Random();
        int sz = Math.Max((int)Math.Ceiling(ratio * ranges.Count), 1);
        if (auto)
        {
            if (sz > 100) sz = 100; // SAFETY
            if (sz < 32) sz = Math.Max(sz, Math.Min(32, ranges.Count));
        }
        var population = new List<KeyRange>(ranges);
        for (int i = 0; i < sz; i++)
        { // sample without replacement
            int p = rnd.Next(population.Count);
            samples.Add(population[p]);
            population.RemoveAt(p);
        }
        log.WriteLine($"Sampling {samples.Count:N0} out of {ranges.Count:N0} shards ({(100.0 * samples.Count / ranges.Count):N1}%) ...");
    }

    log.WriteLine();
    const string FORMAT_STRING = "{0,9} ║{1,10}{6,6} {2,-29} ║{3,10}{7,7} {4,-37} ║{5,10}";
    const string SCALE_KEY = "....--------========########M";
    const string SCALE_VAL = "....--------========########@@@@@@@@M";
    log.WriteLine(FORMAT_STRING, "Count", "Keys", SCALE_KEY, "Values", SCALE_VAL, "Total", "med.", "med.");

    var rangeOptions = new FdbRangeOptions { Mode = FdbStreamingMode.WantAll };

    samples = samples.OrderBy(x => x.Begin).ToList();

    long globalSize = 0;
    long globalCount = 0;
    int workers = 8; // Math.Max(4, Environment.ProcessorCount);

    var sw = Stopwatch.StartNew();
    var tasks = new List<Task>();
    int n = samples.Count;
    while (samples.Count > 0)
    {
        while (tasks.Count < workers && samples.Count > 0)
        {
            var range = samples[0];
            samples.RemoveAt(0);
            tasks.Add(Task.Run(async () =>
            {
                // histograms of key and value sizes (the Ticks scale is reused to store byte counts)
                var kk = new RobustHistogram(RobustHistogram.TimeScale.Ticks);
                var vv = new RobustHistogram(RobustHistogram.TimeScale.Ticks);

                #region Method 1: get_range everything...

                using (var tr = db.BeginTransaction(ct))
                {
                    long keySize = 0;
                    long valueSize = 0;
                    long count = 0;
                    int iter = 0;
                    var beginSelector = KeySelector.FirstGreaterOrEqual(range.Begin);
                    var endSelector = KeySelector.FirstGreaterOrEqual(range.End);
                    while (true)
                    {
                        FdbRangeChunk data = default(FdbRangeChunk);
                        FdbException error = null;
                        try
                        {
                            data = await tr.Snapshot.GetRangeAsync(
                                beginSelector,
                                endSelector,
                                rangeOptions,
                                iter
                            ).ConfigureAwait(false);
                        }
                        catch (FdbException e)
                        {
                            error = e;
                        }

                        if (error != null)
                        { // let the standard retry logic handle the error, then try again
                            await tr.OnErrorAsync(error.Code).ConfigureAwait(false);
                            continue;
                        }

                        if (data.Count == 0) break;

                        count += data.Count;
                        foreach (var kvp in data)
                        {
                            keySize += kvp.Key.Count;
                            valueSize += kvp.Value.Count;

                            kk.Add(TimeSpan.FromTicks(kvp.Key.Count));
                            vv.Add(TimeSpan.FromTicks(kvp.Value.Count));
                        }

                        if (!data.HasMore) break;

                        beginSelector = KeySelector.FirstGreaterThan(data.Last);
                        ++iter;
                    }

                    long totalSize = keySize + valueSize;
                    Interlocked.Add(ref globalSize, totalSize);
                    Interlocked.Add(ref globalCount, count);

                    lock (log)
                    {
                        log.WriteLine(FORMAT_STRING, count.ToString("N0"), FormatSize(keySize), kk.GetDistribution(begin: 1, end: 12000, fold: 2), FormatSize(valueSize), vv.GetDistribution(begin: 1, end: 120000, fold: 2), FormatSize(totalSize), FormatSize((int)Math.Ceiling(kk.Median)), FormatSize((int)Math.Ceiling(vv.Median)));
                    }
                }

                #endregion

                #region Method 2: estimate the count using key selectors...

                //long counter = await Fdb.System.EstimateCountAsync(db, range, ct);
                //Console.WriteLine("COUNT = " + counter.ToString("N0"));

                #endregion
            }, ct));
        }

        var done = await Task.WhenAny(tasks);
        tasks.Remove(done);
    }

    await Task.WhenAll(tasks);
    sw.Stop();

    log.WriteLine();
    if (n != ranges.Count)
    {
        log.WriteLine($"Sampled {FormatSize(globalSize)} ({globalSize:N0} bytes) and {globalCount:N0} keys in {sw.Elapsed.TotalSeconds:N1} sec");
        log.WriteLine($"> Estimated total size is {FormatSize(globalSize * ranges.Count / n)}");
    }
    else
    {
        log.WriteLine($"Found {FormatSize(globalSize)} ({globalSize:N0} bytes) and {globalCount:N0} keys in {sw.Elapsed.TotalSeconds:N1} sec");
        // compare to the whole cluster
        ranges = await Fdb.System.GetChunksAsync(db, FdbKey.MinValue, FdbKey.MaxValue, ct);
        log.WriteLine($"> This directory contains ~{(100.0 * n / ranges.Count):N2}% of all data");
    }
    log.WriteLine();
}
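// Sampling() above caps concurrency with a manual Task.WhenAny loop: start up to
// 'workers' tasks, and whenever one completes, free its slot and start the next.
// A generic sketch of that same pattern (hypothetical helper, not part of the source):
static async Task RunThrottledAsync<T>(IEnumerable<T> items, int maxWorkers, Func<T, Task> body)
{
    var tasks = new List<Task>();
    foreach (var item in items)
    {
        if (tasks.Count >= maxWorkers)
        {
            var done = await Task.WhenAny(tasks); // wait for a slot to free up
            tasks.Remove(done);
        }
        tasks.Add(body(item));
    }
    await Task.WhenAll(tasks); // drain the remaining workers
}
// Note that the final extrapolation in Sampling() (globalSize * ranges.Count / n) assumes
// the sampled shards are representative of the whole key space.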
public RobustTimeLine(TimeSpan step, RobustHistogram.TimeScale scale = RobustHistogram.TimeScale.Milliseconds, Func<RobustHistogram, int, bool> onCompleted = null)
{
    if (step <= TimeSpan.Zero) throw new ArgumentException("Time step must be greater than zero", nameof(step));

    this.Histos = new List<RobustHistogram>();
    this.Step = step;
    this.Scale = scale; // fix: the scale parameter was never stored, but GetFrame() and MergeResults() both read this.Scale
    this.Completed = onCompleted;
    this.Clock = Stopwatch.StartNew();
}
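// A hedged usage sketch (hypothetical, not part of the original source): the onCompleted
// callback receives each finished frame and its index; per GetFrame() above, returning
// true discards the accumulated frames and restarts the window, false keeps the history.
var timeline = new RobustTimeLine(
    TimeSpan.FromSeconds(1),
    RobustHistogram.TimeScale.Milliseconds,
    onCompleted: (histo, index) =>
    {
        Console.WriteLine($"frame #{index}: {histo.Count:N0} samples, median {histo.Median:N1} ms");
        return false; // keep accumulating frames
    }
);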