예제 #1
0
        public RobustHistogram MergeResults(int samples)
        {
            var merged = new RobustHistogram(this.Scale);

            foreach (var histo in this.Histos.Reverse <RobustHistogram>().Take(samples))
            {
                merged.Merge(histo);
            }
            return(merged);
        }
예제 #2
0
 public void Merge(RobustHistogram other)
 {
     if (other.Min < this.Min)
     {
         this.Min = other.Min;
     }
     if (other.Max > this.Max)
     {
         this.Max = other.Max;
     }
     this.Count              += other.Count;
     this.InternalSum        += other.InternalSum;
     this.InternalSumSquares += other.InternalSumSquares;
     for (int b = 0; b < NumBuckets; b++)
     {
         this.Buckets[b] += other.Buckets[b];
     }
 }
예제 #3
0
        private RobustHistogram GetFrame(TimeSpan elapsed)
        {
            int index = GetGraphIndex(elapsed);

            if (index != this.LastIndex && this.Completed != null && HasFrame(this.LastIndex))
            {
                if (this.Completed(this.Histos[this.LastIndex - this.Offset], this.LastIndex))
                {                 // reset!
                    this.Histos.Clear();
                    this.Offset = this.LastIndex;
                }
                this.LastIndex = index;
            }

            while (!HasFrame(index))
            {
                var histo = new RobustHistogram(this.Scale);
                this.Histos.Add(histo);
            }

            return(this.Histos[index - this.Offset]);
        }
        public static async Task Map(string[] path, IVarTuple extras, IFdbDatabase db, TextWriter log, CancellationToken ct)
        {
            // we want to merge the map of shards, with the map of directories from the Directory Layer, and count for each directory how many shards intersect
            bool progress = log == Console.Out;

            var folder = await TryOpenCurrentDirectoryAsync(path, db, ct);

            if (folder == null)
            {
                Program.Error(log, "# Directory not found");
                return;
            }

            Program.StdOut(log, "Listing all shards...");

            // note: this may break in future versions of the DL! Maybe we need a custom API to get a flat list of all directories in a DL that span a specific range ?
            var span   = folder.DirectoryLayer.ContentSubspace.Keys.ToRange();
            var shards = await Fdb.System.GetChunksAsync(db, span, ct);

            int totalShards = shards.Count;

            Program.StdOut(log, $"> Found {totalShards} shard(s) in partition /{folder.DirectoryLayer.FullName}", ConsoleColor.Gray);

            Program.StdOut(log, "Listing all directories...");
            var map = new Dictionary <string, int>(StringComparer.Ordinal);

            void Account(string[] p, int c)
            {
                for (int i = 1; i <= p.Length; i++)
                {
                    var s = "/" + string.Join("/", p, 0, i);
                    map[s] = map.TryGetValue(s, out int x) ? (x + c) : c;
                }
            }

            var work = new Stack <IFdbDirectory>();

            work.Push(folder);

            var dirs = new List <IFdbDirectory>();
            int n    = 0;

            while (work.Count > 0)
            {
                var cur = work.Pop();
                // skip sub partitions

                var names = await cur.ListAsync(db, ct);

                foreach (var name in names)
                {
                    var sub = await cur.TryOpenAsync(db, name, ct);

                    if (sub != null)
                    {
                        var p = sub.FullName;
                        if (sub is FdbDirectoryPartition)
                        {
                            if (progress)
                            {
                                log.Write("\r");
                            }
                            Program.StdOut(log, $"! Skipping partition {sub.Name}     ", ConsoleColor.DarkRed);
                            n = 0;
                            continue;
                        }
                        if (progress)
                        {
                            log.Write($"\r/{p}{(p.Length > n ? String.Empty : new string(' ', n - p.Length))}");
                        }
                        n = p.Length;
                        work.Push(sub);
                        dirs.Add(sub);
                    }
                }
            }
            if (progress)
            {
                log.Write("\r" + new string(' ', n + 2) + "\r");
            }
            Program.StdOut(log, $"> Found {dirs.Count} sub-directories", ConsoleColor.Gray);

            log.WriteLine();
            Program.StdOut(log, "Estimating size of each directory...");
            int foundShards = 0;

            n = 0;
            int           max    = 0;
            IFdbDirectory bigBad = null;

            foreach (var dir in dirs)
            {
                if (progress)
                {
                    log.Write($"\r> {dir.Name}{(dir.Name.Length > n ? String.Empty : new string(' ', n - dir.Name.Length))}");
                }
                n = dir.Name.Length;

                var p   = dir.Path.ToArray();
                var key = ((KeySubspace)dir).GetPrefix();

                // verify that the subspace has at least one key inside
                var bounds = await db.ReadAsync(async (tr) =>
                {
                    var kvs = await Task.WhenAll(
                        tr.GetRange(KeyRange.StartsWith(key)).FirstOrDefaultAsync(),
                        tr.GetRange(KeyRange.StartsWith(key)).LastOrDefaultAsync()
                        );
                    return(new { Min = kvs[0].Key, Max = kvs[1].Key });
                }, ct);

                if (bounds.Min.HasValue)
                {                 // folder is not empty
                    shards = await Fdb.System.GetChunksAsync(db, KeyRange.StartsWith(key), ct);

                    //TODO: we still need to check if the first and last shard really intersect the subspace

                    // we need to check if the shards actually contain data
                    //Console.WriteLine("/{0} under {1} with {2} shard(s)", string.Join("/", p), FdbKey.Dump(key), shards.Count);
                    foundShards += shards.Count;
                    Account(p, shards.Count);
                    if (shards.Count > max)
                    {
                        max = shards.Count; bigBad = dir;
                    }
                }
                else
                {
                    Account(p, 0);
                }
            }
            if (progress)
            {
                log.Write("\r" + new string(' ', n + 2) + "\r");
            }
            Program.StdOut(log, $"> Found a total of {foundShards:N0} shard(s) in {dirs.Count:N0} folder(s)", ConsoleColor.Gray);
            log.WriteLine();

            Program.StdOut(log, "Shards %Total              Path");
            foreach (var kvp in map.OrderBy(x => x.Key))
            {
                Program.StdOut(log, $"{kvp.Value,6} {RobustHistogram.FormatHistoBar((double)kvp.Value / foundShards, 20),-20} {kvp.Key}", ConsoleColor.Gray);
            }
            log.WriteLine();

            if (bigBad != null)
            {
                Program.StdOut(log, $"Biggest folder is /{bigBad.FullName} with {max} shards ({100.0 * max / totalShards:N1}% total, {100.0 * max / foundShards:N1}% subtree)");
                log.WriteLine();
            }
        }
        public static async Task Sampling(string[] path, IVarTuple extras, IFdbDatabase db, TextWriter log, CancellationToken ct)
        {
            double ratio = 0.1d;
            bool   auto  = true;

            if (extras.Count > 0)
            {
                double x = extras.Get <double>(0);
                if (x > 0 && x <= 1)
                {
                    ratio = x;
                }
                auto = false;
            }

            var folder = await TryOpenCurrentDirectoryAsync(path, db, ct);

            KeyRange span;

            if (folder is FdbDirectorySubspace)
            {
                span = KeyRange.StartsWith((folder as FdbDirectorySubspace).Copy().GetPrefix());
                log.WriteLine($"Reading list of shards for /{String.Join("/", path)} under {FdbKey.Dump(span.Begin)} ...");
            }
            else
            {
                log.WriteLine("Reading list of shards for the whole cluster ...");
                span = KeyRange.All;
            }

            // dump keyServers
            var ranges = await Fdb.System.GetChunksAsync(db, span, ct);

            log.WriteLine($"> Found {ranges.Count:N0} shard(s)");

            // take a sample
            var samples = new List <KeyRange>();

            if (ranges.Count <= 32)
            {             // small enough to scan it all
                samples.AddRange(ranges);
                log.WriteLine($"Sampling all {samples.Count:N0} shards ...");
            }
            else
            {             // need to take a random subset
                var rnd = new Random();
                int sz  = Math.Max((int)Math.Ceiling(ratio * ranges.Count), 1);
                if (auto)
                {
                    if (sz > 100)
                    {
                        sz = 100;                               //SAFETY
                    }
                    if (sz < 32)
                    {
                        sz = Math.Max(sz, Math.Min(32, ranges.Count));
                    }
                }

                var population = new List <KeyRange>(ranges);
                for (int i = 0; i < sz; i++)
                {
                    int p = rnd.Next(population.Count);
                    samples.Add(population[p]);
                    population.RemoveAt(p);
                }
                log.WriteLine($"Sampling {samples.Count:N0} out of {ranges.Count:N0} shards ({(100.0 * samples.Count / ranges.Count):N1}%) ...");
            }

            log.WriteLine();
            const string FORMAT_STRING = "{0,9} ║{1,10}{6,6} {2,-29} ║{3,10}{7,7} {4,-37} ║{5,10}";
            const string SCALE_KEY     = "....--------========########M";
            const string SCALE_VAL     = "....--------========########@@@@@@@@M";

            log.WriteLine(FORMAT_STRING, "Count", "Keys", SCALE_KEY, "Values", SCALE_VAL, "Total", "med.", "med.");

            var rangeOptions = new FdbRangeOptions {
                Mode = FdbStreamingMode.WantAll
            };

            samples = samples.OrderBy(x => x.Begin).ToList();

            long globalSize  = 0;
            long globalCount = 0;
            int  workers     = 8;        // Math.Max(4, Environment.ProcessorCount);

            var sw    = Stopwatch.StartNew();
            var tasks = new List <Task>();
            int n     = samples.Count;

            while (samples.Count > 0)
            {
                while (tasks.Count < workers && samples.Count > 0)
                {
                    var range = samples[0];
                    samples.RemoveAt(0);
                    tasks.Add(Task.Run(async() =>
                    {
                        var kk = new RobustHistogram(RobustHistogram.TimeScale.Ticks);
                        var vv = new RobustHistogram(RobustHistogram.TimeScale.Ticks);

                        #region Method 1: get_range everything...

                        using (var tr = db.BeginTransaction(ct))
                        {
                            long keySize   = 0;
                            long valueSize = 0;
                            long count     = 0;

                            int iter          = 0;
                            var beginSelector = KeySelector.FirstGreaterOrEqual(range.Begin);
                            var endSelector   = KeySelector.FirstGreaterOrEqual(range.End);
                            while (true)
                            {
                                FdbRangeChunk data = default(FdbRangeChunk);
                                FdbException error = null;
                                try
                                {
                                    data = await tr.Snapshot.GetRangeAsync(
                                        beginSelector,
                                        endSelector,
                                        rangeOptions,
                                        iter
                                        ).ConfigureAwait(false);
                                }
                                catch (FdbException e)
                                {
                                    error = e;
                                }

                                if (error != null)
                                {
                                    await tr.OnErrorAsync(error.Code).ConfigureAwait(false);
                                    continue;
                                }

                                if (data.Count == 0)
                                {
                                    break;
                                }

                                count += data.Count;
                                foreach (var kvp in data)
                                {
                                    keySize   += kvp.Key.Count;
                                    valueSize += kvp.Value.Count;

                                    kk.Add(TimeSpan.FromTicks(kvp.Key.Count));
                                    vv.Add(TimeSpan.FromTicks(kvp.Value.Count));
                                }

                                if (!data.HasMore)
                                {
                                    break;
                                }

                                beginSelector = KeySelector.FirstGreaterThan(data.Last);
                                ++iter;
                            }

                            long totalSize = keySize + valueSize;
                            Interlocked.Add(ref globalSize, totalSize);
                            Interlocked.Add(ref globalCount, count);

                            lock (log)
                            {
                                log.WriteLine(FORMAT_STRING, count.ToString("N0"), FormatSize(keySize), kk.GetDistribution(begin: 1, end: 12000, fold: 2), FormatSize(valueSize), vv.GetDistribution(begin: 1, end: 120000, fold: 2), FormatSize(totalSize), FormatSize((int)Math.Ceiling(kk.Median)), FormatSize((int)Math.Ceiling(vv.Median)));
                            }
                        }
                        #endregion

                        #region Method 2: estimate the count using key selectors...

                        //long counter = await Fdb.System.EstimateCountAsync(db, range, ct);
                        //Console.WriteLine("COUNT = " + counter.ToString("N0"));

                        #endregion
                    }, ct));
                }

                var done = await Task.WhenAny(tasks);

                tasks.Remove(done);
            }

            await Task.WhenAll(tasks);

            sw.Stop();

            log.WriteLine();
            if (n != ranges.Count)
            {
                log.WriteLine($"Sampled {FormatSize(globalSize)} ({globalSize:N0} bytes) and {globalCount:N0} keys in {sw.Elapsed.TotalSeconds:N1} sec");
                log.WriteLine($"> Estimated total size is {FormatSize(globalSize * ranges.Count / n)}");
            }
            else
            {
                log.WriteLine($"Found {FormatSize(globalSize)} ({globalSize:N0} bytes) and {globalCount:N0} keys in {sw.Elapsed.TotalSeconds:N1} sec");
                // compare to the whole cluster
                ranges = await Fdb.System.GetChunksAsync(db, FdbKey.MinValue, FdbKey.MaxValue, ct);

                log.WriteLine($"> This directory contains ~{(100.0 * n / ranges.Count):N2}% of all data");
            }
            log.WriteLine();
        }
		public RobustHistogram MergeResults(int samples)
		{
			var merged = new RobustHistogram(this.Scale);
			foreach (var histo in this.Histos.Reverse<RobustHistogram>().Take(samples))
			{
				merged.Merge(histo);
			}
			return merged;
		}
		private RobustHistogram GetFrame(TimeSpan elapsed)
		{
			int index = GetGraphIndex(elapsed);

			if (index != this.LastIndex && this.Completed != null && HasFrame(this.LastIndex))
			{
				if (this.Completed(this.Histos[this.LastIndex - this.Offset], this.LastIndex))
				{ // reset!
					this.Histos.Clear();
					this.Offset = this.LastIndex;
				}
				this.LastIndex = index;
			}

			while (!HasFrame(index))
			{
				var histo = new RobustHistogram(this.Scale);
				this.Histos.Add(histo);
			}

			return this.Histos[index - this.Offset];
		}
		public RobustTimeLine(TimeSpan step, RobustHistogram.TimeScale scale = RobustHistogram.TimeScale.Milliseconds, Func<RobustHistogram, int, bool> onCompleted = null)
		{
			if (step <= TimeSpan.Zero) throw new ArgumentException("Time step must be greater than zero", "step");

			this.Histos = new List<RobustHistogram>();
			this.Step = step;
			this.Completed = onCompleted;
			this.Clock = Stopwatch.StartNew();
		}
		public void Merge(RobustHistogram other)
		{
			if (other.Min < this.Min) this.Min = other.Min;
			if (other.Max > this.Max) this.Max = other.Max;
			this.Count += other.Count;
			this.InternalSum += other.InternalSum;
			this.InternalSumSquares += other.InternalSumSquares;
			for (int b = 0; b < NumBuckets; b++)
			{
				this.Buckets[b] += other.Buckets[b];
			}
		}
		public static async Task Sampling(string[] path, IFdbTuple extras, IFdbDatabase db, TextWriter log, CancellationToken ct)
		{
			double ratio = 0.1d;
			bool auto = true;
			if (extras.Count > 0)
			{
				double x = extras.Get<double>(0);
				if (x > 0 && x <= 1) ratio = x;
				auto = false;
			}

			var folder = await TryOpenCurrentDirectoryAsync(path, db, ct);
			FdbKeyRange span;
			if (folder is FdbDirectorySubspace)
			{
				span = FdbKeyRange.StartsWith((folder as FdbDirectorySubspace).Copy());
				log.WriteLine("Reading list of shards for /{0} under {1} ...", String.Join("/", path), FdbKey.Dump(span.Begin));
			}
			else
			{
				log.WriteLine("Reading list of shards for the whole cluster ...");
				span = FdbKeyRange.All;
			}

			// dump keyServers
			var ranges = await Fdb.System.GetChunksAsync(db, span, ct);
			log.WriteLine("> Found {0:N0} shard(s)", ranges.Count);

			// take a sample
			var samples = new List<FdbKeyRange>();

			if (ranges.Count <= 32)
			{ // small enough to scan it all
				samples.AddRange(ranges);
				log.WriteLine("Sampling all {0:N0} shards ...", samples.Count);
			}
			else
			{ // need to take a random subset
				var rnd = new Random();
				int sz = Math.Max((int)Math.Ceiling(ratio * ranges.Count), 1);
				if (auto)
				{
					if (sz > 100) sz = 100; //SAFETY
					if (sz < 32) sz = Math.Max(sz, Math.Min(32, ranges.Count));
				}

				var population = new List<FdbKeyRange>(ranges);
				for (int i = 0; i < sz; i++)
				{
					int p = rnd.Next(population.Count);
					samples.Add(population[p]);
					population.RemoveAt(p);
				}
				log.WriteLine("Sampling " + samples.Count + " out of " + ranges.Count + " shards (" + (100.0 * samples.Count / ranges.Count).ToString("N1") + "%) ...");
			}

			log.WriteLine();
			const string FORMAT_STRING = "{0,9} ║{1,10}{6,6} {2,-29} ║{3,10}{7,7} {4,-37} ║{5,10}";
			const string SCALE_KEY = "....--------========########M";
			const string SCALE_VAL = "....--------========########@@@@@@@@M";
			log.WriteLine(FORMAT_STRING, "Count", "Keys", SCALE_KEY, "Values", SCALE_VAL, "Total", "med.", "med.");

			var rangeOptions = new FdbRangeOptions { Mode = FdbStreamingMode.WantAll };

			samples = samples.OrderBy(x => x.Begin).ToList();

			long globalSize = 0;
			long globalCount = 0;
			int workers = 8; // Math.Max(4, Environment.ProcessorCount);

			var sw = Stopwatch.StartNew();
			var tasks = new List<Task>();
			int n = samples.Count;
			while (samples.Count > 0)
			{
				while (tasks.Count < workers && samples.Count > 0)
				{
					var range = samples[0];
					samples.RemoveAt(0);
					tasks.Add(Task.Run(async () =>
					{
						var kk = new RobustHistogram(RobustHistogram.TimeScale.Ticks);
						var vv = new RobustHistogram(RobustHistogram.TimeScale.Ticks);

						#region Method 1: get_range everything...

						using (var tr = db.BeginTransaction(ct))
						{
							long keySize = 0;
							long valueSize = 0;
							long count = 0;

							int iter = 0;
							var beginSelector = FdbKeySelector.FirstGreaterOrEqual(range.Begin);
							var endSelector = FdbKeySelector.FirstGreaterOrEqual(range.End);
							while (true)
							{
								FdbRangeChunk data = default(FdbRangeChunk);
								FdbException error = null;
								try
								{
									data = await tr.Snapshot.GetRangeAsync(
										beginSelector,
										endSelector,
										rangeOptions,
										iter
									).ConfigureAwait(false);
								}
								catch (FdbException e)
								{
									error = e;
								}

								if (error != null)
								{
									await tr.OnErrorAsync(error.Code).ConfigureAwait(false);
									continue;
								}

								if (data.Count == 0) break;

								count += data.Count;
								foreach (var kvp in data.Chunk)
								{
									keySize += kvp.Key.Count;
									valueSize += kvp.Value.Count;

									kk.Add(TimeSpan.FromTicks(kvp.Key.Count));
									vv.Add(TimeSpan.FromTicks(kvp.Value.Count));
								}

								if (!data.HasMore) break;

								beginSelector = FdbKeySelector.FirstGreaterThan(data.Last.Key);
								++iter;
							}

							long totalSize = keySize + valueSize;
							Interlocked.Add(ref globalSize, totalSize);
							Interlocked.Add(ref globalCount, count);

							lock (log)
							{
								log.WriteLine(FORMAT_STRING, count.ToString("N0"), FormatSize(keySize), kk.GetDistribution(begin: 1, end: 12000, fold: 2), FormatSize(valueSize), vv.GetDistribution(begin: 1, end: 120000, fold: 2), FormatSize(totalSize), FormatSize((int)Math.Ceiling(kk.Median)), FormatSize((int)Math.Ceiling(vv.Median)));
							}
						}
						#endregion

						#region Method 2: estimate the count using key selectors...

						//long counter = await Fdb.System.EstimateCountAsync(db, range, ct);
						//Console.WriteLine("COUNT = " + counter.ToString("N0"));

						#endregion
					}, ct));
				}

				var done = await Task.WhenAny(tasks);
				tasks.Remove(done);
			}

			await Task.WhenAll(tasks);
			sw.Stop();

			log.WriteLine();
			if (n != ranges.Count)
			{
				log.WriteLine("Sampled " + FormatSize(globalSize) + " (" + globalSize.ToString("N0") + " bytes) and " + globalCount.ToString("N0") + " keys in " + sw.Elapsed.TotalSeconds.ToString("N1") + " sec");
				log.WriteLine("> Estimated total size is " + FormatSize(globalSize * ranges.Count / n));
			}
			else
			{
				log.WriteLine("Found " + FormatSize(globalSize) + " (" + globalSize.ToString("N0") + " bytes) and " + globalCount.ToString("N0") + " keys in " + sw.Elapsed.TotalSeconds.ToString("N1") + " sec");
				// compare to the whole cluster
				ranges = await Fdb.System.GetChunksAsync(db, FdbKey.MinValue, FdbKey.MaxValue, ct);
				log.WriteLine("> This directory contains ~{0:N2}% of all data", (100.0 * n / ranges.Count));
			}
			log.WriteLine();
		}