/// <summary>
/// Builds the index. While unassigned objects remain, one of them is taken as
/// a reference, a new node is created for it, and the objects collected in the
/// "near" result for that reference are stored in the node.
/// </summary>
/// <param name="db">Database to index; it is also stored in <c>this.DB</c>.</param>
/// <param name="bsize">Size given to the "near" result (presumably the bucket capacity - confirm against Result).</param>
/// <param name="rand">Random source handed to the working sequential index.</param>
public virtual void Build(MetricDB db, int bsize, Random rand) {
    this.DB = db;
    int total = this.DB.Count;

    // Original author's note: a randomized order performs very well, even
    // compared with more "intelligent" strategies.
    var pending = new DynamicSequentialOrdered();
    pending.Build(db, rand);

    this.NODES = new List<Node>(total / bsize + 1);
    var distances = new List<ItemPair>(total);

    while (pending.Count > 0) {
        // Progress report every 100 nodes.
        bool report = (this.NODES.Count % 100) == 0;
        if (report) {
            Console.WriteLine("XXX {0}, bucketSize: {1}, remain {2}/{3}, db: {4}, date-time: {5}",
                this, bsize, pending.Count, db.Count, Path.GetFileName(db.Name), DateTime.Now);
        }

        // Take a reference out of the pending set and measure everything else against it.
        var centerID = pending.GetAnyItem();
        pending.Remove(centerID);
        distances.Clear();
        pending.ComputeDistances(this.DB[centerID], distances);

        var nearest = new Result(bsize);
        var farthest = new Result(1);
        pending.AppendKExtremes(nearest, farthest, distances);

        var bucket = new Node(centerID);
        this.NODES.Add(bucket);

        // The near objects leave the pending set and become members of the new node.
        pending.Remove(nearest);
        foreach (var member in nearest) {
            bucket.Add(member.ObjID, member.Dist);
        }
    }
}
/// <summary>
/// Builds the pivot list by selecting <paramref name="num_pivs"/> pivots. The first
/// pivot is drawn at random; each following pivot is the object whose stored
/// distance is closest to the mean of the pivot it is currently assigned to.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="seed">Seed of the random generator used for the first pivot and passed to ComputeDistRow.</param>
/// <param name="num_pivs">Number of pivots to select (one ComputeDistRow call per pivot).</param>
public EPListMeanPivots(MetricDB DB, int seed, int num_pivs) {
    // Items starts as null; it is read right after ComputeDistRow, so that call
    // is expected to populate it.
    this.Items = null;
    var pivs = new List<EPivot>(32);
    var rand = new Random(seed);
    var n = DB.Count;
    var idxseq = new DynamicSequentialOrdered();
    idxseq.Build(DB, RandomSets.GetIdentity(DB.Count));
    var tmp_items = new List<ItemPair>(DB.Count);
    int next_piv = rand.Next(0, n);

    // The loop counter is advanced only by the for-header. The previous code also
    // incremented it inside the body, so only about half of num_pivs pivots
    // were actually selected.
    for (int i = 0; i < num_pivs; ++i) {
        var varX = 0.0;
        double min_diff = double.MaxValue;
        this.ComputeDistRow(next_piv, idxseq, rand, pivs, tmp_items);

        for (int objID = 0; objID < this.Items.Length; ++objID) {
            var u = this.Items[objID];
            // u.ObjID indexes pivs here, i.e. it identifies the pivot of this entry.
            var diff = Math.Abs(u.Dist - pivs[u.ObjID].mean);
            if (diff < min_diff) {
                min_diff = diff;
                next_piv = objID;
            }
            varX += diff * diff / n;
        }

        // Report a 1-based pivot count.
        Console.WriteLine("XXXXXX i: {0}, variance: {1}", i + 1, varX);
    }

    this.Pivs = pivs.ToArray();
    Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Builds the pivot list by adding pivots one at a time for as long as the
/// value returned by <c>expected_cost</c> keeps decreasing, up to
/// <paramref name="max_iters"/> pivots. At least one pivot is always processed.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="seed">Seed of the random generator.</param>
/// <param name="num_indexes">Forwarded to <c>expected_cost</c>.</param>
/// <param name="max_iters">Upper bound on the number of iterations.</param>
/// <param name="error_factor">Forwarded to <c>expected_cost</c>.</param>
public EPListOptimized(MetricDB DB, int seed, int num_indexes, int max_iters, double error_factor) {
    Console.WriteLine("XXX {0}, num_indexes: {1}, max_iters: {2}, error_factor: {3}",
        this, num_indexes, max_iters, error_factor);

    this.Items = null;
    var pivotList = new List<EPivot>(32);
    var rand = new Random(seed);
    var n = DB.Count;
    var idxseq = new DynamicSequentialOrdered();
    idxseq.Build(DB, RandomSets.GetIdentity(DB.Count));
    var scratch = new List<ItemPair>(DB.Count);

    // Query statistics; only varY and qrad are consumed below.
    double mean;
    double varY;
    double qrad;
    PivotSelector.EstimateQueryStatistics(DB, rand, 128, 128, out mean, out varY, out qrad);

    double prev_cost = -1;
    double curr_cost = n;
    var selector = new PivotSelector(n, rand);
    int candidate = selector.NextPivot();
    int i = 0;
    bool improving;

    do {
        this.ComputeDistRow(candidate, idxseq, rand, pivotList, scratch);

        // Mean squared deviation of every item's distance from its pivot's mean.
        double varX = 0;
        for (int pos = 0; pos < this.Items.Length; ++pos) {
            var entry = this.Items[pos];
            var deviation = Math.Abs(entry.dist - pivotList[entry.objID].mean);
            varX += deviation * deviation / n;
        }

        // The next candidate is drawn before the cost is evaluated, as in the
        // original statement order.
        candidate = selector.NextPivot();
        ++i;

        prev_cost = curr_cost;
        curr_cost = this.expected_cost(qrad, varX, varY, n, i, num_indexes, error_factor);
        improving = (curr_cost - prev_cost) < 0;

        if (i % 10 == 1) {
            Console.Write("XXXXXX {0}, seed: {1}, iteration: {2}, DB: {3}, ", this, seed, i, DB.Name);
            Console.WriteLine("qcurr_cost: {0}, prev_cost: {1}, varX: {2}, varY: {3}, qrad: {4}",
                curr_cost, prev_cost, varX, varY, qrad);
        }
    } while (improving && i < max_iters);

    this.Pivs = pivotList.ToArray();
    Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Builds the pivot list in rounds of 16 pivots. After each round the average
/// value of <c>expected_cost</c> is compared with the previous round's average;
/// rounds continue while the ratio previous/current is at least
/// <c>1 + max_error_factor</c>.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="num_indexes">Forwarded to <c>expected_cost</c>.</param>
/// <param name="rand">Random source for statistics estimation, pivot selection and ComputeDistRow.</param>
/// <param name="max_error_factor">Minimum relative improvement between rounds required to continue.</param>
public EPListOptimizedA(MetricDB DB, int num_indexes, Random rand, double max_error_factor = 0.001) {
    this.Items = null;
    var pivotList = new List<EPivot>(32);
    var n = DB.Count;
    var idxseq = new DynamicSequentialOrdered();
    idxseq.Build(DB, RandomSets.GetIdentity(DB.Count));
    var scratch = new List<ItemPair>(DB.Count);

    // Query statistics; only varY and qrad are consumed below.
    double mean;
    double varY;
    double qrad;
    PivotSelector.EstimateQueryStatistics(DB, rand, 128, 128, out mean, out varY, out qrad);

    var selector = new PivotSelectorRandom(n, rand);
    double previousAverage = n;
    // Original author's note: anything larger than 1.x can be considered a valid
    // starting error_factor.
    double ratio = n;
    var roundSize = 16;
    var iterID = 1;
    double threshold = max_error_factor + 1;

    while (ratio >= threshold) {
        double currentAverage = 0;
        for (int step = 0; step < roundSize; ++step, ++iterID) {
            var pivotID = selector.NextPivot();
            this.ComputeDistRow(pivotID, idxseq, rand, pivotList, scratch);

            // Mean squared deviation of every item's distance from its pivot's mean.
            double varX = 0;
            for (int pos = 0; pos < this.Items.Length; ++pos) {
                var entry = this.Items[pos];
                var deviation = Math.Abs(entry.Dist - pivotList[entry.ObjID].mean);
                varX += deviation * deviation / n;
            }

            currentAverage += this.expected_cost(qrad, varX, varY, n, iterID, num_indexes);
        }
        currentAverage /= roundSize;
        ratio = previousAverage / currentAverage;

        Console.WriteLine("XXXXXXXXXXXXXXXXXXXX {0}, iterID: {1}, DB: {2} ", this, iterID, DB.Name);
        Console.WriteLine("XXX DB: {0}", DB.Name);
        Console.WriteLine("XXX avg_curr_cost: {0}, avg_prev_cost: {1}, error_factor: {2}",
            currentAverage, previousAverage, ratio);

        previousAverage = currentAverage;
    }

    this.Pivs = pivotList.ToArray();
    Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Builds the pivot list with a fixed number of pivots: ComputeDistRow is
/// invoked <paramref name="num_pivots"/> times, sharing one set of
/// already-used pivot ids across the calls.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="num_pivots">Number of ComputeDistRow rounds to run.</param>
/// <param name="rand">Random source passed to ComputeDistRow.</param>
public EPListRandomPivots(MetricDB DB, int num_pivots, Random rand) {
    this.Items = null;
    var usedPivots = new HashSet<int>();
    var pivotList = new List<EPivot>(32);
    var idxseq = new DynamicSequentialOrdered();
    idxseq.Build(DB, RandomSets.GetIdentity(DB.Count));
    var scratch = new List<ItemPair>(DB.Count);

    int round = 0;
    while (round < num_pivots) {
        this.ComputeDistRow(idxseq, rand, usedPivots, pivotList, scratch);

        // Progress report every 10 rounds.
        if (round % 10 == 0) {
            Console.WriteLine("XXXXXX {0}, iteration: {1}/{2}, DB: {3}", this, round, num_pivots, DB.Name);
        }
        ++round;
    }

    this.Pivs = pivotList.ToArray();
    Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Computes the sequence of object ids forming the "metric shell" of
/// <paramref name="q"/>. In each round the distances from <paramref name="q"/>
/// to the remaining objects are computed, every remaining object whose cached
/// distance is greater than or equal to its distance to the round's minimum
/// object is removed, and the minimum object's id is appended to the result.
/// </summary>
/// <param name="q">Query object handed to ComputeDistances.</param>
/// <returns>Ids of the minimum objects, in the order the rounds produced them.</returns>
public int[] GetMetricShell(object q) {
    var seq = new List<int>();
    var idx = new DynamicSequentialOrdered();
    // Original author's note: the following build step is a candidate for optimization.
    // NOTE(review): the working index is built over this.R while objects are
    // fetched from this.DB below - confirm both expose the same object ids.
    idx.Build(this.R, RandomSets.GetIdentity(this.R.Count));
    List<ItemPair> cache = new List<ItemPair>(this.R.Count);

    while (idx.Count > 0) {
        cache.Clear();
        DynamicSequential.Stats stats;
        int min_objID, max_objID;
        idx.ComputeDistances(q, cache, out stats, out min_objID, out max_objID);

        // The minimum object does not change inside the loop below, so it is
        // fetched once per round instead of once per cached item.
        var obj_min = this.DB[min_objID];
        for (int i = 0; i < cache.Count; ++i) {
            var item = cache[i];
            var obj_cur = this.DB[item.ObjID];
            if (item.Dist >= this.DB.Dist(obj_min, obj_cur)) {
                idx.Remove(item.ObjID);
            }
        }

        seq.Add(min_objID);
    }

    return seq.ToArray();
}
/// <summary>
/// Builds the pivot group. While objects remain, one is taken as a pivot,
/// SearchExtremes yields its "near" and "far" result sets, and the near items
/// (plus the far items when <paramref name="do_far"/> is set) are appended to
/// the item list and removed from the working index.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="alpha">Forwarded to SearchExtremes.</param>
/// <param name="min_bs">Forwarded to SearchExtremes.</param>
/// <param name="seed">Seed for the random permutation the working index is built with.</param>
/// <param name="do_far">When true the far result set is stored and removed as well.</param>
public virtual void Build(MetricDB DB, double alpha, int min_bs, int seed, bool do_far) {
    var remaining = new DynamicSequentialOrdered();
    remaining.Build(DB, RandomSets.GetRandomPermutation(DB.Count, new Random(seed)));

    var pivotList = new List<Pivot>(32);
    var collected = new List<ItemPair>(DB.Count);
    var scratch = new List<ItemPair>(remaining.Count);
    int round = 0;

    while (remaining.Count > 0) {
        var pivotID = remaining.GetAnyItem();
        object pivotObj = DB[pivotID];
        remaining.Remove(pivotID);

        DynamicSequential.Stats stats;
        IResult nearSet, farSet;
        this.SearchExtremes(remaining, scratch, pivotObj, alpha, min_bs, out nearSet, out farSet, out stats);

        // Near side: track the smallest and largest distance while storing the items.
        double nearFirst = double.MaxValue;
        double nearLast = 0;
        foreach (var hit in nearSet) {
            nearFirst = Math.Min(nearFirst, hit.Dist);
            nearLast = Math.Max(nearLast, hit.Dist);
            collected.Add(new ItemPair { ObjID = hit.ObjID, Dist = hit.Dist });
        }
        int nearCount = nearSet.Count;
        remaining.Remove(nearSet);

        // Far side is optional; when skipped, farFirst keeps double.MaxValue
        // and farCount stays 0.
        double farFirst = double.MaxValue;
        int farCount = 0;
        if (do_far) {
            foreach (var hit in farSet) {
                farFirst = Math.Min(farFirst, hit.Dist);
                collected.Add(new ItemPair { ObjID = hit.ObjID, Dist = hit.Dist });
            }
            farCount = farSet.Count;
            remaining.Remove(farSet);
        }

        Pivot pivotData = new Pivot(pivotID, stats.mean, stats.stddev, nearLast, farFirst, nearCount, farCount);
        pivotList.Add(pivotData);

        // Progress report every 10 pivots; distances are shown divided by stats.max.
        if (round % 10 == 0) {
            Console.WriteLine("");
            Console.WriteLine(this.ToString());
            Console.WriteLine("-- I {0}> remains: {1}, alpha: {2}, mean: {3}, stddev: {4}, pivot: {5}, min_bs: {6}, db: {7}, do_far: {8}",
                round, remaining.Count, alpha, stats.mean, stats.stddev, pivotID, min_bs, DB.Name, do_far);
            if (pivotData.num_near > 0) {
                Console.WriteLine("-- (NORMVAL) first-near: {0}, last-near: {1}, near-count: {2}",
                    nearFirst / stats.max, pivotData.last_near / stats.max, pivotData.num_near);
            }
            if (pivotData.num_far > 0) {
                Console.WriteLine("++ (NORMVAL) first-far: {0}, far-count: {1}",
                    pivotData.first_far / stats.max, pivotData.num_far);
            }
        }
        ++round;
    }

    Console.WriteLine("Number of pivots per group: {0}", round);
    this.Pivs = pivotList.ToArray();
    this.Items = collected.ToArray();
}
/// <summary>
/// Builds the pivot list by adding pivots until an exponentially smoothed,
/// size-normalized value of <c>expected_cost</c> stops decreasing.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="num_indexes">Forwarded to <c>expected_cost</c>.</param>
/// <param name="rand">Random source for statistics estimation, pivot selection and ComputeDistRow.</param>
public EPListOptimizedB(MetricDB DB, int num_indexes, Random rand) {
    this.Items = null;
    var pivotList = new List<EPivot>(32);
    var n = DB.Count;
    var idxseq = new DynamicSequentialOrdered();
    idxseq.Build(DB, RandomSets.GetIdentity(DB.Count));
    var scratch = new List<ItemPair>(DB.Count);

    // Query statistics; only varY and qrad are consumed below.
    double mean;
    double varY;
    double qrad;
    PivotSelector.EstimateQueryStatistics(DB, rand, 64, 128, out mean, out varY, out qrad);

    var selector = new PivotSelectorRandom(n, rand);

    // Smoothing weights: 99% previous value, 1% new observation.
    double weight_prev = 0.99;
    double weight_curr = 1.0 - weight_prev;

    // max_error and error are only reported in the progress log; they do not
    // take part in the stop condition.
    double max_error = 0.01;
    double error = 1;
    double prev_cost = 1.0;
    double min_cost = 1;
    var iterID = 0;
    var window = 1;

    while (true) {
        double accumulated = 0;
        for (int step = 0; step < window; ++step, ++iterID) {
            var pivotID = selector.NextPivot();
            this.ComputeDistRow(pivotID, idxseq, rand, pivotList, scratch);

            // Mean squared deviation of every item's distance from its pivot's mean.
            double varX = 0;
            for (int pos = 0; pos < this.Items.Length; ++pos) {
                var entry = this.Items[pos];
                var deviation = Math.Abs(entry.Dist - pivotList[entry.ObjID].mean);
                varX += deviation * deviation / n;
            }

            // NOTE(review): iterID is passed before it is incremented, so it is one
            // less than the number of pivots processed so far - confirm this is
            // what expected_cost expects.
            accumulated += this.expected_cost(qrad, varX, varY, n, iterID, num_indexes);
        }

        double normalized = (accumulated / window) / n;
        double curr_cost = weight_prev * prev_cost + weight_curr * normalized;

        // Stop as soon as the smoothed cost no longer improves on the best seen.
        if (!(curr_cost < min_cost)) {
            break;
        }
        min_cost = curr_cost;

        if (iterID % 10 == 0) {
            Console.WriteLine("XXXXXXXXXXXXXXXXXXXX {0}, db: {1}", this, Path.GetFileName(DB.Name));
            Console.WriteLine("XXX prev-cost: {0:0.000}, curr-cost: {1:0.000}, min-cost: {6}, error: {2:0.00000}, max-error: {3:0.00000}, pivs: {4}, groups: {5}",
                prev_cost, curr_cost, error, max_error, iterID, num_indexes, min_cost);
        }

        error = prev_cost - curr_cost;
        prev_cost = curr_cost;
    }

    this.Pivs = pivotList.ToArray();
    Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
}