/// <summary>
/// Builds the index over <paramref name="db"/>: a leader structure is built first, then
/// additional rows are appended while the running review probability stays above sqrt(n)/n,
/// where n is the database size.
/// </summary>
/// <param name="db">Database to index; stored in <c>this.DB</c>.</param>
/// <param name="setup">Setup object forwarded unchanged to every <c>InternalBuild</c> call.</param>
public virtual void Build(MetricDB db, ANNISetup setup)
{
    // num_build_processors = 1;
    this.DB = db;
    var built_rows = new List<ANNI>();
    // Never read below; kept because constructing it (and calling RandomSets.GetRandom)
    // may have side effects that callers rely on -- TODO confirm and remove if not.
    var pivsel = new PivotSelectorRandom(db.Count, RandomSets.GetRandom());

    // The leader is built from a first ANNI constructed with probability 1.0.
    this.leader = new NANNI();
    var seed_index = new ANNI();
    var leader_cost = seed_index.InternalBuild(setup, 0, 1.0, db, 2);
    this.leader.Build(seed_index);
    int m = this.leader.clusters.Count;

    double review_prob = leader_cost.SingleCost - m;
    review_prob /= this.DB.Count;
    var min_prob = Math.Sqrt(this.DB.Count) / this.DB.Count;

    while (review_prob > min_prob)
    {
        var next_row = new ANNI();
        built_rows.Add(next_row);
        var row_cost = next_row.InternalBuild(setup, m, review_prob, db, 2);
        var row_size = next_row.ACT.Count;
        // NOTE(review): if SingleCost is an integral type this quotient is an integer
        // division -- confirm against the declaration of SingleCost.
        review_prob *= (row_cost.SingleCost - row_size) / this.DB.Count;
    }

    this.rows = built_rows.ToArray();
}
/// <summary>
/// Builds the index over <paramref name="db"/> with a fixed number of structures: one leader
/// plus <paramref name="num_indexes"/> - 1 rows, the rows being built through
/// <c>LongParallel.For</c>.
/// </summary>
/// <param name="db">Database to index; stored in <c>this.DB</c>.</param>
/// <param name="setup">Setup object forwarded unchanged to every <c>InternalBuild</c> call.</param>
/// <param name="num_indexes">Total number of structures, leader included.</param>
/// <param name="num_tasks">Forwarded as the last argument of <c>LongParallel.For</c>.</param>
public virtual void Build(MetricDB db, ANNISetup setup, int num_indexes, int num_tasks = -1)
{
    // num_build_processors = 1;
    this.DB = db;

    // The leader accounts for one of the requested indexes; the rest become rows.
    int row_count = num_indexes - 1;
    this.rows = new ANNI[row_count];

    // Never read below; kept because constructing it (and calling RandomSets.GetRandom)
    // may have side effects that callers rely on -- TODO confirm and remove if not.
    var pivsel = new PivotSelectorRandom(db.Count, RandomSets.GetRandom());

    this.leader = new NANNI();
    var seed_index = new ANNI();
    var leader_cost = seed_index.InternalBuild(setup, 0, 1.0, db, 2);
    this.leader.Build(seed_index);
    int m = this.leader.clusters.Count;

    double review_prob = leader_cost.SingleCost - m;
    review_prob /= this.DB.Count;

    Console.WriteLine("====> num_indexes: {0}", row_count);

    // Every row receives the same m and review_prob; each iteration writes its own slot.
    LongParallel.For(0, row_count, (int i) =>
    {
        var row = new ANNI();
        this.rows[i] = row;
        row.InternalBuild(setup, m, review_prob, db, row_count);
    }, num_tasks);
}
/// <summary>
/// Builds the index: creates <paramref name="m"/> nodes, each constructed from
/// <paramref name="db"/> and the next pivot delivered by a random pivot selector.
/// </summary>
/// <param name="db">Database to index; stored in <c>this.DB</c>.</param>
/// <param name="m">Number of nodes to create.</param>
public virtual void Build(MetricDB db, int m)
{
    this.DB = db;
    var selector = new PivotSelectorRandom(this.DB.Count, RandomSets.GetRandom());
    this.nodes = new Node[m];

    int slot = 0;
    while (slot < m)
    {
        this.nodes[slot] = new Node(db, selector.NextPivot());
        ++slot;
    }
}
/// <summary>
/// Builds a pivot list in which every object is first assigned to one random pivot and then,
/// for <paramref name="num_pivs"/> further random pivots, a bounded number of objects taken
/// from the front of a priority queue are re-evaluated and re-assigned when the new pivot
/// places them farther from that pivot's mean distance.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="seed">Seed of the random generator used for pivot selection and statistics.</param>
/// <param name="num_pivs">Number of additional pivots to try after the initial one.</param>
public EPListRandomPivotsPriorized(MetricDB DB, int seed, int num_pivs)
{
    var n = DB.Count;
    this.Items = new ItemPair[n];
    var pivs = new List<EPivot>(32);
    var rand = new Random(seed);
    var pivsel = new PivotSelectorRandom(n, rand);

    // Initial assignment: every object is paired with pivot #0.
    var piv = pivsel.NextPivot();
    var pivOBJ = DB[piv];
    for (int objID = 0; objID < n; ++objID)
    {
        var d = DB.Dist(pivOBJ, DB[objID]);
        this.Items[objID] = new ItemPair(0, d);
    }
    double mean, variance;
    this.Statistics(out mean, out variance);
    pivs.Add(new EPivot(piv, Math.Sqrt(variance), mean, 0, 0, 0, 0));

    // Orders items by |distance - mean of the assigned pivot|, ascending.
    var item_cmp = new Comparison<ItemPair>((x, y) =>
    {
        var diff_x = Math.Abs(x.Dist - pivs[x.ObjID].mean);
        var diff_y = Math.Abs(y.Dist - pivs[y.ObjID].mean);
        return diff_x.CompareTo(diff_y);
    });
    // Presumably RemoveFirst yields the smallest element under this comparison, i.e. the
    // object currently closest to its pivot's mean -- confirm against SkipList2.
    var queue = new SkipList2<int>(0.5, (x, y) => item_cmp(this.Items[x], this.Items[y]));
    for (int objID = 0; objID < n; ++objID)
    {
        queue.Add(objID, null);
    }

    // The queue holds exactly n objects, so never try to review more than n of them
    // (the unclamped value exceeds n whenever num_pivs < 2).
    var max_review = Math.Min(n, 2 * n / num_pivs);
    var list = new List<int>();
    for (int i = 0; i < num_pivs; ++i)
    {
        Console.WriteLine("XXXXXX BEGIN {0} i: {1}", this, i);
        piv = pivsel.NextPivot();
        // Fix: distances below must be measured against the NEW pivot, not the initial one.
        pivOBJ = DB[piv];
        double piv_mean, piv_variance, qrad;
        PivotSelector.EstimatePivotStatistics(DB, rand, pivOBJ, 256, out piv_mean, out piv_variance, out qrad);
        var pivID = pivs.Count;
        // Fix: record this pivot's own estimated mean (previously the first pivot's mean was stored).
        pivs.Add(new EPivot(piv, Math.Sqrt(piv_variance), piv_mean, 0, 0, 0, 0));

        // Objects are taken out of the queue while their item may change (the queue's
        // ordering depends on this.Items) and are re-inserted afterwards.
        list.Clear();
        for (int s = 0; s < max_review; ++s)
        {
            var objID = queue.RemoveFirst();
            var d = DB.Dist(DB[objID], pivOBJ);
            var new_item = new ItemPair(pivID, d);
            if (item_cmp(new_item, this.Items[objID]) > 0)
            {
                this.Items[objID] = new_item;
            }
            list.Add(objID);
        }
        foreach (var objID in list)
        {
            queue.Add(objID, null);
        }
        Console.WriteLine("XXXXXX END {0} i: {1}", this, i);
    }

    this.Pivs = pivs.ToArray();
    Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Initialises <c>this.H</c> with <c>this.Width</c> randomly selected positions in the range
/// [0, 8 * length of <paramref name="firstObject"/>), stored in ascending order.
/// </summary>
/// <param name="rand">Random generator handed to the pivot selector.</param>
/// <param name="firstObject">A sample object; must be a non-null <c>int[]</c>.</param>
/// <exception cref="ArgumentException">
/// <paramref name="firstObject"/> is null or is not an <c>int[]</c>.
/// </exception>
public override void PreBuild(Random rand, object firstObject)
{
    // Fail with a descriptive error instead of a NullReferenceException on a bad argument.
    var u = firstObject as int[];
    if (u == null)
    {
        throw new ArgumentException("firstObject must be a non-null int[]", "firstObject");
    }

    this.H = new ushort[this.Width];
    var m = u.Length << 3;
    var sel = new PivotSelectorRandom(m, rand);
    for (int i = 0; i < this.Width; ++i)
    {
        var p = sel.NextPivot();
        // NOTE(review): the cast truncates if p can exceed ushort.MaxValue (m > 65536) -- confirm inputs stay below that.
        this.H[i] = (ushort)p;
    }
    Array.Sort(this.H);
}
/// <summary>
/// Builds the pivot list by adding random pivots one at a time (via <c>ComputeDistRow</c>)
/// and stops as soon as the value returned by <c>expected_cost</c> no longer decreases, or
/// after <paramref name="max_iters"/> pivots.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="num_indexes">Forwarded to <c>expected_cost</c>.</param>
/// <param name="rand">Random generator shared by statistics estimation and pivot selection.</param>
/// <param name="max_iters">Upper bound on the number of iterations.</param>
/// <param name="error_factor">Forwarded to <c>expected_cost</c>.</param>
public EPListOptimized(MetricDB DB, int num_indexes, Random rand, int max_iters, double error_factor)
{
    Console.WriteLine("XXX {0}, num_indexes: {1}, max_iters: {2}, error_factor: {3}", this, num_indexes, max_iters, error_factor);
    this.Items = null;
    var pivs = new List<EPivot>(32);
    var n = DB.Count;
    var idxseq = new DynamicSequentialOrdered();
    idxseq.Build(DB, RandomSets.GetIdentity(DB.Count));
    var tmp_items = new List<ItemPair>(DB.Count);

    double qrad, varY, mean;
    PivotSelector.EstimateQueryStatistics(DB, rand, 128, 128, out mean, out varY, out qrad);

    var pivsel = new PivotSelectorRandom(n, rand);
    int nextpiv = pivsel.NextPivot();
    int iteration = 0;
    double prev_cost;
    double curr_cost = n;
    bool keep_going;

    do
    {
        this.ComputeDistRow(nextpiv, idxseq, rand, pivs, tmp_items);

        // Mean squared deviation of each object's distance from its assigned pivot's mean.
        var items = this.Items;
        double varX = 0;
        for (int objID = 0; objID < items.Length; ++objID)
        {
            var u = items[objID];
            var diff = Math.Abs(u.Dist - pivs[u.ObjID].mean);
            varX += diff * diff / n;
        }

        // The next pivot is drawn before the stop test, exactly once per iteration.
        nextpiv = pivsel.NextPivot();
        ++iteration;

        prev_cost = curr_cost;
        curr_cost = this.expected_cost(qrad, varX, varY, n, iteration, num_indexes, error_factor);
        double derivative = curr_cost - prev_cost;

        if (iteration % 10 == 1)
        {
            Console.Write("XXXXXX {0}, iteration: {1}, DB: {2}, ", this, iteration, DB.Name);
            Console.WriteLine("qcurr_cost: {0}, prev_cost: {1}, varX: {2}, varY: {3}, qrad: {4}", curr_cost, prev_cost, varX, varY, qrad);
        }

        keep_going = derivative < 0 && iteration < max_iters;
    } while (keep_going);

    this.Pivs = pivs.ToArray();
    Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Builds the pivot list by adding random pivots in windows of 16 and stops once the ratio
/// between the previous and the current window's average <c>expected_cost</c> falls below
/// 1 + <paramref name="max_error_factor"/>.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="num_indexes">Forwarded to <c>expected_cost</c>.</param>
/// <param name="rand">Random generator shared by statistics estimation and pivot selection.</param>
/// <param name="max_error_factor">Relative improvement threshold; 1 is added to it before use.</param>
public EPListOptimizedA(MetricDB DB, int num_indexes, Random rand, double max_error_factor = 0.001)
{
    this.Items = null;
    var pivs = new List<EPivot>(32);
    var n = DB.Count;
    var idxseq = new DynamicSequentialOrdered();
    idxseq.Build(DB, RandomSets.GetIdentity(DB.Count));
    var tmp_items = new List<ItemPair>(DB.Count);

    double qrad, varY, mean;
    PivotSelector.EstimateQueryStatistics(DB, rand, 128, 128, out mean, out varY, out qrad);

    var pivsel = new PivotSelectorRandom(n, rand);
    var avg_window = 16;
    var iterID = 1;
    double avg_prev_cost = n;
    // Anything larger than 1.x is a valid starting ratio; n guarantees at least one pass.
    double error_factor = n;
    double stop_ratio = max_error_factor + 1;

    while (stop_ratio <= error_factor)
    {
        double window_sum = 0;
        for (int step = 0; step < avg_window; ++step, ++iterID)
        {
            this.ComputeDistRow(pivsel.NextPivot(), idxseq, rand, pivs, tmp_items);

            // Mean squared deviation of each object's distance from its assigned pivot's mean.
            var items = this.Items;
            double varX = 0;
            for (int objID = 0; objID < items.Length; ++objID)
            {
                var u = items[objID];
                var diff = Math.Abs(u.Dist - pivs[u.ObjID].mean);
                varX += diff * diff / n;
            }

            var curr_cost = this.expected_cost(qrad, varX, varY, n, iterID, num_indexes);
            window_sum += curr_cost;
        }

        double avg_curr_cost = window_sum;
        avg_curr_cost /= avg_window;
        error_factor = avg_prev_cost / avg_curr_cost;

        Console.WriteLine("XXXXXXXXXXXXXXXXXXXX {0}, iterID: {1}, DB: {2} ", this, iterID, DB.Name);
        Console.WriteLine("XXX DB: {0}", DB.Name);
        Console.WriteLine("XXX avg_curr_cost: {0}, avg_prev_cost: {1}, error_factor: {2}", avg_curr_cost, avg_prev_cost, error_factor);

        avg_prev_cost = avg_curr_cost;
    }

    this.Pivs = pivs.ToArray();
    Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Builds the pivot list by adding random pivots one at a time and tracking an exponentially
/// smoothed, size-normalised <c>expected_cost</c>; construction stops at the first iteration
/// whose smoothed cost does not improve on the minimum seen so far.
/// </summary>
/// <param name="DB">Database whose objects are indexed.</param>
/// <param name="num_indexes">Forwarded to <c>expected_cost</c>.</param>
/// <param name="rand">Random generator shared by statistics estimation and pivot selection.</param>
public EPListOptimizedB(MetricDB DB, int num_indexes, Random rand)
{
    this.Items = null;
    var pivs = new List<EPivot>(32);
    var n = DB.Count;
    var idxseq = new DynamicSequentialOrdered();
    idxseq.Build(DB, RandomSets.GetIdentity(DB.Count));
    var tmp_items = new List<ItemPair>(DB.Count);

    double qrad, varY, mean;
    PivotSelector.EstimateQueryStatistics(DB, rand, 64, 128, out mean, out varY, out qrad);

    var pivsel = new PivotSelectorRandom(n, rand);

    // Smoothing weights: the new observation contributes 1% to the running cost.
    double weight_prev = 0.99;
    double weight_curr = 1.0 - weight_prev;
    // max_error and error are only reported in the progress message below.
    double max_error = 0.01;
    double error = 1;
    double prev_cost = 1.0;
    double min_cost = 1;
    var iterID = 0;
    var window = 1;

    for (;;)
    {
        double window_cost = 0;
        for (int step = 0; step < window; ++step, ++iterID)
        {
            this.ComputeDistRow(pivsel.NextPivot(), idxseq, rand, pivs, tmp_items);

            // Mean squared deviation of each object's distance from its assigned pivot's mean.
            var items = this.Items;
            double varX = 0;
            for (int objID = 0; objID < items.Length; ++objID)
            {
                var u = items[objID];
                var diff = Math.Abs(u.Dist - pivs[u.ObjID].mean);
                varX += diff * diff / n;
            }
            window_cost += this.expected_cost(qrad, varX, varY, n, iterID, num_indexes);
        }

        double curr_cost = (window_cost / window) / n;
        curr_cost = weight_prev * prev_cost + weight_curr * curr_cost;

        // Written as a negated '<' so that a NaN cost also terminates the loop.
        if (!(curr_cost < min_cost))
        {
            break;
        }
        min_cost = curr_cost;

        if (iterID % 10 == 0)
        {
            Console.WriteLine("XXXXXXXXXXXXXXXXXXXX {0}, db: {1}", this, Path.GetFileName(DB.Name));
            Console.WriteLine("XXX prev-cost: {0:0.000}, curr-cost: {1:0.000}, min-cost: {6}, error: {2:0.00000}, max-error: {3:0.00000}, pivs: {4}, groups: {5}", prev_cost, curr_cost, error, max_error, iterID, num_indexes, min_cost);
        }

        error = prev_cost - curr_cost;
        prev_cost = curr_cost;
    }

    this.Pivs = pivs.ToArray();
    Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
}