Beispiel #1
0
        /// <summary>
        /// Builds the index over <paramref name="db"/>: a first ANNI is built and handed to the leader,
        /// then further ANNI rows are appended while the running review probability stays above
        /// sqrt(n) / n, where n is <c>this.DB.Count</c>.
        /// </summary>
        /// <param name="db">Database to index; stored in <c>this.DB</c>.</param>
        /// <param name="setup">Forwarded unchanged to every <c>InternalBuild</c> call.</param>
        public virtual void Build(MetricDB db, ANNISetup setup)
        {
            this.DB = db;

            // Never read again in this method. Kept because constructing it (and calling
            // RandomSets.GetRandom) may have side effects on shared random state -- TODO confirm.
            var pivsel = new PivotSelectorRandom(db.Count, RandomSets.GetRandom());

            // The first ANNI is only used to build the leader.
            this.leader = new NANNI();
            var seedIndex = new ANNI();
            var seedCost = seedIndex.InternalBuild (setup, 0, 1.0, db, 2);
            this.leader.Build (seedIndex);
            int m = this.leader.clusters.Count;

            // Assign to a double first, then divide, so the division is done in floating point.
            double review_prob = seedCost.SingleCost - m;
            review_prob /= this.DB.Count;
            var stop_prob = Math.Sqrt (this.DB.Count) / this.DB.Count;

            var built = new List<ANNI> ();
            while (review_prob > stop_prob) {
                var next = new ANNI ();
                built.Add (next);
                var nextCost = next.InternalBuild (setup, m, review_prob, db, 2);
                var nextActCount = next.ACT.Count;
                // NOTE(review): if SingleCost is an integral type this quotient is an integer
                // division -- confirm against the declaration of the cost type.
                review_prob *= (nextCost.SingleCost - nextActCount) / this.DB.Count;
            }
            this.rows = built.ToArray ();
        }
Beispiel #2
0
        /// <summary>
        /// Builds the index with a fixed number of ANNI structures: one is built first and handed to
        /// the leader, and the remaining <c>num_indexes - 1</c> are built as rows through
        /// <c>LongParallel.For</c>.
        /// </summary>
        /// <param name="db">Database to index; stored in <c>this.DB</c>.</param>
        /// <param name="setup">Forwarded unchanged to every <c>InternalBuild</c> call.</param>
        /// <param name="num_indexes">Total number of ANNI structures, counting the one used for the leader. Must be at least 1.</param>
        /// <param name="num_tasks">Forwarded to <c>LongParallel.For</c>.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="num_indexes"/> is less than 1 (previously this surfaced as an exception from
        /// allocating an array of negative length).
        /// </exception>
        public virtual void Build(MetricDB db, ANNISetup setup, int num_indexes, int num_tasks = -1)
        {
            if (num_indexes < 1) {
                throw new ArgumentOutOfRangeException ("num_indexes", num_indexes,
                    "num_indexes must be at least 1: one index is used to build the leader");
            }

            this.DB = db;

            // One of the requested indexes feeds the leader; only the rest become rows.
            int num_rows = num_indexes - 1;
            this.rows = new ANNI[num_rows];

            // Never read again in this method. Kept because constructing it (and calling
            // RandomSets.GetRandom) may have side effects on shared random state -- TODO confirm.
            var pivsel = new PivotSelectorRandom(db.Count, RandomSets.GetRandom());

            this.leader = new NANNI();
            var ilc = new ANNI();
            var cost = ilc.InternalBuild (setup, 0, 1.0, db, 2);
            this.leader.Build (ilc);
            int m = this.leader.clusters.Count;

            // Assign to a double first, then divide, so the division is done in floating point.
            double review_prob = cost.SingleCost - m;
            review_prob /= this.DB.Count;

            // The label says num_indexes but the value printed is the number of rows, as before.
            Console.WriteLine ("====> num_indexes: {0}", num_rows);
            LongParallel.For (0, num_rows, (int i) => {
                this.rows [i] = new ANNI ();
                this.rows [i].InternalBuild (setup, m, review_prob, db, num_rows);
            }, num_tasks);
        }
Beispiel #3
0
 /// <summary>
 /// Builds the index over <paramref name="db"/> by creating <paramref name="m"/> nodes,
 /// each constructed from the database and the next pivot of a random pivot selector.
 /// </summary>
 /// <param name="db">Database to index; stored in <c>this.DB</c>.</param>
 /// <param name="m">Number of nodes to create.</param>
 public virtual void Build(MetricDB db, int m)
 {
     this.DB = db;
     var selector = new PivotSelectorRandom (this.DB.Count, RandomSets.GetRandom ());
     this.nodes = new Node[m];

     int slot = 0;
     while (slot < m) {
         var pivot = selector.NextPivot ();
         this.nodes [slot] = new Node (db, pivot);
         ++slot;
     }
 }
 /// <summary>
 /// Assigns every object of <paramref name="DB"/> to one random pivot, then draws
 /// <paramref name="num_pivs"/> further random pivots and offers each of them to the objects taken
 /// from the front of a priority queue.
 /// </summary>
 /// <remarks>
 /// Each entry of <c>Items</c> holds the index (into the pivot list) of the pivot assigned to that
 /// object and the distance to it. The queue orders objects by |distance - mean of assigned pivot|.
 /// A new pivot replaces the current assignment when its |distance - mean| compares greater.
 /// The resulting <c>Pivs</c> array has <c>num_pivs + 1</c> entries (the initial pivot plus the extra ones).
 /// </remarks>
 /// <param name="DB">Database whose objects are assigned to pivots.</param>
 /// <param name="seed">Seed of the <see cref="Random"/> used for pivot selection and statistics.</param>
 /// <param name="num_pivs">Number of extra pivots to draw after the initial one; values below 1 draw none.</param>
 public EPListRandomPivotsPriorized(MetricDB DB, int seed, int num_pivs)
 {
     var n = DB.Count;
     this.Items = new ItemPair[n];
     var pivs = new List<EPivot> (32);
     var rand = new Random (seed);
     var pivsel = new PivotSelectorRandom (n, rand);

     // Initial assignment: every object points at pivot index 0.
     var piv = pivsel.NextPivot ();
     var pivOBJ = DB [piv];
     for (int objID = 0; objID < n; ++objID) {
         var d = DB.Dist(pivOBJ, DB[objID]);
         this.Items[objID] = new ItemPair(0, d);
     }
     double mean, variance;
     this.Statistics (out mean, out variance);
     pivs.Add(new EPivot(piv, Math.Sqrt(variance), mean, 0, 0, 0, 0));

     // Orders items by how far their stored distance is from the mean of their assigned pivot.
     // ItemPair.ObjID is used here as an index into pivs.
     var item_cmp = new Comparison<ItemPair>((x,y) => {
         var diff_x = Math.Abs (x.Dist - pivs[x.ObjID].mean);
         var diff_y = Math.Abs (y.Dist - pivs[y.ObjID].mean);
         return diff_x.CompareTo(diff_y);
     });
     var queue = new SkipList2<int> (0.5, (x,y) => item_cmp (this.Items [x], this.Items [y]));
     for (int objID = 0; objID < n; ++objID) {
         queue.Add(objID, null);
     }

     // Fix: 2 * n / num_pivs threw DivideByZeroException for num_pivs == 0 and asked for 2n
     // removals from a queue of n objects for num_pivs == 1. Never review more than n objects.
     var max_review = num_pivs > 0 ? Math.Min (n, 2 * n / num_pivs) : 0;
     var list = new List<int> ();
     for (int i = 0; i < num_pivs; ++i) {
         Console.WriteLine("XXXXXX BEGIN {0} i: {1}", this, i);
         piv = pivsel.NextPivot();
         // Fix: refresh the pivot object. It used to stay bound to the initial pivot, so the
         // distances stored for later pivots were all measured against the first one.
         pivOBJ = DB [piv];
         double piv_mean, piv_variance, qrad;
         PivotSelector.EstimatePivotStatistics(DB, rand, pivOBJ, 256, out piv_mean, out piv_variance, out qrad);
         var pivID = pivs.Count;
         // Fix: store this pivot's own estimated mean (piv_mean), not the mean of the initial pivot.
         pivs.Add(new EPivot(piv, Math.Sqrt(piv_variance), piv_mean, 0, 0, 0, 0));
         list.Clear();
         // Objects are taken out of the queue before their item changes and re-inserted afterwards,
         // so the queue never holds an entry whose ordering key was modified in place.
         for (int s = 0; s < max_review; ++s) {
             // presumably the smallest element under item_cmp -- verify against SkipList2
             var objID = queue.RemoveFirst();
             var d = DB.Dist(DB[objID], pivOBJ);
             var new_item = new ItemPair(pivID, d);
             if (item_cmp(new_item, this.Items[objID]) > 0) {
                 this.Items[objID] = new_item;
             }
             list.Add (objID);
         }
         foreach (var objID in list) {
             queue.Add(objID, null);
         }
         Console.WriteLine("XXXXXX END {0} i: {1}", this, i);
     }
     this.Pivs = pivs.ToArray ();
     Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
 }
Beispiel #5
0
        /// <summary>
        /// Fills <c>H</c> with <c>Width</c> values drawn from a random pivot selector over the range
        /// <c>firstObject.Length &lt;&lt; 3</c>, then sorts them in ascending order.
        /// </summary>
        /// <param name="rand">Random source handed to the pivot selector.</param>
        /// <param name="firstObject">Sample object; must be a non-null <c>int[]</c>. Only its length is read.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="firstObject"/> is null or not an <c>int[]</c> (previously a NullReferenceException).
        /// </exception>
        public override void PreBuild(Random rand, object firstObject)
        {
            var u = firstObject as int[];
            if (u == null) {
                throw new ArgumentException ("firstObject must be a non-null int[]", "firstObject");
            }

            this.H = new ushort[this.Width];
            // NOTE(review): << 3 yields 8 positions per array element; if bit positions of an int[]
            // were intended, each element has 32 bits -- confirm against the callers.
            var m = u.Length << 3;
            var sel = new PivotSelectorRandom (m, rand);

            for (int i = 0; i < this.Width; ++i) {
                var p = sel.NextPivot ();
                // NOTE(review): the cast silently truncates when p exceeds ushort.MaxValue,
                // i.e. when m is larger than 65536 -- confirm the expected input size.
                this.H [i] = (ushort)p;
            }
            Array.Sort (this.H);
        }
Beispiel #6
0
 /// <summary>
 /// Adds random pivots one at a time through <c>ComputeDistRow</c> and stops as soon as the
 /// expected cost no longer decreases or <paramref name="max_iters"/> pivots have been processed.
 /// </summary>
 /// <param name="DB">Database the pivots are drawn from.</param>
 /// <param name="num_indexes">Forwarded to <c>expected_cost</c>.</param>
 /// <param name="rand">Random source shared by the statistics estimation, the pivot selector and <c>ComputeDistRow</c>.</param>
 /// <param name="max_iters">Upper bound on the number of iterations; at least one always runs.</param>
 /// <param name="error_factor">Forwarded to <c>expected_cost</c>.</param>
 public EPListOptimized(MetricDB DB, int num_indexes, Random rand, int max_iters, double error_factor)
 {
     Console.WriteLine ("XXX {0}, num_indexes: {1}, max_iters: {2}, error_factor: {3}", this, num_indexes, max_iters, error_factor);
     this.Items = null;
     var pivs = new List<EPivot> (32);
     var n = DB.Count;
     var idxseq = new DynamicSequentialOrdered ();
     idxseq.Build (DB, RandomSets.GetIdentity (DB.Count));
     var tmp_items = new List<ItemPair> (DB.Count);

     // mean is required by the out signature but is not used afterwards.
     double qrad, varY, mean;
     PivotSelector.EstimateQueryStatistics (DB, rand, 128, 128, out mean, out varY, out qrad);

     var pivsel = new PivotSelectorRandom (n, rand);
     int candidate = pivsel.NextPivot ();

     // Cost of the previous round; the first round is compared against n.
     double last_cost = n;
     int round = 0;
     bool improving;
     do {
         this.ComputeDistRow (candidate, idxseq, rand, pivs, tmp_items);

         // Mean squared deviation of every stored distance from the mean of its assigned pivot.
         double varX = 0;
         for (int k = 0; k < this.Items.Length; ++k) {
             var entry = this.Items[k];
             var gap = Math.Abs (entry.Dist - pivs[entry.ObjID].mean);
             varX += gap * gap / n;
         }

         // The next pivot is drawn even when this turns out to be the final round.
         candidate = pivsel.NextPivot ();
         ++round;

         double prev_cost = last_cost;
         double curr_cost = this.expected_cost (qrad, varX, varY, n, round, num_indexes, error_factor);
         double derivative = curr_cost - prev_cost;

         if (round % 10 == 1) {
             Console.Write("XXXXXX {0}, iteration: {1}, DB: {2}, ", this, round, DB.Name);
             Console.WriteLine("qcurr_cost: {0}, prev_cost: {1}, varX: {2}, varY: {3}, qrad: {4}",
                                curr_cost, prev_cost, varX, varY, qrad);
         }

         improving = derivative < 0;
         last_cost = curr_cost;
     } while (improving && round < max_iters);

     this.Pivs = pivs.ToArray ();
     Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
 }
Beispiel #7
0
        /// <summary>
        /// Adds random pivots in batches of 16 and keeps going while the ratio between the previous
        /// batch's average expected cost and the current one is at least <c>1 + max_error_factor</c>.
        /// </summary>
        /// <param name="DB">Database the pivots are drawn from.</param>
        /// <param name="num_indexes">Forwarded to <c>expected_cost</c>.</param>
        /// <param name="rand">Random source shared by the statistics estimation, the pivot selector and <c>ComputeDistRow</c>.</param>
        /// <param name="max_error_factor">Tolerance added to 1 to obtain the stopping ratio.</param>
        public EPListOptimizedA(MetricDB DB, int num_indexes, Random rand, double max_error_factor = 0.001)
        {
            this.Items = null;
            var pivs = new List<EPivot> (32);
            var n = DB.Count;
            var idxseq = new DynamicSequentialOrdered ();
            idxseq.Build (DB, RandomSets.GetIdentity (DB.Count));
            var tmp_items = new List<ItemPair> (DB.Count);

            // mean is required by the out signature but is not used afterwards.
            double qrad, varY, mean;
            PivotSelector.EstimateQueryStatistics (DB, rand, 128, 128, out mean, out varY, out qrad);

            var pivsel = new PivotSelectorRandom (n, rand);

            const int avg_window = 16;
            double stop_ratio = max_error_factor + 1;

            // Both start at n, so the first batch runs whenever n >= stop_ratio.
            double avg_prev_cost = n;
            double error_factor = n;

            // Running count passed to expected_cost; starts at 1 for the first pivot.
            var iterID = 1;
            while (error_factor >= stop_ratio) {
                double window_total = 0;
                for (int step = 0; step < avg_window; ++step) {
                    this.ComputeDistRow (pivsel.NextPivot(), idxseq, rand, pivs, tmp_items);

                    // Mean squared deviation of every stored distance from the mean of its assigned pivot.
                    double varX = 0;
                    for (int k = 0; k < this.Items.Length; ++k) {
                        var entry = this.Items[k];
                        var gap = Math.Abs (entry.Dist - pivs[entry.ObjID].mean);
                        varX += gap * gap / n;
                    }

                    window_total += this.expected_cost(qrad, varX, varY, n, iterID, num_indexes);
                    ++iterID;
                }

                double avg_curr_cost = window_total / avg_window;
                error_factor = avg_prev_cost / avg_curr_cost;

                Console.WriteLine("XXXXXXXXXXXXXXXXXXXX {0}, iterID: {1}, DB: {2} ", this, iterID, DB.Name);
                Console.WriteLine("XXX DB: {0}", DB.Name);
                Console.WriteLine("XXX avg_curr_cost: {0}, avg_prev_cost: {1}, error_factor: {2}",
                    avg_curr_cost, avg_prev_cost, error_factor);

                avg_prev_cost = avg_curr_cost;
            }

            this.Pivs = pivs.ToArray ();
            Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
        }
Beispiel #8
0
        /// <summary>
        /// Adds random pivots one at a time and stops at the first iteration whose smoothed cost
        /// (0.99 * previous smoothed cost + 0.01 * expected cost / n) is not below the minimum seen so far.
        /// </summary>
        /// <param name="DB">Database the pivots are drawn from.</param>
        /// <param name="num_indexes">Forwarded to <c>expected_cost</c>.</param>
        /// <param name="rand">Random source shared by the statistics estimation, the pivot selector and <c>ComputeDistRow</c>.</param>
        public EPListOptimizedB(MetricDB DB, int num_indexes, Random rand)
        {
            this.Items = null;
            var pivs = new List<EPivot> (32);
            var n = DB.Count;
            var idxseq = new DynamicSequentialOrdered ();
            idxseq.Build (DB, RandomSets.GetIdentity (DB.Count));
            var tmp_items = new List<ItemPair> (DB.Count);

            // mean is required by the out signature but is not used afterwards.
            double qrad, varY, mean;
            PivotSelector.EstimateQueryStatistics (DB, rand, 64, 128, out mean, out varY, out qrad);

            var pivsel = new PivotSelectorRandom (n, rand);

            // Exponential smoothing weights for the cost sequence.
            double weight_prev = 0.99;
            double weight_curr = 1.0 - weight_prev;

            // error and max_error only appear in the progress message below.
            double max_error = 0.01;
            double error = 1;

            double prev_cost = 1.0;
            double min_cost = 1;

            // NOTE(review): iterID starts at 0 here, so the first expected_cost call receives 0,
            // whereas EPListOptimized and EPListOptimizedA pass 1 on their first call -- confirm
            // this is intended.
            var iterID = 0;
            var window = 1;

            for (;;) {
                double raw_cost = 0;
                for (int step = 0; step < window; ++step) {
                    this.ComputeDistRow (pivsel.NextPivot(), idxseq, rand, pivs, tmp_items);

                    // Mean squared deviation of every stored distance from the mean of its assigned pivot.
                    double varX = 0;
                    for (int k = 0; k < this.Items.Length; ++k) {
                        var entry = this.Items[k];
                        var gap = Math.Abs (entry.Dist - pivs[entry.ObjID].mean);
                        varX += gap * gap / n;
                    }

                    raw_cost += this.expected_cost(qrad, varX, varY, n, iterID, num_indexes);
                    ++iterID;
                }
                raw_cost = (raw_cost / window) / n;

                double curr_cost = weight_prev * prev_cost + weight_curr * raw_cost;

                // Stop at the first smoothed cost that fails to improve on the minimum.
                if (!(curr_cost < min_cost)) {
                    break;
                }
                min_cost = curr_cost;

                if (iterID % 10 == 0) {
                    Console.WriteLine("XXXXXXXXXXXXXXXXXXXX {0}, db: {1}", this, Path.GetFileName(DB.Name));
                    Console.WriteLine("XXX prev-cost: {0:0.000}, curr-cost: {1:0.000}, min-cost: {6}, error: {2:0.00000}, max-error: {3:0.00000}, pivs: {4}, groups: {5}",
                                      prev_cost, curr_cost, error, max_error, iterID, num_indexes, min_cost);
                }

                error = prev_cost - curr_cost;
                prev_cost = curr_cost;
            }

            this.Pivs = pivs.ToArray ();
            Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
        }