The sequential index
Inheritance: DynamicSequential
Example #1
0
File: LC.cs Project: sadit/natix
 /// <summary>
 /// Builds the index over <paramref name="db"/>: while objects remain, one of them is
 /// taken as the center of a new node and the objects gathered in a result set of
 /// capacity <paramref name="bsize"/> are moved from the remaining pool into that node.
 /// </summary>
 /// <param name="db">Database to index; it is also stored in this.DB.</param>
 /// <param name="bsize">Capacity of the "near" result set created for every node.</param>
 /// <param name="rand">Random source handed to the temporary sequential index.</param>
 public virtual void Build(MetricDB db, int bsize, Random rand)
 {
     this.DB = db;
     var total = this.DB.Count;
     // A randomized center choice performs very well, even against "smarter" strategies.
     var pending = new DynamicSequentialOrdered ();
     pending.Build (db, rand);
     this.NODES = new List<Node> (total / bsize + 1);
     // Scratch buffer reused by every iteration for the center-to-object distances.
     var distances = new List<ItemPair> (total);
     while (pending.Count > 0) {
         // Progress report once every 100 created nodes.
         bool reportProgress = this.NODES.Count % 100 == 0;
         if (reportProgress) {
             Console.WriteLine ("XXX {0}, bucketSize: {1}, remain {2}/{3}, db: {4}, date-time: {5}",
                                this, bsize, pending.Count, db.Count, Path.GetFileName (db.Name), DateTime.Now);
         }
         // The center leaves the pool before distances are computed.
         var centerID = pending.GetAnyItem ();
         pending.Remove (centerID);
         distances.Clear ();
         pending.ComputeDistances (this.DB[centerID], distances);
         // presumably "bucket" receives the bsize nearest objects and "farthest" the
         // single farthest one -- confirm against AppendKExtremes.
         var bucket = new Result (bsize);
         var farthest = new Result (1);
         pending.AppendKExtremes (bucket, farthest, distances);
         var node = new Node (centerID);
         this.NODES.Add (node);
         // Bucket members are removed from the pool and then attached to the node.
         pending.Remove (bucket);
         foreach (var member in bucket) {
             node.Add (member.ObjID, member.Dist);
         }
     }
 }
Example #2
0
 /// <summary>
 /// Draws a random object that has not been used as a pivot yet, appends it to
 /// <paramref name="pivs"/> and refreshes the per-object pivot assignment in this.Items.
 /// </summary>
 /// <param name="idxseq">Sequential index used to compute the distances from the pivot; its DB supplies the objects.</param>
 /// <param name="rand">Random source used to draw the pivot.</param>
 /// <param name="already_pivot">Object identifiers already used as pivots; the drawn one is added to it.</param>
 /// <param name="pivs">Pivot list; the descriptor of the new pivot is appended.</param>
 /// <param name="_items">Scratch list, cleared here and then filled by ComputeDistances.</param>
 /// <exception cref="InvalidOperationException">
 /// <paramref name="already_pivot"/> already holds as many entries as there are objects,
 /// so no unused object can be drawn.
 /// </exception>
 protected virtual void ComputeDistRow(DynamicSequentialOrdered idxseq, Random rand, HashSet<int> already_pivot, List<EPivot> pivs, List<ItemPair> _items)
 {
     _items.Clear ();
     int n = idxseq.DB.Count;
     // The rejection sampling below only terminates while an unused object exists;
     // without this guard it would spin forever once every object is a pivot.
     if (already_pivot.Count >= n) {
         throw new InvalidOperationException ("No unused object remains to be selected as a pivot.");
     }
     int piv;
     do {
         piv = rand.Next (0, n);
     } while (!already_pivot.Add (piv)); // Add returns false when piv was already present
     DynamicSequential.Stats stats;
     idxseq.ComputeDistances (idxseq.DB [piv], _items, out stats);
     int pivID = pivs.Count;
     pivs.Add (new EPivot (piv, stats.stddev, stats.mean, stats.min, stats.max, 0, 0));
     if (this.Items == null) {
         // First row: every object starts out assigned to pivot slot 0.
         // NOTE(review): _items is indexed by object identifier here -- assumes
         // ComputeDistances fills it in identifier order; confirm.
         this.Items = new ItemPair[n];
         for (int objID = 0; objID < n; ++objID) {
             this.Items[objID] = new ItemPair (0, _items[objID].Dist);
         }
     } else {
         // The new pivot is the same for every object, so look it up once.
         var new_piv = pivs[pivID];
         for (int objID = 0; objID < n; ++objID) {
             var new_dist = _items[objID].Dist;
             var old_item = this.Items[objID];
             var old_piv = pivs[old_item.ObjID];
             // Keep, per object, the pivot whose distance deviates most from
             // that pivot's mean distance.
             if (Math.Abs (old_item.Dist - old_piv.mean) < Math.Abs (new_dist - new_piv.mean)) {
                 this.Items[objID] = new ItemPair (pivID, new_dist);
             }
         }
     }
 }
Example #3
0
 /// <summary>
 /// Registers object <paramref name="piv"/> as a new pivot, computes its distances
 /// through <paramref name="idxseq"/> and updates the per-object assignment in this.Items.
 /// </summary>
 /// <param name="piv">Identifier of the object that becomes the new pivot.</param>
 /// <param name="idxseq">Sequential index used to compute the distances; its DB supplies the objects.</param>
 /// <param name="rand">Not read by this method.</param>
 /// <param name="pivs">Pivot list; the descriptor of the new pivot is appended.</param>
 /// <param name="_items">Scratch list, cleared here and then filled by ComputeDistances.</param>
 /// <returns>The statistics produced by ComputeDistances for the new pivot.</returns>
 protected virtual DynamicSequential.Stats ComputeDistRow(int piv, DynamicSequentialOrdered idxseq, Random rand, List<EPivot> pivs, List<ItemPair> _items)
 {
     _items.Clear ();
     int total = idxseq.DB.Count;
     DynamicSequential.Stats stats;
     idxseq.ComputeDistances (idxseq.DB [piv], _items, out stats);

     // Slot the new pivot will occupy inside pivs.
     int slot = pivs.Count;
     pivs.Add (new EPivot (piv, stats.stddev, stats.mean, stats.min, stats.max, 0, 0));

     if (this.Items == null) {
         // First row: allocate the table and point every object at pivot slot 0.
         this.Items = new ItemPair[total];
         for (int k = 0; k < total; ++k) {
             this.Items[k] = new ItemPair (0, _items[k].Dist);
         }
         return stats;
     }

     var freshPivot = pivs[slot];
     for (int k = 0; k < total; ++k) {
         var freshDist = _items[k].Dist;
         var current = this.Items[k];
         var currentPivot = pivs[current.ObjID];
         var currentSpread = Math.Abs (current.Dist - currentPivot.mean);
         var freshSpread = Math.Abs (freshDist - freshPivot.mean);
         // Switch to the new pivot only when its distance lies strictly farther
         // from its mean than the currently stored assignment does.
         if (currentSpread < freshSpread) {
             this.Items[k] = new ItemPair (slot, freshDist);
         }
     }
     return stats;
 }
Example #4
0
        /// <summary>
        /// Builds the pivot list with <paramref name="num_pivs"/> pivots. The first pivot
        /// is drawn at random; each following pivot is the object whose stored distance
        /// lies closest to the mean of the pivot it is currently assigned to.
        /// </summary>
        /// <param name="DB">Database whose objects are indexed.</param>
        /// <param name="seed">Seed of the random generator used for the first pivot.</param>
        /// <param name="num_pivs">Number of pivots to create.</param>
        public EPListMeanPivots(MetricDB DB, int seed, int num_pivs)
        {
            this.Items = null;
            var pivs = new List<EPivot> (32);
            var rand = new Random (seed);
            var n = DB.Count;
            var idxseq = new DynamicSequentialOrdered ();
            idxseq.Build (DB, RandomSets.GetIdentity (DB.Count));
            var tmp_items = new List<ItemPair> (DB.Count);
            int next_piv = rand.Next (0, n);
            // The loop header is the only place that advances i. An extra increment
            // in the body used to skip every other iteration, producing only about
            // half of the requested pivots.
            for (int i = 0; i < num_pivs; ++i) {
                var varX = 0.0;
                double min_diff = double.MaxValue;
                this.ComputeDistRow (next_piv, idxseq, rand, pivs, tmp_items);
                for (int objID = 0; objID < this.Items.Length; ++objID) {
                    var u = this.Items [objID];
                    // Deviation of the stored distance from the mean of the assigned pivot.
                    var diff = Math.Abs (u.Dist - pivs [u.ObjID].mean);
                    if (diff < min_diff) {
                        min_diff = diff;
                        next_piv = objID;
                    }
                    varX += diff * diff / n;
                }

                // Report a 1-based count of the pivots created so far.
                Console.WriteLine ("XXXXXX i: {0}, variance: {1}", i + 1, varX);
            }
            this.Pivs = pivs.ToArray ();
            Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
        }
Example #5
0
 /// <summary>
 /// Adds pivots one at a time and stops as soon as the value returned by expected_cost
 /// no longer decreases, or after <paramref name="max_iters"/> pivots.
 /// </summary>
 /// <param name="DB">Database whose objects are indexed.</param>
 /// <param name="seed">Seed of the random generator; also echoed in the progress log.</param>
 /// <param name="num_indexes">Forwarded to expected_cost.</param>
 /// <param name="max_iters">Upper bound on the number of pivots added.</param>
 /// <param name="error_factor">Forwarded to expected_cost.</param>
 public EPListOptimized(MetricDB DB, int seed, int num_indexes, int max_iters, double error_factor)
 {
     Console.WriteLine ("XXX {0}, num_indexes: {1}, max_iters: {2}, error_factor: {3}", this, num_indexes, max_iters, error_factor);
     this.Items = null;
     var pivotList = new List<EPivot> (32);
     var rng = new Random (seed);
     var n = DB.Count;
     var seqIndex = new DynamicSequentialOrdered ();
     seqIndex.Build (DB, RandomSets.GetIdentity (DB.Count));
     var scratch = new List<ItemPair> (DB.Count);
     double qrad, varY, mean;
     PivotSelector.EstimateQueryStatistics (DB, rng, 128, 128, out mean, out varY, out qrad);

     double currentCost = n;
     double costChange;
     var selector = new PivotSelector (n, rng);
     int candidate = selector.NextPivot ();
     int round = 0;
     do {
         this.ComputeDistRow (candidate, seqIndex, rng, pivotList, scratch);

         // Mean squared deviation of every stored distance from the mean of
         // the pivot that object is assigned to.
         double varX = 0;
         foreach (var u in this.Items) {
             var diff = Math.Abs (u.dist - pivotList[u.objID].mean);
             varX += diff * diff / n;
         }

         candidate = selector.NextPivot ();
         ++round;
         double previousCost = currentCost;
         currentCost = this.expected_cost (qrad, varX, varY, n, round, num_indexes, error_factor);
         costChange = currentCost - previousCost;

         // Progress line on rounds 1, 11, 21, ...
         if (round % 10 == 1) {
             Console.Write ("XXXXXX {0}, seed: {1}, iteration: {2}, DB: {3}, ", this, seed, round, DB.Name);
             Console.WriteLine ("qcurr_cost: {0}, prev_cost: {1}, varX: {2}, varY: {3}, qrad: {4}",
                                currentCost, previousCost, varX, varY, qrad);
         }
     } while (costChange < 0 && round < max_iters);

     this.Pivs = pivotList.ToArray ();
     Console.WriteLine ("Number of pivots per group: {0}", this.Pivs.Length);
 }
Example #6
0
        /// <summary>
        /// Adds pivots in windows of 16 and stops once the ratio between the average
        /// expected cost of the previous window and that of the current window drops
        /// below 1 + <paramref name="max_error_factor"/>.
        /// </summary>
        /// <param name="DB">Database whose objects are indexed.</param>
        /// <param name="num_indexes">Forwarded to expected_cost.</param>
        /// <param name="rand">Random source for the query statistics and the pivot selector.</param>
        /// <param name="max_error_factor">Tolerance added to 1 to form the stopping ratio.</param>
        public EPListOptimizedA(MetricDB DB, int num_indexes, Random rand, double max_error_factor = 0.001)
        {
            this.Items = null;
            var pivotList = new List<EPivot> (32);
            var n = DB.Count;
            var seqIndex = new DynamicSequentialOrdered ();
            seqIndex.Build (DB, RandomSets.GetIdentity (DB.Count));
            var scratch = new List<ItemPair> (DB.Count);
            double qrad, varY, mean;
            PivotSelector.EstimateQueryStatistics (DB, rand, 128, 128, out mean, out varY, out qrad);

            var selector = new PivotSelectorRandom (n, rand);
            double previousAverage = n;
            // Any ratio above 1.x is a valid starting value, so begin with n.
            double ratio = n;
            var windowSize = 16;
            var iterID = 1;
            double stopRatio = max_error_factor + 1;

            while (stopRatio <= ratio) {
                double windowTotal = 0;
                int step = 0;
                while (step < windowSize) {
                    this.ComputeDistRow (selector.NextPivot (), seqIndex, rand, pivotList, scratch);
                    // Mean squared deviation of the stored distances from the
                    // mean of each object's assigned pivot.
                    double varX = 0;
                    foreach (var u in this.Items) {
                        var diff = Math.Abs (u.Dist - pivotList[u.ObjID].mean);
                        varX += diff * diff / n;
                    }
                    var cost = this.expected_cost (qrad, varX, varY, n, iterID, num_indexes);
                    windowTotal += cost;
                    ++step;
                    ++iterID;
                }
                double currentAverage = windowTotal / windowSize;
                ratio = previousAverage / currentAverage;

                Console.WriteLine ("XXXXXXXXXXXXXXXXXXXX {0}, iterID: {1}, DB: {2} ", this, iterID, DB.Name);
                Console.WriteLine ("XXX DB: {0}", DB.Name);
                Console.WriteLine ("XXX avg_curr_cost: {0}, avg_prev_cost: {1}, error_factor: {2}",
                    currentAverage, previousAverage, ratio);

                previousAverage = currentAverage;
            }
            this.Pivs = pivotList.ToArray ();
            Console.WriteLine ("Number of pivots per group: {0}", this.Pivs.Length);
        }
Example #7
0
        /// <summary>
        /// Builds the pivot list from <paramref name="num_pivots"/> pivots drawn at random
        /// without repetition.
        /// </summary>
        /// <param name="DB">Database whose objects are indexed.</param>
        /// <param name="num_pivots">Number of pivots to draw.</param>
        /// <param name="rand">Random source used to draw the pivots.</param>
        public EPListRandomPivots(MetricDB DB, int num_pivots, Random rand)
        {
            this.Items = null;
            var usedPivots = new HashSet<int> ();
            var pivotList = new List<EPivot> (32);
            var seqIndex = new DynamicSequentialOrdered ();
            seqIndex.Build (DB, RandomSets.GetIdentity (DB.Count));
            var scratch = new List<ItemPair> (DB.Count);

            int round = 0;
            while (round < num_pivots) {
                this.ComputeDistRow (seqIndex, rand, usedPivots, pivotList, scratch);
                // Progress line on rounds 0, 10, 20, ...
                bool reportProgress = round % 10 == 0;
                if (reportProgress) {
                    Console.WriteLine ("XXXXXX {0}, iteration: {1}/{2}, DB: {3}", this, round, num_pivots, DB.Name);
                }
                ++round;
            }
            this.Pivs = pivotList.ToArray ();
            Console.WriteLine ("Number of pivots per group: {0}", this.Pivs.Length);
        }
Example #8
0
 /// <summary>
 /// Computes the sequence of reference identifiers selected for query <paramref name="q"/>:
 /// in every round the identifier reported as minimum by ComputeDistances is appended,
 /// and every remaining reference whose distance to the query is at least its distance
 /// to that minimum object is discarded.
 /// </summary>
 /// <param name="q">Query object.</param>
 /// <returns>The selected identifiers, in selection order.</returns>
 public int[] GetMetricShell(object q)
 {
     var seq = new List<int> ();
     var idx = new DynamicSequentialOrdered ();
     // optimize the following:
     idx.Build (this.R, RandomSets.GetIdentity (this.R.Count));
     var cache = new List<ItemPair> (this.R.Count);
     while (idx.Count > 0) {
         cache.Clear ();
         DynamicSequential.Stats stats;
         int min_objID, max_objID;
         idx.ComputeDistances (q, cache, out stats, out min_objID, out max_objID);
         if (cache.Count > 0) {
             // The minimum object is the same for the whole round: fetch it once
             // instead of once per cached item.
             // NOTE(review): idx is built over this.R while the identifiers are used
             // to index this.DB -- confirm both share the same object numbering.
             var obj_min = this.DB [min_objID];
             for (int i = 0; i < cache.Count; ++i) {
                 var item = cache[i];
                 var obj_cur = this.DB [item.ObjID];
                 if (item.Dist >= this.DB.Dist (obj_min, obj_cur)) {
                     idx.Remove (item.ObjID);
                 }
             }
         }
         seq.Add (min_objID);
     }
     return seq.ToArray ();
 }
Example #9
0
        /// <summary>
        /// Builds the pivot table: while objects remain, one of them is taken as a pivot,
        /// its "near" (and, when <paramref name="do_far"/> is set, "far") objects are obtained
        /// from SearchExtremes, appended to the item list and removed from the pool.
        /// The results are stored in this.Pivs and this.Items.
        /// </summary>
        /// <param name="DB">Database whose objects are indexed.</param>
        /// <param name="alpha">Forwarded to SearchExtremes.</param>
        /// <param name="min_bs">Forwarded to SearchExtremes.</param>
        /// <param name="seed">Seed of the random permutation used to order the pool.</param>
        /// <param name="do_far">When true, the "far" objects are also stored and removed from the pool.</param>
        public virtual void Build(MetricDB DB, double alpha, int min_bs, int seed, bool do_far)
        {
            // Pool of objects not yet assigned to any pivot, visited in a seeded random order.
            var idxDynamic = new DynamicSequentialOrdered ();
            idxDynamic.Build (DB, RandomSets.GetRandomPermutation(DB.Count, new Random(seed)));
            // this.Items = new ItemPair[DB.Count];
            var pivs = new List<Pivot> (32);
            var items = new List<ItemPair> (DB.Count);
            // Number of pivots created so far.
            int I = 0;
            // Scratch list handed to SearchExtremes on every iteration.
            var extreme_items = new List<ItemPair>(idxDynamic.Count);
            while (idxDynamic.Count > 0) {
                // The pivot is removed from the pool, so every iteration shrinks it.
                var pidx = idxDynamic.GetAnyItem();
                object piv = DB[pidx];
                idxDynamic.Remove(pidx);
                // this.Items[pidx] = new ItemPair(pidx, 0);
                DynamicSequential.Stats stats;
                Pivot piv_data;
                // Smallest / largest distance seen in "near" and smallest seen in "far".
                // far_first keeps double.MaxValue when do_far is false or "far" is empty.
                double near_first = double.MaxValue;
                double near_last = 0;
                double far_first = double.MaxValue;
                int num_near = 0;
                int num_far = 0;
                {
                    IResult near, far;
                    this.SearchExtremes(idxDynamic, extreme_items, piv, alpha, min_bs, out near, out far, out stats);
                    foreach (var pair in near) {
                        near_first = Math.Min (near_first, pair.Dist);
                        near_last = Math.Max (near_last, pair.Dist);
                        items.Add( new ItemPair { ObjID = pair.ObjID, Dist = pair.Dist} );
                    }
                    num_near = near.Count;
                    idxDynamic.Remove(near);
                    if (do_far) {
                        foreach (var pair in far) {
                            far_first = Math.Min (far_first, pair.Dist);
                            items.Add( new ItemPair {ObjID = pair.ObjID, Dist = pair.Dist} );
                        }
                        num_far = far.Count;
                        idxDynamic.Remove(far);
                    }
                    piv_data = new Pivot(pidx, stats.mean, stats.stddev, near_last, far_first, num_near, num_far);
                    pivs.Add(piv_data);
                }
                // Progress report once every 10 pivots; distances are shown divided by stats.max.
                if (I % 10 == 0) {
                    Console.WriteLine ("");
                    Console.WriteLine (this.ToString());
                    Console.WriteLine("-- I {0}> remains: {1}, alpha: {2}, mean: {3}, stddev: {4}, pivot: {5}, min_bs: {6}, db: {7}, do_far: {8}",
                                      I, idxDynamic.Count, alpha, stats.mean, stats.stddev, pidx, min_bs, DB.Name, do_far);
                    if (piv_data.num_near > 0) {
                        Console.WriteLine("-- (NORMVAL) first-near: {0}, last-near: {1}, near-count: {2}",
                                          near_first / stats.max, piv_data.last_near / stats.max, piv_data.num_near);

                    }
                    if (piv_data.num_far > 0) {
                        Console.WriteLine("++ (NORMVAL) first-far: {0}, far-count: {1}",
                                          piv_data.first_far / stats.max, piv_data.num_far);
                    }
                }
                ++I;

                //Console.WriteLine("Number of objects after: {0}",idxDynamic.DOCS.Count);
            }
            Console.WriteLine("Number of pivots per group: {0}", I);
            this.Pivs = pivs.ToArray ();
            this.Items = items.ToArray ();
        }
Example #10
0
        /// <summary>
        /// Adds pivots one at a time while an exponentially smoothed, per-object expected
        /// cost keeps reaching a new minimum; the first round that fails to improve on the
        /// minimum ends the construction.
        /// </summary>
        /// <param name="DB">Database whose objects are indexed.</param>
        /// <param name="num_indexes">Forwarded to expected_cost.</param>
        /// <param name="rand">Random source for the query statistics and the pivot selector.</param>
        public EPListOptimizedB(MetricDB DB, int num_indexes, Random rand)
        {
            this.Items = null;
            var n = DB.Count;
            var pivotList = new List<EPivot> (32);
            var seqIndex = new DynamicSequentialOrdered ();
            seqIndex.Build (DB, RandomSets.GetIdentity (DB.Count));
            var scratch = new List<ItemPair> (DB.Count);
            double qrad, varY, mean;
            PivotSelector.EstimateQueryStatistics (DB, rand, 64, 128, out mean, out varY, out qrad);

            var selector = new PivotSelectorRandom (n, rand);

            // Smoothing weights: the previous smoothed cost dominates the new sample.
            double weight_prev = 0.99;
            double weight_curr = 1.0 - weight_prev;

            // max_error and error are only reported in the progress log.
            double max_error = 0.01;
            double error = 1;
            double prev_cost = 1.0;
            double min_cost = 1;

            var iterID = 0;
            var window = 1;

            for (;;) {
                double windowTotal = 0;
                int step = 0;
                while (step < window) {
                    this.ComputeDistRow (selector.NextPivot (), seqIndex, rand, pivotList, scratch);
                    // Mean squared deviation of the stored distances from the
                    // mean of each object's assigned pivot.
                    double varX = 0;
                    foreach (var u in this.Items) {
                        var diff = Math.Abs (u.Dist - pivotList[u.ObjID].mean);
                        varX += diff * diff / n;
                    }
                    windowTotal += this.expected_cost (qrad, varX, varY, n, iterID, num_indexes);
                    ++step;
                    ++iterID;
                }
                // Window average, scaled by the database size.
                double sample = (windowTotal / window) / n;
                double smoothed = weight_prev * prev_cost + weight_curr * sample;

                // Stop at the first round that does not set a new minimum.
                if (!(smoothed < min_cost)) {
                    break;
                }
                min_cost = smoothed;

                if (iterID % 10 == 0) {
                    Console.WriteLine ("XXXXXXXXXXXXXXXXXXXX {0}, db: {1}", this, Path.GetFileName (DB.Name));
                    Console.WriteLine ("XXX prev-cost: {0:0.000}, curr-cost: {1:0.000}, min-cost: {6}, error: {2:0.00000}, max-error: {3:0.00000}, pivs: {4}, groups: {5}",
                                       prev_cost, smoothed, error, max_error, iterID, num_indexes, min_cost);
                }
                // The log above shows the error of the previous round; refresh it afterwards.
                error = prev_cost - smoothed;
                prev_cost = smoothed;
            }
            this.Pivs = pivotList.ToArray ();
            Console.WriteLine ("Number of pivots per group: {0}", this.Pivs.Length);
        }