Пример #1
0
 /// <summary>
 /// Creates a setup whose parameters are derived from <paramref name="n"/>.
 /// </summary>
 /// <param name="n">Size handed to the random pivot selector and used to derive the step width
 /// (presumably the number of indexed objects; confirm with callers).</param>
 /// <param name="expectedK">Value stored in <c>ExpectedK</c>.</param>
 public ANNISetup(int n, int expectedK)
 {
     Selector = new PivotSelectorRandom (n);

     // Truncated square root of n, plus one.
     int width = (int)Math.Sqrt (n) + 1;
     StepWidth = width;

     // Fixed number of queries; the alternative kept for reference was (int)Math.Log (n, 2) + 1.
     NumberQueries = 32;
     ExpectedK = expectedK;
 }
Пример #2
0
 /// <summary>
 /// Creates a setup from explicitly supplied parameters; every argument is stored unchanged.
 /// </summary>
 /// <param name="sel">Pivot selector stored in <c>Selector</c>.</param>
 /// <param name="expectedK">Value stored in <c>ExpectedK</c>.</param>
 /// <param name="alphaStop">Value stored in <c>AlphaStop</c>.</param>
 /// <param name="step">Value stored in <c>StepWidth</c>.</param>
 /// <param name="numQueries">Value stored in <c>NumberQueries</c>.</param>
 public ANNISetup(PivotSelector sel, int expectedK, double alphaStop, int step, int numQueries)
 {
     Selector = sel;
     ExpectedK = expectedK;
     AlphaStop = alphaStop;
     StepWidth = step;
     NumberQueries = numQueries;
 }
Пример #3
0
 /// <summary>
 /// Builds the pivot list by drawing pivots from a seeded selector and assigning each
 /// object of <paramref name="DB"/> to one of them.
 /// </summary>
 /// <remarks>
 /// Every object is first assigned to an initial pivot. Then, for each of the
 /// <paramref name="num_pivs"/> additional pivots, a bounded number of objects is popped
 /// from a priority queue and re-assigned to the new pivot when its distance to that pivot
 /// deviates more from the pivot's mean than under its current assignment.
 /// </remarks>
 /// <param name="DB">Database supplying the objects and the distance function.</param>
 /// <param name="seed">Seed for the random generator shared by pivot selection and statistics.</param>
 /// <param name="num_pivs">Number of pivots drawn in addition to the initial one.</param>
 public EPListRandomPivotsPriorized(MetricDB DB, int seed, int num_pivs)
 {
     var n = DB.Count;
     this.Items = new ItemPair[n];
     var pivs = new List<EPivot> (32);
     var rand = new Random (seed);
     var pivsel = new PivotSelector (n, rand);

     // Initial pivot: every object starts out assigned to pivot index 0.
     var piv = pivsel.NextPivot ();
     var pivOBJ = DB [piv];
     for (int objID = 0; objID < n; ++objID) {
         var d = DB.Dist(pivOBJ, DB[objID]);
         this.Items[objID] = new ItemPair(0, d);
     }
     double mean, variance;
     this.Statistics (out mean, out variance);
     pivs.Add(new EPivot(piv, Math.Sqrt(variance), mean, 0, 0, 0, 0));

     // Orders items by how far their stored distance lies from the mean of the pivot
     // they reference (ItemPair.objID is used as an index into pivs here); the smaller
     // deviation sorts first.
     var item_cmp = new Comparison<ItemPair>((x,y) => {
         var diff_x = Math.Abs (x.dist - pivs[x.objID].mean);
         var diff_y = Math.Abs (y.dist - pivs[y.objID].mean);
         return diff_x.CompareTo(diff_y);
     });
     var queue = new SkipList2<int> (0.5, (x,y) => item_cmp (this.Items [x], this.Items [y]));
     for (int objID = 0; objID < n; ++objID) {
         queue.Add(objID, null);
     }

     // The queue holds exactly n entries at the start of every round (everything popped
     // is pushed back), so never pop more than n. The guard also avoids dividing by zero
     // when num_pivs is 0, in which case the loop below does not run at all.
     var max_review = num_pivs > 0 ? Math.Min (n, 2 * n / num_pivs) : 0;
     var list = new List<int> ();
     for (int i = 0; i < num_pivs; ++i) {
         Console.WriteLine("XXXXXX BEGIN {0} i: {1}", this, i);
         piv = pivsel.NextPivot();
         // Distances in this round must be measured against the pivot just drawn,
         // not against the initial one.
         pivOBJ = DB[piv];
         double piv_mean, piv_variance, qrad;
         PivotSelector.EstimatePivotStatistics(DB, rand, pivOBJ, 256, out piv_mean, out piv_variance, out qrad);
         var pivID = pivs.Count;
         // Record this pivot's own estimated mean so the comparator evaluates deviations
         // relative to the right pivot.
         pivs.Add(new EPivot(piv, Math.Sqrt(piv_variance), piv_mean, 0, 0, 0, 0));
         list.Clear();
         for (int s = 0; s < max_review; ++s) {
             // Pop before touching Items[objID]: the queue ordering depends on it.
             var objID = queue.RemoveFirst();
             var d = DB.Dist(DB[objID], pivOBJ);
             var new_item = new ItemPair(pivID, d);
             // Re-assign only when the new pivot gives a larger deviation from its mean.
             if (item_cmp(new_item, this.Items[objID]) > 0) {
                 this.Items[objID] = new_item;
             }
             list.Add (objID);
         }
         // Re-insert the reviewed objects so they are ordered by their updated items.
         foreach (var objID in list) {
             queue.Add(objID, null);
         }
         Console.WriteLine("XXXXXX END {0} i: {1}", this, i);
     }
     this.Pivs = pivs.ToArray ();
     Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
 }
Пример #4
0
 /// <summary>
 /// Adds pivots one at a time, re-evaluating an expected-cost estimate after each one,
 /// and stops as soon as the cost no longer decreases or <paramref name="max_iters"/>
 /// rounds have run. At least one round is always executed.
 /// </summary>
 /// <param name="DB">Database supplying the objects.</param>
 /// <param name="seed">Seed for the random generator shared by all randomized steps.</param>
 /// <param name="num_indexes">Forwarded to the cost estimate.</param>
 /// <param name="max_iters">Upper bound on the number of rounds.</param>
 /// <param name="error_factor">Forwarded to the cost estimate.</param>
 public EPListOptimized(MetricDB DB, int seed, int num_indexes, int max_iters, double error_factor)
 {
     Console.WriteLine ("XXX {0}, num_indexes: {1}, max_iters: {2}, error_factor: {3}", this, num_indexes, max_iters, error_factor);
     this.Items = null;
     var pivotList = new List<EPivot> (32);
     var rng = new Random (seed);
     var total = DB.Count;
     var seqIndex = new DynamicSequentialOrdered ();
     seqIndex.Build (DB, RandomSets.GetIdentity (DB.Count));
     var scratchItems = new List<ItemPair> (DB.Count);

     // Query statistics are estimated once, before any pivot is drawn from rng.
     double queryRadius, queryVariance, queryMean;
     PivotSelector.EstimateQueryStatistics (DB, rng, 128, 128, out queryMean, out queryVariance, out queryRadius);

     var selector = new PivotSelector (total, rng);
     int candidate = selector.NextPivot();
     double cost = total;
     int round = 0;
     while (true) {
         this.ComputeDistRow (candidate, seqIndex, rng, pivotList, scratchItems);

         // Mean squared deviation of each item's distance from its pivot's mean.
         double spread = 0;
         var items = this.Items;
         for (int k = 0; k < items.Length; ++k) {
             var item = items[k];
             var deviation = Math.Abs (item.dist - pivotList[item.objID].mean);
             spread += deviation * deviation / total;
         }

         // The next candidate is drawn before the stop test, exactly once per round.
         candidate = selector.NextPivot();
         ++round;
         double previous = cost;
         cost = this.expected_cost(queryRadius, spread, queryVariance, total, round, num_indexes, error_factor);

         if (round % 10 == 1) {
             Console.Write("XXXXXX {0}, seed: {1}, iteration: {2}, DB: {3}, ", this, seed, round, DB.Name);
             Console.WriteLine("qcurr_cost: {0}, prev_cost: {1}, varX: {2}, varY: {3}, qrad: {4}",
                                cost, previous, spread, queryVariance, queryRadius);
         }

         // Continue only while the cost is still strictly decreasing and rounds remain.
         bool improving = cost - previous < 0;
         if (!(improving && round < max_iters)) {
             break;
         }
     }
     this.Pivs = pivotList.ToArray ();
     Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
 }
Пример #5
0
        /// <summary>
        /// Starts the build over <paramref name="db"/> with a single pivot: allocates the
        /// per-object tables, registers the first pivot, and records for every object its
        /// distance to that pivot.
        /// </summary>
        /// <param name="db">Database to index; stored in <c>DB</c>.</param>
        /// <param name="pivsel">Selector that supplies the first pivot.</param>
        public void PartialBuild(MetricDB db, PivotSelector pivsel)
        {
            this.DB = db;
            int n = this.DB.Count;
            this.ACT = new List<int>(256); // initial capacity only
            this.CT = new int[n];
            this.DT = new double[n];

            const int firstSlot = 0;
            var pivotObjID = pivsel.NextPivot ();
            this.ACT.Add (pivotObjID);
            // NOTE(review): these two stores are overwritten by the loop below, which does
            // not skip the pivot itself, so the -1 marker does not survive. Confirm whether
            // the pivot was meant to be excluded from the loop.
            this.DT [pivotObjID] = 0.0;
            this.CT [pivotObjID] = -1;
            var pivotObj = this.DB [pivotObjID];

            // Every object (pivot included) is assigned to slot 0 with its distance to the pivot.
            int docID = 0;
            while (docID < n) {
                this.CT [docID] = firstSlot;
                this.DT [docID] = this.DB.Dist (this.DB [docID], pivotObj);
                ++docID;
            }
        }
Пример #6
0
        /// <summary>
        /// Resets the cluster list and creates a single cluster around the first pivot,
        /// adding every other object of <paramref name="db"/> with its distance to that pivot.
        /// </summary>
        /// <param name="db">Database to index; stored in <c>DB</c>.</param>
        /// <param name="pivsel">Selector that supplies the cluster center.</param>
        public void PartialBuild(MetricDB db, PivotSelector pivsel)
        {
            this.DB = db;
            int total = this.DB.Count;
            this.clusters.Clear();

            Node root = new Node ();
            var centerID = pivsel.NextPivot ();
            root.objID = centerID;
            this.clusters.Add (root);

            // Record the distance from each object to the center; the center itself is left out.
            var center = this.DB [centerID];
            for (int docID = 0; docID < total; ++docID) {
                if (docID != centerID) {
                    var dist = this.DB.Dist (this.DB [docID], center);
                    root.Add (docID, dist);
                }
            }
        }