示例#1
0
文件: HSP_app.cs 项目: KeithNel/natix
        /// <summary>
        /// Computes the edges of an object (static mode).
        /// </summary>
        /// <param name="q">Object whose edges are computed.</param>
        /// <param name="db">Database providing the objects and the distance function.</param>
        /// <param name="subset">Optional mask handed to the candidate <c>Set</c>; when null every
        /// position in <c>[0, db.Count)</c> starts as a candidate.</param>
        /// <returns>The identifiers (indexes into <paramref name="db"/>) of the selected neighbors,
        /// in the order they were selected.</returns>
        public static IList<int> ComputeEdges(object q,MetricDB db, bool[] subset=null)
        {
            IList<int> edges=new List<int>();
            Set Candidates;
            if (subset==null) Candidates=new Set(db.Count);
            else Candidates=new Set(db.Count,subset);

            // distances[j] holds dist(q, db[objs[j]]); both arrays are reordered together below
            // (presumably by ascending distance -- confirm against Sorting.Sort).
            // NOTE(review): after the sort, candidate positions index the sorted arrays, not the
            // database; confirm that `subset` is expressed in that same index space.
            double[] distances=new double[db.Count];
            int[] objs=new int[db.Count]; // <-- used in the sorting
            for(int j=0;j<db.Count;j++)
            {
                distances[j]=db.Dist(q,db[j]);
                objs[j]=j;
            }
            Sorting.Sort(distances,objs);

            // discard leading candidates at distance zero from q
            while (Candidates.Cardinality>0 && distances[Candidates.First]==0 )
                Candidates.Remove(Candidates.First);

            while (Candidates.Cardinality>0)
            {
                // get closest element
                int closest_id=objs[Candidates.First];
                Candidates.actual=Candidates.First;
                edges.Add(closest_id);

                // the selected neighbor does not change during the sweep, so fetch it once
                var closest=db[closest_id];

                // remove elements in the forbidden area: every candidate that is strictly
                // farther from q than from the selected neighbor
                while(Candidates.actual != -1)
                {
                    if ( distances[Candidates.actual] > db.Dist(closest,db[objs[Candidates.actual]]))
                    {
                        // no explicit Next() here: the cursor is re-examined right after the
                        // removal (assumes Remove moves `actual` forward -- confirm against Set)
                        Candidates.Remove(Candidates.actual);
                        continue;
                    }
                    Candidates.Next();
                }
            }

            return edges;
        }
 /// <summary>
 /// Builds the list by assigning every object to one pivot: a first pivot covers all
 /// objects, then <paramref name="num_pivs"/> further pivots each review a slice of the
 /// objects and take over those whose distance deviates more from the new pivot's mean
 /// than from the mean of their current pivot.
 /// </summary>
 /// <param name="DB">Database providing the objects and the distance function.</param>
 /// <param name="seed">Seed of the random generator used for pivot selection and sampling.</param>
 /// <param name="num_pivs">Number of pivots added after the initial one. Values below one add none.</param>
 public EPListRandomPivotsPriorized(MetricDB DB, int seed, int num_pivs)
 {
     var n = DB.Count;
     this.Items = new ItemPair[n];
     var pivs = new List<EPivot> (32);
     var rand = new Random (seed);
     var pivsel = new PivotSelector (n, rand);
     var piv = pivsel.NextPivot ();
     var pivOBJ = DB [piv];
     // Every object starts assigned to pivot 0. Note that the first ItemPair field stores
     // the index of the pivot inside `pivs`, not a database identifier.
     for (int objID = 0; objID < n; ++objID) {
         var d = DB.Dist(pivOBJ, DB[objID]);
         this.Items[objID] = new ItemPair(0, d);
     }
     double mean, variance;
     this.Statistics (out mean, out variance);
     pivs.Add(new EPivot(piv, Math.Sqrt(variance), mean, 0, 0, 0, 0));
     // Orders items by how far their stored distance is from the mean of their pivot.
     var item_cmp = new Comparison<ItemPair>((x,y) => {
         var diff_x = Math.Abs (x.dist - pivs[x.objID].mean);
         var diff_y = Math.Abs (y.dist - pivs[y.objID].mean);
         return diff_x.CompareTo(diff_y);
     });
     var queue = new SkipList2<int> (0.5, (x,y) => item_cmp (this.Items [x], this.Items [y]));
     for (int objID = 0; objID < n; ++objID) {
         queue.Add(objID, null);
     }
     // Each pivot reviews 2n/num_pivs objects, but never more than the n objects held by
     // the queue (the unclamped value is 2n when num_pivs == 1 and would drain an empty queue).
     var max_review = num_pivs > 0 ? Math.Min (n, 2 * n / num_pivs) : 0;
     var list = new List<int> ();
     for (int i = 0; i < num_pivs; ++i) {
         Console.WriteLine("XXXXXX BEGIN {0} i: {1}", this, i);
         piv = pivsel.NextPivot();
         // Distances below must be measured against the pivot under evaluation.
         pivOBJ = DB [piv];
         double piv_mean, piv_variance, qrad;
         PivotSelector.EstimatePivotStatistics(DB, rand, pivOBJ, 256, out piv_mean, out piv_variance, out qrad);
         var pivID = pivs.Count;
         // Store this pivot's own estimated mean; the comparator reads it through pivs[pivID].mean.
         pivs.Add(new EPivot(piv, Math.Sqrt(piv_variance), piv_mean, 0, 0, 0, 0));
         list.Clear();
         for (int s = 0; s < max_review; ++s) {
             var objID = queue.RemoveFirst();
             var d = DB.Dist(DB[objID], pivOBJ);
             var new_item = new ItemPair(pivID, d);
             // Reassign the object when the new pivot gives a larger deviation from its mean.
             if (item_cmp(new_item, this.Items[objID]) > 0) {
                 this.Items[objID] = new_item;
             }
             list.Add (objID);
         }
         // Reinsert the reviewed objects so the queue reflects their (possibly updated) items.
         foreach (var objID in list) {
             queue.Add(objID, null);
         }
         Console.WriteLine("XXXXXX END {0} i: {1}", this, i);
     }
     this.Pivs = pivs.ToArray ();
     Console.WriteLine("Number of pivots per group: {0}", this.Pivs.Length);
 }
示例#3
0
 /// <summary>
 /// Estimates distance statistics by drawing random query objects from the database and
 /// measuring each one against a random sample of database objects.
 /// </summary>
 /// <param name="DB">Database providing the objects and the distance function.</param>
 /// <param name="rand">Random generator used to draw queries and samples.</param>
 /// <param name="num_queries">Number of query objects drawn.</param>
 /// <param name="sample_size">Number of objects each query is compared against.</param>
 /// <param name="mean">Mean of all measured distances.</param>
 /// <param name="varY">Mean of the squared distances minus the squared mean.</param>
 /// <param name="qrad">Largest, over the queries, of the smallest positive distance seen by
 /// that query; 0 when no query observed a positive distance.</param>
 public static void EstimateQueryStatistics(MetricDB DB, Random rand, int num_queries, int sample_size, out double mean, out double varY, out double qrad)
 {
     var n = DB.Count;
     var N = num_queries * sample_size;
     mean = 0.0;
     var square_mean = 0.0;
     qrad = 0;
     for (int qID = 0; qID < num_queries; ++qID) {
         var q = DB[ rand.Next(0, n) ];
         var min = double.MaxValue;
         var found_positive = false;
         for (int sampleID = 0; sampleID < sample_size; ++sampleID) {
             var u = DB[ rand.Next(0, n) ];
             var d = DB.Dist(q, u);
             mean += d / N;
             square_mean += d * d / N;
             if (d > 0) {
                 min = Math.Min(min, d);
                 found_positive = true;
             }
         }
         // A query whose samples were all at distance zero has no nearest positive distance;
         // folding its double.MaxValue placeholder into qrad would pin the result there.
         if (found_positive) {
             qrad = Math.Max (min, qrad);
         }
     }
     varY = square_mean - mean * mean;
 }
示例#4
0
文件: Main.cs 项目: vfaby/natix
 /// <summary>
 /// Checks that two databases hold the same objects position by position, reporting
 /// progress on the console.
 /// </summary>
 /// <param name="db0">Reference database; its distance function is used for the comparison.</param>
 /// <param name="db1">Database compared against <paramref name="db0"/>.</param>
 /// <exception cref="Exception">The databases differ in size, or some pair of objects at
 /// the same position is at a non-zero distance.</exception>
 public void AssertEqualityDB(MetricDB db0, MetricDB db1)
 {
     Console.WriteLine("Checking equality between original and saved databases");
     // Without this check a longer db1 would pass unnoticed and a shorter one would fail
     // with an unrelated indexing error.
     if (db0.Count != db1.Count) {
         throw new Exception("=== ASSERTION ERROR: databases are not identical");
     }
     for (int i = 0; i < db0.Count; ++i) {
         var d = db0.Dist(db0[i], db1[i]);
         if (d != 0) {
             throw new Exception("=== ASSERTION ERROR: databases are not identical");
         }
     }
     Console.WriteLine("OK");
 }
示例#5
0
 /// <summary>
 /// Walks the pivots in order, pushing each pivot into <paramref name="res"/> and voting
 /// (incrementing <paramref name="A"/>) for every stored item whose distance to the pivot
 /// differs from the query's distance by at most the current covering radius.
 /// </summary>
 /// <param name="db">Database providing the objects and the distance function.</param>
 /// <param name="q">Query object.</param>
 /// <param name="K">Not read by this method.</param>
 /// <param name="res">Result set; supplies the covering radius and receives the pivots.</param>
 /// <param name="A">Per-object vote counters, indexed by object identifier.</param>
 /// <param name="current_rank_A">Not read by this method.</param>
 /// <returns>Number of distances computed, one per pivot visited.</returns>
 public override int SearchKNN(MetricDB db, object q, int K, IResult res, short[] A, short current_rank_A)
 {
     int cursor = 0;      // position in this.Items of the next unread bucket
     int evaluated = 0;
     foreach (var piv in this.Pivs) {
         var dqp = db.Dist (q, db [piv.objID]);
         res.Push (piv.objID, dqp);
         evaluated++;

         // near bucket: scanned only when the query ball reaches the near radius
         int nearEnd = cursor + piv.num_near;
         if (dqp <= piv.last_near + res.CoveringRadius * this.ApproxFactor) {
             for (; cursor < nearEnd; ++cursor) {
                 var entry = this.Items [cursor];
                 if (Math.Abs (entry.Dist - dqp) <= res.CoveringRadius) {
                     ++A [entry.ObjID];
                 }
             }
         } else {
             cursor = nearEnd;
         }

         // far bucket: scanned only when the query ball reaches the external radius
         int farEnd = cursor + piv.num_far;
         if (dqp + res.CoveringRadius * this.ApproxFactor >= piv.first_far) {
             for (; cursor < farEnd; ++cursor) {
                 var entry = this.Items [cursor];
                 if (Math.Abs (entry.Dist - dqp) <= res.CoveringRadius) {
                     ++A [entry.ObjID];
                 }
             }
         } else {
             cursor = farEnd;
         }

         // stop once the scaled query ball lies entirely on one side of this pivot's rings
         bool insideNear = dqp + res.CoveringRadius*this.ApproxFactor <= piv.last_near;
         if (insideNear || piv.first_far <= dqp - res.CoveringRadius*this.ApproxFactor) {
             break;
         }
     }
     return evaluated;
 }
示例#6
0
 /// <summary>
 /// Estimates distance statistics for one pivot by measuring it against a random sample
 /// of database objects.
 /// </summary>
 /// <param name="DB">Database providing the objects and the distance function.</param>
 /// <param name="rand">Random generator used to draw the sample.</param>
 /// <param name="piv">Pivot object.</param>
 /// <param name="sample_size">Number of objects drawn (with repetition).</param>
 /// <param name="mean">Mean of the sampled distances.</param>
 /// <param name="variance">Mean of the squared distances minus the squared mean.</param>
 /// <param name="qrad">Smallest positive sampled distance; <c>double.MaxValue</c> when no
 /// sampled distance was positive.</param>
 public static void EstimatePivotStatistics(MetricDB DB, Random rand, object piv, int sample_size, out double mean, out double variance, out double qrad)
 {
     var n = DB.Count;
     mean = 0.0;
     var square_mean = 0.0;
     var min = double.MaxValue;
     for (int sampleID = 0; sampleID < sample_size; ++sampleID) {
         var u = DB[ rand.Next(0, n) ];
         var d = DB.Dist(piv, u);
         mean += d / sample_size;
         square_mean += d * d / sample_size;
         if (d > 0) {
             min = Math.Min (min, d);
         }
     }
     // qrad was always zero at this point, so the former averaging branch could never run;
     // the result is simply the smallest positive distance observed.
     qrad = min;
     variance = square_mean - mean * mean;
 }
示例#7
0
文件: VPTX.cs 项目: sadit/natix
            /// <summary>
            /// Searches this subtree: pushes the node's reference object into
            /// <paramref name="res"/> and descends into each child whose side of the median
            /// is still reachable within the current covering radius.
            /// </summary>
            /// <param name="q">Query object.</param>
            /// <param name="res">Result set; its covering radius is re-read before each descent.</param>
            /// <param name="db">Database providing the objects and the distance function.</param>
            public void SearchKNN(object q, IResult res, MetricDB db)
            {
                var dist = db.Dist (db [this.refID], q);
                res.Push (this.refID, dist);

                if (this.left != null) {
                    if (dist - res.CoveringRadius <= this.median) {
                        this.left.SearchKNN (q, res, db);
                    }
                }

                // evaluated after the left descent, so it sees any radius change made there
                if (this.right != null) {
                    if (this.median <= dist + res.CoveringRadius) {
                        this.right.SearchKNN (q, res, db);
                    }
                }
            }
示例#8
0
文件: VPTX.cs 项目: sadit/natix
            /// <summary>
            /// Builds the subtree for <paramref name="items"/>: picks a random reference
            /// object, sorts the items by distance to it, and splits the remainder into a
            /// left and a right child around a cut position.
            /// </summary>
            /// <param name="items">Object identifiers for this subtree; reordered in place by the sort.</param>
            /// <param name="db">Database providing the objects and the distance function.</param>
            /// <param name="rand">Random generator used to choose the reference object.</param>
            /// <param name="isleft">Selects the cut position: about one tenth of the items
            /// when true, the middle when false.</param>
            public Node(int[] items, MetricDB db, Random rand, bool isleft)
            {
                // progress report every 1000 constructed nodes
                if (ADVANCE % 1000 == 0) {
                    Console.WriteLine("Advance {0}", ADVANCE);
                }
                ++ADVANCE;

                var total = items.Length;
                if (total == 1) {
                    this.refID = items[0];
                    this.median = 0;
                    return;
                }

                var dists = new double[total];
                this.refID = items[rand.Next(total)];
                for (int k = 0; k < total; ++k) {
                    dists[k] = db.Dist(db[items[k]], db[this.refID]);
                }

                Sorting.Sort(dists, items);
                // adjusting in case of two identical items
                this.refID = items[0];

                int cut = isleft ? (total + 1) / 10 + 1 : (total + 1) / 2;
                this.median = dists[cut];

                // position 0 is the reference itself; [1, cut) goes left, [cut, total) goes right
                var leftItems = new int[cut - 1];
                var rightItems = new int[total - leftItems.Length - 1];
                Array.Copy(items, 1, leftItems, 0, leftItems.Length);
                Array.Copy(items, cut, rightItems, 0, rightItems.Length);

                // drop local references before recursing so they are not held by this frame
                dists = null;
                items = null; // the caller still owns the array

                if (leftItems.Length > 0) {
                    this.left = new Node(leftItems, db, rand, true);
                }
                leftItems = null;

                if (rightItems.Length > 0) {
                    this.right = new Node(rightItems, db, rand, false);
                }
            }
示例#9
0
文件: VPT.cs 项目: sadit/natix
            /// <summary>
            /// Builds the subtree for <paramref name="items"/>: picks a random reference
            /// object, sorts the items by distance to it, and splits the remainder at the
            /// middle position into a left and a right child.
            /// </summary>
            /// <param name="items">Object identifiers for this subtree; reordered in place by the sort.</param>
            /// <param name="db">Database providing the objects and the distance function.</param>
            /// <param name="rand">Random generator used to choose the reference object.</param>
            public Node(int[] items, MetricDB db, Random rand)
            {
                var total = items.Length;
                if (total == 1) {
                    this.refID = items[0];
                    this.median = 0;
                    return;
                }

                var dists = new double[total];
                this.refID = items[rand.Next(0, total)];
                for (int k = 0; k < total; ++k) {
                    dists[k] = db.Dist(db[items[k]], db[this.refID]);
                }

                Sorting.Sort(dists, items);
                // adjusting in case of two identical items
                this.refID = items[0];

                int mid = (total + 1) / 2;
                this.median = dists[mid];

                // position 0 is the reference itself; [1, mid) goes left, [mid, total) goes right
                var leftItems = new int[mid - 1];
                var rightItems = new int[total - leftItems.Length - 1];
                Array.Copy(items, 1, leftItems, 0, leftItems.Length);
                Array.Copy(items, mid, rightItems, 0, rightItems.Length);

                // drop local references before recursing so they are not held by this frame
                dists = null;
                items = null; // the caller still owns the array

                if (leftItems.Length > 0) {
                    this.left = new Node(leftItems, db, rand);
                }
                leftItems = null;

                if (rightItems.Length > 0) {
                    this.right = new Node(rightItems, db, rand);
                }
            }
示例#10
0
 /// <summary>
 /// Adds <paramref name="objID"/> to the pivot list unless it is too close to a pivot
 /// already selected.
 /// </summary>
 /// <param name="db">Database providing the objects and the distance function.</param>
 /// <param name="alpha">Threshold on the ratio between the distance to the nearest pivot
 /// and <paramref name="dmax"/>; candidates below it are rejected.</param>
 /// <param name="dmax">Distance used to normalize the nearest-pivot distance.</param>
 /// <param name="objID">Identifier of the candidate object.</param>
 protected void AppendPivot(MetricDB db, double alpha, double dmax, int objID)
 {
     var candidate = db [objID];
     var nearest = Double.MaxValue;
     // distance from the candidate to its closest existing pivot
     for (int k = 0; k < pivs.Count; ++k) {
         var d = db.Dist (candidate, db [pivs[k]]);
         if (d < nearest) {
             nearest = d;
         }
     }
     if (!(nearest / dmax < alpha)) {
         Console.WriteLine ("**** computing pivot alpha={0}, pivots={1}, {2}", alpha, pivs.Count, DateTime.Now);
         this.pivs.Add (objID);
     }
 }
示例#11
0
        /// <summary>
        /// Estimates the largest distance in the database: each object is chosen as a
        /// probe with probability <paramref name="prob"/>, and every chosen probe is
        /// measured against all objects.
        /// </summary>
        /// <param name="db">Database providing the objects and the distance function.</param>
        /// <param name="prob">Probability of using an object as a probe.</param>
        /// <returns>The largest distance observed, or 0 when no probe was chosen.</returns>
        protected double EstimateMaxDistance(MetricDB db, double prob)
        {
            var rand = RandomSets.GetRandom ();
            double largest = 0;

            for (int probeID = 0; probeID < db.Count; ++probeID) {
                var probe = db[probeID];
                if (!(rand.NextDouble() <= prob)) {
                    continue;
                }
                for (int otherID = 0; otherID < db.Count; ++otherID) {
                    var d = db.Dist(probe, db [otherID]);
                    if (d > largest) {
                        largest = d;
                    }
                }
            }
            return largest;
        }
示例#12
0
 /// <summary>
 /// Walks the pivots in order, pushing each pivot into <paramref name="res"/> and voting
 /// (incrementing <paramref name="A"/>) for every item read from <c>DiskItems</c> whose
 /// distance to the pivot differs from the query's distance by at most the current
 /// covering radius.
 /// </summary>
 /// <param name="db">Database providing the distance function.</param>
 /// <param name="q">Query object.</param>
 /// <param name="K">Not read by this method.</param>
 /// <param name="res">Result set; supplies the covering radius and receives the pivots.</param>
 /// <param name="A">Per-object vote counters, indexed by object identifier.</param>
 /// <returns>Number of distances computed, one per pivot visited.</returns>
 public int SearchKNN(MetricDB db, object q, int K, IResult res, short[] A)
 {
     this.CachePivObjects (db);
     int offset = 0;      // position in DiskItems of the next unread bucket
     int evaluated = 0;
     for (int pivID = 0; pivID < this.Pivs.Length; ++pivID) {
         var piv = this.Pivs [pivID];
         var dqp = db.Dist (q, this._PivObjects [pivID]);
         res.Push (piv.objID, dqp);
         evaluated++;

         // near bucket: read only when the query ball reaches the near radius
         var nearCount = piv.num_near;
         if (dqp <= piv.last_near + res.CoveringRadius) {
             var nearItems = this.DiskItems.ReadArray (offset, nearCount);
             foreach (var entry in nearItems) {
                 if (Math.Abs (entry.Dist - dqp) <= res.CoveringRadius) {
                     ++A [entry.ObjID];
                 }
             }
         }
         offset += nearCount;

         // far bucket: read only when the query ball reaches the external radius
         var farCount = piv.num_far;
         if (dqp + res.CoveringRadius >= piv.first_far) {
             var farItems = this.DiskItems.ReadArray (offset, farCount);
             foreach (var entry in farItems) {
                 if (Math.Abs (entry.Dist - dqp) <= res.CoveringRadius) {
                     ++A [entry.ObjID];
                 }
             }
         }
         offset += farCount;

         // stop once the query ball lies entirely on one side of this pivot's rings
         bool insideNear = dqp + res.CoveringRadius <= piv.last_near;
         if (insideNear || piv.first_far <= dqp - res.CoveringRadius) {
             break;
         }
     }
     return evaluated;
 }
示例#13
0
文件: PermTree.cs 项目: sadit/natix
 /// <summary>
 /// Searches this subtree. A leaf compares the query with every object in its bag;
 /// an inner node visits its children in increasing L1 distance between the query's
 /// fingerprint and the child key, until the candidate budget is used up.
 /// </summary>
 /// <param name="db">Database providing the objects and the distance function.</param>
 /// <param name="q">Query object.</param>
 /// <param name="res">Result set receiving the evaluated objects.</param>
 /// <param name="numCandidates">Remaining candidate budget; each visited child consumes its <c>count</c>.</param>
 public void Search(MetricDB db, object q, IResult res, int numCandidates)
 {
     if (this.IsLeaf) {
         foreach (var docID in this.bag) {
             res.Push (docID, db.Dist (db [docID], q));
         }
         return;
     }

     var fingerprint = this.ComputeFingerprint (q, db);
     var fanout = this.children.Count;
     var keys = new long[fanout];
     var scores = new double[fanout];
     var slot = 0;
     foreach (var key in this.children.Keys) {
         keys [slot] = key;
         scores [slot] = distL1 (fingerprint, key);
         ++slot;
     }
     // order the child keys by their score
     Array.Sort<double, long> (scores, keys);
     scores = null;

     for (int k = 0; k < keys.Length && numCandidates > 0; ++k) {
         var child = this.children [keys [k]];
         child.Search (db, q, res, numCandidates);
         numCandidates -= child.count;
     }
 }
示例#14
0
文件: PermTree.cs 项目: sadit/natix
            /// <summary>
            /// Computes a fingerprint of <paramref name="u"/> from the order in which the
            /// node's references appear when sorted by distance to it. The inverse of that
            /// order is packed into a long using four bits per reference.
            /// </summary>
            /// <param name="u">Object to fingerprint.</param>
            /// <param name="db">Database providing the objects and the distance function.</param>
            /// <returns>The packed fingerprint.</returns>
            public long ComputeFingerprint(object u, MetricDB db)
            {
                var numRefs = refs.Length;
                var order = new byte[numRefs];
                var dists = new double[numRefs];

                for (byte r = 0; r < numRefs; ++r) {
                    order [r] = r;
                    dists [r] = db.Dist (db [refs [r]], u);
                }
                // reference positions sorted by their distance to u
                Array.Sort<double,byte> (dists, order);
                byte[] rank = RandomSets.GetInverse (order);

                long fingerprint = 0;
                for (byte r = 0; r < numRefs; ++r) {
                    // four bits per reference: this is enough for 16 references
                    var nibble = (long)(rank [r]);
                    fingerprint |= nibble << (r << 2);
                }
                return fingerprint;
            }
示例#15
0
 /// <summary>
 /// Measures the distance from <paramref name="piv"/> to every object in
 /// <paramref name="sample"/>, appends the (object, distance) pairs to
 /// <paramref name="output"/>, and summarizes the measured distances.
 /// </summary>
 /// <param name="db">Database providing the objects and the distance function.</param>
 /// <param name="sample">Identifiers of the objects to measure.</param>
 /// <param name="piv">Pivot object.</param>
 /// <param name="output">List receiving the pairs; a new list is created when null.
 /// Entries already present are left untouched and are not part of the statistics.</param>
 /// <param name="stats">Minimum, maximum, mean and standard deviation of the distances
 /// measured by this call. For an empty sample the mean and standard deviation are NaN.</param>
 /// <param name="min_objID">Identifier of the closest object, or -1 when none was recorded.</param>
 /// <param name="max_objID">Identifier of the farthest object, or -1 when none was recorded.</param>
 /// <returns>The list the pairs were appended to.</returns>
 public static List<ItemPair> ComputeDistances(MetricDB db, IEnumerable<int> sample, object piv, List<ItemPair> output, out Stats stats, out int min_objID, out int max_objID)
 {
     if (output == null) {
         output = new List<ItemPair>();
     }
     // Entries before this position were supplied by the caller and must not be mixed
     // into the statistics of this sample.
     var first = output.Count;
     max_objID = min_objID = -1;
     stats = default(Stats);
     stats.min = double.MaxValue;
     stats.max = 0;
     double mean = 0;
     var count = 0;
     foreach (var objID in sample) {
         var dist = db.Dist(piv, db[objID]);
         mean += dist;
         output.Add (new ItemPair (objID, dist));
         if (dist < stats.min) {
             stats.min = dist;
             min_objID = objID;
         }
         if (dist > stats.max) {
             stats.max = dist;
             max_objID = objID;
         }
         ++count;
     }
     stats.mean = mean / count;
     // Sum squared deviations over exactly the `count` entries appended above, so the
     // divisor matches the number of terms.
     double stddev = 0;
     for (int i = first; i < output.Count; ++i) {
         var m = output[i].Dist - stats.mean;
         stddev += m * m;
     }
     stats.stddev = Math.Sqrt(stddev / count);
     return output;
 }
示例#16
0
文件: Spaghetti.cs 项目: sadit/natix
 /// <summary>
 /// Builds the node for one pivot: stores the distance from the pivot to every object
 /// (indexed by object identifier) and a permutation of the identifiers ordered by
 /// that distance.
 /// </summary>
 /// <param name="db">Database providing the objects and the distance function.</param>
 /// <param name="pivID">Identifier of the pivot object.</param>
 public Node(MetricDB db, int pivID)
 {
     this.objID = pivID;
     var size = db.Count;
     this.perm = new int[size];
     this.distances = new double[size];
     var pivot = db[pivID];
     for (int id = 0; id < size; ++id) {
         this.perm[id] = id;
         this.distances[id] = db.Dist(pivot, db[id]);
     }
     // only the permutation is reordered; distances stay indexed by object identifier
     Comparison<int> byDistance = (a, b) => this.distances[a].CompareTo(this.distances[b]);
     Array.Sort(this.perm, byDistance);
 }