/// <summary>
/// Computes the edges of an object (static mode).
/// </summary>
/// <remarks>
/// All database objects are ranked by their distance to <paramref name="q"/>
/// (via <c>Sorting.Sort</c>, which permutes <c>objs</c> together with <c>distances</c>).
/// The candidate set holds positions in that ranking. The first remaining candidate is
/// repeatedly taken as an edge, and every candidate whose distance to <paramref name="q"/>
/// is strictly larger than its distance to the newly chosen neighbor is discarded.
/// NOTE(review): <paramref name="subset"/> is handed straight to <c>Set</c>, whose members
/// are used here as positions in the sorted arrays - confirm that the mask is meant to be
/// interpreted that way rather than by object id.
/// </remarks>
/// <param name="q">The object whose edges are computed.</param>
/// <param name="db">Database providing the objects and the distance function.</param>
/// <param name="subset">Optional mask forwarded to the candidate set; null means every position is a candidate.</param>
/// <returns>The identifiers of the selected neighbors, in selection order.</returns>
public static IList<int> ComputeEdges(object q, MetricDB db, bool[] subset = null)
{
    IList<int> edges = new List<int>();
    Set Candidates;
    if (subset == null) {
        Candidates = new Set(db.Count);
    } else {
        Candidates = new Set(db.Count, subset);
    }
    double[] distances = new double[db.Count];
    int[] objs = new int[db.Count]; // object ids, permuted in step with distances by the sort
    for (int j = 0; j < db.Count; j++) {
        distances[j] = db.Dist(q, db[j]);
        objs[j] = j;
    }
    Sorting.Sort(distances, objs);
    // Discard the leading candidates that lie at distance zero from q.
    while (Candidates.Cardinality > 0 && distances[Candidates.First] == 0) {
        Candidates.Remove(Candidates.First);
    }
    while (Candidates.Cardinality > 0) {
        // The first remaining candidate becomes the next edge.
        int closest_id = objs[Candidates.First];
        Candidates.actual = Candidates.First;
        edges.Add(closest_id);
        // Remove the elements in the forbidden area: those farther from q than
        // from the neighbor just selected (this includes the neighbor itself).
        while (Candidates.actual != -1) {
            if (distances[Candidates.actual] > db.Dist(db[closest_id], db[objs[Candidates.actual]])) {
                // No call to Next() here: Remove is presumably responsible for
                // moving the cursor on - confirm against Set.
                Candidates.Remove(Candidates.actual);
                continue;
            }
            Candidates.Next();
        }
    }
    return edges;
}
/// <summary>
/// Builds the pivot list: one initial random pivot is assigned to every object, then
/// <paramref name="num_pivs"/> further random pivots are tried on the objects whose
/// current distance is closest to their pivot's mean distance.
/// </summary>
/// <remarks>
/// Each entry of <c>Items</c> stores the index (into the pivot list) of the pivot
/// currently assigned to the object and the object's distance to that pivot. An object
/// switches to a new pivot when its distance to the new pivot deviates more from that
/// pivot's mean than the current pair does.
/// </remarks>
/// <param name="DB">Database providing the objects and the distance function.</param>
/// <param name="seed">Seed of the random generator used to pick pivots and samples.</param>
/// <param name="num_pivs">Number of additional pivots to try; zero raises a division-by-zero error, as before.</param>
public EPListRandomPivotsPriorized(MetricDB DB, int seed, int num_pivs)
{
    var n = DB.Count;
    this.Items = new ItemPair[n];
    var pivs = new List<EPivot> (32);
    var rand = new Random (seed);
    var pivsel = new PivotSelector (n, rand);

    // First pivot: every object starts assigned to pivot index 0.
    var piv = pivsel.NextPivot ();
    var pivOBJ = DB [piv];
    for (int objID = 0; objID < n; ++objID) {
        var d = DB.Dist (pivOBJ, DB [objID]);
        this.Items [objID] = new ItemPair (0, d);
    }
    double mean, variance;
    this.Statistics (out mean, out variance);
    pivs.Add (new EPivot (piv, Math.Sqrt (variance), mean, 0, 0, 0, 0));

    // Orders items by how far their distance lies from the mean of their pivot;
    // here ItemPair.objID holds the pivot index, not a database id.
    var item_cmp = new Comparison<ItemPair> ((x, y) => {
        var diff_x = Math.Abs (x.dist - pivs [x.objID].mean);
        var diff_y = Math.Abs (y.dist - pivs [y.objID].mean);
        return diff_x.CompareTo (diff_y);
    });
    var queue = new SkipList2<int> (0.5, (x, y) => item_cmp (this.Items [x], this.Items [y]));
    for (int objID = 0; objID < n; ++objID) {
        queue.Add (objID, null);
    }

    // Never review more objects than the queue holds (2 * n / num_pivs exceeds n
    // when num_pivs is 1).
    var max_review = Math.Min (n, 2 * n / num_pivs);
    var list = new List<int> ();
    for (int i = 0; i < num_pivs; ++i) {
        Console.WriteLine ("XXXXXX BEGIN {0} i: {1}", this, i);
        piv = pivsel.NextPivot ();
        // Fix: distances below must be measured to the pivot just selected,
        // not to the very first pivot.
        pivOBJ = DB [piv];
        double piv_mean, piv_variance, qrad;
        PivotSelector.EstimatePivotStatistics (DB, rand, pivOBJ, 256, out piv_mean, out piv_variance, out qrad);
        var pivID = pivs.Count;
        // Fix: store this pivot's own sampled mean instead of the first pivot's mean.
        pivs.Add (new EPivot (piv, Math.Sqrt (piv_variance), piv_mean, 0, 0, 0, 0));
        list.Clear ();
        for (int s = 0; s < max_review; ++s) {
            var objID = queue.RemoveFirst ();
            var d = DB.Dist (DB [objID], pivOBJ);
            var new_item = new ItemPair (pivID, d);
            if (item_cmp (new_item, this.Items [objID]) > 0) {
                this.Items [objID] = new_item;
            }
            list.Add (objID);
        }
        // Re-insert the reviewed objects so they are ordered by their updated items.
        foreach (var objID in list) {
            queue.Add (objID, null);
        }
        Console.WriteLine ("XXXXXX END {0} i: {1}", this, i);
    }
    this.Pivs = pivs.ToArray ();
    Console.WriteLine ("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Estimates distance statistics by sampling random query objects from the database and
/// measuring each of them against random database objects.
/// </summary>
/// <param name="DB">Database providing the objects and the distance function.</param>
/// <param name="rand">Random generator used to draw queries and samples.</param>
/// <param name="num_queries">Number of random query objects.</param>
/// <param name="sample_size">Number of random objects compared against each query.</param>
/// <param name="mean">Receives the mean of all sampled distances.</param>
/// <param name="varY">Receives the variance of the sampled distances (never negative).</param>
/// <param name="qrad">
/// Receives the largest, over all queries, of each query's smallest positive sampled
/// distance; stays 0 when no positive distance was sampled.
/// </param>
public static void EstimateQueryStatistics(MetricDB DB, Random rand, int num_queries, int sample_size, out double mean, out double varY, out double qrad)
{
    var n = DB.Count;
    var N = num_queries * sample_size;
    mean = 0.0;
    var square_mean = 0.0;
    qrad = 0;
    for (int qID = 0; qID < num_queries; ++qID) {
        var q = DB [rand.Next (0, n)];
        var min = double.MaxValue;
        var found_positive = false;
        for (int sampleID = 0; sampleID < sample_size; ++sampleID) {
            var u = DB [rand.Next (0, n)];
            var d = DB.Dist (q, u);
            mean += d / N;
            square_mean += d * d / N;
            if (d > 0) {
                min = Math.Min (min, d);
                found_positive = true;
            }
        }
        // A query whose samples were all at distance zero must not push the
        // double.MaxValue sentinel into qrad.
        if (found_positive) {
            qrad = Math.Max (min, qrad);
        }
    }
    // E[d^2] - E[d]^2 can come out marginally negative through rounding.
    varY = Math.Max (0.0, square_mean - mean * mean);
}
/// <summary>
/// Verifies that two databases hold the same objects position by position, reporting
/// progress on the console.
/// </summary>
/// <param name="db0">The original database; its distance function is used for the comparison.</param>
/// <param name="db1">The saved database to compare against.</param>
/// <exception cref="Exception">
/// Thrown when the databases differ in size or when any pair of objects at the same
/// position has a non-zero distance.
/// </exception>
public void AssertEqualityDB(MetricDB db0, MetricDB db1)
{
    Console.WriteLine ("Checking equality between original and saved databases");
    // Databases of different sizes cannot be identical; without this check a longer
    // db1 would be accepted silently.
    if (db0.Count != db1.Count) {
        throw new Exception ("=== ASSERTION ERROR: databases are not identical");
    }
    for (int i = 0; i < db0.Count; ++i) {
        var d = db0.Dist (db0 [i], db1 [i]);
        if (d != 0) {
            throw new Exception ("=== ASSERTION ERROR: databases are not identical");
        }
    }
    Console.WriteLine ("OK");
}
/// <summary>
/// Scans the pivots of this group for a nearest-neighbor query and counts, per object,
/// how many pivots could not exclude it.
/// </summary>
/// <remarks>
/// <c>Items</c> is read as consecutive buckets: for each pivot, <c>num_near</c> entries
/// followed by <c>num_far</c> entries. A bucket is scanned only when the query ball,
/// stretched by <c>ApproxFactor</c>, can reach it; otherwise it is skipped. The scan of
/// pivots stops early once the stretched query ball lies entirely inside the near ball
/// or entirely beyond <c>first_far</c>.
/// <paramref name="K"/> and <paramref name="current_rank_A"/> are not used here.
/// </remarks>
/// <param name="db">Database providing the objects and the distance function.</param>
/// <param name="q">The query object.</param>
/// <param name="K">Unused by this implementation.</param>
/// <param name="res">Result set; every visited pivot is pushed into it.</param>
/// <param name="A">Per-object counters, incremented for every item whose stored distance is within the covering radius of the query-pivot distance.</param>
/// <param name="current_rank_A">Unused by this implementation.</param>
/// <returns>The number of distance evaluations performed (one per visited pivot).</returns>
public override int SearchKNN(MetricDB db, object q, int K, IResult res, short[] A, short current_rank_A)
{
    int cursor = 0;
    int evaluated = 0;
    foreach (var piv in this.Pivs) {
        var dqp = db.Dist (q, db [piv.objID]);
        res.Push (piv.objID, dqp);
        ++evaluated;

        // The result set is not touched again in this iteration, so the radius is fixed.
        var radius = res.CoveringRadius;
        var slack = radius * this.ApproxFactor;

        // Near bucket.
        int nearEnd = cursor + piv.num_near;
        if (dqp <= piv.last_near + slack) {
            for (; cursor < nearEnd; ++cursor) {
                var entry = this.Items [cursor];
                if (Math.Abs (entry.Dist - dqp) <= radius) {
                    ++A [entry.ObjID];
                }
            }
        } else {
            cursor = nearEnd;
        }

        // Far bucket.
        int farEnd = cursor + piv.num_far;
        if (dqp + slack >= piv.first_far) {
            for (; cursor < farEnd; ++cursor) {
                var entry = this.Items [cursor];
                if (Math.Abs (entry.Dist - dqp) <= radius) {
                    ++A [entry.ObjID];
                }
            }
        } else {
            cursor = farEnd;
        }

        bool insideNearBall = dqp + slack <= piv.last_near;
        if (insideNearBall || piv.first_far <= dqp - slack) {
            break;
        }
    }
    return evaluated;
}
/// <summary>
/// Estimates the distribution of distances from a pivot to the database by measuring
/// the pivot against randomly drawn database objects.
/// </summary>
/// <param name="DB">Database providing the objects and the distance function.</param>
/// <param name="rand">Random generator used to draw the sample.</param>
/// <param name="piv">The pivot object.</param>
/// <param name="sample_size">Number of random objects to measure.</param>
/// <param name="mean">Receives the mean sampled distance.</param>
/// <param name="variance">Receives the variance of the sampled distances (never negative).</param>
/// <param name="qrad">
/// Receives the smallest positive sampled distance, or <c>double.MaxValue</c> when no
/// positive distance was sampled.
/// </param>
public static void EstimatePivotStatistics(MetricDB DB, Random rand, object piv, int sample_size, out double mean, out double variance, out double qrad)
{
    var n = DB.Count;
    mean = 0.0;
    var square_mean = 0.0;
    var min = double.MaxValue;
    for (int sampleID = 0; sampleID < sample_size; ++sampleID) {
        var u = DB [rand.Next (0, n)];
        var d = DB.Dist (piv, u);
        mean += d / sample_size;
        square_mean += d * d / sample_size;
        if (d > 0) {
            min = Math.Min (min, d);
        }
    }
    // The former "if (qrad == 0) ... else average" test always took the first branch,
    // because qrad was still 0 at that point; the result is simply the minimum.
    qrad = min;
    // E[d^2] - E[d]^2 can come out marginally negative through rounding, which would
    // turn into NaN once a caller takes its square root.
    variance = Math.Max (0.0, square_mean - mean * mean);
}
/// <summary>
/// Searches this node and, where the current covering radius allows it, its children.
/// </summary>
/// <remarks>
/// The node's reference object is pushed into <paramref name="res"/> first. The left
/// child is visited when the query ball reaches at or below the median distance, the
/// right child when it reaches at or above it. The covering radius is read again before
/// the right child is tested, after the left subtree has been searched.
/// </remarks>
/// <param name="q">The query object.</param>
/// <param name="res">Result set receiving the candidates.</param>
/// <param name="db">Database providing the objects and the distance function.</param>
public void SearchKNN(object q, IResult res, MetricDB db)
{
    var distToRef = db.Dist (db [this.refID], q);
    res.Push (this.refID, distToRef);

    if (this.left != null) {
        if (distToRef - res.CoveringRadius <= this.median) {
            this.left.SearchKNN (q, res, db);
        }
    }

    if (this.right != null) {
        if (this.median <= distToRef + res.CoveringRadius) {
            this.right.SearchKNN (q, res, db);
        }
    }
}
/// <summary>
/// Recursively builds a tree node over <paramref name="items"/>, printing the
/// <c>ADVANCE</c> counter every 1000 constructed nodes.
/// </summary>
/// <remarks>
/// A random item is used to rank all items by distance; the first item of that ranking
/// becomes the node's reference. The split position is <c>(count + 1) / 10 + 1</c> for
/// a left child and <c>(count + 1) / 2</c> otherwise; items before it (excluding the
/// reference) go to the left subtree and the rest to the right subtree.
/// Note that <paramref name="items"/> is reordered in place by the sort.
/// </remarks>
/// <param name="items">Identifiers of the objects to index; reordered by this call.</param>
/// <param name="db">Database providing the objects and the distance function.</param>
/// <param name="rand">Random generator used to choose the ranking object.</param>
/// <param name="isleft">Whether this node is built as a left child, which selects the split position.</param>
public Node(int[] items, MetricDB db, Random rand, bool isleft)
{
    if (ADVANCE % 1000 == 0) {
        Console.WriteLine ("Advance {0}", ADVANCE);
    }
    ++ADVANCE;

    int count = items.Length;
    if (count == 1) {
        this.refID = items [0];
        this.median = 0;
        return;
    }

    var ranking = new double[count];
    this.refID = items [rand.Next (count)];
    for (int pos = 0; pos < ranking.Length; ++pos) {
        ranking [pos] = db.Dist (db [items [pos]], db [this.refID]);
    }
    Sorting.Sort (ranking, items);
    // Take the head of the ranking as reference, in case of two identical items.
    this.refID = items [0];

    int split = isleft ? (count + 1) / 10 + 1 : (count + 1) / 2;
    this.median = ranking [split];

    var inner = new int[split - 1];
    var outer = new int[count - inner.Length - 1];
    Array.Copy (items, 1, inner, 0, inner.Length);
    Array.Copy (items, split, outer, 0, outer.Length);

    // Drop the local references before recursing so these arrays are not kept
    // reachable from this frame; the caller still owns items.
    ranking = null;
    items = null;

    if (inner.Length > 0) {
        this.left = new Node (inner, db, rand, true);
    }
    inner = null;
    if (outer.Length > 0) {
        this.right = new Node (outer, db, rand, false);
    }
}
/// <summary>
/// Recursively builds a tree node over <paramref name="items"/>, splitting them around
/// the middle of their distance ranking.
/// </summary>
/// <remarks>
/// A random item is used to rank all items by distance; the first item of that ranking
/// becomes the node's reference and the distance at position <c>(count + 1) / 2</c>
/// becomes the median. Items before that position (excluding the reference) form the
/// left subtree, the remaining ones the right subtree.
/// Note that <paramref name="items"/> is reordered in place by the sort.
/// </remarks>
/// <param name="items">Identifiers of the objects to index; reordered by this call.</param>
/// <param name="db">Database providing the objects and the distance function.</param>
/// <param name="rand">Random generator used to choose the ranking object.</param>
public Node(int[] items, MetricDB db, Random rand)
{
    int count = items.Length;
    if (count == 1) {
        this.refID = items [0];
        this.median = 0;
        return;
    }

    var ranking = new double[count];
    this.refID = items [rand.Next (0, count)];
    for (int pos = 0; pos < ranking.Length; ++pos) {
        ranking [pos] = db.Dist (db [items [pos]], db [this.refID]);
    }
    Sorting.Sort (ranking, items);
    // Take the head of the ranking as reference, in case of two identical items.
    this.refID = items [0];

    int half = (count + 1) / 2;
    this.median = ranking [half];

    var lower = new int[half - 1];
    var upper = new int[count - lower.Length - 1];
    Array.Copy (items, 1, lower, 0, lower.Length);
    Array.Copy (items, half, upper, 0, upper.Length);

    // Drop the local references before recursing so these arrays are not kept
    // reachable from this frame; the caller still owns items.
    ranking = null;
    items = null;

    if (lower.Length > 0) {
        this.left = new Node (lower, db, rand);
    }
    lower = null;
    if (upper.Length > 0) {
        this.right = new Node (upper, db, rand);
    }
}
/// <summary>
/// Adds an object to the pivot list unless it lies too close to an existing pivot.
/// </summary>
/// <remarks>
/// The object is rejected when its distance to the nearest current pivot, divided by
/// <paramref name="dmax"/>, is below <paramref name="alpha"/>. With no pivots yet the
/// nearest distance stays at <c>Double.MaxValue</c>, so the object is accepted.
/// </remarks>
/// <param name="db">Database providing the objects and the distance function.</param>
/// <param name="alpha">Minimum accepted ratio between nearest-pivot distance and <paramref name="dmax"/>.</param>
/// <param name="dmax">Distance used to normalise the nearest-pivot distance.</param>
/// <param name="objID">Identifier of the candidate object.</param>
protected void AppendPivot(MetricDB db, double alpha, double dmax, int objID)
{
    var candidate = db [objID];
    double nearest = Double.MaxValue;
    int k = 0;
    while (k < pivs.Count) {
        var toPivot = db.Dist (candidate, db [pivs [k]]);
        if (toPivot < nearest) {
            nearest = toPivot;
        }
        ++k;
    }

    bool tooClose = nearest / dmax < alpha;
    if (!tooClose) {
        Console.WriteLine ("**** computing pivot alpha={0}, pivots={1}, {2}", alpha, pivs.Count, DateTime.Now);
        this.pivs.Add (objID);
    }
}
/// <summary>
/// Estimates the largest distance in the database from a random subset of its objects.
/// </summary>
/// <remarks>
/// Each object is selected when a fresh <c>NextDouble()</c> draw is at most
/// <paramref name="prob"/>; every selected object is then measured against the whole
/// database.
/// </remarks>
/// <param name="db">Database providing the objects and the distance function.</param>
/// <param name="prob">Selection threshold compared against each random draw.</param>
/// <returns>The largest distance observed, or 0 when nothing was measured.</returns>
protected double EstimateMaxDistance(MetricDB db, double prob)
{
    var rand = RandomSets.GetRandom ();
    double farthest = 0;
    int total = db.Count;
    for (int qID = 0; qID < total; ++qID) {
        var q = db [qID];
        bool selected = rand.NextDouble () <= prob;
        if (!selected) {
            continue;
        }
        for (int uID = 0; uID < total; ++uID) {
            var dist = db.Dist (q, db [uID]);
            if (dist > farthest) {
                farthest = dist;
            }
        }
    }
    return farthest;
}
/// <summary>
/// Scans the pivots of this group for a nearest-neighbor query, reading item buckets
/// through <c>DiskItems</c>, and counts per object how many pivots could not exclude it.
/// </summary>
/// <remarks>
/// The items are laid out as consecutive buckets: for each pivot, <c>num_near</c>
/// entries followed by <c>num_far</c> entries. A bucket is read only when the query
/// ball can reach it; otherwise its positions are skipped. The scan of pivots stops
/// early once the query ball lies entirely inside the near ball or entirely beyond
/// <c>first_far</c>. <paramref name="K"/> is not used here.
/// </remarks>
/// <param name="db">Database providing the distance function.</param>
/// <param name="q">The query object.</param>
/// <param name="K">Unused by this implementation.</param>
/// <param name="res">Result set; every visited pivot is pushed into it.</param>
/// <param name="A">Per-object counters, incremented for every item whose stored distance is within the covering radius of the query-pivot distance.</param>
/// <returns>The number of distance evaluations performed (one per visited pivot).</returns>
public int SearchKNN(MetricDB db, object q, int K, IResult res, short[] A)
{
    this.CachePivObjects (db);
    int cursor = 0;
    int evaluated = 0;
    for (int pivID = 0; pivID < this.Pivs.Length; ++pivID) {
        var piv = this.Pivs [pivID];
        var dqp = db.Dist (q, this._PivObjects [pivID]);
        res.Push (piv.objID, dqp);
        ++evaluated;

        // The result set is not touched again in this iteration, so the radius is fixed.
        var radius = res.CoveringRadius;

        // Near bucket: the cursor always moves past it, whether or not it is read.
        int nearStart = cursor;
        cursor += piv.num_near;
        if (dqp <= piv.last_near + radius) {
            foreach (var entry in this.DiskItems.ReadArray (nearStart, piv.num_near)) {
                if (Math.Abs (entry.Dist - dqp) <= radius) {
                    ++A [entry.ObjID];
                }
            }
        }

        // Far bucket, handled the same way.
        int farStart = cursor;
        cursor += piv.num_far;
        if (dqp + radius >= piv.first_far) {
            foreach (var entry in this.DiskItems.ReadArray (farStart, piv.num_far)) {
                if (Math.Abs (entry.Dist - dqp) <= radius) {
                    ++A [entry.ObjID];
                }
            }
        }

        bool insideNearBall = dqp + radius <= piv.last_near;
        if (insideNearBall || piv.first_far <= dqp - radius) {
            break;
        }
    }
    return evaluated;
}
/// <summary>
/// Searches this node: a leaf compares the query with every object in its bag, an inner
/// node descends into its children in order of fingerprint proximity.
/// </summary>
/// <remarks>
/// For an inner node the children keys are sorted by <c>distL1</c> to the query's
/// fingerprint. Children are visited in that order while the candidate budget is
/// positive; after each visit the budget shrinks by the visited child's <c>count</c>.
/// </remarks>
/// <param name="db">Database providing the objects and the distance function.</param>
/// <param name="q">The query object.</param>
/// <param name="res">Result set receiving the candidates.</param>
/// <param name="numCandidates">Candidate budget for inner nodes; leaves ignore it.</param>
public void Search(MetricDB db, object q, IResult res, int numCandidates)
{
    if (this.IsLeaf) {
        foreach (var docID in this.bag) {
            res.Push (docID, db.Dist (db [docID], q));
        }
        return;
    }

    var fingerprint = this.ComputeFingerprint (q, db);
    var keys = new long[this.children.Count];
    var gaps = new double[this.children.Count];
    int slot = 0;
    foreach (var key in this.children.Keys) {
        keys [slot] = key;
        gaps [slot] = distL1 (fingerprint, key);
        ++slot;
    }
    Array.Sort<double, long> (gaps, keys);
    gaps = null;

    for (int rank = 0; rank < keys.Length && numCandidates > 0; ++rank) {
        var child = this.children [keys [rank]];
        child.Search (db, q, res, numCandidates);
        numCandidates -= child.count;
    }
}
/// <summary>
/// Computes a 64-bit fingerprint of an object from the ordering of the references by
/// their distance to it.
/// </summary>
/// <remarks>
/// The reference indexes are sorted by distance to <paramref name="u"/>, the resulting
/// permutation is passed through <c>RandomSets.GetInverse</c>, and each entry of the
/// result is OR-ed into the fingerprint at a 4-bit offset per reference. Four bits per
/// slot in a 64-bit value is enough for 16 references.
/// </remarks>
/// <param name="u">The object to fingerprint.</param>
/// <param name="db">Database providing the reference objects and the distance function.</param>
/// <returns>The packed fingerprint.</returns>
public long ComputeFingerprint(object u, MetricDB db)
{
    var order = new byte[refs.Length];
    var gaps = new double[refs.Length];
    for (byte r = 0; r < refs.Length; ++r) {
        order [r] = r;
        gaps [r] = db.Dist (db [refs [r]], u);
    }
    Array.Sort<double, byte> (gaps, order);
    byte[] inverse = RandomSets.GetInverse (order);

    long fingerprint = 0;
    for (byte r = 0; r < refs.Length; ++r) {
        long slotValue = inverse [r];
        int offset = r * 4; // four bits per reference
        fingerprint |= slotValue << offset;
    }
    return fingerprint;
}
/// <summary>
/// Measures a pivot against every object of a sample, appends the (object, distance)
/// pairs to <paramref name="output"/>, and reports statistics of those distances.
/// </summary>
/// <remarks>
/// The statistics cover only the pairs appended by this call, even when
/// <paramref name="output"/> already held items. For an empty sample the mean and
/// standard deviation are left at 0, <c>stats.min</c> at <c>double.MaxValue</c>,
/// <c>stats.max</c> at 0 and both object ids at -1.
/// </remarks>
/// <param name="db">Database providing the objects and the distance function.</param>
/// <param name="sample">Identifiers of the objects to measure.</param>
/// <param name="piv">The pivot object.</param>
/// <param name="output">List to append to; a new list is created when null.</param>
/// <param name="stats">Receives minimum, maximum, mean and standard deviation of the measured distances.</param>
/// <param name="min_objID">Receives the identifier of the closest sampled object, or -1.</param>
/// <param name="max_objID">Receives the identifier of the farthest sampled object, or -1.</param>
/// <returns>The list that received the pairs.</returns>
public static List<ItemPair> ComputeDistances(MetricDB db, IEnumerable<int> sample, object piv, List<ItemPair> output, out Stats stats, out int min_objID, out int max_objID)
{
    if (output == null) {
        output = new List<ItemPair> ();
    }
    // Remember where this call's pairs start so earlier content of output does not
    // leak into the standard deviation.
    int first_new = output.Count;
    max_objID = min_objID = -1;
    stats = default(Stats);
    stats.min = double.MaxValue;
    stats.max = 0;
    double sum = 0;
    var count = 0;
    foreach (var objID in sample) {
        var dist = db.Dist (piv, db [objID]);
        sum += dist;
        output.Add (new ItemPair (objID, dist));
        if (dist < stats.min) {
            stats.min = dist;
            min_objID = objID;
        }
        if (dist > stats.max) {
            stats.max = dist;
            max_objID = objID;
        }
        ++count;
    }
    // An empty sample would otherwise divide by zero and produce NaN statistics.
    if (count > 0) {
        stats.mean = sum / count;
        double squared_dev = 0;
        for (int i = first_new; i < output.Count; ++i) {
            var m = output [i].Dist - stats.mean;
            squared_dev += m * m;
        }
        stats.stddev = Math.Sqrt (squared_dev / count);
    }
    return output;
}
/// <summary>
/// Builds a node for one pivot: stores the distance from the pivot to every database
/// object and a permutation of the object ids ordered by that distance.
/// </summary>
/// <remarks>
/// <c>distances</c> stays indexed by object id; only <c>perm</c> is reordered.
/// </remarks>
/// <param name="db">Database providing the objects and the distance function.</param>
/// <param name="pivID">Identifier of the pivot object.</param>
public Node(MetricDB db, int pivID)
{
    this.objID = pivID;
    int size = db.Count;
    this.perm = new int[size];
    this.distances = new double[size];

    var pivot = db [pivID];
    for (int id = 0; id < size; ++id) {
        this.perm [id] = id;
        this.distances [id] = db.Dist (pivot, db [id]);
    }

    Comparison<int> byDistance = (a, b) => this.distances [a].CompareTo (this.distances [b]);
    Array.Sort (this.perm, byDistance);
}