/// <summary>
/// Builds the pivot table from randomly selected pivots. Every object starts
/// assigned to a first pivot; then <paramref name="num_pivs"/> additional pivots
/// are drawn and, for each one, the objects whose current distance lies closest
/// to their pivot's mean distance are re-evaluated against the new pivot.
/// </summary>
/// <param name="DB">Database supplying the objects and the distance function.</param>
/// <param name="seed">Seed of the random generator driving pivot selection and sampling.</param>
/// <param name="num_pivs">Number of additional pivots to draw after the initial one.
/// Values less than or equal to zero leave only the initial pivot.</param>
public EPListRandomPivotsPriorized(MetricDB DB, int seed, int num_pivs) {
    var n = DB.Count;
    this.Items = new ItemPair[n];
    var pivs = new List<EPivot> (32);
    var rand = new Random (seed);
    var pivsel = new PivotSelector (n, rand);

    // Bootstrap: assign every object to the first pivot (slot 0 of pivs).
    var piv = pivsel.NextPivot ();
    var pivOBJ = DB [piv];
    for (int objID = 0; objID < n; ++objID) {
        var d = DB.Dist (pivOBJ, DB [objID]);
        this.Items [objID] = new ItemPair (0, d);
    }
    double mean, variance;
    // Statistics is defined elsewhere; presumably mean/variance of the distances
    // just stored in this.Items -- TODO confirm.
    this.Statistics (out mean, out variance);
    pivs.Add (new EPivot (piv, Math.Sqrt (variance), mean, 0, 0, 0, 0));

    // Orders items by |dist - mean of the assigned pivot|, ascending.
    // Note that ItemPair.objID holds the pivot slot (index into pivs) here.
    var item_cmp = new Comparison<ItemPair> ((x, y) => {
        var diff_x = Math.Abs (x.dist - pivs [x.objID].mean);
        var diff_y = Math.Abs (y.dist - pivs [y.objID].mean);
        return diff_x.CompareTo (diff_y);
    });
    var queue = new SkipList2<int> (0.5, (x, y) => item_cmp (this.Items [x], this.Items [y]));
    for (int objID = 0; objID < n; ++objID) {
        queue.Add (objID, null);
    }

    // Objects re-examined per pivot. Guarded against num_pivs == 0 (integer
    // division by zero) and clamped to n because the queue never holds more
    // than n entries.
    var max_review = num_pivs > 0 ? Math.Min (n, 2 * n / num_pivs) : 0;
    var list = new List<int> ();
    for (int i = 0; i < num_pivs; ++i) {
        Console.WriteLine ("XXXXXX BEGIN {0} i: {1}", this, i);
        piv = pivsel.NextPivot ();
        // FIX: refresh the pivot object; previously all distances below were
        // still measured against the very first pivot.
        pivOBJ = DB [piv];
        double piv_mean, piv_variance, qrad;
        PivotSelector.EstimatePivotStatistics (DB, rand, pivOBJ, 256, out piv_mean, out piv_variance, out qrad);
        var pivID = pivs.Count;
        // FIX: store this pivot's own estimated mean (was the first pivot's mean).
        pivs.Add (new EPivot (piv, Math.Sqrt (piv_variance), piv_mean, 0, 0, 0, 0));

        // Entries are taken out of the queue before this.Items is modified because
        // the queue's ordering depends on this.Items; they are re-inserted afterwards.
        list.Clear ();
        for (int s = 0; s < max_review; ++s) {
            // Presumably the entry with the smallest deviation under item_cmp -- TODO confirm.
            var objID = queue.RemoveFirst ();
            var d = DB.Dist (DB [objID], pivOBJ);
            var new_item = new ItemPair (pivID, d);
            // Keep the new pivot only when it yields a strictly larger deviation from its mean.
            if (item_cmp (new_item, this.Items [objID]) > 0) {
                this.Items [objID] = new_item;
            }
            list.Add (objID);
        }
        foreach (var objID in list) {
            queue.Add (objID, null);
        }
        Console.WriteLine ("XXXXXX END {0} i: {1}", this, i);
    }
    this.Pivs = pivs.ToArray ();
    Console.WriteLine ("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Builds the pivot table incrementally: pivots are added one at a time and the
/// estimated cost is re-evaluated after each addition. Construction stops as soon
/// as the cost no longer decreases or <paramref name="max_iters"/> pivots have
/// been processed (at least one pivot is always processed).
/// </summary>
/// <param name="DB">Database supplying the objects and the distance function.</param>
/// <param name="seed">Seed of the random generator used for sampling and pivot selection.</param>
/// <param name="num_indexes">Forwarded to expected_cost.</param>
/// <param name="max_iters">Upper bound on the number of pivots processed.</param>
/// <param name="error_factor">Forwarded to expected_cost.</param>
public EPListOptimized(MetricDB DB, int seed, int num_indexes, int max_iters, double error_factor) {
    Console.WriteLine ("XXX {0}, num_indexes: {1}, max_iters: {2}, error_factor: {3}", this, num_indexes, max_iters, error_factor);
    this.Items = null;
    var pivotList = new List<EPivot> (32);
    var rng = new Random (seed);
    var n = DB.Count;

    var sequential = new DynamicSequentialOrdered ();
    sequential.Build (DB, RandomSets.GetIdentity (DB.Count));
    var scratchItems = new List<ItemPair> (DB.Count);

    // Only the variance and radius are consumed below; the mean is a required out argument.
    double queryRadius, queryVariance, queryMean;
    PivotSelector.EstimateQueryStatistics (DB, rng, 128, 128, out queryMean, out queryVariance, out queryRadius);

    var selector = new PivotSelector (n, rng);
    int candidate = selector.NextPivot ();
    double currentCost = n;
    int iteration = 0;

    while (true) {
        // ComputeDistRow is defined elsewhere; presumably it appends the candidate to
        // pivotList and refreshes this.Items -- TODO confirm.
        this.ComputeDistRow (candidate, sequential, rng, pivotList, scratchItems);

        // Mean squared deviation of each stored distance from its pivot's mean.
        double varX = 0;
        foreach (var entry in this.Items) {
            var deviation = Math.Abs (entry.dist - pivotList [entry.objID].mean);
            varX += deviation * deviation / n;
        }

        // The next candidate is drawn before the stop test, exactly once per pass.
        candidate = selector.NextPivot ();
        iteration += 1;

        double previousCost = currentCost;
        currentCost = this.expected_cost (queryRadius, varX, queryVariance, n, iteration, num_indexes, error_factor);
        double derivative = currentCost - previousCost;

        // Progress report on iterations 1, 11, 21, ...
        if (iteration % 10 == 1) {
            Console.Write ("XXXXXX {0}, seed: {1}, iteration: {2}, DB: {3}, ", this, seed, iteration, DB.Name);
            Console.WriteLine ("qcurr_cost: {0}, prev_cost: {1}, varX: {2}, varY: {3}, qrad: {4}", currentCost, previousCost, varX, queryVariance, queryRadius);
        }

        // Continue only while the cost keeps strictly decreasing and the budget allows.
        if (!(derivative < 0 && iteration < max_iters)) {
            break;
        }
    }

    this.Pivs = pivotList.ToArray ();
    Console.WriteLine ("Number of pivots per group: {0}", this.Pivs.Length);
}
/// <summary>
/// Initializes the index with a single pivot obtained from <paramref name="pivsel"/>:
/// allocates the per-object tables, registers the pivot in ACT, and records for
/// every object pivot slot 0 in CT and its distance to that pivot in DT.
/// </summary>
/// <param name="db">Database to index; stored in this.DB.</param>
/// <param name="pivsel">Selector that yields the object identifier of the pivot.</param>
public void PartialBuild(MetricDB db, PivotSelector pivsel) {
    this.DB = db;
    int count = this.DB.Count;

    this.ACT = new List<int> (256); // initial capacity only, not a size limit
    this.CT = new int[count];
    this.DT = new double[count];

    const int firstSlot = 0;
    int pivotObjID = pivsel.NextPivot ();
    this.ACT.Add (pivotObjID);

    // NOTE(review): both stores below are overwritten by the loop that follows,
    // which also visits the pivot itself (CT ends up as 0, DT as Dist(pivot, pivot)).
    // Confirm whether the loop was meant to skip the pivot.
    this.DT [pivotObjID] = 0.0;
    this.CT [pivotObjID] = -1;

    var pivotObject = this.DB [pivotObjID];
    int docID = 0;
    while (docID < count) {
        this.CT [docID] = firstSlot;
        this.DT [docID] = this.DB.Dist (this.DB [docID], pivotObject);
        docID++;
    }
}
/// <summary>
/// Resets the cluster list to a single cluster whose center is the object chosen
/// by <paramref name="pivsel"/>, and adds every other object of the database to
/// that cluster together with its distance to the center.
/// </summary>
/// <param name="db">Database to index; stored in this.DB.</param>
/// <param name="pivsel">Selector that yields the object identifier of the center.</param>
public void PartialBuild(MetricDB db, PivotSelector pivsel) {
    this.DB = db;
    int total = this.DB.Count;
    this.clusters.Clear ();

    Node cluster = new Node ();
    int centerID = pivsel.NextPivot ();
    cluster.objID = centerID;
    this.clusters.Add (cluster);

    // Measure every object except the center itself against the center.
    var center = this.DB [centerID];
    int docID = 0;
    while (docID < total) {
        if (docID != centerID) {
            var distance = this.DB.Dist (this.DB [docID], center);
            cluster.Add (docID, distance);
        }
        ++docID;
    }
}