/// <summary>
/// Builds the index. Repeatedly draws a reference object from the pool of
/// unassigned items, computes distances from it to everything remaining,
/// and groups its <c>bsize</c> nearest items into a new bucket node.
/// </summary>
/// <param name="db">The metric database to index.</param>
/// <param name="bsize">Target bucket size for each node.</param>
/// <param name="rand">Randomness source used to order the candidate pool.</param>
public virtual void Build(MetricDB db, int bsize, Random rand)
{
    this.DB = db;
    var count = this.DB.Count;
    // randomized has very good performance, even compared with more "intelligent" strategies
    var pool = new DynamicSequentialOrdered ();
    pool.Build (db, rand);
    this.NODES = new List<Node> (count / bsize + 1);
    // scratch buffer reused across iterations to avoid re-allocating
    var distances = new List<ItemPair> (count);
    while (pool.Count > 0) {
        // periodic progress report (every 100 nodes)
        if (this.NODES.Count % 100 == 0) {
            Console.WriteLine ("XXX {0}, bucketSize: {1}, remain {2}/{3}, db: {4}, date-time: {5}",
                this, bsize, pool.Count, db.Count, Path.GetFileName(db.Name), DateTime.Now);
        }
        var refID = pool.GetAnyItem ();
        pool.Remove (refID);
        distances.Clear ();
        pool.ComputeDistances (this.DB[refID], distances);
        var near = new Result (bsize);
        var far = new Result (1);
        pool.AppendKExtremes (near, far, distances);
        var node = new Node (refID);
        this.NODES.Add (node);
        // the near set becomes this node's bucket; remove it from the pool
        pool.Remove (near);
        foreach (var p in near) {
            node.Add (p.ObjID, p.Dist);
        }
    }
}
/// <summary>
/// Computes a metric-shell ordering of the reference set <c>this.R</c> with
/// respect to the query <paramref name="q"/>: on each pass the closest
/// remaining reference is emitted, and every reference whose distance to the
/// query is at least its distance to that closest reference is discarded.
/// </summary>
/// <param name="q">Query object; assumed to live in the same metric space as this.DB — TODO confirm.</param>
/// <returns>Reference identifiers in the order they were selected.</returns>
public int[] GetMetricShell(object q)
{
    var seq = new List<int> ();
    var idx = new DynamicSequentialOrdered ();
    // optimize the following:
    idx.Build (this.R, RandomSets.GetIdentity (this.R.Count));
    List<ItemPair> cache = new List<ItemPair>(this.R.Count);
    while (idx.Count > 0) {
        cache.Clear();
        DynamicSequential.Stats stats;
        int min_objID, max_objID;
        idx.ComputeDistances(q, cache, out stats, out min_objID, out max_objID);
        // Hoisted out of the loop: the closest object for this pass is fixed
        // once ComputeDistances returns, so fetch it exactly once.
        var obj_min = this.DB [min_objID];
        for (int i = 0; i < cache.Count; ++i) {
            var obj_cur = this.DB [cache[i].ObjID];
            // Discard cur when d(q, cur) >= d(min, cur); note this also
            // removes min itself (d(min, min) == 0).
            if (cache[i].Dist >= this.DB.Dist(obj_min, obj_cur)) {
                idx.Remove (cache[i].ObjID);
            }
        }
        seq.Add (min_objID);
    }
    return seq.ToArray ();
}
/// <summary>
/// Builds the pivot table. Pivots are drawn in a seeded random permutation;
/// for each pivot, SearchExtremes partitions the remaining items into a
/// "near" set (always consumed) and a "far" set (consumed only when
/// <paramref name="do_far"/> is true). Fills this.Pivs and this.Items.
/// </summary>
/// <param name="DB">The metric database to index.</param>
/// <param name="alpha">Extremes-selection parameter forwarded to SearchExtremes.</param>
/// <param name="min_bs">Minimum bucket size forwarded to SearchExtremes.</param>
/// <param name="seed">Seed for the random permutation of pivot candidates.</param>
/// <param name="do_far">When true, also absorb each pivot's far set.</param>
public virtual void Build(MetricDB DB, double alpha, int min_bs, int seed, bool do_far)
{
    var idxDynamic = new DynamicSequentialOrdered ();
    idxDynamic.Build (DB, RandomSets.GetRandomPermutation(DB.Count, new Random(seed)));
    var pivs = new List<Pivot> (32);
    var items = new List<ItemPair> (DB.Count);
    int I = 0;
    var extreme_items = new List<ItemPair>(idxDynamic.Count);
    while (idxDynamic.Count > 0) {
        var pidx = idxDynamic.GetAnyItem();
        object piv = DB[pidx];
        idxDynamic.Remove(pidx);
        DynamicSequential.Stats stats;
        Pivot piv_data;
        // near_first/far_first stay at MaxValue when the corresponding set is
        // empty or skipped; logging below is guarded by the count fields.
        double near_first = double.MaxValue;
        double near_last = 0;
        double far_first = double.MaxValue;
        int num_near = 0;
        int num_far = 0;
        {
            IResult near, far;
            this.SearchExtremes(idxDynamic, extreme_items, piv, alpha, min_bs, out near, out far, out stats);
            foreach (var pair in near) {
                near_first = Math.Min (near_first, pair.Dist);
                near_last = Math.Max (near_last, pair.Dist);
                items.Add( new ItemPair { ObjID = pair.ObjID, Dist = pair.Dist} );
            }
            num_near = near.Count;
            idxDynamic.Remove(near);
            if (do_far) {
                foreach (var pair in far) {
                    far_first = Math.Min (far_first, pair.Dist);
                    items.Add( new ItemPair {ObjID = pair.ObjID, Dist = pair.Dist} );
                }
                num_far = far.Count;
                idxDynamic.Remove(far);
            }
            piv_data = new Pivot(pidx, stats.mean, stats.stddev, near_last, far_first, num_near, num_far);
            pivs.Add(piv_data);
        }
        // periodic progress report (every 10 pivots)
        if (I % 10 == 0) {
            Console.WriteLine ("");
            Console.WriteLine (this.ToString());
            Console.WriteLine("-- I {0}> remains: {1}, alpha: {2}, mean: {3}, stddev: {4}, pivot: {5}, min_bs: {6}, db: {7}, do_far: {8}",
                I, idxDynamic.Count, alpha, stats.mean, stats.stddev, pidx, min_bs, DB.Name, do_far);
            if (piv_data.num_near > 0) {
                Console.WriteLine("-- (NORMVAL) first-near: {0}, last-near: {1}, near-count: {2}",
                    near_first / stats.max, piv_data.last_near / stats.max, piv_data.num_near);
            }
            if (piv_data.num_far > 0) {
                Console.WriteLine("++ (NORMVAL) first-far: {0}, far-count: {1}",
                    piv_data.first_far / stats.max, piv_data.num_far);
            }
        }
        ++I;
    }
    Console.WriteLine("Number of pivots per group: {0}", I);
    this.Pivs = pivs.ToArray ();
    this.Items = items.ToArray ();
}