/// <summary>
/// Build the index: repeatedly pick a random reference object, compute its
/// distances to the remaining items, and store its bsize nearest neighbors
/// as one bucket (Node).
/// </summary>
public virtual void Build(MetricDB db, int bsize, Random rand)
{
    this.DB = db;
    var n = this.DB.Count;
    // randomized has very good performance, even compared with more "intelligent" strategies
    var queue = new DynamicSequentialOrdered ();
    queue.Build (db, rand);
    this.NODES = new List<Node> (n / bsize + 1);
    var pairs = new List<ItemPair> (n);
    while (queue.Count > 0) {
        // progress report once every 100 buckets
        if (this.NODES.Count % 100 == 0) {
            Console.WriteLine ("XXX {0}, bucketSize: {1}, remain {2}/{3}, db: {4}, date-time: {5}",
                this, bsize, queue.Count, db.Count, Path.GetFileName(db.Name), DateTime.Now);
        }
        var refID = queue.GetAnyItem ();
        queue.Remove (refID);
        pairs.Clear ();
        queue.ComputeDistances (this.DB[refID], pairs);
        // keep the bsize closest items for this bucket; far side is extracted but unused
        var near = new Result (bsize);
        var far = new Result (1);
        queue.AppendKExtremes (near, far, pairs);
        var node = new Node (refID);
        this.NODES.Add (node);
        queue.Remove (near);
        foreach (var p in near) {
            node.Add (p.ObjID, p.Dist);
        }
    }
}
/// <summary>
/// Picks a fresh random pivot (one not yet in <paramref name="already_pivot"/>),
/// records it in the set, and delegates the distance-row computation and
/// this.Items update to the ComputeDistRow(int, ...) overload, so the update
/// logic lives in a single place instead of being duplicated here.
/// </summary>
protected virtual void ComputeDistRow(DynamicSequentialOrdered idxseq, Random rand, HashSet<int> already_pivot, List<EPivot> pivs, List<ItemPair> _items)
{
    int n = idxseq.DB.Count;
    // NOTE(review): this loops forever if already_pivot already contains every
    // ID in [0, n) — callers must guarantee at least one unused pivot remains.
    int piv;
    do {
        piv = rand.Next (0, n);
    } while (already_pivot.Contains (piv));
    already_pivot.Add (piv);
    // The overload performs the distance computation, registers the pivot in
    // pivs, and updates this.Items; its returned stats are not needed here.
    this.ComputeDistRow (piv, idxseq, rand, pivs, _items);
}
/// <summary>
/// Computes the distance row for pivot <paramref name="piv"/>: measures the
/// distance from the pivot object to every object in idxseq.DB, registers a
/// new EPivot built from the row statistics, and updates this.Items so that
/// each object keeps the pivot that discriminates it best, i.e. the one with
/// the largest |dist - pivot.mean|.
/// </summary>
/// <returns>The distance statistics (mean, stddev, min, max) of the pivot row.</returns>
protected virtual DynamicSequential.Stats ComputeDistRow(int piv, DynamicSequentialOrdered idxseq, Random rand, List<EPivot> pivs, List<ItemPair> _items)
{
    _items.Clear ();
    int n = idxseq.DB.Count;
    var stats = new DynamicSequential.Stats ();
    idxseq.ComputeDistances (idxseq.DB [piv], _items, out stats);
    int pivID = pivs.Count;
    pivs.Add (new EPivot (piv, stats.stddev, stats.mean, stats.min, stats.max, 0, 0));
    if (this.Items == null) {
        // First pivot row: every object starts assigned to pivot 0.
        this.Items = new ItemPair[n];
        for (int objID = 0; objID < n; ++objID) {
            this.Items [objID] = new ItemPair (0, _items [objID].Dist);
        }
    } else {
        // Hoisted out of the loop: pivs[pivID] is loop-invariant.
        var new_piv = pivs [pivID];
        for (int objID = 0; objID < n; ++objID) {
            var new_dist = _items [objID].Dist;
            // Items[objID].ObjID stores the pivot ID currently assigned to objID.
            var old_piv = pivs [this.Items [objID].ObjID];
            var old_dist = this.Items [objID].Dist;
            // Reassign when the new pivot deviates more from its own mean,
            // which gives a better discriminating power for this object.
            if (Math.Abs (old_dist - old_piv.mean) < Math.Abs (new_dist - new_piv.mean)) {
                this.Items [objID] = new ItemPair (pivID, new_dist);
            }
        }
    }
    return stats;
}
/// <summary>
/// Peels "shells" around the query q over the reference set this.R: on each
/// round it computes the distance from q to all remaining references, appends
/// the closest one (min_objID) to the output sequence, and removes every
/// reference whose distance to q dominates its distance to that closest
/// reference. Returns the sequence of selected IDs in extraction order.
/// </summary>
public int[] GetMetricShell(object q)
{
    var seq = new List<int> ();
    var idx = new DynamicSequentialOrdered ();
    // optimize the following:
    idx.Build (this.R, RandomSets.GetIdentity (this.R.Count));
    List<ItemPair> cache = new List<ItemPair>(this.R.Count);
    // Console.WriteLine ("START GetMetricShell");
    while (idx.Count > 0) {
        cache.Clear();
        DynamicSequential.Stats stats;
        int min_objID, max_objID;
        // distances from q to every remaining reference; min_objID is the closest one
        idx.ComputeDistances(q, cache, out stats, out min_objID, out max_objID);
        for (int i = 0; i < cache.Count; ++i) {
            // NOTE(review): IDs come from idx, which was built over this.R, yet
            // objects are fetched via this.DB — presumably R shares the DB's ID
            // space; confirm, otherwise these lookups resolve the wrong objects.
            var obj_min = this.DB [min_objID];
            var obj_cur = this.DB [cache[i].ObjID];
            // drop references at least as far from q as they are from the chosen center
            if (cache[i].Dist >= this.DB.Dist(obj_min, obj_cur)) {
                idx.Remove (cache[i].ObjID);
            }
        }
        //Console.WriteLine ("min: {0}, min_dist: {1}, refs_size: {2}", min_objID, stats.min, idx.Count);
        // min_objID is removed by the loop above (its distance to itself is 0)
        seq.Add (min_objID);
    }
    return seq.ToArray ();
}