Exemple #1
0
 /// <summary>
 /// Builds the index over <paramref name="db"/>: reference objects are taken one at a time
 /// from a sequential index of the not-yet-assigned objects, and each reference becomes a
 /// <c>Node</c> that receives the items collected in a <c>Result</c> of capacity
 /// <paramref name="bsize"/>.
 /// </summary>
 /// <param name="db">Database to index; it is also stored in <c>this.DB</c>.</param>
 /// <param name="bsize">Capacity of the per-node result set; also used to pre-size <c>this.NODES</c>.</param>
 /// <param name="rand">Random source handed to the sequential index when it is built.</param>
 public virtual void Build(MetricDB db, int bsize, Random rand)
 {
     this.DB = db;
     int total = this.DB.Count;
     // Reference objects come from a randomized sequential index; per the original author,
     // random selection performs very well even against more "intelligent" strategies.
     var pending = new DynamicSequentialOrdered ();
     pending.Build (db, rand);
     this.NODES = new List<Node> (total / bsize + 1);
     // Scratch list reused on every iteration for the distances to the current reference.
     var distances = new List<ItemPair> (total);
     while (pending.Count > 0) {
         // Progress report once every 100 created nodes.
         bool reportProgress = (this.NODES.Count % 100) == 0;
         if (reportProgress) {
             Console.WriteLine ("XXX {0}, bucketSize: {1}, remain {2}/{3}, db: {4}, date-time: {5}",
                                this, bsize, pending.Count, db.Count, Path.GetFileName(db.Name), DateTime.Now);
         }
         // Take the next reference out of the pending set before measuring against the rest.
         var centerID = pending.GetAnyItem ();
         pending.Remove (centerID);
         distances.Clear ();
         pending.ComputeDistances (this.DB[centerID], distances);
         // Presumably `nearest` ends up with the bsize closest items and `farthest` with the
         // single most distant one - TODO confirm against AppendKExtremes. `farthest` is not
         // read afterwards.
         var nearest = new Result (bsize);
         var farthest = new Result (1);
         pending.AppendKExtremes (nearest, farthest, distances);
         var bucket = new Node (centerID);
         this.NODES.Add (bucket);
         // The collected items now belong to this node, so they leave the pending set.
         pending.Remove (nearest);
         foreach (var member in nearest) {
             bucket.Add (member.ObjID, member.Dist);
         }
     }
 }
Exemple #2
0
 /// <summary>
 /// Draws a random object identifier that has not been used as a pivot yet, registers it as a
 /// new pivot and merges its distance row into <c>this.Items</c>. For every object, the entry
 /// kept is the pivot whose distance deviates the most from that pivot's mean distance.
 /// </summary>
 /// <param name="idxseq">Sequential index used to compute the distances to the pivot; its DB supplies the object count and the pivot object.</param>
 /// <param name="rand">Random source used to draw the pivot's object identifier.</param>
 /// <param name="already_pivot">Identifiers already used as pivots; the drawn identifier is added to it.</param>
 /// <param name="pivs">Pivot list. The new pivot is appended; its position is the pivot index stored in <c>this.Items</c>.</param>
 /// <param name="_items">Scratch list, cleared and then filled with the distances to the new pivot. Assumed to hold one entry per object, positioned by object identifier - TODO confirm against ComputeDistances.</param>
 /// <exception cref="InvalidOperationException">Every identifier in [0, n) is already contained in <paramref name="already_pivot"/>, so no new pivot can be drawn.</exception>
 protected virtual void ComputeDistRow(DynamicSequentialOrdered idxseq, Random rand, HashSet<int> already_pivot, List<EPivot> pivs, List<ItemPair> _items)
 {
     _items.Clear ();
     int n = idxseq.DB.Count;
     // The rejection loop below would spin forever once every identifier is taken.
     // The set may hold foreign values, so its size alone is not proof of exhaustion:
     // scan for a free identifier before giving up.
     if (already_pivot.Count >= n) {
         bool hasFree = false;
         for (int candidate = 0; candidate < n && !hasFree; ++candidate) {
             hasFree = !already_pivot.Contains (candidate);
         }
         if (!hasFree) {
             throw new InvalidOperationException ("ComputeDistRow: every object is already used as a pivot");
         }
     }
     int piv;
     do {
         piv = rand.Next (0, n);
     } while (already_pivot.Contains (piv));
     already_pivot.Add (piv);
     DynamicSequential.Stats stats;
     idxseq.ComputeDistances (idxseq.DB [piv], _items, out stats);
     // Position of the new pivot inside pivs; this is the value stored in Items[..].ObjID.
     int pivID = pivs.Count;
     pivs.Add (new EPivot (piv, stats.stddev, stats.mean, stats.min, stats.max, 0, 0));
     if (this.Items == null) {
         // First row: every object is assigned to the pivot that was just added.
         // Use pivID rather than a literal 0 so the entry stays valid when pivs was not empty.
         this.Items = new ItemPair[n];
         for (int objID = 0; objID < n; ++objID) {
             this.Items[objID] = new ItemPair (pivID, _items[objID].Dist);
         }
         return;
     }
     // The new pivot does not change while scanning, so it is fetched once.
     var new_piv = pivs[pivID];
     for (int objID = 0; objID < n; ++objID) {
         var new_dist = _items[objID].Dist;
         var current = this.Items[objID];
         var old_piv = pivs[current.ObjID];
         // Switch to the new pivot only when its distance is strictly farther from its mean.
         if (Math.Abs (current.Dist - old_piv.mean) < Math.Abs (new_dist - new_piv.mean)) {
             this.Items[objID] = new ItemPair (pivID, new_dist);
         }
     }
 }
Exemple #3
0
 /// <summary>
 /// Registers the object <paramref name="piv"/> as a new pivot and merges its distance row
 /// into <c>this.Items</c>. For every object, the entry kept is the pivot whose distance
 /// deviates the most from that pivot's mean distance.
 /// </summary>
 /// <param name="piv">Object identifier (in <c>idxseq.DB</c>) of the object to use as pivot.</param>
 /// <param name="idxseq">Sequential index used to compute the distances to the pivot; its DB supplies the object count and the pivot object.</param>
 /// <param name="rand">Not used by this implementation.</param>
 /// <param name="pivs">Pivot list. The new pivot is appended; its position is the pivot index stored in <c>this.Items</c>.</param>
 /// <param name="_items">Scratch list, cleared and then filled with the distances to the new pivot. Assumed to hold one entry per object, positioned by object identifier - TODO confirm against ComputeDistances.</param>
 /// <returns>The statistics reported by <c>ComputeDistances</c> for the new pivot's distance row.</returns>
 protected virtual DynamicSequential.Stats ComputeDistRow(int piv, DynamicSequentialOrdered idxseq, Random rand, List<EPivot> pivs, List<ItemPair> _items)
 {
     _items.Clear ();
     int n = idxseq.DB.Count;
     DynamicSequential.Stats stats;
     idxseq.ComputeDistances (idxseq.DB [piv], _items, out stats);
     // Position of the new pivot inside pivs; this is the value stored in Items[..].ObjID.
     int pivID = pivs.Count;
     pivs.Add (new EPivot (piv, stats.stddev, stats.mean, stats.min, stats.max, 0, 0));
     if (this.Items == null) {
         // First row: every object is assigned to the pivot that was just added.
         // Use pivID rather than a literal 0 so the entry stays valid when pivs was not empty.
         this.Items = new ItemPair[n];
         for (int objID = 0; objID < n; ++objID) {
             this.Items[objID] = new ItemPair (pivID, _items[objID].Dist);
         }
         return stats;
     }
     // The new pivot does not change while scanning, so it is fetched once.
     var new_piv = pivs[pivID];
     for (int objID = 0; objID < n; ++objID) {
         var new_dist = _items[objID].Dist;
         var current = this.Items[objID];
         var old_piv = pivs[current.ObjID];
         // Switch to the new pivot only when its distance is strictly farther from its mean.
         if (Math.Abs (current.Dist - old_piv.mean) < Math.Abs (new_dist - new_piv.mean)) {
             this.Items[objID] = new ItemPair (pivID, new_dist);
         }
     }
     return stats;
 }
Exemple #4
0
 /// <summary>
 /// Computes a sequence of object identifiers for the query <paramref name="q"/>. On every
 /// round the identifier reported as minimum by <c>ComputeDistances</c> is appended, after
 /// discarding every remaining item whose distance to the query is not smaller than its
 /// distance to that minimum object.
 /// </summary>
 /// <param name="q">Query object handed to the sequential index for the distance computations.</param>
 /// <returns>The selected identifiers, in the order in which they were chosen.</returns>
 public int[] GetMetricShell(object q)
 {
     var shell = new List<int> ();
     // TODO (from the original author): optimize this per-query index construction.
     var remaining = new DynamicSequentialOrdered ();
     remaining.Build (this.R, RandomSets.GetIdentity (this.R.Count));
     // Scratch list reused on every round for the distances from the query.
     var distToQuery = new List<ItemPair> (this.R.Count);
     while (remaining.Count > 0) {
         distToQuery.Clear ();
         DynamicSequential.Stats stats;
         int min_objID, max_objID;
         remaining.ComputeDistances (q, distToQuery, out stats, out min_objID, out max_objID);
         // NOTE(review): identifiers come from an index built over this.R, while the objects
         // are fetched from this.DB - confirm both share the same identifier space.
         for (int pos = 0; pos < distToQuery.Count; ++pos) {
             var candidate = distToQuery[pos];
             var nearestObj = this.DB [min_objID];
             var candidateObj = this.DB [candidate.ObjID];
             var distToNearest = this.DB.Dist (nearestObj, candidateObj);
             // Drop items that are at least as far from the query as from the minimum object.
             if (candidate.Dist >= distToNearest) {
                 remaining.Remove (candidate.ObjID);
             }
         }
         shell.Add (min_objID);
     }
     return shell.ToArray ();
 }