Beispiel #1
0
 /// <summary>
 /// Builds the index: draws <paramref name="num_centers"/> random objects of
 /// <paramref name="db"/> as centers and attaches every other object to the
 /// center returned by a 1-nearest-neighbor search over those centers.
 /// </summary>
 /// <param name="db">Database to be indexed; stored in <c>DB</c>.</param>
 /// <param name="num_centers">Number of centers (one node is created per center).</param>
 /// <param name="rand">Random source used to pick the centers and to build the helper index over them.</param>
 public virtual void Build(MetricDB db, int num_centers, Random rand)
 {
     this.DB = db;
     var total = this.DB.Count;
     // Centers are chosen at random: this performs very well, even compared
     // with more "intelligent" selection strategies.
     this.node_list = new List<Node> (num_centers);
     var centers = RandomSets.GetRandomSubSet (num_centers, total, rand);
     var centerSet = new HashSet<int> (centers);
     for (int i = 0; i < num_centers; ++i) {
         var node = new Node (centers [i]);
         this.node_list.Add (node);
     }
     // A SAT over the centers answers the nearest-center query, replacing a
     // linear scan that would compute the distance to every center.
     var centerIndex = new SAT_Distal ();
     centerIndex.Build (new SampleSpace ("", db, centers), rand);
     for (int objID = 0; objID < total; ++objID) {
         // Progress report every 1000 objects.
         if (objID % 1000 == 0) {
             Console.WriteLine ("== Vor {0}/{1}, num_centers: {2}, db: {3}", objID + 1, total, num_centers, db.Name);
         }
         // Centers themselves are not inserted into any node.
         if (!centerSet.Contains (objID)) {
             var knn = centerIndex.SearchKNN (this.DB [objID], 1, new Result (1));
             var nearest = knn.First;
             // The ObjID of the search result is used directly as the position in node_list.
             this.node_list [nearest.ObjID].Add (objID, nearest.Dist);
         }
     }
 }
Beispiel #2
0
 /// <summary>
 /// Builds the index using a SAT_Distal over a sample of <paramref name="db"/> as
 /// the reference index, then delegates to the Build overload that takes that index.
 /// </summary>
 /// <param name="db">Database to be indexed.</param>
 /// <param name="rand">Random source handed to the SampleSpace constructor.</param>
 /// <param name="num_refs">Sample size handed to the SampleSpace constructor.</param>
 /// <param name="K">Forwarded unchanged to the delegated Build overload.</param>
 /// <param name="maxcand">Forwarded unchanged to the delegated Build overload.</param>
 /// <param name="seq_builder">Forwarded unchanged to the delegated Build overload.</param>
 public void Build(MetricDB db, Random rand, int num_refs, int K=7, int maxcand=1024, SequenceBuilder seq_builder=null)
 {
     var references = new SampleSpace ("", db, num_refs, rand);
     var refIndex = new SAT_Distal ();
     // Note: the SAT is built with the generator from RandomSets.GetRandom(), not with rand.
     refIndex.Build (references, RandomSets.GetRandom ());
     this.Build (db, refIndex, K, maxcand, seq_builder);
 }
Beispiel #3
0
 /// <summary>
 /// Builds the index using a SAT_Distal over a sample of <paramref name="db"/> as
 /// the reference index, then delegates to the Build overload that takes that index.
 /// </summary>
 /// <param name="db">Database to be indexed.</param>
 /// <param name="k">Forwarded unchanged to the delegated Build overload.</param>
 /// <param name="num_refs">Sample size handed to the SampleSpace constructor.</param>
 /// <param name="rand">Random source used both for the sample and for building the SAT.</param>
 public void Build(MetricDB db, int k, int num_refs, Random rand)
 {
     var references = new SampleSpace ("", db, num_refs, rand);
     var refIndex = new SAT_Distal ();
     refIndex.Build (references, rand);
     this.Build (db, k, refIndex);
 }
Beispiel #4
0
        /// <summary>
        /// Builds the hash table: every object of <paramref name="db"/> is keyed by the
        /// encoding (EncodeKnr) of the sequence that a KnrFP index, built over a
        /// SAT_Distal of sampled references, stores for it.
        /// </summary>
        /// <param name="db">Database to be indexed; stored in <c>DB</c>.</param>
        /// <param name="symbolsPerHash">Number of symbols per hash (very small values, i.e., 2, 3, 4). Must be at least 1.</param>
        /// <param name="neighborhoodExpansion">Stored in <c>NeighborhoodExpansion</c>; expected to be &gt;= <paramref name="symbolsPerHash"/>.</param>
        /// <param name="rand">Random source used to sample the references and to build the SAT over them.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="symbolsPerHash"/> is smaller than 1.</exception>
        /// <exception cref="InvalidOperationException">The KnrFP index returned an entry that is not an <c>int[]</c>.</exception>
        public void Build(MetricDB db, int symbolsPerHash, int neighborhoodExpansion, Random rand)
        {
            // 1.0 / symbolsPerHash and new int[symbolsPerHash] below are meaningless for values < 1.
            if (symbolsPerHash < 1) {
                throw new ArgumentOutOfRangeException ("symbolsPerHash", symbolsPerHash, "symbolsPerHash must be at least 1");
            }
            this.DB = db;
            int n = db.Count;
            this.SymbolsPerHash = symbolsPerHash; // very small values, i.e., 2, 3, 4
            this.NeighborhoodExpansion = neighborhoodExpansion; // neighborhoodExpansion >= symbolsPerHash
            // Number of references: n^(1/symbolsPerHash), truncated.
            var numrefs = (int)(Math.Pow(n, 1.0 / this.SymbolsPerHash));
            this.CountSymbolBits = countBits(numrefs);

            var refs = new SampleSpace("", db, numrefs, rand);
            var sat = new SAT_Distal ();
            // Use the caller's generator (instead of a fresh, time-seeded Random) so that
            // a seeded rand yields a reproducible index.
            sat.Build (refs, rand);
            this.R = sat;

            var G = new KnrFP ();
            G.Build (db, this.R, this.SymbolsPerHash);
            var knrcopy = new int[this.SymbolsPerHash];
            this.hashTable = new Dictionary<long, List<int>> ();
            for (int objID = 0; objID < n; ++objID) {
                var knr = G [objID] as int[];
                if (knr == null) {
                    throw new InvalidOperationException ("KnrFP entry is not an int[] for object " + objID);
                }
                // Work on a scratch copy: EncodeKnr destroys the reference order.
                knr.CopyTo (knrcopy, 0);
                var hash = this.EncodeKnr (knrcopy);

                List<int> bucket;
                if (!this.hashTable.TryGetValue(hash, out bucket)) {
                    bucket = new List<int>();
                    this.hashTable.Add(hash, bucket);
                }
                bucket.Add (objID);
            }

            // Bucket-size statistics (mean and standard deviation) for the log line below.
            double avg_len = 0;
            double avg_len_sq = 0;
            foreach (var list in this.hashTable.Values) {
                // Widen before squaring: int * int overflows for buckets above 46340 items.
                double len = list.Count;
                avg_len += len;
                avg_len_sq += len * len;
            }
            // An empty table (n == 0) reports 0 instead of NaN.
            if (this.hashTable.Count > 0) {
                avg_len /= this.hashTable.Count;
                avg_len_sq /= this.hashTable.Count;
            }
            // Rounding can push the variance slightly below zero; clamp so Sqrt never yields NaN.
            var variance = Math.Max (0.0, avg_len_sq - avg_len * avg_len);
            Console.WriteLine ("=== created hash table with {0} keys, items: {1}, popcount mean: {2}, popcount stddev: {3}",
                               this.hashTable.Count, n, avg_len, Math.Sqrt(variance));
        }
Beispiel #5
0
 /// <summary>
 /// Runs <c>Execute</c> for a SAT_Distal index named "&lt;nick&gt;/Index.SAT-Distal".
 /// </summary>
 /// <param name="setup">Forwarded unchanged to <c>Execute</c>.</param>
 /// <param name="nick">Forwarded to <c>Execute</c> and used as the prefix of the index name.</param>
 /// <returns>Whatever <c>Execute</c> returns.</returns>
 public static string ExecuteSATDistal(IndexArgumentSetup setup, string nick)
 {
     var indexName = String.Format ("{0}/Index.SAT-Distal", nick);
     // The callback constructs the index from the database Execute supplies.
     return Execute (setup, nick, indexName, (database) => {
         var index = new SAT_Distal ();
         index.Build (database, RandomSets.GetRandom ());
         return index;
     });
 }