/// <summary>
/// Builds the index: chooses <paramref name="num_centers"/> random objects of
/// <paramref name="db"/> as centers, creates one node per center, and attaches every
/// remaining object to the center returned by a 1-nearest-neighbor query.
/// </summary>
/// <param name="db">Database to index; stored in <c>DB</c>.</param>
/// <param name="num_centers">Number of centers to draw (one node is created per center).</param>
/// <param name="rand">Random source used to pick the centers and to build the auxiliary SAT_Distal.</param>
public virtual void Build(MetricDB db, int num_centers, Random rand) {
    this.DB = db;
    var total = this.DB.Count;
    // Randomized center selection has very good performance, even compared
    // with more "intelligent" strategies.
    this.node_list = new List<Node> (num_centers);
    var centers = RandomSets.GetRandomSubSet (num_centers, this.DB.Count, rand);
    var centerSet = new HashSet<int> (centers);
    var slot = 0;
    while (slot < num_centers) {
        this.node_list.Add (new Node (centers [slot]));
        ++slot;
    }
    // The nearest center is resolved through a SAT_Distal built over the centers only,
    // instead of measuring the distance from each object to every center.
    var centerIndex = new SAT_Distal ();
    centerIndex.Build (new SampleSpace ("", db, centers), rand);
    for (int objID = 0; objID < total; ++objID) {
        var reportProgress = (objID % 1000) == 0;
        if (reportProgress) {
            Console.WriteLine ("== Vor {0}/{1}, num_centers: {2}, db: {3}", objID + 1, total, num_centers, db.Name);
        }
        // Centers are already represented by their own node; only non-centers are assigned.
        if (!centerSet.Contains (objID)) {
            var query = this.DB [objID];
            var nearest = centerIndex.SearchKNN (query, 1, new Result (1)).First;
            // nearest.ObjID is used directly as a position in node_list
            // (presumably the center's position inside the sample -- confirm against SampleSpace).
            this.node_list [nearest.ObjID].Add (objID, nearest.Dist);
        }
    }
}
/// <summary>
/// Convenience overload: draws a sample of <paramref name="num_refs"/> objects from
/// <paramref name="db"/>, builds a SAT_Distal over that sample, and forwards to the
/// Build overload that takes the prebuilt reference index.
/// </summary>
/// <param name="db">Database to index.</param>
/// <param name="rand">Random source handed to the SampleSpace constructor.</param>
/// <param name="num_refs">Size of the reference sample.</param>
/// <param name="K">Forwarded unchanged to the inner Build overload.</param>
/// <param name="maxcand">Forwarded unchanged to the inner Build overload.</param>
/// <param name="seq_builder">Forwarded unchanged to the inner Build overload; may be null.</param>
public void Build(MetricDB db, Random rand, int num_refs, int K=7, int maxcand=1024, SequenceBuilder seq_builder=null) {
    var refSpace = new SampleSpace ("", db, num_refs, rand);
    var refIndex = new SAT_Distal ();
    // NOTE(review): the tree is built with RandomSets.GetRandom() rather than the
    // caller-supplied `rand` -- confirm this is intentional.
    var treeRandom = RandomSets.GetRandom ();
    refIndex.Build (refSpace, treeRandom);
    this.Build (db, refIndex, K, maxcand, seq_builder);
}
/// <summary>
/// Convenience overload: samples <paramref name="num_refs"/> objects from
/// <paramref name="db"/>, indexes the sample with a SAT_Distal, and delegates to
/// the Build overload that receives the reference index.
/// </summary>
/// <param name="db">Database to index.</param>
/// <param name="k">Forwarded unchanged to the inner Build overload.</param>
/// <param name="num_refs">Size of the reference sample.</param>
/// <param name="rand">Random source used for both the sampling and the SAT_Distal construction.</param>
public void Build(MetricDB db, int k, int num_refs, Random rand) {
    var refSpace = new SampleSpace ("", db, num_refs, rand);
    var refIndex = new SAT_Distal ();
    refIndex.Build (refSpace, rand);
    this.Build (db, k, refIndex);
}
/// <summary>
/// Builds the hash table of the index: every object of <paramref name="db"/> is mapped to
/// a hash computed by EncodeKnr from its KnrFP entry, and objects sharing a hash are
/// grouped in the same bucket. Bucket-size statistics are printed to the console at the end.
/// </summary>
/// <param name="db">Database to index; stored in <c>DB</c>.</param>
/// <param name="symbolsPerHash">Number of reference symbols encoded per hash (very small values, i.e., 2, 3, 4).</param>
/// <param name="neighborhoodExpansion">Stored in <c>NeighborhoodExpansion</c>; expected to be &gt;= <paramref name="symbolsPerHash"/>.</param>
/// <param name="rand">Random source used to sample the references and to build the reference index.</param>
public void Build(MetricDB db, int symbolsPerHash, int neighborhoodExpansion, Random rand) {
    this.DB = db;
    int n = db.Count;
    this.SymbolsPerHash = symbolsPerHash; // very small values, i.e., 2, 3, 4
    this.NeighborhoodExpansion = neighborhoodExpansion; // neighborhoodExpansion >= symbolsPerHash

    // The number of references is the SymbolsPerHash-th root of n, truncated.
    var numrefs = (int)(Math.Pow(n, 1.0 / this.SymbolsPerHash));
    this.CountSymbolBits = countBits(numrefs);

    var refs = new SampleSpace("", db, numrefs, rand);
    var sat = new SAT_Distal ();
    // Use the caller-supplied generator (not a fresh, time-seeded one) so that a
    // seeded `rand` yields a reproducible index.
    sat.Build (refs, rand);
    this.R = sat;

    // NOTE(review): G presumably holds, per object, its SymbolsPerHash nearest references -- confirm in KnrFP.
    var G = new KnrFP ();
    G.Build (db, this.R, this.SymbolsPerHash);

    var knrcopy = new int[this.SymbolsPerHash];
    this.hashTable = new Dictionary<long, List<int>> ();
    for (int objID = 0; objID < n; ++objID) {
        var knr = G [objID] as int[];
        // Work on a scratch copy because EncodeKnr destroys the reference order.
        knr.CopyTo (knrcopy, 0);
        var hash = this.EncodeKnr (knrcopy);
        List<int> L;
        if (!this.hashTable.TryGetValue(hash, out L)) {
            L = new List<int>();
            this.hashTable.Add(hash, L);
        }
        L.Add (objID);
    }

    // Mean and standard deviation of the bucket sizes.
    double avg_len = 0;
    double avg_len_sq = 0;
    foreach (var list in this.hashTable.Values) {
        // Square in double: an int product overflows for buckets larger than 46340 items.
        double len = list.Count;
        avg_len += len;
        avg_len_sq += len * len;
    }
    avg_len /= this.hashTable.Count;
    avg_len_sq /= this.hashTable.Count;
    // Rounding can push the variance slightly below zero (e.g. all buckets of equal size),
    // which would make Math.Sqrt return NaN.
    var variance = avg_len_sq - avg_len * avg_len;
    if (variance < 0) {
        variance = 0;
    }
    Console.WriteLine ("=== created hash table with {0} keys, items: {1}, popcount mean: {2}, popcount stddev: {3}", this.hashTable.Count, n, avg_len, Math.Sqrt(variance));
}
/// <summary>
/// Calls <c>Execute</c> for a SAT_Distal index named <c>{nick}/Index.SAT-Distal</c>,
/// supplying a factory that builds the SAT_Distal over the database it receives.
/// </summary>
/// <param name="setup">Argument setup forwarded unchanged to <c>Execute</c>.</param>
/// <param name="nick">Nickname forwarded to <c>Execute</c> and used as the prefix of the index name.</param>
/// <returns>The string returned by <c>Execute</c>.</returns>
public static string ExecuteSATDistal(IndexArgumentSetup setup, string nick) {
    var indexPath = String.Format ("{0}/Index.SAT-Distal", nick);
    return Execute (setup, nick, indexPath, (database) => {
        var tree = new SAT_Distal ();
        tree.Build (database, RandomSets.GetRandom ());
        return tree;
    });
}