/// <summary>
/// Builds this index using a <c>SAT_Distal</c> index over a sample of
/// <paramref name="num_refs"/> objects of <paramref name="db"/> as the index of references,
/// then delegates to the <c>Build(db, sat, K, maxcand, seq_builder)</c> overload.
/// </summary>
/// <param name="db">Database to be indexed.</param>
/// <param name="rand">
/// Random source handed to the reference sampler and to the SAT construction.
/// When it is null the SAT construction falls back to <c>RandomSets.GetRandom()</c>.
/// </param>
/// <param name="num_refs">Number of objects requested for the reference sample.</param>
/// <param name="K">Forwarded unchanged to the delegated overload.</param>
/// <param name="maxcand">Forwarded unchanged to the delegated overload.</param>
/// <param name="seq_builder">Forwarded unchanged to the delegated overload (may be null).</param>
public void Build(MetricDB db, Random rand, int num_refs, int K=7, int maxcand=1024, SequenceBuilder seq_builder=null)
{
    var sample = new SampleSpace ("", db, num_refs, rand);
    var sat = new SAT_Distal ();
    // Use the caller's generator for the SAT as well, so that a seeded Random makes the
    // whole construction repeatable; keep the previous generator as fallback for null.
    sat.Build (sample, rand ?? RandomSets.GetRandom ());
    this.Build (db, sat, K, maxcand, seq_builder);
}
/// <summary>
/// Convenience overload: samples <paramref name="num_refs"/> objects from
/// <paramref name="db"/>, indexes that sample with a <c>SAT_Distal</c> and hands the
/// result to the <c>Build(db, k, index)</c> overload.
/// </summary>
/// <param name="db">Database to be indexed.</param>
/// <param name="k">Forwarded unchanged to the delegated overload.</param>
/// <param name="num_refs">Number of objects requested for the reference sample.</param>
/// <param name="rand">Random source used by both the sampler and the SAT construction.</param>
public void Build(MetricDB db, int k, int num_refs, Random rand)
{
    SampleSpace references = new SampleSpace("", db, num_refs, rand);
    SAT_Distal references_index = new SAT_Distal();
    references_index.Build(references, rand);

    this.Build(db, k, references_index);
}
/// <summary>
/// Builds the hash table of this index: every object of <paramref name="db"/> is mapped
/// to the hash produced by <c>EncodeKnr</c> over its <c>KnrFP</c> sequence, and object
/// identifiers sharing a hash are stored in the same bucket. A one-line summary of the
/// bucket population is written to the console when finished.
/// </summary>
/// <param name="db">Database to be indexed.</param>
/// <param name="symbolsPerHash">Length of the sequence encoded per object; very small values, i.e., 2, 3, 4.</param>
/// <param name="neighborhoodExpansion">Stored as-is; expected to be >= <paramref name="symbolsPerHash"/>.</param>
/// <param name="rand">
/// Random source for the reference sample and for the SAT over the references.
/// When it is null the SAT is built with a fresh <c>Random</c>.
/// </param>
public void Build(MetricDB db, int symbolsPerHash, int neighborhoodExpansion, Random rand)
{
    this.DB = db;
    int n = db.Count;
    this.SymbolsPerHash = symbolsPerHash; // very small values, i.e., 2, 3, 4
    this.NeighborhoodExpansion = neighborhoodExpansion; // neighborhoodExpansion >= symbolsPerHash

    // number of references: n^(1/SymbolsPerHash), truncated
    var numrefs = (int)(Math.Pow(n, 1.0 / this.SymbolsPerHash));
    this.CountSymbolBits = countBits(numrefs);

    var refs = new SampleSpace("", db, numrefs, rand);
    var sat = new SAT_Distal ();
    // Honor the caller's generator so that a seeded Random yields a repeatable index.
    sat.Build (refs, rand ?? new Random ());
    this.R = sat;

    var G = new KnrFP ();
    G.Build (db, this.R, this.SymbolsPerHash);

    // scratch buffer reused for every object
    var knrcopy = new int[this.SymbolsPerHash];
    this.hashTable = new Dictionary<long, List<int>> ();
    for (int objID = 0; objID < n; ++objID) {
        var knr = G [objID] as int[];
        knr.CopyTo (knrcopy, 0);
        var hash = this.EncodeKnr (knrcopy); // EncodeKnr destroys the reference order
        List<int> L;
        if (!this.hashTable.TryGetValue(hash, out L)) {
            L = new List<int>();
            this.hashTable.Add(hash, L);
        }
        L.Add (objID);
    }

    // bucket population statistics (mean and standard deviation of the bucket sizes)
    double avg_len = 0;
    double avg_len_sq = 0;
    foreach (var list in this.hashTable.Values) {
        // square in double precision: Count * Count overflows int for buckets above 46340 items
        double len = list.Count;
        avg_len += len;
        avg_len_sq += len * len;
    }
    avg_len /= this.hashTable.Count;
    avg_len_sq /= this.hashTable.Count;
    // rounding can push the variance slightly below zero, which would print NaN
    double variance = Math.Max (0.0, avg_len_sq - avg_len * avg_len);
    Console.WriteLine ("=== created hash table with {0} keys, items: {1}, popcount mean: {2}, popcount stddev: {3}",
                       this.hashTable.Count, n, avg_len, Math.Sqrt(variance));
}
/// <summary>
/// Builds the hash table of this index: a <c>Sequential</c> index over a sample of
/// <paramref name="num_refs"/> objects becomes the index of references, every object of
/// <paramref name="db"/> is hashed with <c>GetHashKnr</c>, and object identifiers sharing
/// a hash are stored in the same bucket of <c>TABLE</c>.
/// </summary>
/// <param name="db">Database to be indexed.</param>
/// <param name="K">Must be between 1 and 4.</param>
/// <param name="num_refs">Number of objects requested for the reference sample; must be between 1 and 255.</param>
/// <param name="rand">Random source for the reference sample; when null the sampler's own default is used.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="K"/> or <paramref name="num_refs"/> is outside its valid range.
/// </exception>
public void Build(MetricDB db, int K, int num_refs, Random rand)
{
    this.DB = db;
    int n = db.Count;
    // valid values to be used as parameters
    // 1 <= numrefs <= 255
    // 1 <= K <= 4
    if (K < 1 || K > 4) {
        // three-argument form: the one-string constructor would treat the message as the parameter name
        throw new ArgumentOutOfRangeException ("K", K, String.Format("K should be between 1 to 4, K={0}", K));
    }
    if (num_refs < 1 || num_refs > 255) {
        throw new ArgumentOutOfRangeException ("num_refs", num_refs, String.Format("num_refs should be between 1 to 255, num_refs={0}", num_refs));
    }
    this.K = K;

    // sample the references with the caller's generator when one is given
    var refs = (rand == null) ? new SampleSpace("", db, num_refs) : new SampleSpace("", db, num_refs, rand);
    var seq = new Sequential();
    seq.Build(refs);
    this.R = seq;

    // first pass: compute the hash of every object
    int[] G = new int[n];
    for (int objID = 0; objID < n; ++objID) {
        var u = this.DB[objID];
        var useq = this.GetHashKnr(u);
        G[objID] = useq;
        if (objID % 10000 == 0) {
            Console.WriteLine ("computing knrlsh {0}/{1} (adv. {2:0.00}%, db: {3}, K: {4}, curr. time: {5})", objID, n, objID*100.0/n, this.DB.Name, this.K, DateTime.Now);
        }
    }

    // second pass: group object identifiers by hash
    this.TABLE = new Dictionary<int, List<int>> ();
    for (int objID = 0; objID < n; ++objID) {
        var hash = G[objID];
        List<int> L;
        if (!this.TABLE.TryGetValue(hash, out L)) {
            L = new List<int>();
            this.TABLE.Add(hash, L);
        }
        L.Add (objID);
    }
}
/// <summary>
/// Runs the query file against a battery of indexes built over <paramref name="db"/>
/// (exhaustive search, the List of Clusters variants, permutation based indexes and
/// KnrSeqSearch), collects the name of each result file and finally passes the
/// collected names, followed by "--horizontal", to <c>Commands.Check</c>.
/// </summary>
/// <param name="nick">Suffix appended to every index name.</param>
/// <param name="db">Database under test.</param>
/// <param name="queries">Name handed to <c>QueryStream</c>; also embedded in result names.</param>
/// <param name="num_centers">Forwarded to each <c>TestLC</c> call.</param>
/// <param name="num_perms">Forwarded to each <c>TestPI</c> call.</param>
/// <param name="num_refs">Size of the random reference subset for the KNR index; also forwarded to <c>TestKNR</c>.</param>
public void Test(string nick, MetricDB db, string queries, int num_centers, int num_perms, int num_refs)
{
    var query_stream = new QueryStream (queries);
    var results = new List<string> ();

    // Exhaustive search: always rebuilt and saved, searched only when its result file is missing
    {
        var exhaustive = new Sequential ();
        exhaustive.Build (db);
        var seq_name = "Index.Sequential." + nick;
        IndexGenericIO.Save (seq_name, exhaustive);
        var seq_result = "Res." + seq_name + "." + queries;
        if (!File.Exists (seq_result)) {
            var options = new ShellSearchOptions (queries, seq_name, seq_result);
            Commands.Search (exhaustive, query_stream.Iterate (), options);
        }
        results.Add (seq_result);
    }

    //
    // The List of Clusters and variants
    //
    results.Add (this.TestLC ("Index.LC_RNN." + nick, db, num_centers, new LC_RNN (), queries, query_stream));
    results.Add (this.TestLC ("Index.LC." + nick, db, num_centers, new LC (), queries, query_stream));
    results.Add (this.TestLC ("Index.LC_IRNN." + nick, db, num_centers, new LC_IRNN (), queries, query_stream));
    results.Add (this.TestLC ("Index.LC_PRNN." + nick, db, num_centers, new LC_PRNN (), queries, query_stream));
    results.Add (this.TestLC ("Index.LC_ParallelBuild." + nick, db, num_centers, new LC_ParallelBuild (), queries, query_stream));

    //
    // Permutation Based Indexes
    //
    // Permutations
    results.Add (this.TestPI ("Index.Perms." + nick, db, num_perms, new Perms (), queries, query_stream));
    // Brief Index
    results.Add (this.TestPI ("Index.BinPerms." + nick, db, num_perms, new BinPerms (), queries, query_stream));
    // BinPermsTwoBits
    results.Add (this.TestPI ("Index.BinPermsTwoBits." + nick, db, num_perms, new BinPermsTwoBit (), queries, query_stream));

    //
    // KNR
    //
    {
        var knr_name = "Index.KnrSeqSearch." + nick;
        KnrSeqSearch knr_index;
        if (!File.Exists (knr_name)) {
            Console.WriteLine ("** Starting construction of '{0}'", knr_name);
            var fresh = new KnrSeqSearch ();
            // references: a random subset of the database indexed with a List of Clusters
            var subset = RandomSets.GetRandomSubSet (num_refs, db.Count);
            var refs_db = new SampleSpace ("", db, subset);
            var refs_index = new LC ();
            refs_index.Build (refs_db, refs_db.Count / 10);
            fresh.Build (db, refs_index, 7);
            IndexGenericIO.Save (knr_name, fresh);
            knr_index = fresh;
        } else {
            knr_index = (KnrSeqSearch)IndexGenericIO.Load (knr_name);
        }
        knr_index.MAXCAND = 1024;
        this.TestKNR (knr_index, knr_name, queries, num_refs, results, (I) => I);

        Console.WriteLine ("==== Working on a permuted space");
        var sorted_name = knr_name + ".proximity-sorted";
        if (File.Exists (sorted_name)) {
            knr_index = (KnrSeqSearch)IndexGenericIO.Load (sorted_name);
        } else {
            knr_index = knr_index.GetSortedByPrefix ();
            knr_index.MAXCAND = 1024;
            IndexGenericIO.Save (sorted_name, knr_index);
        }
        this.TestKNR (knr_index, sorted_name, queries, num_refs, results, (I) => new PermutedIndex (I));
    }

    results.Add ("--horizontal");
    Commands.Check (results);
}
/// <summary>
/// Convenience overload: samples <paramref name="num_refs"/> objects from
/// <paramref name="db"/> and delegates to the overload that receives the sample.
/// </summary>
/// <param name="db">Database to be indexed.</param>
/// <param name="num_refs">Number of objects requested for the sample.</param>
/// <param name="maxcand">Forwarded unchanged to the delegated overload.</param>
/// <param name="mod">Forwarded unchanged to the delegated overload.</param>
/// <param name="permcenter">Forwarded unchanged to the delegated overload.</param>
public void Build(MetricDB db, int num_refs, int maxcand=1024, double mod=0.5, bool permcenter=true)
{
    this.Build (db, new SampleSpace ("", db, num_refs), maxcand, mod, permcenter);
}
/// <summary>
/// Creates (or loads, when the index file already exists) a <c>KnrSeqSearch</c> index and
/// runs the configured search with several searchers built on top of it (PPIndex, cosine,
/// footrule, Jaccard-LCS, LCSv3 and NAPP). A search is skipped when its result file
/// already exists.
/// </summary>
/// <param name="setup">Experiment configuration (database file, query argument, NAPP ksearch values).</param>
/// <param name="nick">Directory-like prefix of the index name.</param>
/// <param name="numrefs">Number of objects requested for the reference sample.</param>
/// <param name="k">
/// K given to the index construction; 0 asks for an estimated value when the index is created.
/// </param>
/// <param name="maxcand_ratio">MAXCAND is set to the database size multiplied by this ratio.</param>
/// <returns>The names of all result files, whether freshly computed or already present.</returns>
public static List<string> ExecuteKNRSEQ(IndexArgumentSetup setup, string nick, int numrefs, int k, double maxcand_ratio)
{
    var idxname = String.Format ("{0}/Index.knrseq-{1}-{2}", nick, numrefs, k);
    MetricDB db = SpaceGenericIO.Load (setup.BINARY_DATABASE);
    Index idx;
    var suffix = "";
    var resnamelist = new List<string> ();

    if (File.Exists (idxname)) {
        Console.WriteLine ("*** loading index {0}", idxname);
        idx = IndexGenericIO.Load (idxname);
        if (k == 0) {
            // the estimated K is recovered from the stored index
            var loaded = idx as KnrSeqSearch;
            suffix = String.Format ("estimated-K={0}.", loaded.K);
        }
    } else {
        Console.WriteLine ("*** creating index {0}", idxname);
        var start_ticks = DateTime.Now.Ticks;
        var start_distances = db.NumberDistances;
        var created = new KnrSeqSearch ();
        var refs_db = new SampleSpace("", db, numrefs);
        var refs_index = new EPTable ();
        refs_index.Build(refs_db, 4, (_db, _rand) => new EPListOptimizedA(_db, 4, _rand));
        if (k == 0) {
            k = KnrEstimateParameters.EstimateKnrEnsuringSharedNeighborhoods (db, refs_index, (int)Math.Abs (setup.QARG));
            suffix = String.Format ("estimated-K={0}.", k);
        }
        created.Build (db, refs_index, k, int.MaxValue);
        var elapsed_ticks = DateTime.Now.Ticks - start_ticks;
        var used_distances = db.NumberDistances - start_distances;
        SaveConstructionTime (idxname, elapsed_ticks, used_distances);
        IndexGenericIO.Save (idxname, created);
        idx = created;
    }

    // every searcher below works on top of the same underlying index
    var base_knr = idx as KnrSeqSearch;
    string resname;

    // PPIndex
    resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.PPI", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists (resname)) {
        base_knr.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
        PerformSearch (resname, base_knr, idxname, setup);
    }

    // KnrSeqSearchCosine
    resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.COS", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists (resname)) {
        var cosine = new KnrSeqSearchCosine(base_knr);
        cosine.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
        PerformSearch (resname, cosine, idxname, setup);
    }

    // KnrSeqSearchFootrule
    resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.FOOTRULE", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists (resname)) {
        var footrule = new KnrSeqSearchFootrule(base_knr);
        footrule.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
        PerformSearch (resname, footrule, idxname, setup);
    }

    // KnrSeqSearchJaccLCS
    resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.JACCLCS", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists (resname)) {
        var jacclcs = new KnrSeqSearchJaccLCS(base_knr);
        jacclcs.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
        PerformSearch (resname, jacclcs, idxname, setup);
    }

    // KnrSeqSearchLCSv3
    resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.LCSv3", maxcand_ratio));
    resnamelist.Add(resname);
    if (!File.Exists (resname)) {
        var lcsv3 = new KnrSeqSearchLCSv3(base_knr);
        lcsv3.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
        PerformSearch (resname, lcsv3, idxname, setup);
    }

    // NAPP
    // NOTE(review): ksearch only reaches the result name, it is never handed to the
    // NAPP searcher here -- confirm whether that is intended.
    foreach (var ksearch in setup.KNR_KSEARCH) {
        var napp = new NAPP(base_knr);
        napp.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
        resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.NAPP.ksearch={1}", maxcand_ratio, ksearch));
        resnamelist.Add(resname);
        if (!File.Exists (resname)) {
            PerformSearch (resname, napp, idxname, setup);
        }
    }
    return resnamelist;
}
/// <summary>
/// Builds this index using an approximate index of references: a
/// <c>KnrSeqSearch</c> built over a sample of <paramref name="num_refs"/> objects and
/// wrapped in a <c>KnrSeqSearchFootrule</c>, which is then handed to the
/// <c>Build(db, index, K, maxcand, seq_builder)</c> overload.
/// </summary>
/// <param name="db">Database to be indexed.</param>
/// <param name="rand">Random source handed to the inner index construction.</param>
/// <param name="num_refs">Number of objects requested for the reference sample.</param>
/// <param name="K">Used for both the inner index and the delegated overload.</param>
/// <param name="maxcand">Forwarded unchanged to the delegated overload.</param>
/// <param name="seq_builder">Forwarded unchanged to the delegated overload (may be null).</param>
public void BuildApprox(MetricDB db, Random rand, int num_refs, int K=7, int maxcand=1024, SequenceBuilder seq_builder = null)
{
    var refs_db = new SampleSpace ("", db, num_refs);

    // inner index over the references; the fixed 1024 and int.MaxValue are passed positionally
    var refs_knr = new KnrSeqSearch ();
    refs_knr.Build (refs_db, rand, 1024, K, int.MaxValue);

    var refs_index = new KnrSeqSearchFootrule (refs_knr);
    this.Build (db, refs_index, K, maxcand, seq_builder);
}