public virtual void Build(MetricDB db, int num_pairs, int maxCandidates = -1)
{
    this.DB = db;
    this.Fingerprints = new BinQ8HammingSpace (1);
    this.Sample = new SampleSpace ("", this.DB, num_pairs * 2);
    this.MaxCandidates = maxCandidates;
    var n = this.DB.Count;
    var A = new byte[n][];
    int pc = this.DB.Count / 100 + 1;
    int advance = 0;
    var create_one = new Action<int> (delegate(int i) {
        var fp = this.GetFP (this.DB [i]);
        A [i] = fp;
        // the progress counter is shared among worker threads, so it must be
        // updated atomically
        int adv = System.Threading.Interlocked.Increment (ref advance);
        if (adv % pc == 0) {
            Console.WriteLine ("DEBUG {0} ({1}/{2}), db: {3}, num_pairs: {4}, timestamp: {5}",
                this, adv, n, db.Name, num_pairs, DateTime.Now);
        }
    });
    ParallelOptions ops = new ParallelOptions ();
    ops.MaxDegreeOfParallelism = -1;
    // pass the options to the loop; they were previously created but never used
    Parallel.For (0, n, ops, create_one);
    foreach (var fp in A) {
        this.Fingerprints.Add (fp);
    }
    var s = new Sequential ();
    s.Build (this.Fingerprints);
    this.InternalIndex = s;
}
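// Usage sketch (illustrative, not part of the original source): how this Build
// is typically driven. The concrete subclass providing GetFP is not shown in
// this file, so `MyFingerprintIndex` below is a hypothetical name; the database
// path mirrors the loading calls used elsewhere in this repository.
//
//   MetricDB db = SpaceGenericIO.Load ("DB.colors", true);
//   var idx = new MyFingerprintIndex ();      // hypothetical concrete subclass
//   idx.Build (db, 512);                      // fingerprints computed in parallel
//   IndexGenericIO.Save ("Index.FP.colors", idx);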
public static int EstimateKnrEnsuringSharedNeighborhoods(MetricDB db, Index refs, int k, int numQueries = 256)
{
    // The strategy consists of ensuring that the neighborhoods of each query
    // and of all its k nearest neighbors overlap.
    // Update: a probability threshold is introduced to discard noisy, hard queries.
    // NOTICE: it cannot be adjusted for 1-nn because database items are used as
    // training objects; it produces valid values for 2-nn and larger.
    Sequential seq = new Sequential ();
    var overlappingMinProb = 1.0;
    if (k < 10) {
        // currently the same threshold is kept for small k
        overlappingMinProb = 1.0;
    }
    seq.Build (db);
    var n = db.Count;
    // large k needs no extra items, but small ones (1 or 2) need a small constant
    var Kmax = 128;
    var Kmin = 1;
    foreach (var qID in RandomSets.GetRandomSubSet (numQueries, n)) {
        var q = db [qID];
        var qknr = Result2Sequence (refs.SearchKNN (q, Kmax));
        var list = new List<int[]> (k);
        foreach (var p in seq.SearchKNN (db [qID], k)) {
            list.Add (Result2Sequence (refs.SearchKNN (db [p.ObjID], Kmax)));
        }
        var qset = new HashSet<int> ();
        var overlapping = 0;
        for (int i = 0; i < Kmin; ++i) {
            qset.Add (qknr [i]);
        }
        for (int i = 0; i < Kmax && overlapping < list.Count * overlappingMinProb; ++i) {
            qset.Add (qknr [i]);
            overlapping = 0;
            for (int j = 0; j < list.Count; ++j) {
                if (list [j] == null) {
                    // already matched in a previous round
                    ++overlapping;
                } else if (qset.Contains (list [j] [i])) {
                    list [j] = null;
                    ++overlapping;
                }
            }
            Kmin = Math.Max (Kmin, i + 1);
        }
    }
    return Kmin;
}
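// Usage sketch (illustrative, not part of the original source): estimating the
// required K with an LC index over a random reference sample as `refs`,
// mirroring how the Test() driver below builds its KNR references. Sample
// sizes and the value of k are placeholders chosen for illustration.
public static int ExampleEstimateKnr (MetricDB db)
{
    var sample = RandomSets.GetRandomSubSet (1024, db.Count);
    var refsdb = new SampleSpace ("", db, sample);
    var refsidx = new LC ();
    refsidx.Build (refsdb, refsdb.Count / 10);
    // smallest Knr such that the 8-nn neighborhoods of queries and of their
    // neighbors share references
    return EstimateKnrEnsuringSharedNeighborhoods (db, refsidx, 8);
}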
public void Build(MetricDB db, int K, int num_refs, Random rand)
{
    this.DB = db;
    int n = db.Count;
    // valid parameter ranges: num_refs <= 255, K <= 4
    if (K > 4) {
        throw new ArgumentOutOfRangeException (String.Format ("K must be between 1 and 4, K={0}", K));
    }
    if (num_refs > 255) {
        throw new ArgumentOutOfRangeException (String.Format ("num_refs must be between 1 and 255, num_refs={0}", num_refs));
    }
    this.K = K;
    var refs = new SampleSpace ("", db, num_refs);
    var seq = new Sequential ();
    seq.Build (refs);
    this.R = seq;
    int[] G = new int[n];
    for (int objID = 0; objID < n; ++objID) {
        var u = this.DB [objID];
        var useq = this.GetHashKnr (u);
        G [objID] = useq;
        if (objID % 10000 == 0) {
            Console.WriteLine ("computing knrlsh {0}/{1} (adv. {2:0.00}%, db: {3}, K: {4}, curr. time: {5})",
                objID, n, objID * 100.0 / n, this.DB.Name, this.K, DateTime.Now);
        }
    }
    // group object identifiers by hash value
    this.TABLE = new Dictionary<int, List<int>> ();
    for (int objID = 0; objID < n; ++objID) {
        var hash = G [objID];
        List<int> L;
        if (!this.TABLE.TryGetValue (hash, out L)) {
            L = new List<int> ();
            this.TABLE.Add (hash, L);
        }
        L.Add (objID);
    }
}
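// Usage sketch (illustrative): the progress message above suggests this class
// implements a KNR-LSH hash table; `KnrLSH` below is an assumed name for it,
// and the parameters are placeholders within the documented ranges.
//
//   var table = new KnrLSH ();                    // hypothetical class name
//   table.Build (db, 4, 255, new Random ());      // K=4, num_refs=255
//   // objects whose 4 nearest references hash equally share a bucket in TABLE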
public static Parameters EstimateParameters(MetricDB db, int max_instances, int k, double expected_recall, int num_estimation_queries)
{
    var seq = new Sequential ();
    seq.Build (db);
    var I = new NeighborhoodHash ();
    int symbolsPerHash = 3;
    I.Build (db, symbolsPerHash);
    var Q = RandomSets.GetRandomSubSet (num_estimation_queries, db.Count);
    // k > 1 is required since Q is a subset of the database (the nearest
    // neighbor of each query is the query itself)
    if (k == 1) {
        ++k;
    }
    ++k;
    var res_array = new HashSet<int>[Q.Length];
    for (int i = 0; i < Q.Length; ++i) {
        var s = KnrFP.GetFP (db [Q [i]], seq, k);
        res_array [i] = new HashSet<int> (s);
    }
    int num_instances = 0;
    // pre-decrement so the first loop iteration restores the original expansion
    --I.NeighborhoodExpansion;
    double cost = 0.0;
    double time = 0.0;
    do {
        ++I.NeighborhoodExpansion;
        var c = db.NumberDistances;
        var t = DateTime.Now.Ticks;
        num_instances = _EstimateParameters (k, expected_recall, I, Q, res_array);
        cost = (db.NumberDistances - c) / Q.Length * num_instances;
        time = TimeSpan.FromTicks ((DateTime.Now.Ticks - t) / Q.Length).TotalSeconds * num_instances;
        Console.WriteLine ("==== expansion: {0}, num_instances: {1}, search-cost: {2}, search-time: {3}",
            I.NeighborhoodExpansion, num_instances, cost, time);
    } while (num_instances > max_instances);
    return new Parameters () {
        Index = I,
        NumberOfInstances = num_instances
    };
}
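// Usage sketch (illustrative, not part of the original source): tuning a
// NeighborhoodHash ensemble for 10-nn, targeting 0.9 recall with at most 16
// instances; all numbers are placeholders.
public static Parameters ExampleTuneNeighborhoodHash (MetricDB db)
{
    var p = EstimateParameters (db, 16, 10, 0.9, 256);
    // p.Index already carries the NeighborhoodExpansion found by the loop above
    Console.WriteLine ("instances to build: {0}", p.NumberOfInstances);
    return p;
}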
public void Test(string nick, MetricDB db, string queries, int num_centers, int num_perms, int num_refs)
{
    var qstream = new QueryStream (queries);
    var reslist = new List<string> ();
    // Exhaustive search
    {
        Sequential seq = new Sequential ();
        seq.Build (db);
        var idxname = "Index.Sequential." + nick;
        IndexGenericIO.Save (idxname, seq);
        var resname = "Res." + idxname + "." + queries;
        if (!File.Exists (resname)) {
            Commands.Search (seq, qstream.Iterate (), new ShellSearchOptions (queries, idxname, resname));
        }
        reslist.Add (resname);
    }
    ///
    /// The List of Clusters and variants
    ///
    // LC_RNN
    reslist.Add (this.TestLC ("Index.LC_RNN." + nick, db, num_centers, new LC_RNN (), queries, qstream));
    // LC
    reslist.Add (this.TestLC ("Index.LC." + nick, db, num_centers, new LC (), queries, qstream));
    // LC_IRNN
    reslist.Add (this.TestLC ("Index.LC_IRNN." + nick, db, num_centers, new LC_IRNN (), queries, qstream));
    // LC_PRNN
    reslist.Add (this.TestLC ("Index.LC_PRNN." + nick, db, num_centers, new LC_PRNN (), queries, qstream));
    // LC_ParallelBuild
    reslist.Add (this.TestLC ("Index.LC_ParallelBuild." + nick, db, num_centers, new LC_ParallelBuild (), queries, qstream));
    ///
    /// Permutation Based Indexes
    ///
    // Permutations
    reslist.Add (this.TestPI ("Index.Perms." + nick, db, num_perms, new Perms (), queries, qstream));
    // Brief Index
    reslist.Add (this.TestPI ("Index.BinPerms." + nick, db, num_perms, new BinPerms (), queries, qstream));
    // BinPermsTwoBits
    reslist.Add (this.TestPI ("Index.BinPermsTwoBits." + nick, db, num_perms, new BinPermsTwoBit (), queries, qstream));
    ///
    /// KNR
    ///
    {
        KnrSeqSearch idx;
        var idxname = "Index.KnrSeqSearch." + nick;
        if (File.Exists (idxname)) {
            idx = (KnrSeqSearch)IndexGenericIO.Load (idxname);
        } else {
            Console.WriteLine ("** Starting construction of '{0}'", idxname);
            var knr = new KnrSeqSearch ();
            var sample = RandomSets.GetRandomSubSet (num_refs, db.Count);
            var refsdb = new SampleSpace ("", db, sample);
            var refsidx = new LC ();
            refsidx.Build (refsdb, refsdb.Count / 10);
            knr.Build (db, refsidx, 7);
            IndexGenericIO.Save (idxname, knr);
            idx = knr;
        }
        idx.MAXCAND = 1024;
        this.TestKNR (idx, idxname, queries, num_refs, reslist, (I) => I);
        Console.WriteLine ("==== Working on a permuted space");
        idxname = idxname + ".proximity-sorted";
        if (!File.Exists (idxname)) {
            idx = idx.GetSortedByPrefix ();
            idx.MAXCAND = 1024;
            IndexGenericIO.Save (idxname, idx);
        } else {
            idx = (KnrSeqSearch)IndexGenericIO.Load (idxname);
        }
        this.TestKNR (idx, idxname, queries, num_refs, reslist, (I) => new PermutedIndex (I));
    }
    reslist.Add ("--horizontal");
    Commands.Check (reslist);
}
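// Usage sketch (illustrative): a typical invocation of the benchmark driver
// above. The enclosing class is not shown in this file, so instantiation is
// elided; parameter values are placeholders.
//
//   var db = SpaceGenericIO.Load ("DB.colors", true);
//   this.Test ("colors", db, "colors.queries", 128, 64, 1024);
//   // num_centers=128, num_perms=64, num_refs=1024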
/// <summary>
/// The API Build method for BinPerms
/// </summary>
public void Build(MetricDB db, MetricDB refs, int maxcand = 1024, double mod = 0.5, bool permcenter = true, Perms idxperms = null)
{
    this.DB = db;
    this.REFS = refs;
    this.MAXCAND = maxcand;
    if (mod < 1) {
        // a fractional mod is interpreted as a proportion of the reference set
        this.MOD = (int)Math.Ceiling (mod * this.REFS.Count);
    } else {
        this.MOD = (int)mod;
    }
    this.permcenter = permcenter;
    var DATA = new List<byte[]> ();
    if (idxperms == null) {
        // base.Build (name, spaceClass, spaceName, spacePerms, maxcand);
        int onepercent = 1 + (this.DB.Count / 100);
        for (int docID = 0; docID < this.DB.Count; ++docID) {
            DATA.Add (null);
        }
        int I = 0;
        var build_one = new Action<int> ((int docID) => {
            // the progress counter is shared among worker threads, so it must
            // be updated atomically
            int adv = System.Threading.Interlocked.Increment (ref I);
            if ((adv % onepercent) == 0) {
                Console.WriteLine ("Generating {0}, db: {1}, num_refs: {2}, docID: {3}, advance {4:0.00}%, timestamp: {5}",
                    this, db.Name, refs.Count, adv, adv * 100.0 / DATA.Count, DateTime.Now);
            }
            var inv = this.ComputeInverse (docID);
            DATA [docID] = this.Encode (inv);
        });
        var ops = new ParallelOptions ();
        ops.MaxDegreeOfParallelism = -1;
        Parallel.For (0, this.DB.Count, ops, build_one);
    } else {
        for (int docid = 0; docid < this.DB.Count; docid++) {
            var inv = idxperms.GetComputedInverse (docid);
            DATA.Add (this.Encode (inv));
        }
    }
    var binperms = new MemMinkowskiVectorDB<byte> ();
    binperms.Build ("", DATA, 1);
    var seq = new Sequential ();
    seq.Build (binperms);
    this.IndexHamming = seq;
}
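// Usage sketch (illustrative, not part of the original source): building a
// BinPerms index over a random reference sample, as the Test() driver above
// does through TestPI; the sample size is a placeholder.
public static BinPerms ExampleBuildBinPerms (MetricDB db, int num_perms)
{
    var refs = new SampleSpace ("", db, num_perms); // random references
    var idx = new BinPerms ();
    // defaults: maxcand=1024, mod=0.5 (half the references), center permutation on
    idx.Build (db, refs);
    return idx;
}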
public static string ExecuteSeq(IndexArgumentSetup setup, string nick)
{
    var idxname = String.Format ("{0}/Index.Seq", nick);
    return Execute (setup, nick, idxname, (db) => {
        var seq = new Sequential ();
        seq.Build (db);
        return seq;
    });
}
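// Usage sketch (illustrative): ExecuteSeq delegates to an Execute() helper not
// shown in this file, and IndexArgumentSetup is defined elsewhere, so only the
// call shape is illustrated.
//
//   var resname = ExecuteSeq (setup, "colors");
//   Console.WriteLine ("sequential results written to {0}", resname);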
public static void Main(string[] args)
{
    // Usage: VPForest db_file queries_file query_arg query_type db_name dim [tau]
    string db_file = "DB.colors";
    //string db_file="/home/memo/Descargas/db/colors/DB-colors.save";
    string queries_file = "colors.queries";
    double querie_arg = .07;
    string query_type = "Range";
    string dbname = "colors";
    int dim = 112;
    double tau = .07;
    //IList<float[]> queries=new List<float[]>();
    if (args.Length != 0) {
        db_file = args [0];
        queries_file = args [1];
        querie_arg = Convert.ToDouble (args [2]);
        query_type = args [3];
        dbname = args [4];
        dim = Convert.ToInt32 (args [5]);
        if (args.Length == 7)
            tau = Convert.ToDouble (args [6]);
    }
    // Read the database, building and saving it first if necessary
    if (!File.Exists (db_file)) {
        MemMinkowskiVectorDB<float> _db = new MemMinkowskiVectorDB<float> ();
        _db.Build (dbname + ".ascii.header");
        SpaceGenericIO.Save (db_file, _db);
    }
    MetricDB DB;
    DB = SpaceGenericIO.Load (db_file, true);
    Console.WriteLine ("DB Loaded size:{0}", DB.Count);
    int[] J = { 1, 2, 4, 8, 16 }; // groups
    int[] I = { 1 }; // not used
    foreach (int i in I) {
        foreach (int j in J) {
            int pivspergrp = 0; // unused
            // Build the VP-Forest index
            //Console.WriteLine("Building Forest m:{0}",i/10d);
            string VPF_file = "VP-Forest-" + dbname + "-Tau-" + tau + ".idx";
            VP_Forest VPF_Search;
            if (!File.Exists (VPF_file)) {
                Chronos chr_time = new Chronos ();
                chr_time.Start ();
                VPF_Search = new VP_Forest (DB, _tau: tau);
                chr_time.End ();
                File.AppendAllText ("index-" + dbname + "-construction-speed-VP-Forest.csv",
                    string.Format ("{0} {1}{2}", tau, chr_time.AccTime, Environment.NewLine));
                VPF_Search.Save (new BinaryWriter (File.OpenWrite (VPF_file)));
            } else {
                VPF_Search = new VP_Forest ();
                VPF_Search.Load (new BinaryReader (File.OpenRead (VPF_file)));
            }
            // Sequential index
            Sequential Seq = new Sequential ();
            Seq.Build (DB);
            // EPT indexes
            EPTable eptable_rnd400 = new EPTable (); // 400 pivots / group
            EPTable eptable_rnd100 = new EPTable (); // 100 pivots / group
            EPTable eptable_rnd8 = new EPTable ();   // 8 pivots / group
            EPTable eptable_rnd32 = new EPTable ();  // 32 pivots / group
            EPTable eptable_opt = new EPTable ();
            // Build the EPT indexes
            Chronos chr_ept;
            string ept_file = "ept-opt-" + dbname + "-grps-" + j + ".idx";
            if (!File.Exists (ept_file)) {
                chr_ept = new Chronos ();
                chr_ept.Start ();
                eptable_opt.Build (DB, j, (MetricDB _db, Random seed) => new EPListOptimized (DB, j, seed, 1000, .8), 1);
                chr_ept.End ();
                File.AppendAllText ("index-" + dbname + "-construction-speed-ept.csv",
                    string.Format ("EPT-opt {0} {1}{2}", j, chr_ept.AccTime, Environment.NewLine));
                eptable_opt.Save (new BinaryWriter (File.OpenWrite (ept_file)));
            } else {
                eptable_opt.Load (new BinaryReader (File.OpenRead (ept_file)));
            }
            ept_file = "ept-rnd100-" + dbname + "-grps-" + j + ".idx";
            if (!File.Exists (ept_file)) {
                chr_ept = new Chronos ();
                chr_ept.Start ();
                eptable_rnd100.Build (DB, j);
                chr_ept.End ();
                File.AppendAllText ("index-" + dbname + "-construction-speed-ept.csv",
                    string.Format ("EPT-rnd100 {0} {1}{2}", j, chr_ept.AccTime, Environment.NewLine));
                eptable_rnd100.Save (new BinaryWriter (File.OpenWrite (ept_file)));
            } else {
                eptable_rnd100.Load (new BinaryReader (File.OpenRead (ept_file)));
            }
            ept_file = "ept-rnd8-" + dbname + "-grps-" + j + ".idx";
            if (!File.Exists (ept_file)) {
                chr_ept = new Chronos ();
                chr_ept.Start ();
                eptable_rnd8.Build (DB, j, (MetricDB _db, Random seed) => new EPListRandomPivots (DB, 8, seed), 1);
                chr_ept.End ();
                File.AppendAllText ("index-" + dbname + "-construction-speed-ept.csv",
                    string.Format ("EPT-rnd8 {0} {1}{2}", j, chr_ept.AccTime, Environment.NewLine));
                eptable_rnd8.Save (new BinaryWriter (File.OpenWrite (ept_file)));
            } else {
                eptable_rnd8.Load (new BinaryReader (File.OpenRead (ept_file)));
            }
            ept_file = "ept-rnd32-" + dbname + "-grps-" + j + ".idx";
            if (!File.Exists (ept_file)) {
                chr_ept = new Chronos ();
                chr_ept.Start ();
                eptable_rnd32.Build (DB, j, (MetricDB _db, Random seed) => new EPListRandomPivots (DB, 32, seed), 1);
                chr_ept.End ();
                File.AppendAllText ("index-" + dbname + "-construction-speed-ept.csv",
                    string.Format ("EPT-rnd32 {0} {1}{2}", j, chr_ept.AccTime, Environment.NewLine));
                eptable_rnd32.Save (new BinaryWriter (File.OpenWrite (ept_file)));
            } else {
                eptable_rnd32.Load (new BinaryReader (File.OpenRead (ept_file)));
            }
            ept_file = "ept-rnd400-" + dbname + "-grps-" + j + ".idx";
            if (!File.Exists (ept_file)) {
                chr_ept = new Chronos ();
                chr_ept.Start ();
                eptable_rnd400.Build (DB, j, (MetricDB _db, Random seed) => new EPListRandomPivots (DB, 400, seed), 1);
                chr_ept.End ();
                File.AppendAllText ("index-" + dbname + "-construction-speed-ept.csv",
                    string.Format ("EPT-rnd400 {0} {1}{2}", j, chr_ept.AccTime, Environment.NewLine));
                eptable_rnd400.Save (new BinaryWriter (File.OpenWrite (ept_file)));
            } else {
                eptable_rnd400.Load (new BinaryReader (File.OpenRead (ept_file)));
            }
            // generate the queries
            var qstream = new QueryStream (queries_file, querie_arg);
            List<string> reslist = new List<string> ();
            // ======================= Searches =======================
            string out_file = string.Format ("res-{0}-dim[{2}]-dbsize[{1}]-{3}-", dbname, DB.Count, dim, query_type);
            string complete_out_file;
            // Sequential
            complete_out_file = out_file + "Seq.dat";
            Commands.Search (Seq, qstream.Iterate (), new ShellSearchOptions (queries_file, "Sequential", complete_out_file));
            reslist.Add (complete_out_file);
            // VPForest
            complete_out_file = out_file + string.Format ("tau[{0}]-VPForest.dat", VPF_Search.Tau);
            Commands.Search (VPF_Search, qstream.Iterate (), new ShellSearchOptions (queries_file, "VP-Forest", complete_out_file));
            reslist.Add (complete_out_file);
            // EPTable_rnd-8
            complete_out_file = out_file + "EPTable_rnd-numgroups[" + j + "]-pivspergrp[8].dat";
            Commands.Search (eptable_rnd8, qstream.Iterate (), new ShellSearchOptions (queries_file, "EPTable-rnd-8", complete_out_file));
            reslist.Add (complete_out_file);
            // EPTable_rnd-32
            complete_out_file = out_file + "EPTable_rnd-numgroups[" + j + "]-pivspergrp[32].dat";
            Commands.Search (eptable_rnd32, qstream.Iterate (), new ShellSearchOptions (queries_file, "EPTable-rnd-32", complete_out_file));
            reslist.Add (complete_out_file);
            // EPTable_rnd-100
            complete_out_file = out_file + "EPTable_rnd-numgroups[" + j + "]-pivspergrp[100].dat";
            Commands.Search (eptable_rnd100, qstream.Iterate (), new ShellSearchOptions (queries_file, "EPTable-rnd-100", complete_out_file));
            reslist.Add (complete_out_file);
            // EPTable_rnd-400
            complete_out_file = out_file + "EPTable_rnd-numgroups[" + j + "]-pivspergrp[400].dat";
            Commands.Search (eptable_rnd400, qstream.Iterate (), new ShellSearchOptions (queries_file, "EPTable-rnd-400", complete_out_file));
            reslist.Add (complete_out_file);
            // EPTable_Opt
            complete_out_file = out_file + "EPTable_Opt-numgroups[" + j + "].dat";
            Commands.Search (eptable_opt, qstream.Iterate (), new ShellSearchOptions (queries_file, "EPTable_Opt", complete_out_file));
            reslist.Add (complete_out_file);
            // Parameters for saving the results
            reslist.Add ("--horizontal");
            reslist.Add (string.Format ("--save=res-{0}-check-out-dim[{3}]-dbsize[{1}]-{5}-VPF-Tau[{2}]-EPT-gps[{4}]",
                dbname, DB.Count, tau, dim, j, query_type));
            Commands.Check (reslist);
        }
    }
}
#endregion Methods

#region Other
/*
public static void LoadQueries<T>(out IList<T[]> queries, string filename) where T : struct
{
    BinaryReader br = new BinaryReader (File.OpenRead (filename));
    queries = new List<T[]> ();
    int count = br.ReadInt32 ();
    int dim = br.ReadInt32 ();
    for (int i = 0; i < count; i++) {
        queries.Add (new T[dim]);
        PrimitiveIO<T>.LoadVector (br, dim, queries [i]);
    }
}

public static void LoadQueries<T>(out IList<T[]> queries, string filename, int count, int dim) where T : struct
{
    StreamReader br = new StreamReader (filename);
    queries = new List<T[]> ();
    List<T> q = new List<T> (dim);
    string line = "";
    for (int i = 0; i < count; i++) {
        line = br.ReadLine ();
        Console.WriteLine ("Read: {0}", line);
        queries.Add (new T[dim]);
        PrimitiveIO<T>.LoadVector (line, q);
        queries [i] = q.ToArray ();
    }
}
*/
#endregion Other
}
}
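// Usage sketch (illustrative): running this benchmark from the shell under Mono.
// The binary name is an assumption; the argument order follows the parsing at
// the top of Main (db_file queries_file query_arg query_type db_name dim [tau]).
//
//   mono VPForest.exe DB.colors colors.queries 0.07 Range colors 112 0.07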