The sequential index
Inheritance: BasicIndex
Exemple #1
0
 /// <summary>
 /// Builds the index: computes one fingerprint per database object (in parallel),
 /// stores the fingerprints in a BinQ8HammingSpace and wraps that space with a
 /// Sequential index that becomes the InternalIndex.
 /// </summary>
 /// <param name="db">Database to be indexed.</param>
 /// <param name="num_pairs">The sample space is created with num_pairs * 2 items.</param>
 /// <param name="maxCandidates">Value stored in MaxCandidates (defaults to -1).</param>
 public virtual void Build(MetricDB db, int num_pairs, int maxCandidates = -1)
 {
     this.DB = db;
     this.Fingerprints = new BinQ8HammingSpace (1);
     this.Sample = new SampleSpace("", this.DB, num_pairs * 2);
     this.MaxCandidates = maxCandidates;
     var n = this.DB.Count;
     var A = new byte[n][];
     int pc = n / 100 + 1;
     int advance = 0;
     var create_one = new Action<int> (delegate(int i) {
         // Every worker writes only to its own slot of A, so no locking is needed here.
         A[i] = this.GetFP(this.DB[i]);
         // The progress counter is shared between workers, so it must be bumped atomically;
         // 'done' is the value the counter had before this item was accounted for.
         int done = System.Threading.Interlocked.Increment (ref advance) - 1;
         if (done % pc == 0) {
             Console.WriteLine ("DEBUG {0}  ({1}/{2}), db: {3}, num_pairs: {4}, timestamp: {5}", this, done, n, db.Name, num_pairs, DateTime.Now);
         }
     });
     ParallelOptions ops = new ParallelOptions();
     ops.MaxDegreeOfParallelism = -1;
     Parallel.For (0, n, ops, create_one);
     // Fingerprints are appended sequentially so that their positions match the object ids.
     foreach (var fp in A) {
         this.Fingerprints.Add( fp );
     }
     var s = new Sequential ();
     s.Build (this.Fingerprints);
     this.InternalIndex = s;
 }
Exemple #2
0
        /// <summary>
        /// Estimates how many nearest references must be kept per object so that a query and
        /// each of its k nearest neighbors (found exhaustively) share a reference.
        /// The estimate is the largest prefix length required over a random sample of
        /// database objects used as queries.
        /// </summary>
        /// <param name="db">Database; its items are also used as training queries.</param>
        /// <param name="refs">Index over the references, queried for up to 128 nearest references.</param>
        /// <param name="k">Number of nearest neighbors retrieved for each sampled query.</param>
        /// <param name="numQueries">Number of sampled queries.</param>
        /// <returns>The estimated number of references, always at least 1.</returns>
        public static int EstimateKnrEnsuringSharedNeighborhoods(MetricDB db, Index refs, int k, int numQueries = 256)
        {
            // NOTICE: it cannot be adjusted for 1-nn because database items are used as training
            // objects; it produces valid values for 2-nn and more.
            // Large k needs no extra items, but small ones (1 or 2) need a small constant.
            const int maxKnr = 128;
            // Fraction of the neighbors that must share a reference before a query is settled.
            const double requiredOverlapRatio = 1.0;

            var exhaustive = new Sequential ();
            exhaustive.Build (db);
            int dbSize = db.Count;
            int estimate = 1;

            foreach (var queryID in RandomSets.GetRandomSubSet (numQueries, dbSize)) {
                var queryObject = db [queryID];
                var queryRefs = Result2Sequence (refs.SearchKNN (queryObject, maxKnr));

                // Reference sequences of the true neighbors; an entry is nulled once it is covered.
                var pending = new List<int[]> (k);
                foreach (var neighbor in exhaustive.SearchKNN (db [queryID], k)) {
                    pending.Add (Result2Sequence (refs.SearchKNN (db [neighbor.ObjID], maxKnr)));
                }

                // Start from the prefix already required by the previously examined queries.
                var seen = new HashSet<int> ();
                for (int i = 0; i < estimate; ++i) {
                    seen.Add (queryRefs [i]);
                }

                int covered = 0;
                int depth = 0;
                while (depth < maxKnr && covered < pending.Count * requiredOverlapRatio) {
                    seen.Add (queryRefs [depth]);
                    covered = 0;
                    for (int j = 0; j < pending.Count; ++j) {
                        var candidate = pending [j];
                        if (candidate != null) {
                            if (!seen.Contains (candidate [depth])) {
                                continue;
                            }
                            pending [j] = null;
                        }
                        ++covered;
                    }
                    ++depth;
                    estimate = Math.Max (estimate, depth);
                }
            }
            return estimate;
        }
Exemple #3
0
 /// <summary>
 /// Builds the hash table: samples num_refs references from the database, indexes them
 /// with a Sequential index and groups every object id under the hash returned by GetHashKnr.
 /// </summary>
 /// <param name="db">Database to be indexed.</param>
 /// <param name="K">Must be between 1 and 4.</param>
 /// <param name="num_refs">Number of references; must be between 1 and 255.</param>
 /// <param name="rand">Not used by this method.</param>
 /// <exception cref="ArgumentOutOfRangeException">K or num_refs is outside its valid range.</exception>
 public void Build(MetricDB db, int K, int num_refs, Random rand)
 {
     // Validate before touching any state so that a rejected call leaves the index unchanged.
     // The three-argument constructor is used because the one-argument form takes the
     // parameter name, not the message.
     if (K < 1 || K > 4) {
         throw new ArgumentOutOfRangeException ("K", K, String.Format("K should be between 1 to 4, K={0}", K));
     }
     if (num_refs < 1 || num_refs > 255) {
         throw new ArgumentOutOfRangeException ("num_refs", num_refs, String.Format("num_refs should be between 1 to 255, num_refs={0}", num_refs));
     }
     this.DB = db;
     int n = db.Count;
     this.K = K;
     var refs = new SampleSpace("", db, num_refs);
     var seq = new Sequential();
     seq.Build(refs);
     this.R = seq;
     // Hash every object and append its id to the bucket of that hash in a single pass.
     var table = new Dictionary<int, List<int>> ();
     for (int objID = 0; objID < n; ++objID) {
         int hash = this.GetHashKnr(this.DB[objID]);
         List<int> bucket;
         if (!table.TryGetValue(hash, out bucket)) {
             bucket = new List<int>();
             table.Add(hash, bucket);
         }
         bucket.Add (objID);
         if (objID % 10000 == 0) {
             Console.WriteLine ("computing knrlsh {0}/{1} (adv. {2:0.00}%, db: {3}, K: {4}, curr. time: {5})", objID, n, objID*100.0/n, this.DB.Name, this.K, DateTime.Now);
         }
     }
     this.TABLE = table;
 }
Exemple #4
0
        /// <summary>
        /// Builds a NeighborhoodHash over the database and increases its NeighborhoodExpansion
        /// until the number of instances reported by _EstimateParameters does not exceed
        /// max_instances.
        /// </summary>
        /// <param name="db">Database to be indexed; a random subset of it is used as queries.</param>
        /// <param name="max_instances">Upper bound accepted for the number of instances.</param>
        /// <param name="k">Number of neighbors; internally raised because queries are database items.</param>
        /// <param name="expected_recall">Recall target forwarded to _EstimateParameters.</param>
        /// <param name="num_estimation_queries">Size of the random query sample.</param>
        /// <returns>The tuned index together with the estimated number of instances.</returns>
        /// <exception cref="ArgumentException">The query sample is empty.</exception>
        public static Parameters EstimateParameters(MetricDB db, int max_instances, int k, double expected_recall, int num_estimation_queries)
        {
            var seq = new Sequential ();
            seq.Build (db);
            var I = new NeighborhoodHash ();
            int symbolsPerHash = 3;
            I.Build (db, symbolsPerHash);
            var Q = RandomSets.GetRandomSubSet (num_estimation_queries, db.Count);
            if (Q.Length == 0) {
                // The per-query averages below divide by the sample size.
                throw new ArgumentException ("The sample of estimation queries is empty", "num_estimation_queries");
            }
            // k > 1 since Q is a subset of the database
            if (k == 1) {
                ++k;
            }
            ++k;
            var res_array = new HashSet<int> [Q.Length];
            for (int i = 0; i < Q.Length; ++i) {
                var s = KnrFP.GetFP (db [Q [i]], seq, k);
                res_array [i] = new HashSet<int> (s);
            }
            int num_instances = 0;
            // Step back once so that the first iteration evaluates the current expansion value.
            --I.NeighborhoodExpansion;
            double cost = 0.0;
            double time = 0.0;

            do {
                ++I.NeighborhoodExpansion;
                var c = db.NumberDistances;
                var watch = System.Diagnostics.Stopwatch.StartNew ();
                num_instances = _EstimateParameters(k, expected_recall, I, Q, res_array);
                watch.Stop ();
                // Average per query in floating point; integral division would truncate the figures.
                cost = (double)(db.NumberDistances - c) / Q.Length * num_instances;
                time = watch.Elapsed.TotalSeconds / Q.Length * num_instances;
                Console.WriteLine("==== expansion: {0}, num_instances: {1}, search-cost: {2}, search-time: {3}", I.NeighborhoodExpansion, num_instances, cost, time);
            } while (num_instances > max_instances);

            return new Parameters() {
                Index = I,
                NumberOfInstances = num_instances
            };
        }
Exemple #5
0
        /// <summary>
        /// Runs the query file against an exhaustive index, the List of Clusters variants,
        /// the permutation based indexes and two KnrSeqSearch configurations, then hands the
        /// collected result file names to Commands.Check.
        /// </summary>
        /// <param name="nick">Suffix appended to every index file name.</param>
        /// <param name="db">Database to be indexed.</param>
        /// <param name="queries">Name of the queries file.</param>
        /// <param name="num_centers">Parameter forwarded to the List of Clusters tests.</param>
        /// <param name="num_perms">Parameter forwarded to the permutation based tests.</param>
        /// <param name="num_refs">Number of references sampled for the KNR index.</param>
        public void Test(string nick, MetricDB db, string queries, int num_centers, int num_perms, int num_refs)
        {
            var qstream = new QueryStream (queries);
            var reslist = new List<string> ();

            // --- Exhaustive search: the index is always rebuilt and saved, the search is cached ---
            var seqIndex = new Sequential ();
            seqIndex.Build (db);
            string seqIndexName = "Index.Sequential." + nick;
            IndexGenericIO.Save (seqIndexName, seqIndex);
            string seqResultName = "Res." + seqIndexName + "." + queries;
            bool seqResultCached = File.Exists (seqResultName);
            if (!seqResultCached) {
                Commands.Search (seqIndex, qstream.Iterate (), new ShellSearchOptions (queries, seqIndexName, seqResultName));
            }
            reslist.Add (seqResultName);

            // --- The List of Clusters and its variants ---
            reslist.Add (this.TestLC ("Index.LC_RNN." + nick, db, num_centers, new LC_RNN (), queries, qstream));
            reslist.Add (this.TestLC ("Index.LC." + nick, db, num_centers, new LC (), queries, qstream));
            reslist.Add (this.TestLC ("Index.LC_IRNN." + nick, db, num_centers, new LC_IRNN (), queries, qstream));
            reslist.Add (this.TestLC ("Index.LC_PRNN." + nick, db, num_centers, new LC_PRNN (), queries, qstream));
            reslist.Add (this.TestLC ("Index.LC_ParallelBuild." + nick, db, num_centers, new LC_ParallelBuild (), queries, qstream));

            // --- Permutation based indexes: permutations, brief index, two-bit brief index ---
            reslist.Add (this.TestPI ("Index.Perms." + nick, db, num_perms, new Perms (), queries, qstream));
            reslist.Add (this.TestPI ("Index.BinPerms." + nick, db, num_perms, new BinPerms (), queries, qstream));
            reslist.Add (this.TestPI ("Index.BinPermsTwoBits." + nick, db, num_perms, new BinPermsTwoBit (), queries, qstream));

            // --- KNR: loaded from disk when available, otherwise built and saved ---
            KnrSeqSearch knrIndex;
            string knrName = "Index.KnrSeqSearch." + nick;
            if (!File.Exists (knrName)) {
                Console.WriteLine ("** Starting construction of '{0}'", knrName);
                knrIndex = new KnrSeqSearch ();
                var sample = RandomSets.GetRandomSubSet (num_refs, db.Count);
                var refsdb = new SampleSpace ("", db, sample);
                var refsidx = new LC ();
                refsidx.Build (refsdb, refsdb.Count / 10);
                knrIndex.Build (db, refsidx, 7);
                IndexGenericIO.Save (knrName, knrIndex);
            } else {
                knrIndex = (KnrSeqSearch)IndexGenericIO.Load (knrName);
            }
            knrIndex.MAXCAND = 1024;
            this.TestKNR(knrIndex, knrName, queries, num_refs, reslist, (index) => index);

            // --- KNR again, over the prefix-sorted version of the index ---
            Console.WriteLine ("==== Working on a permuted space");
            string sortedName = knrName + ".proximity-sorted";
            if (File.Exists(sortedName)) {
                knrIndex = (KnrSeqSearch)IndexGenericIO.Load(sortedName);
            } else {
                knrIndex = knrIndex.GetSortedByPrefix();
                knrIndex.MAXCAND = 1024;
                IndexGenericIO.Save(sortedName, knrIndex);
            }
            this.TestKNR(knrIndex, sortedName, queries, num_refs, reslist, (index) => new PermutedIndex(index));

            reslist.Add("--horizontal");
            Commands.Check(reslist);
        }
Exemple #6
0
        /// <summary>
        /// The API Build method for BinPerms: encodes the inverse permutation of every object
        /// and indexes the encoded vectors with a Sequential index (stored in IndexHamming).
        /// </summary>
        /// <param name="db">Database to be indexed.</param>
        /// <param name="refs">Database of references.</param>
        /// <param name="maxcand">Value stored in MAXCAND.</param>
        /// <param name="mod">When smaller than 1 it is taken as a fraction of the number of
        /// references (rounded up); otherwise it is truncated to an integer and used as is.</param>
        /// <param name="permcenter">Value stored in the permcenter field.</param>
        /// <param name="idxperms">Optional permutation index whose already computed inverses are
        /// reused; when null the inverses are computed here, in parallel.</param>
        public void Build(MetricDB db, MetricDB refs, int maxcand=1024, double mod=0.5, bool permcenter=true, Perms idxperms=null)
        {
            this.DB = db;
            this.REFS = refs;
            this.MAXCAND = maxcand;
            if (mod < 1) {
                this.MOD = (int)Math.Ceiling (mod * this.REFS.Count);
            } else {
                this.MOD = (int)mod;
            }
            this.permcenter = permcenter;
            int n = this.DB.Count;
            var DATA = new List<byte[]>(n);
            if (idxperms == null) {
                int onepercent = 1 + (n / 100);
                // Workers write to disjoint slots of a plain array; List<T> is not safe for
                // concurrent writes, so the list is filled only after the parallel loop ends.
                var encoded = new byte[n][];
                int I = 0;

                var build_one = new Action<int> ((int docID) => {
                    // Shared progress counter, bumped atomically; 'done' is its previous value.
                    int done = System.Threading.Interlocked.Increment (ref I) - 1;
                    if ((done % onepercent) == 0) {
                        Console.WriteLine ("Generating {0}, db: {1}, num_refs: {2}, docID: {3}, advance {4:0.00}%, timestamp: {5}",
                            this, db.Name, refs.Count, docID, done * 100.0 / n, DateTime.Now);
                    }
                    var inv = this.ComputeInverse (docID);
                    encoded[docID] = this.Encode(inv);
                });
                var ops = new ParallelOptions ();
                ops.MaxDegreeOfParallelism = -1;
                Parallel.For (0, n, ops, build_one);
                DATA.AddRange (encoded);
            } else {
                for (int docid = 0; docid < n; docid++) {
                    var inv = idxperms.GetComputedInverse (docid);
                    DATA.Add(this.Encode(inv));
                }
            }
            var binperms = new MemMinkowskiVectorDB<byte> ();
            binperms.Build ("", DATA, 1);
            var seq = new Sequential ();
            seq.Build(binperms);
            this.IndexHamming = seq;
        }
Exemple #7
0
 /// <summary>
 /// Calls Execute with the index name "{nick}/Index.Seq" and a factory that builds a
 /// Sequential index over the database it receives.
 /// </summary>
 /// <param name="setup">Setup forwarded to Execute.</param>
 /// <param name="nick">Nickname forwarded to Execute and used as prefix of the index name.</param>
 /// <returns>Whatever Execute returns.</returns>
 public static string ExecuteSeq(IndexArgumentSetup setup, string nick)
 {
     string indexName = nick + "/Index.Seq";
     return Execute (setup, nick, indexName, (db) => {
         var sequential = new Sequential ();
         sequential.Build (db);
         return sequential;
     });
 }
Exemple #8
0
        /// <summary>
        /// Benchmark driver: loads (or creates) the database, builds or loads a VP-Forest, a
        /// Sequential index and five EPTable configurations for each number of groups, runs the
        /// query file against all of them and checks the results with Commands.Check.
        /// </summary>
        /// <param name="args">
        /// Either empty (the built-in "colors" defaults are used) or:
        /// db_file queries_file query_arg query_type db_name dim [tau]
        /// </param>
        public static void Main(string[] args)
        {
            string db_file="DB.colors";
            string queries_file="colors.queries";
            double querie_arg=.07;
            string query_type="Range";
            string dbname="colors";
            int dim=112;
            double tau=.07;

            if (args.Length != 0)
            {
                // The six leading arguments are mandatory once any argument is given.
                if (args.Length < 6)
                {
                    Console.Error.WriteLine ("Usage: VPForest db_file queries_file query_arg query_type db_name dim [tau]");
                    Environment.ExitCode = 1;
                    return;
                }
                db_file=args[0];
                queries_file=args[1];
                querie_arg = Convert.ToDouble(args[2]);
                query_type = args [3];
                dbname = args [4];
                dim = Convert.ToInt32 (args [5]);
                if (args.Length >= 7)
                    tau = Convert.ToDouble (args [6]);
            }

            // Read the database, creating the binary file from the ascii header when missing.
            if (!File.Exists (db_file)) {
                MemMinkowskiVectorDB<float> fresh_db = new MemMinkowskiVectorDB<float> ();
                fresh_db.Build (dbname+".ascii.header");
                SpaceGenericIO.Save (db_file, fresh_db);
            }
            MetricDB DB=SpaceGenericIO.Load(db_file,true);
            Console.WriteLine("DB Loaded size:{0}",DB.Count);

            // VP-Forest index; it does not depend on the number of groups, so it is prepared once.
            string VPF_file = "VP-Forest-"+dbname+"-Tau-" + tau + ".idx";
            VP_Forest VPF_Search;
            if (!File.Exists (VPF_file)) {
                Chronos chr_time = new Chronos ();
                chr_time.Start ();
                VPF_Search = new VP_Forest (DB, _tau: tau);
                chr_time.End ();
                File.AppendAllText("index-"+dbname+"-construction-speed-VP-Forest.csv", string.Format("{0} {1}{2}",tau,chr_time.AccTime,Environment.NewLine));
                // Dispose the writer so the file is flushed and released.
                using (var writer = new BinaryWriter (File.OpenWrite (VPF_file))) {
                    VPF_Search.Save (writer);
                }
            } else {
                VPF_Search = new VP_Forest ();
                using (var reader = new BinaryReader (File.OpenRead (VPF_file))) {
                    VPF_Search.Load (reader);
                }
            }

            // Sequential index (also independent of the number of groups).
            Sequential Seq=new Sequential();
            Seq.Build(DB);

            int[] J={1,2,4,8,16}; // groups
            foreach (int j in J)
            {
                // Build (or load) the EPT indexes, in the same order as they have always been built.
                EPTable eptable_opt = LoadOrBuildEPTable ("opt", dbname, j,
                    (table) => table.Build (DB, j, (MetricDB _db, Random seed) => new EPListOptimized (DB, j,seed, 1000, .8), 1));
                // NOTE(review): the name suggests 100 pivots per group come from the default Build overload - confirm.
                EPTable eptable_rnd100 = LoadOrBuildEPTable ("rnd100", dbname, j,
                    (table) => table.Build (DB, j));
                // 8 pivots / group
                EPTable eptable_rnd8 = LoadOrBuildEPTable ("rnd8", dbname, j,
                    (table) => table.Build (DB, j, (MetricDB _db, Random seed) => new EPListRandomPivots (DB, 8,seed), 1));
                // 32 pivots / group
                EPTable eptable_rnd32 = LoadOrBuildEPTable ("rnd32", dbname, j,
                    (table) => table.Build (DB, j, (MetricDB _db, Random seed) => new EPListRandomPivots (DB,32, seed), 1));
                // 400 pivots / group
                EPTable eptable_rnd400 = LoadOrBuildEPTable ("rnd400", dbname, j,
                    (table) => table.Build (DB, j, (MetricDB _db, Random seed) => new EPListRandomPivots (DB,400, seed), 1));

                // Generate the queries
                var qstream=new QueryStream(queries_file,querie_arg);
                List<string> reslist=new List<string>();

                // ======================= Searches ===============================

                string out_file=string.Format("res-{0}-dim[{2}]-dbsize[{1}]-{3}-",dbname,DB.Count,dim,query_type);
                string complete_out_file;
                // Sequential
                complete_out_file=out_file+"Seq.dat";
                Commands.Search(Seq,qstream.Iterate(),new ShellSearchOptions(queries_file,"Sequential",complete_out_file));
                reslist.Add(complete_out_file);
                // VPForest
                complete_out_file=out_file+string.Format("tau[{0}]-VPForest.dat",VPF_Search.Tau);
                Commands.Search(VPF_Search,qstream.Iterate(),new ShellSearchOptions(queries_file,"VP-Forest",complete_out_file));
                reslist.Add(complete_out_file);
                // EPTable_rnd-8
                complete_out_file=out_file+"EPTable_rnd-numgroups["+j+"]-pivspergrp[8].dat";
                Commands.Search(eptable_rnd8,qstream.Iterate(),new ShellSearchOptions(queries_file,"EPTable-rnd-8",complete_out_file));
                reslist.Add(complete_out_file);
                // EPTable_rnd-32
                complete_out_file=out_file+"EPTable_rnd-numgroups["+j+"]-pivspergrp[32].dat";
                Commands.Search(eptable_rnd32,qstream.Iterate(),new ShellSearchOptions(queries_file,"EPTable-rnd-32",complete_out_file));
                reslist.Add(complete_out_file);
                // EPTable_rnd-100
                complete_out_file=out_file+"EPTable_rnd-numgroups["+j+"]-pivspergrp[100].dat";
                Commands.Search(eptable_rnd100,qstream.Iterate(),new ShellSearchOptions(queries_file,"EPTable-rnd-100",complete_out_file));
                reslist.Add(complete_out_file);
                // EPTable_rnd-400
                complete_out_file=out_file+"EPTable_rnd-numgroups["+j+"]-pivspergrp[400].dat";
                Commands.Search(eptable_rnd400,qstream.Iterate(),new ShellSearchOptions(queries_file,"EPTable-rnd-400",complete_out_file));
                reslist.Add(complete_out_file);
                // EPTable_Opt
                complete_out_file=out_file+"EPTable_Opt-numgroups["+j+"].dat";
                Commands.Search(eptable_opt,qstream.Iterate(),new ShellSearchOptions(queries_file,"EPTable_Opt",complete_out_file));
                reslist.Add(complete_out_file);

                // Options used to store the comparison of the results
                reslist.Add("--horizontal");

                reslist.Add(string.Format("--save=res-{0}-check-out-dim[{3}]-dbsize[{1}]-{5}-VPF-Tau[{2}]-EPT-gps[{4}]",
                                          dbname,DB.Count,tau,dim,j,query_type) );
                Commands.Check(reslist);
            }
        }

        // Loads the EPTable stored in "ept-<kind>-<dbname>-grps-<groups>.idx"; when the file is
        // missing, builds the table with 'build', logs the construction time and saves it.
        private static EPTable LoadOrBuildEPTable(string kind, string dbname, int groups, Action<EPTable> build)
        {
            var table = new EPTable ();
            string ept_file = "ept-" + kind + "-" + dbname + "-grps-" + groups + ".idx";
            if (!File.Exists (ept_file)) {
                Chronos chr_ept = new Chronos ();
                chr_ept.Start ();
                build (table);
                chr_ept.End ();
                File.AppendAllText ("index-" + dbname + "-construction-speed-ept.csv", string.Format ("EPT-{0} {1} {2}{3}", kind, groups, chr_ept.AccTime, Environment.NewLine));
                using (var writer = new BinaryWriter (File.OpenWrite (ept_file))) {
                    table.Save (writer);
                }
            } else {
                using (var reader = new BinaryReader (File.OpenRead (ept_file))) {
                    table.Load (reader);
                }
            }
            return table;
        }

        #endregion Methods

        #region Other

        /*
        public static void LoadQueries<T>(out IList<T[]> queries,string filename) where T: struct
        {
            BinaryReader br=new BinaryReader(File.OpenRead(filename));
            queries=new List<T[]>();
            int	count=br.ReadInt32();
            int dim=br.ReadInt32();
            for (int i=0;i<count;i++)
            {
                queries.Add(new T[dim]);
                PrimitiveIO<T>.LoadVector(br,dim,queries[i]);
            }
        }

        public static void LoadQueries<T>(out IList<T[]> queries,string filename,int count,int dim) where T: struct
        {
            StreamReader br=new StreamReader(filename);
            queries=new List<T[]>();
            List<T> q=new List<T>(dim);
            string line="";
            for (int i=0;i<count;i++)
            {
                line=br.ReadLine();
                Console.WriteLine("Read: {0}",line);
                queries.Add(new T[dim]);

                PrimitiveIO<T>.LoadVector(line,q);
                queries[i]=q.ToArray();
            }
        }
        */

        #endregion Other
    }
}