A sample of an space
상속: MetricDB
예제 #1
0
 public void Build(MetricDB db, Random rand, int num_refs, int K=7, int maxcand=1024, SequenceBuilder seq_builder=null)
 {
     var sample = new SampleSpace ("", db, num_refs, rand);
     var sat = new SAT_Distal ();
     sat.Build (sample, RandomSets.GetRandom());
     this.Build (db, sat, K, maxcand, seq_builder);
 }
예제 #2
0
파일: NappHash.cs 프로젝트: KeithNel/natix
 public void Build(MetricDB db, int k, int num_refs, Random rand)
 {
     var sample = new SampleSpace("", db, num_refs, rand);
     var I = new SAT_Distal();
     I.Build(sample, rand);
     this.Build(db, k, I);
 }
예제 #3
0
        public void Build(MetricDB db, int symbolsPerHash, int neighborhoodExpansion, Random rand)
        {
            this.DB = db;
            int n = db.Count;
            this.SymbolsPerHash = symbolsPerHash; // very small values, i.e., 2, 3, 4
            this.NeighborhoodExpansion = neighborhoodExpansion; // neighborhoodExpansion >= symbolsPerHash
            var numrefs = (int)(Math.Pow(n, 1.0 / this.SymbolsPerHash));
            this.CountSymbolBits = countBits(numrefs);

            var refs = new SampleSpace("", db, numrefs, rand);
            var sat = new SAT_Distal ();
            sat.Build (refs, new Random ());
            this.R = sat;

            var G = new KnrFP ();
            G.Build (db, this.R, this.SymbolsPerHash);
            var knrcopy = new int[this.SymbolsPerHash];
            this.hashTable = new Dictionary<long, List<int>> ();
            for (int objID = 0; objID < n; ++objID) {
                var knr = G [objID] as int[];
                knr.CopyTo (knrcopy, 0);
                var hash = this.EncodeKnr (knrcopy); // EncodeKnr destroys the reference order

                List<int> L;
                if (!this.hashTable.TryGetValue(hash, out L)) {
                    L = new List<int>();
                    this.hashTable.Add(hash, L);
                }
                L.Add (objID);
            }
            double avg_len = 0;
            double avg_len_sq = 0;

            foreach (var list in this.hashTable.Values) {
                avg_len += list.Count;
                avg_len_sq += list.Count * list.Count;
            }
            avg_len /= this.hashTable.Count;
            avg_len_sq /= this.hashTable.Count;
            Console.WriteLine ("=== created hash table with {0} keys, items: {1}, popcount mean: {2}, popcount stddev: {3}",
                               this.hashTable.Count, n, avg_len, Math.Sqrt(avg_len_sq - avg_len * avg_len));
        }
예제 #4
0
파일: KnrLSH.cs 프로젝트: KeithNel/natix
 public void Build(MetricDB db, int K, int num_refs, Random rand)
 {
     this.DB = db;
     int n = db.Count;
     // valid values to be used as parameters
     // numrefs <= 255
     // K <= 4
     if (K > 4) {
         throw new ArgumentOutOfRangeException (String.Format("K should be between 1 to 4, K={0}", K));
     }
     if (num_refs > 255) {
         throw new ArgumentOutOfRangeException (String.Format("num_refs should be between 1 to 255, num_refs={0}", num_refs));
     }
     this.K = K;
     var refs = new SampleSpace("", db, num_refs);
     var seq = new Sequential();
     seq.Build(refs);
     this.R = seq;
     int[] G = new int[n];
     for (int objID = 0; objID < n; ++objID) {
         var u = this.DB[objID];
         var useq = this.GetHashKnr(u);
         G[objID] = useq;
         if (objID % 10000 == 0) {
             Console.WriteLine ("computing knrlsh {0}/{1} (adv. {2:0.00}%, db: {3}, K: {4}, curr. time: {5})", objID, n, objID*100.0/n, this.DB.Name, this.K, DateTime.Now);
         }
     }
     this.TABLE = new Dictionary<int, List<int>> ();
     for (int objID = 0; objID < n; ++objID) {
         var hash = G[objID];
         List<int> L;
         if (!this.TABLE.TryGetValue(hash, out L)) {
             L = new List<int>();
             this.TABLE.Add(hash, L);
         }
         L.Add (objID);
     }
 }
예제 #5
0
파일: Main.cs 프로젝트: vfaby/natix
        public void Test(string nick, MetricDB db, string queries, int num_centers, int num_perms, int num_refs)
        {
            var qstream = new QueryStream (queries);
            var reslist = new List<string> ();
            // Exhaustive search
            {
                Sequential seq = new Sequential ();
                seq.Build (db);
                var idxname = "Index.Sequential." + nick;
                IndexGenericIO.Save (idxname, seq);
                var resname = "Res." + idxname + "." + queries;
                if (!File.Exists (resname)) {
                    Commands.Search (seq, qstream.Iterate (), new ShellSearchOptions (queries, idxname, resname));
                }
                reslist.Add (resname);
            }

            ///
            /// The List of Clusters and variants
            ///

            // LC_RNN
            reslist.Add (this.TestLC ("Index.LC_RNN." + nick, db, num_centers, new LC_RNN (), queries, qstream));
            // LC
            reslist.Add (this.TestLC ("Index.LC." + nick, db, num_centers, new LC (), queries, qstream));
            // LC_IRNN
            reslist.Add (this.TestLC ("Index.LC_IRNN." + nick, db, num_centers, new LC_IRNN (), queries, qstream));
            // LC_PRNN
            reslist.Add (this.TestLC ("Index.LC_PRNN." + nick, db, num_centers, new LC_PRNN (), queries, qstream));
            // LC_ParallelBuild
            reslist.Add (this.TestLC ("Index.LC_ParallelBuild." + nick, db, num_centers, new LC_ParallelBuild (), queries, qstream));

            ///
            /// Permutation Based Indexes
            ///

            // Permutations
            reslist.Add (this.TestPI ("Index.Perms." + nick, db, num_perms, new Perms (), queries, qstream));
            // Brief Index
            reslist.Add (this.TestPI ("Index.BinPerms." + nick, db, num_perms, new BinPerms (), queries, qstream));
            // BinPermsTwoBits
            reslist.Add (this.TestPI ("Index.BinPermsTwoBits." + nick, db, num_perms, new BinPermsTwoBit (), queries, qstream));
            ///
            /// KNR
            ///

            {
                KnrSeqSearch idx;
                var idxname = "Index.KnrSeqSearch." + nick;
                if (File.Exists (idxname)) {
                    idx = (KnrSeqSearch)IndexGenericIO.Load (idxname);
                } else {
                    Console.WriteLine ("** Starting construction of '{0}'", idxname);
                    var knr = new KnrSeqSearch ();
                    var sample = RandomSets.GetRandomSubSet (num_refs, db.Count);
                    var refsdb = new SampleSpace ("", db, sample);
                    var refsidx = new LC ();
                    refsidx.Build (refsdb, refsdb.Count / 10);
                    knr.Build (db, refsidx, 7);
                    IndexGenericIO.Save (idxname, knr);
                    idx = knr;
                }
                idx.MAXCAND = 1024;
                this.TestKNR(idx, idxname, queries, num_refs, reslist, (I) => I);
                Console.WriteLine ("==== Working on a permuted space");
                idxname = idxname + ".proximity-sorted";
                if (!File.Exists(idxname)) {
                    idx = idx.GetSortedByPrefix();
                    idx.MAXCAND = 1024;
                    IndexGenericIO.Save(idxname, idx);
                } else {
                    idx = (KnrSeqSearch)IndexGenericIO.Load(idxname);
                }
                this.TestKNR(idx, idxname, queries, num_refs, reslist, (I) => new PermutedIndex(I));
            }
            reslist.Add("--horizontal");
            Commands.Check(reslist);
        }
예제 #6
0
파일: BinPerms.cs 프로젝트: sadit/natix
 public void Build(MetricDB db, int num_refs, int maxcand=1024, double mod=0.5, bool permcenter=true)
 {
     var ss = new SampleSpace ("", db, num_refs);
     this.Build (db, ss, maxcand, mod, permcenter);
 }
예제 #7
0
파일: Indexes.cs 프로젝트: sadit/natix
        public static List<string> ExecuteKNRSEQ(IndexArgumentSetup setup, string nick, int numrefs, int k, double maxcand_ratio)
        {
            var idxname = String.Format ("{0}/Index.knrseq-{1}-{2}", nick, numrefs, k);
            MetricDB db = SpaceGenericIO.Load (setup.BINARY_DATABASE);
            Index idx;
            var suffix = "";

            var resnamelist = new List<string> ();
            if (!File.Exists (idxname)) {
                Console.WriteLine ("*** creating index {0}", idxname);
                var s = DateTime.Now.Ticks;
                var c = db.NumberDistances;
                var IDX = new KnrSeqSearch ();
                var refsDB = new SampleSpace("", db, numrefs);
                var refsIDX = new EPTable ();
                refsIDX.Build(refsDB, 4, (_db, _rand) => new EPListOptimizedA(_db, 4, _rand));
                if (k == 0) {
                    k = KnrEstimateParameters.EstimateKnrEnsuringSharedNeighborhoods (db, refsIDX, (int)Math.Abs (setup.QARG));
                    suffix = String.Format ("estimated-K={0}.", k);
                }
                IDX.Build (db, refsIDX, k, int.MaxValue);
                SaveConstructionTime (idxname, DateTime.Now.Ticks - s, db.NumberDistances - c);
                IndexGenericIO.Save (idxname, IDX);
                idx = IDX;
            } else {
                Console.WriteLine ("*** loading index {0}", idxname);
                idx = IndexGenericIO.Load (idxname);
                if (k == 0) {
                    var _idx = idx as KnrSeqSearch;
                    suffix = String.Format ("estimated-K={0}.", _idx.K);
                }
            }
            string resname;
            // PPIndex
            resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.PPI", maxcand_ratio));
            resnamelist.Add(resname);
            if (!File.Exists (resname)) {
                var knr = idx as KnrSeqSearch;
                knr.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
                PerformSearch (resname, knr, idxname, setup);
            }
            // KnrSeqSearchCosine
            resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.COS", maxcand_ratio));
            resnamelist.Add(resname);
            if (!File.Exists (resname)) {
                var knr = new KnrSeqSearchCosine(idx as KnrSeqSearch);
                knr.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
                PerformSearch (resname, knr, idxname, setup);
            }
            // KnrSeqSearchFootrule
            resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.FOOTRULE", maxcand_ratio));
            resnamelist.Add(resname);
            if (!File.Exists (resname)) {
                var knr = new KnrSeqSearchFootrule(idx as KnrSeqSearch);
                knr.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
                PerformSearch (resname, knr, idxname, setup);
            }
            // KnrSeqSearchJaccLCS
            resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.JACCLCS", maxcand_ratio));
            resnamelist.Add(resname);
            if (!File.Exists (resname)) {
                var knr = new KnrSeqSearchJaccLCS(idx as KnrSeqSearch);
                knr.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
                PerformSearch (resname, knr, idxname, setup);
            }
            // KnrSeqSearchLCSv3
            resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.LCSv3", maxcand_ratio));
            resnamelist.Add(resname);
            if (!File.Exists (resname)) {
                var knr = new KnrSeqSearchLCSv3(idx as KnrSeqSearch);
                knr.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
                PerformSearch (resname, knr, idxname, setup);
            }
            // NAPP
            foreach (var ksearch in setup.KNR_KSEARCH) {
                var knr = new NAPP(idx as KnrSeqSearch);
                knr.MAXCAND = (int)(idx.DB.Count * maxcand_ratio);
                resname = GetResultName (nick, idxname, setup, String.Format(suffix + "maxcand={0}.NAPP.ksearch={1}", maxcand_ratio, ksearch));
                resnamelist.Add(resname);
                if (!File.Exists (resname)) {
                    PerformSearch (resname, knr, idxname, setup);
                }
            }
            return resnamelist;
        }
예제 #8
0
 public void BuildApprox(MetricDB db, Random rand, int num_refs, int K=7, int maxcand=1024, SequenceBuilder seq_builder = null)
 {
     var sample = new SampleSpace ("", db, num_refs);
     var inner = new KnrSeqSearch ();
     inner.Build (sample, rand, 1024, K, int.MaxValue);
     this.Build (db, new KnrSeqSearchFootrule(inner), K, maxcand, seq_builder);
 }