Beispiel #1
0
        /// <summary>
        /// Feeds a fixed stream of keys into a TopK built with k = 5, then checks the
        /// fluent return value of Add, the reported elements and frequencies, and that
        /// Reset empties the structure.
        /// </summary>
        public void TestTopk()
        {
            var sketch = new TopK(0.001, 0.99, 5);

            // Input stream: BOB x3, TYLER x5, FRED x1, ALICE x4, JAMES x1, FRED x1, SARA x2.
            sketch.Add(BOB_BYTES).Add(BOB_BYTES).Add(BOB_BYTES);
            sketch.Add(TYLER_BYTES).Add(TYLER_BYTES).Add(TYLER_BYTES).Add(TYLER_BYTES).Add(TYLER_BYTES);
            sketch.Add(FRED_BYTES);
            sketch.Add(ALICE_BYTES).Add(ALICE_BYTES).Add(ALICE_BYTES).Add(ALICE_BYTES);
            sketch.Add(JAMES_BYTES);
            sketch.Add(FRED_BYTES);
            sketch.Add(SARA_BYTES).Add(SARA_BYTES);

            // The final key (BILL x1) is added separately so the returned reference can be checked.
            var returnedByAdd = sketch.Add(BILL_BYTES);
            Assert.AreSame(sketch, returnedByAdd);

            // Expected report, in the order Elements() is expected to produce it.
            // The most recently added key (BILL) is expected to be present as well.
            var expected = new[]
            {
                new ProbabilisticDataStructures.Element { Data = BILL_BYTES, Freq = 1 },
                new ProbabilisticDataStructures.Element { Data = SARA_BYTES, Freq = 2 },
                new ProbabilisticDataStructures.Element { Data = BOB_BYTES, Freq = 3 },
                new ProbabilisticDataStructures.Element { Data = ALICE_BYTES, Freq = 4 },
                new ProbabilisticDataStructures.Element { Data = TYLER_BYTES, Freq = 5 },
            };

            var actual = sketch.Elements();
            Assert.AreEqual(5, actual.Length);

            // Compare position by position: payload bytes first, then the frequency.
            var position = 0;
            foreach (var found in actual)
            {
                var wanted = expected[position];
                Assert.IsTrue(Enumerable.SequenceEqual(found.Data, wanted.Data));
                Assert.AreEqual(wanted.Freq, found.Freq);
                position++;
            }

            // Reset is fluent too and must leave no elements and a zero item count.
            var returnedByReset = sketch.Reset();
            Assert.AreSame(sketch, returnedByReset);

            Assert.AreEqual(0, sketch.Elements().Length);
            Assert.AreEqual(0u, sketch.N);
        }
 /// <summary>
 /// Nearest-neighbor search over the pivot table. Every pivot is measured against
 /// <paramref name="q"/> and pushed into <paramref name="res"/>; the per-pivot streams
 /// produced by IteratePartsKNN are then consumed round-robin, and an object is verified
 /// with a real distance computation once its counter reaches the number of pivots.
 /// </summary>
 /// <param name="q">Query object.</param>
 /// <param name="K">Not read by this method. NOTE(review): the result size is presumably carried by <paramref name="res"/> - confirm.</param>
 /// <param name="res">Result set that receives the pivots and the verified objects; it is also handed to IteratePartsKNN.</param>
 /// <returns>The same <paramref name="res"/> instance that was passed in.</returns>
 /// <exception cref="System.InvalidCastException">this.PIVS is not a SampleSpace.</exception>
 public override IResult SearchKNN(object q, int K, IResult res)
 {
     var num_pivots = this.PIVS.Count;
     // Every pivot takes part in the search (a SEARCHPIVS cap is not applied here).
     var required_hits = num_pivots;
     var pivot_order = new TopK<Tuple<double, float, float, Sequence>> (required_hits);
     // One hit counter per database object.
     // NOTE(review): counters are ushort, so this assumes the pivot count fits in 16 bits - confirm.
     var hits = new ushort[this.DB.Count];
     // Direct cast instead of `as`: a PIVS of the wrong type now fails right here with an
     // InvalidCastException rather than with a NullReferenceException on `.SAMPLE`.
     var pivot_objids = ((SampleSpace)this.PIVS).SAMPLE;
     for (int piv_id = 0; piv_id < num_pivots; ++piv_id) {
         var stddev = this.STDDEV [piv_id];
         var mean = this.MEAN [piv_id];
         var dqp = this.DB.Dist (q, this.PIVS [piv_id]);
         ++this.internal_numdists;
         var seq = this.SEQ [piv_id];
         // The pivot's distance is already known: report it now and start its counter at the
         // threshold, so that a later increment moves it past the `== required_hits` test
         // instead of onto it.
         hits[pivot_objids[piv_id]] = (ushort)required_hits;
         res.Push(pivot_objids[piv_id], dqp);
         // The original computed the start and end symbol with two identical Discretize calls;
         // one call yields the same value (assumes Discretize has no side effects).
         var sym = this.Discretize (dqp, stddev, mean);
         var priority = Math.Min(sym, Math.Abs(this.MAX_SYMBOL - sym));
         pivot_order.Push (priority, Tuple.Create (dqp, stddev, mean, seq));
     }
     // Open one stream per pivot, in TopK traversal order; streams that are empty from the
     // start are never queued.
     var streams = new Queue<IEnumerator<Bitmap>> ();
     foreach (var entry in pivot_order.Items.Traverse()) {
         var t = entry.Value;
         var stream = this.IteratePartsKNN(res, t.Item1, t.Item2, t.Item3, t.Item4).GetEnumerator();
         if (stream.MoveNext()) {
             streams.Enqueue(stream);
         }
     }
     // Round-robin: take one bitmap from the stream at the head of the queue, count its set
     // positions, and re-queue the stream while it still has more to give.
     while (streams.Count > 0) {
         var stream = streams.Dequeue();
         var bitmap = stream.Current;
         var ones = bitmap.Count1;
         for (int rank = 1; rank <= ones; ++rank) {
             var item = bitmap.Select1 (rank);
             hits [item]++;
             if (hits [item] == required_hits) {
                 // Counter reached the pivot count: compute the real distance.
                 var dist = this.DB.Dist (q, this.DB [item]);
                 res.Push (item, dist);
             }
         }
         if (stream.MoveNext ()) {
             streams.Enqueue (stream);
         }
     }
     return res;
 }
Beispiel #3
0
 /// <summary>
 /// Builds the coder's dictionary: every symbol index of <paramref name="alphabet_freqs"/>
 /// is pushed into a TopK of capacity <paramref name="K"/> keyed by its negated frequency,
 /// and the surviving symbols are copied into Dic in traversal order.
 /// </summary>
 /// <param name="K">Capacity of the TopK and length of the Dic array.</param>
 /// <param name="alphabet_freqs">Frequency of each symbol, indexed by symbol.</param>
 /// <param name="not_freq_coder">Coder stored in NotFreqCoder.</param>
 public TopKFreqCoder(int K, IList<int> alphabet_freqs, ISymbolCoder not_freq_coder)
 {
     var ranking = new TopK<int> (K);
     var alphabet_size = alphabet_freqs.Count;
     this.Dic = new int[K];
     // Frequencies are negated before pushing.
     // NOTE(review): presumably TopK keeps the smallest keys, so this selects the most
     // frequent symbols - confirm against TopK.
     for (int symbol = 0; symbol < alphabet_size; ++symbol) {
         ranking.Push (-alphabet_freqs [symbol], symbol);
     }
     // Copy the retained symbols into the dictionary, one slot per traversed entry.
     int slot = 0;
     foreach (var entry in ranking.Items.Traverse()) {
         this.Dic[slot] = entry.Value;
         slot += 1;
     }
     this.NotFreqCoder = not_freq_coder;
 }
Beispiel #4
0
        /// <summary>
        /// Adds 100000 distinct keys (the ASCII decimal text of 0..99999) to a TopK.
        /// All keys are encoded up front, in a separate pass from the Add calls.
        /// </summary>
        public void BenchmarkTopKAdd()
        {
            const int itemCount = 100000;
            var sketch = new TopK(0.001, 0.99, 5);
            var payloads = new byte[itemCount][];

            // Phase 1: build every key before any of them is added.
            for (int index = 0; index < payloads.Length; index++)
            {
                payloads[index] = Encoding.ASCII.GetBytes(index.ToString());
            }

            // Phase 2: feed the prepared keys to the structure in index order.
            foreach (var payload in payloads)
            {
                sketch.Add(payload);
            }
        }
 /// <summary>
 /// Range search: for every pivot the symbol interval covering
 /// [d(q,pivot) - radius, d(q,pivot) + radius] is computed, the positions stored under those
 /// symbols are intersected across all pivots, and the surviving objects are verified with a
 /// real distance computation.
 /// </summary>
 /// <param name="q">Query object.</param>
 /// <param name="radius">Search radius; an object is reported when its distance to q is at most this value.</param>
 /// <returns>A new Result holding every verified object together with its distance.</returns>
 public override IResult SearchRange(object q, double radius)
 {
     var num_pivots = this.PIVS.Count;
     var plan = new TopK<Tuple<double, int, int, Sequence>> (num_pivots);
     // Pass 1: per pivot, find its symbol interval and how many positions fall inside it.
     for (int piv = 0; piv < num_pivots; ++piv) {
         var dqp = this.DB.Dist (q, this.PIVS [piv]);
         ++this.internal_numdists;
         var stddev = this.STDDEV [piv];
         var mean = this.MEAN [piv];
         var first_sym = this.Discretize (dqp - radius, stddev, mean);
         var seq = this.SEQ [piv];
         var last_sym = this.Discretize (dqp + radius, stddev, mean);
         var last_pos = seq.Count - 1;
         var load = 0;
         for (int sym = first_sym; sym <= last_sym; ++sym) {
             // Rank up to the last position: occurrences of sym in the whole sequence.
             load += seq.Rank (sym, last_pos);
         }
         // Keyed by load. NOTE(review): presumably Traverse() then yields the pivots with the
         // fewest candidates first - confirm against TopK.
         plan.Push (load, Tuple.Create (dqp, first_sym, last_sym, seq));
     }
     // Pass 2: intersect the per-pivot position sets. The first pivot contributes all of its
     // positions; every later pivot keeps only positions that survived so far.
     var candidates = new HashSet<int>();
     var first_pivot = true;
     foreach (var entry in plan.Items.Traverse()) {
         var t = entry.Value;
         var seq = t.Item4;
         var survivors = new HashSet<int>();
         for (int sym = t.Item2; sym <= t.Item3; ++sym) {
             var bitmap = seq.Unravel(sym);
             var ones = bitmap.Count1;
             for (int rank = 1; rank <= ones; ++rank) {
                 var pos = bitmap.Select1(rank);
                 if (first_pivot || candidates.Contains(pos)) {
                     survivors.Add(pos);
                 }
             }
         }
         candidates = survivors;
         first_pivot = false;
     }
     // Pass 3: verify the survivors against the actual distance.
     var res = new Result(this.DB.Count, false);
     foreach (var docid in candidates) {
         var d = this.DB.Dist(this.DB[docid], q);
         if (d <= radius) {
             res.Push(docid, d);
         }
     }
     return res;
 }
Beispiel #6
0
 /// <summary>
 /// Recursively builds the subtree under <paramref name="node"/>: up to
 /// <paramref name="arity"/> items are drawn at random from
 /// <paramref name="input_collection"/> and become children, every remaining item is
 /// assigned to the child selected through a TopK of size 1 over the child distances, and
 /// each child is then built from its own bucket.
 /// </summary>
 /// <param name="node">Node whose children and covering value (cov) are filled in.</param>
 /// <param name="input_collection">Items to distribute below the node; it is reordered in place by the random draw.</param>
 /// <param name="arity">Maximum number of children created for this node.</param>
 /// <param name="count_step">Running count of calls, incremented on entry and used for progress output.</param>
 protected void BuildNodeRandom(Node node, IList<ItemPair> input_collection, int arity, ref int count_step)
 {
     ++count_step;
     // Progress output: each of the first 99 calls, afterwards every 100th call.
     if (count_step < 100 || count_step % 100 == 0) {
         Console.WriteLine ("======== SAT_Randomized build_node: {0}, arity: {1}, part-size: {2}, advance: {3}/{4}", node.objID, arity, input_collection.Count, count_step, this.DB.Count);
     }
     var buckets = new List< IList<ItemPair> > ();
     // Draw the children: position `num_centers` receives a random item taken from the
     // not-yet-drawn tail, so the chosen items end up at the front of the list.
     int num_centers = 0;
     while (num_centers < arity && num_centers < input_collection.Count) {
         var pick = this.rand.Next (num_centers, input_collection.Count);
         var center = input_collection [pick];
         input_collection [pick] = input_collection [num_centers];
         input_collection [num_centers] = center;
         node.cov = Math.Max (node.cov, center.dist);
         node.Children.Add( new Node(center.objID) );
         buckets.Add ( new List<ItemPair> () );
         ++num_centers;
     }
     // Distribute the remaining items among the children.
     for (int pos = num_centers; pos < input_collection.Count; ++pos) {
         var item = input_collection[pos];
         node.cov = Math.Max (node.cov, item.dist);
         var item_obj = this.DB [item.objID];
         // Size-1 TopK over (distance to child, child index).
         // NOTE(review): presumably this retains the smallest distance, i.e. the closest child - confirm.
         var nearest = new TopK<int> (1);
         for (int child_pos = 0; child_pos < node.Children.Count; ++child_pos) {
             var child_obj = this.DB [node.Children[child_pos].objID];
             nearest.Push (this.DB.Dist (child_obj, item_obj), child_pos);
         }
         var best = nearest.Items.GetFirst ();
         // The item is stored with its distance to the selected child.
         buckets[best.Value].Add(new ItemPair(item.objID, best.Key));
     }
     // Recurse into every child with the bucket stored at the same index.
     for (int child_pos = 0; child_pos < node.Children.Count; ++child_pos) {
         this.BuildNodeRandom(node.Children[child_pos], buckets[ child_pos ], arity, ref count_step);
     }
 }