Пример #1
0
 /// <summary>
 /// Computes the rank of <paramref name="symbol"/> up to position <paramref name="pos"/>.
 /// Returns 0 when <paramref name="pos"/> is negative or when the range of PERM
 /// delimited through LENS for the symbol is empty.
 /// </summary>
 /// <param name="symbol">Symbol to count; it is incremented by one before querying LENS.</param>
 /// <param name="pos">Position searched for inside the symbol's range of PERM.</param>
 /// <returns>One plus the index reported by GenericSearch.FindFirst over the symbol's range, or 0.</returns>
 public int Rank(int symbol, int pos)
 {
     if (pos < 0) {
         return 0;
     }
     // LENS is queried with symbol + 1 and symbol + 2; the same offset is
     // subtracted again to turn the selected positions into indexes of PERM.
     int ones = symbol + 1;
     var range_begin = this.LENS.Select1 (ones) + 1 - ones;
     var range_end = this.LENS.Select1 (ones + 1) - ones;
     var range_len = range_end - range_begin;
     if (range_len <= 0) {
         // nothing stored for this symbol
         return 0;
     }
     // NOTE(review): presumably FindFirst returns the index of the last entry
     // not greater than pos (-1 if none), so adding one yields a count -- confirm.
     var occurrences = new ListShiftIndex<int> (this.PERM, range_begin, range_len);
     return 1 + GenericSearch.FindFirst<int> (pos, occurrences);
 }
Пример #2
0
 /// <summary>
 /// Returns the q-gram stored for <paramref name="docid"/> as a list of symbols.
 /// The q-gram starts at offset docid * Q inside TEXT.
 /// </summary>
 /// <param name="docid">Index of the q-gram; multiplied by Q to locate its first symbol.</param>
 /// <param name="qlen">Number of symbols to expose; any value not greater than zero selects Q.</param>
 /// <returns>
 /// A fresh <see cref="List{T}"/> copy when CopyQGramsOnAccess is set, otherwise
 /// the ListShiftIndex window over TEXT itself.
 /// </returns>
 public IList<int> GetQGram(int docid, int qlen = 0)
 {
     int length = (qlen > 0) ? qlen : this.Q;
     var window = new ListShiftIndex<int>(this.TEXT, docid*this.Q, length);
     if (!this.CopyQGramsOnAccess) {
         // hand out the window directly, no copy is made
         return window;
     }
     return new List<int>(window);
 }
Пример #3
0
 /// <summary>
 /// Computes the rank of <paramref name="symbol"/> up to position <paramref name="pos"/>
 /// by adding the rank accumulated by previous blocks (GetBlockRank) to the rank
 /// found inside the block that contains <paramref name="pos"/>.
 /// </summary>
 /// <param name="symbol">Symbol to count.</param>
 /// <param name="pos">Position in the sequence; a negative value yields 0.</param>
 /// <returns>Block rank plus the rank relative to the containing block.</returns>
 public int Rank(int symbol, int pos)
 {
     if (pos < 0) {
         return 0;
     }
     // each block spans sigma positions
     int block = pos / this.sigma;
     // the first block has no predecessors, so GetBlockRank is only asked for block > 0
     int previous = (block > 0) ? this.GetBlockRank (symbol, block) : 0;
     // the symbol's run inside the block is delimited by two consecutive zeros of B
     int zeros = block * this.sigma + symbol + 1;
     var run_begin = this.B.Select0 (zeros);
     var run_end = this.B.Select0 (zeros + 1);
     int run_len = run_end - run_begin - 1;
     if (run_len <= 0) {
         // the symbol does not occur inside this block
         return previous;
     }
     // number of ones seen before the run; reduced modulo sigma it is used as
     // the starting offset inside the block's permutation
     int ones = run_begin - zeros + 1;
     var occurrences = new ListShiftIndex<int> (this.perms [block], ones % this.sigma, run_len);
     int inside = 1 + GenericSearch.FindFirst<int> (pos % this.sigma, occurrences);
     return previous + inside;
 }
Пример #4
0
        /// <summary>
        /// T-threshold search: collects every value that is found in at least
        /// <paramref name="T"/> of the given posting lists, together with the
        /// number of lists in which it was counted.
        /// </summary>
        /// <param name="PostingLists">
        /// Lists to be merged. The lists themselves are not modified; each one is wrapped
        /// in a ListShiftIndex window that is advanced as the search progresses.
        /// NOTE(review): presumably every list is sorted in ascending order and non-empty
        /// (the head of each list is read while sorting) -- confirm with callers.
        /// </param>
        /// <param name="T">Minimum number of lists that must contain a value to report it.
        /// NOTE(review): values below 1 are not handled specially here -- confirm callers never pass them.</param>
        /// <param name="docs">Receives the reported values.</param>
        /// <param name="card">Receives, parallel to <paramref name="docs"/>, the number of lists counted for each value.</param>
        public void SearchTThreshold(IList<IList<int>> PostingLists, int T, out IList<int> docs, out IList<short> card)
        {
            // K - T = Number of mismatches (errors)
            int suggestedCardinality = 64;
            docs = new List<int> (suggestedCardinality);
            card = new List<short> (suggestedCardinality);
            // every posting list is wrapped in a shiftable window so that advancing
            // a list never touches the caller's data
            var _posting = new List<ListShiftIndex<int>> (PostingLists.Count);

            for (int i = 0; i < PostingLists.Count; i++) {
                var list = PostingLists [i];
                _posting.Add (new ListShiftIndex<int> (list, 0, list.Count));
            }
            var posting = new ListShiftIndex< ListShiftIndex<int> > (_posting, 0, _posting.Count);
            // orders the windows by their current head; CompareTo is used instead of a
            // subtraction because the difference of two ints of opposite sign can overflow
            Comparison<ListShiftIndex<int>> comptop = delegate(ListShiftIndex<int> x, ListShiftIndex<int> y) {
                this.splaycomparisons++;
                return x [0].CompareTo (y [0]);
            };
            while (posting.Count >= T) {
                int endT = T - 1;
                // internal note:
                // we can get fast access using splaytrees but there are other
                // associated operations
                // (test both or analyze them if you want to know more about it)
                Sorting.Sort<ListShiftIndex<int>> (posting, comptop);
                // p is the head of the T-th smallest list
                var p = posting [endT] [0];
                if (posting [0] [0] == p) {
                    // the first T heads are equal: we have a match
                    docs.Add (p);
                    // advance from Tindex to |postings|
                    while (endT < posting.Count && posting[endT][0] == p) {
                        ++endT;
                    }
                    card.Add ((short)endT);
                    this.SkipInSortedLists (p, posting, 0, endT - 1, false);
                } else {
                    // skip all lists behind than Tindex
                    // sort only the necessary items (0 to Tindex-1), we ignore this fact
                    // because posting is an small set
                    var startingLength = posting.Count;
                    short count = this.SkipInSortedLists (p, posting, 0, endT - 1, true);
                    if (count > 0) {
                        // posting list can reduce its length
                        endT -= startingLength - posting.Count;
                        int startT = endT;
                        // advance from Tindex to |postings|
                        while (endT < posting.Count && posting[endT][0] == p) {
                            ++endT;
                            ++count;
                        }
                        if (count >= T) {
                            // we have a match
                            docs.Add (p);
                            card.Add (count);
                        }
                        this.SkipInSortedLists (p, posting, startT, endT - 1, false);
                    }
                }
            }
        }
Пример #5
0
 /// <summary>
 /// Advances every list of <paramref name="posting"/> stored in the positions
 /// <paramref name="startT"/> to <paramref name="endT"/> (both inclusive). If
 /// <paramref name="do_search"/> is true the configured SearchAlgorithm looks for
 /// <paramref name="piv"/> to decide how far each list advances; if it is false
 /// every list advances by one. Lists left without items are moved to the front
 /// of <paramref name="posting"/> and dropped from it before returning.
 /// </summary>
 /// <param name="piv">Value searched for when <paramref name="do_search"/> is true.</param>
 /// <param name="posting">Collection of lists; both its items and its own start are shifted.</param>
 /// <param name="startT">First position to process.</param>
 /// <param name="endT">Last position to process.</param>
 /// <param name="do_search">Selects between searching for the pivot and advancing by one.</param>
 /// <returns>
 /// The number of lists where the pivot was found (search mode) or the number
 /// of lists advanced (plain mode).
 /// </returns>
 public short SkipInSortedLists(int piv, ListShiftIndex<ListShiftIndex<int>> posting, int startT, int endT, bool do_search)
 {
     // positions below swapIndex hold exhausted lists waiting to be dropped
     int swapIndex = 0;
     short count = 0;
     for (int i = endT; i >= startT && i >= swapIndex; i--) {
         var currentList = posting [i];
         if (do_search) {
             int occpos;
             if (this.SearchAlgorithm.Search (piv, currentList, out occpos, 0, currentList.Count)) {
                 // the pivot was found: step over it and count the hit
                 occpos++;
                 count++;
             }
             currentList.Shift (occpos);
         } else {
             count++;
             currentList.Shift (1);
         }
         if (currentList.Count < 1) {
             // drop list
             // TODO: Swap to the end and change advance for an "decrease length" function
             // The list brought into slot i must be visited again only when it comes
             // from inside [startT, endT], where it is still pending. A list taken
             // from below startT is not part of this call and must stay untouched;
             // re-visiting it would advance (and count) an unrelated list.
             bool replacementIsPending = swapIndex >= startT;
             posting [i] = posting [swapIndex];
             posting [swapIndex] = currentList;
             swapIndex++;
             if (replacementIsPending) {
                 i++;
             }
         }
     }
     // removes the exhausted lists gathered at the front
     posting.Shift (swapIndex);
     return count;
 }
Пример #6
0
 /// <summary>
 /// Computes the rank of <paramref name="symbol"/> up to position <paramref name="pos"/>.
 /// Ranges of PERM shorter than 32 entries are scanned sequentially; longer ranges
 /// are delegated to GenericSearch.FindFirst.
 /// </summary>
 /// <param name="symbol">Symbol to count; it is incremented by one before querying LENS.</param>
 /// <param name="pos">Position in the sequence; a negative value yields 0.</param>
 /// <returns>
 /// For short ranges, the index of the first entry greater than pos (plus one if an
 /// entry equal to pos is met first), or the range length when no such entry exists.
 /// For long ranges, one plus the index reported by FindFirst.
 /// </returns>
 public int Rank(int symbol, int pos)
 {
     if (pos < 0) {
         return 0;
     }
     // LENS is queried with symbol + 1 and symbol + 2; the same offset is
     // subtracted again to turn the selected positions into indexes of PERM.
     int ones = symbol + 1;
     var range_begin = this.LENS.Select1 (ones) + 1 - ones;
     var range_end = this.LENS.Select1 (ones + 1) - ones;
     var range_len = range_end - range_begin;
     // TODO: replace both methods by RL2 primitives to produce a faster rank
     if (range_len >= 32) {
         var occurrences = new ListShiftIndex<int> (this.PERM, range_begin, range_len);
         return 1 + GenericSearch.FindFirst<int> (pos, occurrences);
     }
     // fast sequential access for small ranges
     int seen = 0;
     while (seen < range_len) {
         var entry = this.PERM [range_begin + seen];
         if (entry >= pos) {
             // an entry equal to pos is part of the rank, a greater one is not
             return (entry == pos) ? seen + 1 : seen;
         }
         seen++;
     }
     return range_len;
 }