/// <summary>
/// Computes the rank of <paramref name="symbol"/> at position <paramref name="pos"/>
/// by searching for <paramref name="pos"/> inside the slice of PERM that LENS assigns
/// to the symbol.
/// </summary>
/// <param name="symbol">Zero-based symbol identifier.</param>
/// <param name="pos">Position up to which the rank is requested; negative values yield 0.</param>
/// <returns>
/// 0 when <paramref name="pos"/> is negative or the symbol's slice is empty; otherwise
/// one plus the index reported by GenericSearch.FindFirst over the slice.
/// </returns>
public int Rank (int symbol, int pos)
{
    if (pos < 0) {
        return 0;
    }

    // LENS is queried with the symbol number shifted by one.
    var shifted = symbol + 1;

    // Translate the two delimiting one-bits into the [first, last) range of PERM entries.
    // NOTE(review): presumably the slice holds the sorted positions of the symbol - confirm.
    var firstEntry = this.LENS.Select1 (shifted) + 1 - shifted;
    var lastEntry = this.LENS.Select1 (shifted + 1) - shifted;
    var length = lastEntry - firstEntry;

    if (length <= 0) {
        return 0;
    }

    var slice = new ListShiftIndex<int> (this.PERM, firstEntry, length);
    return 1 + GenericSearch.FindFirst<int> (pos, slice);
}
/// <summary>
/// Returns the q-gram stored for <paramref name="docid"/> as a window over TEXT.
/// </summary>
/// <param name="docid">Index of the q-gram; the window starts at <c>docid * Q</c>.</param>
/// <param name="qlen">Number of symbols to expose; values less than or equal to zero select <c>Q</c>.</param>
/// <returns>
/// A fresh <see cref="List{T}"/> copy when CopyQGramsOnAccess is set, otherwise the
/// window object itself.
/// </returns>
public IList<int> GetQGram (int docid, int qlen = 0)
{
    int length = qlen > 0 ? qlen : this.Q;
    var window = new ListShiftIndex<int> (this.TEXT, docid * this.Q, length);

    if (!this.CopyQGramsOnAccess) {
        return window;
    }

    return new List<int> (window);
}
/// <summary>
/// Computes the rank of <paramref name="symbol"/> at position <paramref name="pos"/>
/// for a sequence split into blocks of <c>sigma</c> positions: the rank accumulated by
/// the preceding blocks plus the rank inside the block that contains <paramref name="pos"/>.
/// </summary>
/// <param name="symbol">Symbol identifier.</param>
/// <param name="pos">Position up to which the rank is requested; negative values yield 0.</param>
/// <returns>The block rank (GetBlockRank) plus the relative rank inside the container block.</returns>
public int Rank (int symbol, int pos)
{
    if (pos < 0) {
        return 0;
    }

    // Block that contains pos.
    int blockId = pos / this.sigma;

    // Rank contributed by the blocks before the container block (none for block 0).
    int blockRank = blockId > 0 ? this.GetBlockRank (symbol, blockId) : 0;

    // The run of this (block, symbol) pair in B lies between two consecutive zero-bits.
    int zeroRank = blockId * this.sigma + symbol + 1;
    var runStart = this.B.Select0 (zeroRank);
    var runEnd = this.B.Select0 (zeroRank + 1);
    int runLength = runEnd - runStart - 1;

    if (runLength <= 0) {
        // Nothing stored for the symbol inside this block.
        return blockRank;
    }

    // Number of one-bits before the run, reduced to an offset inside the block's permutation.
    int onesBefore = runStart - zeroRank + 1;
    var run = new ListShiftIndex<int> (this.perms [blockId], onesBefore % this.sigma, runLength);
    int relativeRank = 1 + GenericSearch.FindFirst<int> (pos % this.sigma, run);

    return blockRank + relativeRank;
}
/// <summary>
/// Scans <paramref name="PostingLists"/> and reports every value found at the head of at
/// least <paramref name="T"/> lists at the same time, together with the number of lists
/// that contain it.
/// </summary>
/// <param name="PostingLists">
/// Posting lists to intersect. NOTE(review): the algorithm presumably requires each list
/// to be sorted in ascending order - confirm against callers. Empty lists are ignored.
/// </param>
/// <param name="T">
/// Minimum number of lists a value must appear in. <c>T - 1</c> is used as an index, so
/// values below 1 are not supported.
/// </param>
/// <param name="docs">Receives the matching values, in the order they are found.</param>
/// <param name="card">Receives, for each entry of <paramref name="docs"/>, its cardinality.</param>
public void SearchTThreshold (IList<IList<int>> PostingLists, int T, out IList<int> docs, out IList<short> card)
{
    // K - T = number of mismatches (errors)
    int suggestedCardinality = 64;
    docs = new List<int> (suggestedCardinality);
    card = new List<short> (suggestedCardinality);

    // Wrap every posting list in a shiftable view so a list can be advanced
    // without copying; the JIT is expected to optimize the wrappers well.
    var _posting = new List<ListShiftIndex<int>> (PostingLists.Count);
    for (int i = 0; i < PostingLists.Count; i++) {
        var list = PostingLists [i];
        // An empty list can never contribute to a match and has no head to compare.
        if (list.Count > 0) {
            _posting.Add (new ListShiftIndex<int> (list, 0, list.Count));
        }
    }
    var posting = new ListShiftIndex<ListShiftIndex<int>> (_posting, 0, _posting.Count);

    // Orders the lists by their current head. CompareTo is used instead of a
    // subtraction because a subtraction can overflow and report the wrong order.
    Comparison<ListShiftIndex<int>> comptop = delegate(ListShiftIndex<int> x, ListShiftIndex<int> y) {
        this.splaycomparisons++;
        return x [0].CompareTo (y [0]);
    };

    while (posting.Count >= T) {
        int endT = T - 1;
        // Internal note: splay trees would give faster access, but they bring other
        // associated operations (test or analyze both to know more about it).
        Sorting.Sort<ListShiftIndex<int>> (posting, comptop);

        // Candidate: the head of the T-th smallest list.
        var p = posting [endT] [0];
        if (posting [0] [0] == p) {
            // The first T lists share the same head: we have a match.
            docs.Add (p);
            // Advance from the T-th index to |postings| while the head is still p.
            while (endT < posting.Count && posting [endT] [0] == p) {
                ++endT;
            }
            card.Add ((short)endT);
            this.SkipInSortedLists (p, posting, 0, endT - 1, false);
        } else {
            // Skip all lists behind the T-th index. Only items 0 to T-1 would need
            // sorting; we ignore this fact because posting is a small set.
            var startingLength = posting.Count;
            short count = this.SkipInSortedLists (p, posting, 0, endT - 1, true);
            if (count > 0) {
                // The set of posting lists may have shrunk; realign the index.
                endT -= startingLength - posting.Count;
                int startT = endT;
                // Advance from the T-th index to |postings| while the head is still p.
                while (endT < posting.Count && posting [endT] [0] == p) {
                    ++endT;
                    ++count;
                }
                if (count >= T) {
                    // We have a match.
                    docs.Add (p);
                    card.Add (count);
                }
                this.SkipInSortedLists (p, posting, startT, endT - 1, false);
            }
        }
    }
}
/// <summary>
/// Advances every list of <paramref name="posting"/> in the index range
/// [<paramref name="startT"/>, <paramref name="endT"/>]. When <paramref name="do_search"/>
/// is true the search algorithm locates the next position for <paramref name="piv"/>;
/// when it is false each list simply advances by one. Lists that become empty are moved
/// to the front of <paramref name="posting"/> and dropped by shifting it.
/// </summary>
/// <param name="piv">Pivot value searched for when <paramref name="do_search"/> is true.</param>
/// <param name="posting">Collection of posting lists; it is modified in place.</param>
/// <param name="startT">First index (inclusive) of the range to advance.</param>
/// <param name="endT">Last index (inclusive) of the range to advance.</param>
/// <param name="do_search">True to search for the pivot, false to advance by one.</param>
/// <returns>
/// With <paramref name="do_search"/> true, the number of lists where the search reported
/// a hit; otherwise the number of lists advanced.
/// </returns>
public short SkipInSortedLists (int piv, ListShiftIndex<ListShiftIndex<int>> posting, int startT, int endT, bool do_search)
{
    int swapIndex = 0;
    short count = 0;
    for (int i = endT; i >= startT && i >= swapIndex; i--) {
        var currentList = posting [i];
        if (do_search) {
            int occpos;
            if (this.SearchAlgorithm.Search (piv, currentList, out occpos, 0, currentList.Count)) {
                // Step over the pivot itself.
                occpos++;
                count++;
            }
            currentList.Shift (occpos);
        } else {
            count++;
            currentList.Shift (1);
        }
        if (currentList.Count < 1) {
            // Drop the list: park it at the front so the final Shift removes it.
            // TODO: swap to the end and replace the advance with a "decrease length" function.
            // The list swapped into slot i still needs processing only when it comes
            // from inside the requested range. A list taken from before startT is
            // outside the range and must not be advanced.
            bool replacementPending = swapIndex >= startT;
            posting [i] = posting [swapIndex];
            posting [swapIndex] = currentList;
            swapIndex++;
            if (replacementPending) {
                // Re-visit slot i on the next iteration.
                i++;
            }
        }
    }
    posting.Shift (swapIndex);
    return count;
}
/// <summary>
/// Computes the rank of <paramref name="symbol"/> at position <paramref name="pos"/>
/// using the slice of PERM that LENS assigns to the symbol. Slices shorter than 32
/// entries are scanned sequentially; longer ones go through GenericSearch.FindFirst.
/// </summary>
/// <param name="symbol">Zero-based symbol identifier.</param>
/// <param name="pos">Position up to which the rank is requested; negative values yield 0.</param>
/// <returns>
/// For short slices, the number of leading entries that are less than or equal to
/// <paramref name="pos"/> (the scan stops at the first entry equal to or greater than it);
/// for long slices, one plus the index reported by GenericSearch.FindFirst.
/// </returns>
public int Rank (int symbol, int pos)
{
    if (pos < 0) {
        return 0;
    }

    // LENS is queried with the symbol number shifted by one.
    var shifted = symbol + 1;
    var firstEntry = this.LENS.Select1 (shifted) + 1 - shifted;
    var lastEntry = this.LENS.Select1 (shifted + 1) - shifted;
    var length = lastEntry - firstEntry;

    // TODO: replace both methods by RL2 primitives to produce a faster rank
    if (length >= 32) {
        var slice = new ListShiftIndex<int> (this.PERM, firstEntry, length);
        return 1 + GenericSearch.FindFirst<int> (pos, slice);
    }

    // Fast sequential access for small ranges.
    var scanned = 0;
    while (scanned < length) {
        var entry = this.PERM [firstEntry + scanned];
        if (entry > pos) {
            return scanned;
        }
        scanned++;
        if (entry == pos) {
            return scanned;
        }
    }
    return length;
}