/// <summary>
///  Creates an empty WordIndex: no words, no counts, and an empty
///  word-to-position lookup that is flagged as already indexed.
/// </summary>
public WordIndex()
{
    // Nothing has been added yet, so the (empty) lookup is trivially current.
    Indexed = true;

    Index = new Dictionary<String8, int>();
    Counts = new List<int>();
    Words = new String8Set();
}
/// <summary>
///  Reorders the word set: words are first sorted with CountDescendingComparer,
///  then each successive group of positions (group sizes grow by a factor of
///  2^WordCompressor.BitsPerByte per pass) is sorted with WordComparer.
///  The Words set is rebuilt in the new order, and the lookup Index is cleared
///  and flagged as not indexed so it can be rebuilt later if needed.
/// </summary>
/// <remarks>
///  NOTE(review): Counts is not reordered by this method, so after the call
///  Counts[i] still refers to the OLD index i - confirm callers do not read
///  Counts using the new ordering.
/// </remarks>
/// <returns>
///  An array the size of the word set where element [oldIndex] holds the new
///  index of that word.
/// </returns>
public int[] Optimize()
{
    if (!this.Indexed)
    {
        Reindex();
    }

    int[] remapping = new int[Count];
    int[] indices = new int[Count];
    for (int i = 0; i < indices.Length; ++i)
    {
        indices[i] = i;
    }

    // First, sort words in descending frequency order
    Array.Sort(indices, new CountDescendingComparer(this));

    // Next, within each set with the same byte length, sort by ordinal
    IComparer<int> wordComparer = new WordComparer(this);
    int countDone = 0;
    int countForLength = 1 << WordCompressor.BitsPerByte;

    // Largest group size which can still be shifted left without overflowing an int.
    int maxShiftableCount = int.MaxValue >> WordCompressor.BitsPerByte;

    do
    {
        int countToDo = Math.Min(countForLength, Count - countDone);
        Array.Sort(indices, countDone, countToDo, wordComparer);
        countDone += countToDo;

        // Grow the group size for the next pass. Saturate rather than shift past
        // int.MaxValue: an overflowing shift silently drops the high bits, which
        // would make the group size zero and this loop would never terminate.
        countForLength = (countForLength > maxShiftableCount)
            ? int.MaxValue
            : countForLength << WordCompressor.BitsPerByte;
    } while (countDone < Count);

    // Look up the old index for each word to map to the new index
    for (int i = 0; i < Count; ++i)
    {
        remapping[indices[i]] = i;
    }

    // Rebuild the word set
    String8Set newSet = new String8Set(Count, Words.LengthBytes);
    for (int i = 0; i < Count; ++i)
    {
        newSet.Add(Words[indices[i]]);
    }

    Words = newSet;

    // Clear the index (rebuild if needed later)
    Index.Clear();
    Indexed = false;

    return remapping;
}
/// <summary>
///  Creates an enumerator over the given set, with the current index
///  initialized to -1.
/// </summary>
/// <param name="set">String8Set this enumerator will walk.</param>
public String8SetEnumerator(String8Set set)
{
    _index = -1;
    _set = set;
}