Пример #1
0
 /// <summary>
 ///  Create an empty WordIndex: an empty word set, an empty count list and
 ///  an empty word-to-position dictionary.
 /// </summary>
 /// <remarks>
 ///  Indexed starts out true; presumably this is because the empty
 ///  dictionary already agrees with the empty word set, so no rebuild is
 ///  needed yet (confirm against Reindex).
 /// </remarks>
 public WordIndex()
 {
     // Storage for the distinct words themselves
     Words = new String8Set();

     // One integer per word
     Counts = new List<int>();

     // Word -> integer lookup
     Index = new Dictionary<String8, int>();

     // Nothing to rebuild for a brand-new instance
     Indexed = true;
 }
Пример #2
0
        /// <summary>
        ///  Reorder the word set so that words are sorted by descending
        ///  frequency, then by <c>WordComparer</c> order within buckets whose
        ///  sizes grow by a factor of 2^<c>WordCompressor.BitsPerByte</c>, and
        ///  report how every previous index maps to its new position.
        /// </summary>
        /// <remarks>
        ///  Reindex() is called first when the index is flagged as stale. On
        ///  return, Words holds the reordered set, Index has been cleared and
        ///  Indexed is false, so the lookup must be rebuilt before it is used.
        ///  NOTE(review): only Words is rebuilt in the new order here; if a
        ///  parallel per-word Counts list exists it is not remapped by this
        ///  method. Confirm that callers apply the returned remapping to it.
        /// </remarks>
        /// <returns>
        ///  An array of length Count in which element [oldIndex] is the new
        ///  index of the word that used to live at oldIndex.
        /// </returns>
        public int[] Optimize()
        {
            if (!this.Indexed)
            {
                Reindex();
            }

            int[] remapping = new int[Count];

            // indices[newPosition] == oldPosition; start from the identity order
            int[] indices = new int[Count];
            for (int i = 0; i < indices.Length; ++i)
            {
                indices[i] = i;
            }

            // First, sort words in descending frequency order
            Array.Sort(indices, new CountDescendingComparer(this));

            // Next, within each set with the same byte length, sort by ordinal.
            // Bucket sizes are 2^BitsPerByte, 2^(2*BitsPerByte), ...
            IComparer<int> wordComparer   = new WordComparer(this);
            int            countDone      = 0;
            int            countForLength = 1 << WordCompressor.BitsPerByte;

            // Largest bucket size that can still be shifted left by BitsPerByte
            // without overflowing a 32-bit int.
            int maxShiftable = int.MaxValue >> WordCompressor.BitsPerByte;

            do
            {
                int countToDo = Math.Min(countForLength, Count - countDone);
                Array.Sort(indices, countDone, countToDo, wordComparer);
                countDone += countToDo;

                // Grow the bucket for the next pass. Saturate rather than shift
                // past int range: an overflowing shift would wrap to zero (or
                // throw in a checked context), making countToDo zero and this
                // loop spin forever for very large word sets.
                countForLength = (countForLength > maxShiftable)
                    ? int.MaxValue
                    : countForLength << WordCompressor.BitsPerByte;
            } while (countDone < Count);

            // Look up the old index for each word to map to the new index
            for (int i = 0; i < Count; ++i)
            {
                remapping[indices[i]] = i;
            }

            // Rebuild the word set in the new order
            String8Set newSet = new String8Set(Count, Words.LengthBytes);

            for (int i = 0; i < Count; ++i)
            {
                newSet.Add(Words[indices[i]]);
            }
            Words = newSet;

            // Clear the index (rebuild if needed later)
            Index.Clear();
            Indexed = false;

            return remapping;
        }
Пример #3
0
 /// <summary>
 ///  Create an enumerator over the given set.
 /// </summary>
 /// <param name="set">The String8Set this enumerator will walk.</param>
 public String8SetEnumerator(String8Set set)
 {
     // Start at -1; presumably the first advance moves to element 0
     // (confirm against MoveNext).
     _index = -1;
     _set   = set;
 }