예제 #1
0
        /// <summary>
        ///  Look up a word without adding it.
        ///  When the set is Indexed, the lookup is answered by Index.
        ///  Otherwise Words is binary searched one range at a time; the first
        ///  range holds (1 &lt;&lt; BitsPerByte) items and each following range
        ///  size is the previous one shifted left by BitsPerByte.
        /// </summary>
        /// <remarks>
        ///  NOTE(review): this presumably relies on each range of Words being
        ///  sorted on its own - confirm against the code which writes Words.
        /// </remarks>
        /// <param name="word">Word to search for</param>
        /// <param name="index">
        ///  Position of the word when found. When not found, this holds whatever
        ///  the last lookup produced and should not be relied on.
        /// </param>
        /// <returns>True if the word was found, False otherwise</returns>
        public bool TryFind(String8 word, out int index)
        {
            if (this.Indexed)
            {
                return Index.TryGetValue(word, out index);
            }

            int searched  = 0;
            int rangeSize = 1 << WordCompressor.BitsPerByte;

            // At least one range is always searched, even when Count is zero.
            while (true)
            {
                int rangeCount = Math.Min(rangeSize, Count - searched);
                index = Words.BinarySearch(searched, rangeCount, word);

                // Advance to the next (larger) range
                searched  += rangeCount;
                rangeSize <<= WordCompressor.BitsPerByte;

                // Stop on a match or once every item has been covered
                if (index >= 0 || searched >= Count)
                {
                    break;
                }
            }

            return index >= 0;
        }
예제 #2
0
        /// <summary>
        ///  Return the index of a word, adding it to the set if it is not
        ///  already there. The set is reindexed first if it is not Indexed.
        ///  An existing word has its count incremented; a new word is appended
        ///  to Words with a count of one and registered in Index.
        /// </summary>
        /// <param name="word">Word to find or add</param>
        /// <returns>Index of the existing or newly added word</returns>
        public int FindOrAdd(String8 word)
        {
            if (!this.Indexed)
            {
                Reindex();
            }

            int position;
            bool alreadyKnown = Index.TryGetValue(word, out position);

            if (alreadyKnown)
            {
                Counts[position]++;
            }
            else
            {
                // The new word lands at the current end of the list
                position = Count;
                Words.Add(word);
                Counts.Add(1);

                // Key the index by the entry read back from Words, not by the argument.
                // NOTE(review): presumably because 'word' may reference a caller-owned
                // buffer while Words keeps its own copy - confirm.
                Index[Words[position]] = position;
            }

            return position;
        }
예제 #3
0
        /// <summary>
        ///  Read alternating word and non-word tokens from the reader until the
        ///  end of the stream. Each complete token is passed to _words.FindOrAdd
        ///  and the resulting index is written with WriteSixBitTerminated.
        ///  Nothing is written if the reader is already at the end of the stream.
        /// </summary>
        /// <param name="reader">Source of the text to compress</param>
        /// <param name="writer">Destination for the token indices</param>
        public void Compress(BufferedReader reader, BufferedWriter writer)
        {
            if (reader.EndOfStream)
            {
                return;
            }

            // The first byte decides whether the first token is a word; tokens alternate after that
            bool expectWord = WordSplitter.IsLetterOrDigit(reader.Buffer[reader.Index]);

            while (!reader.EndOfStream)
            {
                // Measure and reference the next token in place
                int     tokenLength = WordSplitter.NextWordLength(reader, expectWord);
                String8 token       = String8.Reference(reader.Buffer, reader.Index, tokenLength);

                // Step past the token and flip the expected token kind
                reader.Index += tokenLength;
                expectWord    = !expectWord;

                // A token which stops before the end of the buffer, or which ends
                // the stream, cannot continue in data which has not been read yet
                bool tokenIsComplete = reader.Index < reader.Length || reader.EndOfStream;

                if (tokenIsComplete)
                {
                    ulong tokenIndex = (ulong)_words.FindOrAdd(token);
                    NumberConverter.WriteSixBitTerminated(writer, tokenIndex);
                    continue;
                }

                if (!reader.EndOfStream)
                {
                    // The token may continue past the buffer: step back so it is read again
                    reader.Index -= tokenLength;
                    expectWord    = !expectWord;

                    // Ask the reader for room for twice the length seen so far
                    reader.EnsureSpace(tokenLength * 2);
                }
            }
        }
예제 #4
0
 /// <summary>
 ///  Look up the index of a word by forwarding to _words.TryFind.
 /// </summary>
 /// <param name="word">Word to look up</param>
 /// <param name="index">Index reported by _words.TryFind</param>
 /// <returns>True if _words.TryFind found the word, False otherwise</returns>
 public bool TryGetWordIndex(String8 word, out int index)
 {
     bool found = _words.TryFind(word, out index);
     return found;
 }