/// <summary>
///  Look up a word and report the position at which it is stored.
/// </summary>
/// <remarks>
///  When the set is indexed, the lookup goes through <c>Index</c>.
///  Otherwise <c>Words</c> is searched in consecutive segments: the first
///  segment holds <c>1 &lt;&lt; WordCompressor.BitsPerByte</c> entries and the
///  segment size is shifted left by <c>WordCompressor.BitsPerByte</c> after
///  each pass. Each segment is binary searched on its own.
///  NOTE(review): this presumes every segment is sorted independently -
///  confirm against the code which builds <c>Words</c>.
/// </remarks>
/// <param name="word">Word to look for.</param>
/// <param name="index">
///  Position of the word when found; always -1 when this method returns false.
/// </param>
/// <returns>True if the word was found, false otherwise.</returns>
public bool TryFind(String8 word, out int index)
{
    if (this.Indexed)
    {
        if (Index.TryGetValue(word, out index))
        {
            return true;
        }

        // A failed lookup would otherwise leave whatever TryGetValue wrote
        // (0 for a standard dictionary), which looks like a valid position.
        index = -1;
        return false;
    }

    int countDone = 0;
    int countForLength = 1 << WordCompressor.BitsPerByte;

    do
    {
        // Search the next segment, clipped to the words actually present
        int countToDo = Math.Min(countForLength, Count - countDone);
        index = Words.BinarySearch(countDone, countToDo, word);

        countDone += countToDo;
        countForLength = countForLength << WordCompressor.BitsPerByte;
    } while (index < 0 && countDone < Count);

    if (index >= 0)
    {
        return true;
    }

    // Do not leak the negative search result of the last segment to callers
    index = -1;
    return false;
}
/// <summary>
///  Return the position of a word, adding it when it is not yet present.
///  An existing word has its entry in <c>Counts</c> incremented; a new word
///  is appended with a count of one and registered in <c>Index</c>.
/// </summary>
/// <param name="word">Word to find or add.</param>
/// <returns>Position of the existing or newly added word.</returns>
public int FindOrAdd(String8 word)
{
    // The dictionary lookup below requires the index; build it on demand
    if (!this.Indexed)
    {
        Reindex();
    }

    int existingIndex;
    if (!Index.TryGetValue(word, out existingIndex))
    {
        // Not seen before: the new word goes at the current end of the set
        int addedIndex = Count;
        Words.Add(word);
        Counts.Add(1);

        // Key the index by the stored entry rather than the argument.
        // NOTE(review): presumably Words.Add copies the value - confirm.
        Index[Words[addedIndex]] = addedIndex;
        return addedIndex;
    }

    // Seen before: record one more occurrence
    Counts[existingIndex]++;
    return existingIndex;
}
/// <summary>
///  Read alternating tokens from the reader, look each one up (or add it) in
///  the word set, and write the resulting position to the writer with
///  <c>NumberConverter.WriteSixBitTerminated</c>.
/// </summary>
/// <param name="reader">Source of the text to compress.</param>
/// <param name="writer">Destination for the encoded word positions.</param>
public void Compress(BufferedReader reader, BufferedWriter writer)
{
    if (reader.EndOfStream)
    {
        return;
    }

    // The first byte decides whether the stream opens with a word or a non-word
    bool isWord = WordSplitter.IsLetterOrDigit(reader.Buffer[reader.Index]);

    while (!reader.EndOfStream)
    {
        // Measure the next token and reference it in place in the buffer
        int tokenLength = WordSplitter.NextWordLength(reader, isWord);
        String8 token = String8.Reference(reader.Buffer, reader.Index, tokenLength);

        // Step past the token and flip the expected kind for the next one
        reader.Index += tokenLength;
        isWord = !isWord;

        // The token is complete if buffered content follows it or the stream is done
        bool tokenIsComplete = reader.Index < reader.Length || reader.EndOfStream;

        if (tokenIsComplete)
        {
            int tokenIndex = _words.FindOrAdd(token);
            NumberConverter.WriteSixBitTerminated(writer, (ulong)tokenIndex);
        }
        else if (!reader.EndOfStream)
        {
            // The token may continue beyond the buffer: undo the step so it is re-read
            reader.Index -= tokenLength;
            isWord = !isWord;

            // End of buffer but not of stream, so request more
            reader.EnsureSpace(tokenLength * 2);
        }
    }
}
/// <summary>
///  Look up a word in the word set held by this instance.
/// </summary>
/// <param name="word">Word to look for.</param>
/// <param name="index">Receives the value reported by the word set's <c>TryFind</c>.</param>
/// <returns>True if the word set reports the word as found, false otherwise.</returns>
public bool TryGetWordIndex(String8 word, out int index)
{
    bool wasFound = _words.TryFind(word, out index);
    return wasFound;
}