/// <summary>
/// Builds a sorted offset index for a binary word-vector file.
/// </summary>
/// <remarks>
/// The input file is read as: three 32-bit integers (version, word count,
/// vector dimensions) followed by one record per word. Each record starts
/// with a <see cref="BinaryReader.ReadString"/>-encoded word and is followed
/// by <c>vectorDimensions * sizeof(float)</c> bytes, which are skipped here.
/// The output file contains the 32-bit integer <c>1</c>, the 32-bit entry
/// count, and then one 64-bit record offset per word, ordered by ordinal
/// comparison of the words. The words themselves are not written.
/// </remarks>
/// <param name="binPath">Path of the binary vector file to read.</param>
/// <param name="indexPath">Path of the index file to create (overwritten if it exists).</param>
/// <param name="progressCallback">
/// Optional progress sink. Receives values from 0 to 0.4 while reading,
/// 0.4 and 0.6 around the sort, and from 0.6 towards 1.0 while writing.
/// May be <c>null</c>.
/// </param>
public static void CreateIndex(string binPath, string indexPath, Action<float> progressCallback = null)
{
    VectorIndexItem[] index = null;

    // Read index data
    using (FileStream binStream = File.OpenRead(binPath))
    using (BinaryReader binReader = new BinaryReader(binStream, Encoding.UTF8))
    {
        // The version field is not used, but must be consumed to advance the stream.
        binReader.ReadInt32();
        int wordCount = binReader.ReadInt32();
        int vectorDimensions = binReader.ReadInt32();

        // Widen before multiplying so a large dimension count cannot overflow int.
        long vectorByteLength = (long)vectorDimensions * sizeof(float);

        index = new VectorIndexItem[wordCount];
        for (int wordIndex = 0; wordIndex < index.Length; wordIndex++)
        {
            // The offset points at the start of the record (the word), not at the vector data.
            long offset = binStream.Position;
            string word = binReader.ReadString();

            index[wordIndex].Word = word;
            index[wordIndex].Offset = offset;

            // Skip the vector payload; only the word and its position are needed.
            binStream.Seek(vectorByteLength, SeekOrigin.Current);

            if ((wordIndex % 10000) == 0 && progressCallback != null)
            {
                progressCallback(0.4f * (float)wordIndex / (float)index.Length);
            }
        }
    }

    // Sort index by key. The callback is optional, so it must be guarded here too.
    if (progressCallback != null)
    {
        progressCallback(0.4f);
    }

    Array.Sort(index, (a, b) => string.CompareOrdinal(a.Word, b.Word));

    if (progressCallback != null)
    {
        progressCallback(0.6f);
    }

    // Write index to file
    using (FileStream indexStream = File.Open(indexPath, FileMode.Create))
    using (BinaryWriter indexWriter = new BinaryWriter(indexStream, Encoding.UTF8))
    {
        indexWriter.Write((int)1);
        indexWriter.Write((int)index.Length);

        for (int i = 0; i < index.Length; i++)
        {
            indexWriter.Write((long)index[i].Offset);

            if ((i % 10000) == 0 && progressCallback != null)
            {
                progressCallback(0.6f + 0.4f * (float)i / (float)index.Length);
            }
        }
    }
}
/// <summary>
/// Binary-searches the index for <paramref name="word"/> using ordinal comparison.
/// </summary>
/// <remarks>
/// Probes entries <c>0 .. vectorCount - 1</c> through <c>ReadIndex</c>. The search
/// is only correct if those entries are ordered by
/// <see cref="string.CompareOrdinal(string, string)"/> on their words.
/// </remarks>
/// <param name="word">The word to look up.</param>
/// <returns>The <c>Offset</c> of the matching index entry, or <c>-1</c> if no entry matches.</returns>
private long SearchIndex(string word)
{
    int min = 0;
    int max = this.vectorCount - 1;

    while (min <= max)
    {
        // Written as min + (max - min) / 2 so the midpoint cannot overflow int
        // when min + max exceeds int.MaxValue.
        int mid = min + (max - min) / 2;
        VectorIndexItem item = this.ReadIndex(mid);

        // One ordinal comparison decides all three outcomes.
        int comparison = string.CompareOrdinal(word, item.Word);
        if (comparison == 0)
        {
            return item.Offset;
        }

        if (comparison < 0)
        {
            max = mid - 1;
        }
        else
        {
            min = mid + 1;
        }
    }

    return -1;
}