Example #1
0
        /// <summary>
        /// Builds a word-sorted offset index for the vector file at <paramref name="binPath"/>
        /// and writes it to <paramref name="indexPath"/>.
        /// </summary>
        /// <remarks>
        /// Input layout as read here: three Int32 header values (version, word count, vector
        /// dimensions), then for each word a length-prefixed string followed by
        /// <c>vectorDimensions</c> floats, which are skipped rather than read.
        /// Output layout: Int32 <c>1</c> (presumably the index format version - confirm against
        /// the reader), Int32 entry count, then one Int64 file offset per entry, ordered by
        /// ordinal comparison of the words.
        /// </remarks>
        /// <param name="binPath">Path of the vector file to index.</param>
        /// <param name="indexPath">Path of the index file to create (overwritten if it exists).</param>
        /// <param name="progressCallback">
        /// Optional progress sink. Receives values in [0, 0.4) while reading, 0.4 before and 0.6
        /// after sorting, and values in [0.6, 1.0) while writing. May be <c>null</c>.
        /// </param>
        public static void CreateIndex(string binPath, string indexPath, Action <float> progressCallback = null)
        {
            VectorIndexItem[] index = null;

            // Read index data
            using (FileStream binStream = File.OpenRead(binPath))
            using (BinaryReader binReader = new BinaryReader(binStream, Encoding.UTF8))
            {
                binReader.ReadInt32(); // version: not used here, but must be consumed to advance the stream
                int wordCount        = binReader.ReadInt32();
                int vectorDimensions = binReader.ReadInt32();

                // Widen before multiplying so a large dimension count cannot overflow int.
                long vectorByteSize = (long)vectorDimensions * sizeof(float);

                index = new VectorIndexItem[wordCount];
                for (int wordIndex = 0; wordIndex < index.Length; wordIndex++)
                {
                    // The recorded offset points at the start of the entry (the word itself),
                    // not at the vector data that follows it.
                    long   offset = binStream.Position;
                    string word   = binReader.ReadString();

                    index[wordIndex].Word   = word;
                    index[wordIndex].Offset = offset;

                    // Skip the vector payload; only words and offsets are needed for the index.
                    binStream.Seek(vectorByteSize, SeekOrigin.Current);

                    if ((wordIndex % 10000) == 0 && progressCallback != null)
                    {
                        progressCallback(0.4f * (float)wordIndex / (float)index.Length);
                    }
                }
            }

            // Sort index by key. The callback is optional, so guard these calls just like
            // the ones inside the loops.
            if (progressCallback != null)
            {
                progressCallback(0.4f);
            }

            Array.Sort(index, (a, b) => string.CompareOrdinal(a.Word, b.Word));

            if (progressCallback != null)
            {
                progressCallback(0.6f);
            }

            // Write index to file
            using (FileStream indexStream = File.Open(indexPath, FileMode.Create))
            using (BinaryWriter indexWriter = new BinaryWriter(indexStream, Encoding.UTF8))
            {
                indexWriter.Write((int)1);
                indexWriter.Write((int)index.Length);

                for (int i = 0; i < index.Length; i++)
                {
                    indexWriter.Write((long)index[i].Offset);

                    if ((i % 10000) == 0 && progressCallback != null)
                    {
                        progressCallback(0.6f + 0.4f * (float)i / (float)index.Length);
                    }
                }
            }
        }
Example #2
0
        /// <summary>
        /// Binary-searches the index entries for <paramref name="word"/> using ordinal
        /// string comparison.
        /// </summary>
        /// <param name="word">The word to look up.</param>
        /// <returns>
        /// The <c>Offset</c> of the matching index entry, or <c>-1</c> when no entry matches.
        /// </returns>
        /// <remarks>
        /// Assumes the entries returned by <c>ReadIndex</c> are ordered by ordinal comparison
        /// of their words - confirm against the code that builds the index.
        /// </remarks>
        private long SearchIndex(string word)
        {
            int min = 0;
            int max = this.vectorCount - 1;

            while (min <= max)
            {
                // Overflow-safe midpoint: (min + max) can exceed int.MaxValue for very large indexes.
                int             mid  = min + (max - min) / 2;
                VectorIndexItem item = this.ReadIndex(mid);

                // One ordinal comparison decides all three branches; a result of zero is
                // equivalent to the string equality operator.
                int comparison = string.CompareOrdinal(word, item.Word);
                if (comparison == 0)
                {
                    return(item.Offset);
                }
                else if (comparison < 0)
                {
                    max = mid - 1;
                }
                else
                {
                    min = mid + 1;
                }
            }
            return(-1);
        }