/// <summary>
    /// Builds a fresh HNSW graph over <paramref name="vectors"/> and installs it as the
    /// current graph. The build is expensive, so call this only when necessary.
    /// HNSW is described in this article:
    /// https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
    /// </summary>
    /// <param name="vectors">Vectors to index in the new graph.</param>
    private void RebuildGraph(List<float[]> vectors)
    {
        if (_verbose)
        {
            Debug.Log("Clusters: Rebuilding graph...");
        }

        // Neighbour count picked to suit the expected number of rioters (may need tuning).
        const int maxNeighbours = 15;

        var settings = new SmallWorld<float[], float>.Parameters();
        settings.M           = maxNeighbours;               // Max number of neighbours to connect with at each layer
        settings.LevelLambda = 1 / Math.Log(maxNeighbours); // Layer/level logarithmic decrease factor

        // Cosine distance is used as a fast approximation; the author judged a precise
        // Euclidean distance unnecessary here.
        var rebuilt = new SmallWorld<float[], float>(CosineDistance.NonOptimized);

        // The build runs outside the lock; the fixed seed (11) is passed to the graph builder.
        rebuilt.BuildGraph(vectors, new System.Random(11), settings);

        // Only the reference swap happens under the lock.
        lock (_lock)
        {
            _graph = rebuilt;
        }

        if (_verbose)
        {
            Debug.Log("Clusters: Done rebuilding graph.");
        }
    }
        /// <summary>
        /// Generates random sample vectors, builds an HNSW graph over them, and writes both the
        /// vectors and the serialized graph to files derived from <paramref name="pathPrefix"/>.
        /// </summary>
        /// <param name="pathPrefix">
        /// Prefix of the output files; the vectors file and the graph file are named
        /// "{pathPrefix}.{VectorsPathSuffix}" and "{pathPrefix}.{GraphPathSuffix}".
        /// </param>
        private static void BuildAndSave(string pathPrefix)
        {
            Stopwatch     clock;
            List<float[]> sampleVectors;

            var parameters = new Parameters();

            parameters.EnableDistanceCacheForConstruction = true;
            var world = new SmallWorld<float[], float>(CosineDistance.NonOptimized);

            Console.Write($"Generating {SampleSize} sample vectos... ");
            clock         = Stopwatch.StartNew();
            sampleVectors = RandomVectors(Dimensionality, SampleSize);
            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

            Console.WriteLine("Building HNSW graph... ");

            // The listener is kept alive for the duration of the build and disposed afterwards.
            using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
            {
                clock = Stopwatch.StartNew();
                world.BuildGraph(sampleVectors, new Random(42), parameters);
                Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
            }

            // Fixed: the original used a JavaScript-style "${...}" placeholder, which in C#
            // prints a literal '$' in front of the path.
            Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
            clock = Stopwatch.StartNew();

            // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data. It is kept
            // here because replacing it changes the on-disk format; whatever reads the vectors
            // file back must be migrated at the same time.
            var formatter = new BinaryFormatter();

            // Dispose the temporary buffer once its bytes have been written out.
            using (var sampleVectorsStream = new MemoryStream())
            {
                formatter.Serialize(sampleVectorsStream, sampleVectors);
                File.WriteAllBytes($"{pathPrefix}.{VectorsPathSuffix}", sampleVectorsStream.ToArray());
            }

            File.WriteAllBytes($"{pathPrefix}.{GraphPathSuffix}", world.SerializeGraph());
            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
        }
Beispiel #3
0
        /// <summary>
        /// Entry point: generates 40,000 random 20-dimensional vectors, passes each through
        /// <c>VectorUtils.NormalizeSIMD</c>, builds an HNSW graph over them, and prints the
        /// time elapsed since the build started, in milliseconds.
        /// </summary>
        public static void Main()
        {
            const int vectorCount = 40_000;
            const int dimensions  = 20;

            var parameters = new SmallWorld<float[], float>.Parameters
            {
                EnableDistanceCacheForConstruction = true,
            };
            var graph = new SmallWorld<float[], float>(CosineDistance.SIMDForUnits);

            // Fixed seed so the generated data set is the same on every run.
            var vectorsGenerator = new Random(42);
            var randomVectors    = new List<float[]>();

            while (randomVectors.Count < vectorCount)
            {
                var components = new float[dimensions];
                for (int d = 0; d < components.Length; d++)
                {
                    components[d] = (float)vectorsGenerator.NextDouble();
                }

                VectorUtils.NormalizeSIMD(components);
                randomVectors.Add(components);
            }

            // Only the graph construction is timed, not the data generation above.
            var buildTimer = Stopwatch.StartNew();

            graph.BuildGraph(randomVectors, new Random(42), parameters);
            Console.WriteLine(buildTimer.Elapsed.TotalMilliseconds);
        }
Beispiel #4
0
        /// <summary>
        /// Round-trips a graph through <c>SerializeGraph</c> / <c>DeserializeGraph</c> and checks
        /// that the restored graph prints identically to the one it was serialized from.
        /// </summary>
        public void SerializeDeserializeTest()
        {
            const int maxNeighbours = 15;

            string expectedDump;
            byte[] serialized;

            // Inner block keeps the source graph out of scope once its bytes and dump are captured.
            {
                var settings = new SmallWorld<float[], float>.Parameters();
                settings.M           = maxNeighbours;
                settings.LevelLambda = 1 / Math.Log(maxNeighbours);

                var source = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
                source.BuildGraph(this.vectors, new Random(42), settings);

                serialized   = source.SerializeGraph();
                expectedDump = source.Print();
            }

            // Restore into a brand-new instance using the same items and the captured bytes.
            var restored = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
            restored.DeserializeGraph(this.vectors, serialized);

            var actualDump = restored.Print();
            Assert.AreEqual(expectedDump, actualDump);
        }
Beispiel #5
0
        /// <summary>
        /// Initializes the ANN class by building a small-world graph over all feature
        /// vectors to which a query will be compared.
        /// </summary>
        /// <param name="database">All feature vectors of this world.</param>
        public ANN(IEnumerable<NamedFeatureVector> database)
        {
            // Materialize the input once and hand the graph a read-only view of it.
            IReadOnlyList<NamedFeatureVector> vectors = database.ToList().AsReadOnly();

            var parameters = new SmallWorld<NamedFeatureVector, double>.Parameters();
            parameters.EnableDistanceCacheForConstruction = true;

            world = new SmallWorld<NamedFeatureVector, double>(ANNDistance);

            // The listener is kept alive only for the duration of the build.
            using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
            {
                world.BuildGraph(vectors, RandomUtil.ThreadSafeRandom, parameters);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Builds a graph over the test vectors, then for every vector requests its 20 nearest
        /// neighbours and checks that 20 results come back and that the closest one is the
        /// query vector itself at a distance of zero (within <c>FloatError</c>).
        /// </summary>
        public void KNNSearchTest()
        {
            const int neighbourCount = 20;

            var graph = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
            graph.BuildGraph(this.vectors, new Random(42), new SmallWorld<float[], float>.Parameters());

            for (int index = 0; index < this.vectors.Count; ++index)
            {
                var neighbours = graph.KNNSearch(this.vectors[index], neighbourCount);

                // Pick the closest hit before asserting, exactly as many results are expected.
                var closest = neighbours.OrderBy(n => n.Distance).First();

                Assert.AreEqual(neighbourCount, neighbours.Count);
                Assert.AreEqual(index, closest.Id);
                Assert.AreEqual(0, closest.Distance, FloatError);
            }
        }
Beispiel #7
0
        /// <summary>
        /// Benchmark entry point: builds an HNSW graph over the vectors returned by
        /// <c>GetFloatVectors</c>, reports build time and serialized size, then times
        /// 100 repeated 20-nearest-neighbour searches for one random query vector.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const int maxNeighbours = 50;
            const int dimensions    = 100;
            const int resultCount   = 20;
            const int searchRuns    = 100;

            var parameters = new SmallWorld<float[], float>.Parameters()
            {
                M = maxNeighbours,

                // Derived from M (1 / ln(M)), as the HNSW paper recommends. The original
                // hard-coded 1 / Math.Log(15), which did not match M = 50.
                LevelLambda = 1 / Math.Log(maxNeighbours),
            };

            var r       = new Random();
            var vectors = GetFloatVectors(dimensions, r);
            var graph   = new SmallWorld<float[], float>(CosineDistance.SIMD);

            var stopWatch = Stopwatch.StartNew();
            graph.BuildGraph(vectors, new Random(42), parameters);
            stopWatch.Stop();
            var buildTime = stopWatch.Elapsed;

            Console.WriteLine($"graph build for {vectors.Count} items in {buildTime}");
            byte[] buffer = graph.SerializeGraph();
            Console.WriteLine($"graph serialized in {buffer.Length} bytes");
            float[] query = GetRandomVector(dimensions, r);

            for (var i = 0; i < searchRuns; i++)
            {
                // Reuse the same stopwatch; Restart resets the elapsed time and starts timing.
                stopWatch.Restart();
                graph.KNNSearch(query, resultCount);
                stopWatch.Stop();

                var searchTime = stopWatch.Elapsed;
                Console.WriteLine($"Top {resultCount} items retrieved in {searchTime}");
            }
        }