Beispiel #1
0
        /// <summary>
        /// Loads the saved sample vectors and the serialized HNSW graph from disk, then
        /// times a parallel 10-nearest-neighbour search over <c>TestSize</c> random vectors.
        /// </summary>
        /// <param name="pathPrefix">
        /// Common prefix of the two input files; <c>VectorsPathSuffix</c> and
        /// <c>GraphPathSuffix</c> are appended to it after a dot.
        /// </param>
        private static void LoadAndSearch(string pathPrefix)
        {
            Stopwatch clock;

            Console.Write("Loading HNSW graph... ");
            clock = Stopwatch.StartNew();

            // NOTE(review): BinaryFormatter is unsafe for untrusted data and obsolete in current
            // .NET; it is kept here only to stay compatible with the existing vectors file format.
            var formatter = new BinaryFormatter();
            List<float[]> sampleVectors;

            // Deserialize directly from the file stream: avoids buffering the whole file in a
            // byte[] first and guarantees the stream is disposed.
            using (var vectorsFile = File.OpenRead($"{pathPrefix}.{VectorsPathSuffix}"))
            {
                sampleVectors = (List<float[]>)formatter.Deserialize(vectorsFile);
            }

            SmallWorld<float[], float> world;
            using (var graphFile = File.OpenRead($"{pathPrefix}.{GraphPathSuffix}"))
            {
                world = SmallWorld<float[], float>.DeserializeGraph(sampleVectors, CosineDistance.SIMDForUnits, DefaultRandomGenerator.Instance, graphFile);
            }

            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

            Console.Write($"Generating {TestSize} test vectors... ");
            clock = Stopwatch.StartNew();
            var vectors = RandomVectors(Dimensionality, TestSize);
            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

            Console.WriteLine("Running search against the graph... ");

            // The listener is only needed for its lifetime: it is attached to the search event
            // source while the queries run and disposed afterwards.
            using (var listener = new MetricsEventListener(EventSources.GraphSearchEventSource.Instance))
            {
                clock = Stopwatch.StartNew();
                Parallel.ForEach(vectors, (vector) =>
                {
                    // Results are discarded; only the elapsed time is reported.
                    world.KNNSearch(vector, 10);
                });
                Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
            }
        }
        /// <summary>
        /// Generates <c>SampleSize</c> random vectors, builds an HNSW graph over them and
        /// writes both the vectors and the serialized graph to disk.
        /// </summary>
        /// <param name="pathPrefix">
        /// Common prefix of the two output files; <c>VectorsPathSuffix</c> and
        /// <c>GraphPathSuffix</c> are appended to it after a dot.
        /// </param>
        private static void BuildAndSave(string pathPrefix)
        {
            Stopwatch      clock;
            List <float[]> sampleVectors;

            var parameters = new Parameters();

            parameters.EnableDistanceCacheForConstruction = true;
            var world = new SmallWorld <float[], float>(CosineDistance.NonOptimized);

            Console.Write($"Generating {SampleSize} sample vectors... ");
            clock         = Stopwatch.StartNew();
            sampleVectors = RandomVectors(Dimensionality, SampleSize);
            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

            Console.WriteLine("Building HNSW graph... ");

            // The listener is attached to the build event source only while the graph is built.
            using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
            {
                clock = Stopwatch.StartNew();
                world.BuildGraph(sampleVectors, new Random(42), parameters);
                Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
            }

            // C# interpolation is "{expr}", not "${expr}": the former "$" was printed literally
            // in front of the path.
            Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
            clock = Stopwatch.StartNew();

            // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data; kept so the
            // vectors file format stays unchanged.
            BinaryFormatter formatter = new BinaryFormatter();

            // Serialize straight into the target file instead of an undisposed MemoryStream
            // followed by a second copy via ToArray().
            using (var vectorsFile = File.Create($"{pathPrefix}.{VectorsPathSuffix}"))
            {
                formatter.Serialize(vectorsFile, sampleVectors);
            }

            File.WriteAllBytes($"{pathPrefix}.{GraphPathSuffix}", world.SerializeGraph());
            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
        }
    /// <summary>
    /// Builds a fresh HNSW graph over <paramref name="vectors"/> and stores it in
    /// <c>_graph</c>. Expensive: call only when necessary.
    /// Background on the HNSW search structure:
    /// https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
    /// </summary>
    /// <param name="vectors">Vectors handed to <c>BuildGraph</c> as the graph's items.</param>
    private void RebuildGraph(List<float[]> vectors)
    {
        // Neighbour count used for both M and the level factor below (might need adjusting).
        const int neighbourCount = 15;

        if (_verbose)
        {
            Debug.Log("Clusters: Rebuilding graph...");
        }

        var settings = new SmallWorld<float[], float>.Parameters();
        settings.M           = neighbourCount;                // Max number of neighbours to connect with at each layer
        settings.LevelLambda = 1 / Math.Log(neighbourCount);  // Layer/level logarithmic decrease factor

        // Cosine distance is used as a fast approximation; a precise Euclidean
        // distance is not required here.
        var rebuilt = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
        rebuilt.BuildGraph(vectors, new System.Random(11), settings);

        // Publish the finished graph under the lock; the build itself runs outside it.
        lock (_lock)
        {
            _graph = rebuilt;
        }

        if (_verbose)
        {
            Debug.Log("Clusters: Done rebuilding graph.");
        }
    }
Beispiel #4
0
        /// <summary>
        /// Builds a graph with the <c>SelectHeuristic</c> neighbour selection and the given
        /// flags, then checks that every indexed vector is returned as its own nearest
        /// neighbour (distance within <c>FloatError</c> of zero) in a 20-NN search.
        /// </summary>
        /// <param name="expandBestSelection">Value assigned to <c>ExpandBestSelection</c>.</param>
        /// <param name="keepPrunedConnections">Value assigned to <c>KeepPrunedConnections</c>.</param>
        public void KNNSearchTestAlgorithm4(bool expandBestSelection, bool keepPrunedConnections)
        {
            var settings = new SmallWorld<float[], float>.Parameters();
            settings.NeighbourHeuristic    = NeighbourSelectionHeuristic.SelectHeuristic;
            settings.ExpandBestSelection   = expandBestSelection;
            settings.KeepPrunedConnections = keepPrunedConnections;

            var world = new SmallWorld<float[], float>(CosineDistance.NonOptimized, DefaultRandomGenerator.Instance, settings);
            world.AddItems(vectors);

            // Failures are accumulated over the whole run and asserted once at the end.
            int   mismatches    = 0;
            float worstDistance = float.MinValue;

            for (int index = 0; index < vectors.Count; index++)
            {
                var neighbours = world.KNNSearch(vectors[index], 20);
                var nearest    = neighbours.OrderBy(n => n.Distance).First();

                Assert.AreEqual(20, neighbours.Count);

                // The closest hit for an indexed vector should be the vector itself.
                mismatches   += nearest.Id != index ? 1 : 0;
                worstDistance = Math.Max(worstDistance, nearest.Distance);
            }

            Assert.AreEqual(0, mismatches);
            Assert.AreEqual(0, worstDistance, FloatError);
        }
Beispiel #5
0
        /// <summary>
        /// Adds all test vectors to a graph built with default parameters and checks that a
        /// 20-NN search for each vector returns 20 results, with the vector itself as the
        /// closest one at a distance within <c>FloatError</c> of zero.
        /// </summary>
        public void KNNSearchTest()
        {
            var defaults = new SmallWorld<float[], float>.Parameters();
            var world    = new SmallWorld<float[], float>(CosineDistance.NonOptimized, DefaultRandomGenerator.Instance, defaults);

            world.AddItems(vectors);

            // Totals are checked after the loop so one run reports the overall outcome.
            int   wrongBestCount = 0;
            float largestError   = float.MinValue;

            for (int query = 0; query < vectors.Count; query++)
            {
                var hits    = world.KNNSearch(vectors[query], 20);
                var closest = hits.OrderBy(hit => hit.Distance).First();

                Assert.AreEqual(20, hits.Count);

                if (closest.Id != query)
                {
                    wrongBestCount += 1;
                }

                largestError = Math.Max(largestError, closest.Distance);
            }

            Assert.AreEqual(0, wrongBestCount);
            Assert.AreEqual(0, largestError, FloatError);
        }
Beispiel #6
0
        /// <summary>
        /// Round-trips a graph through <c>SerializeGraph</c> / <c>DeserializeGraph</c> using an
        /// in-memory stream and checks that the printed form of the copy equals the original's.
        /// </summary>
        public void SerializeDeserializeTest()
        {
            string original;

            // The stream is disposed deterministically once the round trip is done.
            using (var stream = new MemoryStream())
            {
                // restrict scope of original graph
                {
                    var parameters = new SmallWorld <float[], float> .Parameters()
                    {
                        M           = 15,
                        LevelLambda = 1 / Math.Log(15),
                    };

                    var graph = new SmallWorld <float[], float>(CosineDistance.NonOptimized, DefaultRandomGenerator.Instance, parameters);
                    graph.AddItems(vectors);

                    graph.SerializeGraph(stream);
                    original = graph.Print();
                }

                // Rewind so deserialization reads from the start of what was just written.
                stream.Position = 0;

                var copy = SmallWorld <float[], float> .DeserializeGraph(vectors, CosineDistance.NonOptimized, DefaultRandomGenerator.Instance, stream);

                Assert.AreEqual(original, copy.Print());
            }
        }
Beispiel #7
0
        /// <summary>
        /// Entry point: builds an HNSW graph over 40,000 random, normalized 20-component
        /// vectors and prints the build time in milliseconds.
        /// </summary>
        public static void Main()
        {
            const int vectorCount = 40_000;
            const int dimensions  = 20;

            var settings = new SmallWorld<float[], float>.Parameters
            {
                EnableDistanceCacheForConstruction = true,
            };
            var world = new SmallWorld<float[], float>(CosineDistance.SIMDForUnits);

            // Fixed seed, so the same input vectors are produced on every run.
            var source  = new Random(42);
            var samples = new List<float[]>();

            while (samples.Count < vectorCount)
            {
                var sample = new float[dimensions];
                for (int component = 0; component < sample.Length; component++)
                {
                    sample[component] = (float)source.NextDouble();
                }

                VectorUtils.NormalizeSIMD(sample);
                samples.Add(sample);
            }

            // Only the graph construction is timed, not the vector generation.
            var timer = Stopwatch.StartNew();

            world.BuildGraph(samples, new Random(42), settings);
            Console.WriteLine(timer.Elapsed.TotalMilliseconds);
        }
Beispiel #8
0
        /// <summary>
        /// Serializes a freshly built graph to a byte array, loads it into a second
        /// instance and checks that both graphs print identically.
        /// </summary>
        public void SerializeDeserializeTest()
        {
            byte[] serialized;
            string expectedDump;

            // Inner scope keeps the source graph from being referenced after serialization.
            {
                var settings = new SmallWorld<float[], float>.Parameters();
                settings.M           = 15;
                settings.LevelLambda = 1 / Math.Log(15);

                var source = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
                source.BuildGraph(this.vectors, new Random(42), settings);

                serialized   = source.SerializeGraph();
                expectedDump = source.Print();
            }

            var restored = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
            restored.DeserializeGraph(this.vectors, serialized);

            Assert.AreEqual(expectedDump, restored.Print());
        }
Beispiel #9
0
        /// <summary>
        /// Creates a new world with a location at (<paramref name="x"/>, <paramref name="y"/>)
        /// and then passes it to <c>ExpandWorld</c> with the same coordinates.
        /// </summary>
        /// <param name="x">First coordinate of the initial location.</param>
        /// <param name="y">Second coordinate of the initial location.</param>
        /// <returns>The newly created world.</returns>
        internal SmallWorld CreateWorld(int x, int y)
        {
            var world = new SmallWorld();

            // Initial location is created with 0 and true as the trailing arguments.
            world.CreateLocationAtPoint(x, y, 0, true);
            ExpandWorld(world, x, y);

            return world;
        }
Beispiel #10
0
        /// <summary>
        /// Creates the view of the map: one <c>TileView</c> per map tile is added to the
        /// grid and recorded in <c>TilesView</c>, keyed by its tile.
        /// </summary>
        /// <param name="_map">Reference to the Map model</param>
        /// <param name="_mapViewGrid">WPF Grid element</param>
        public MapView(SmallWorld.IMap _map, Grid _mapViewGrid)
        {
            Map         = _map;
            MapViewGrid = _mapViewGrid;
            TilesView   = new Dictionary<ITile, TileView>();

            // Tiles are visited row by row (y outer, x inner) over a MapSize x MapSize area,
            // so the grid children are added in that order.
            for (int row = 0; row < Map.MapSize; row++)
            {
                for (int column = 0; column < Map.MapSize; column++)
                {
                    var tileView = new TileView(Map.Tiles[column, row]);

                    MapViewGrid.Children.Add(tileView);
                    TilesView.Add(Map.Tiles[column, row], tileView);
                }
            }
        }
Beispiel #11
0
        /// <summary>
        /// Initializes the ANN class by building a small-world graph over all feature
        /// vectors that later queries will be compared against.
        /// </summary>
        /// <param name="database">All feature vectors of its world.</param>
        public ANN(IEnumerable<NamedFeatureVector> database)
        {
            // Materialize the input once as a read-only snapshot before building.
            IReadOnlyList<NamedFeatureVector> vectors = database.ToList().AsReadOnly();

            world = new SmallWorld<NamedFeatureVector, double>(ANNDistance);

            var parameters = new SmallWorld<NamedFeatureVector, double>.Parameters();
            parameters.EnableDistanceCacheForConstruction = true;

            // The listener is attached to the build event source only while the graph is built.
            using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
            {
                world.BuildGraph(vectors, RandomUtil.ThreadSafeRandom, parameters);
            }
        }
Beispiel #12
0
        /// <summary>
        /// Builds a graph over the test vectors with default parameters and asserts, for each
        /// vector, that a 20-NN search returns 20 results whose closest entry is the vector
        /// itself at a distance within <c>FloatError</c> of zero.
        /// </summary>
        public void KNNSearchTest()
        {
            var world    = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
            var defaults = new SmallWorld<float[], float>.Parameters();

            // Fixed seed is passed to the build so the run is repeatable.
            world.BuildGraph(this.vectors, new Random(42), defaults);

            for (int query = 0; query < this.vectors.Count; query++)
            {
                var hits    = world.KNNSearch(this.vectors[query], 20);
                var closest = hits.OrderBy(hit => hit.Distance).First();

                Assert.AreEqual(20, hits.Count);
                Assert.AreEqual(query, closest.Id);
                Assert.AreEqual(0, closest.Distance, FloatError);
            }
        }
Beispiel #13
0
        /// <summary>
        /// Creates a new world and fills it with locations for every coordinate pair in
        /// the inclusive range 0..10 on both axes.
        /// </summary>
        /// <returns>The populated world.</returns>
        internal SmallWorld CreateWorld()
        {
            // Two random draws are made but their results are not used: the size is fixed
            // at 10 below. The calls are kept so any effect on the generator is unchanged.
            // NOTE(review): looks like leftover debugging - confirm whether a random size was intended.
            RandomNumberGenerator.NumberBetween(0, 5);
            RandomNumberGenerator.NumberBetween(0, 5);

            const int lastIndex = 10;
            var world = new SmallWorld();

            for (int first = 0; first <= lastIndex; first++)
            {
                for (int second = 0; second <= lastIndex; second++)
                {
                    // Third argument is half of the larger coordinate (integer division).
                    int half;
                    if (first > second)
                    {
                        half = first / 2;
                    }
                    else
                    {
                        half = second / 2;
                    }

                    world.CreateLocationAtPoint(first, second, half);
                }
            }

            return world;
        }
Beispiel #14
0
        /// <summary>
        /// Creates the four locations directly adjacent to (<paramref name="x"/>,
        /// <paramref name="y"/>): x-1, x+1, y-1 and y+1, in that order. Each call passes half
        /// of the larger coordinate (integer division) as third argument; only the last
        /// call also passes <c>true</c>.
        /// </summary>
        /// <param name="world">World that receives the new locations.</param>
        /// <param name="x">First coordinate of the centre point.</param>
        /// <param name="y">Second coordinate of the centre point.</param>
        internal void ExpandWorld(SmallWorld world, int x, int y)
        {
            int xMinus = x - 1;
            int xPlus  = x + 1;
            int yMinus = y - 1;
            int yPlus  = y + 1;

            // Neighbours along the first axis.
            world.CreateLocationAtPoint(xMinus, y, xMinus > y ? xMinus / 2 : y / 2);
            world.CreateLocationAtPoint(xPlus, y, xPlus > y ? xPlus / 2 : y / 2);

            // Neighbours along the second axis.
            world.CreateLocationAtPoint(x, yMinus, x > yMinus ? x / 2 : yMinus / 2);
            world.CreateLocationAtPoint(x, yPlus, x > yPlus ? x / 2 : yPlus / 2, true);
        }
Beispiel #15
0
        /// <summary>
        /// Generates <c>SampleSize</c> random vectors, adds them to an HNSW graph in batches
        /// of <c>SampleIncrSize</c>, and writes both the vectors and the serialized graph to disk.
        /// </summary>
        /// <param name="pathPrefix">
        /// Common prefix of the two output files; <c>VectorsPathSuffix</c> and
        /// <c>GraphPathSuffix</c> are appended to it after a dot.
        /// </param>
        private static void BuildAndSave(string pathPrefix)
        {
            var world = new SmallWorld <float[], float>(CosineDistance.SIMDForUnits, DefaultRandomGenerator.Instance, new Parameters()
            {
                EnableDistanceCacheForConstruction = true, InitialDistanceCacheSize = SampleSize, NeighbourHeuristic = NeighbourSelectionHeuristic.SelectHeuristic, KeepPrunedConnections = true, ExpandBestSelection = true
            });

            Console.Write($"Generating {SampleSize} sample vectors... ");
            var clock         = Stopwatch.StartNew();
            var sampleVectors = RandomVectors(Dimensionality, SampleSize);

            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

            Console.WriteLine("Building HNSW graph... ");
            using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
            {
                clock = Stopwatch.StartNew();

                // Round up so a trailing partial batch is still added. With truncating division
                // the remainder was left out of the graph while the full vector list was saved,
                // leaving the two output files inconsistent.
                var batchCount = (SampleSize + SampleIncrSize - 1) / SampleIncrSize;

                for (int i = 0; i < batchCount; i++)
                {
                    // Take() stops at the end of the sequence, so the last batch may be shorter.
                    world.AddItems(sampleVectors.Skip(i * SampleIncrSize).Take(SampleIncrSize).ToArray());
                    Console.WriteLine($"\nAt {i+1} of {batchCount}  Elapsed: {clock.ElapsedMilliseconds} ms.\n");
                }
                Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
            }

            // C# interpolation is "{expr}", not "${expr}": the former "$" was printed literally
            // in front of the path.
            Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
            clock = Stopwatch.StartNew();

            // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted data; kept so the
            // vectors file format stays unchanged.
            BinaryFormatter formatter = new BinaryFormatter();

            // Serialize straight into the target file instead of an undisposed MemoryStream
            // followed by a second copy via ToArray().
            using (var vectorsFile = File.Open($"{pathPrefix}.{VectorsPathSuffix}", FileMode.Create))
            {
                formatter.Serialize(vectorsFile, sampleVectors);
            }

            using (var f = File.Open($"{pathPrefix}.{GraphPathSuffix}", FileMode.Create))
            {
                world.SerializeGraph(f);
            }

            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
        }
Beispiel #16
0
        /// <summary>
        /// Builds an HNSW graph over generated 100-dimensional vectors, reports build time and
        /// serialized size, then times 100 repeated top-20 searches for one random query.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const int dimensions = 100;

            var settings = new SmallWorld<float[], float>.Parameters();
            settings.M           = 50;
            settings.LevelLambda = 1 / Math.Log(15);

            // Unseeded generator: input vectors and the query differ between runs.
            var r       = new Random();
            var vectors = GetFloatVectors(dimensions, r);
            var graph   = new SmallWorld<float[], float>(CosineDistance.SIMD);

            var buildTimer = Stopwatch.StartNew();
            graph.BuildGraph(vectors, new Random(42), settings);
            buildTimer.Stop();

            Console.WriteLine($"graph build for {vectors.Count} items in {buildTimer.Elapsed}");

            byte[] serialized = graph.SerializeGraph();
            Console.WriteLine($"graph serialized in {serialized.Length} bytes");

            float[] query = GetRandomVector(dimensions, r);

            // The same query is searched 100 times; each run is timed from zero and its
            // results are discarded - only the elapsed time is printed.
            var searchTimer = new Stopwatch();
            for (int run = 0; run < 100; run++)
            {
                searchTimer.Restart();
                graph.KNNSearch(query, 20);
                searchTimer.Stop();

                Console.WriteLine($"Top 20 items retrieved in {searchTimer.Elapsed}");
            }
        }