/// <summary>
        /// Generates random sample vectors, builds an HNSW graph over them in a single
        /// <c>BuildGraph</c> call, and writes both the vectors and the serialized graph to disk.
        /// </summary>
        /// <param name="pathPrefix">
        /// Prefix of the output files. The vectors go to <c>{pathPrefix}.{VectorsPathSuffix}</c> and the
        /// graph to <c>{pathPrefix}.{GraphPathSuffix}</c>.
        /// </param>
        private static void BuildAndSave(string pathPrefix)
        {
            var parameters = new Parameters();
            parameters.EnableDistanceCacheForConstruction = true;

            var world = new SmallWorld <float[], float>(CosineDistance.NonOptimized);

            Console.Write($"Generating {SampleSize} sample vectors... ");
            Stopwatch      clock         = Stopwatch.StartNew();
            List <float[]> sampleVectors = RandomVectors(Dimensionality, SampleSize);
            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

            Console.WriteLine("Building HNSW graph... ");
            // The listener is only kept alive for the duration of the build; it is not used directly here.
            using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
            {
                clock = Stopwatch.StartNew();
                world.BuildGraph(sampleVectors, new Random(42), parameters);
                Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
            }

            // No '$' before the brace: inside an interpolated string it would be printed literally.
            Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
            clock = Stopwatch.StartNew();

            // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted input. It is kept here
            // only so the vectors file stays readable by whatever loads it with the same formatter.
            var formatter = new BinaryFormatter();

            // Serialize straight into the file so the stream is disposed and no intermediate copy is made.
            using (var vectorsFile = File.Create($"{pathPrefix}.{VectorsPathSuffix}"))
            {
                formatter.Serialize(vectorsFile, sampleVectors);
            }

            File.WriteAllBytes($"{pathPrefix}.{GraphPathSuffix}", world.SerializeGraph());
            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
        }
Beispiel #2
0
        /// <summary>
        /// Round-trips a graph through a stream: builds a graph from <c>vectors</c>, serializes it,
        /// deserializes a copy, and asserts that both graphs print identically.
        /// </summary>
        public void SerializeDeserializeTest()
        {
            string original;

            // The stream is disposed once the round trip has been verified.
            using (var stream = new MemoryStream())
            {
                // restrict scope of original graph
                {
                    var parameters = new SmallWorld <float[], float> .Parameters()
                    {
                        M           = 15,
                        LevelLambda = 1 / Math.Log(15),
                    };

                    var graph = new SmallWorld <float[], float>(CosineDistance.NonOptimized, DefaultRandomGenerator.Instance, parameters);
                    graph.AddItems(vectors);

                    graph.SerializeGraph(stream);
                    original = graph.Print();
                }

                // Rewind so deserialization reads from the start of what was just written.
                stream.Position = 0;

                var copy = SmallWorld <float[], float> .DeserializeGraph(vectors, CosineDistance.NonOptimized, DefaultRandomGenerator.Instance, stream);

                Assert.AreEqual(original, copy.Print());
            }
        }
Beispiel #3
0
        /// <summary>
        /// Round-trips a graph through a byte buffer: builds a graph from <c>this.vectors</c>, serializes it,
        /// loads the bytes into a fresh instance, and asserts that both graphs print identically.
        /// </summary>
        public void SerializeDeserializeTest()
        {
            byte[] serialized;
            string expectedDump;

            // The source graph lives only inside this block; afterwards only its bytes and dump remain.
            {
                var settings = new SmallWorld <float[], float> .Parameters()
                {
                    M           = 15,
                    LevelLambda = 1 / Math.Log(15),
                };

                var source = new SmallWorld <float[], float>(CosineDistance.NonOptimized);
                source.BuildGraph(this.vectors, new Random(42), settings);

                serialized   = source.SerializeGraph();
                expectedDump = source.Print();
            }

            var restored = new SmallWorld <float[], float>(CosineDistance.NonOptimized);
            restored.DeserializeGraph(this.vectors, serialized);

            string actualDump = restored.Print();
            Assert.AreEqual(expectedDump, actualDump);
        }
Beispiel #4
0
        /// <summary>
        /// Generates random sample vectors, builds an HNSW graph by adding them in batches of
        /// <c>SampleIncrSize</c>, and writes both the vectors and the serialized graph to disk.
        /// </summary>
        /// <param name="pathPrefix">
        /// Prefix of the output files. The vectors go to <c>{pathPrefix}.{VectorsPathSuffix}</c> and the
        /// graph to <c>{pathPrefix}.{GraphPathSuffix}</c>.
        /// </param>
        private static void BuildAndSave(string pathPrefix)
        {
            var world = new SmallWorld <float[], float>(CosineDistance.SIMDForUnits, DefaultRandomGenerator.Instance, new Parameters()
            {
                EnableDistanceCacheForConstruction = true,
                InitialDistanceCacheSize           = SampleSize,
                NeighbourHeuristic                 = NeighbourSelectionHeuristic.SelectHeuristic,
                KeepPrunedConnections              = true,
                ExpandBestSelection                = true
            });

            Console.Write($"Generating {SampleSize} sample vectors... ");
            var clock         = Stopwatch.StartNew();
            var sampleVectors = RandomVectors(Dimensionality, SampleSize);

            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

            Console.WriteLine("Building HNSW graph... ");
            // The listener is only kept alive for the duration of the build; it is not used directly here.
            using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
            {
                clock = Stopwatch.StartNew();

                // Round up so a trailing partial batch is still added; plain integer division would
                // leave those vectors out of the graph while they are still written to the vectors file.
                var batchCount = (SampleSize + SampleIncrSize - 1) / SampleIncrSize;
                for (int i = 0; i < batchCount; i++)
                {
                    // Take() returns fewer items for the final batch when the sizes do not divide evenly.
                    world.AddItems(sampleVectors.Skip(i * SampleIncrSize).Take(SampleIncrSize).ToArray());
                    Console.WriteLine($"\nAt {i+1} of {batchCount}  Elapsed: {clock.ElapsedMilliseconds} ms.\n");
                }
                Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
            }

            // No '$' before the brace: inside an interpolated string it would be printed literally.
            Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
            clock = Stopwatch.StartNew();

            // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted input. It is kept here
            // only so the vectors file stays readable by whatever loads it with the same formatter.
            var formatter = new BinaryFormatter();

            // Serialize straight into the file so the stream is disposed and no intermediate copy is made.
            using (var vectorsFile = File.Create($"{pathPrefix}.{VectorsPathSuffix}"))
            {
                formatter.Serialize(vectorsFile, sampleVectors);
            }

            using (var graphFile = File.Open($"{pathPrefix}.{GraphPathSuffix}", FileMode.Create))
            {
                world.SerializeGraph(graphFile);
            }

            Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
        }
Beispiel #5
0
        /// <summary>
        /// Small timing harness: builds a graph over generated vectors, reports build time and serialized
        /// size, then times 100 repeated 20-nearest-neighbour searches for a single random query.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const int dimensions = 100;

            var parameters = new SmallWorld <float[], float> .Parameters()
            {
                M           = 50,
                LevelLambda = 1 / Math.Log(15),
            };

            var r       = new Random();
            var vectors = GetFloatVectors(dimensions, r);
            var graph   = new SmallWorld <float[], float>(CosineDistance.SIMD);

            // Time the graph construction only.
            var timer = Stopwatch.StartNew();
            graph.BuildGraph(vectors, new Random(42), parameters);
            timer.Stop();
            TimeSpan buildTime = timer.Elapsed;

            Console.WriteLine($"graph build for {vectors.Count} items in {buildTime}");

            byte[] buffer = graph.SerializeGraph();
            Console.WriteLine($"graph serialized in {buffer.Length} bytes");

            float[] query = GetRandomVector(dimensions, r);

            int remaining = 100;
            while (remaining-- > 0)
            {
                // Restart resets the elapsed time, so each iteration measures a single search.
                timer.Restart();
                var best20 = graph.KNNSearch(query, 20);
                timer.Stop();

                TimeSpan searchTime = timer.Elapsed;
                Console.WriteLine($"Top 20 items retrieved in {searchTime}");
            }

            // The search results themselves are not printed; only the timings are reported.
        }