/// <summary>
/// Generates <c>SampleSize</c> random vectors, builds an HNSW graph over them in a single
/// <c>BuildGraph</c> call, and writes both the vectors and the serialized graph to disk.
/// </summary>
/// <param name="pathPrefix">
/// Prefix of the output files: vectors are written to <c>{pathPrefix}.{VectorsPathSuffix}</c>
/// and the graph to <c>{pathPrefix}.{GraphPathSuffix}</c>.
/// </param>
private static void BuildAndSave(string pathPrefix)
{
    var parameters = new Parameters();
    parameters.EnableDistanceCacheForConstruction = true;
    var world = new SmallWorld<float[], float>(CosineDistance.NonOptimized);

    Console.Write($"Generating {SampleSize} sample vectors... ");
    Stopwatch clock = Stopwatch.StartNew();
    List<float[]> sampleVectors = RandomVectors(Dimensionality, SampleSize);
    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

    Console.WriteLine("Building HNSW graph... ");

    // The listener is created over the graph-build event source and disposed as soon as the build ends.
    using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
    {
        clock = Stopwatch.StartNew();
        world.BuildGraph(sampleVectors, new Random(42), parameters);
        Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
    }

    // C# interpolation holes are written as {expr}; a leading '$' would be printed literally.
    Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
    clock = Stopwatch.StartNew();

    // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted input. It is kept here so the
    // on-disk format stays unchanged; replacing it requires migrating the code that reads this file too.
    var formatter = new BinaryFormatter();

    // Serialize straight into the file instead of buffering the whole vector set in memory twice.
    using (var vectorsFile = File.Create($"{pathPrefix}.{VectorsPathSuffix}"))
    {
        formatter.Serialize(vectorsFile, sampleVectors);
    }

    File.WriteAllBytes($"{pathPrefix}.{GraphPathSuffix}", world.SerializeGraph());
    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
}
// Round-trip test: serializes a graph built over `vectors` into an in-memory stream,
// deserializes it again, and checks that both graphs print identically.
public void SerializeDeserializeTest()
{
    string original;

    using (var stream = new MemoryStream())
    {
        // restrict scope of original graph
        {
            var parameters = new SmallWorld<float[], float>.Parameters()
            {
                M = 15,
                LevelLambda = 1 / Math.Log(15),
            };

            var graph = new SmallWorld<float[], float>(CosineDistance.NonOptimized, DefaultRandomGenerator.Instance, parameters);
            graph.AddItems(vectors);
            graph.SerializeGraph(stream);
            original = graph.Print();
        }

        // Rewind so deserialization reads from the start of what was just written.
        stream.Position = 0;

        var copy = SmallWorld<float[], float>.DeserializeGraph(vectors, CosineDistance.NonOptimized, DefaultRandomGenerator.Instance, stream);
        Assert.AreEqual(original, copy.Print());
    }
}
// Round-trip test: a graph built over this.vectors is serialized to a byte buffer,
// loaded into a fresh SmallWorld instance, and the printed forms are compared.
public void SerializeDeserializeTest()
{
    byte[] serialized;
    string expectedDump;

    // The source graph lives only inside this block so that nothing below can touch it by accident.
    {
        var settings = new SmallWorld<float[], float>.Parameters()
        {
            M = 15,
            LevelLambda = 1 / Math.Log(15),
        };

        SmallWorld<float[], float> source = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
        source.BuildGraph(this.vectors, new Random(42), settings);

        serialized = source.SerializeGraph();
        expectedDump = source.Print();
    }

    SmallWorld<float[], float> restored = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
    restored.DeserializeGraph(this.vectors, serialized);

    Assert.AreEqual(expectedDump, restored.Print());
}
/// <summary>
/// Generates <c>SampleSize</c> random vectors, adds them to an HNSW graph in batches of
/// <c>SampleIncrSize</c>, and writes both the vectors and the serialized graph to disk.
/// </summary>
/// <param name="pathPrefix">
/// Prefix of the output files: vectors are written to <c>{pathPrefix}.{VectorsPathSuffix}</c>
/// and the graph to <c>{pathPrefix}.{GraphPathSuffix}</c>.
/// </param>
private static void BuildAndSave(string pathPrefix)
{
    var world = new SmallWorld<float[], float>(
        CosineDistance.SIMDForUnits,
        DefaultRandomGenerator.Instance,
        new Parameters()
        {
            EnableDistanceCacheForConstruction = true,
            InitialDistanceCacheSize = SampleSize,
            NeighbourHeuristic = NeighbourSelectionHeuristic.SelectHeuristic,
            KeepPrunedConnections = true,
            ExpandBestSelection = true
        });

    Console.Write($"Generating {SampleSize} sample vectors... ");
    var clock = Stopwatch.StartNew();
    var sampleVectors = RandomVectors(Dimensionality, SampleSize);
    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

    Console.WriteLine("Building HNSW graph... ");

    // The listener is created over the graph-build event source and disposed as soon as the build ends.
    using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
    {
        clock = Stopwatch.StartNew();

        // Round up so a trailing partial batch is still added; plain integer division would drop it
        // and the saved vector file would then contain items that are missing from the graph.
        var batchCount = (SampleSize + SampleIncrSize - 1) / SampleIncrSize;
        for (int i = 0; i < batchCount; i++)
        {
            // Take() stops at the end of the sequence, so the last batch may be shorter.
            world.AddItems(sampleVectors.Skip(i * SampleIncrSize).Take(SampleIncrSize).ToArray());
            Console.WriteLine($"\nAt {i + 1} of {batchCount} Elapsed: {clock.ElapsedMilliseconds} ms.\n");
        }

        Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
    }

    // C# interpolation holes are written as {expr}; a leading '$' would be printed literally.
    Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
    clock = Stopwatch.StartNew();

    // NOTE(review): BinaryFormatter is obsolete and unsafe for untrusted input. It is kept here so the
    // on-disk format stays unchanged; replacing it requires migrating the code that reads this file too.
    var formatter = new BinaryFormatter();

    // Serialize straight into the file instead of buffering the whole vector set in memory twice.
    using (var vectorsFile = File.Create($"{pathPrefix}.{VectorsPathSuffix}"))
    {
        formatter.Serialize(vectorsFile, sampleVectors);
    }

    using (var graphFile = File.Open($"{pathPrefix}.{GraphPathSuffix}", FileMode.Create))
    {
        world.SerializeGraph(graphFile);
    }

    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
}
/// <summary>
/// Console entry point: builds a graph over generated float vectors, reports the build time and
/// the serialized size, then times the same 20-nearest-neighbour query 100 times.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // NOTE(review): LevelLambda is derived from Math.Log(15) while M is 50 — confirm this is intentional.
    var parameters = new SmallWorld<float[], float>.Parameters()
    {
        M = 50,
        LevelLambda = 1 / Math.Log(15),
    };

    var r = new Random();
    const int dimensions = 100;
    var vectors = GetFloatVectors(dimensions, r);
    var graph = new SmallWorld<float[], float>(CosineDistance.SIMD);

    // Time the graph construction only.
    var buildTimer = Stopwatch.StartNew();
    graph.BuildGraph(vectors, new Random(42), parameters);
    buildTimer.Stop();
    Console.WriteLine($"graph build for {vectors.Count} items in {buildTimer.Elapsed}");

    byte[] buffer = graph.SerializeGraph();
    Console.WriteLine($"graph serialized in {buffer.Length} bytes");

    float[] query = GetRandomVector(dimensions, r);

    // Repeat the same query so that individual timings can be compared run to run.
    for (var attempt = 0; attempt < 100; attempt++)
    {
        var queryTimer = Stopwatch.StartNew();
        var nearest = graph.KNNSearch(query, 20);
        queryTimer.Stop();
        Console.WriteLine($"Top 20 items retrieved in {queryTimer.Elapsed}");
    }

    // To inspect the matches, print each returned item's Id and Distance inside the loop above.
}