/// <summary>
/// Loads the sample vectors and the serialized HNSW graph stored under
/// <paramref name="pathPrefix"/>, then runs a 10-nearest-neighbour search for
/// <c>TestSize</c> freshly generated vectors in parallel, printing timings to the console.
/// </summary>
/// <param name="pathPrefix">
/// Common prefix of the two input files: <c>{pathPrefix}.{VectorsPathSuffix}</c> and
/// <c>{pathPrefix}.{GraphPathSuffix}</c>.
/// </param>
private static void LoadAndSearch(string pathPrefix)
{
    Console.Write("Loading HNSW graph... ");
    Stopwatch clock = Stopwatch.StartNew();

    // NOTE(review): BinaryFormatter is unsafe on untrusted data and is removed in .NET 9.
    // It is kept here only because the vectors file is written with it; consider migrating
    // both sides to another serializer.
    List<float[]> sampleVectors;
    using (var vectorsFile = File.OpenRead($"{pathPrefix}.{VectorsPathSuffix}"))
    {
        // Deserialize straight from the file instead of buffering the whole file in an
        // undisposed MemoryStream.
        var formatter = new BinaryFormatter();
        sampleVectors = (List<float[]>)formatter.Deserialize(vectorsFile);
    }

    SmallWorld<float[], float> world;
    using (var graphFile = File.OpenRead($"{pathPrefix}.{GraphPathSuffix}"))
    {
        world = SmallWorld<float[], float>.DeserializeGraph(
            sampleVectors,
            CosineDistance.SIMDForUnits,
            DefaultRandomGenerator.Instance,
            graphFile);
    }

    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

    Console.Write($"Generating {TestSize} test vectos... ");
    clock = Stopwatch.StartNew();
    var vectors = RandomVectors(Dimensionality, TestSize);
    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

    Console.WriteLine("Running search agains the graph... ");

    // The listener stays alive for the duration of the search and is disposed afterwards.
    using (var listener = new MetricsEventListener(EventSources.GraphSearchEventSource.Instance))
    {
        clock = Stopwatch.StartNew();
        Parallel.ForEach(vectors, vector =>
        {
            // Only the timing matters here; the search results are discarded.
            world.KNNSearch(vector, 10);
        });
        Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
    }
}
/// <summary>
/// Generates <c>SampleSize</c> random vectors, builds an HNSW graph over them and writes
/// both the vectors and the serialized graph to disk, printing timings to the console.
/// </summary>
/// <param name="pathPrefix">
/// Common prefix of the two output files: <c>{pathPrefix}.{VectorsPathSuffix}</c> and
/// <c>{pathPrefix}.{GraphPathSuffix}</c>.
/// </param>
private static void BuildAndSave(string pathPrefix)
{
    var parameters = new Parameters
    {
        EnableDistanceCacheForConstruction = true
    };
    var world = new SmallWorld<float[], float>(CosineDistance.NonOptimized);

    Console.Write($"Generating {SampleSize} sample vectos... ");
    Stopwatch clock = Stopwatch.StartNew();
    List<float[]> sampleVectors = RandomVectors(Dimensionality, SampleSize);
    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

    Console.WriteLine("Building HNSW graph... ");

    // The listener stays alive for the duration of the build and is disposed afterwards.
    using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
    {
        clock = Stopwatch.StartNew();
        world.BuildGraph(sampleVectors, new Random(42), parameters);
        Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
    }

    // C# interpolation uses "{...}", not "${...}"; the stray '$' used to be printed
    // literally in front of the path.
    Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
    clock = Stopwatch.StartNew();

    // NOTE(review): BinaryFormatter is obsolete and removed in .NET 9; kept because the
    // matching loader reads this format.
    var formatter = new BinaryFormatter();

    // Serialize straight into the file: no intermediate MemoryStream to dispose and no
    // extra copy of the payload via ToArray().
    using (var vectorsFile = File.Create($"{pathPrefix}.{VectorsPathSuffix}"))
    {
        formatter.Serialize(vectorsFile, sampleVectors);
    }

    File.WriteAllBytes($"{pathPrefix}.{GraphPathSuffix}", world.SerializeGraph());
    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
}
/// <summary>
/// Builds a new HNSW graph over <paramref name="vectors"/> and installs it as the current graph.
/// This is an expensive call and should be made only when necessary.
/// The HNSW graph search approach is described in:
/// https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
/// </summary>
/// <param name="vectors">Vectors to index in the rebuilt graph.</param>
private void RebuildGraph(List<float[]> vectors)
{
    if (_verbose)
    {
        Debug.Log("Clusters: Rebuilding graph...");
    }

    // Parameters chosen to make sense for the expected number of rioters (may need adjusting).
    const int maxNeighbours = 15;
    var parameters = new SmallWorld<float[], float>.Parameters();
    // Maximum number of neighbours to connect with at each layer.
    parameters.M = maxNeighbours;
    // Layer/level logarithmic decrease factor.
    parameters.LevelLambda = 1 / Math.Log(maxNeighbours);

    // Cosine distance is used for approximation and speed only; a precise Euclidean
    // distance is not needed here.
    var rebuilt = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
    rebuilt.BuildGraph(vectors, new System.Random(11), parameters);

    // The graph is built outside the lock; only the reference swap is guarded.
    lock (_lock)
    {
        _graph = rebuilt;
    }

    if (_verbose)
    {
        Debug.Log("Clusters: Done rebuilding graph.");
    }
}
/// <summary>
/// Builds a graph with the <c>SelectHeuristic</c> neighbour selection and checks that
/// searching for every indexed vector returns 20 results whose closest hit is the vector
/// itself at (approximately) zero distance.
/// </summary>
/// <param name="expandBestSelection">Value assigned to <c>Parameters.ExpandBestSelection</c>.</param>
/// <param name="keepPrunedConnections">Value assigned to <c>Parameters.KeepPrunedConnections</c>.</param>
public void KNNSearchTestAlgorithm4(bool expandBestSelection, bool keepPrunedConnections)
{
    var parameters = new SmallWorld<float[], float>.Parameters();
    parameters.NeighbourHeuristic = NeighbourSelectionHeuristic.SelectHeuristic;
    parameters.ExpandBestSelection = expandBestSelection;
    parameters.KeepPrunedConnections = keepPrunedConnections;

    var graph = new SmallWorld<float[], float>(
        CosineDistance.NonOptimized,
        DefaultRandomGenerator.Instance,
        parameters);
    graph.AddItems(vectors);

    int mismatches = 0;
    float worstDistance = float.MinValue;

    for (int index = 0; index < vectors.Count; ++index)
    {
        var neighbours = graph.KNNSearch(vectors[index], 20);
        var closest = neighbours.OrderBy(candidate => candidate.Distance).First();
        Assert.AreEqual(20, neighbours.Count);

        // Count the queries whose nearest hit is not the query vector itself.
        mismatches += closest.Id != index ? 1 : 0;
        worstDistance = Math.Max(worstDistance, closest.Distance);
    }

    Assert.AreEqual(0, mismatches);
    Assert.AreEqual(0, worstDistance, FloatError);
}
/// <summary>
/// Builds a graph with default parameters and checks that searching for every indexed
/// vector returns 20 results whose closest hit is the vector itself at (approximately)
/// zero distance.
/// </summary>
public void KNNSearchTest()
{
    var defaults = new SmallWorld<float[], float>.Parameters();
    var graph = new SmallWorld<float[], float>(
        CosineDistance.NonOptimized,
        DefaultRandomGenerator.Instance,
        defaults);
    graph.AddItems(vectors);

    int mismatches = 0;
    float worstDistance = float.MinValue;

    for (int index = 0; index < vectors.Count; ++index)
    {
        var neighbours = graph.KNNSearch(vectors[index], 20);
        var closest = neighbours.OrderBy(candidate => candidate.Distance).First();
        Assert.AreEqual(20, neighbours.Count);

        // Count the queries whose nearest hit is not the query vector itself.
        mismatches += closest.Id != index ? 1 : 0;
        worstDistance = Math.Max(worstDistance, closest.Distance);
    }

    Assert.AreEqual(0, mismatches);
    Assert.AreEqual(0, worstDistance, FloatError);
}
/// <summary>
/// Serializes a graph to a stream, deserializes it into a new instance and checks that
/// both graphs produce the same <c>Print()</c> output.
/// </summary>
public void SerializeDeserializeTest()
{
    // The unused 'buffer' local was removed and the stream is now disposed deterministically.
    using (var stream = new MemoryStream())
    {
        string original;

        // Restrict the scope of the original graph so that only its printed form and the
        // serialized bytes remain available below.
        {
            var parameters = new SmallWorld<float[], float>.Parameters()
            {
                M = 15,
                LevelLambda = 1 / Math.Log(15),
            };

            var graph = new SmallWorld<float[], float>(
                CosineDistance.NonOptimized,
                DefaultRandomGenerator.Instance,
                parameters);
            graph.AddItems(vectors);

            graph.SerializeGraph(stream);
            original = graph.Print();
        }

        // Rewind so deserialization starts at the first written byte.
        stream.Position = 0;

        var copy = SmallWorld<float[], float>.DeserializeGraph(
            vectors,
            CosineDistance.NonOptimized,
            DefaultRandomGenerator.Instance,
            stream);

        Assert.AreEqual(original, copy.Print());
    }
}
/// <summary>
/// Entry point. Generates 40,000 random 20-dimensional vectors, normalizes each of them,
/// builds the graph and prints the build time in milliseconds.
/// </summary>
public static void Main()
{
    const int vectorCount = 40_000;
    const int dimensions = 20;

    var parameters = new SmallWorld<float[], float>.Parameters
    {
        EnableDistanceCacheForConstruction = true
    };
    var graph = new SmallWorld<float[], float>(CosineDistance.SIMDForUnits);

    // Fixed seed so every run generates the same vectors.
    var generator = new Random(42);
    var samples = new List<float[]>();

    for (int n = 0; n < vectorCount; n++)
    {
        var components = new float[dimensions];
        for (int d = 0; d < dimensions; d++)
        {
            components[d] = (float)generator.NextDouble();
        }

        VectorUtils.NormalizeSIMD(components);
        samples.Add(components);
    }

    // Only the graph construction is timed, not the vector generation above.
    var timer = Stopwatch.StartNew();
    graph.BuildGraph(samples, new Random(42), parameters);
    Console.WriteLine(timer.Elapsed.TotalMilliseconds);
}
/// <summary>
/// Serializes a built graph to a byte array, loads it into a second instance and checks
/// that both graphs produce the same <c>Print()</c> output.
/// </summary>
public void SerializeDeserializeTest()
{
    byte[] serialized;
    string expectedDump;

    // The source graph lives only inside this block; afterwards only its bytes and its
    // printed form are available.
    {
        var parameters = new SmallWorld<float[], float>.Parameters();
        parameters.M = 15;
        parameters.LevelLambda = 1 / Math.Log(15);

        var source = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
        source.BuildGraph(this.vectors, new Random(42), parameters);

        serialized = source.SerializeGraph();
        expectedDump = source.Print();
    }

    var restored = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
    restored.DeserializeGraph(this.vectors, serialized);

    Assert.AreEqual(expectedDump, restored.Print());
}
/// <summary>
/// Creates a new world with a location at (<paramref name="x"/>, <paramref name="y"/>)
/// and then expands the world around that point via <c>ExpandWorld</c>.
/// </summary>
/// <param name="x">X coordinate of the initial location.</param>
/// <param name="y">Y coordinate of the initial location.</param>
/// <returns>The newly created world.</returns>
internal SmallWorld CreateWorld(int x, int y)
{
    var world = new SmallWorld();

    // Seed location first, then its surroundings.
    world.CreateLocationAtPoint(x, y, 0, true);
    ExpandWorld(world, x, y);

    return world;
}
/// <summary>
/// Creates the view of the map: one <c>TileView</c> per map tile, added to the grid and
/// registered in <c>TilesView</c> under its tile.
/// </summary>
/// <param name="_map">Reference to the Map model</param>
/// <param name="_mapViewGrid">WPF Grid element</param>
public MapView(SmallWorld.IMap _map, Grid _mapViewGrid)
{
    Map = _map;
    MapViewGrid = _mapViewGrid;
    TilesView = new Dictionary<ITile, TileView>();

    // Rows in the outer loop, columns in the inner one, so children are added row by row.
    for (int row = 0; row < Map.MapSize; row++)
    {
        for (int column = 0; column < Map.MapSize; column++)
        {
            var model = Map.Tiles[column, row];
            var view = new TileView(model);

            MapViewGrid.Children.Add(view);
            TilesView.Add(model, view);
        }
    }
}
/// <summary>
/// Initializes the ANN class from all feature vectors to which a query will be compared,
/// building the search graph over them.
/// </summary>
/// <param name="database">All feature vectors of its world.</param>
public ANN(IEnumerable<NamedFeatureVector> database)
{
    // Snapshot the input once as a read-only list.
    IReadOnlyList<NamedFeatureVector> vectors = database.ToList().AsReadOnly();

    var parameters = new SmallWorld<NamedFeatureVector, double>.Parameters();
    parameters.EnableDistanceCacheForConstruction = true;

    world = new SmallWorld<NamedFeatureVector, double>(ANNDistance);

    // The listener stays alive for the duration of the build and is disposed afterwards.
    using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
    {
        world.BuildGraph(vectors, RandomUtil.ThreadSafeRandom, parameters);
    }
}
/// <summary>
/// Builds a graph with default parameters and checks, for every indexed vector, that a
/// 20-nearest-neighbour search returns 20 results and that the closest one is the vector
/// itself at (approximately) zero distance.
/// </summary>
public void KNNSearchTest()
{
    var defaults = new SmallWorld<float[], float>.Parameters();
    var graph = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
    graph.BuildGraph(this.vectors, new Random(42), defaults);

    int index = 0;
    while (index < this.vectors.Count)
    {
        var neighbours = graph.KNNSearch(this.vectors[index], 20);
        var closest = neighbours.OrderBy(candidate => candidate.Distance).First();

        Assert.AreEqual(20, neighbours.Count);
        Assert.AreEqual(index, closest.Id);
        Assert.AreEqual(0, closest.Distance, FloatError);

        ++index;
    }
}
/// <summary>
/// Creates a new world and fills it with locations for every point (i, j) with both
/// coordinates in 0..10 inclusive. The third argument passed for each location is half of
/// the larger coordinate (integer division).
/// </summary>
/// <returns>The newly created world.</returns>
internal SmallWorld CreateWorld()
{
    // NOTE(review): two random numbers are drawn but their values are immediately replaced
    // by the fixed bound 10 below. The draws are kept so the generator is invoked exactly
    // as before; confirm whether a random world size was intended.
    int outerBound = RandomNumberGenerator.NumberBetween(0, 5);
    int innerBound = RandomNumberGenerator.NumberBetween(0, 5);
    innerBound = 10;
    outerBound = 10;

    var world = new SmallWorld();

    for (int outer = 0; outer <= outerBound; outer++)
    {
        for (int inner = 0; inner <= innerBound; inner++)
        {
            int third = outer > inner ? outer / 2 : inner / 2;
            world.CreateLocationAtPoint(outer, inner, third);
        }
    }

    return world;
}
/// <summary>
/// Creates the four locations directly adjacent to (<paramref name="x"/>, <paramref name="y"/>):
/// left, right, then the two vertical neighbours. The third argument passed for each
/// location is half of the larger coordinate (integer division). Only the last location,
/// at (x, y + 1), is created with the extra <c>true</c> argument.
/// </summary>
/// <param name="world">World to add the locations to.</param>
/// <param name="x">X coordinate of the centre point.</param>
/// <param name="y">Y coordinate of the centre point.</param>
internal void ExpandWorld(SmallWorld world, int x, int y)
{
    int left = x - 1;
    int right = x + 1;
    int below = y - 1;
    int above = y + 1;

    // Horizontal neighbours.
    world.CreateLocationAtPoint(left, y, left > y ? left / 2 : y / 2);
    world.CreateLocationAtPoint(right, y, right > y ? right / 2 : y / 2);

    // Vertical neighbours; the final call uses the four-argument form.
    world.CreateLocationAtPoint(x, below, x > below ? x / 2 : below / 2);
    world.CreateLocationAtPoint(x, above, x > above ? x / 2 : above / 2, true);
}
/// <summary>
/// Generates <c>SampleSize</c> random vectors, adds them to an HNSW graph in batches of
/// <c>SampleIncrSize</c>, and writes both the vectors and the serialized graph to disk,
/// printing progress and timings to the console.
/// </summary>
/// <param name="pathPrefix">
/// Common prefix of the two output files: <c>{pathPrefix}.{VectorsPathSuffix}</c> and
/// <c>{pathPrefix}.{GraphPathSuffix}</c>.
/// </param>
private static void BuildAndSave(string pathPrefix)
{
    var world = new SmallWorld<float[], float>(
        CosineDistance.SIMDForUnits,
        DefaultRandomGenerator.Instance,
        new Parameters()
        {
            EnableDistanceCacheForConstruction = true,
            InitialDistanceCacheSize = SampleSize,
            NeighbourHeuristic = NeighbourSelectionHeuristic.SelectHeuristic,
            KeepPrunedConnections = true,
            ExpandBestSelection = true
        });

    Console.Write($"Generating {SampleSize} sample vectos... ");
    var clock = Stopwatch.StartNew();
    var sampleVectors = RandomVectors(Dimensionality, SampleSize);
    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

    Console.WriteLine("Building HNSW graph... ");

    // The listener stays alive for the duration of the build and is disposed afterwards.
    using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
    {
        clock = Stopwatch.StartNew();

        // Round up so a final partial batch is not silently dropped when SampleSize is not
        // a multiple of SampleIncrSize. Otherwise the saved vectors file would contain
        // items that were never added to the graph.
        int batchCount = (SampleSize + SampleIncrSize - 1) / SampleIncrSize;

        for (int i = 0; i < batchCount; i++)
        {
            // Take() caps the last batch at the remaining items.
            world.AddItems(sampleVectors.Skip(i * SampleIncrSize).Take(SampleIncrSize).ToArray());
            Console.WriteLine($"\nAt {i + 1} of {batchCount} Elapsed: {clock.ElapsedMilliseconds} ms.\n");
        }

        Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
    }

    // C# interpolation uses "{...}", not "${...}"; the stray '$' used to be printed
    // literally in front of the path.
    Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
    clock = Stopwatch.StartNew();

    // NOTE(review): BinaryFormatter is obsolete and removed in .NET 9; kept because the
    // matching loader reads this format.
    var formatter = new BinaryFormatter();

    // Serialize straight into the file: no intermediate MemoryStream to dispose and no
    // extra copy of the payload via ToArray().
    using (var vectorsFile = File.Create($"{pathPrefix}.{VectorsPathSuffix}"))
    {
        formatter.Serialize(vectorsFile, sampleVectors);
    }

    using (var graphFile = File.Open($"{pathPrefix}.{GraphPathSuffix}", FileMode.Create))
    {
        world.SerializeGraph(graphFile);
    }

    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
}
/// <summary>
/// Demo entry point: builds a graph over generated vectors, reports the build time and the
/// serialized size, then times 100 repeated top-20 searches for one random query vector.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int dimensions = 100;

    var parameters = new SmallWorld<float[], float>.Parameters();
    parameters.M = 50;
    parameters.LevelLambda = 1 / Math.Log(15);

    var r = new Random();
    var vectors = GetFloatVectors(dimensions, r);
    var graph = new SmallWorld<float[], float>(CosineDistance.SIMD);

    // Time the graph construction.
    var buildTimer = Stopwatch.StartNew();
    graph.BuildGraph(vectors, new Random(42), parameters);
    buildTimer.Stop();
    Console.WriteLine($"graph build for {vectors.Count} items in {buildTimer.Elapsed}");

    byte[] serialized = graph.SerializeGraph();
    Console.WriteLine($"graph serialized in {serialized.Length} bytes");

    float[] query = GetRandomVector(dimensions, r);

    // The same query is searched 100 times; each run is timed separately.
    int run = 0;
    while (run < 100)
    {
        var searchTimer = Stopwatch.StartNew();
        var best20 = graph.KNNSearch(query, 20);
        searchTimer.Stop();
        Console.WriteLine($"Top 20 items retrieved in {searchTimer.Elapsed}");
        run++;
    }

    // To inspect the hits, print each item's Id and Distance from best20 inside the loop:
    //     Console.WriteLine($"{item.Id} -> {item.Distance}");
}