/// <summary>
/// Builds a fresh HNSW search graph over <paramref name="vectors"/> and then
/// swaps it into <c>_graph</c> while holding <c>_lock</c>.
/// Expensive call, should be called only when necessary.
/// HNSW is described in this article:
/// https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf
/// </summary>
/// <param name="vectors">The vectors the new graph is built from.</param>
private void RebuildGraph(List<float[]> vectors)
{
    if (_verbose)
    {
        Debug.Log("Clusters: Rebuilding graph...");
    }

    // Parameters that make sense for the number of rioters (might need adjusting).
    const int maxNeighbours = 15;
    var buildParameters = new SmallWorld<float[], float>.Parameters();
    // Max number of neighbours to connect with at each layer.
    buildParameters.M = maxNeighbours;
    // Layer/level logarithmic decrease factor.
    buildParameters.LevelLambda = 1 / Math.Log(maxNeighbours);

    // Cosine distance is used for approximation and speed only;
    // a precise Euclidean distance is not needed here.
    var rebuilt = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
    rebuilt.BuildGraph(vectors, new System.Random(11), buildParameters);

    // The graph is fully built before the lock is taken, so the lock only
    // covers the reference assignment.
    lock (_lock)
    {
        _graph = rebuilt;
    }

    if (_verbose)
    {
        Debug.Log("Clusters: Done rebuilding graph.");
    }
}
/// <summary>
/// Generates <c>SampleSize</c> random sample vectors, builds an HNSW graph over them and
/// writes both to disk as <c>{pathPrefix}.{VectorsPathSuffix}</c> and
/// <c>{pathPrefix}.{GraphPathSuffix}</c>. The elapsed time of each phase is written to the console.
/// </summary>
/// <param name="pathPrefix">Path prefix shared by the two output files.</param>
private static void BuildAndSave(string pathPrefix)
{
    var parameters = new Parameters();
    parameters.EnableDistanceCacheForConstruction = true;

    var world = new SmallWorld<float[], float>(CosineDistance.NonOptimized);

    Console.Write($"Generating {SampleSize} sample vectors... ");
    Stopwatch clock = Stopwatch.StartNew();
    List<float[]> sampleVectors = RandomVectors(Dimensionality, SampleSize);
    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");

    Console.WriteLine("Building HNSW graph... ");
    // The listener is never read here; it only needs to exist for the duration of the build.
    // NOTE(review): presumably it reports build metrics while alive - confirm against MetricsEventListener.
    using (var listener = new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
    {
        clock = Stopwatch.StartNew();
        world.BuildGraph(sampleVectors, new Random(42), parameters);
        Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
    }

    // C# interpolation holes are written {expr}; the previous "${expr}" printed a stray '$' before the path.
    Console.Write($"Saving HNSW graph to '{Path.Combine(Directory.GetCurrentDirectory(), pathPrefix)}'... ");
    clock = Stopwatch.StartNew();

    // NOTE(review): BinaryFormatter is obsolete and unsafe on untrusted input. It is kept so the
    // vectors file stays in the format its reader expects; migrate both sides together.
    var formatter = new BinaryFormatter();
    using (var sampleVectorsStream = new MemoryStream())
    {
        formatter.Serialize(sampleVectorsStream, sampleVectors);
        File.WriteAllBytes($"{pathPrefix}.{VectorsPathSuffix}", sampleVectorsStream.ToArray());
    }

    File.WriteAllBytes($"{pathPrefix}.{GraphPathSuffix}", world.SerializeGraph());
    Console.WriteLine($"Done in {clock.ElapsedMilliseconds} ms.");
}
/// <summary>
/// Entry point. Builds an HNSW graph over 40,000 random 20-dimensional vectors
/// (each passed through <c>VectorUtils.NormalizeSIMD</c>) and prints the build
/// time in milliseconds.
/// </summary>
public static void Main()
{
    const int vectorCount = 40_000;
    const int dimensions = 20;

    var parameters = new SmallWorld<float[], float>.Parameters
    {
        EnableDistanceCacheForConstruction = true,
    };
    var graph = new SmallWorld<float[], float>(CosineDistance.SIMDForUnits);

    // Fixed seed so every run generates the same input vectors.
    var vectorsGenerator = new Random(42);
    var randomVectors = new List<float[]>();
    while (randomVectors.Count < vectorCount)
    {
        var vector = new float[dimensions];
        for (int d = 0; d < vector.Length; d++)
        {
            vector[d] = (float)vectorsGenerator.NextDouble();
        }

        VectorUtils.NormalizeSIMD(vector);
        randomVectors.Add(vector);
    }

    // Only graph construction is timed; vector generation is excluded.
    var clock = Stopwatch.StartNew();
    graph.BuildGraph(randomVectors, new Random(42), parameters);
    Console.WriteLine(clock.Elapsed.TotalMilliseconds);
}
/// <summary>
/// Checks that a graph restored from its serialized bytes prints the same
/// text as the graph those bytes were produced from.
/// </summary>
public void SerializeDeserializeTest()
{
    const int neighbours = 15;

    byte[] serialized;
    string expectedPrint;

    // Inner scope: the source graph cannot be referenced once its bytes
    // and printed form have been captured.
    {
        var buildParameters = new SmallWorld<float[], float>.Parameters();
        buildParameters.M = neighbours;
        buildParameters.LevelLambda = 1 / Math.Log(neighbours);

        var source = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
        source.BuildGraph(this.vectors, new Random(42), buildParameters);

        serialized = source.SerializeGraph();
        expectedPrint = source.Print();
    }

    var restored = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
    restored.DeserializeGraph(this.vectors, serialized);

    Assert.AreEqual(expectedPrint, restored.Print());
}
/// <summary>
/// Initializes the ANN class, given all feature vectors to which
/// a query will be compared.
/// </summary>
/// <param name="database">All feature vectors of its world.</param>
public ANN(IEnumerable<NamedFeatureVector> database)
{
    // Materialize the input once into a read-only list before building.
    IReadOnlyList<NamedFeatureVector> items = database.ToList().AsReadOnly();

    world = new SmallWorld<NamedFeatureVector, double>(ANNDistance);

    var buildParameters = new SmallWorld<NamedFeatureVector, double>.Parameters();
    buildParameters.EnableDistanceCacheForConstruction = true;

    // The listener is not read; it only has to exist while the graph is built.
    using (new MetricsEventListener(EventSources.GraphBuildEventSource.Instance))
    {
        world.BuildGraph(items, RandomUtil.ThreadSafeRandom, buildParameters);
    }
}
/// <summary>
/// Queries the graph with every indexed vector and checks that 20 results
/// come back, and that the closest one is the query vector itself at a
/// distance of zero (within <c>FloatError</c>).
/// </summary>
public void KNNSearchTest()
{
    const int k = 20;

    var buildParameters = new SmallWorld<float[], float>.Parameters();
    var graph = new SmallWorld<float[], float>(CosineDistance.NonOptimized);
    graph.BuildGraph(this.vectors, new Random(42), buildParameters);

    for (int index = 0; index < this.vectors.Count; index++)
    {
        var neighbours = graph.KNNSearch(this.vectors[index], k);
        var nearest = neighbours.OrderBy(candidate => candidate.Distance).First();

        Assert.AreEqual(k, neighbours.Count);
        Assert.AreEqual(index, nearest.Id);
        Assert.AreEqual(0, nearest.Distance, FloatError);
    }
}
/// <summary>
/// Benchmark entry point. Builds an HNSW graph over the vectors returned by
/// <c>GetFloatVectors</c>, prints the build time and the serialized size, then
/// runs the same top-20 query 100 times and prints the time of each run.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int dimensions = 100;
    const int maxNeighbours = 50;
    const int k = 20;
    const int queryRuns = 100;

    var parameters = new SmallWorld<float[], float>.Parameters()
    {
        M = maxNeighbours,
        // The level factor follows M (1 / ln(M), as recommended in the HNSW paper).
        // It was previously hard-coded to 1 / ln(15) although M is 50.
        LevelLambda = 1 / Math.Log(maxNeighbours),
    };

    // Unseeded: the generated vectors and the query differ from run to run.
    var r = new Random();
    var vectors = GetFloatVectors(dimensions, r);
    var graph = new SmallWorld<float[], float>(CosineDistance.SIMD);

    var stopWatch = Stopwatch.StartNew();
    graph.BuildGraph(vectors, new Random(42), parameters);
    stopWatch.Stop();
    var buildTime = stopWatch.Elapsed;
    Console.WriteLine($"graph build for {vectors.Count} items in {buildTime}");

    byte[] buffer = graph.SerializeGraph();
    Console.WriteLine($"graph serialized in {buffer.Length} bytes");

    float[] query = GetRandomVector(dimensions, r);
    for (var i = 0; i < queryRuns; i++)
    {
        // The search result is intentionally discarded; only the elapsed time is reported.
        stopWatch = Stopwatch.StartNew();
        graph.KNNSearch(query, k);
        stopWatch.Stop();

        var queryTime = stopWatch.Elapsed;
        Console.WriteLine($"Top {k} items retrieved in {queryTime}");
    }
}