/// <summary>
/// Creates graph from the given items.
/// Contains implementation of INSERT(hnsw, q, M, Mmax, efConstruction, mL) algorithm.
/// Article: Section 4. Algorithm 1.
/// </summary>
/// <remarks>
/// Returns without touching the graph when <paramref name="items"/> is <c>null</c> or empty.
/// Node 0 serves as the initial entry point; every following node is inserted relative to it.
/// </remarks>
/// <param name="items">The items to insert.</param>
/// <param name="generator">The random number generator to distribute nodes across layers.</param>
internal void Build(IReadOnlyList<TItem> items, Random generator)
{
    // Explicit null check: `!items?.Any() ?? false` is false for a null list (the lifted `!`
    // over a null bool? stays null), which would let null fall through to Core construction.
    if (items is null || items.Count == 0)
    {
        return;
    }

    var core = new Core(this.distance, this.Parameters, items);
    core.AllocateNodes(generator);

    var entryPoint = core.Nodes[0];
    var searcher = new Searcher(core);
    Func<int, int, TDistance> nodeDistance = core.GetDistance;
    var neighboursIdsBuffer = new List<int>(core.Algorithm.GetM(0) + 1);

    // Node 0 is the entry point itself, so insertion starts from node 1.
    for (int nodeId = 1; nodeId < core.Nodes.Count; ++nodeId)
    {
        using (new ScopeLatencyTracker(GraphBuildEventSource.Instance?.GraphInsertNodeLatencyReporter))
        {
            /*
             * W ← ∅ // list for the currently found nearest elements
             * ep ← get enter point for hnsw
             * L ← level of ep // top layer for hnsw
             * l ← ⌊-ln(unif(0..1))∙mL⌋ // new element’s level
             * for lc ← L … l+1
             *   W ← SEARCH-LAYER(q, ep, ef=1, lc)
             *   ep ← get the nearest element from W to q
             * for lc ← min(L, l) … 0
             *   W ← SEARCH-LAYER(q, ep, efConstruction, lc)
             *   neighbors ← SELECT-NEIGHBORS(q, W, M, lc) // alg. 3 or alg. 4
             *     for each e ∈ neighbors // shrink connections if needed
             *       eConn ← neighbourhood(e) at layer lc
             *       if │eConn│ > Mmax // shrink connections of e if lc = 0 then Mmax = Mmax0
             *         eNewConn ← SELECT-NEIGHBORS(e, eConn, Mmax, lc) // alg. 3 or alg. 4
             *         set neighbourhood(e) at layer lc to eNewConn
             *   ep ← W
             * if l > L
             *   set enter point for hnsw to q
             */

            // zoom in and find the best peer on the same level as newNode
            var bestPeer = entryPoint;
            var currentNode = core.Nodes[nodeId];
            var currentNodeTravelingCosts = new TravelingCosts<int, TDistance>(nodeDistance, nodeId);
            for (int layer = bestPeer.MaxLayer; layer > currentNode.MaxLayer; --layer)
            {
                searcher.RunKnnAtLayer(bestPeer.Id, currentNodeTravelingCosts, neighboursIdsBuffer, layer, 1);
                bestPeer = core.Nodes[neighboursIdsBuffer[0]];

                // the buffer is shared between searches, so it is emptied after each use
                neighboursIdsBuffer.Clear();
            }

            // connecting new node to the small world
            for (int layer = Math.Min(currentNode.MaxLayer, entryPoint.MaxLayer); layer >= 0; --layer)
            {
                searcher.RunKnnAtLayer(bestPeer.Id, currentNodeTravelingCosts, neighboursIdsBuffer, layer, this.Parameters.ConstructionPruning);
                var bestNeighboursIds = core.Algorithm.SelectBestForConnecting(neighboursIdsBuffer, currentNodeTravelingCosts, layer);

                for (int i = 0; i < bestNeighboursIds.Count; ++i)
                {
                    int newNeighbourId = bestNeighboursIds[i];

                    // links are created in both directions
                    core.Algorithm.Connect(currentNode, core.Nodes[newNeighbourId], layer);
                    core.Algorithm.Connect(core.Nodes[newNeighbourId], currentNode, layer);

                    // if distance from newNode to newNeighbour is better than to bestPeer => update bestPeer
                    if (DistanceUtils.Lt(currentNodeTravelingCosts.From(newNeighbourId), currentNodeTravelingCosts.From(bestPeer.Id)))
                    {
                        bestPeer = core.Nodes[newNeighbourId];
                    }
                }

                neighboursIdsBuffer.Clear();
            }

            // zoom out to the highest level
            if (currentNode.MaxLayer > entryPoint.MaxLayer)
            {
                entryPoint = currentNode;
            }

            // report distance cache hit rate
            GraphBuildEventSource.Instance?.CoreGetDistanceCacheHitRateReporter?.Invoke(core.DistanceCacheHitRate);
        }
    }

    // construction is done
    this.core = core;
    this.entryPoint = entryPoint;
}
/// <inheritdoc/>
internal override List<int> SelectBestForConnecting(List<int> candidatesIds, TravelingCosts<int, TDistance> travelingCosts, int layer)
{
    /*
     * q ← this
     * R ← ∅    // result
     * W ← C    // working queue for the candidates
     * if expandCandidates  // expand candidates
     *   for each e ∈ C
     *     for each eadj ∈ neighbourhood(e) at layer lc
     *       if eadj ∉ W
     *         W ← W ⋃ eadj
     *
     * Wd ← ∅ // queue for the discarded candidates
     * while │W│ gt 0 and │R│ lt M
     *   e ← extract nearest element from W to q
     *   if e is closer to q compared to any element from R
     *     R ← R ⋃ e
     *   else
     *     Wd ← Wd ⋃ e
     *
     * if keepPrunedConnections // add some of the discarded connections from Wd
     *   while │Wd│ gt 0 and │R│ lt M
     *   R ← R ⋃ extract nearest element from Wd to q
     *
     * return R
     */

    // The traveling costs object is used directly as the comparer for the heaps.
    IComparer<int> fartherFirst = travelingCosts;
    IComparer<int> closerFirst = fartherFirst.Reverse();

    var maxConnections = GetM(layer);
    var selected = new BinaryHeap<int>(new List<int>(maxConnections + 1), fartherFirst);
    var pending = new BinaryHeap<int>(candidatesIds, closerFirst);

    // Optional stage: widen the candidate pool with the candidates' own neighbours.
    if (GraphCore.Parameters.ExpandBestSelection)
    {
        var seen = new HashSet<int>(pending.Buffer);
        var extras = new HashSet<int>();

        foreach (var knownId in pending.Buffer)
        {
            foreach (var adjacentId in GraphCore.Nodes[knownId][layer])
            {
                // Add returns true only for ids that were not in the set yet.
                if (seen.Add(adjacentId))
                {
                    extras.Add(adjacentId);
                }
            }
        }

        // Pushed only after the scan so the buffer is not modified while it is enumerated.
        foreach (var extraId in extras)
        {
            pending.Push(extraId);
        }
    }

    // Main stage: move candidates into the result until it is full or candidates run out.
    var rejected = new BinaryHeap<int>(new List<int>(pending.Buffer.Count), closerFirst);
    while (pending.Buffer.Count > 0 && selected.Buffer.Count < maxConnections)
    {
        var nextId = pending.Pop();

        // An empty result accepts anything; otherwise the candidate has to beat the
        // element currently at the top of the result heap.
        bool accepted = selected.Buffer.Count == 0
            || DistanceUtils.LowerThan(travelingCosts.From(nextId), travelingCosts.From(selected.Buffer[0]));

        if (accepted)
        {
            selected.Push(nextId);
        }
        else if (GraphCore.Parameters.KeepPrunedConnections)
        {
            rejected.Push(nextId);
        }
    }

    // Optional stage: top the result up with previously rejected candidates.
    if (GraphCore.Parameters.KeepPrunedConnections)
    {
        while (rejected.Buffer.Count > 0 && selected.Buffer.Count < maxConnections)
        {
            selected.Push(rejected.Pop());
        }
    }

    return selected.Buffer;
}
/// <summary>
/// The implementation of SEARCH-LAYER(q, ep, ef, lc) algorithm.
/// Article: Section 4. Algorithm 2.
/// </summary>
/// <param name="entryPointId">The identifier of the entry point for the search.</param>
/// <param name="targetCosts">The traveling costs for the search target.</param>
/// <param name="resultList">The list of identifiers of the nearest neighbours at the level.</param>
/// <param name="layer">The layer to perform search at.</param>
/// <param name="k">The number of the nearest neighbours to get from the layer.</param>
internal void RunKnnAtLayer(int entryPointId, TravelingCosts<int, TDistance> targetCosts, IList<int> resultList, int layer, int k)
{
    /*
     * v ← ep // set of visited elements
     * C ← ep // set of candidates
     * W ← ep // dynamic list of found nearest neighbors
     * while │C│ > 0
     *   c ← extract nearest element from C to q
     *   f ← get furthest element from W to q
     *   if distance(c, q) > distance(f, q)
     *     break // all elements in W are evaluated
     *   for each e ∈ neighbourhood(c) at layer lc // update C and W
     *     if e ∉ v
     *       v ← v ⋃ e
     *       f ← get furthest element from W to q
     *       if distance(e, q) < distance(f, q) or │W│ < ef
     *         C ← C ⋃ e
     *         W ← W ⋃ e
     *         if │W│ > ef
     *           remove furthest element from W to q
     * return W
     */

    // comparers: the target costs object orders ids for the heaps
    IComparer<int> fartherFirst = targetCosts;
    IComparer<int> closerFirst = fartherFirst.Reverse();

    // heaps are layered over the caller's result list and this searcher's reusable buffer
    var found = new BinaryHeap<int>(resultList, fartherFirst);
    var frontier = new BinaryHeap<int>(this.expansionBuffer, closerFirst);

    // seed the search with the entry point
    found.Push(entryPointId);
    frontier.Push(entryPointId);
    this.visitedSet.Add(entryPointId);

    while (frontier.Buffer.Count > 0)
    {
        // take the next id from the frontier and compare it with the top of the result heap
        var currentId = frontier.Pop();
        var topFoundId = found.Buffer[0];
        if (DistanceUtils.Gt(targetCosts.From(currentId), targetCosts.From(topFoundId)))
        {
            // nothing left in the frontier can improve the result
            break;
        }

        // walk the links of the current node at the requested layer
        var linkedIds = this.core.Nodes[currentId][layer];
        for (int index = 0; index < linkedIds.Count; ++index)
        {
            int linkedId = linkedIds[index];
            if (this.visitedSet.Contains(linkedId))
            {
                continue;
            }

            // re-read the top of the result heap: earlier iterations may have changed it
            topFoundId = found.Buffer[0];
            bool hasRoom = found.Buffer.Count < k;
            if (hasRoom || DistanceUtils.Lt(targetCosts.From(linkedId), targetCosts.From(topFoundId)))
            {
                frontier.Push(linkedId);
                found.Push(linkedId);

                // keep at most k results
                if (found.Buffer.Count > k)
                {
                    found.Pop();
                }
            }

            // remember the node so it is examined only once
            this.visitedSet.Add(linkedId);
        }
    }

    // reset the shared scratch state for the next search
    this.expansionBuffer.Clear();
    this.visitedSet.Clear();
}
/// <summary>
/// Creates graph from the given items.
/// Contains implementation of INSERT(hnsw, q, M, Mmax, efConstruction, mL) algorithm.
/// Article: Section 4. Algorithm 1.
/// </summary>
/// <param name="items">The items to insert.</param>
/// <param name="generator">The random number generator to distribute nodes across layers.</param>
/// <param name="progressReporter">Interface to report progress; may be <c>null</c>.</param>
/// <param name="cancellationToken">Token to cancel adding items to the graph. The graph state will be corrupt if you cancel, and will need to be rebuilt from scratch.</param>
/// <returns>
/// The identifiers produced by the core for the added items, or an empty list when
/// <paramref name="items"/> is <c>null</c> or empty.
/// </returns>
internal IReadOnlyList<int> AddItems(IReadOnlyList<TItem> items, IProvideRandomValues generator, IProgressReporter progressReporter, CancellationToken cancellationToken)
{
    if (items is null || items.Count == 0)
    {
        return Array.Empty<int>();
    }

    GraphCore = GraphCore ?? new Core(Distance, Parameters);

    // nodes created by this call occupy indices [startIndex, GraphCore.Nodes.Count)
    int startIndex = GraphCore.Items.Count;

    var newIDs = GraphCore.AddItems(items, generator, cancellationToken);

    // with no entry point yet, node 0 becomes the initial one
    var entryPoint = EntryPoint ?? GraphCore.Nodes[0];

    var searcher = new Searcher(GraphCore);
    Func<int, int, TDistance> nodeDistance = GraphCore.GetDistance;
    var neighboursIdsBuffer = new List<int>(GraphCore.Algorithm.GetM(0) + 1);

    for (int nodeId = startIndex; nodeId < GraphCore.Nodes.Count; ++nodeId)
    {
        cancellationToken.ThrowIfCancellationRequested();

        using (new ScopeLatencyTracker(GraphBuildEventSource.Instance?.GraphInsertNodeLatencyReporter))
        {
            /*
             * W ← ∅ // list for the currently found nearest elements
             * ep ← get enter point for hnsw
             * L ← level of ep // top layer for hnsw
             * l ← ⌊-ln(unif(0..1))∙mL⌋ // new element’s level
             * for lc ← L … l+1
             *   W ← SEARCH-LAYER(q, ep, ef=1, lc)
             *   ep ← get the nearest element from W to q
             * for lc ← min(L, l) … 0
             *   W ← SEARCH-LAYER(q, ep, efConstruction, lc)
             *   neighbors ← SELECT-NEIGHBORS(q, W, M, lc) // alg. 3 or alg. 4
             *     for each e ∈ neighbors // shrink connections if needed
             *       eConn ← neighbourhood(e) at layer lc
             *       if │eConn│ > Mmax // shrink connections of e if lc = 0 then Mmax = Mmax0
             *         eNewConn ← SELECT-NEIGHBORS(e, eConn, Mmax, lc) // alg. 3 or alg. 4
             *         set neighbourhood(e) at layer lc to eNewConn
             *   ep ← W
             * if l > L
             *   set enter point for hnsw to q
             */

            // A node that is itself the current entry point (the first node of a previously
            // empty graph) has no peers yet. Inserting it would start the search from the node
            // itself and end up connecting the node to itself, so the insert is skipped.
            if (nodeId != entryPoint.Id)
            {
                // zoom in and find the best peer on the same level as newNode
                var bestPeer = entryPoint;
                var currentNode = GraphCore.Nodes[nodeId];
                var currentNodeTravelingCosts = new TravelingCosts<int, TDistance>(nodeDistance, nodeId);
                for (int layer = bestPeer.MaxLayer; layer > currentNode.MaxLayer; --layer)
                {
                    searcher.RunKnnAtLayer(bestPeer.Id, currentNodeTravelingCosts, neighboursIdsBuffer, layer, 1);
                    bestPeer = GraphCore.Nodes[neighboursIdsBuffer[0]];

                    // the buffer is shared between searches, so it is emptied after each use
                    neighboursIdsBuffer.Clear();
                }

                // connecting new node to the small world
                for (int layer = Math.Min(currentNode.MaxLayer, entryPoint.MaxLayer); layer >= 0; --layer)
                {
                    searcher.RunKnnAtLayer(bestPeer.Id, currentNodeTravelingCosts, neighboursIdsBuffer, layer, Parameters.ConstructionPruning);
                    var bestNeighboursIds = GraphCore.Algorithm.SelectBestForConnecting(neighboursIdsBuffer, currentNodeTravelingCosts, layer);

                    for (int i = 0; i < bestNeighboursIds.Count; ++i)
                    {
                        int newNeighbourId = bestNeighboursIds[i];

                        // links are created in both directions
                        GraphCore.Algorithm.Connect(currentNode, GraphCore.Nodes[newNeighbourId], layer);
                        GraphCore.Algorithm.Connect(GraphCore.Nodes[newNeighbourId], currentNode, layer);

                        // if distance from newNode to newNeighbour is better than to bestPeer => update bestPeer
                        if (DistanceUtils.LowerThan(currentNodeTravelingCosts.From(newNeighbourId), currentNodeTravelingCosts.From(bestPeer.Id)))
                        {
                            bestPeer = GraphCore.Nodes[newNeighbourId];
                        }
                    }

                    neighboursIdsBuffer.Clear();
                }

                // zoom out to the highest level
                if (currentNode.MaxLayer > entryPoint.MaxLayer)
                {
                    entryPoint = currentNode;
                }
            }

            // report distance cache hit rate
            GraphBuildEventSource.Instance?.CoreGetDistanceCacheHitRateReporter?.Invoke(GraphCore.DistanceCacheHitRate);
        }

        // progress is reported for every node of this batch, including a skipped one
        progressReporter?.Progress(nodeId - startIndex, GraphCore.Nodes.Count - startIndex);
    }

    // construction is done
    EntryPoint = entryPoint;

    return newIDs;
}