/// <summary>
        /// Creates graph from the given items.
        /// Contains implementation of INSERT(hnsw, q, M, Mmax, efConstruction, mL) algorithm.
        /// Article: Section 4. Algorithm 1.
        /// </summary>
        /// <remarks>
        /// Returns without touching the instance when <paramref name="items"/> is null or empty.
        /// Otherwise the freshly built core and the final entry point are stored on this instance
        /// once all nodes have been inserted.
        /// </remarks>
        /// <param name="items">The items to insert.</param>
        /// <param name="generator">The random number generator to distribute nodes across layers.</param>
        internal void Build(IReadOnlyList<TItem> items, Random generator)
        {
            // Nothing to build from. The former check `!items?.Any() ?? false` evaluated to false
            // for a null list (the lifted `!` yields null, which `?? false` turns into "do not return"),
            // so null fell through to the Core constructor instead of returning early.
            if (items == null || items.Count == 0)
            {
                return;
            }

            var core = new Core(this.distance, this.Parameters, items);

            core.AllocateNodes(generator);

            // node 0 seeds the graph, so insertion starts from node 1 below
            var entryPoint = core.Nodes[0];
            var searcher   = new Searcher(core);
            Func<int, int, TDistance> nodeDistance = core.GetDistance;

            // scratch list reused by every search; it is cleared after each use
            var neighboursIdsBuffer = new List<int>(core.Algorithm.GetM(0) + 1);

            for (int nodeId = 1; nodeId < core.Nodes.Count; ++nodeId)
            {
                using (new ScopeLatencyTracker(GraphBuildEventSource.Instance?.GraphInsertNodeLatencyReporter))
                {
                    /*
                     * W ← ∅ // list for the currently found nearest elements
                     * ep ← get enter point for hnsw
                     * L ← level of ep // top layer for hnsw
                     * l ← ⌊-ln(unif(0..1))∙mL⌋ // new element’s level
                     * for lc ← L … l+1
                     *   W ← SEARCH-LAYER(q, ep, ef=1, lc)
                     *   ep ← get the nearest element from W to q
                     * for lc ← min(L, l) … 0
                     *   W ← SEARCH-LAYER(q, ep, efConstruction, lc)
                     *   neighbors ← SELECT-NEIGHBORS(q, W, M, lc) // alg. 3 or alg. 4
                     *     for each e ∈ neighbors // shrink connections if needed
                     *       eConn ← neighbourhood(e) at layer lc
                     *       if │eConn│ > Mmax // shrink connections of e if lc = 0 then Mmax = Mmax0
                     *         eNewConn ← SELECT-NEIGHBORS(e, eConn, Mmax, lc) // alg. 3 or alg. 4
                     *         set neighbourhood(e) at layer lc to eNewConn
                     *   ep ← W
                     * if l > L
                     *   set enter point for hnsw to q
                     */

                    // zoom in and find the best peer on the same level as newNode
                    var bestPeer    = entryPoint;
                    var currentNode = core.Nodes[nodeId];
                    var currentNodeTravelingCosts = new TravelingCosts<int, TDistance>(nodeDistance, nodeId);
                    for (int layer = bestPeer.MaxLayer; layer > currentNode.MaxLayer; --layer)
                    {
                        // single-result search: only the head of the buffer is needed on these layers
                        searcher.RunKnnAtLayer(bestPeer.Id, currentNodeTravelingCosts, neighboursIdsBuffer, layer, 1);
                        bestPeer = core.Nodes[neighboursIdsBuffer[0]];
                        neighboursIdsBuffer.Clear();
                    }

                    // connecting new node to the small world
                    for (int layer = Math.Min(currentNode.MaxLayer, entryPoint.MaxLayer); layer >= 0; --layer)
                    {
                        searcher.RunKnnAtLayer(bestPeer.Id, currentNodeTravelingCosts, neighboursIdsBuffer, layer, this.Parameters.ConstructionPruning);
                        var bestNeighboursIds = core.Algorithm.SelectBestForConnecting(neighboursIdsBuffer, currentNodeTravelingCosts, layer);

                        for (int i = 0; i < bestNeighboursIds.Count; ++i)
                        {
                            int newNeighbourId = bestNeighboursIds[i];

                            // link in both directions
                            core.Algorithm.Connect(currentNode, core.Nodes[newNeighbourId], layer);
                            core.Algorithm.Connect(core.Nodes[newNeighbourId], currentNode, layer);

                            // if distance from newNode to newNeighbour is better than to bestPeer => update bestPeer
                            if (DistanceUtils.Lt(currentNodeTravelingCosts.From(newNeighbourId), currentNodeTravelingCosts.From(bestPeer.Id)))
                            {
                                bestPeer = core.Nodes[newNeighbourId];
                            }
                        }

                        neighboursIdsBuffer.Clear();
                    }

                    // zoom out to the highest level
                    if (currentNode.MaxLayer > entryPoint.MaxLayer)
                    {
                        entryPoint = currentNode;
                    }

                    // report distance cache hit rate
                    GraphBuildEventSource.Instance?.CoreGetDistanceCacheHitRateReporter?.Invoke(core.DistanceCacheHitRate);
                }
            }

            // construction is done
            this.core       = core;
            this.entryPoint = entryPoint;
        }
예제 #2
0
            /// <inheritdoc/>
            internal override List<int> SelectBestForConnecting(List<int> candidatesIds, TravelingCosts<int, TDistance> travelingCosts, int layer)
            {
                /*
                 * q ← this
                 * R ← ∅    // result
                 * W ← C    // working queue for the candidates
                 * if expandCandidates  // expand candidates
                 *   for each e ∈ C
                 *     for each eadj ∈ neighbourhood(e) at layer lc
                 *       if eadj ∉ W
                 *         W ← W ⋃ eadj
                 *
                 * Wd ← ∅ // queue for the discarded candidates
                 * while │W│ gt 0 and │R│ lt M
                 *   e ← extract nearest element from W to q
                 *   if e is closer to q compared to any element from R
                 *     R ← R ⋃ e
                 *   else
                 *     Wd ← Wd ⋃ e
                 *
                 * if keepPrunedConnections // add some of the discarded connections from Wd
                 *   while │Wd│ gt 0 and │R│ lt M
                 *   R ← R ⋃ extract nearest element from Wd to q
                 *
                 * return R
                 */

                // the traveling costs double as the comparer; the reversed comparer orders the other way round
                IComparer<int> fartherFirst = travelingCosts;
                IComparer<int> closerFirst  = fartherFirst.Reverse();

                var maxConnections = GetM(layer);

                var selected = new BinaryHeap<int>(new List<int>(maxConnections + 1), fartherFirst);
                var pending  = new BinaryHeap<int>(candidatesIds, closerFirst);

                // optional stage: widen the candidate pool with the candidates' own neighbours at this layer
                if (GraphCore.Parameters.ExpandBestSelection)
                {
                    var known = new HashSet<int>(pending.Buffer);
                    var extra = new HashSet<int>();

                    foreach (var knownCandidateId in pending.Buffer)
                    {
                        foreach (var adjacentId in GraphCore.Nodes[knownCandidateId][layer])
                        {
                            // Add returns false for ids that were already seen
                            if (known.Add(adjacentId))
                            {
                                extra.Add(adjacentId);
                            }
                        }
                    }

                    // pushed only after the scan so the buffer is not modified while being enumerated
                    foreach (var extraId in extra)
                    {
                        pending.Push(extraId);
                    }
                }

                // main stage: move candidates to the result until it holds maxConnections entries
                var rejected = new BinaryHeap<int>(new List<int>(pending.Buffer.Count), closerFirst);
                var keepPruned = GraphCore.Parameters.KeepPrunedConnections;

                while (pending.Buffer.Count > 0 && selected.Buffer.Count < maxConnections)
                {
                    var nextId = pending.Pop();

                    // accepted when the result is still empty or the candidate beats the head of the result heap
                    var accept = selected.Buffer.Count == 0
                        || DistanceUtils.LowerThan(travelingCosts.From(nextId), travelingCosts.From(selected.Buffer[0]));

                    if (accept)
                    {
                        selected.Push(nextId);
                    }
                    else if (keepPruned)
                    {
                        rejected.Push(nextId);
                    }
                }

                // optional stage: top the result up with previously rejected candidates
                if (keepPruned)
                {
                    while (rejected.Buffer.Count > 0 && selected.Buffer.Count < maxConnections)
                    {
                        selected.Push(rejected.Pop());
                    }
                }

                return selected.Buffer;
            }
예제 #3
0
            /// <summary>
            /// The implementation of SEARCH-LAYER(q, ep, ef, lc) algorithm.
            /// Article: Section 4. Algorithm 2.
            /// </summary>
            /// <param name="entryPointId">The identifier of the entry point for the search.</param>
            /// <param name="targetCosts">The traveling costs for the search target.</param>
            /// <param name="resultList">The list used as the buffer of the result heap; it receives the identifiers of the nearest neighbours at the level.</param>
            /// <param name="layer">The layer to perform search at.</param>
            /// <param name="k">The number of the nearest neighbours to get from the layer.</param>
            internal void RunKnnAtLayer(int entryPointId, TravelingCosts<int, TDistance> targetCosts, IList<int> resultList, int layer, int k)
            {
                /*
                 * v ← ep // set of visited elements
                 * C ← ep // set of candidates
                 * W ← ep // dynamic list of found nearest neighbors
                 * while │C│ > 0
                 *   c ← extract nearest element from C to q
                 *   f ← get furthest element from W to q
                 *   if distance(c, q) > distance(f, q)
                 *     break // all elements in W are evaluated
                 *   for each e ∈ neighbourhood(c) at layer lc // update C and W
                 *     if e ∉ v
                 *       v ← v ⋃ e
                 *       f ← get furthest element from W to q
                 *       if distance(e, q) < distance(f, q) or │W│ < ef
                 *         C ← C ⋃ e
                 *         W ← W ⋃ e
                 *         if │W│ > ef
                 *           remove furthest element from W to q
                 * return W
                 */

                // comparers: the traveling costs themselves and their reversed counterpart
                IComparer<int> fartherIsOnTop = targetCosts;
                IComparer<int> closerIsOnTop  = fartherIsOnTop.Reverse();

                // heaps over the caller's list and over the searcher's reusable expansion buffer
                // TODO: Optimize by providing buffers
                var found    = new BinaryHeap<int>(resultList, fartherIsOnTop);
                var frontier = new BinaryHeap<int>(this.expansionBuffer, closerIsOnTop);

                // seed every collection with the entry point
                found.Push(entryPointId);
                frontier.Push(entryPointId);
                this.visitedSet.Add(entryPointId);

                // best-first traversal of the layer
                while (frontier.Buffer.Count != 0)
                {
                    // take the next candidate and compare it with the head of the result heap
                    var currentId = frontier.Pop();
                    var worstFoundId = found.Buffer[0];
                    if (DistanceUtils.Gt(targetCosts.From(currentId), targetCosts.From(worstFoundId)))
                    {
                        // the closest candidate is farther than farthest result
                        break;
                    }

                    // expand the candidate through its links at this layer
                    var adjacentIds = this.core.Nodes[currentId][layer];
                    for (int index = 0; index < adjacentIds.Count; ++index)
                    {
                        int adjacentId = adjacentIds[index];
                        if (this.visitedSet.Contains(adjacentId))
                        {
                            continue;
                        }

                        // enqueue prospective neighbours: either there is still room, or this one beats the head of the result heap
                        worstFoundId = found.Buffer[0];
                        bool hasRoom = found.Buffer.Count < k;
                        if (hasRoom || DistanceUtils.Lt(targetCosts.From(adjacentId), targetCosts.From(worstFoundId)))
                        {
                            frontier.Push(adjacentId);
                            found.Push(adjacentId);

                            // keep at most k results
                            if (found.Buffer.Count > k)
                            {
                                found.Pop();
                            }
                        }

                        // update visited list
                        this.visitedSet.Add(adjacentId);
                    }
                }

                // reset the shared scratch state for the next search
                this.expansionBuffer.Clear();
                this.visitedSet.Clear();
            }
예제 #4
0
        /// <summary>
        /// Adds the given items to the graph, creating the graph core first when there is none yet.
        /// Contains implementation of INSERT(hnsw, q, M, Mmax, efConstruction, mL) algorithm.
        /// Article: Section 4. Algorithm 1.
        /// </summary>
        /// <param name="items">The items to insert.</param>
        /// <param name="generator">The random number generator to distribute nodes across layers.</param>
        /// <param name="progressReporter">Interface to report progress; may be null.</param>
        /// <param name="cancellationToken">Token to cancel adding items to the graph. The graph state will be corrupt if you cancel, and will need to be rebuilt from scratch.</param>
        /// <returns>
        /// The list returned by <c>GraphCore.AddItems</c> for the added items,
        /// or an empty list when <paramref name="items"/> is null or empty.
        /// </returns>
        internal IReadOnlyList<int> AddItems(IReadOnlyList<TItem> items, IProvideRandomValues generator, IProgressReporter progressReporter, CancellationToken cancellationToken)
        {
            if (items is null || items.Count == 0)
            {
                return Array.Empty<int>();
            }

            GraphCore = GraphCore ?? new Core(Distance, Parameters);

            // nodes created by this call occupy ids [startIndex, GraphCore.Nodes.Count)
            int startIndex = GraphCore.Items.Count;

            var newIDs = GraphCore.AddItems(items, generator, cancellationToken);

            // without a stored entry point, node 0 seeds the graph
            var entryPoint = EntryPoint ?? GraphCore.Nodes[0];

            var searcher = new Searcher(GraphCore);
            Func<int, int, TDistance> nodeDistance = GraphCore.GetDistance;

            // scratch list reused by every search; it is cleared after each use
            var neighboursIdsBuffer = new List<int>(GraphCore.Algorithm.GetM(0) + 1);

            for (int nodeId = startIndex; nodeId < GraphCore.Nodes.Count; ++nodeId)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // When the graph had no entry point, node 0 is both the entry point and the first node of
                // this batch. Inserting it would search from itself, find only itself, and hand the same
                // node to Connect as both endpoints. The seed node has nothing to connect to yet, so it is
                // skipped; later nodes link to it through the regular path.
                // NOTE(review): Connect is not visible here; confirm it does not already reject self-links.
                if (nodeId != entryPoint.Id)
                {
                    using (new ScopeLatencyTracker(GraphBuildEventSource.Instance?.GraphInsertNodeLatencyReporter))
                    {
                        /*
                         * W ← ∅ // list for the currently found nearest elements
                         * ep ← get enter point for hnsw
                         * L ← level of ep // top layer for hnsw
                         * l ← ⌊-ln(unif(0..1))∙mL⌋ // new element’s level
                         * for lc ← L … l+1
                         *   W ← SEARCH-LAYER(q, ep, ef=1, lc)
                         *   ep ← get the nearest element from W to q
                         * for lc ← min(L, l) … 0
                         *   W ← SEARCH-LAYER(q, ep, efConstruction, lc)
                         *   neighbors ← SELECT-NEIGHBORS(q, W, M, lc) // alg. 3 or alg. 4
                         *     for each e ∈ neighbors // shrink connections if needed
                         *       eConn ← neighbourhood(e) at layer lc
                         *       if │eConn│ > Mmax // shrink connections of e if lc = 0 then Mmax = Mmax0
                         *         eNewConn ← SELECT-NEIGHBORS(e, eConn, Mmax, lc) // alg. 3 or alg. 4
                         *         set neighbourhood(e) at layer lc to eNewConn
                         *   ep ← W
                         * if l > L
                         *   set enter point for hnsw to q
                         */

                        // zoom in and find the best peer on the same level as newNode
                        var bestPeer    = entryPoint;
                        var currentNode = GraphCore.Nodes[nodeId];
                        var currentNodeTravelingCosts = new TravelingCosts<int, TDistance>(nodeDistance, nodeId);
                        for (int layer = bestPeer.MaxLayer; layer > currentNode.MaxLayer; --layer)
                        {
                            // single-result search: only the head of the buffer is needed on these layers
                            searcher.RunKnnAtLayer(bestPeer.Id, currentNodeTravelingCosts, neighboursIdsBuffer, layer, 1);
                            bestPeer = GraphCore.Nodes[neighboursIdsBuffer[0]];
                            neighboursIdsBuffer.Clear();
                        }

                        // connecting new node to the small world
                        for (int layer = Math.Min(currentNode.MaxLayer, entryPoint.MaxLayer); layer >= 0; --layer)
                        {
                            searcher.RunKnnAtLayer(bestPeer.Id, currentNodeTravelingCosts, neighboursIdsBuffer, layer, Parameters.ConstructionPruning);
                            var bestNeighboursIds = GraphCore.Algorithm.SelectBestForConnecting(neighboursIdsBuffer, currentNodeTravelingCosts, layer);

                            for (int i = 0; i < bestNeighboursIds.Count; ++i)
                            {
                                int newNeighbourId = bestNeighboursIds[i];

                                // link in both directions
                                GraphCore.Algorithm.Connect(currentNode, GraphCore.Nodes[newNeighbourId], layer);
                                GraphCore.Algorithm.Connect(GraphCore.Nodes[newNeighbourId], currentNode, layer);

                                // if distance from newNode to newNeighbour is better than to bestPeer => update bestPeer
                                if (DistanceUtils.LowerThan(currentNodeTravelingCosts.From(newNeighbourId), currentNodeTravelingCosts.From(bestPeer.Id)))
                                {
                                    bestPeer = GraphCore.Nodes[newNeighbourId];
                                }
                            }

                            neighboursIdsBuffer.Clear();
                        }

                        // zoom out to the highest level
                        if (currentNode.MaxLayer > entryPoint.MaxLayer)
                        {
                            entryPoint = currentNode;
                        }

                        // report distance cache hit rate
                        GraphBuildEventSource.Instance?.CoreGetDistanceCacheHitRateReporter?.Invoke(GraphCore.DistanceCacheHitRate);
                    }
                }

                // NOTE(review): the first argument is the zero-based index of the node just handled, so the
                // last report is (total - 1, total); confirm this matches what IProgressReporter expects.
                progressReporter?.Progress(nodeId - startIndex, GraphCore.Nodes.Count - startIndex);
            }

            // construction is done
            EntryPoint = entryPoint;
            return newIDs;
        }