/// <summary>
/// Removes an item from a cluster's <c>Items</c> collection. Only the collection entry is
/// removed here, so this may not affect the cluster's internal state.
/// </summary>
/// <param name="id">ID of the item to remove</param>
/// <param name="clusterOwner">
/// Cluster expected to hold the item. When null, the first cluster in <c>Clusters</c>
/// whose <c>Items</c> contains <paramref name="id"/> is used instead.
/// </param>
/// <returns>true if found and removed</returns>
public bool RemoveItem(string id, ClusterBase clusterOwner = null)
{
    clusterOwner = clusterOwner ?? this.Clusters.FirstOrDefault(c => c.Items.ContainsKey(id));
    if (clusterOwner == null)
    {
        return false;
    }

    // Report what Remove actually did: a caller-supplied owner is not guaranteed to
    // hold the id, and in that case nothing was removed.
    return clusterOwner.Items.Remove(id);
}
/// <summary>
/// Adds a new clustering item. The item is added to the candidate cluster with the highest
/// affinity when that affinity reaches <c>Config.MinClusterAffinity</c>; otherwise a new
/// cluster is created for it. Nothing happens when <c>IsKnownItem</c> reports the id as known.
/// </summary>
/// <param name="id">ID to track this item</param>
/// <param name="content">The item's contents - this is what the clustering algorithm parses</param>
public void AddItem(string id, string content)
{
    if (this.IsKnownItem(id))
    {
        return;
    }

    // Affinities above this value are treated as a perfect match.
    const float NearPerfectAffinity = 0.99999f;

    this.InitIfPending();
    var item = new ClusterItem(id, content, this.Vocabulary);
    var minAffinity = this.Config.MinClusterAffinity;

    // Shortlist clusters by content hash: either the cluster's last item had the same hash,
    // or the cluster's (mean - standard deviation) affinity is above the minimum and it has
    // already recorded this hash.
    List<ClusterBase> checkClusters = null;
    float bestAffinity = 0.0f;
    foreach (ClusterBase candidate in this.Clusters)
    {
        bool sameAsLastItem = candidate.LastItemHash != null && candidate.LastItemHash == item.Hash;
        bool knownHashInTightCluster =
            candidate.StatsAffinity.Mean - candidate.StatsAffinity.StandardDeviation > minAffinity
            && !string.IsNullOrEmpty(item.Hash)
            && candidate.ItemContentHashes.Contains(item.Hash);

        if (sameAsLastItem || knownHashInTightCluster)
        {
            checkClusters = checkClusters ?? new List<ClusterBase>();
            checkClusters.Add(candidate);
            bestAffinity = 1.0f;
        }
    }

    // Without any hash match every existing cluster is a candidate.
    checkClusters = checkClusters ?? this.Clusters;

    int bestCluster = -1;
    if (checkClusters.Count == 1)
    {
        bestCluster = 0;

        // A single hash match already carries affinity 1.0; only a sole non-hash
        // candidate needs to be measured.
        if (bestAffinity < NearPerfectAffinity)
        {
            bestAffinity = checkClusters[0].GetAffinity(item, minAffinity);
        }
    }
    else if (checkClusters.Count > 1)
    {
        // With more than a handful of candidates, stop scheduling further work as soon
        // as one near-perfect match has been seen.
        bool breakOnMax = checkClusters.Count > 5;

        if (this.Config.LogDebug && checkClusters.Count < this.Clusters.Count)
        {
            // The line break is written as an escape: a regular interpolated string
            // cannot span source lines.
            Debug.WriteLine($"Clusters ({checkClusters.Count} of {this.Clusters.Count}) found by hash. \nItem {item.Id}, Hash: {item.Hash} Content:{content.Replace('\r', ' ').Replace('\n', ' ')}");
        }

        float[] clusterAffinity = new float[checkClusters.Count];
        var parallelOptions = new ParallelOptions
        {
            MaxDegreeOfParallelism = Math.Max(1, this.Config.MaxDegreeOfParallelism)
        };

        // Each iteration writes only its own slot of clusterAffinity. The winner is picked
        // after the loop on the calling thread, so no shared local is written concurrently
        // and the chosen index always matches the chosen affinity.
        Parallel.For(0, checkClusters.Count, parallelOptions, (ci, state) =>
        {
            float affinity = checkClusters[ci].GetAffinity(item, minAffinity);
            clusterAffinity[ci] = affinity;
            if (breakOnMax && affinity > NearPerfectAffinity)
            {
                state.Stop();
            }
        });

        // Slots never computed because of an early stop keep their default 0 and
        // cannot win the strict comparison below.
        bestAffinity = 0;
        for (int ci = 0; ci < clusterAffinity.Length; ci++)
        {
            if (clusterAffinity[ci] > bestAffinity)
            {
                bestAffinity = clusterAffinity[ci];
                bestCluster = ci;
            }
        }
    }

    if (bestCluster >= 0 && bestAffinity < minAffinity)
    {
        bestCluster = -1;
    }

    if (bestCluster >= 0)
    {
        // The affinity is measured again here rather than reusing bestAffinity, which is
        // the placeholder 1.0 when the cluster was selected as a single hash match.
        ClusterBase cluster = checkClusters[bestCluster];
        cluster.AddToCluster(item, cluster.GetAffinity(item, minAffinity));
    }
    else
    {
        var newCluster = this.CreateCluster(item);
        this.Clusters.Add(newCluster);
    }
}