Esempio n. 1
0
        /// <summary>
        /// Removes an item from the cluster that holds it. Only the cluster's <c>Items</c>
        /// collection is modified by this method; no other cluster state is updated here.
        /// </summary>
        /// <param name="id">ID of the item to remove</param>
        /// <param name="clusterOwner">
        /// Cluster to remove the item from. When null, the first cluster in <c>Clusters</c>
        /// whose <c>Items</c> contains <paramref name="id"/> is used.
        /// </param>
        /// <returns>true if the item was found and removed; otherwise false</returns>
        public bool RemoveItem(string id, ClusterBase clusterOwner = null)
        {
            clusterOwner = clusterOwner ?? this.Clusters.FirstOrDefault(c => c.Items.ContainsKey(id));
            if (clusterOwner == null)
            {
                // No cluster was supplied and none contains the item.
                return false;
            }

            // Report what Remove actually did: a caller-supplied cluster is not
            // guaranteed to contain the item, so "true" must not be assumed.
            return clusterOwner.Items.Remove(id);
        }
Esempio n. 2
0
        /// <summary>
        /// Registers a new item and places it in a cluster.
        /// Nothing happens when <c>IsKnownItem</c> reports the id as already known.
        /// Otherwise the item is scored against candidate clusters: those matched by the
        /// item's hash when there are any, or every cluster when there are none. It joins the
        /// best-scoring candidate if that score reaches <c>Config.MinClusterAffinity</c>;
        /// if not, a new cluster is created for it.
        /// </summary>
        /// <param name="id">ID to track this item</param>
        /// <param name="content">The item's contents - this is what the clustering algorithm parses</param>
        public void AddItem(string id, string content)
        {
            if (this.IsKnownItem(id))
            {
                return;
            }

            this.InitIfPending();

            var item = new ClusterItem(id, content, this.Vocabulary);
            List<ClusterBase> checkClusters = null;
            float topAffinity = 0.0f;

            // Pass 1: collect clusters that the item's hash points at.
            for (int idx = 0; idx < this.Clusters.Count; idx++)
            {
                var current = this.Clusters[idx];

                // Same hash as the item most recently recorded on this cluster.
                bool hashMatch = current.LastItemHash != null && current.LastItemHash == item.Hash;
                if (!hashMatch)
                {
                    // Otherwise: affinity stats (mean minus deviation) above the threshold
                    // and the hash already present among the cluster's content hashes.
                    hashMatch = current.StatsAffinity.Mean - current.StatsAffinity.StandardDeviation > this.Config.MinClusterAffinity &&
                                !string.IsNullOrEmpty(item.Hash) &&
                                current.ItemContentHashes.Contains(item.Hash);
                }

                if (!hashMatch)
                {
                    continue;
                }

                if (checkClusters == null)
                {
                    checkClusters = new List<ClusterBase>();
                }

                checkClusters.Add(current);
                topAffinity = 1.0f;
            }

            int chosenIndex = -1;
            if (checkClusters == null)
            {
                // No hash hits: every existing cluster is a candidate.
                checkClusters = this.Clusters;
            }

            if (checkClusters.Count == 1)
            {
                chosenIndex = 0;

                // A hash hit already set the affinity to 1; only score when it did not.
                if (topAffinity < 0.99999f)
                {
                    topAffinity = checkClusters[0].GetAffinity(item, this.Config.MinClusterAffinity);
                }
            }
            else if (checkClusters.Count > 1)
            {
                // With more than five candidates, stop the parallel scan at the first near-perfect score.
                bool stopOnPerfect = checkClusters.Count > 5;

                if (this.Config.LogDebug && checkClusters.Count < this.Clusters.Count)
                {
                    Debug.WriteLine($"Clusters ({checkClusters.Count} of {this.Clusters.Count}) found by hash. Item {item.Id}, Hash: {item.Hash} Content:{content.Replace('\r', ' ').Replace('\n', ' ')}");
                }

                float[] affinities = new float[checkClusters.Count];
                var options = new ParallelOptions
                {
                    MaxDegreeOfParallelism = Math.Max(1, this.Config.MaxDegreeOfParallelism)
                };

                Parallel.For(0, checkClusters.Count, options, (slot, loopState) =>
                {
                    float score = checkClusters[slot].GetAffinity(item, this.Config.MinClusterAffinity);
                    affinities[slot] = score;

                    if (stopOnPerfect && score > 0.99999)
                    {
                        chosenIndex = slot;
                        topAffinity = score;
                        loopState.Stop();
                    }
                });

                if (chosenIndex < 0)
                {
                    // No early winner: pick the highest strictly-positive score.
                    topAffinity = 0;
                    for (int k = 0; k < checkClusters.Count; k++)
                    {
                        if (affinities[k] > topAffinity)
                        {
                            topAffinity = affinities[k];
                            chosenIndex = k;
                        }
                    }
                }
            }

            // A candidate whose affinity is below the configured minimum is rejected.
            bool belowThreshold = chosenIndex >= 0 && topAffinity < this.Config.MinClusterAffinity;

            if (chosenIndex < 0 || belowThreshold)
            {
                var created = this.CreateCluster(item);
                this.Clusters.Add(created);
            }
            else
            {
                var target = checkClusters[chosenIndex];
                target.AddToCluster(item, target.GetAffinity(item, this.Config.MinClusterAffinity));
            }
        }