/// <summary>
        /// Runs one merge pass of the agglomerative clustering: fetches the current list of closest
        /// cluster pairs from <c>findClosestClusters</c> and merges them, best pair first, until the
        /// list is exhausted or a stop condition is hit.
        /// </summary>
        /// <param name="numUsers">Passed through unchanged to <c>findClosestClusters</c>.</param>
        /// <param name="clusters">Current clusters; modified in place (merged clusters are removed and
        /// their union is appended).</param>
        /// <param name="done">Incoming completion flag; returned unchanged unless a stop condition is hit.</param>
        /// <returns>
        /// <c>true</c> when clustering should stop: the cluster-count target was reached (count mode),
        /// the best remaining pair fell below the similarity threshold (threshold mode), or no candidate
        /// pair was available at all. Otherwise the incoming <paramref name="done"/> value.
        /// </returns>
        private bool mergeClosestClusters(int numUsers, List<FastIDSet> clusters, bool done)
        {
            // We find a certain number of closest clusters...
            // NOTE(review): the queue is assumed to be ordered by descending similarity (best pair
            // first); the head-pop and sorted re-insertion below rely on that - confirm against
            // findClosestClusters.
            List<ClusterClusterPair> queue = findClosestClusters(numUsers, clusters);

            // No candidate pair means this pass cannot merge anything. Report completion so that a
            // caller looping "until done" does not spin forever on an unchanged cluster list.
            // NOTE(review): assumes the caller loops on the returned flag - confirm.
            if (queue.Count == 0)
            {
                return true;
            }

            // The first one is definitely the closest pair in existence so we can cluster
            // the two together, put it back into the set of clusters, and start again. Instead
            // we assume everything else in our list of closest cluster pairs is still pretty good,
            // and we cluster them too.
            while (queue.Count > 0)
            {
                if (!clusteringByThreshold && clusters.Count <= numClusters)
                {
                    done = true;
                    break;
                }

                // Always take the head of the queue: it is the best remaining pair.
                ClusterClusterPair top = queue[0];
                queue.RemoveAt(0);

                if (clusteringByThreshold && top.getSimilarity() < clusteringThreshold)
                {
                    done = true;
                    break;
                }

                FastIDSet cluster1 = top.getCluster1();
                FastIDSet cluster2 = top.getCluster2();

                // Pull out current two clusters from clusters; stop scanning once both are gone.
                bool removed1 = false;
                bool removed2 = false;
                for (int m = 0; m < clusters.Count && !(removed1 && removed2); m++)
                {
                    FastIDSet current = clusters[m];

                    // Yes, use == here
                    if (!removed1 && cluster1 == current)
                    {
                        clusters.RemoveAt(m);
                        m--; // re-examine the element that shifted into slot m
                        removed1 = true;
                    }
                    else if (!removed2 && cluster2 == current)
                    {
                        clusters.RemoveAt(m);
                        m--;
                        removed2 = true;
                    }
                }

                // The only catch is if a cluster showed up twice in the list of best cluster pairs;
                // have to remove the others. Pull out anything referencing these clusters from queue.
                // Walking backwards keeps the remaining indices valid while removing.
                for (int k = queue.Count - 1; k >= 0; k--)
                {
                    ClusterClusterPair pair  = queue[k];
                    FastIDSet          pair1 = pair.getCluster1();
                    FastIDSet          pair2 = pair.getCluster2();
                    if (pair1 == cluster1 || pair1 == cluster2 || pair2 == cluster1 || pair2 == cluster2)
                    {
                        queue.RemoveAt(k);
                    }
                }

                // Make new merged cluster
                FastIDSet merged = new FastIDSet(cluster1.size() + cluster2.size());
                merged.addAll(cluster1);
                merged.addAll(cluster2);

                // Compare against other clusters; update queue if needed
                // That new pair we're just adding might be pretty close to something else, so
                // catch that case here and put it back into our queue
                for (int i = 0; i < clusters.Count; i++)
                {
                    FastIDSet cluster    = clusters[i];
                    double    similarity = clusterSimilarity.getSimilarity(merged, cluster);

                    // Only pairs that beat the current worst queued pair are worth queueing. With an
                    // empty queue there is nothing to compare against, so the pair is left for the
                    // next pass to rediscover.
                    if (queue.Count == 0 || !(similarity > queue[queue.Count - 1].getSimilarity()))
                    {
                        continue;
                    }

                    // Insert in front of the first entry that is strictly less similar, which keeps
                    // the queue in descending order.
                    int insertAt = 0;
                    while (insertAt < queue.Count && !(similarity > queue[insertAt].getSimilarity()))
                    {
                        insertAt++;
                    }
                    queue.Insert(insertAt, new ClusterClusterPair(merged, cluster, similarity));
                }

                // Finally add new cluster to list
                clusters.Add(merged);
            }

            return done;
        }
// Example #2
// 0
        /// <summary>
        /// Builds the user clusters under <c>buildClustersLock</c> and publishes the results.
        /// Starts with one cluster per user, then repeatedly collects the most similar cluster pairs
        /// (at most <c>numUsers</c> of them per pass) and merges them best-first until the target
        /// cluster count is reached (count mode), the best pair falls below
        /// <c>clusteringThreshold</c> (threshold mode), or no comparable pair remains.
        /// On completion sets <c>topRecsByUserID</c>, <c>clustersByUserID</c> and <c>clustersBuilt</c>.
        /// </summary>
        private void BuildClusters()
        {
            // Acquire before entering the try block so the finally clause never releases a lock
            // that was not actually taken.
            buildClustersLock.Lock();
            try
            {
                DataModel model    = this.DataModel;
                int       numUsers = model.GetNumUsers();

                if (numUsers == 0)
                {
                    topRecsByUserID  = new Dictionary<object, IList<RecommendedItem>>();
                    clustersByUserID = new Dictionary<object, ICollection<User>>();
                }
                else
                {
                    LinkedList<ICollection<User>> clusters = new LinkedList<ICollection<User>>();
                    // Begin with a cluster for each user:
                    foreach (User user in model.GetUsers())
                    {
                        ICollection<User> newCluster = new HashedSet<User>();
                        newCluster.Add(user);
                        clusters.AddLast(newCluster);
                    }

                    bool done = false;
                    while (!done)
                    {
                        // We find a certain number of closest clusters...
                        bool full = false;
                        LinkedList<ClusterClusterPair> queue = new LinkedList<ClusterClusterPair>();

                        // Visit every unordered pair once: each cluster against all clusters after it.
                        for (LinkedListNode<ICollection<User>> firstNode = clusters.First;
                             firstNode != null;
                             firstNode = firstNode.Next)
                        {
                            ICollection<User> candidate1 = firstNode.Value;
                            for (LinkedListNode<ICollection<User>> secondNode = firstNode.Next;
                                 secondNode != null;
                                 secondNode = secondNode.Next)
                            {
                                ICollection<User> candidate2    = secondNode.Value;
                                double            pairSimilarity = clusterSimilarity.GetSimilarity(candidate1, candidate2);

                                // Skip incomparable pairs, and - once the queue is at capacity -
                                // pairs that do not beat the current worst entry.
                                if (double.IsNaN(pairSimilarity) ||
                                    (full && !(pairSimilarity > queue.Last.Value.Similarity)))
                                {
                                    continue;
                                }

                                InsertBySimilarity(queue, new ClusterClusterPair(candidate1, candidate2, pairSimilarity));

                                if (full)
                                {
                                    queue.RemoveLast();
                                }
                                else if (queue.Count > numUsers)
                                {
                                    // use numUsers as queue size limit
                                    full = true;
                                    queue.RemoveLast();
                                }
                            }
                        }

                        // No comparable pair at all: another pass would find the same nothing, so
                        // stop here instead of looping forever.
                        if (queue.Count == 0)
                        {
                            break;
                        }

                        // The first one is definitely the closest pair in existence so we can cluster
                        // the two together, put it back into the set of clusters, and start again. Instead
                        // we assume everything else in our list of closest cluster pairs is still pretty good,
                        // and we cluster them too.
                        while (queue.Count > 0)
                        {
                            if (!clusteringByThreshold && clusters.Count <= numClusters)
                            {
                                done = true;
                                break;
                            }

                            ClusterClusterPair top = queue.First.Value;
                            queue.RemoveFirst();

                            if (clusteringByThreshold && top.Similarity < clusteringThreshold)
                            {
                                done = true;
                                break;
                            }

                            ICollection<User> cluster1 = top.Cluster1;
                            ICollection<User> cluster2 = top.Cluster2;

                            // Pull out current two clusters from clusters
                            clusters.Remove(cluster1);
                            clusters.Remove(cluster2);

                            // The only catch is if a cluster showed up twice in the list of best cluster pairs;
                            // have to remove the others. Pull out anything referencing these clusters from queue.
                            // The successor is captured before removal because a removed node loses its links.
                            LinkedListNode<ClusterClusterPair> pairNode = queue.First;
                            while (pairNode != null)
                            {
                                LinkedListNode<ClusterClusterPair> nextNode = pairNode.Next;
                                ICollection<User> pair1 = pairNode.Value.Cluster1;
                                ICollection<User> pair2 = pairNode.Value.Cluster2;

                                if (pair1 == cluster1 || pair1 == cluster2 || pair2 == cluster1 || pair2 == cluster2)
                                {
                                    queue.Remove(pairNode);
                                }
                                pairNode = nextNode;
                            }

                            // Make new merged cluster
                            HashedSet<User> merged = new HashedSet<User>(/*cluster1.Count + cluster2.Count*/);
                            merged.AddAll(cluster1);
                            merged.AddAll(cluster2);

                            // Compare against other clusters; update queue if needed
                            // That new pair we're just adding might be pretty close to something else, so
                            // catch that case here and put it back into our queue
                            foreach (ICollection<User> cluster in clusters)
                            {
                                double similarity = clusterSimilarity.GetSimilarity(merged, cluster);

                                // Only pairs beating the current worst queued pair are queued. With an
                                // empty queue there is no baseline, so the pair is left for the next
                                // pass to rediscover.
                                if (queue.Count > 0 && similarity > queue.Last.Value.Similarity)
                                {
                                    InsertBySimilarity(queue, new ClusterClusterPair(merged, cluster, similarity));
                                }
                            }

                            // Finally add new cluster to list
                            clusters.AddLast(merged);
                        }
                    }

                    topRecsByUserID  = ComputeTopRecsPerUserID(clusters);
                    clustersByUserID = ComputeClustersPerUserID(clusters);
                }

                clustersBuilt = true;
            }
            finally
            {
                buildClustersLock.Unlock();
            }
        }

        /// <summary>
        /// Inserts <paramref name="pair"/> into <paramref name="queue"/> so that the queue stays ordered
        /// by descending similarity; the new pair goes after any existing entries of equal similarity.
        /// </summary>
        private static void InsertBySimilarity(LinkedList<ClusterClusterPair> queue, ClusterClusterPair pair)
        {
            // Walk back from the tail past every entry that is strictly less similar than the new pair.
            LinkedListNode<ClusterClusterPair> node = queue.Last;
            while (node != null && pair.Similarity > node.Value.Similarity)
            {
                node = node.Previous;
            }

            if (node == null)
            {
                // Empty queue, or the new pair beats everything already queued.
                queue.AddFirst(pair);
            }
            else
            {
                queue.AddAfter(node, pair);
            }
        }