// Shared random source for centroid initialisation. Creating a new Random per
// call can yield identical sequences when calls happen in quick succession on
// runtimes that seed the parameterless constructor from the system clock.
private static readonly Random CentroidRandomSource = new Random();

// Guards CentroidRandomSource, because System.Random instances are not thread-safe.
private static readonly object CentroidRandomGate = new object();

/// <summary>
/// Creates a new centroid whose TFIDF vector is filled with random values,
/// and records <paramref name="dimensions"/> on this instance.
/// </summary>
/// <param name="dimensions">Number of TFIDF components to generate.</param>
/// <returns>A new centroid with <paramref name="dimensions"/> random TFIDF components in the range 0 to 1.</returns>
protected CentroidsKMeansPPKP FillRandomCluster(int dimensions)
{
    this.dimensions = dimensions;
    CentroidsKMeansPPKP cl = new CentroidsKMeansPPKP(dimensions);

    // Author's note (translated from Polish): the point here is not random
    // numbers in TFIDF but random documents from the collection.
    lock (CentroidRandomGate)
    {
        for (var i = 0; i < dimensions; i++)
        {
            // Scale a non-negative random integer down to the range 0 to 1.
            cl.TFIDF[i] = (float)CentroidRandomSource.Next(0, Int32.MaxValue) / (float)Int32.MaxValue;
        }
    }

    return cl;
}
/// <summary>
/// Returns the cluster whose centre has the smallest TFIDF distance to <paramref name="doc"/>.
/// </summary>
/// <param name="doc">Document vector to match against every cluster in <c>clusters</c>.</param>
/// <returns>
/// The cluster with the smallest distance; on ties the earliest cluster in enumeration order wins.
/// </returns>
/// <remarks>
/// <c>clusters.First()</c> is evaluated up front, so an empty cluster collection
/// raises the exception thrown by <c>First()</c>.
/// </remarks>
protected CentroidsKMeansPPKP FindNearestClusterCenter(DocumentVector doc)
{
    // Start from +infinity rather than a sentinel derived from `dimensions`:
    // with the old sentinel, an unset (zero) `dimensions` or distances at or above it
    // meant no cluster ever won and the first cluster was returned regardless of distance.
    var minDistance = double.PositiveInfinity;
    CentroidsKMeansPPKP bestClusterCenter = clusters.First();

    foreach (var cluster in clusters)
    {
        var distance = cluster.ComputeTFIDFDistance(doc);
        if (distance < minDistance)
        {
            bestClusterCenter = cluster;
            minDistance = distance;
        }
    }

    return bestClusterCenter;
}
// Changes made on 29.10.2017.
/// <summary>
/// Runs one assignment pass: every document in <c>DocCollection</c> is added to the
/// <c>AssignedDocuments</c> of its nearest cluster. On the final pass
/// (<paramref name="current"/> equal to <paramref name="max"/> - 1) every cluster
/// is additionally updated via <c>Update(true)</c>.
/// </summary>
/// <param name="current">Zero-based index of the current pass.</param>
/// <param name="max">Total number of passes; the update step runs when <paramref name="current"/> is <c>max - 1</c>.</param>
protected void Iteration(int current, int max)
{
    documentMoved = false;

    // Tracks the cluster chosen for the most recently processed document.
    CentroidsKMeansPPKP cluster = null;
    foreach (var doc in DocCollection)
    {
        cluster = FindNearestClusterCenter(doc);
        cluster.AssignedDocuments.Add(doc);
    }

    if (current == max - 1)
    {
        foreach (var clusterr in clusters)
        {
            clusterr.Update(true);
        }
    }

    // `cluster` is still null when DocCollection is empty; guard to avoid a
    // NullReferenceException in that case.
    // NOTE(review): only the cluster chosen for the last document is cleared here;
    // the other clusters keep their assignments. Confirm whether that is intended.
    if (cluster != null)
    {
        cluster.AssignedDocuments.Clear();
    }
}