/// <summary>
/// Click handler that clusters the document collection with k-means using
/// initial centroids from
/// <c>InitialCentroidCalculation.CentroidCalculationsForTestKMeansPP</c>,
/// then passes the clusters to the label-matrix extraction, the report
/// generator and the visualization generator.
/// </summary>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Routed event data (not used).</param>
private void KMeansPP(object sender, RoutedEventArgs e)
{
    clustResultTxtBox.Document.Blocks.Clear();

    // Validate the user-supplied cluster count before doing any work:
    // Convert.ToInt32 would throw out of the event handler on empty or
    // non-numeric text.
    int clusterNumber;
    if (!int.TryParse(txtboxClusterNumber.Text, out clusterNumber) || clusterNumber < 1)
    {
        clustResultTxtBox.AppendText("Cluster number must be a positive integer.");
        return;
    }

    var clusterization_stopwatch = Stopwatch.StartNew();
    string message = null;
    string algorithm = " k-means++;";
    string labelResultPath = @"F:\Magistry files\data\PPKMeans_label_result4.txt";
    string reportPath = @"F:\Magistry files\reports\PPKMeans_report4.txt";
    int totalIteration = 500;

    Dictionary<int, string> docCollectionDictionary =
        Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
    HashSet<string> termCollection =
        Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();

    // The returned word index is not used by this handler. The call is kept
    // in case it has side effects the vector-space build relies on —
    // TODO confirm and drop if it is pure.
    Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);

    List<DocumentVector> vSpace1 = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

    List<Centroid> firstCentroidList =
        Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.InitialCentroidCalculation.CentroidCalculationsForTestKMeansPP(vSpace1, clusterNumber);
    List<Centroid> resultSet =
        Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.KMeans.KMeansClustering(vSpace1, clusterNumber, totalIteration, firstCentroidList);

    // Stop timing before the label/report output so only data preparation
    // and clustering are measured.
    clusterization_stopwatch.Stop();

    // Return value (label matrix) is not needed here; the call receives the
    // label output path.
    Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(resultSet, labelResultPath);

    message = RaportGeneration.ReleaseRaportGenerationFunction(resultSet, clusterNumber, totalIteration, clusterization_stopwatch, reportPath, algorithm);
    //clustResultTxtBox.AppendText(message);

    invokeFilesToVisualizationGenerator(resultSet, algorithm);
}
/// <summary>
/// Click handler that clusters the document collection with
/// <c>FuzzyCMeans.Fcm</c>, writes the resulting membership array through
/// <c>FuzzyCMeans.WriteSimilarityArrayToFile</c>, assigns documents to
/// clusters, and then passes the clusters to the label-matrix extraction,
/// the report generator and the visualization generator.
/// </summary>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Routed event data (not used).</param>
private void FuzzyKMeans_Click(object sender, RoutedEventArgs e)
{
    clustResultTxtBox.Document.Blocks.Clear();

    // Validate the user-supplied cluster count before doing any work:
    // Convert.ToInt32 would throw out of the event handler on empty or
    // non-numeric text.
    int clusterNumber;
    if (!int.TryParse(txtboxClusterNumber.Text, out clusterNumber) || clusterNumber < 1)
    {
        clustResultTxtBox.AppendText("Cluster number must be a positive integer.");
        return;
    }

    var clusterization_stopwatch = Stopwatch.StartNew();
    string message = null;
    string algorithm = " Fuzzy c-Means;";
    string membershipResultPath = @"F:\Magistry files\Fuzzy_KMeans_result6.txt";
    string labelResultPath = @"F:\Magistry files\FCM_label_result6.txt";
    string reportPath = @"F:\Magistry files\reports\FCM_report6.txt";

    // NOTE(review): standard fuzzy c-means requires a fuzzifier greater
    // than 1; 0.5 is outside that range unless Fcm interprets this
    // parameter differently — confirm against the Fcm implementation.
    float fuzziness = 0.5f;
    float epsilon = 0.003f;

    Dictionary<int, string> docCollectionDictionary =
        Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
    HashSet<string> termCollection =
        Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();

    // The returned word index is not used by this handler. The call is kept
    // in case it has side effects the vector-space build relies on —
    // TODO confirm and drop if it is pure.
    Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);

    List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

    List<Centroid> resultSet =
        Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.CreateClusterSet(clusterNumber);

    // Fcm returns a pair: Item1 is the float[,] result array, Item2 is
    // reported below as the iteration count.
    var result = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.Fcm(vSpace, clusterNumber, epsilon, fuzziness);
    float[,] Result_fcm = result.Item1;
    int totalIteration = result.Item2;

    Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.WriteSimilarityArrayToFile(Result_fcm, membershipResultPath);

    var assignedResult =
        Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.AssignDocsToClusters(Result_fcm, clusterNumber, vSpace, resultSet);

    clusterization_stopwatch.Stop();

    // Only the cluster list (Item2) is used; Item1 (an int[] of labels) is
    // superseded by the label-matrix extraction below.
    resultSet = assignedResult.Item2;

    // Return value (label matrix) is not needed here; the call receives the
    // label output path.
    Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(resultSet, labelResultPath);

    message = RaportGeneration.ReleaseRaportGenerationFunction(resultSet, clusterNumber, totalIteration, clusterization_stopwatch, reportPath, algorithm);
    //clustResultTxtBox.AppendText(message);

    invokeFilesToVisualizationGenerator(resultSet, algorithm);
}
/// <summary>
/// Click handler that clusters the document collection with k-means using
/// initial centroids from <c>KMeans.CentroidCalculationsForKMeans</c>, then
/// passes the clusters to the label-matrix extraction, the report generator
/// and the visualization generator.
/// </summary>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Routed event data (not used).</param>
private void KMeans(object sender, RoutedEventArgs e)
{
    clustResultTxtBox.Document.Blocks.Clear();

    // Validate the user-supplied cluster count before doing any work:
    // Convert.ToInt32 would throw out of the event handler on empty or
    // non-numeric text.
    int clusterNumber;
    if (!int.TryParse(txtboxClusterNumber.Text, out clusterNumber) || clusterNumber < 1)
    {
        clustResultTxtBox.AppendText("Cluster number must be a positive integer.");
        return;
    }

    var clusterization_stopwatch = Stopwatch.StartNew();
    string message = null;
    string algorithm = " k-means;";
    string labelResultPath = @"F:\Magistry files\data\PKMeans_label_result6.txt";
    string reportPath = @"F:\Magistry files\reports\PKMeans_report6.txt";
    int totalIteration = 500;

    Dictionary<int, string> docCollectionDictionary =
        Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
    HashSet<string> termCollection =
        Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();

    // The returned word index is not used by this handler. The call is kept
    // in case it has side effects the vector-space build relies on —
    // TODO confirm and drop if it is pure.
    Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);

    List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

    List<Centroid> firstCentroidList =
        Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.KMeans.CentroidCalculationsForKMeans(vSpace, clusterNumber);
    List<Centroid> resultSet =
        Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.KMeans.KMeansClustering(vSpace, clusterNumber, totalIteration, firstCentroidList);

    // Stop timing before the label/report output so only data preparation
    // and clustering are measured.
    clusterization_stopwatch.Stop();

    // Return value (label matrix) is not needed here; the call receives the
    // label output path.
    Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(resultSet, labelResultPath);

    message = RaportGeneration.ReleaseRaportGenerationFunction(resultSet, clusterNumber, totalIteration, clusterization_stopwatch, reportPath, algorithm);
    //clustResultTxtBox.AppendText(message);

    invokeFilesToVisualizationGenerator(resultSet, algorithm);
}
/// <summary>
/// Click handler that clusters the document collection with
/// <c>GravitationalClusteringAlgorithm</c> (GravitationalAlgorithm,
/// GetClusters, RemoveSameElementsFromClusters), then passes the clusters to
/// the label-matrix extraction, the report generator and the visualization
/// generator. The iteration count is read from <c>txtboxIterationCount</c>;
/// the cluster count reported is the number of clusters the algorithm
/// produced, not a user-supplied value.
/// </summary>
/// <param name="sender">Control that raised the event (not used).</param>
/// <param name="e">Routed event data (not used).</param>
private void Gravitational_Click(object sender, RoutedEventArgs e)
{
    clustResultTxtBox.Document.Blocks.Clear();

    // Validate the user-supplied iteration count before doing any work:
    // Convert.ToInt32 would throw out of the event handler on empty or
    // non-numeric text. The cluster-number text box is intentionally not
    // parsed here because this algorithm never used that value.
    int M;
    if (!int.TryParse(txtboxIterationCount.Text, out M) || M < 1)
    {
        clustResultTxtBox.AppendText("Iteration count must be a positive integer.");
        return;
    }

    var clusterization_stopwatch = Stopwatch.StartNew();
    string message = null;
    string algorithm = " Gravitational clustering algorithm;";
    string labelResultPath = @"F:\Magistry files\data\Gravitational_label_result5.txt";
    string reportPath = @"F:\Magistry files\reports\Gravitational_report5.txt";

    // Algorithm parameters, passed unchanged to GravitationalAlgorithm and
    // GetClusters.
    float G = 6.67408313131313131F * (float)Math.Pow(10, (-6));
    float deltaG = 0.001F;
    float epsilon = 0.1F;
    float alpha = 0.06F;

    Dictionary<int, string> docCollectionDictionary =
        Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
    HashSet<string> termCollection =
        Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();

    // The returned word index is not used by this handler. The call is kept
    // in case it has side effects the vector-space build relies on —
    // TODO confirm and drop if it is pure.
    Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);

    List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

    var results =
        Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.GravitationalClusteringAlgorithm.GravitationalAlgorithm(vSpace, G, deltaG, M, epsilon);
    var get_Clusters =
        Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.GravitationalClusteringAlgorithm.GetClusters(results, alpha, vSpace);
    List<Centroid> resultSet =
        Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.GravitationalClusteringAlgorithm.RemoveSameElementsFromClusters(get_Clusters);

    // Stop timing before the label-matrix extraction, matching the other
    // clustering handlers, so the reported durations are comparable.
    clusterization_stopwatch.Stop();

    // NOTE(review): the label matrix and the report are built from
    // get_Clusters while the visualization uses resultSet (the output of
    // RemoveSameElementsFromClusters). Confirm whether that helper mutates
    // its argument in place or whether these should all use resultSet.
    Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(get_Clusters, labelResultPath);

    message = RaportGeneration.ReleaseRaportGenerationFunction(get_Clusters, get_Clusters.Count, M, clusterization_stopwatch, reportPath, algorithm);
    //clustResultTxtBox.AppendText(message);

    invokeFilesToVisualizationGenerator(resultSet, algorithm);
}