예제 #1
0
        /// <summary>
        /// Click handler for the k-means++ run. Builds the document vector space, obtains the
        /// initial centroids from <c>CentroidCalculationsForTestKMeansPP</c>, runs
        /// <c>KMeansClustering</c>, then hands the resulting clusters to the label-matrix,
        /// report and visualization helpers.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Routed event data (not used).</param>
        private void KMeansPP(object sender, RoutedEventArgs e)
        {
            clustResultTxtBox.Document.Blocks.Clear();

            // Validate the user-supplied cluster count before any expensive work; a failed
            // conversion previously escaped the handler as an unhandled exception.
            int clusterNumber;
            if (!int.TryParse(txtboxClusterNumber.Text, out clusterNumber) || clusterNumber < 1)
            {
                clustResultTxtBox.AppendText("Cluster number must be a positive integer.");
                return;
            }

            // The stopwatch covers data preparation as well as the clustering itself.
            var clusterization_stopwatch = Stopwatch.StartNew();

            const string algorithm = " k-means++;";
            const string PPKMeans_label_resul_path = @"F:\Magistry files\data\PPKMeans_label_result4.txt";
            const string PPK_means_report_path = @"F:\Magistry files\reports\PPKMeans_report4.txt";
            const int totalIteration = 500;

            // NOTE(review): the results of this call and of DocumentsContainsTermToDictionary below
            // are not used in this handler. The calls are kept in their original order in case
            // they initialise shared state - confirm and remove them if they are pure.
            Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoading();
            Dictionary<int, string> docCollectionDictionary = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
            HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();
            Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);
            List<DocumentVector> vSpace1 = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

            List<Centroid> firstCentroidList = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.InitialCentroidCalculation.CentroidCalculationsForTestKMeansPP(vSpace1, clusterNumber);
            List<Centroid> resultSet = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.KMeans.KMeansClustering(vSpace1, clusterNumber, totalIteration, firstCentroidList);

            clusterization_stopwatch.Stop();

            // The returned label matrix and report text are currently not consumed by the UI;
            // the helpers are still invoked with their output paths.
            Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(resultSet, PPKMeans_label_resul_path);
            RaportGeneration.ReleaseRaportGenerationFunction(resultSet, clusterNumber, totalIteration, clusterization_stopwatch, PPK_means_report_path, algorithm);
            invokeFilesToVisualizationGenerator(resultSet, algorithm);
        }
예제 #2
0
        /// <summary>
        /// Click handler for the fuzzy c-means run. Builds the document vector space, calls
        /// <c>FuzzyCMeans.Fcm</c> to obtain the membership array and iteration count, passes the
        /// array to <c>WriteSimilarityArrayToFile</c>, assigns documents to clusters, and hands
        /// the clusters to the label-matrix, report and visualization helpers.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Routed event data (not used).</param>
        private void FuzzyKMeans_Click(object sender, RoutedEventArgs e)
        {
            clustResultTxtBox.Document.Blocks.Clear();

            // Validate the user-supplied cluster count before any expensive work; a failed
            // conversion previously escaped the handler as an unhandled exception.
            int clusterNumber;
            if (!int.TryParse(txtboxClusterNumber.Text, out clusterNumber) || clusterNumber < 1)
            {
                clustResultTxtBox.AppendText("Cluster number must be a positive integer.");
                return;
            }

            // The stopwatch covers data preparation as well as the clustering itself.
            var clusterization_stopwatch = Stopwatch.StartNew();

            const string algorithm = " Fuzzy c-Means;";
            const string Fuzzy_K_means_clusterization_result = @"F:\Magistry files\Fuzzy_KMeans_result6.txt";
            const string Fuzzy_K_means_label_result = @"F:\Magistry files\FCM_label_result6.txt";
            const string Fuzzy_K_means_report_path = @"F:\Magistry files\reports\FCM_report6.txt";

            // NOTE(review): textbook fuzzy c-means requires a fuzzifier greater than 1; confirm
            // how Fcm interprets this value before changing it.
            const float fuzziness = 0.5f;
            const float epsilon = 0.003f;

            // NOTE(review): the results of this call and of DocumentsContainsTermToDictionary below
            // are not used in this handler. The calls are kept in their original order in case
            // they initialise shared state - confirm and remove them if they are pure.
            Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoading();
            Dictionary<int, string> docCollectionDictionary = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
            HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();
            Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);
            List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

            List<Centroid> clusterSet = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.CreateClusterSet(clusterNumber);

            // Item1 is the float[,] result array, Item2 the iteration count reported by Fcm.
            var result = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.Fcm(vSpace, clusterNumber, epsilon, fuzziness);
            float[,] Result_fcm = result.Item1;
            int totalIteration = result.Item2;

            Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.WriteSimilarityArrayToFile(Result_fcm, Fuzzy_K_means_clusterization_result);
            var assignedResult = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.AssignDocsToClusters(Result_fcm, clusterNumber, vSpace, clusterSet);

            clusterization_stopwatch.Stop();

            // Only the cluster list (Item2) is used; the label array in Item1 is not consumed here.
            List<Centroid> resultSet = assignedResult.Item2;

            // The returned label matrix and report text are currently not consumed by the UI;
            // the helpers are still invoked with their output paths.
            Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(resultSet, Fuzzy_K_means_label_result);
            RaportGeneration.ReleaseRaportGenerationFunction(resultSet, clusterNumber, totalIteration, clusterization_stopwatch, Fuzzy_K_means_report_path, algorithm);
            invokeFilesToVisualizationGenerator(resultSet, algorithm);
        }
예제 #3
0
        /// <summary>
        /// Click handler for the plain k-means run. Builds the document vector space, obtains the
        /// initial centroids from <c>CentroidCalculationsForKMeans</c>, runs
        /// <c>KMeansClustering</c>, then hands the resulting clusters to the label-matrix,
        /// report and visualization helpers.
        /// </summary>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Routed event data (not used).</param>
        private void KMeans(object sender, RoutedEventArgs e)
        {
            clustResultTxtBox.Document.Blocks.Clear();

            // Validate the user-supplied cluster count before any expensive work; a failed
            // conversion previously escaped the handler as an unhandled exception.
            int clusterNumber;
            if (!int.TryParse(txtboxClusterNumber.Text, out clusterNumber) || clusterNumber < 1)
            {
                clustResultTxtBox.AppendText("Cluster number must be a positive integer.");
                return;
            }

            // The stopwatch covers data preparation as well as the clustering itself.
            var clusterization_stopwatch = Stopwatch.StartNew();

            const string algorithm = " k-means;";
            const string PKMeans_label_resul_path = @"F:\Magistry files\data\PKMeans_label_result6.txt";
            const string K_means_report_path = @"F:\Magistry files\reports\PKMeans_report6.txt";
            const int totalIteration = 500;

            Dictionary<int, string> docCollectionDictionary = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
            HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();

            // NOTE(review): the result of this call is not used in this handler. The call is kept
            // in case it initialises shared state - confirm and remove it if it is pure.
            Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);
            List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

            List<Centroid> firstCentroidList = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.KMeans.CentroidCalculationsForKMeans(vSpace, clusterNumber);
            List<Centroid> resultSet = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.KMeans.KMeansClustering(vSpace, clusterNumber, totalIteration, firstCentroidList);

            clusterization_stopwatch.Stop();

            // The returned label matrix and report text are currently not consumed by the UI;
            // the helpers are still invoked with their output paths.
            // Evaluation metrics from Tests.* (inter/intra-cluster distances, recall, precision,
            // purity, F1/G measure, NMI, entropy) were previously computed at this point and are
            // currently disabled.
            Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(resultSet, PKMeans_label_resul_path);
            RaportGeneration.ReleaseRaportGenerationFunction(resultSet, clusterNumber, totalIteration, clusterization_stopwatch, K_means_report_path, algorithm);
            invokeFilesToVisualizationGenerator(resultSet, algorithm);
        }
예제 #4
0
        /// <summary>
        /// Click handler for the gravitational clustering run. Builds the document vector space,
        /// calls <c>GravitationalAlgorithm</c>, extracts clusters with <c>GetClusters</c>,
        /// de-duplicates them with <c>RemoveSameElementsFromClusters</c>, then invokes the
        /// label-matrix, report and visualization helpers.
        /// </summary>
        /// <remarks>
        /// The cluster-number text box is not read here: the cluster count passed to the report is
        /// <c>get_Clusters.Count</c>, so a value in that box cannot affect (or break) this run.
        /// </remarks>
        /// <param name="sender">Control that raised the event (not used).</param>
        /// <param name="e">Routed event data (not used).</param>
        private void Gravitational_Click(object sender, RoutedEventArgs e)
        {
            clustResultTxtBox.Document.Blocks.Clear();

            // Validate the user-supplied iteration count before any expensive work; a failed
            // conversion previously escaped the handler as an unhandled exception.
            int M;
            if (!int.TryParse(txtboxIterationCount.Text, out M) || M < 0)
            {
                clustResultTxtBox.AppendText("Iteration count must be a non-negative integer.");
                return;
            }

            // The stopwatch covers data preparation as well as the clustering itself.
            var clusterization_stopwatch = Stopwatch.StartNew();

            const string algorithm = " Gravitational clustering algorithm;";
            const string gravitational_label_resul_path = @"F:\Magistry files\data\Gravitational_label_result5.txt";
            const string Gravitational_report_path = @"F:\Magistry files\reports\Gravitational_report5.txt";

            // Algorithm parameters. Earlier revisions used G = -1.28171817154F and
            // epsilon = -3.28171817154F, annotated as "G=1*e-4" / "epsilon=1*e-6 according to
            // 3.2.2 in article"; the values below are the ones currently in effect.
            float G = 6.67408313131313131F * (float)Math.Pow(10, (-6));
            const float deltaG = 0.001F;
            const float epsilon = 0.1F;
            const float alpha = 0.06F;

            Dictionary<int, string> docCollectionDictionary = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
            HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();

            // NOTE(review): the result of this call is not used in this handler. The call is kept
            // in case it initialises shared state - confirm and remove it if it is pure.
            Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);
            List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

            var results = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.GravitationalClusteringAlgorithm.GravitationalAlgorithm(vSpace, G, deltaG, M, epsilon);
            var get_Clusters = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.GravitationalClusteringAlgorithm.GetClusters(results, alpha, vSpace);
            List<Centroid> resultSet = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.GravitationalClusteringAlgorithm.RemoveSameElementsFromClusters(get_Clusters);

            // Stop timing before label extraction, matching the other clustering handlers.
            clusterization_stopwatch.Stop();

            // NOTE(review): the label matrix and the report are produced from get_Clusters, while
            // visualization receives the de-duplicated resultSet. This mirrors the original code;
            // confirm whether all three should use the same list.
            Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(get_Clusters, gravitational_label_resul_path);
            RaportGeneration.ReleaseRaportGenerationFunction(get_Clusters, get_Clusters.Count, M, clusterization_stopwatch, Gravitational_report_path, algorithm);
            invokeFilesToVisualizationGenerator(resultSet, algorithm);
        }