private void button4_Click(object sender, EventArgs e)
{
    string path_data_vSpace = "E:\\Dropbox\\Masters\\myMSc\\PracticalPart\\Sematic_K-MEANSClustering\\MScDataSets\\Reuters21578\\data\\00_" + maxNoDoc + "_vSpace.xml";
    if (cboxDataSet.Text == "Reu_01")
    {
        path_data_vSpace = "E:\\Dropbox\\Masters\\myMSc\\PracticalPart\\Sematic_K-MEANSClustering\\MScDataSets\\Reuters21578\\data\\01_" + maxNoDoc + "_vSpace.xml";
    }
    else if (cboxDataSet.Text == "Re0")
    {
        // path for the Re0 dataset is not assigned here
    }
    else
    {
        // this is the test dataset, so the path is already assigned
    }

    if (File.Exists(path_data_vSpace))
    {
        // Reuse a previously serialized vector space instead of recomputing it.
        vSpace = DeSerializeObject<List<DocumentVector>>(path_data_vSpace);
    }
    else
    {
        var watch = System.Diagnostics.Stopwatch.StartNew();
        vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
        watch.Stop();
        var elapsedMs = watch.ElapsedMilliseconds;
        lblTime.Text = "Time: " + elapsedMs;
        SerializeObject(vSpace, path_data_vSpace);
    }
    MessageBox.Show("Done");
}
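// Note: SerializeObject/DeSerializeObject are referenced above but not shown in this
// collection. A minimal sketch of what they might look like, assuming the types are
// XmlSerializer-compatible (the original implementation could differ, e.g. use
// DataContractSerializer or a binary formatter):
public static void SerializeObject<T>(T obj, string path)
{
    var serializer = new System.Xml.Serialization.XmlSerializer(typeof(T));
    using (var stream = File.Create(path))
    {
        serializer.Serialize(stream, obj);
    }
}

public static T DeSerializeObject<T>(string path)
{
    var serializer = new System.Xml.Serialization.XmlSerializer(typeof(T));
    using (var stream = File.OpenRead(path))
    {
        return (T)serializer.Deserialize(stream);
    }
}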
public ActionResult TestAlgorithm(TestAlgorithmModel model)
{
    var list = _taiLieuVanBanService.GetDocuments().Take(model.Amount).ToList();
    list.Add(model.Name);
    var docCollection = new DocumentCollection() { DocumentList = list };
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    List<Centroid> resultSet = DocumnetClustering.DocumentCluster(model.Cluster, vSpace, model.Name);
    string docNear = DocumnetClustering.FindClosestDocument(); // reads the result of the DocumentCluster call above
    var resultModel = new TestAlgorithmModel
    {
        Name = model.Name,
        Amount = model.Amount,
        Cluster = model.Cluster,
        Centroids = resultSet,
        DocumentNear = docNear
    };
    return View(resultModel);
}
public ActionResult StorageSuggestion(string document, string type)
{
    // Vietnamese: "No document/text with matching content was found! Creating a new record."
    string local = "Không tìm thấy tài liêu/văn bản có cùng nội dung! Tạo hồ sơ mới.";
    var hosos = AutoCompleteTextHoSos(GetHoSos());
    var list = _taiLieuVanBanService.GetDocuments();
    list.Add(document);
    var docCollection = new DocumentCollection() { DocumentList = list };
    var cluster = _taiLieuVanBanService.CountDocumentType(type);
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    List<Centroid> resultSet = DocumnetClustering.DocumentCluster(cluster, vSpace, document);
    string documentNeedSearch = DocumnetClustering.FindClosestDocument();
    if (!string.IsNullOrEmpty(documentNeedSearch))
    {
        var taiLieuVanBan = _taiLieuVanBanService.Get(p => p.NoiDung == documentNeedSearch);
        local = hosos.FirstOrDefault(p => p.Id == taiLieuVanBan.HoSoId).Text;
    }
    return Json(new { da = local }, JsonRequestBehavior.AllowGet);
}
private void KMeansPP(object sender, RoutedEventArgs e)
{
    clustResultTxtBox.Document.Blocks.Clear();
    var clusterization_stopwatch = Stopwatch.StartNew();
    string message = null;
    string algorithm = " k-means++;";
    string PPKMeans_label_result_path = @"F:\Magistry files\data\PPKMeans_label_result4.txt";
    string PPKMeans_report_path = @"F:\Magistry files\reports\PPKMeans_report4.txt";
    List<string> docCollection = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoading();
    Dictionary<int, string> docCollectionDictionary = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
    HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();
    Dictionary<string, int> wordIndex1 = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);
    List<DocumentVector> vSpace1 = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

    int totalIteration = 500;
    int clusterNumber = Convert.ToInt32(txtboxClusterNumber.Text);

    // Seed the centroids with k-means++ initialization, then run standard k-means.
    List<Centroid> firstCentroidList = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.InitialCentroidCalculation.CentroidCalculationsForTestKMeansPP(vSpace1, clusterNumber);
    List<Centroid> resultSet = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.KMeans.KMeansClustering(vSpace1, clusterNumber, totalIteration, firstCentroidList);
    clusterization_stopwatch.Stop();

    int[] PPKMeans_label_matrix = Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(resultSet, PPKMeans_label_result_path);
    message = RaportGeneration.ReleaseRaportGenerationFunction(resultSet, clusterNumber, totalIteration, clusterization_stopwatch, PPKMeans_report_path, algorithm);
    //clustResultTxtBox.AppendText(message);
    invokeFilesToVisualizationGenerator(resultSet, algorithm);
}
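// Note: CentroidCalculationsForTestKMeansPP is not shown in this collection. For
// orientation only, a minimal sketch of standard k-means++ seeding (D^2-weighted
// sampling) over plain float[] vectors; KMeansPPSeeds and Distance are hypothetical
// helpers, not this library's API (assumes using System, System.Collections.Generic,
// and System.Linq):
static List<float[]> KMeansPPSeeds(List<float[]> points, int k, Random rng)
{
    // First seed: a uniformly random point.
    var seeds = new List<float[]> { points[rng.Next(points.Count)] };
    while (seeds.Count < k)
    {
        // Weight every point by its squared distance to the nearest chosen seed,
        // then sample the next seed proportionally to those weights.
        double[] d2 = points.Select(p => seeds.Min(s => (double)Distance(p, s)))
                            .Select(d => d * d)
                            .ToArray();
        double pick = rng.NextDouble() * d2.Sum(), acc = 0;
        int idx = 0;
        while (idx < points.Count - 1 && (acc += d2[idx]) < pick) idx++;
        seeds.Add(points[idx]);
    }
    return seeds;
}

// Hypothetical distance helper: plain Euclidean distance between two vectors.
static float Distance(float[] a, float[] b)
{
    double sum = 0;
    for (int i = 0; i < a.Length; i++) sum += (a[i] - b[i]) * (a[i] - b[i]);
    return (float)Math.Sqrt(sum);
}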
private void btnStartClustering_Click(object sender, EventArgs e)
{
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    int totalIteration = 0;
    List<Centroid> resultSet = DocumnetClustering.PrepareDocumentCluster(int.Parse(txtClusterNo.Text), vSpace, ref totalIteration);

    string msg = string.Empty;
    int count = 1;
    foreach (Centroid c in resultSet)
    {
        msg += String.Format("------------------------------[ CLUSTER {0} ]-----------------------------{1}", count, System.Environment.NewLine);
        foreach (DocumentVector document in c.GroupedDocument)
        {
            msg += document.Content + System.Environment.NewLine;
            if (c.GroupedDocument.Count > 1)
            {
                msg += String.Format("{0}-------------------------------------------------------------------------------{0}", System.Environment.NewLine);
            }
        }
        msg += "-------------------------------------------------------------------------------" + System.Environment.NewLine;
        count++;
    }
    richTextBox1.Text = msg;
    lblTotalIteration.Text = totalIteration.ToString();
}
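// Note: PrepareDocumentCluster groups the TF-IDF document vectors by similarity.
// A minimal sketch of the cosine measure such k-means code typically relies on
// (a SimilarityMatrics.FindCosineSimilarity helper is referenced elsewhere in this
// collection but its implementation is not shown, so this is an assumption, not
// that library's code):
static float CosineSimilarity(float[] a, float[] b)
{
    double dot = 0, normA = 0, normB = 0;
    for (int i = 0; i < a.Length; i++)
    {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    if (normA == 0 || normB == 0) return 0f; // guard against zero vectors
    return (float)(dot / (Math.Sqrt(normA) * Math.Sqrt(normB)));
}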
/// <summary>
/// Start training
/// </summary>
public void train()
{
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    totalIteration = 0;
    resultSet = DocumnetClustering.PrepareDocumentCluster(txtClusterNum, vSpace, ref totalIteration);
    Console.WriteLine("totalIteration: " + totalIteration.ToString());
}
public ActionResult Test()
{
    var list = _taiLieuVanBanService.GetDocuments();
    var docCollection = new DocumentCollection() { DocumentList = list };
    var cluster = _taiLieuVanBanService.CountDocumentType("Thông Báo"); // Vietnamese: "Announcement"
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    // Query text is Vietnamese for "new government announcement".
    List<Centroid> resultSet = DocumnetClustering.DocumentCluster(cluster, vSpace, "thông báo chính phủ mới");
    return View(resultSet);
}
public string GetFromCurrency(string priceToConvert)
{
    var from_currency = "";
    Preprocess(priceToConvert);
    int totalIteration = 0;
    int final_index = -1;
    int collectionNumber = docCollection.DocumentList.Count - 1;
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    List<Centroid> resultSet = DocumnetClustering.PrepareDocumentCluster(collectionNumber, vSpace, ref totalIteration, ref final_index, currency);
    // The first comma-separated field of the closest document's content holds the currency code.
    from_currency = resultSet[final_index].GroupedDocument[0].Content.Split(',')[0];
    return from_currency;
}
private void FuzzyKMeans_Click(object sender, RoutedEventArgs e)
{
    clustResultTxtBox.Document.Blocks.Clear();
    var clusterization_stopwatch = Stopwatch.StartNew();
    string message = null;
    string algorithm = " Fuzzy c-Means;";
    List<string> docCollection = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoading();
    Dictionary<int, string> docCollectionDictionary = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
    HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();
    Dictionary<string, int> wordIndex = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);
    List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

    string Fuzzy_K_means_clusterization_result = @"F:\Magistry files\Fuzzy_KMeans_result6.txt";
    string Fuzzy_K_means_label_result = @"F:\Magistry files\FCM_label_result6.txt";
    string Fuzzy_K_means_report_path = @"F:\Magistry files\reports\FCM_report6.txt";
    float fuzziness = 0.5f;
    float epsilon = 0.003f;
    int totalIteration = 0;
    int clusterNumber = Convert.ToInt32(txtboxClusterNumber.Text);
    List<Centroid> resultSet = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.CreateClusterSet(clusterNumber);
    float[,] Result_fcm;

    /*
     * Result_fcm = FuzzyKMeans.Fcm(vSpace, clusterNumber, epsilon, fuzziness, termCollection);
     * FuzzyKMeans.WriteSimilarityArrayToFile(Result_fcm, Fuzzy_K_means_clusterization_result);
     * resultSet = FuzzyKMeans.AssignDocsToClusters(Result_fcm, clusterNumber, vSpace);
     * FuzzyKMeans.Show_clusters(vSpace, Result_fcm, clusterNumber);
     */

    // Run fuzzy c-means: Item1 is the membership matrix, Item2 the iteration count.
    var result = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.Fcm(vSpace, clusterNumber, epsilon, fuzziness);
    Result_fcm = result.Item1;
    totalIteration = result.Item2;
    Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.WriteSimilarityArrayToFile(Result_fcm, Fuzzy_K_means_clusterization_result);
    var assignedResult = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.FuzzyCMeans.AssignDocsToClusters(Result_fcm, clusterNumber, vSpace, resultSet);
    clusterization_stopwatch.Stop();

    int[] FuzzyKMeans_label_matrix1 = assignedResult.Item1;
    resultSet = assignedResult.Item2;
    int[] FuzzyKMeans_label_matrix = Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(resultSet, Fuzzy_K_means_label_result);
    message = RaportGeneration.ReleaseRaportGenerationFunction(resultSet, clusterNumber, totalIteration, clusterization_stopwatch, Fuzzy_K_means_report_path, algorithm);
    //clustResultTxtBox.AppendText(message);
    invokeFilesToVisualizationGenerator(resultSet, algorithm);
}
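// Note: the Fcm implementation itself is not shown here. For orientation, a sketch
// of the textbook fuzzy c-means membership update
//     u_ij = 1 / sum_k (d_ij / d_ik)^(2/(m-1)),
// where m > 1 is the fuzziness exponent and all distances are assumed nonzero;
// this is the standard formula, not necessarily what this library computes:
static float Membership(float dij, float[] distancesToAllCenters, float m)
{
    double sum = 0;
    foreach (float dik in distancesToAllCenters)
        sum += Math.Pow(dij / dik, 2.0 / (m - 1));
    return (float)(1.0 / sum);
}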
private void btnStartClustering_Click(object sender, EventArgs e)
{
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    int totalIteration = 0;
    List<Centroid> resultSet = DocumnetClustering.PrepareDocumentCluster(int.Parse(txtClusterNo.Text), vSpace, ref totalIteration);

    string msg = string.Empty;
    string k = string.Empty;
    int count = 1;
    foreach (Centroid c in resultSet)
    {
        msg += String.Format("------------------------------[ CLUSTER {0} ]-----------------------------{1}", count, System.Environment.NewLine);
        k += String.Format("[ CLUSTER {0} ]", count);
        foreach (DocumentVector document in c.GroupedDocument)
        {
            // Collect the cluster's significant terms: keys whose weight exceeds
            // the threshold, skipping punctuation.
            for (int i = 0; i < document.keys.Length; i++)
            {
                if (document.VectorSpace[i] > 0.005 && document.keys[i] != ".")
                {
                    k += document.keys[i] + ",";
                }
            }
            msg += document.Content + System.Environment.NewLine;
            if (c.GroupedDocument.Count > 1)
            {
                msg += String.Format("{0}-------------------------------------------------------------------------------{0}", System.Environment.NewLine);
            }
        }
        msg += "-------------------------------------------------------------------------------" + System.Environment.NewLine;
        k += System.Environment.NewLine;
        count++;
    }
    richTextBox2.Text = k;
    richTextBox1.Text = msg;
    label10.Text = totalIteration.ToString();
}
public decimal GetExchangedValue(string priceToConvert, string to_currency)
{
    var from_currency = "";
    decimal exchangedValue;
    Preprocess(priceToConvert);
    int totalIteration = 0;
    int final_index = -1;
    int collectionNumber = docCollection.DocumentList.Count - 1;
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    List<Centroid> resultSet = DocumnetClustering.PrepareDocumentCluster(collectionNumber, vSpace, ref totalIteration, ref final_index, currency);
    from_currency = resultSet[final_index].GroupedDocument[0].Content.Split(',')[0];
    WriteInCurrencyDocument(from_currency, currency);
    decimal rate = GetRate(from_currency, to_currency);
    exchangedValue = value * rate;
    return exchangedValue;
}
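// Note: GetRate is not shown in this collection. A hypothetical sketch of a rate
// lookup, assuming a simple in-memory table keyed by currency pair (the real helper
// may call a service or read from a document instead):
static readonly Dictionary<string, decimal> Rates = new Dictionary<string, decimal>
{
    { "USD:EUR", 0.9m }, // illustrative values only
    { "EUR:USD", 1.1m },
};

static decimal GetRate(string fromCurrency, string toCurrency)
{
    if (fromCurrency == toCurrency) return 1m;
    return Rates[fromCurrency + ":" + toCurrency];
}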
protected void Button1_Click(object sender, EventArgs e)
{
    Dictionary<int, double> siteRelevancy = new Dictionary<int, double>();
    string word1 = TextBox1.Text;
    string word2 = TextBox2.Text;
    string word3 = TextBox3.Text;
    double weight1, weight2, weight3;

    // Weights are assigned in the if statement below
    if (Double.TryParse(TextBox4.Text, out weight1) &&
        Double.TryParse(TextBox5.Text, out weight2) &&
        Double.TryParse(TextBox6.Text, out weight3))
    {
        statusLabel.Text = "Good to go!";
        // Pass the weights and words to wherever they need to go
        siteRelevancy = VectorSpaceModel.DoVSM(word1, weight1, word2, weight2, word3, weight3);
        displayLinks(siteRelevancy);
    }
    else
    {
        statusLabel.Text = "Please enter only numbers in the weight boxes!";
    }
}
private void btnAdd_Click(object sender, EventArgs e)
{
    int newDoc = 0;
    if (!string.IsNullOrEmpty(txtDoc1.Text)) { docCollection.DocumentList.Add(txtDoc1.Text); newDoc++; }
    if (!string.IsNullOrEmpty(txtDoc2.Text)) { docCollection.DocumentList.Add(txtDoc2.Text); newDoc++; }
    if (!string.IsNullOrEmpty(txtDoc3.Text)) { docCollection.DocumentList.Add(txtDoc3.Text); newDoc++; }
    if (!string.IsNullOrEmpty(txtDoc4.Text)) { docCollection.DocumentList.Add(txtDoc4.Text); newDoc++; }

    int totalDoc = docCollection.DocumentList.Count;
    lblTotalDoc.Text = totalDoc.ToString();

    txtDoc1.Clear();
    txtDoc2.Clear();
    txtDoc3.Clear();
    txtDoc4.Clear();

    if (ddlType.Text == "Incremental" && DocumnetClustering.mainCentroids.Count > 0)
    {
        switch (ddlIncAlg.Text)
        {
            case "KMeans":
                // Assign each newly added document to its closest existing centroid.
                List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
                for (int i = 1; i <= newDoc; i++)
                {
                    DocumentVector obj = vSpace[vSpace.Count - i];
                    int index = DocumnetClustering.FindClosestClusterCenter(DocumnetClustering.mainCentroids, obj, ddl_sim.Text);
                    DocumnetClustering.mainCentroids[index].GroupedDocument.Add(obj);
                }
                break;

            case "CMeans":
                // Read the fuzzy c-means cluster centers from file, then assign each
                // new document to the nearest center.
                List<DocumentVector> vSpace2 = VectorSpaceModel.ProcessDocumentCollection(docCollection);
                string outFilepath = @"E:\Dropbox\Masters\myMSc\PracticalPart\Sematic_K-MEANSClustering\FCM\HM_data_Out_centers.dat";
                List<float[]> values = new List<float[]>();
                int t = 0;
                using (var reader = new StreamReader(File.OpenRead(outFilepath)))
                {
                    while (!reader.EndOfStream)
                    {
                        var line = reader.ReadLine();
                        values.Add(Array.ConvertAll(line.Split(','), float.Parse));
                        t++;
                    }
                }
                for (int i = 0; i < newDoc; i++)
                {
                    int closeCenter = 0;
                    float min = 1000;
                    int counter = 1;
                    DocumentVector obj2 = vSpace2[vSpace2.Count - newDoc + i];
                    for (int l = 0; l < t; l++)
                    {
                        // float s = SimilarityMatrics.FindCosineSimilarity(values[l], obj2.VectorSpace);
                        float s = ArrayDistanceFunction(values[l], obj2.VectorSpace);
                        if (s < min)
                        {
                            min = s;
                            closeCenter = counter;
                        }
                        counter++;
                    }
                    MessageBox.Show("Doc:" + (i + 1) + " Close is:" + closeCenter);
                    DocumnetClustering.mainCentroids[closeCenter - 1].GroupedDocument.Add(obj2);
                }
                break;
        }
        printAlll();
    }
}
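// Note: ArrayDistanceFunction is referenced above but not shown. Since the caller
// keeps the *minimum* value, it behaves as a distance; a minimal Euclidean sketch
// under that assumption (the original may use a different metric):
static float ArrayDistanceFunction(float[] a, float[] b)
{
    double sum = 0;
    for (int i = 0; i < a.Length; i++)
        sum += (a[i] - b[i]) * (a[i] - b[i]);
    return (float)Math.Sqrt(sum);
}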
static void Main(string[] args)
{
    var enc = Encoding.GetEncoding(1251);
    var files = Directory.GetFiles(Directory.GetCurrentDirectory(), "*.txt");
    var list = new List<Document>();
    var books = new List<BookInfo>();
    Dictionary<BookInfo, Document> book2doc = new Dictionary<BookInfo, Document>();
    Dictionary<DocumentBase, BookInfo> doc2book = new Dictionary<DocumentBase, BookInfo>();
    var spellProc = new SpellProcessor();

    foreach (var file in files)
    {
        Console.WriteLine("{1}: processing: {0}...", Path.GetFileName(file), DateTime.Now.ToString("T"));
        using (var rdr = new StreamReader(file, enc))
        {
            while (!rdr.EndOfStream)
            {
                var bookInfo = new BookInfo(rdr);
                // Prefer the description; fall back to the annotation, then the title.
                var text = bookInfo.Descr;
                if (text == null || text.Contains("PDF")) { text = bookInfo.Annot; }
                if (text == null || text.Contains("PDF")) { text = bookInfo.Title; }
                if (string.IsNullOrEmpty(text)) { continue; }
                // bookInfo.ProcessingText = text.ToLower();
                var document = new Document(text, spellProc);
                book2doc[bookInfo] = document;
                doc2book[document] = bookInfo;
                // books.Add(bookInfo);
                list.Add(document);
            }
        }
    }

    var corpus = new Corpus(list);
    var model = new VectorSpaceModel(corpus);

    // Russian terms to exclude from keywords (common words) and terms that must
    // always be included (markers of methodical literature).
    string[] skipList = new[]
    {
        "о", "общие", "сведение", "даваться", "по", "для", "профиль", "из", "на", "при",
        "вуз", "даны", "их", "предназначить", "студент", "подготовка", "бакалавр",
        "обучение", "всех", "направление", "обучаться"
    };
    string[] reqList = new[]
    {
        "учебный", "пособие", "методический", "курсовой", "дипломный", "работа",
        "проект", "лабораторный"
    };

    Dictionary<string, List<BookInfo>> keys2book = new Dictionary<string, List<BookInfo>>();
    foreach (var document in list)
    {
        var ww = model.GetWeights(document);
        var pairs = ww.Where(x => x.Value > 0).Select(x => x).ToList();
        pairs.Sort((a, b) => b.Value.CompareTo(a.Value));
        //
        var book = doc2book[document];
        book.KeyWords = new List<string>();
        foreach (var pair in pairs)
        {
            // Keep the ten heaviest terms, minus the skip list, plus any required term.
            var needInclude = book.KeyWords.Count < 10;
            var key = pair.Key;
            if (needInclude) { needInclude = (Array.IndexOf(skipList, key) == -1) && (key.Length > 1); }
            if (!needInclude) { needInclude = Array.IndexOf(reqList, key) != -1; }
            if (!needInclude) { continue; }
            book.KeyWords.Add(key);
            if (!keys2book.ContainsKey(key)) { keys2book[key] = new List<BookInfo>(); }
            keys2book[key].Add(book);
        }
    }

    var srcBooks = list.Select((d, i) => doc2book[d]).Where(b => b.KeyWords != null && b.KeyWords.Count > 0).ToList();
    var srcTexts = srcBooks.Select(x => x.KeyWords).ToList();
    var pl = new ProbLatentSemanticAnalyse(10, srcTexts);
    pl.Train(20, x => Console.WriteLine("{1}: step {0} finished...", x, DateTime.Now.ToString("T")));
    //
    var phi = pl.PhiWt;
    var qu = pl.QuDt;
    var wrd = pl.WordIndexes;

    List<KeyValuePair<string, double>> keyz = new List<KeyValuePair<string, double>>();
    using (var wrt = new StreamWriter("theme_word.lst"))
    {
        for (int t = 0; t < pl.ThemeCount; t++)
        {
            keyz.Clear();
            wrt.WriteLine("-----------------------------------------------------------------");
            wrt.WriteLine("theme index: {0}", t);
            wrt.WriteLine("words collected");
            foreach (var pair in wrd)
            {
                var val = phi[pair.Value, t];
                if (val >= 0.01) { keyz.Add(new KeyValuePair<string, double>(pair.Key, val)); }
                //if (val >= 0.01) wrt.WriteLine("[{0}] => {1}", pair.Key, val);
            }
            keyz.Sort((a, b) => b.Value.CompareTo(a.Value));
            foreach (var pair in keyz) { wrt.WriteLine("[{0}] => {1}", pair.Key, pair.Value); }
        }
        wrt.Flush();
    }

    // Earlier experiments, kept commented out for reference:
    //Dictionary<string,int> keyz = new Dictionary<string, int>();
    //Dictionary<int, Dictionary<int, int>> data = new Dictionary<int, Dictionary<int, int>>();
    //for (int d = 0; d < list.Count; d++)
    //{
    //    var document = list[d];
    //    var kws = doc2book[document].KeyWords;
    //    if (kws == null || kws.Count == 0) continue;
    //    data[d] = new Dictionary<int, int>();
    //    foreach (var kw in kws)
    //    {
    //        if (!keyz.ContainsKey(kw)) keyz[kw] = keyz.Count;
    //        var ki = keyz[kw];
    //        if (!data[d].ContainsKey(ki)) data[d][ki] = 0;
    //        data[d][ki] = data[d][ki] + 1;
    //    }
    //}
    //var pl = new ProbLatentSemanticAnalyse(30, data);
    //pl.Train(100);

    ////var d = model.CosineSimilarity(list[0], list[1]);
    //var lsh = new LocalitySensitiveHashing(0.1, 0.5, 0.1, 0.8);
    //lsh.Hashing.Extractor = new NgramExtractor(3);
    //for (int index = 0; index < books.Count; index++) lsh.Process(index, books[index].ProcessingText);
    //for (int index = 0; index < books.Count; index++)
    //{
    //    var indexes = lsh.FindSimiliar(books[index].ProcessingText);
    //    Console.WriteLine("src: {0}", books[index].ProcessingText);
    //    //foreach (var idx in indexes)
    //    //{
    //    //    if (idx == index) continue;
    //    //    var dist = model.CosineSimilarity(list[index], list[idx]);
    //    //    if (dist <= 1e-2) continue;
    //    //    Console.WriteLine();
    //    //    Console.WriteLine("dst: {0} => {1}", books[idx].ProcessingText, dist);
    //    //}
    //    var dists = indexes.Select(idx => model.CosineSimilarity(list[index], list[idx])).ToArray();
    //    Console.WriteLine("------------------------------------------------------------------------------------");
    //}

    ////double[,] dist = new double[list.Count,list.Count];
    ////for (int i = 0; i < list.Count; i++)
    ////{
    ////    for (int j = i; j < list.Count; j++)
    ////    {
    ////        var d = model.CosineSimilarity(list[i], list[j]);
    ////        dist[i, j] = d;
    ////        dist[j, i] = d;
    ////    }
    ////}

    //var wordPairs = keys2book.Select(x => x).ToList();
    //wordPairs.Sort((a,b)=>b.Value.Count.CompareTo(a.Value.Count));
    //using (var wrt = new StreamWriter("keys.lst"))
    //{
    //    foreach (var wp in wordPairs) wrt.WriteLine("{0} => {1}", wp.Key, wp.Value.Count);
    //    wrt.Flush();
    //}

    Console.ReadLine();
}
private void KMeans(object sender, RoutedEventArgs e)
{
    clustResultTxtBox.Document.Blocks.Clear();
    var clusterization_stopwatch = Stopwatch.StartNew();
    string message = null;
    string algorithm = " k-means;";
    string PKMeans_label_result_path = @"F:\Magistry files\data\PKMeans_label_result6.txt";
    string K_means_report_path = @"F:\Magistry files\reports\PKMeans_report6.txt";

    #region OldDataGeneration
    /*
     * List<string> docCollection = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoading();
     * HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();
     * Dictionary<string, int> wordIndex = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTerm(docCollection, termCollection);
     * List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessing(docCollection);
     */
    #endregion

    Dictionary<int, string> docCollectionDictionary = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
    HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();
    Dictionary<string, int> wordIndex = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);
    List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

    int totalIteration = 500;
    int clusterNumber = Convert.ToInt32(txtboxClusterNumber.Text);

    #region OldClusteringAlgorithm
    //firstCentroidList = CentroidCalculationClass.CentroidCalculationsForKMeans(vSpace, clusterNumber);
    //List<Centroid> resultSet = Logic.ClusteringAlgorithms.Algorithms.KMeansPPImplementations.MyKmeansPPInterpritationcs.NewKMeansClusterization(clusterNumber, docCollection, totalIteration, vSpace, wordIndex, firstCentroidList);
    #endregion

    List<Centroid> firstCentroidList = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.KMeans.CentroidCalculationsForKMeans(vSpace, clusterNumber);
    List<Centroid> resultSet = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.KMeans.KMeansClustering(vSpace, clusterNumber, totalIteration, firstCentroidList);
    clusterization_stopwatch.Stop();

    int[] PKMeans_label_matrix = Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(resultSet, PKMeans_label_result_path);

    #region tests_metrics
    //List<string> docs = Tests.DocClasses.SurveyAndMeasurementsClassOfDocuments_ListCreations();
    //List<List<string>> ClassCollection = Tests.DocClasses.ListOfClasses();
    /*
     * var distance = Tests.InterclusterDistances.d_centroids(resultSet);
     * var min_centroid_distances = Tests.InterclusterDistances.d_min_centroids(resultSet);
     * var max_intracluster_d = Tests.IntraclusterDistances.d_max(resultSet);
     * var min_intracluster_d = Tests.IntraclusterDistances.d_min(resultSet);
     * var median_intracluster_d = Tests.IntraclusterDistances.d_sr(resultSet);
     *
     * //string DistanceMetricsFilePath = @"F:\Magistry files\distanceMetrics\KmeansDistanceMetrics1.txt";
     * //for(int iK=0; iK<clusterNumber; iK++)
     * //{
     * //    for (int jK = 0; jK < clusterNumber; jK++)
     * //    {
     * //        File.WriteAllText(DistanceMetricsFilePath, distance[iK, jK].ToString());
     * //    }
     * //}
     *
     * var Recall_result = Tests.Recall.Recall_Calculating(resultSet, docs);
     * var Precision_result = Tests.Precision.Precision_Calculating(resultSet, docs);
     * var Purity = Tests.Purity.Purity_Calculating(resultSet, ClassCollection, vSpace);
     * var Fmeasure = Tests.F1Measure.F1_Measure_Calculating(resultSet, ClassCollection);
     * var GMeasure = Tests.F1Measure.G_Measure_Calculating(resultSet, ClassCollection);
     * var NMI = Tests.NormilizedMutualInformation.NMI_Calculating(resultSet, ClassCollection, vSpace);
     * var Entropy = Tests.Entropy.Enthropy_Calculating(resultSet, ClassCollection);
     */
    #endregion

    message = RaportGeneration.ReleaseRaportGenerationFunction(resultSet, clusterNumber, totalIteration, clusterization_stopwatch, K_means_report_path, algorithm);
    //clustResultTxtBox.AppendText(message);
    invokeFilesToVisualizationGenerator(resultSet, algorithm);
}
private void Gravitational_Click(object sender, RoutedEventArgs e)
{
    #region gravitational_old_working_code
    /*
     * List<string> docCollection = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoading();
     * HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();
     * Dictionary<string, int> wordIndex = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTerm(docCollection, termCollection);
     * List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessing(docCollection);
     * int M = 1000000;
     * float G = -1.28171817154F; // G = 1e-4 according to Section 3.2.2 in the article
     * float deltaG = 0.01F;
     * float epsilon = -3.28171817154F; // epsilon = 1e-6 according to Section 3.2.2 in the article, or 10^(-4) = 0.0001F
     * //float epsilon = 0.6F;
     * float alpha = 0.06F;
     * var result1 = Logic.ClusteringAlgorithms.Algorithms.GravitationalClusteringAlgorithm.Gravitational(vSpace, G, deltaG, M, epsilon);
     * //var result2 = GravitationalClusteringAlgorithm.GetClusters(result1, alpha, vSpace);
     * List<string> docs = Tests.DocClasses.SurveyAndMeasurementsClassOfDocuments_ListCreations();
     * List<List<string>> ClassCollection = Tests.DocClasses.ListOfClasses();
     * var distance = Tests.InterclusterDistances.d_centroids(result1);
     * var min_centroid_distances = Tests.InterclusterDistances.d_min_centroids(result1);
     * var max_intracluster_d = Tests.IntraclusterDistances.d_max(result1);
     * var min_intracluster_d = Tests.IntraclusterDistances.d_min(result1);
     * var median_intracluster_d = Tests.IntraclusterDistances.d_sr(result1);
     * var Recall_result = Tests.Recall.Recall_Calculating(result1, docs);
     * var Precision_result = Tests.Precision.Precision_Calculating(result1, docs);
     * var Purity = Tests.Purity.Purity_Calculating(result1, ClassCollection, vSpace);
     * var Fmeasure = Tests.F1Measure.F1_Measure_Calculating(result1, ClassCollection);
     * var GMeasure = Tests.F1Measure.G_Measure_Calculating(result1, ClassCollection);
     * var NMI = Tests.NormilizedMutualInformation.NMI_Calculating(result1, ClassCollection, vSpace);
     */
    #endregion

    clustResultTxtBox.Document.Blocks.Clear();
    var clusterization_stopwatch = Stopwatch.StartNew();
    string message = null;
    string algorithm = " Gravitational clustering algorithm;";
    Dictionary<int, string> docCollectionDictionary = Logic.ClusteringAlgorithms.Used_functions.CreateDocumentCollection2.GenerateDocumentCollection_withoutLazyLoadingToDictionary();
    HashSet<string> termCollection = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.getTermCollection();
    Dictionary<string, int> wordIndex = Logic.ClusteringAlgorithms.Used_functions.TFIDF2ndrealization.DocumentsContainsTermToDictionary(docCollectionDictionary, termCollection);
    List<DocumentVector> vSpace = VectorSpaceModel.DocumentCollectionProcessingDictionary(docCollectionDictionary);

    int M = 500;
    //float G = 7 * (float)Math.Pow(10, (-6));
    //float G = -1.28171817154F; // G = 1e-4 according to Section 3.2.2 in the article
    float G = 6.67408313131313131F * (float)Math.Pow(10, (-6));
    float deltaG = 0.001F;
    //float epsilon = -3.28171817154F; // epsilon = 1e-6 according to Section 3.2.2 in the article, or 10^(-4) = 0.0001F
    float epsilon = 0.1F;
    float alpha = 0.06F;
    int clusterNumber = Convert.ToInt32(txtboxClusterNumber.Text);
    M = Convert.ToInt32(txtboxIterationCount.Text);
    string gravitational_label_result_path = @"F:\Magistry files\data\Gravitational_label_result5.txt";
    string Gravitational_report_path = @"F:\Magistry files\reports\Gravitational_report5.txt";

    List<Centroid> result = new List<Centroid>(vSpace.Count);
    var results = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.GravitationalClusteringAlgorithm.GravitationalAlgorithm(vSpace, G, deltaG, M, epsilon);
    var get_Clusters = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.GravitationalClusteringAlgorithm.GetClusters(results, alpha, vSpace);
    List<Centroid> resultSet = Logic.ClusteringAlgorithms.WorkedAlgorithmsFromTest.GravitationalClusteringAlgorithm.RemoveSameElementsFromClusters(get_Clusters);
    int[] label_matrix = Tests.Label_Matrix.ReleaseVersion_Label_Matrix_Extractions(get_Clusters, gravitational_label_result_path);
    clusterization_stopwatch.Stop();
    message = RaportGeneration.ReleaseRaportGenerationFunction(get_Clusters, get_Clusters.Count, M, clusterization_stopwatch, Gravitational_report_path, algorithm);
    //clustResultTxtBox.AppendText(message);
    invokeFilesToVisualizationGenerator(resultSet, algorithm);
}