/// <summary>
/// Runs the clustering pipeline on the first <c>model.Amount</c> stored documents plus the
/// submitted text (<c>model.Name</c>) and passes the outcome to the view.
/// </summary>
/// <param name="model">Carries the sample size (Amount), the query text (Name) and the cluster argument (Cluster).</param>
/// <returns>The view bound to a new <c>TestAlgorithmModel</c> holding the inputs, the centroids and the closest document.</returns>
public ActionResult TestAlgorithm(TestAlgorithmModel model)
{
    // Sample of stored documents with the submitted text appended as the last entry.
    var documents = _taiLieuVanBanService.GetDocuments().Take(model.Amount).ToList();
    documents.Add(model.Name);

    var corpus = new DocumentCollection();
    corpus.DocumentList = documents;

    List<DocumentVector> vectors = VectorSpaceModel.ProcessDocumentCollection(corpus);
    List<Centroid> centroids = DocumnetClustering.DocumentCluster(model.Cluster, vectors, model.Name);

    // Must be queried after DocumentCluster has run, exactly as in the call order above.
    string closestDocument = DocumnetClustering.FindClosestDocument();

    var viewModel = new TestAlgorithmModel
    {
        Name = model.Name,
        Amount = model.Amount,
        Cluster = model.Cluster,
        Centroids = centroids,
        DocumentNear = closestDocument
    };

    return View(viewModel);
}
/// <summary>
/// Suggests where to store a document: clusters it together with the stored documents,
/// looks up the stored document reported as closest and returns the Text of the HoSo entry
/// whose Id equals that document's HoSoId.
/// </summary>
/// <param name="document">Content of the document to be stored.</param>
/// <param name="type">Document type; its count from the service is passed as the cluster argument.</param>
/// <returns>
/// JSON object <c>{ da = message }</c>. The default "not found" message is returned when no
/// closest document is reported, when it cannot be loaded, or when no matching HoSo entry exists.
/// </returns>
public ActionResult StorageSuggestion(string document, string type)
{
    string local = "Không tìm thấy tài liêu/văn bản có cùng nội dung! Tạo hồ sơ mới.";

    var hosos = AutoCompleteTextHoSos(GetHoSos());
    var list = _taiLieuVanBanService.GetDocuments();
    list.Add(document);
    var docCollection = new DocumentCollection() { DocumentList = list };
    var cluster = _taiLieuVanBanService.CountDocumentType(type);

    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);

    // The returned centroids are not used here, but FindClosestDocument() takes no arguments,
    // so it presumably reads state produced by this call — the call itself must stay.
    DocumnetClustering.DocumentCluster(cluster, vSpace, document);
    string documentNeedSearch = DocumnetClustering.FindClosestDocument();

    if (!string.IsNullOrEmpty(documentNeedSearch))
    {
        var taiLieuVanBan = _taiLieuVanBanService.Get(p => p.NoiDung == documentNeedSearch);

        // Both lookups can come back empty; keep the fallback message instead of
        // dereferencing null and failing the request.
        if (taiLieuVanBan != null)
        {
            var hoSo = hosos.FirstOrDefault(p => p.Id == taiLieuVanBan.HoSoId);
            if (hoSo != null)
            {
                local = hoSo.Text;
            }
        }
    }

    return Json(new { da = local }, JsonRequestBehavior.AllowGet);
}
/// <summary>
/// Clusters the current document collection and prints one section per cluster into
/// <c>richTextBox1</c>, then shows the iteration count in <c>lblTotalIteration</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// The cluster count is read with <c>int.Parse</c> from <c>txtClusterNo</c>, so non-numeric
/// input raises <c>FormatException</c>.
/// </remarks>
private void btnStartClustering_Click(object sender, EventArgs e)
{
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    int totalIteration = 0;
    List<Centroid> resultSet = DocumnetClustering.PrepareDocumentCluster(int.Parse(txtClusterNo.Text), vSpace, ref totalIteration);

    // StringBuilder avoids re-copying the whole report on every append (string += in nested loops).
    var report = new System.Text.StringBuilder();
    int count = 1;

    foreach (Centroid c in resultSet)
    {
        report.AppendFormat("------------------------------[ CLUSTER {0} ]-----------------------------{1}", count, System.Environment.NewLine);

        // A separator between documents is only written when the cluster holds more than one.
        bool separateDocuments = c.GroupedDocument.Count > 1;

        foreach (DocumentVector document in c.GroupedDocument)
        {
            report.Append(document.Content).Append(System.Environment.NewLine);
            if (separateDocuments)
            {
                report.AppendFormat("{0}-------------------------------------------------------------------------------{0}", System.Environment.NewLine);
            }
        }

        report.Append("-------------------------------------------------------------------------------").Append(System.Environment.NewLine);
        count++;
    }

    richTextBox1.Text = report.ToString();
    lblTotalIteration.Text = totalIteration.ToString();
}
/// <summary>
/// Loads the vector space for the selected data set from its XML file when that file exists;
/// otherwise builds it from <c>docCollection</c>, shows the elapsed milliseconds in
/// <c>lblTime</c> and writes the result to the file. Shows "Done" in both cases.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    // Only "Reu_01" switches to the 01_ file; "Re0" and every other selection use the 00_ file.
    string filePrefix = cboxDataSet.Text == "Reu_01"
        ? "E:\\Dropbox\\Masters\\myMSc\\PracticalPart\\Sematic_K-MEANSClustering\\MScDataSets\\Reuters21578\\data\\01_"
        : "E:\\Dropbox\\Masters\\myMSc\\PracticalPart\\Sematic_K-MEANSClustering\\MScDataSets\\Reuters21578\\data\\00_";
    string cachePath = filePrefix + maxNoDoc + "_vSpace.xml";

    if (!File.Exists(cachePath))
    {
        // No saved file yet: compute, time the computation and persist the result.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
        timer.Stop();

        lblTime.Text = "Time: " + timer.ElapsedMilliseconds;
        SerializeObject(vSpace, cachePath);
    }
    else
    {
        vSpace = DeSerializeObject<List<DocumentVector>>(cachePath);
    }

    MessageBox.Show("Done");
}
/// <summary>
/// Starts training: converts <c>docCollection</c> into document vectors, clusters them using
/// <c>txtClusterNum</c>, stores the outcome in <c>resultSet</c> / <c>totalIteration</c> and
/// writes the iteration count to the console.
/// </summary>
public void train()
{
    List<DocumentVector> documentVectors = VectorSpaceModel.ProcessDocumentCollection(docCollection);

    // Reset before the call; the clustering routine receives the counter by reference.
    totalIteration = 0;
    resultSet = DocumnetClustering.PrepareDocumentCluster(txtClusterNum, documentVectors, ref totalIteration);

    Console.WriteLine("totalIteration: {0}", totalIteration);
}
/// <summary>
/// Clusters all stored documents against a fixed sample query and passes the resulting
/// centroids to the view.
/// </summary>
/// <returns>The view bound to the list of centroids.</returns>
public ActionResult Test()
{
    var corpus = new DocumentCollection();
    corpus.DocumentList = _taiLieuVanBanService.GetDocuments();

    // The number of documents of this fixed type is passed as the cluster argument.
    var clusterCount = _taiLieuVanBanService.CountDocumentType("Thông Báo");

    List<DocumentVector> vectors = VectorSpaceModel.ProcessDocumentCollection(corpus);
    List<Centroid> centroids = DocumnetClustering.DocumentCluster(clusterCount, vectors, "thông báo chính phủ mới");

    return View(centroids);
}
/// <summary>
/// Determines the source currency for a price string by clustering the document collection
/// and reading the first document of the cluster selected by the clustering call.
/// </summary>
/// <param name="priceToConvert">Raw price text; handed to <c>Preprocess</c> before clustering.</param>
/// <returns>The part of that document's content that precedes the first comma.</returns>
public string GetFromCurrency(string priceToConvert)
{
    // Presumably fills docCollection and currency from the input — confirm against Preprocess.
    Preprocess(priceToConvert);

    int iterations = 0;
    int matchedCluster = -1;
    int clusterCount = docCollection.DocumentList.Count - 1;

    List<DocumentVector> vectors = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    List<Centroid> clusters = DocumnetClustering.PrepareDocumentCluster(clusterCount, vectors, ref iterations, ref matchedCluster, currency);

    // NOTE(review): matchedCluster starts at -1; if the clustering call leaves it unchanged,
    // the indexer below throws.
    Centroid match = clusters[matchedCluster];
    string firstContent = match.GroupedDocument[0].Content;
    return firstContent.Split(',')[0];
}
/// <summary>
/// Clusters the current document collection and fills two reports: <c>richTextBox1</c> gets
/// the documents of every cluster, <c>richTextBox2</c> gets, per cluster, the keys whose
/// weight exceeds 0.005. The iteration count is shown in <c>label10</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// The cluster count is read with <c>int.Parse</c> from <c>txtClusterNo</c>, so non-numeric
/// input raises <c>FormatException</c>.
/// </remarks>
private void btnStartClustering_Click(object sender, EventArgs e)
{
    List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    int totalIteration = 0;
    List<Centroid> resultSet = DocumnetClustering.PrepareDocumentCluster(int.Parse(txtClusterNo.Text), vSpace, ref totalIteration);

    // StringBuilder avoids re-copying the growing reports on every append.
    var msg = new System.Text.StringBuilder();
    var k = new System.Text.StringBuilder();
    int count = 1;

    foreach (Centroid c in resultSet)
    {
        msg.AppendFormat("------------------------------[ CLUSTER {0} ]-----------------------------{1}", count, System.Environment.NewLine);
        k.AppendFormat("[ CLUSTER {0} ]", count);

        foreach (DocumentVector document in c.GroupedDocument)
        {
            // Collect the significant keys of this document; "." is skipped.
            for (int i = 0; i < document.keys.Length; i++)
            {
                if (document.VectorSpace[i] > 0.005 && document.keys[i] != ".")
                {
                    k.Append(document.keys[i]).Append(",");
                }
            }

            // Written once per document. Previously this sat inside the key loop, so the
            // content and separator were repeated for every key.
            msg.Append(document.Content).Append(System.Environment.NewLine);
            if (c.GroupedDocument.Count > 1)
            {
                msg.AppendFormat("{0}-------------------------------------------------------------------------------{0}", System.Environment.NewLine);
            }
        }

        // Cluster footer and numbering advance once per cluster. Previously they ran once per
        // document, which made the "CLUSTER n" headers skip numbers.
        msg.Append("-------------------------------------------------------------------------------").Append(System.Environment.NewLine);
        k.Append(System.Environment.NewLine);
        count++;
    }

    richTextBox2.Text = k.ToString();
    richTextBox1.Text = msg.ToString();
    label10.Text = totalIteration.ToString();
}
/// <summary>
/// Converts the amount contained in a price string into <paramref name="to_currency"/>.
/// The source currency is taken from the first document of the cluster selected by the
/// clustering call, recorded through <c>WriteInCurrencyDocument</c>, and the amount
/// (<c>value</c>) is multiplied by the rate returned from <c>GetRate</c>.
/// </summary>
/// <param name="priceToConvert">Raw price text; handed to <c>Preprocess</c> before clustering.</param>
/// <param name="to_currency">Target currency passed to <c>GetRate</c>.</param>
/// <returns><c>value</c> multiplied by the exchange rate.</returns>
public decimal GetExchangedValue(string priceToConvert, string to_currency)
{
    // Presumably fills docCollection, currency and value from the input — confirm against Preprocess.
    Preprocess(priceToConvert);

    int iterations = 0;
    int matchedCluster = -1;
    int clusterCount = docCollection.DocumentList.Count - 1;

    List<DocumentVector> vectors = VectorSpaceModel.ProcessDocumentCollection(docCollection);
    List<Centroid> clusters = DocumnetClustering.PrepareDocumentCluster(clusterCount, vectors, ref iterations, ref matchedCluster, currency);

    // NOTE(review): matchedCluster starts at -1; if the clustering call leaves it unchanged,
    // the indexer below throws.
    string sourceCurrency = clusters[matchedCluster].GroupedDocument[0].Content.Split(',')[0];
    WriteInCurrencyDocument(sourceCurrency, currency);

    decimal rate = GetRate(sourceCurrency, to_currency);
    return value * rate;
}
/// <summary>
/// Adds the non-empty contents of the four document text boxes to <c>docCollection</c>,
/// refreshes the document counter and clears the boxes. In "Incremental" mode with existing
/// centroids, each newly added document is additionally attached to its closest cluster using
/// the algorithm selected in <c>ddlIncAlg</c> ("KMeans" or "CMeans"), then <c>printAlll</c> is called.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnAdd_Click(object sender, EventArgs e)
{
    var inputBoxes = new[] { txtDoc1, txtDoc2, txtDoc3, txtDoc4 };

    // Every non-empty box contributes one new document, appended in box order.
    int newDoc = 0;
    foreach (var box in inputBoxes)
    {
        string text = box.Text;
        if (!string.IsNullOrEmpty(text))
        {
            docCollection.DocumentList.Add(text);
            newDoc++;
        }
    }

    lblTotalDoc.Text = docCollection.DocumentList.Count.ToString();

    foreach (var box in inputBoxes)
    {
        box.Clear();
    }

    if (ddlType.Text == "Incremental" && DocumnetClustering.mainCentroids.Count > 0)
    {
        switch (ddlIncAlg.Text)
        {
            case "KMeans":
            {
                List<DocumentVector> vSpace = VectorSpaceModel.ProcessDocumentCollection(docCollection);

                // The new documents are the last newDoc vectors; walk them from the end.
                for (int i = 1; i <= newDoc; i++)
                {
                    DocumentVector obj = vSpace[vSpace.Count - i];
                    int index = DocumnetClustering.FindClosestClusterCenter(DocumnetClustering.mainCentroids, obj, ddl_sim.Text);
                    DocumnetClustering.mainCentroids[index].GroupedDocument.Add(obj);
                }
                break;
            }

            case "CMeans":
            {
                List<DocumentVector> vSpace2 = VectorSpaceModel.ProcessDocumentCollection(docCollection);
                string outFilepath = @"E:\Dropbox\Masters\myMSc\PracticalPart\Sematic_K-MEANSClustering\FCM\HM_data_Out_centers.dat";

                // One cluster center per line, comma-separated components.
                List<float[]> centers = new List<float[]>();
                using (var reader = new StreamReader(File.OpenRead(outFilepath)))
                {
                    while (!reader.EndOfStream)
                    {
                        string line = reader.ReadLine();

                        // The file is comma-delimited, so its decimal separator cannot be a comma:
                        // parse with the invariant culture instead of the UI culture.
                        centers.Add(Array.ConvertAll(
                            line.Split(','),
                            field => float.Parse(field, System.Globalization.CultureInfo.InvariantCulture)));
                    }
                }

                for (int i = 0; i < newDoc; i++)
                {
                    DocumentVector obj2 = vSpace2[vSpace2.Count - newDoc + i];

                    // closeCenter is 1-based for display; 0 means "no center found".
                    // NOTE(review): only distances below 1000 are considered. If none qualifies,
                    // closeCenter stays 0 and the indexer below throws.
                    int closeCenter = 0;
                    float min = 1000;
                    for (int l = 0; l < centers.Count; l++)
                    {
                        float s = ArrayDistanceFunction(centers[l], obj2.VectorSpace);
                        if (s < min)
                        {
                            min = s;
                            closeCenter = l + 1;
                        }
                    }

                    MessageBox.Show("Doc:" + (i + 1) + " Close is:" + closeCenter);
                    DocumnetClustering.mainCentroids[closeCenter - 1].GroupedDocument.Add(obj2);
                }
                break;
            }
        }

        printAlll();
    }
}