/// <summary>
/// Rebuilds the per-class training text files from the split files,
/// leaving out the split whose index is <paramref name="chiso"/>.
/// </summary>
/// <param name="chiso">1-based index of the split file to exclude.</param>
/// <param name="splitPath">Path prefix used both to read "file{i}.txt" and to write the class files.</param>
/// <param name="className">
/// Class file paths. Each entry is overwritten in place with its base name
/// (the text after the last '\' and before the first '.').
/// </param>
public static void taoTrainClasstxt(int chiso, String splitPath, String[] className)
{
    List<DocumentDaPhanLop> trainingDocs = new List<DocumentDaPhanLop>();
    int splitCount = XuLyFile.k();

    // Gather the documents of every split except the excluded one.
    for (int split = 1; split <= splitCount; split++)
    {
        if (split == chiso)
        {
            continue;
        }

        List<DocumentDaPhanLop> splitDocs = DocumentDaPhanLop.layCacDocumentDaPhanLop(splitPath + "file" + split + ".txt");
        foreach (DocumentDaPhanLop doc in splitDocs)
        {
            trainingDocs.Add(doc);
        }
    }

    // Write one file per class containing the content of its documents.
    for (int c = 0; c < className.Length; c++)
    {
        String baseName = className[c].Split('\\').Last().Split('.').First();

        // The caller's array is updated on purpose: it holds bare class names afterwards.
        className[c] = baseName;

        using (StreamWriter writer = new StreamWriter(splitPath + baseName + ".txt"))
        {
            foreach (DocumentDaPhanLop doc in trainingDocs)
            {
                if (doc.className == baseName)
                {
                    writer.WriteLine(doc.content);
                }
            }
        }

        Console.WriteLine("Tao file class " + baseName + ".txt thanh cong");
    }
}
/// <summary>
/// Computes per-class classification statistics by comparing a file of
/// correct labels with a file of predicted labels, document by document
/// (documents are matched by their position in the two files).
/// </summary>
/// <param name="filetrue">File holding the documents with their correct class.</param>
/// <param name="fileknn">File holding the same documents with their predicted class.</param>
/// <param name="allclass">Class file paths; the class name is the text after the last '\' and before the first '.'.</param>
/// <returns>
/// One <c>ClassInfo</c> per entry of <paramref name="allclass"/>, in the same order, with
/// NumberRightofClass (correct and predicted label both equal the class),
/// NumberWrongofClass (predicted as the class, but the correct label differs),
/// NumberWrongoutClass (correct label is the class, but predicted differently),
/// and the derived Precision, Recall and Fscore.
/// </returns>
public static List<ClassInfo> ThongTinClass(String filetrue, String fileknn, String[] allclass)
{
    List<DocumentDaPhanLop> listTrue = DocumentDaPhanLop.layCacDocumentDaPhanLop(filetrue);
    List<DocumentDaPhanLop> listPhanLop = DocumentDaPhanLop.layCacDocumentDaPhanLop(fileknn);
    List<ClassInfo> listClass = new List<ClassInfo>();

    // Create one zeroed statistics record per known class.
    for (int i = 0; i < allclass.Length; i++)
    {
        String temp = allclass[i].Split('\\').Last().Split('.').First();
        listClass.Add(new ClassInfo(temp, 0, 0, 0, 0, 0, 0));
    }

    // Only positions present in BOTH files can be compared. Previously a
    // prediction file shorter than the truth file caused an out-of-range
    // exception on listPhanLop[i].
    int comparable = Math.Min(listTrue.Count, listPhanLop.Count);

    for (int k = 0; k < listClass.Count; k++)
    {
        ClassInfo info = listClass[k];

        // Compare the class of each aligned pair of documents.
        for (int i = 0; i < comparable; i++)
        {
            var trueLabel = listTrue[i].className;
            var predictedLabel = listPhanLop[i].className;

            if (trueLabel == predictedLabel)
            {
                if (trueLabel == info.ClassName)
                {
                    info.NumberRightofClass++;
                }
            }
            else
            {
                if (predictedLabel == info.ClassName)
                {
                    info.NumberWrongofClass++;
                }
                if (trueLabel == info.ClassName)
                {
                    info.NumberWrongoutClass++;
                }
            }
        }

        // Precision, recall and F-score.
        if (info.NumberRightofClass != 0)
        {
            // Number of decimal places; read once per class instead of three times.
            // NOTE(review): assumes XuLyFile.docRound() returns the same value on every call - confirm.
            int digits = XuLyFile.docRound();

            info.Precision = Math.Round((double)info.NumberRightofClass / (info.NumberRightofClass + info.NumberWrongofClass), digits);
            info.Recall = Math.Round((double)info.NumberRightofClass / (info.NumberRightofClass + info.NumberWrongoutClass), digits);

            // Precision and recall are already rounded here, so with few
            // decimal places both can be 0; guard the division so the
            // F-score is 0 instead of NaN (0/0).
            double denominator = info.Precision + info.Recall;
            if (denominator > 0)
            {
                info.Fscore = Math.Round((double)(2 * info.Precision * info.Recall) / denominator, digits);
            }
            else
            {
                info.Fscore = 0;
            }
        }
        else
        {
            info.Precision = 0;
            info.Recall = 0;
            info.Fscore = 0;
        }
    }

    return listClass;
}
/// <summary>
/// Counts the documents in a text file.
/// </summary>
/// <param name="input">Path of the file passed to <c>DocumentDaPhanLop.layCacDocumentDaPhanLop</c>.</param>
/// <returns>The number of documents that call returned.</returns>
public static int demSoDocument(String input)
{
    return DocumentDaPhanLop.layCacDocumentDaPhanLop(input).Count;
}