/// <summary>
/// Trains a Naive Bayes text classifier on the instances returned by
/// <c>ImportaArquivosServidor()</c>, then writes the trained model and the training
/// data into <c>DiretorioClassificadorServidor</c>.
/// </summary>
private void RealizaTreinamentoWeka()
{
    // Record when this training run started.
    TempData["DataTreinamento"] = DateTime.Now;

    Instances dadosTreinamento = ImportaArquivosServidor();

    // Pre-processing chain: StringToWordVector followed by the filter produced by
    // AttributeSelectionFilter(2) (the meaning of the argument is defined by that helper).
    weka.filters.Filter[] filters = new weka.filters.Filter[2];
    filters[0] = new StringToWordVector();
    filters[1] = AttributeSelectionFilter(2);

    // Weka filters must be fully configured BEFORE setInputFormat is called; the sub-filters
    // are therefore installed first and the input format is declared afterwards.
    weka.filters.MultiFilter filter = new weka.filters.MultiFilter();
    filter.setFilters(filters);
    filter.setInputFormat(dadosTreinamento);

    // Build the classifier (filter chain + Naive Bayes) from the training data.
    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new NaiveBayes());
    classifier.buildClassifier(dadosTreinamento);

    // Disabled: 10-fold cross-validation used to expose the classifier statistics.
    /*
     * Evaluation eval = new Evaluation(dadosTreinamento);
     * eval.crossValidateModel(classifier, dadosTreinamento, 10, new java.util.Random(1));
     *
     * var EstatisticasTexto =
     *     "Instâncias Corretas: \t" + eval.correct() + " (" + Math.Round(eval.pctCorrect(),2) + "%)" + System.Environment.NewLine +
     *     "Instâncias Incorretas: \t" + eval.incorrect() + " (" + Math.Round(eval.pctIncorrect(),2) + "%)" + System.Environment.NewLine +
     *     "Total de Instâncias: \t\t" + eval.numInstances();
     *
     * TempData["TreinamentoRealizado"] = EstatisticasTexto;
     */

    // Persist the trained classifier as "Classificador.model".
    // NOTE(review): the format string inserts no separator, so DiretorioClassificadorServidor
    // is assumed to end with a directory separator - confirm.
    SerializationHelper.write(string.Format("{0}Classificador.model", DiretorioClassificadorServidor), classifier);

    // Persist the training data as "DadosTreinamento.arff" in the same directory.
    Utilidades.SalvarArff(dadosTreinamento, DiretorioClassificadorServidor, "DadosTreinamento.arff");
}
/// <summary>
/// Classifies every instance of the test dataset with a filtered Support Vector Machine
/// (StringToWordVector + SMO) and publishes the predicted labels through
/// <c>AllClassification</c>.
/// </summary>
/// <remarks>
/// All data lives under the "MARC 2.0" folder inside the user's ApplicationData directory.
/// A trained model is cached per <paramref name="textFilterType"/> under "Model\SVM"; the
/// matching <c>SVM*CheckifModelExists</c> method decides whether that cache is reused or the
/// model is retrained. Any exception raised while loading, training or classifying is caught
/// and its text is stored in <c>error</c>.
/// </remarks>
/// <param name="type">"BOF" selects "BOF Dataset.arff"; any other value selects "BOW Dataset.arff".
/// Only relevant when <paramref name="trainingFilePath"/> is null.</param>
/// <param name="trainingFilePath">Path of the training ARFF file; when not null it replaces the
/// default dataset chosen through <paramref name="type"/>.</param>
/// <param name="directoryName">Not used by this method.</param>
/// <param name="textFilterType">Selects which cached model and which cache check are used.</param>
private void FilteredSVM(string type, string trainingFilePath, string directoryName, TextFilterType textFilterType)
{
    string appDataFolder = System.Environment.GetFolderPath(System.Environment.SpecialFolder.ApplicationData);

    // Application-specific folder below ApplicationData; created on first use.
    string specificFolder = System.IO.Path.Combine(appDataFolder, "MARC 2.0");
    if (!Directory.Exists(specificFolder))
    {
        Directory.CreateDirectory(specificFolder);
    }

    try
    {
        string datasetsFolder = System.IO.Path.Combine(specificFolder, @"InputData\TrainingDatasets");

        // An explicitly supplied training file wins over the BOF/BOW default.
        string trainingDatasetFilePath = trainingFilePath;
        if (trainingDatasetFilePath == null)
        {
            string defaultDataset = type == "BOF" ? "BOF Dataset.arff" : "BOW Dataset.arff";
            trainingDatasetFilePath = System.IO.Path.Combine(datasetsFolder, defaultDataset);
        }

        string testDatasetFilePath = System.IO.Path.Combine(datasetsFolder, "Test.arff");

        Instances trainInsts = ReadArffWithLastAttributeAsClass(trainingDatasetFilePath); // text examples
        Instances classifyInsts = ReadArffWithLastAttributeAsClass(testDatasetFilePath);  // text to classify

        // The configured filter instance is the one handed to the model. Previously a second,
        // default StringToWordVector was passed instead, so the TF transform was never applied.
        // NOTE(review): models cached before this fix were trained without the TF transform and
        // stay in use until the cache check reports them as missing.
        StringToWordVector stringToWordVector = new StringToWordVector();
        stringToWordVector.setTFTransform(true);

        weka.classifiers.Classifier smocls = new weka.classifiers.functions.SMO();
        // Alternative kernel kept for reference:
        //smocls.setOptions(weka.core.Utils.splitOptions("-C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.Puk -C 250007 -O 1.0 -S 1.0\""));
        smocls.setOptions(weka.core.Utils.splitOptions("-C 1.0 -L 0.0010 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));

        FilteredClassifier model = new FilteredClassifier();
        model.setFilter(stringToWordVector);
        model.setClassifier(smocls);

        string modelFolder = System.IO.Path.Combine(specificFolder, @"Model\SVM");

        // Reuse the cached model for the selected text filter, or train and cache a new one.
        switch (textFilterType)
        {
            case TextFilterType.NoFilter:
                model = LoadOrTrainCachedSvmModel(
                    model, trainInsts, SVMNoFilterCheckifModelExists(trainingDatasetFilePath),
                    modelFolder, "SVMNoFilterModel.dat", "SVMNoFilterFile.dat", trainingDatasetFilePath);
                break;

            // Stopwords removal
            case TextFilterType.StopwordsRemoval:
                model = LoadOrTrainCachedSvmModel(
                    model, trainInsts, SVMSWRCheckifModelExists(trainingDatasetFilePath),
                    modelFolder, "SVMSWRFilterModel.dat", "SVMSWRFile.dat", trainingDatasetFilePath);
                break;

            // Stemming
            case TextFilterType.Stemming:
                model = LoadOrTrainCachedSvmModel(
                    model, trainInsts, SVMSTCheckifModelExists(trainingDatasetFilePath),
                    modelFolder, "SVMSTFilterModel.dat", "SVMSTFile.dat", trainingDatasetFilePath);
                break;

            // Stopwords removal combined with stemming
            case TextFilterType.StopwordsRemovalStemming:
                model = LoadOrTrainCachedSvmModel(
                    model, trainInsts, SVMSWRSTCheckifModelExists(trainingDatasetFilePath),
                    modelFolder, "SVMSWRSTFilterModel.dat", "SVMSWRSTFile.dat", trainingDatasetFilePath);
                break;

            default:
                // NOTE(review): for any other filter type the model is neither loaded nor trained,
                // exactly as before; classification below then presumably fails and is reported
                // through 'error' - confirm whether that is intended.
                break;
        }

        for (int i = 0; i < classifyInsts.numInstances(); i++)
        {
            var instance = classifyInsts.instance(i);

            // Drop any label present in the file, predict, and write the prediction back.
            instance.setClassMissing();
            double cls = model.classifyInstance(instance);
            instance.setClassValue(cls);

            classification = cls == 0 ? "Bug Report" : cls == 1 ? "Feature Request" : "Other";
            tempAllClassification.Add(classification);
        }

        AllClassification = tempAllClassification;
    }
    catch (Exception o)
    {
        // Failures are reported through the 'error' member instead of being propagated.
        error = o.ToString();
    }
}

/// <summary>
/// Reads an ARFF file into memory, marks its last attribute as the class attribute and
/// closes the underlying reader even when parsing fails.
/// </summary>
/// <param name="arffFilePath">Path of the ARFF file to load.</param>
/// <returns>The loaded instances with the class index set to the last attribute.</returns>
private static Instances ReadArffWithLastAttributeAsClass(string arffFilePath)
{
    BufferedReader reader = new BufferedReader(new FileReader(arffFilePath));
    try
    {
        Instances instances = new Instances(reader);
        instances.setClassIndex(instances.numAttributes() - 1);
        return instances;
    }
    finally
    {
        // The Instances constructor does not take ownership of the reader, so release it here.
        reader.close();
    }
}

/// <summary>
/// Returns the cached SVM model when the cache is reported as valid; otherwise trains
/// <paramref name="untrainedModel"/>, stores it in the cache together with a copy of the
/// training file, and returns it.
/// </summary>
/// <param name="untrainedModel">Configured but not yet trained classifier.</param>
/// <param name="trainInsts">Training instances used when the model has to be built.</param>
/// <param name="cachedModelExists">Result of the cache check for the selected filter type.</param>
/// <param name="modelFolder">Folder holding the cached model and training-file copy.</param>
/// <param name="modelFileName">File name of the serialized model inside <paramref name="modelFolder"/>.</param>
/// <param name="trainingCopyFileName">File name of the training-file copy inside <paramref name="modelFolder"/>.</param>
/// <param name="trainingDatasetFilePath">Path of the training ARFF file whose text is copied next to the model.</param>
/// <returns>The model to classify with: either the cached one or the freshly trained one.</returns>
private static FilteredClassifier LoadOrTrainCachedSvmModel(
    FilteredClassifier untrainedModel,
    Instances trainInsts,
    bool cachedModelExists,
    string modelFolder,
    string modelFileName,
    string trainingCopyFileName,
    string trainingDatasetFilePath)
{
    string modelPath = System.IO.Path.Combine(modelFolder, modelFileName);

    if (cachedModelExists)
    {
        return Helper.Helper.ReadFromBinaryFile<FilteredClassifier>(modelPath);
    }

    untrainedModel.buildClassifier(trainInsts);

    // Make sure the cache folder is there before writing; a no-op when it already exists.
    Directory.CreateDirectory(modelFolder);
    Helper.Helper.WriteToBinaryFile<FilteredClassifier>(modelPath, untrainedModel);

    // Keep a copy of the training file's text next to the model.
    // NOTE(review): presumably the SVM*CheckifModelExists methods compare against this copy - confirm.
    string content = System.IO.File.ReadAllText(trainingDatasetFilePath);
    using (var writer = new StreamWriter(System.IO.Path.Combine(modelFolder, trainingCopyFileName)))
    {
        writer.Write(content);
    }

    return untrainedModel;
}