private Arquivo AvaliarWekaClassificacao(Arquivo _arquivoAvaliado)
{
    // Import the classifier saved by the training step
    FilteredClassifier classifier = (FilteredClassifier)ImportaClassificadorSalvo();

    // Import the training data saved by the training step
    Instances dadosTreinamento = ImportaDadosTreinamentoSalvo();
    dadosTreinamento.setClassIndex(dadosTreinamento.numAttributes() - 1); // Class attribute is last
    int numAttributes = dadosTreinamento.numAttributes();

    // Create the test instance against the training header
    Instance instance = new DenseInstance(numAttributes);
    instance.setDataset(dadosTreinamento);

    // Initialize every attribute to zero
    for (int i = 0; i < numAttributes; i++)
    {
        instance.setValue(i, 0);
    }

    // Insert the text under evaluation into the first attribute
    // (the original looped over all non-class attributes, contradicting this comment)
    instance.setValue(0, _arquivoAvaliado.TextoFormatado);

    // Mark the class as missing so it can be predicted
    instance.setClassMissing();

    // Classify the test instance
    var resultado = "";
    try
    {
        // Classify the instance, returning the predicted class index
        var predicao = classifier.classifyInstance(instance);
        instance.setClassValue(predicao);

        // Translate the numeric result into the expected class label
        resultado = dadosTreinamento.classAttribute().value((int)predicao);

        // Per-class probability distribution, available if a confidence is needed
        var distribuicao = classifier.distributionForInstance(instance);
    }
    catch (Exception)
    {
        throw new ClassificationException("The text could not be classified with respect to its quality.");
    }

    // Assign the result to the evaluated file
    _arquivoAvaliado.Classificacao = resultado;
    return _arquivoAvaliado;
}
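// A minimal sketch of the two helpers referenced above, which are not shown in
// this snippet. It assumes they simply read back the files written by
// RealizaTreinamentoWeka (below) under DiretorioClassificadorServidor; the
// actual implementations may differ.
private object ImportaClassificadorSalvo()
{
    // SerializationHelper.read is the counterpart of the SerializationHelper.write
    // call used when the model was saved
    return SerializationHelper.read(
        string.Format("{0}Classificador.model", DiretorioClassificadorServidor));
}

private Instances ImportaDadosTreinamentoSalvo()
{
    // Reload the saved training set so new instances can share its header/attributes
    var source = new weka.core.converters.ConverterUtils.DataSource(
        string.Format("{0}DadosTreinamento.arff", DiretorioClassificadorServidor));
    return source.getDataSet();
}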
private void RealizaTreinamentoWeka()
{
    TempData["DataTreinamento"] = DateTime.Now;
    Instances dadosTreinamento = ImportaArquivosServidor();

    // Apply the StringToWordVector filter followed by attribute selection
    weka.filters.Filter[] filters = new weka.filters.Filter[2];
    filters[0] = new StringToWordVector();
    filters[1] = AttributeSelectionFilter(2);
    weka.filters.MultiFilter filter = new weka.filters.MultiFilter();
    filter.setFilters(filters); // set the filters before the input format
    filter.setInputFormat(dadosTreinamento);

    // Build the classifier from the training data
    FilteredClassifier classifier = new FilteredClassifier();
    classifier.setFilter(filter);
    classifier.setClassifier(new NaiveBayes());
    classifier.buildClassifier(dadosTreinamento);

    // Run a cross-validation to expose the classifier's statistics
    /*
    Evaluation eval = new Evaluation(dadosTreinamento);
    eval.crossValidateModel(classifier, dadosTreinamento, 10, new java.util.Random(1));

    var EstatisticasTexto =
        "Correct instances: \t" + eval.correct() + " (" + Math.Round(eval.pctCorrect(), 2) + "%)" + System.Environment.NewLine +
        "Incorrect instances: \t" + eval.incorrect() + " (" + Math.Round(eval.pctIncorrect(), 2) + "%)" + System.Environment.NewLine +
        "Total instances: \t\t" + eval.numInstances();

    TempData["TreinamentoRealizado"] = EstatisticasTexto;
    */

    // Save the classifier (model) as /Classificador/Classificador.model
    SerializationHelper.write(string.Format("{0}Classificador.model", DiretorioClassificadorServidor), classifier);

    // Save the training data as /Classificador/DadosTreinamento.arff
    Utilidades.SalvarArff(dadosTreinamento, DiretorioClassificadorServidor, "DadosTreinamento.arff");
}
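// A hedged sketch of the AttributeSelectionFilter helper called above; its real
// body is not shown in this snippet. This version assumes the int argument caps
// how many ranked attributes are kept, and uses InfoGain + Ranker, which is one
// common supervised-selection setup in Weka (not necessarily the author's).
private weka.filters.Filter AttributeSelectionFilter(int numToSelect)
{
    var selection = new weka.filters.supervised.attribute.AttributeSelection();
    var evaluator = new weka.attributeSelection.InfoGainAttributeEval();
    var ranker = new weka.attributeSelection.Ranker();
    ranker.setNumToSelect(numToSelect); // keep only the top-ranked attributes
    selection.setEvaluator(evaluator);
    selection.setSearch(ranker);
    return selection;
}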
/// <summary>
/// Filtered Support Vector Machine classification with the dataset type specified, i.e. BOF or BOW.
/// </summary>
/// <param name="type"></param>
private void FilteredSVM(string type, string trainingFilePath, string directoryName, TextFilterType textFilterType)
{
    var currDir = System.Environment.GetFolderPath(System.Environment.SpecialFolder.ApplicationData);

    // Combine the base folder with the application-specific folder
    string specificFolder = System.IO.Path.Combine(currDir, "MARC 2.0");

    // Check if the folder exists and, if not, create it
    if (!Directory.Exists(specificFolder))
    {
        Directory.CreateDirectory(specificFolder);
    }

    try
    {
        var trainingDatasetFilePath = type == "BOF"
            ? specificFolder + "\\InputData\\TrainingDatasets\\BOF Dataset.arff"
            : specificFolder + "\\InputData\\TrainingDatasets\\BOW Dataset.arff";
        var testDatasetFilePath = specificFolder + "\\InputData\\TrainingDatasets\\Test.arff";

        // If a training file path is supplied, use it instead of the default
        if (trainingFilePath != null)
        {
            trainingDatasetFilePath = trainingFilePath;
        }

        BufferedReader trainReader = new BufferedReader(new FileReader(trainingDatasetFilePath)); // File with text examples
        BufferedReader classifyReader = new BufferedReader(new FileReader(testDatasetFilePath));  // File with text to classify
        Instances trainInsts = new Instances(trainReader);
        Instances classifyInsts = new Instances(classifyReader);
        trainInsts.setClassIndex(trainInsts.numAttributes() - 1);
        classifyInsts.setClassIndex(classifyInsts.numAttributes() - 1);

        FilteredClassifier model = new FilteredClassifier();
        StringToWordVector stringToWordVector = new StringToWordVector();
        stringToWordVector.setTFTransform(true);
        model.setFilter(stringToWordVector); // use the configured filter, not a fresh unconfigured one

        var smocls = new weka.classifiers.functions.SMO();
        //smocls.setOptions(weka.core.Utils.splitOptions("-C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.Puk -C 250007 -O 1.0 -S 1.0\""));
        smocls.setOptions(weka.core.Utils.splitOptions("-C 1.0 -L 0.0010 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));
        model.setClassifier(smocls);

        var directoryRoot = specificFolder;

        // Map each text filter to its existence check and its model/dataset file names
        Func<string, bool> modelExists = null;
        string modelFile = null, datasetCopyFile = null;
        switch (textFilterType)
        {
            case TextFilterType.NoFilter:
                modelExists = SVMNoFilterCheckifModelExists;
                modelFile = @"\Model\SVM\SVMNoFilterModel.dat";
                datasetCopyFile = @"\Model\SVM\SVMNoFilterFile.dat";
                break;
            case TextFilterType.StopwordsRemoval:
                modelExists = SVMSWRCheckifModelExists;
                modelFile = @"\Model\SVM\SVMSWRFilterModel.dat";
                datasetCopyFile = @"\Model\SVM\SVMSWRFile.dat";
                break;
            case TextFilterType.Stemming:
                modelExists = SVMSTCheckifModelExists;
                modelFile = @"\Model\SVM\SVMSTFilterModel.dat";
                datasetCopyFile = @"\Model\SVM\SVMSTFile.dat";
                break;
            case TextFilterType.StopwordsRemovalStemming:
                modelExists = SVMSWRSTCheckifModelExists;
                modelFile = @"\Model\SVM\SVMSWRSTFilterModel.dat";
                datasetCopyFile = @"\Model\SVM\SVMSWRSTFile.dat";
                break;
            default:
                break;
        }

        if (modelExists != null)
        {
            // If no saved model exists for the current filter, build one and persist
            // both the model and a copy of the training file; otherwise load the model
            if (!modelExists(trainingDatasetFilePath))
            {
                model.buildClassifier(trainInsts);
                Helper.Helper.WriteToBinaryFile<FilteredClassifier>(directoryRoot + modelFile, model);
                string content = System.IO.File.ReadAllText(trainingDatasetFilePath);
                using (var sW = new StreamWriter(directoryRoot + datasetCopyFile))
                {
                    sW.Write(content);
                }
            }
            else
            {
                model = Helper.Helper.ReadFromBinaryFile<FilteredClassifier>(directoryRoot + modelFile);
            }
        }

        // Classify each test instance and translate the numeric prediction to a label
        for (int i = 0; i < classifyInsts.numInstances(); i++)
        {
            classifyInsts.instance(i).setClassMissing();
            double cls = model.classifyInstance(classifyInsts.instance(i));
            classifyInsts.instance(i).setClassValue(cls);
            classification = cls == 0 ? "Bug Report" : cls == 1 ? "Feature Request" : "Other";
            tempAllClassification.Add(classification);
        }
        AllClassification = tempAllClassification;
    }
    catch (Exception o)
    {
        error = o.ToString();
    }
}
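// A hedged sketch of one of the existence checks referenced above (their bodies
// are not shown in this snippet). Since the method above saves a copy of the
// training file next to each model, this sketch assumes a model is reusable only
// if both files exist and the cached copy still matches the current training data.
private bool SVMNoFilterCheckifModelExists(string trainingDatasetFilePath)
{
    var root = System.IO.Path.Combine(
        System.Environment.GetFolderPath(System.Environment.SpecialFolder.ApplicationData),
        "MARC 2.0");
    string modelPath = root + @"\Model\SVM\SVMNoFilterModel.dat";
    string cachedCopyPath = root + @"\Model\SVM\SVMNoFilterFile.dat";
    if (!System.IO.File.Exists(modelPath) || !System.IO.File.Exists(cachedCopyPath))
    {
        return false;
    }
    // Rebuild when the training data has changed since the model was saved
    return System.IO.File.ReadAllText(cachedCopyPath) ==
           System.IO.File.ReadAllText(trainingDatasetFilePath);
}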
//public object Clone()
//{
//    MemoryStream ms = new MemoryStream(500000000);
//    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf =
//        new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(null,
//            new System.Runtime.Serialization.StreamingContext(System.Runtime.Serialization.StreamingContextStates.Clone));
//    bf.Serialize(ms, this);
//    ms.Seek(0, SeekOrigin.Begin);
//    object obj = bf.Deserialize(ms);
//    ms.Close();
//    return obj;
//}

public void train(bool LoadingFromFile)
{
    if (!LoadingFromFile)
    {
        DisplayMessage("Begin training on Efigi galaxies...");
        Console.Write("Begin training on Efigi galaxies...");
    }
    else
    {
        DisplayImage(0);
        DisplayMessage("Load from file...");
        Console.Write("Load from file...");
        frV = new GeneralMatrix(ReadFVMatrix(0));
        fgV = new GeneralMatrix(ReadFVMatrix(1));
        fbV = new GeneralMatrix(ReadFVMatrix(2));
    }

    weka.classifiers.trees.M5P tree = new weka.classifiers.trees.M5P();
    weka.core.converters.ConverterUtils.DataSource source =
        new weka.core.converters.ConverterUtils.DataSource(OutputDir + "Results/" + "resultsGalaxy.arff");
    data = source.getDataSet();
    if (data == null)
    {
        DisplayMessage("Cannot load from file.");
        throw new Exception("Arff file not valid");
    }
    data.setClassIndex(0);

    rmse = 0.0;
    int classifiedCount = 0;

    // Wrap the M5P tree in a FilteredClassifier; the Remove filter is left
    // unconfigured here, so no attributes are actually dropped
    weka.filters.unsupervised.attribute.Remove rm = new weka.filters.unsupervised.attribute.Remove();
    rm.setInputFormat(data);
    fc = new FilteredClassifier();
    fc.setFilter(rm);
    fc.setClassifier(tree);
    // Build the FilteredClassifier itself (which also initializes its filter);
    // building only the inner tree would leave fc unusable for classification
    fc.buildClassifier(data);

    using (StreamWriter output = new StreamWriter(OutputDir + "Results/" + "classification.txt"))
    {
        for (int i = 0; i < data.numInstances(); i++)
        {
            // Clamp the prediction to the valid class range [-6, 11]
            int classPrediction = (int)Math.Round(fc.classifyInstance(data.instance(i)));
            if (classPrediction < -6)
            {
                classPrediction = -6;
            }
            else if (classPrediction > 11)
            {
                classPrediction = 11;
            }
            int actualClass = (int)Math.Round(data.instance(i).classValue());
            int error = Math.Abs(classPrediction - actualClass);
            rmse += error * error;
            classifiedCount++;
            output.WriteLine("\n" + classPrediction + ", " + error);
            if (i % 10 == 0 && !LoadingFromFile)
            {
                DisplayImage(i);
            }
        }

        rmse = Math.Sqrt(rmse / classifiedCount);
        output.WriteLine("\nRMSE: " + rmse);
        DisplayMessage("RMSE: " + rmse);
    }

    readyToClassify = true;
    Console.WriteLine("Finished training on Efigi galaxies; RMSE: " + rmse.ToString());
}
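// A minimal sketch of how a later classify step could use the model trained
// above, once readyToClassify is set. The method name and the assumption that
// feature values arrive in the same order as the attributes of "data" (class
// attribute at index 0) are hypothetical, not taken from the original source.
public double classifyGalaxy(double[] features)
{
    if (!readyToClassify)
    {
        throw new InvalidOperationException("Call train() first.");
    }
    var inst = new weka.core.DenseInstance(data.numAttributes());
    inst.setDataset(data); // share the training header so attributes line up
    for (int i = 0; i < features.Length; i++)
    {
        inst.setValue(i + 1, features[i]); // skip the class attribute at index 0
    }
    inst.setClassMissing();
    return fc.classifyInstance(inst);
}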