示例#1
0
        /// <summary>
        /// Classifies the quality of the given file using the previously trained Weka
        /// classifier and stores the predicted label in its <c>Classificacao</c> property.
        /// </summary>
        /// <param name="_arquivoAvaliado">File whose formatted text will be classified.</param>
        /// <returns>The same file instance, with <c>Classificacao</c> set to the predicted label.</returns>
        /// <exception cref="ClassificationException">Thrown when Weka fails to classify the text.</exception>
        private Arquivo AvaliarWekaClassificacao(Arquivo _arquivoAvaliado)
        {
            // Import the classifier persisted by the training step.
            FilteredClassifier classifier = (FilteredClassifier)ImportaClassificadorSalvo();

            // Import the training data persisted by the training step.
            Instances dadosTreinamento = ImportaDadosTreinamentoSalvo();

            dadosTreinamento.setClassIndex(dadosTreinamento.numAttributes() - 1); // class attribute is last

            int numAttributes = dadosTreinamento.numAttributes();

            // Create the test instance, bound to the training header so attribute
            // indices and the class attribute line up with the trained model.
            Instance instance = new DenseInstance(numAttributes);
            instance.setDataset(dadosTreinamento);

            // Fill every non-class attribute with the text under evaluation.
            // (A previous loop zero-initialized all attributes first, but every one of
            // those values was overwritten here or by setClassMissing, so it was removed.)
            for (int i = 0; i < numAttributes - 1; i++)
            {
                instance.setValue(i, _arquivoAvaliado.TextoFormatado);
            }

            // Mark the class as missing so Weka will predict it.
            instance.setClassMissing();

            string resultado;

            try
            {
                // Predict the numeric class index for the instance.
                double predicao = classifier.classifyInstance(instance);
                instance.setClassValue(predicao);

                // Translate the numeric prediction into the expected class label.
                resultado = dadosTreinamento.classAttribute().value((int)predicao);
            }
            catch (Exception)
            {
                throw new ClassificationException("O texto não pode ser classificado quanto à sua qualidade.");
            }

            // Store the predicted label on the evaluated file.
            _arquivoAvaliado.Classificacao = resultado;

            return _arquivoAvaliado;
        }
示例#2
0
        /// <summary>
        /// Trains a Naive Bayes classifier over the server training files using a
        /// StringToWordVector + attribute-selection filter chain, then persists both
        /// the classifier model and the training data to the server.
        /// </summary>
        private void RealizaTreinamentoWeka()
        {
            TempData["DataTreinamento"] = DateTime.Now;

            Instances dadosTreinamento = ImportaArquivosServidor();

            // Build the filter chain: text -> word vector, then attribute selection.
            weka.filters.Filter[] filters = new weka.filters.Filter[2];
            filters[0] = new StringToWordVector();
            filters[1] = AttributeSelectionFilter(2);

            weka.filters.MultiFilter filter = new weka.filters.MultiFilter();
            // BUGFIX: set the filters BEFORE the input format; previously setInputFormat
            // ran first, so the output format was computed against MultiFilter's default
            // (empty) filter list instead of the chain configured above.
            filter.setFilters(filters);
            filter.setInputFormat(dadosTreinamento);

            // Build the classifier from the training data.
            FilteredClassifier classifier = new FilteredClassifier();

            classifier.setFilter(filter);
            classifier.setClassifier(new NaiveBayes());
            classifier.buildClassifier(dadosTreinamento);

            // Cross-validation to expose classifier statistics (kept disabled).

            /*
             * Evaluation eval = new Evaluation(dadosTreinamento);
             * eval.crossValidateModel(classifier, dadosTreinamento, 10, new java.util.Random(1));
             *
             * var EstatisticasTexto =
             *  "Instâncias Corretas: \t" + eval.correct() + "  (" + Math.Round(eval.pctCorrect(),2) + "%)" + System.Environment.NewLine
             + "Instâncias Incorretas: \t" + eval.incorrect() + "  (" + Math.Round(eval.pctIncorrect(),2) + "%)" + System.Environment.NewLine
             + "Total de Instâncias: \t\t" + eval.numInstances();
             +
             +
             + TempData["TreinamentoRealizado"] = EstatisticasTexto;
             */

            // Save the classifier (model) as /Classificador/Classificador.model
            SerializationHelper.write(string.Format("{0}Classificador.model", DiretorioClassificadorServidor), classifier);

            // Save the training data as /Classificador/DadosTreinamento.arff
            Utilidades.SalvarArff(dadosTreinamento, DiretorioClassificadorServidor, "DadosTreinamento.arff");
        }
示例#3
0
        /// <summary>
        /// Filtered Support Vector Machine Classification with type specified. i.e. BOF or BOW
        /// </summary>
        /// <param name="type"></param>
        /// <summary>
        /// Filtered Support Vector Machine classification with type specified, i.e. BOF or BOW.
        /// Builds (or loads a cached) SMO model for the requested text-filter variant,
        /// classifies every instance of the test dataset, and publishes the labels
        /// through <c>AllClassification</c>. Failures are reported via <c>error</c>.
        /// </summary>
        /// <param name="type">Training dataset flavour: "BOF" selects the BOF dataset, anything else the BOW dataset.</param>
        /// <param name="trainingFilePath">Optional explicit training .arff path; overrides the default when non-null.</param>
        /// <param name="directoryName">Unused here; kept for interface compatibility.</param>
        /// <param name="textFilterType">Preprocessing variant that selects which cached model/file pair to use.</param>
        private void FilteredSVM(string type, string trainingFilePath, string directoryName, TextFilterType textFilterType)
        {
            var currDir = System.Environment.GetFolderPath(System.Environment.SpecialFolder.ApplicationData);

            // Application data folder: %APPDATA%\MARC 2.0
            string specificFolder = System.IO.Path.Combine(currDir, "MARC 2.0");

            // Check if folder exists and if not, create it.
            if (!Directory.Exists(specificFolder))
            {
                Directory.CreateDirectory(specificFolder);
            }

            try
            {
                // Pick the default training dataset for the requested representation.
                var trainingDatatsetFilePath = type == "BOF"
                    ? specificFolder + "\\InputData\\TrainingDatasets\\BOF Dataset.arff"
                    : specificFolder + "\\InputData\\TrainingDatasets\\BOW Dataset.arff";

                var testDatasetFilePath = specificFolder + "\\InputData\\TrainingDatasets\\Test.arff";

                // If a training file path is supplied then it takes precedence.
                if (trainingFilePath != null)
                {
                    trainingDatatsetFilePath = trainingFilePath;
                }

                Instances trainInsts;
                Instances classifyInsts;

                // BUGFIX: both readers were previously never closed (resource leak).
                java.io.BufferedReader trainReader = new BufferedReader(new FileReader(trainingDatatsetFilePath)); // file with text examples
                try
                {
                    trainInsts = new Instances(trainReader);
                }
                finally
                {
                    trainReader.close();
                }

                BufferedReader classifyReader = new BufferedReader(new FileReader(testDatasetFilePath)); // file with text to classify
                try
                {
                    classifyInsts = new Instances(classifyReader);
                }
                finally
                {
                    classifyReader.close();
                }

                trainInsts.setClassIndex(trainInsts.numAttributes() - 1);
                classifyInsts.setClassIndex(classifyInsts.numAttributes() - 1);

                FilteredClassifier model = new FilteredClassifier();

                // BUGFIX: the configured filter is now actually installed; previously a
                // brand-new StringToWordVector (without the TF transform) replaced it.
                StringToWordVector stringtowordvector = new StringToWordVector();
                stringtowordvector.setTFTransform(true);
                model.setFilter(stringtowordvector);

                weka.classifiers.Classifier smocls = new weka.classifiers.functions.SMO();

                //smocls.setOptions(weka.core.Utils.splitOptions("-C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.Puk -C 250007 -O 1.0 -S 1.0\""));
                smocls.setOptions(weka.core.Utils.splitOptions("-C 1.0 -L 0.0010 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));
                model.setClassifier(smocls);

                // Models and training-file snapshots are cached under the app-data folder.
                var directoryRoot = specificFolder;

                // Resolve the cache-existence check and artifact names for this filter.
                bool   exists           = false;
                string modelFile        = null;
                string trainingCopyFile = null;

                switch (textFilterType)
                {
                case TextFilterType.NoFilter:
                    exists           = SVMNoFilterCheckifModelExists(trainingDatatsetFilePath);
                    modelFile        = "SVMNoFilterModel.dat";
                    trainingCopyFile = "SVMNoFilterFile.dat";
                    break;

                case TextFilterType.StopwordsRemoval:
                    exists           = SVMSWRCheckifModelExists(trainingDatatsetFilePath);
                    modelFile        = "SVMSWRFilterModel.dat";
                    trainingCopyFile = "SVMSWRFile.dat";
                    break;

                case TextFilterType.Stemming:
                    exists           = SVMSTCheckifModelExists(trainingDatatsetFilePath);
                    modelFile        = "SVMSTFilterModel.dat";
                    trainingCopyFile = "SVMSTFile.dat";
                    break;

                case TextFilterType.StopwordsRemovalStemming:
                    exists           = SVMSWRSTCheckifModelExists(trainingDatatsetFilePath);
                    modelFile        = "SVMSWRSTFilterModel.dat";
                    trainingCopyFile = "SVMSWRSTFile.dat";
                    break;

                default:
                    // Unknown filter type: keep the freshly configured (unbuilt) model,
                    // matching the original behavior.
                    break;
                }

                if (modelFile != null)
                {
                    string modelPath = directoryRoot + @"\Model\SVM\" + modelFile;

                    // If no cached model exists, build one, persist it, and snapshot the
                    // training file that produced it; otherwise reuse the cached model.
                    if (!exists)
                    {
                        model.buildClassifier(trainInsts);
                        Helper.Helper.WriteToBinaryFile <FilteredClassifier>(modelPath, model);

                        string content = System.IO.File.ReadAllText(trainingDatatsetFilePath);
                        using (var sW = new StreamWriter(directoryRoot + @"\Model\SVM\" + trainingCopyFile))
                        {
                            sW.Write(content);
                        }
                    }
                    else
                    {
                        model = Helper.Helper.ReadFromBinaryFile <FilteredClassifier>(modelPath);
                    }
                }

                // Classify every test instance and map the numeric class to its label.
                for (int i = 0; i < classifyInsts.numInstances(); i++)
                {
                    classifyInsts.instance(i).setClassMissing();
                    double cls = model.classifyInstance(classifyInsts.instance(i));
                    classifyInsts.instance(i).setClassValue(cls);
                    classification = cls == 0 ? "Bug Report"
                                    : cls == 1 ? "Feature Request"
                                    : "Other";
                    tempAllClassification.Add(classification);
                }
                AllClassification = tempAllClassification;
            }
            catch (Exception o)
            {
                // Deliberate best-effort: surface the failure through the error field.
                error = o.ToString();
            }
        }
        //public object Clone() {
        //    MemoryStream ms = new MemoryStream(500000000);
        //    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf =
        //        new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(null,
        //            new System.Runtime.Serialization.StreamingContext(System.Runtime.Serialization.StreamingContextStates.Clone));
        //    bf.Serialize(ms, this);
        //    ms.Seek(0, SeekOrigin.Begin);
        //    object obj = bf.Deserialize(ms);
        //    ms.Close();
        //    return obj;
        //}
        /// <summary>
        /// Trains an M5P model tree on the Efigi galaxy results data, evaluates it over
        /// the same dataset, writes per-instance predictions to Results/classification.txt,
        /// and records the RMSE.
        /// </summary>
        /// <param name="LoadingFromFile">When true, the per-channel feature matrices are
        /// reloaded from disk instead of assuming they were just computed.</param>
        /// <exception cref="Exception">Thrown when the .arff results file cannot be loaded.</exception>
        public void train(bool LoadingFromFile)
        {
            if (!LoadingFromFile) {
                DisplayMessage("Begin training on Efigi galaxies...");
                Console.Write("Begin training on Efigi galaxies...");
            }
            else {
                DisplayImage(0);
                DisplayMessage("Load from file...");
                Console.Write("Load from file...");

                // Reload the per-channel (r, g, b) feature matrices from disk.
                frV = new GeneralMatrix(ReadFVMatrix(0));
                fgV = new GeneralMatrix(ReadFVMatrix(1));
                fbV = new GeneralMatrix(ReadFVMatrix(2));
            }

            weka.classifiers.trees.M5P tree = new weka.classifiers.trees.M5P();

            weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource(OutputDir + "Results/" + "resultsGalaxy.arff");
            data = source.getDataSet();
            if (data == null) {
                DisplayMessage("Cannot load from file.");
                throw new Exception("Arff File not valid");
            }
            data.setClassIndex(0); // class attribute is first in this dataset
            tree.buildClassifier(data);

            rmse = 0.0;
            int classifiedCount = 0;

            // Wrap the tree in a FilteredClassifier with a Remove filter.
            weka.filters.unsupervised.attribute.Remove rm = new weka.filters.unsupervised.attribute.Remove();
            rm.setInputFormat(data);
            fc = new FilteredClassifier();
            fc.setFilter(rm);
            fc.setClassifier(tree);

            // BUGFIX: the StreamWriter is now disposed even if classification throws;
            // previously Flush/Close/Dispose were only reached on full success.
            using (StreamWriter output = new StreamWriter(OutputDir + "Results/" + "classification.txt")) {
                for (int i = 0; i < data.numInstances(); i++) {
                    // Clamp the prediction to the valid class range [-6, 11].
                    int classPrediction = (int)Math.Round(fc.classifyInstance(data.instance(i)));
                    if (classPrediction < -6) {
                        classPrediction = -6;
                    }
                    else if (classPrediction > 11) {
                        classPrediction = 11;
                    }

                    int actualClass = (int)Math.Round(data.instance(i).classValue());

                    // Accumulate the squared absolute error for the RMSE.
                    int error = Math.Abs(classPrediction - actualClass);
                    rmse += error * error;
                    classifiedCount++;

                    output.WriteLine("\n" + classPrediction + ", " + error);
                    if (i % 10 == 0 && !LoadingFromFile)
                        DisplayImage(i);
                }

                rmse = Math.Sqrt(rmse / classifiedCount);
                output.WriteLine("\nRMSE: " + rmse);
            }

            DisplayMessage("RMSE: " + rmse);

            readyToClassify = true;

            Console.WriteLine("Finished training on Efigi galaxies; RMSE: " + rmse.ToString());
        }