/// <summary>
/// Creates an evaluator backed by a training set loaded from disk and a Weka classifier.
/// </summary>
/// <param name="trainingSetPath">Path of the ARFF file holding the training instances.</param>
/// <param name="classifier">The Weka classifier used during subsequent evaluations.</param>
public WekaClassification(string trainingSetPath, Classifier classifier)
{
    //Keep the classifier that later evaluation calls will use.
    Classifier = classifier;

    //Read the training instances and treat the last attribute as the class.
    var loaded = new ConverterUtils.DataSource(trainingSetPath).getDataSet();
    loaded.setClassIndex(loaded.numAttributes() - 1);
    OriginalDataset = loaded;
}
/// <summary>
/// Trains a Naive Bayes classifier on the training set, classifies the single
/// instance described in UrunPredict.arff, appends the newly labeled row to the
/// training file (so it is reused next time) and shows the predicted class.
/// </summary>
public void Classify()
{
    //Load the training set; default the class to the last attribute.
    ConverterUtils.DataSource source1 = new ConverterUtils.DataSource(TrainPath);
    Instances train = source1.getDataSet();
    if (train.classIndex() == -1)
    {
        train.setClassIndex(train.numAttributes() - 1);
    }

    //Load the data to be predicted.
    ConverterUtils.DataSource source2 = new ConverterUtils.DataSource(Application.StartupPath + "\\UrunPredict.arff");
    Instances test = source2.getDataSet();
    if (test.classIndex() == -1)
    {
        //BUGFIX: the class index must be derived from the test set itself,
        //not from the training set (original used train.numAttributes()).
        test.setClassIndex(test.numAttributes() - 1);
    }

    //Build the Naive Bayes model on the training data.
    NaiveBayes naiveBayes = new NaiveBayes();
    naiveBayes.buildClassifier(train);

    //Classify the first instance and store the predicted label on it.
    double label = naiveBayes.classifyInstance(test.instance(0));
    test.instance(0).setClassValue(label);

    //Append the newly classified row (GUI inputs + predicted class) to the training file.
    string addClassifiedData = txtSicaklik.Text + "," + txtNem.Text + "," + txtYagis.Text + "," + cmbDeniz.Text + "," + test.instance(0).stringValue(4);
    //BUGFIX: dispose the writer deterministically even if WriteLine throws.
    using (StreamWriter kayit = File.AppendText(TrainPath))
    {
        kayit.WriteLine("\n" + addClassifiedData);
    }

    ShowImageAndInfo(test.instance(0).stringValue(4));
}
/// <summary>
/// Converts a libsvm file to ARFF: optionally deletes one attribute, marks the
/// last attribute as the class, nominalizes it when numeric and saves the result.
/// </summary>
/// <param name="filename">Path of the libsvm file to convert.</param>
/// <param name="featureToRemove">0-based attribute index to delete before conversion; pass -1 to keep all.</param>
/// <returns>true on success; false when loading/conversion failed (details are logged).</returns>
public static bool ConvertLIBSVM2ARFF(string filename, int featureToRemove)
{
    try
    {
        var source = new ConverterUtils.DataSource(filename);
        Instances insts = source.getDataSet();

        if (featureToRemove > -1)
        {
            insts.deleteAttributeAt(featureToRemove);
        }

        if (insts.classIndex() == -1)
        {
            //BUGFIX: attribute indices are 0-based, so the last attribute is
            //numAttributes() - 1; numAttributes() itself is out of range and
            //makes setClassIndex throw.
            insts.setClassIndex(insts.numAttributes() - 1);
        }

        //Nominal class required by nominal-only downstream classifiers/filters.
        if (!insts.classAttribute().isNominal())
        {
            var filter = new weka.filters.unsupervised.attribute.NumericToNominal();
            filter.setOptions(weka.core.Utils.splitOptions("-R last"));
            filter.setInputFormat(insts);
            insts = Filter.useFilter(insts, filter);
        }

        SaveArff(insts, filename);
        GuiPreferences.Instance.setLog("Converted to arff.");
    }
    catch (Exception e)
    {
        //optional save method
        /*var writer = new java.io.BufferedWriter(new java.io.FileWriter(filename+"1.arff"));
         * writer.write(insts.toString());
         * writer.flush();
         * writer.close();*/
        GuiPreferences.Instance.setLog(e.ToString() + " ");
        GuiPreferences.Instance.setLog(filename);
        return(false);
    }
    return(true);
}
/// <summary>
/// Loads an ARFF file (relative to the configured work directory) with Weka,
/// marks the last attribute as the class and converts it to nominal when numeric.
/// </summary>
/// <param name="filename">File name appended to GuiPreferences.Instance.WorkDirectory.</param>
/// <returns>The loaded, class-indexed (and possibly nominalized) instances.</returns>
public static Instances loadDataSetFile(string filename)
{
    var fullPath = GuiPreferences.Instance.WorkDirectory + filename;
    Instances data = new ConverterUtils.DataSource(fullPath).getDataSet();

    //Default the class to the last attribute when none has been set.
    if (data.classIndex() == -1)
    {
        data.setClassIndex(data.numAttributes() - 1);
    }

    //A numeric class cannot be used by nominal-only classifiers; convert it.
    if (!data.classAttribute().isNominal())
    {
        var toNominal = new weka.filters.unsupervised.attribute.NumericToNominal();
        toNominal.setOptions(weka.core.Utils.splitOptions("-R last"));
        //filter.setAttributeIndices("last");
        toNominal.setInputFormat(data);
        data = Filter.useFilter(data, toNominal);
    }

    return data;
}
/// <summary>
/// Evaluates a trained classifier against the data set stored at the given path.
/// </summary>
/// <param name="wekaClassifier">Wrapper holding the trained classifier and an optional attribute-removal list.</param>
/// <param name="datasetPath">Path of the ARFF file used for evaluation.</param>
/// <returns>Fraction of correctly classified instances (accuracy in [0, 1]).</returns>
public static double EvaluateClassifier(WekaClassifier wekaClassifier, string datasetPath)
{
    Instances dataset = new ConverterUtils.DataSource(datasetPath).getDataSet();
    dataset.setClassIndex(dataset.numAttributes() - 1);

    //Apply the same attribute reduction the classifier was configured with, if any.
    int[] toRemove = wekaClassifier.AttributesToRemove;
    if (toRemove != null && toRemove.Length != 0)
    {
        dataset = GetReducedDataset(dataset, toRemove, null);
    }

    //Run the classifier over the data set and measure how many it got right.
    Evaluation eval = new Evaluation(dataset);
    eval.evaluateModel(wekaClassifier.Classifier, dataset);

    //correct() counts correctly classified instances; dividing yields accuracy.
    return eval.correct() / dataset.numInstances();
}
/// <summary>
/// Loads the "weka" data set, trains a J48 tree on a percentSplit% slice of a
/// randomized ordering and prints the accuracy on the remaining instances.
/// </summary>
public static void ClassifyTest()
{
    try
    {
        weka.core.Instances insts = new ConverterUtils.DataSource("weka").getDataSet();
        insts.setClassIndex(insts.numAttributes() - 1);

        weka.classifiers.Classifier cl = new weka.classifiers.trees.J48();
        Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

        //Randomize the instance order so the split is not biased by file order.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;

        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        cl.buildClassifier(train);

        //Score the held-out tail of the randomized data set.
        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = cl.classifyInstance(currentInst);
            if (predictedClass == insts.instance(i).classValue())
            {
                numCorrect++;
            }
        }

        //BUGFIX: guard against a 0-instance test set (percentSplit == 100),
        //which previously printed Infinity/NaN from a division by zero.
        double accuracy = testSize > 0 ? numCorrect * 100.0 / testSize : 0.0;
        Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + accuracy + "%)");
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
}
/// <summary>
/// Runs the unprocessed training pipeline end to end: exports the trial problem to a
/// libsvm file, splits it into 3rd/4th-TR vector files, normalizes them, converts both
/// to ARFF, runs information-gain feature selection on the TR4 set, then applies the
/// selected features to the GUI-chosen TR set and saves the reduced set to disk.
/// </summary>
/// <param name="_trialProblem">Problem holding the samples to export; must have samples.</param>
/// <returns>The IG-filtered instances for the configured TR, or null when there are no samples.</returns>
public static Instances WekaPipeline_Unprocessed(libSVM_ExtendedProblem _trialProblem)
{
    //export to libsvm file; bail out early when there is nothing to export.
    if (_trialProblem.samples == null)
    {
        GuiPreferences.Instance.setLog("Export Failed: Problem has no samples!");
        return(null);
    }

    string trainFileName = GuiPreferences.Instance.WorkDirectory /*+ GuiPreferences.Instance.FileName*/ + "TrainSet";

    //todo add proper named to saved files, check if null is logical at all.
    //NOTE(review): this condition is always true here — samples == null already returned above.
    if ((_trialProblem.samples != null))
    {
        _trialProblem.Save(trainFileName + ".libsvm");
        GuiPreferences.Instance.setLog("saved Original Problem LibSVM file: " + trainFileName + ".libsvm");
    }

    //separate DS to 3rd and 4th TR
    ////example: ExecuteSelectKthVectorScript(@"TrainSet", @"H:\My_Dropbox\VERE\MRI_data\Tirosh\20120508.Rapid+NullClass.day2\4\rtp\");
    KthExtractionManager.ExecuteSelectKthVectorScript(/*GuiPreferences.Instance.FileName +*/ "TrainSet", GuiPreferences.Instance.WorkDirectory);
    GuiPreferences.Instance.setLog("Created TR3 & TR4 files");

    //normalize 3rd and 4th TR files.
    NormalizationManager.ScaleTrFiles(GuiPreferences.Instance.WorkDirectory);
    GuiPreferences.Instance.setLog("Normalized TR3 & TR4 files");

    //convert tr4 and tr3 to arff + REMOVE 204801 FAKE FEATURE, THAT WAS PLACES TO MAKE SURE WE GET 204800 FEATURES IN THE ARFF FILE.
    if (WekaCommonFileOperation.ConvertLIBSVM2ARFF(GuiPreferences.Instance.WorkDirectory + "TrainSet_3th_vectors_scaledCS.libsvm", 204800))
    {
        GuiPreferences.Instance.setLog("Converted to ARFF: TrainSet_3th_vectors_scaledCS.libsvm");
    }
    if (WekaCommonFileOperation.ConvertLIBSVM2ARFF(GuiPreferences.Instance.WorkDirectory + "TrainSet_4th_vectors_scaledCS.libsvm", 204800))
    {
        GuiPreferences.Instance.setLog("Converted to ARFF: TrainSet_4th_vectors_scaledCS.libsvm");
    }

    //---------------------------------- filter tr3 based on top 1000 from tr4 (the trick) -----------------------------
    //load TR4
    ConverterUtils.DataSource source = new ConverterUtils.DataSource(GuiPreferences.Instance.WorkDirectory + "TrainSet_4th_vectors_scaledCS.libsvm.arff");
    Instances data = source.getDataSet();

    //assign last as index.
    if (data.classIndex() == -1)
    {
        data.setClassIndex(data.numAttributes() - 1);
    }

    //if class not nominal, convert to nominal (required downstream).
    if (!data.classAttribute().isNominal())
    {
        var filter = new weka.filters.unsupervised.attribute.NumericToNominal();
        filter.setOptions(weka.core.Utils.splitOptions("-R last"));
        //filter.setAttributeIndices("last");
        filter.setInputFormat(data);
        data = Filter.useFilter(data, filter);
    }

    //run ig and get top 1000 or up to 1000 bigger than zero, from tr4.
    //NOTE(review): result is read from the shared Preferences.Instance.attsel object — verify no concurrent use.
    WekaTrainingMethods.useLowLevelInformationGainFeatureSelection(data);
    TrainingTesting_SharedVariables._trainTopIGFeatures = Preferences.Instance.attsel.selectedAttributes();

    //this should be done ONCE
    Preferences.Instance.fastvector = RealTimeProcessing.CreateFastVector(TrainingTesting_SharedVariables._trainTopIGFeatures.Length);
    GuiPreferences.Instance.setLog("created fast vector of length " + TrainingTesting_SharedVariables._trainTopIGFeatures.Length.ToString());

    //serialize (save) topIG indices to file.
    XMLSerializer.serializeArrayToFile(GuiPreferences.Instance.WorkDirectory + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS_filteredIG_indices.xml", TrainingTesting_SharedVariables._trainTopIGFeatures);
    GuiPreferences.Instance.setLog("saved IG indices to a file (in the same order as IG gave it)");
    //int [] _trainTopIGFeatures_loaded = DeserializeArrayToFile(GuiPreferences.Instance.WorkDirectory + "TrainSet_3th_vectors_scaledCS_filteredIG_indices.xml");
    GuiPreferences.Instance.setLog(TrainingTesting_SharedVariables._trainTopIGFeatures.Length.ToString() + " features above zero value selected (including the Class feature)");

    //load the TR set selected in the GUI (e.g. tr3).
    source = new ConverterUtils.DataSource(GuiPreferences.Instance.WorkDirectory + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS.libsvm.arff");
    data = source.getDataSet();

    //filter top IG features selected on TR4.
    data = WekaTrainingMethods.useRemoveFilter(data, TrainingTesting_SharedVariables._trainTopIGFeatures, true);

    //after filtering last feature needs to be the class.
    if (data.classIndex() == -1)
    {
        data.setClassIndex(data.numAttributes() - 1);
    }

    //save filtered set to a file.
    WekaCommonFileOperation.SaveLIBSVM(data, GuiPreferences.Instance.WorkDirectory + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS_filteredIG");
    return(data);
}
/// <summary>
/// Produces a copy of the data set with the given attributes and instances removed.
/// </summary>
/// <param name="dataset">The data set to reduce; the caller's copy is left untouched.</param>
/// <param name="attributesToRemove">The 0-based indices of the attributes to remove from the data set.</param>
/// <param name="instancesToRemove">The 0-based indicies of the instances to remove from the data set.</param>
/// <returns>The reduced copy, with its class set to the last remaining attribute.</returns>
public static Instances GetReducedDataset(Instances dataset, int[] attributesToRemove, int[] instancesToRemove)
{
    #region Validation
    if (dataset == null)
    {
        //More specific than the original bare Exception; still caught by catch (Exception).
        throw new ArgumentNullException(nameof(dataset), "Class has not been properly initialized. Please reconstruct the class and try again.");
    }
    #endregion

    //Weka's Remove/RemoveRange filters take 1-based, comma-separated indices via "-R".
    string attributesFilterString = BuildRemoveOption(attributesToRemove);
    string instancesFilterString = BuildRemoveOption(instancesToRemove);

    //Work on a reloaded copy so the caller's data set is not mutated.
    Instances reducedDataset = new ConverterUtils.DataSource(dataset).getDataSet();
    reducedDataset.setClassIndex(reducedDataset.numAttributes() - 1);

    if (!String.IsNullOrWhiteSpace(attributesFilterString))
    {
        Remove attributesFilter = new Remove();
        attributesFilter.setOptions(Utils.splitOptions(attributesFilterString));
        attributesFilter.setInputFormat(reducedDataset);
        reducedDataset = Filter.useFilter(reducedDataset, attributesFilter);
    }

    if (!String.IsNullOrWhiteSpace(instancesFilterString))
    {
        var instancesFilter = new weka.filters.unsupervised.instance.RemoveRange();
        instancesFilter.setOptions(Utils.splitOptions(instancesFilterString));
        instancesFilter.setInputFormat(reducedDataset);
        reducedDataset = Filter.useFilter(reducedDataset, instancesFilter);
    }

    return reducedDataset;
}

/// <summary>
/// Builds a Weka "-R i,j,k" option string from 0-based indices (converted to 1-based);
/// returns an empty string when there is nothing to remove.
/// </summary>
private static string BuildRemoveOption(int[] zeroBasedIndices)
{
    if (zeroBasedIndices == null || zeroBasedIndices.Length == 0)
    {
        return "";
    }
    return "-R " + string.Join(",", Array.ConvertAll(zeroBasedIndices, i => (i + 1).ToString()));
}
/// <summary>
/// Ad-hoc developer smoke test (originally for IronPython scripting, per the name).
/// In its current form it: loads the TR4 ARFF from a hard-coded directory, builds the
/// inverse of the currently selected IG attribute set, filters the GUI-chosen TR ARFF
/// with it, saves the result, and shows the ranked attributes in a DICOM viewer form.
/// NOTE(review): reads Preferences.Instance.attsel.selectedAttributes() without running
/// feature selection first — relies on a prior run having populated attsel; confirm.
/// </summary>
public void TestIronPython()
{
    /*IronPythonCLS ir = new IronPythonCLS();
     * var res = ir.ExecuteBusinessRules();
     * GuiPreferences.Instance.setLog(res.ToString());*/
    //ExecuteSelectKthVectorScript();

    //Hard-coded developer-machine test inputs; CsharpFileName is only used by the
    //commented-out experiments below.
    string CsharpFileName = @"TrainSet";
    string CsharpDirectory = @"H:\My_Dropbox\VERE\MRI_data\Tirosh\20120508.Rapid+NullClass.day2\4\rtp\";

    /*ExecuteSelectKthVectorScript(CsharpFileName, CsharpDirectory);
     * svm_scale_java svmscale = new svm_scale_java();
     *
     * string commandLine = "-l 0 " +
     * "-s " + CsharpDirectory + "TrainSet_3th_vectors_scale_paramcs.libsvm " +
     * "-o " + CsharpDirectory + "TrainSet_3th_vectors_scaledcs.libsvm " +
     * CsharpDirectory + "TrainSet_3th_vectors.libsvm";
     *
     * string[] commandArray = commandLine.Split(' ');
     * svmscale.run(commandArray);
     *
     * commandLine = "-l 0 " +
     * "-s " + CsharpDirectory + "TrainSet_4th_vectors_scale_paramcs.libsvm " +
     * "-o " + CsharpDirectory + "TrainSet_4th_vectors_scaledcs.libsvm " +
     * CsharpDirectory + "TrainSet_4th_vectors.libsvm";
     * commandArray = commandLine.Split(' ');
     * svmscale.run(commandArray);*/
    ////////////////////////WekaCommon.Main(null);
    ////////////////////////var source = new ConverterUtils.DataSource(CsharpDirectory + "TrainSet_3th_vectors_scaledCS.libsvm");

    //convert tr4 and tr3 to arff
    /*if (WekaCommonFileOperation.ConvertLIBSVM2ARFF(CsharpDirectory + "TrainSet_3th_vectors_scaledCS.libsvm"))
     * GuiPreferences.Instance.setLog("Converted to ARFF: TrainSet_3th_vectors_scaledCS.libsvm");
     * if (WekaCommonFileOperation.ConvertLIBSVM2ARFF(CsharpDirectory + "TrainSet_4th_vectors_scaledCS.libsvm"))
     * GuiPreferences.Instance.setLog("Converted to ARFF: TrainSet_4th_vectors_scaledCS.libsvm");*/

    //infogain on tr4 and get 1000 top features: load the TR4 ARFF and normalize its class.
    ConverterUtils.DataSource source = new ConverterUtils.DataSource(CsharpDirectory + "TrainSet_4th_vectors_scaledCS.libsvm.arff");
    Instances data = source.getDataSet();

    //Default class to last attribute when unset.
    if (data.classIndex() == -1)
    {
        data.setClassIndex(data.numAttributes() - 1);
    }

    //Convert a numeric class to nominal (required by downstream steps).
    if (!data.classAttribute().isNominal())
    {
        var filter = new weka.filters.unsupervised.attribute.NumericToNominal();
        filter.setOptions(weka.core.Utils.splitOptions("-R last"));
        //filter.setAttributeIndices("last");
        filter.setInputFormat(data);
        data = Filter.useFilter(data, filter);
    }

    //Previously selected IG attributes (see NOTE(review) in the summary).
    int[] topIGFeatures = Preferences.Instance.attsel.selectedAttributes();

    //load tr3 (actually the TR chosen in the GUI).
    source = new ConverterUtils.DataSource(CsharpDirectory + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS.libsvm.arff");
    data = source.getDataSet();

    //Build the complement of topIGFeatures so the remove filter can keep=false them.
    int[] invertedTopIGFeatures = new int[data.numAttributes() - topIGFeatures.Length];

    //alternative use of the filter: dictionary gives O(1) membership tests.
    var dict = topIGFeatures.ToDictionary(key => key, value => value);
    int position = 0;
    for (int feat = 0; feat < data.numAttributes(); feat++)
    {
        if (!dict.ContainsKey(feat))
        {
            invertedTopIGFeatures[position] = feat;
            position++;
        }
    }

    //filter top IG
    //data = WekaCommonMethods.useRemoveFilter(data, topIGFeatures, true);
    data = WekaTrainingMethods.useRemoveFilter(data, invertedTopIGFeatures, false);
    WekaCommonFileOperation.SaveArff(data, CsharpDirectory + "TrainSet_" + GuiPreferences.Instance.NudClassifyUsingTR.ToString() + "th_vectors_scaledCS_filteredIG2.libsvm1.arff");

    //train
    /*weka.classifiers.functions.SMO smo = new SMO();
     * smo.setOptions(weka.core.Utils.splitOptions(" -C 1.0 -L 0.001 -P 1.0E-12 -N 0 -V -1 -W 1 -K \"weka.classifiers.functions.supportVector.PolyKernel -C 250007 -E 1.0\""));
     * if (data.classIndex() == -1)
     * data.setClassIndex(data.numAttributes() - 1);
     *
     * smo.buildClassifier(data);
     *
     * //test on self should get 100%
     * weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(data);
     * eval.evaluateModel(smo, data);
     *
     * GuiPreferences.Instance.setLog(eval.toSummaryString("\nResults\n======\n", false));
     *
     * //save model serialize model
     * weka.core.SerializationHelper.write(CsharpDirectory + "TrainSet_3th_vectors_scaledCS_filteredIG.libsvm.arff.model", smo);
     *
     * //load model deserialize model
     * smo = (weka.classifiers.functions.SMO)weka.core.SerializationHelper.read(CsharpDirectory + "TrainSet_3th_vectors_scaledCS_filteredIG.libsvm.arff.model");
     *
     * //test loaded model
     * eval = new weka.classifiers.Evaluation(data);
     * eval.evaluateModel(smo, data);
     * GuiPreferences.Instance.setLog(eval.toSummaryString("\nResults\n======\n", false));*/

    //display top IG.
    //PublicMethods.plotBrainDicomViewer();
    //NOTE(review): this only logs and then proceeds — the viewer below will still
    //dereference attsel; presumably a crash if attsel really is null.
    if (Preferences.Instance.attsel == null)
    {
        GuiPreferences.Instance.setLog("there are no ranked IG attributes or selected attr, continuing but please fix this possible bug.");
    }

    //Derive the sibling "master" DICOM directory from the rtp directory path.
    string dicomDir = CsharpDirectory;
    dicomDir = dicomDir.Substring(0, dicomDir.Length - 4) + @"master\";
    string[] files = System.IO.Directory.GetFiles(dicomDir, "*.dcm");
    string firstFile = files[0].Substring(files[0].LastIndexOf(@"\") + 1);

    //Viewer mode: true = IG threshold, false = fixed voxel amount.
    bool thresholdOrVoxelAmount;
    if (GuiPreferences.Instance.IgSelectionType == IGType.Threshold)
    {
        thresholdOrVoxelAmount = true;
    }
    else
    {
        thresholdOrVoxelAmount = false;
    }

    //Show the ranked IG attributes overlaid on the first DICOM slice (modal dialog).
    Form plotForm = new DicomImageViewer.MainForm(dicomDir + firstFile, firstFile, Preferences.Instance.attsel.rankedAttributes(), Convert.ToDouble(GuiPreferences.Instance.NudIGThreshold), Convert.ToInt32(GuiPreferences.Instance.NudIGVoxelAmount), thresholdOrVoxelAmount, GuiPreferences.Instance.WorkDirectory + "brain");
    plotForm.StartPosition = FormStartPosition.CenterParent;
    plotForm.ShowDialog();
    plotForm.Close();
}