/// <summary>
/// Loads the ARFF file at D:\android_analysis\attributes.arff, shuffles it, trains a
/// J48 tree on the first 66% of the instances and prints the actual and predicted
/// class label of every remaining instance to the console.
/// </summary>
public void Test()
{
    weka.core.Instances insts;
    java.io.FileReader reader = new java.io.FileReader("D:\\android_analysis\\attributes.arff");
    try
    {
        insts = new weka.core.Instances(reader);
    }
    finally
    {
        // Release the file handle even when parsing the ARFF file fails.
        reader.close();
    }

    // The last attribute is used as the class attribute.
    insts.setClassIndex(insts.numAttributes() - 1);

    // Shuffle so the train/test split does not depend on the row order in the file.
    weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
    myRandom.setInputFormat(insts);
    insts = weka.filters.Filter.useFilter(insts, myRandom);

    int trainSize = (int)(insts.numInstances() * 0.66);
    weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

    // Train once, on the training slice only: buildClassifier re-initializes the
    // model, so an additional training pass on the full dataset would be wasted work.
    weka.classifiers.Classifier cl = new weka.classifiers.trees.J48();
    cl.buildClassifier(train);

    for (int i = trainSize; i < insts.numInstances(); i++)
    {
        weka.core.Instance currentInst = insts.instance(i);
        double predictedClass = cl.classifyInstance(currentInst);

        // Translate the numeric class indices back into their nominal labels.
        string actual = insts.classAttribute().value((int)currentInst.classValue());
        string predicted = insts.classAttribute().value((int)predictedClass);
        System.Console.WriteLine("ID: " + (i + 1) + ", " + actual + " --> " + predicted);
    }
}
/// <summary>
/// Classifies every instance of <c>insts</c> from index <c>split_trainSize</c> to the end,
/// stores one <c>Weka_EachResult</c> per instance in <c>listResult</c> and counts the
/// instances whose predicted class value equals the actual one.
/// </summary>
/// <returns>The number of matching predictions (also left in <c>predict_numCorrect</c>).</returns>
static public int step_test()
{
    predict_numCorrect = 0;
    listResult = new List<Weka_EachResult>();

    int index = split_trainSize;
    while (index < insts.numInstances())
    {
        weka.core.Instance sample = insts.instance(index);
        double predicted = classifier.classifyInstance(sample);
        double actual = insts.instance(index).classValue();

        if (actual == predicted)
        {
            predict_numCorrect++;
        }

        // One result row per test instance; all values are stored as text.
        Weka_EachResult row = new Weka_EachResult();
        row.ID = index.ToString();
        row.Actual = actual.ToString();
        row.Predict = predicted.ToString();
        row.Diff = (predicted - actual).ToString();
        listResult.Add(row);

        index++;
    }

    return predict_numCorrect;
}
/// <summary>
/// Loads the serialized classifier from "iris_j48.model" and prints how many of the
/// last 34% of the instances in "iris.arff" it classifies correctly.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(String[] args)
{
    try
    {
        // Load the model.
        // NOTE: readObject deserializes arbitrary Java objects; only load model files from a trusted source.
        weka.classifiers.Classifier qhClassifier;
        java.io.FileInputStream modelFile = new java.io.FileInputStream("iris_j48.model");
        try
        {
            java.io.ObjectInputStream stream = new java.io.ObjectInputStream(modelFile);
            qhClassifier = (weka.classifiers.Classifier)stream.readObject();
        }
        finally
        {
            // Closing the underlying file stream releases the handle even if deserialization fails.
            modelFile.close();
        }

        // This model was trained on 66% of instances from the iris dataset.
        // Test the model on the remaining 34% of instances.
        weka.core.Instances insts;
        java.io.FileReader reader = new java.io.FileReader("iris.arff");
        try
        {
            insts = new weka.core.Instances(reader);
        }
        finally
        {
            reader.close();
        }
        insts.setClassIndex(insts.numAttributes() - 1);

        int percentSplit = 66;
        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;

        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = qhClassifier.classifyInstance(currentInst);
            if (predictedClass == currentInst.classValue())
            {
                numCorrect++;
            }
        }

        Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + (double)((double)numCorrect / (double)testSize * 100.0) + "%)");
    }
    catch (java.lang.Exception e)
    {
        e.printStackTrace();
    }
}
/// <summary>
/// Removes instances from <paramref name="allInstances"/> in place. Walking the rows in
/// order, a row whose attribute-0 date is earlier than the attribute-1 date of the most
/// recently kept row is dropped; otherwise the row is kept and its attribute-1 date
/// becomes the new threshold. When <c>TestParameters2.RealTimeMode</c> is set, the last
/// row is never dropped: its class value is set to 0 and attribute 1 is overwritten with
/// a value derived from <c>Parameters.MaxDate</c>.
/// </summary>
/// <param name="allInstances">The dataset to filter; modified in place.</param>
public static void FilterInstances(weka.core.Instances allInstances)
{
    java.util.LinkedList toRemove = new java.util.LinkedList();
    DateTime threshold = DateTime.MinValue;

    for (int row = 0; row < allInstances.numInstances(); ++row)
    {
        DateTime rowDate = WekaUtils.GetDateValueFromInstances(allInstances, 0, row);

        bool isRealTimeLastRow = TestParameters2.RealTimeMode && row == allInstances.numInstances() - 1;
        if (isRealTimeLastRow)
        {
            allInstances.instance(row).setClassValue(0);
            allInstances.instance(row).setValue(1, WekaUtils.GetTimeFromDate(Parameters.MaxDate) * 1000);
            continue;
        }

        if (rowDate < threshold)
        {
            // Still before the threshold set by the last kept row: schedule for removal.
            toRemove.Add(allInstances.instance(row));
            continue;
        }

        // Row is kept; its attribute-1 date is the threshold for the rows that follow.
        threshold = WekaUtils.GetDateValueFromInstances(allInstances, 1, row);
    }

    allInstances.removeAll(toRemove);
}
/// <summary>
/// Builds instances from six rows with <c>InstancesBuilder</c> constructor arguments
/// (rows, 0, 3) and checks the attribute count, the instance count and the string
/// values of attributes 1 and 2.
/// </summary>
public void test_has_class_value_with_limited_training_set()
{
    // NOTE(review): the third constructor argument (3) is presumably the training-set
    // limit the test name refers to - confirm against InstancesBuilder.
    TestingRow5[] rows =
    {
        new TestingRow5 { CLASS = 1.0, ATT_1 = "1" },
        new TestingRow5 { CLASS = 33.0, ATT_1 = "2" },
        new TestingRow5 { CLASS = 22.0, ATT_1 = "3" },
        new TestingRow5 { CLASS = 33.0, ATT_1 = "4" },
        new TestingRow5 { CLASS = 33.0, ATT_1 = "5" },
        new TestingRow5 { CLASS = 11.0, ATT_1 = "2" }
    };
    string[] expectedAttribute1 = { "1", "2", "3", "4", "5", "2" };
    string[] expectedAttribute2 = { "0", "1", "0", "0", "0", "1" };

    var builder = new InstancesBuilder<TestingRow5>(rows, 0, 3);
    weka.core.Instances instances = builder.Build();

    Assert.AreEqual(3, instances.numAttributes());
    Assert.AreEqual(6, instances.numInstances());
    CollectionAssert.AreEqual(expectedAttribute1, instances.GetAttrStrings(1));
    CollectionAssert.AreEqual(expectedAttribute2, instances.GetAttrStrings(2));
}
/// <summary>
/// Writes <paramref name="numericDataset"/> to an ARFF file. The number of attributes and
/// rows is taken from the <c>insts</c> field, not from the arguments. Every attribute is
/// numeric except the last one, which becomes nominal (with <paramref name="targetValues"/>
/// as its labels) when <paramref name="isTargetNumeric"/> is false.
/// </summary>
/// <param name="numericDataset">Cell values as text, indexed as [attribute][row].</param>
/// <param name="file">Destination path; an existing file is deleted first.</param>
/// <param name="atrNames">Attribute names, one per attribute.</param>
/// <param name="targetValues">Labels of the nominal target attribute.</param>
/// <param name="isTargetNumeric">True when the last attribute is written as numeric.</param>
public void WriteFile(List<List<string>> numericDataset, string file, List<string> atrNames, List<string> targetValues, bool isTargetNumeric)
{
    weka.core.FastVector targetVals = new weka.core.FastVector();
    foreach (string label in targetValues)
    {
        targetVals.addElement(label);
    }

    int attributeCount = insts.numAttributes();
    int targetIndex = attributeCount - 1;

    // Fill and prepare the attribute list for the ARFF header.
    weka.core.FastVector atts = new weka.core.FastVector();
    for (int col = 0; col < attributeCount; col++)
    {
        // The target attribute can be nominal; everything else is numeric.
        bool nominalTarget = !isTargetNumeric && col == targetIndex;
        weka.core.Attribute attribute = nominalTarget
            ? new weka.core.Attribute(atrNames[col], targetVals)
            : new weka.core.Attribute(atrNames[col]);
        atts.addElement(attribute);
    }

    weka.core.Instances data = new weka.core.Instances("MyRelation", atts, 0);

    int rowCount = insts.numInstances();
    for (int row = 0; row < rowCount; row++)
    {
        double[] vals = new double[attributeCount];
        for (int col = 0; col < attributeCount; col++)
        {
            string cell = numericDataset[col][row];
            if (!isTargetNumeric && col == targetIndex)
            {
                // Nominal target: store the index of the label.
                vals[col] = targetVals.indexOf(cell);
            }
            else
            {
                vals[col] = Convert.ToDouble(cell);
            }
        }
        data.add(new weka.core.DenseInstance(1.0, vals));
    }

    if (File.Exists(file))
    {
        File.Delete(file);
    }

    // Relative paths end up in the application's working folder
    // (the original note says {AppFolder}/bin/Debug).
    var saver = new weka.core.converters.ArffSaver();
    saver.setInstances(data);
    saver.setFile(new java.io.File(file));
    saver.writeBatch();
}
/// <summary>
/// Two-phase click handler. When no step dataset is loaded, asks for an ARFF file, loads
/// it into <c>m_loadStepInstances</c> and calls <c>clear_all</c>. Otherwise adds and draws
/// the next batch of points starting at <c>m_loadStepIdx</c>; points with x or y outside
/// [0, 1) are skipped. A batch ends after a point whose index is a multiple of 1000.
/// Once every instance has been consumed the loaded dataset is released.
/// </summary>
private void btnLoadStep_Click(object sender, EventArgs e)
{
    if (m_loadStepInstances == null)
    {
        using (OpenFileDialog d = new OpenFileDialog())
        {
            d.Filter = "Arff File|*.arff";
            if (d.ShowDialog() == System.Windows.Forms.DialogResult.OK)
            {
                java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.FileReader(d.FileName));
                try
                {
                    m_loadStepInstances = new weka.core.Instances(reader);
                }
                finally
                {
                    // Release the file handle whether or not parsing succeeded.
                    reader.close();
                }
                m_loadStepInstances.setClassIndex(m_loadStepInstances.numAttributes() - 1);
                clear_all();
            }
        }
        return;
    }

    for (int i = m_loadStepIdx; i < m_loadStepInstances.numInstances(); ++i)
    {
        var ins = m_loadStepInstances.instance(i);
        var p = new valuePoint(ins.value(0), ins.value(1), (int)ins.classValue());
        if (p.x < 0 || p.x >= 1 || p.y < 0 || p.y >= 1)
        {
            // A skipped point still counts as consumed. Without this, a file that ends
            // with out-of-range points never reaches the "finished" state below and the
            // handler keeps re-scanning the same tail on every click.
            m_loadStepIdx = i + 1;
            continue;
        }

        point_list.Add(p);
        draw_point(p);
        m_loadStepIdx = i + 1;

        if (i % 1000 == 0)
        {
            break;
        }
    }

    pictureBox1.Invalidate();

    if (m_loadStepIdx == m_loadStepInstances.numInstances())
    {
        // Everything consumed: reset so the next click opens the file dialog again.
        m_loadStepIdx = 0;
        m_loadStepInstances = null;
    }
}
//Knn
/// <summary>
/// Evaluates an IBk classifier with a percentage split. The data is passed through
/// NominalToBinary, Normalize and Randomize (in that order), the first
/// <c>percentSplit</c> percent is used for training and the rest for testing.
/// The trained classifier is left in <c>Knncl</c>.
/// </summary>
/// <param name="insts">Dataset; its class index is set to the last attribute.</param>
/// <returns>Percentage of correctly classified test instances, or 0 when a Java exception occurs.</returns>
public static double Knn(weka.core.Instances insts)
{
    try
    {
        insts.setClassIndex(insts.numAttributes() - 1);
        Knncl = new weka.classifiers.lazy.IBk();

        // Pre-processing pipeline, applied in order.
        weka.filters.Filter[] pipeline =
        {
            new weka.filters.unsupervised.attribute.NominalToBinary(),
            new weka.filters.unsupervised.instance.Normalize(),
            new weka.filters.unsupervised.instance.Randomize()
        };
        foreach (weka.filters.Filter step in pipeline)
        {
            step.setInputFormat(insts);
            insts = weka.filters.Filter.useFilter(insts, step);
        }

        int total = insts.numInstances();
        int trainSize = total * percentSplit / 100;
        int testSize = total - trainSize;

        Knncl.buildClassifier(new weka.core.Instances(insts, 0, trainSize));

        int hits = 0;
        for (int row = trainSize; row < insts.numInstances(); row++)
        {
            double predicted = Knncl.classifyInstance(insts.instance(row));
            if (insts.instance(row).classValue() == predicted)
            {
                hits++;
            }
        }

        return (double)hits / (double)testSize * 100.0;
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
        return 0;
    }
}
//private weka.core.Instances m_instances;

/// <summary>
/// Extracts the class value of every instance and forwards to the
/// <c>evaluateModel(double[], double[])</c> overload.
/// </summary>
/// <param name="v">Values passed through unchanged to the array overload.</param>
/// <param name="instances">Instances whose class values are collected, in order.</param>
public void evaluateModel(double[] v, weka.core.Instances instances)
{
    double[] classValues = new double[instances.numInstances()];

    int index = 0;
    while (index < classValues.Length)
    {
        classValues[index] = instances.instance(index).classValue();
        index++;
    }

    evaluateModel(v, classValues);
}
//Only 1 output: last instance
/// <summary>
/// Classifies only the last instance of <paramref name="instances"/> and returns the
/// label of the predicted class.
/// </summary>
/// <param name="serialClassifier">Classifier used for the prediction.</param>
/// <param name="instances">Dataset; its class index is set to <paramref name="colClass"/>.</param>
/// <param name="colClass">Index of the class attribute.</param>
/// <returns>The class attribute's label at the predicted index.</returns>
static public string do_Classification_bySerialClassfier_1out_standAlone(SerializedClassifier serialClassifier, weka.core.Instances instances, int colClass)
{
    instances.setClassIndex(colClass);

    int lastIndex = instances.numInstances() - 1;
    weka.core.Instance last = instances.instance(lastIndex);

    int predictedIndex = (int)serialClassifier.classifyInstance(last);
    return instances.classAttribute().value(predictedIndex);
}
/// <summary>
/// Runs a percentage-split evaluation of a J48 tree on the iris dataset installed with
/// Weka 3.7 and returns a two-line textual report.
/// </summary>
/// <returns>
/// The report text, or the exception message when anything fails (the message is also
/// shown in a message box).
/// </returns>
public static string classifyTest()
{
    try
    {
        String result = "";

        weka.core.Instances insts;
        java.io.FileReader reader = new java.io.FileReader("C:\\Program Files\\Weka-3-7\\data\\iris.arff");
        try
        {
            insts = new weka.core.Instances(reader);
        }
        finally
        {
            // Release the file handle even when parsing fails.
            reader.close();
        }
        insts.setClassIndex(insts.numAttributes() - 1);

        weka.classifiers.Classifier cl = new weka.classifiers.trees.J48();
        result += "Performing " + percentSplit + "% split evaluation.\n";

        // Randomize the order of the instances in the dataset.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        cl.buildClassifier(train);

        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = cl.classifyInstance(currentInst);
            if (predictedClass == currentInst.classValue())
            {
                numCorrect++;
            }
        }

        result += (numCorrect + " out of " + testSize + " correct (" + (double)((double)numCorrect / (double)testSize * 100.0) + "%)");
        return result;
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
        return ex.Message;
    }
}
/// <summary>
/// Trains a J48 tree on the first 33% of the shuffled D:/iris.arff dataset, tests it on
/// the remainder and prints the accuracy to the console. Failures are reported on the
/// standard error stream and do not propagate to the caller.
/// </summary>
public static void classifyTest()
{
    try
    {
        Console.WriteLine("Hello Java, from C#!");

        weka.core.Instances insts;
        java.io.FileReader reader = new java.io.FileReader("D:/iris.arff");
        try
        {
            insts = new weka.core.Instances(reader);
        }
        finally
        {
            // Release the file handle even when parsing fails.
            reader.close();
        }
        insts.setClassIndex(insts.numAttributes() - 1);

        weka.classifiers.Classifier cl = new weka.classifiers.trees.J48();
        Console.WriteLine("Performing " + 33 + "% split evaluation.");

        // Randomize the order of the instances in the dataset.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * 33 / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        cl.buildClassifier(train);

        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = cl.classifyInstance(currentInst);
            if (predictedClass == currentInst.classValue())
            {
                numCorrect++;
            }
        }

        Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + (double)((double)numCorrect / (double)testSize * 100.0) + "%)");
    }
    catch (Exception ex)
    {
        // Keep the best-effort contract (never throw), but do not hide the failure:
        // a missing file or a training error would otherwise produce no output at all.
        Console.Error.WriteLine(ex);
    }
}
/// <summary>
/// Trains a MultilayerPerceptron on the first <c>trainSize</c> pre-processed instances,
/// tests it on the remaining ones and shows the running success count and percentage on
/// the active <c>Form1</c>. The final percentage and the trained classifier are added to
/// <c>succesRates</c> and <c>classifiers</c> under <c>Classifier.ANN</c>. Any failure is
/// reported in a message box.
/// </summary>
/// <param name="originalInsts">Dataset to pre-process (nominal-to-numeric, then normalize) and evaluate.</param>
public static void CalculateSuccessForAnn(weka.core.Instances originalInsts)
{
    const string errorTitle = "Error for Neural Network";

    try
    {
        Form1 form = Form.ActiveForm as Form1;
        form.successPrcAnn.Text = "Training...";
        form.successRtAnn.Text = "../" + testSize;

        // Pre-process
        weka.core.Instances insts = Normalize(ConvertNominalToNumeric(originalInsts));

        // Classify
        weka.classifiers.Classifier cl = new weka.classifiers.functions.MultilayerPerceptron();
        cl.buildClassifier(new weka.core.Instances(insts, 0, trainSize));

        int hits = 0;
        double percentage = 0;
        int row = trainSize;
        while (row < insts.numInstances())
        {
            double predicted = cl.classifyInstance(insts.instance(row));
            if (insts.instance(row).classValue() == predicted)
            {
                hits++;
            }

            // Refresh the progress labels after every test instance.
            percentage = (double)hits / (double)testSize * 100.0;
            form.successRtAnn.Text = hits + "/" + testSize;
            form.successPrcAnn.Text = String.Format("{0:0.00}", percentage) + "%";

            row++;
        }

        succesRates.Add(Classifier.ANN, percentage);
        classifiers.Add(Classifier.ANN, cl);
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
        MessageBox.Show(ex.ToString(), errorTitle, MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
    catch (Exception)
    {
        MessageBox.Show(errorTitle, errorTitle, MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Runs a 10-fold cross-validation of <c>classifier</c> on <c>playerData</c> (random seed 1),
/// logs the summary and returns it.
/// </summary>
/// <returns>The evaluation summary text, or null when a Java exception occurs (the message is logged as an error).</returns>
public String PrintClassifierTestReport()
{
    String report = null;

    try
    {
        Debug.Log("Classifier: Number of instances: " + playerData.numInstances());

        var evaluation = new weka.classifiers.Evaluation(playerData);
        var random = new java.util.Random(1);
        evaluation.crossValidateModel(classifier, playerData, 10, random);

        // The logged and the returned summary differ only in their title.
        Debug.Log(evaluation.toSummaryString("\nClassifier: Cross Validate Results: \n======\n", false));
        report = evaluation.toSummaryString("\nResults\n======\n", false);
    }
    catch (java.lang.Exception ex)
    {
        Debug.LogError(ex.getMessage());
    }

    return report;
}
/// <summary>
/// Loads the serialized classifier from D:\android_analysis\som_model.model and runs it
/// over every instance of D:\android_analysis\test1.arff. The predictions are not used;
/// the calls only exercise the model on each instance.
/// </summary>
public void Test2()
{
    // NOTE: readObject deserializes arbitrary Java objects; only load model files from a trusted source.
    weka.classifiers.Classifier cl;
    java.io.FileInputStream modelFile = new java.io.FileInputStream("D:\\android_analysis\\som_model.model");
    try
    {
        java.io.ObjectInputStream ois = new java.io.ObjectInputStream(modelFile);
        cl = (weka.classifiers.Classifier)ois.readObject();
    }
    finally
    {
        // Closing the underlying file stream releases the handle even if deserialization fails.
        modelFile.close();
    }

    weka.core.Instances insts;
    java.io.FileReader reader = new java.io.FileReader("D:\\android_analysis\\test1.arff");
    try
    {
        insts = new weka.core.Instances(reader);
    }
    finally
    {
        reader.close();
    }
    insts.setClassIndex(insts.numAttributes() - 1);

    for (int i = 0; i < insts.numInstances(); i++)
    {
        weka.core.Instance currentInst = insts.instance(i);
        cl.classifyInstance(currentInst);
        cl.distributionForInstance(currentInst);
    }
}
/// <summary>
/// Builds instances from two rows that each carry CLASS and three ATT_* members, and
/// checks that only three attributes come out, with the expected string values at
/// attribute indices 1 and 2.
/// </summary>
public void test_ignore_attributes()
{
    TestingRow6[] rows =
    {
        new TestingRow6 { CLASS = 1.0, ATT_1 = "1", ATT_2 = "1.1", ATT_3 = "1.2" },
        new TestingRow6 { CLASS = 2.0, ATT_1 = "2", ATT_2 = "2.1", ATT_3 = "2.2" }
    };
    string[] expectedAttribute1 = { "1", "2" };
    string[] expectedAttribute2 = { "1.2", "2.2" };

    var builder = new InstancesBuilder<TestingRow6>(rows, 0);
    weka.core.Instances instances = builder.Build();

    // 1 is ignored
    Assert.AreEqual(3, instances.numAttributes());
    Assert.AreEqual(2, instances.numInstances());
    CollectionAssert.AreEqual(expectedAttribute1, instances.GetAttrStrings(1));
    CollectionAssert.AreEqual(expectedAttribute2, instances.GetAttrStrings(2));
}
/// <summary>
/// Builds instances from two rows whose <c>Att1</c> member is a five-element int array and
/// checks that the result has two instances and six attributes.
/// </summary>
public void flatten_attribute()
{
    FlattenClass[] rows =
    {
        new FlattenClass { CLASS = 0, Att1 = new[] { 1, 2, 3, 4, 5 } },
        new FlattenClass { CLASS = 1, Att1 = new[] { 6, 7, 8, 9, 10 } }
    };
    string[] expectedAttribute1 = { "1", "2" };
    string[] expectedAttribute2 = { "1.2", "2.2" };

    var builder = new InstancesBuilder<FlattenClass>(rows, 0);
    weka.core.Instances instances = builder.Build();

    Assert.AreEqual(2, instances.numInstances());
    Assert.AreEqual(6, instances.numAttributes());

    // NOTE(review): these expected values are identical to the ones asserted in
    // test_ignore_attributes and do not obviously correspond to the integer Att1 data
    // above - confirm they are intended for this test.
    CollectionAssert.AreEqual(expectedAttribute1, instances.GetAttrStrings(1));
    CollectionAssert.AreEqual(expectedAttribute2, instances.GetAttrStrings(2));
}
/// <summary>
/// Classifies every instance of <paramref name="instances"/> and returns the predicted
/// class labels in instance order. The number of predictions that equal the actual class
/// value is left in <c>predict_numCorrect</c>.
/// </summary>
/// <param name="serialClassifier">Classifier used for the predictions.</param>
/// <param name="instances">Dataset; its class index is set to <paramref name="colClass"/>.</param>
/// <param name="colClass">Index of the class attribute.</param>
/// <returns>One predicted class label per instance.</returns>
static public List<string> do_Classification_bySerialClassfier_standAlone(SerializedClassifier serialClassifier, weka.core.Instances instances, int colClass)
{
    instances.setClassIndex(colClass);

    var predictedLabels = new List<string>();
    predict_numCorrect = 0;

    int row = 0;
    while (row < instances.numInstances())
    {
        weka.core.Instance sample = instances.instance(row);
        double predicted = serialClassifier.classifyInstance(sample);
        double actual = instances.instance(row).classValue();

        if (actual == predicted)
        {
            predict_numCorrect++;
        }

        // Get class name
        predictedLabels.Add(instances.classAttribute().value((int)predicted));
        row++;
    }

    return predictedLabels;
}
/// <summary>
/// Predicts a class for <paramref name="instance"/> from the stored instances that differ
/// from it in fewer than four attribute values: the result is the rounded mean of their
/// class values. Returns 2 when there are no stored instances, when no stored instance is
/// that close, or when fewer than 1/70 (integer division) of the stored instances are.
/// </summary>
/// <param name="instance">The instance to classify.</param>
/// <returns>The rounded mean class value of the close instances, or 2 as the fallback.</returns>
/// <exception cref="AssertException">The attribute count differs from the stored instances.</exception>
public override double classifyInstance(weka.core.Instance instance)
{
    // Returned whenever no prediction is made; what class 2 stands for is not visible here.
    const double FallbackClass = 2;
    // A stored instance counts as close when fewer than this many attribute values differ.
    const int MismatchLimit = 4;

    int storedCount = m_instances.numInstances();
    if (storedCount == 0)
    {
        return FallbackClass;
    }
    if (m_instances.numAttributes() != instance.numAttributes())
    {
        throw new AssertException("different attribute.");
    }

    int n = instance.numAttributes();
    int sum = 0, count = 0;

    // A single pass is enough: only the sum and the count of the close instances are
    // needed, and both are independent of order, so no list or sort is required.
    for (int i = 0; i < storedCount; ++i)
    {
        weka.core.Instance stored = m_instances.instance(i);

        // NOTE(review): the comparison runs over every attribute, which includes the
        // class attribute if it is one of them - confirm that is intended.
        int mismatches = 0;
        for (int j = 0; j < n; ++j)
        {
            if (stored.value(j) != instance.value(j))
            {
                mismatches++;
            }
        }

        int storedClass = (int)stored.classValue();
        if (mismatches < MismatchLimit)
        {
            sum += storedClass;
            count++;
        }
    }

    if (count == 0)
    {
        return FallbackClass;
    }
    if (count < storedCount / 70)
    {
        // Too few close instances relative to the size of the stored set.
        return FallbackClass;
    }

    return (int)Math.Round((double)sum / count);
}
/// <summary>
/// Lets the user pick an ARFF file, evaluates an SMO classifier on it with a
/// <c>percentSplit</c> percentage split, shows test size, number of correct predictions
/// and accuracy in labels 6-8, and then sends the outcome over the serial port configured
/// in the text boxes: "1" when the truncated accuracy is at most 75, always followed by "a".
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    string fname = "";
    using (OpenFileDialog dialog = new OpenFileDialog())
    {
        dialog.Filter = "Weka Files (*.arff)|*.arff|All files (*.*)|*.*";
        dialog.InitialDirectory = Application.StartupPath;
        dialog.Title = "Select a .arff file";
        if (dialog.ShowDialog() == DialogResult.OK)
        {
            fname = dialog.FileName;
        }
    }

    if (fname == "")
    {
        return;
    }

    try
    {
        weka.core.Instances insts;
        java.io.FileReader reader = new java.io.FileReader(fname);
        try
        {
            insts = new weka.core.Instances(reader);
        }
        finally
        {
            // Release the file handle even when parsing fails.
            reader.close();
        }
        insts.setClassIndex(insts.numAttributes() - 1);

        Classifier cl = new weka.classifiers.functions.SMO();

        // Randomize the order of the instances in the dataset.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        cl.buildClassifier(train);

        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = cl.classifyInstance(currentInst);
            if (predictedClass == currentInst.classValue())
            {
                numCorrect++;
            }
        }

        double result_perc = (double)numCorrect / (double)testSize * 100.0;
        label6.Text = testSize.ToString();
        label7.Text = numCorrect.ToString();
        label8.Text = result_perc + "%";
        result_perc = Math.Truncate(result_perc);

        try
        {
            // Send data on the serial port. The using block closes the port even when
            // Open or a write throws, so the COM port is not left locked.
            using (SerialPort serialPort = new SerialPort("COM" + textBox1.Text + "", Int32.Parse(textBox2.Text), Parity.None, 8))
            {
                serialPort.Open();
                if (result_perc <= 75)
                {
                    serialPort.WriteLine("1");
                }
                serialPort.WriteLine("a");
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
    }
    catch (java.lang.Exception ex)
    {
        MessageBox.Show(ex.getMessage().ToString(), "");
    }
}
/// <summary>
/// Counts the instances per class value (0, 1, 2, 3) into the fields <c>x</c>, <c>y</c>,
/// <c>z</c> and <c>w</c> and returns a score computed from those counts and from the
/// tp/sl values derived from <c>TestParameters2</c>.
/// </summary>
/// <param name="instances">Instances whose class values are counted.</param>
/// <returns>The score <c>r1</c>, divided twice by the total number of counted instances.</returns>
/// <exception cref="ArgumentException">An instance has a class value other than 0, 1, 2 or 3.</exception>
public double GetProb(weka.core.Instances instances)
{
    // Reset all four counters. w must be reset together with x, y and z, otherwise its
    // count from earlier calls leaks into n and into the score of this call.
    x = 0;
    y = 0;
    z = 0;
    w = 0;

    for (int i = 0; i < instances.numInstances(); ++i)
    {
        double v = instances.instance(i).classValue();
        if (v == 0)
        {
            x++;
        }
        else if (v == 1)
        {
            y++;
        }
        else if (v == 2)
        {
            z++;
        }
        else if (v == 3)
        {
            w++;
        }
        else
        {
            throw new ArgumentException("invalid v");
        }
    }

    int n = x + y + z + w;
    double tp = (TestParameters2.tpStart + 1 + TestParameters2.tpCount) / 2.0;
    double sl = (TestParameters2.slStart + 1 + TestParameters2.slCount) / 2.0;

    // NOTE(review): if w is an integer field, "w / 2" truncates for odd counts - confirm
    // that integer division is intended here.
    double r1 = (x * tp - y * sl + w / 2 * tp - w / 2 * sl) * x
        + (y * tp - x * sl + w / 2 * tp - w / 2 * sl) * y
        - (x * sl + y * sl + w * sl) * z
        + tp * w * n;
    r1 = r1 / n / n;

    // A second score ("r") used to be computed here but was never returned or stored,
    // so it has been dropped together with its commented-out logging.
    return r1;
}
// Test the classification result of each map that a user played,
// with the data available as if they were playing through it
/// <summary>
/// For every prefix of the dataset with at least three instances, trains a classifier on
/// that prefix, runs a 3-fold cross-validation (random seed 1) on it and records the
/// percentage of correct classifications. The comma-separated percentages are written to
/// "DataForMatlab/{playerID}_CrossFoldValidations_NeuralNet.txt"; whatever was collected
/// before a Java exception is still written.
/// </summary>
/// <param name="dataString">The dataset in ARFF text form.</param>
/// <param name="playerID">Player identifier used in the log output and the file name.</param>
public static void classifyTest(String dataString, String playerID)
{
    // Accumulate in a builder; the number of entries grows with the dataset.
    System.Text.StringBuilder results = new System.Text.StringBuilder();

    try
    {
        java.io.StringReader stringReader = new java.io.StringReader(dataString);
        java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);

        /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1 */
        weka.core.Instances data = new weka.core.Instances(buffReader);

        // Set the class attribute if the data format does not provide this information.
        // For example, the XRFF format saves the class attribute information as well.
        if (data.classIndex() == -1)
        {
            data.setClassIndex(data.numAttributes() - 1);
        }

        int lastIndex = data.numInstances() - 1;
        for (int i = 3; i < data.numInstances(); i++)
        {
            // Alternatives that have been tried here: trees.J48, lazy.IB1,
            // functions.MultilayerPerceptron.
            weka.classifiers.Classifier cl = new weka.classifiers.bayes.NaiveBayes();

            // The hidden-layer setting only applies to a MultilayerPerceptron. A direct
            // cast would throw InvalidCastException for any other classifier (and that is
            // not a java.lang.Exception, so the catch below would not handle it).
            weka.classifiers.functions.MultilayerPerceptron perceptron = cl as weka.classifiers.functions.MultilayerPerceptron;
            if (perceptron != null)
            {
                perceptron.setHiddenLayers("12");
            }

            weka.core.Instances subset = new weka.core.Instances(data, 0, i);
            cl.buildClassifier(subset);

            weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(subset);
            eval.crossValidateModel(cl, subset, 3, new java.util.Random(1));

            // For accuracy measurement
            results.Append(eval.pctCorrect());

            if (i != lastIndex)
            {
                results.Append(", ");
            }
            else
            {
                Debug.Log("Player: " + playerID + ", Num Maps: " + data.numInstances() + ", AUC: " + eval.areaUnderROC(1));
            }
        }
    }
    catch (java.lang.Exception ex)
    {
        Debug.LogError(ex.getMessage());
    }

    // Write values to file for a matlab read (accuracy). The using block flushes and
    // closes the file even when the write fails.
    using (StreamWriter writer = new StreamWriter("DataForMatlab/" + playerID + "_CrossFoldValidations_NeuralNet.txt"))
    {
        writer.WriteLine(results.ToString());
    }

    Debug.Log(playerID + " has been written to file");
}
/// <summary>
/// Calculates the area under the ROC curve. The area is normalised so that 0.5 is
/// random, 1.0 is perfect and 0.0 is bizarre.
/// </summary>
/// <param name="tcurve">A previously extracted threshold curve Instances.</param>
/// <returns>
/// The ROC area, or Double.NaN if <paramref name="tcurve"/> is empty or its relation name
/// is not <c>RELATION_NAME</c> (i.e. it was not generated as a threshold curve).
/// </returns>
public static double getROCArea(Instances tcurve)
{
    int pointCount = tcurve.numInstances();
    if (!RELATION_NAME.Equals(tcurve.relationName()) || pointCount == 0)
    {
        return System.Double.NaN;
    }

    int truePosIndex = tcurve.attribute(TRUE_POS_NAME).index();
    int falsePosIndex = tcurve.attribute(FALSE_POS_NAME).index();
    double[] truePositives = tcurve.attributeToDoubleArray(truePosIndex);
    double[] falsePositives = tcurve.attributeToDoubleArray(falsePosIndex);

    // The first point supplies the totals used to scale both axes.
    double truePosTotal = truePositives[0];
    double falsePosTotal = falsePositives[0];

    // The curve starts at high values (1, 1) and goes down.
    double prevX = 1.0;
    double prevY = 1.0;
    double area = 0.0;

    int point = 1;
    while (point < pointCount)
    {
        double curX = falsePositives[point] / falsePosTotal;
        double curY = truePositives[point] / truePosTotal;

        // Trapezoid between the previous and the current point.
        area += (curY + prevY) * (prevX - curX) / 2.0;

        prevX = curX;
        prevY = curY;
        point++;
    }

    // Make sure the curve ends at (0, 0): close it with a final triangle if needed.
    if (prevX > 0.0)
    {
        area += prevY * prevX / 2.0;
    }

    return area;
}
/// <summary>
/// Prepares the dataset in <c>insts</c> and writes two numeric ARFF files derived from it,
/// "{filename}-numeric-gini.arff" and "{filename}-numeric-twoing.arff". Steps: replace
/// missing values, remember the original text of the numeric columns, discretize, collect
/// the category labels and cell values as text, convert them with the Gini and the Twoing
/// helpers, and restore the original numeric columns before writing.
/// The <c>insts</c> field is replaced by the filtered and discretized dataset.
/// </summary>
/// <returns>True on success; false when any step throws.</returns>
public bool PrepareDataset()
{
    try
    {
        // Missing values handled.
        weka.filters.Filter missingFilter = new weka.filters.unsupervised.attribute.ReplaceMissingValues();
        missingFilter.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, missingFilter);

        bool isTargetNumeric = insts.attribute(insts.numAttributes() - 1).isNumeric();

        List<bool> isNumeric = new List<bool>();
        List<bool> is2Categorical = new List<bool>();
        List<List<string>> numericColumns = new List<List<string>>();
        List<string> atrNames = new List<string>();

        // Record names and types, and keep the text of every numeric column as it is
        // before discretization.
        for (int i = 0; i < insts.numAttributes(); i++)
        {
            atrNames.Add(insts.attribute(i).name());

            bool isNum = insts.attribute(i).isNumeric();
            isNumeric.Add(isNum);
            if (isNum)
            {
                List<string> column = new List<string>();
                for (int j = 0; j < insts.numInstances(); j++)
                {
                    column.Add(insts.instance(j).toString(i));
                }
                numericColumns.Add(column);
            }
        }

        // Weka's filter convention: set all options first and call setInputFormat last,
        // immediately before the filter is applied.
        weka.filters.unsupervised.attribute.Discretize myDiscretize = new weka.filters.unsupervised.attribute.Discretize();
        myDiscretize.setFindNumBins(true);
        myDiscretize.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myDiscretize);

        // Category labels of every attribute, with single quotes stripped.
        List<List<string>> atrs = new List<List<string>>();
        for (int i = 0; i < insts.numAttributes(); i++)
        {
            List<string> labels = new List<string>();
            for (int j = 0; j < insts.attribute(i).numValues(); j++)
            {
                string sub_category = insts.attribute(i).value(j);
                labels.Add(sub_category.Replace("'", string.Empty));
            }
            atrs.Add(labels);
            is2Categorical.Add(labels.Count == 2);
        }

        // Cell values of every instance as text, with backslashes and single quotes stripped.
        List<List<string>> lst = new List<List<string>>();
        for (int i = 0; i < insts.numInstances(); i++)
        {
            List<string> row = new List<string>();
            for (int j = 0; j < insts.instance(i).numValues(); j++)
            {
                string temp = insts.instance(i).toString(j);
                temp = temp.Replace("\\", string.Empty);
                temp = temp.Replace("'", string.Empty);
                row.Add(temp);
            }
            lst.Add(row);
        }

        List<string> targetValues = atrs[insts.numAttributes() - 1];

        List<List<string>> giniDataset = ConvertToNumericWithGini(lst, atrs);
        giniDataset = Arrange2CategoricalColumns(giniDataset, lst, is2Categorical);
        giniDataset = ChangeBackNumericalColumns(giniDataset, numericColumns, isNumeric);
        WriteFile(giniDataset, filename + "-numeric-gini.arff", atrNames, targetValues, isTargetNumeric);

        List<List<string>> twoingDataset = ConvertToNumericWithTwoing(lst, atrs);
        twoingDataset = Arrange2CategoricalColumns(twoingDataset, lst, is2Categorical);
        twoingDataset = ChangeBackNumericalColumns(twoingDataset, numericColumns, isNumeric);
        WriteFile(twoingDataset, filename + "-numeric-twoing.arff", atrNames, targetValues, isTargetNumeric);

        return true;
    }
    catch (Exception)
    {
        // Failure is reported through the return value only.
        return false;
    }
}
//public object Clone() {
//    MemoryStream ms = new MemoryStream(500000000);
//    System.Runtime.Serialization.Formatters.Binary.BinaryFormatter bf =
//        new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter(null,
//            new System.Runtime.Serialization.StreamingContext(System.Runtime.Serialization.StreamingContextStates.Clone));
//    bf.Serialize(ms, this);
//    ms.Seek(0, SeekOrigin.Begin);
//    object obj = bf.Deserialize(ms);
//    ms.Close();
//    return obj;
//}

/// <summary>
/// Trains an M5P model tree on "Results/resultsGalaxy.arff" (class attribute at index 0),
/// wraps it in a FilteredClassifier stored in <c>fc</c>, classifies every training
/// instance and writes prediction and absolute error per instance, followed by the RMSE,
/// to "Results/classification.txt". Predictions are rounded and clamped to [-6, 11].
/// Sets <c>rmse</c> and <c>readyToClassify</c>.
/// </summary>
/// <param name="LoadingFromFile">
/// When true, the feature-vector matrices <c>frV</c>, <c>fgV</c> and <c>fbV</c> are read
/// back first and no per-instance image is displayed during classification.
/// </param>
/// <exception cref="Exception">The ARFF file could not be loaded into a dataset.</exception>
public void train(bool LoadingFromFile)
{
    if (!LoadingFromFile)
    {
        DisplayMessage("Begin training on Efigi galaxies...");
        Console.Write("Begin training on Efigi galaxies...");
    }
    else
    {
        DisplayImage(0);
        DisplayMessage("Load from file...");
        Console.Write("Load from file...");
        frV = new GeneralMatrix(ReadFVMatrix(0));
        fgV = new GeneralMatrix(ReadFVMatrix(1));
        fbV = new GeneralMatrix(ReadFVMatrix(2));
    }

    weka.classifiers.trees.M5P tree = new weka.classifiers.trees.M5P();

    weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource(OutputDir + "Results/" + "resultsGalaxy.arff");
    data = source.getDataSet();
    if (data == null)
    {
        DisplayMessage("Cannot load from file.");
        throw new Exception("Arff File not valid");
    }

    data.setClassIndex(0);
    tree.buildClassifier(data);

    // The using block flushes and closes the report even when classification throws.
    using (StreamWriter output = new StreamWriter(OutputDir + "Results/" + "classification.txt"))
    {
        rmse = 0.0;
        int classifiedCount = 0;

        weka.filters.unsupervised.attribute.Remove rm = new weka.filters.unsupervised.attribute.Remove();
        rm.setInputFormat(data);
        fc = new FilteredClassifier();
        fc.setFilter(rm);
        fc.setClassifier(tree);

        for (int i = 0; i < data.numInstances(); i++)
        {
            // Round the prediction and clamp it to the range [-6, 11].
            int classPrediction = (int)Math.Round(fc.classifyInstance(data.instance(i)));
            if (classPrediction < -6)
            {
                classPrediction = -6;
            }
            else if (classPrediction > 11)
            {
                classPrediction = 11;
            }

            int actualClass = (int)Math.Round(data.instance(i).classValue());
            int error = Math.Abs(classPrediction - actualClass);
            rmse += error * error;
            classifiedCount++;

            output.WriteLine("\n" + classPrediction + ", " + error);

            if (i % 10 == 0 && !LoadingFromFile)
            {
                DisplayImage(i);
            }
        }

        rmse = Math.Sqrt(rmse / classifiedCount);
        output.WriteLine("\nRMSE: " + rmse);
        DisplayMessage("RMSE: " + rmse);
    }

    readyToClassify = true;
    Console.WriteLine("Finished training on Efigi galaxies; RMSE: " + rmse.ToString());
}
/// <summary>
/// Regenerates the comparison ARFF file (via CSV2Arff), trains a J48 tree on
/// every instance except the last one, classifies that last instance and stores
/// the predicted class label in <c>PredictedClassbyWeka</c>.
/// </summary>
/// <remarks>
/// Any <c>java.lang.Exception</c> is caught and its stack trace printed; in that
/// case <c>PredictedClassbyWeka</c> is left unchanged.
/// </remarks>
public void classifyTest()
{
    try
    {
        CSV2Arff();
        java.io.FileReader arrfFile = new java.io.FileReader("D:/final_version/Gesture-Gis-master/GestureGis2/ComparisonFeaturefile.arff");
        try
        {
            weka.core.Instances insts = new weka.core.Instances(arrfFile);
            // The class is the last attribute of the file.
            insts.setClassIndex(insts.numAttributes() - 1);

            // Leave-last-out: the final row is the sample to recognise,
            // everything before it is training data.
            int lastIndex = insts.numInstances() - 1;
            weka.core.Instances train = new weka.core.Instances(insts, 0, lastIndex);

            weka.classifiers.Classifier cl = new weka.classifiers.trees.J48();
            cl.buildClassifier(train);

            double predictedClass = cl.classifyInstance(insts.instance(lastIndex));

            // Ask the class attribute for the label directly. Scanning the ARFF
            // text for the first "{...}" picks up the first nominal attribute in
            // the header, which is only the class when no nominal feature
            // precedes it, and also returns labels with their ARFF quoting.
            PredictedClassbyWeka = insts.classAttribute().value((int)predictedClass);
        }
        finally
        {
            // Release the file handle on success and on failure alike.
            arrfFile.close();
        }
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
}
/// <summary>
/// Parses an ARFF document held in <paramref name="dataString"/> and counts duplicate
/// instances: once against the accumulated <c>allUniqueData</c> set (counter
/// <c>dupInstances</c>) and once within this document alone (counter
/// <c>dupInstancesSamePlayer</c>). Instances not seen before are appended to the
/// respective unique set.
/// </summary>
/// <param name="dataString">Complete ARFF text (header and data) for one player.</param>
/// <param name="playerID">Not read by this method.</param>
/// <remarks>
/// Original author's note: tests the classification result of each map that a user
/// played, with the data available as if they were playing through it.
/// A <c>java.lang.Exception</c> is reported through <c>Debug.LogError</c> and swallowed.
/// </remarks>
public static void classifyTest(String dataString, String playerID)
{
    try
    {
        java.io.BufferedReader arffReader =
            new java.io.BufferedReader(new java.io.StringReader(dataString));
        weka.core.Instances thisData = new weka.core.Instances(arffReader);
        if (thisData.classIndex() == -1)
        {
            // Default to the last attribute as the class.
            thisData.setClassIndex(thisData.numAttributes() - 1);
        }

        // Same header as the parsed data, emptied so it can collect the
        // distinct rows of this document.
        weka.core.Instances thisUniqueData = new weka.core.Instances(thisData);
        if (thisUniqueData.classIndex() == -1)
        {
            thisUniqueData.setClassIndex(thisUniqueData.numAttributes() - 1);
        }
        thisUniqueData.delete();

        // The cross-call set is created lazily from the first document seen.
        if (allUniqueData == null)
        {
            allUniqueData = new weka.core.Instances(thisData);
            if (allUniqueData.classIndex() == -1)
            {
                allUniqueData.setClassIndex(allUniqueData.numAttributes() - 1);
            }
            allUniqueData.delete();
        }

        weka.core.InstanceComparator comparator = new weka.core.InstanceComparator(false);

        // Pass 1: duplicates with respect to everything seen so far.
        for (int row = 0; row < thisData.numInstances(); row++)
        {
            weka.core.Instance candidate = thisData.instance(row);
            int probe = 0;
            while (probe < allUniqueData.numInstances()
                   && comparator.compare(candidate, allUniqueData.instance(probe)) != 0)
            {
                probe++;
            }

            if (probe < allUniqueData.numInstances())
            {
                Debug.Log("Duplicate found!");
                dupInstances++;
            }
            else
            {
                allUniqueData.add(candidate);
            }
        }

        // Pass 2: duplicates within this document only.
        for (int row = 0; row < thisData.numInstances(); row++)
        {
            weka.core.Instance candidate = thisData.instance(row);
            int probe = 0;
            while (probe < thisUniqueData.numInstances()
                   && comparator.compare(candidate, thisUniqueData.instance(probe)) != 0)
            {
                probe++;
            }

            if (probe < thisUniqueData.numInstances())
            {
                Debug.Log("Duplicate found!");
                dupInstancesSamePlayer++;
            }
            else
            {
                thisUniqueData.add(candidate);
            }
        }
    }
    catch (java.lang.Exception ex)
    {
        Debug.LogError(ex.getMessage());
    }
}
/// <summary>
/// Trains the external linear learner: the (re-weighted) training instances are
/// written in LibSVM format to <c>m_trainingFile</c>, the learner executable is run
/// to produce <c>m_modelFile</c>, the model bytes are cached in <c>m_modelData</c>
/// and the decision threshold is then tuned through <c>GetBestDelta</c>.
/// </summary>
/// <param name="instances">Training data; class values 0 and 2 are the two sides counted by <c>GetCount</c>.</param>
/// <remarks>
/// If the data contains no class-0 instances the classifier is pinned to answer 2,
/// and if it contains no class-2 instances it is pinned to answer 0; no model is
/// trained in either case.
/// </remarks>
/// <exception cref="InvalidOperationException">The learner did not produce a model file; the message is the learner's output.</exception>
public override void buildClassifier(Instances instances)
{
    m_mustValue = null;
    var weights = MincostLiblinearClassifier.GetCount(instances);
    if (weights[0] == 0)
    {
        m_mustValue = 2;
        m_delta = 0;
        return;
    }
    else if (weights[2] == 0)
    {
        m_mustValue = 0;
        m_delta = 0;
        return;
    }

    // Header-only copy of the training data.
    m_sampleInstances = new Instances(instances, 0);

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // Work on a copy so that the caller's data set is not grown by re-weighting.
    Instances trainInstances = new Instances(instances, 0, instances.numInstances());
    AddInstancesAccordWeight(trainInstances);

    if (System.IO.File.Exists(m_trainingFile))
    {
        System.IO.File.Delete(m_trainingFile);
    }
    libsvmSaver.setInstances(trainInstances);
    libsvmSaver.setFile(new java.io.File(m_trainingFile));
    libsvmSaver.writeBatch();

    if (System.IO.File.Exists(m_modelFile))
    {
        System.IO.File.Delete(m_modelFile);
    }

    // Scale the configured -c value by (training size / 100) for this run only.
    // The scaled arguments are kept in a local: writing them back to m_trainArgs
    // would rescale the already-scaled value on every subsequent build.
    string trainArgs = m_trainArgs;
    string[] options = Utils.splitOptions(m_trainArgs);
    int idx = Utils.getOptionPos('c', options);
    if (idx != -1 && idx + 1 < options.Length)
    {
        // Invariant culture: the value is handed to an external command line,
        // which must not receive a comma decimal separator.
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
        double c = Convert.ToDouble(options[idx + 1], invariant);
        c = c * trainInstances.numInstances() / 100.0;
        options[idx + 1] = c.ToString(invariant);
        trainArgs = Utils.joinOptions(options);
    }

    learner.ExecuteLearner(s_learnerPath, m_trainingFile, m_modelFile, trainArgs);
    if (!System.IO.File.Exists(m_modelFile))
    {
        throw new InvalidOperationException(learner.Output);
    }
    m_modelData = System.IO.File.ReadAllBytes(m_modelFile);

    GetBestDelta(instances);

    if (System.IO.File.Exists(m_trainingFile))
    {
        System.IO.File.Delete(m_trainingFile);
    }
}
/// <summary>
/// Oversamples one class of <paramref name="instances"/> in place so that the class
/// balance reflects the cost ratio <c>m_tp / m_sl</c>. Whole copies of the selected
/// class are appended first; any remaining count is filled with copies of the
/// attribute-wise average of that class.
/// </summary>
/// <param name="instances">Data set to grow; new instances are appended to it.</param>
private void AddInstancesAccordWeight(Instances instances)
{
    // 0, 2
    double[] weights = MincostLiblinearClassifier.GetCount(instances);
    if (weights == null)
    {
        return;
    }

    double c = m_tp / m_sl;
    if (c == 1 && weights[0] == weights[1])
    {
        return;
    }

    // NOTE(review): class 2 is the class copied below, yet the counts are read from
    // weights[1]; the callers index the same array with [2]. Confirm which index
    // GetCount uses for class 2.
    int n;
    int toCopyClass;
    if (c >= 1)
    {
        int shouldWeight1 = (int)(c * weights[1]);
        n = (int)(shouldWeight1 - weights[1]);
        toCopyClass = 2;
    }
    else
    {
        // NOTE(review): this branch previously also computed (int)(1 / c * weights[0])
        // but never used it; the count difference below is what actually drives n.
        n = (int)(weights[1] - weights[0]);
        toCopyClass = 0;
    }

    // Nothing to add: with n <= 0 neither the whole-copy loop nor the
    // remainder fill would append anything.
    if (n <= 0)
    {
        return;
    }

    List<Instance> copyInstances = new List<Instance>();
    for (int i = 0; i < instances.numInstances(); ++i)
    {
        if (instances.instance(i).classValue() == toCopyClass)
        {
            copyInstances.Add(instances.instance(i));
        }
    }

    // Without any instance of the class to copy there is nothing to duplicate
    // (and the integer division below would throw DivideByZeroException).
    if (copyInstances.Count == 0)
    {
        return;
    }

    // Append as many complete copies of the class as fit into n.
    int nAll = n / copyInstances.Count;
    for (int j = 0; j < nAll; ++j)
    {
        for (int i = 0; i < copyInstances.Count; ++i)
        {
            Instance newInstance = new weka.core.DenseInstance(copyInstances[i]);
            instances.add(newInstance);
            newInstance.setDataset(instances);
        }
    }

    // Fill the remainder with copies of the class average instead of a
    // random subset.
    int remainder = n - nAll * copyInstances.Count;
    if (remainder > 0)
    {
        Instance avgInstance = new weka.core.DenseInstance(instances.numAttributes());
        for (int i = 0; i < avgInstance.numAttributes(); ++i)
        {
            double sum = 0;
            for (int j = 0; j < copyInstances.Count; ++j)
            {
                sum += copyInstances[j].value(i);
            }
            avgInstance.setValue(i, sum / copyInstances.Count);
        }

        for (int j = 0; j < remainder; ++j)
        {
            Instance newInstance = new weka.core.DenseInstance(avgInstance);
            instances.add(newInstance);
        }
    }
}
/// <summary>
/// Scores a batch of instances with the external classifier executable: the batch
/// is written in LibSVM format to <c>m_testFile</c>, the executable is run against
/// <c>m_modelFile</c>, and one numeric value per instance is read back from
/// <c>m_testOutputFile</c>.
/// </summary>
/// <param name="instances">Instances to score.</param>
/// <returns>
/// One value per instance, in input order. When the classifier is pinned to a single
/// answer (<c>m_mustValue</c> set) every entry is <c>m_delta - 1</c> for answer 0 and
/// <c>m_delta + 1</c> otherwise, and no external process is run.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The executable produced no output file (message is its captured output), or the
/// output file holds fewer lines than there are instances.
/// </exception>
public double[] distributionForInstances(Instances instances)
{
    double[] ret = new double[instances.numInstances()];
    if (m_mustValue.HasValue)
    {
        for (int i = 0; i < ret.Length; ++i)
        {
            ret[i] = m_mustValue == 0 ? m_delta.Value - 1 : m_delta.Value + 1;
        }
        return ret;
    }

    if (System.IO.File.Exists(m_testFile))
    {
        System.IO.File.Delete(m_testFile);
    }
    libsvmSaver.setInstances(instances);
    libsvmSaver.setFile(new java.io.File(m_testFile));
    libsvmSaver.writeBatch();

    if (System.IO.File.Exists(m_testOutputFile))
    {
        System.IO.File.Delete(m_testOutputFile);
    }

    classifier.ExecuteClassifier(s_classifierPath, m_testFile, m_modelFile, m_testOutputFile);
    if (!System.IO.File.Exists(m_testOutputFile))
    {
        throw new InvalidOperationException(classifier.Output);
    }

    using (System.IO.StreamReader sr = new System.IO.StreamReader(m_testOutputFile))
    {
        for (int i = 0; i < ret.Length; ++i)
        {
            string s = sr.ReadLine();
            if (s == null)
            {
                // A truncated output file would otherwise surface as an
                // ArgumentNullException from Double.Parse with no context.
                throw new InvalidOperationException(
                    "Classifier output '" + m_testOutputFile + "' ended after " + i
                    + " of " + ret.Length + " expected values.");
            }

            // The file is machine-written, so parse it independently of the
            // current UI culture (decimal point, not decimal comma).
            ret[i] = Double.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
        }
    }

    if (System.IO.File.Exists(m_testFile))
    {
        System.IO.File.Delete(m_testFile);
    }
    if (System.IO.File.Exists(m_testOutputFile))
    {
        System.IO.File.Delete(m_testOutputFile);
    }
    return ret;
}
/// <summary>
/// Classifies a batch of instances by thresholding the values returned by
/// <c>distributionForInstances</c> against <c>m_delta</c>.
/// </summary>
/// <param name="instances">Instances to classify.</param>
/// <returns>One label per instance: 2 when the value is strictly above <c>m_delta</c>, otherwise 0.</returns>
public double[] classifyInstances(Instances instances)
{
    double[] labels = new double[instances.numInstances()];
    double[] scores = distributionForInstances(instances);

    int position = 0;
    while (position < labels.Length)
    {
        if (scores[position] > m_delta)
        {
            labels[position] = 2;
        }
        else
        {
            labels[position] = 0;
        }
        position++;
    }

    return labels;
}
// Added by Alain Espinosa
/// <summary>
/// Filters an entire set of instances and returns the new set.
/// </summary>
/// <param name="data">the data to be filtered</param>
/// <returns>the filtered set of data</returns>
/// <exception cref="Exception">if the filter can't be used successfully</exception>
public virtual Instances FilterInstances(Instances data)
{
    setInputFormat(data);

    // Push every instance through the filter, then signal the end of the batch.
    int cursor = 0;
    while (cursor < data.numInstances())
    {
        input(data.instance(cursor));
        cursor++;
    }
    batchFinished();

    // Drain the filter's output queue into a data set with the output header.
    Instances filtered = getOutputFormat();
    for (Instance next = output(); next != null; next = output())
    {
        filtered.add(next);
    }
    return filtered;
}
/// <summary>
/// Turns the positive predictions (value 2) of the selected classifiers into candidate
/// deals, accumulates each classifier's cost and deal count into the score tables, and
/// hands up to 50 randomly chosen candidates to <c>m_realDealsInfo</c>.
/// </summary>
/// <param name="nowDate">Current simulation time; used for the summary output and the console title.</param>
/// <param name="minScoreInfos">Classifiers to execute; when null only the summary is written.</param>
public void ExecuteBest(DateTime nowDate, List<CandidateClassifier> minScoreInfos)
{
    OutputRealDealSummary(nowDate);

    if (minScoreInfos != null)
    {
        List<DealInfo> candidateDeals = new List<DealInfo>();
        foreach (var minScoreInfo in minScoreInfos)
        {
            minScoreInfo.WekaData.GenerateData(false, true);
            weka.core.Instances minTestInstances = minScoreInfo.WekaData.CurrentTestInstances;
            weka.core.Instances minTestInstancesNew = minScoreInfo.WekaData.CurrentTestInstancesNew;
            if (minTestInstances.numInstances() > 0)
            {
                MyEvaluation eval = new MyEvaluation(minScoreInfo.CostMatrix);
                eval.evaluateModel(minScoreInfo.CurrentTestRet, minScoreInfo.CurrentClassValue);

                // Fixed volume per deal.
                float vol = 0.1F;

                int tp = (int)eval.numTruePositives(1);
                int fp = (int)eval.numFalsePositives(1);

                for (int i = 0; i < minScoreInfo.CurrentTestRet.Length; i++)
                {
                    if (minScoreInfo.CurrentTestRet[i] != 2)
                    {
                        continue;
                    }

                    // An optional second classifier may veto the deal.
                    if (minScoreInfo.ExcludeClassifier != null)
                    {
                        double cv2 = minScoreInfo.ExcludeClassifier.classifyInstance(minTestInstancesNew.instance(i));
                        if (cv2 != 2)
                        {
                            continue;
                        }
                    }

                    candidateDeals.Add(new DealInfo(
                        WekaUtils.GetDateValueFromInstances(minTestInstances, 0, i),
                        (float)WekaUtils.GetValueFromInstance(minTestInstances, "mainClose", i),
                        minScoreInfo.DealType,
                        vol,
                        (float)(minScoreInfo.CurrentClassValue[i] == 2 ? -minScoreInfo.Tp : minScoreInfo.Sl),
                        WekaUtils.GetDateValueFromInstances(minTestInstances, 1, i)));
                }

                float nowCost = (float)eval.totalCost();
                int nowDeal = tp + fp;

                // Copy the loop variable so the lambda captures this iteration's value.
                var currentInfo = minScoreInfo;
                IterateClassifierInfos((k, i, j, h) =>
                {
                    if (m_classifierInfoIdxs[k, i, j, h] == currentInfo)
                    {
                        m_totalScores[k, i, j, h] += nowCost;
                        m_totalDeals[k, i, j, h] += nowDeal;
                        // Found the slot for this classifier: stop iterating.
                        return false;
                    }
                    return true;
                });
            }
        }

        // Draw up to selectCount candidates without replacement.
        int selectCount = 50;
        for (int i = 0; i < selectCount; ++i)
        {
            if (candidateDeals.Count == 0)
            {
                break;
            }

            // Truncate rather than round: rounding NextDouble() * Count gives the
            // first index half the probability of the others and, after clamping,
            // the last index one and a half times as much.
            int selectedDealIdx = (int)(m_randomGenerator.NextDouble() * candidateDeals.Count);
            if (selectedDealIdx >= candidateDeals.Count)
            {
                selectedDealIdx = candidateDeals.Count - 1;
            }

            m_realDealsInfo.AddDeal(candidateDeals[selectedDealIdx]);
            candidateDeals.RemoveAt(selectedDealIdx);
        }
    }

    if (!string.IsNullOrEmpty(currentSummary))
    {
        WekaUtils.Instance.WriteLog(currentSummary, true, ConsoleColor.Red);
        System.Console.Title = nowDate.ToString(Parameters.DateTimeFormat) + ":" + currentSummary;
    }
}
/// <summary>
/// Trains the wrapped classifier on a copy of <paramref name="instances"/>.
/// When <c>m_delta</c> is the sentinel -1 the model is trained with a threshold of 0.5
/// and <c>GetBestDelta</c> is then called; otherwise the configured <c>m_delta</c>
/// is used as the training threshold.
/// </summary>
/// <param name="instances">Training data; class values 0 and 2 are the two sides counted by <c>GetCount</c>.</param>
/// <remarks>
/// If the data has no class-0 instances <c>m_mustValue</c> is set to 2, if it has no
/// class-2 instances it is set to 0; in both cases nothing is trained.
/// </remarks>
public override void buildClassifier(Instances instances)
{
    m_mustValue = -1;

    var weights = GetCount(instances);
    if (weights[0] == 0)
    {
        m_mustValue = 2;
        return;
    }
    if (weights[2] == 0)
    {
        m_mustValue = 0;
        return;
    }

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // -1 means "no threshold configured": train at 0.5 and search for one afterwards.
    bool deltaUnset = m_delta == -1;
    double trainingDelta = deltaUnset ? 0.5 : m_delta;

    // TrainOnce relabels its input, so it always gets a private copy.
    Instances trainInstances = new Instances(instances, 0, instances.numInstances());
    this.m_Classifier = TrainOnce(trainInstances, trainingDelta);

    if (deltaUnset)
    {
        GetBestDelta(instances);
    }
}
/// <summary>
/// Returns, for every instance, the second entry (index 1) of the wrapped
/// classifier's class distribution.
/// </summary>
/// <param name="instances">Instances to score.</param>
/// <returns>One value per instance, in input order.</returns>
public double[] distributionForInstances(Instances instances)
{
    double[] scores = new double[instances.numInstances()];

    int position = 0;
    while (position < scores.Length)
    {
        double[] distribution = m_Classifier.distributionForInstance(instances.instance(position));
        scores[position] = distribution[1];
        position++;
    }

    return scores;
}
/// <summary>
/// Trains a copy of <c>m_Classifier</c> on a relabelled two-class problem and keeps
/// retraining until the labels are stable: after each build, every instance not
/// labelled 0 that the model classifies as 0, or whose distribution entry at index 1
/// is below <paramref name="delta"/>, is relabelled to 0.
/// </summary>
/// <param name="trainInstances">
/// Training data, modified in place: class 1 becomes missing, class 2 becomes 1, and
/// labels may be flipped to 0 during the iterations. Pass a copy.
/// </param>
/// <param name="delta">Minimum index-1 distribution value an instance needs to keep its label.</param>
/// <returns>The trained copy; <c>m_Classifier</c> itself is not modified.</returns>
private weka.classifiers.Classifier TrainOnce(Instances trainInstances, double delta)
{
    var cls = weka.classifiers.AbstractClassifier.makeCopy(m_Classifier);

    // Relabel: original class 1 is dropped (missing), original class 2 becomes 1.
    for (int i = 0; i < trainInstances.numInstances(); ++i)
    {
        Instance instance = trainInstances.instance(i);
        if (instance.classValue() == 1)
        {
            instance.setClassMissing();
        }
        else if (instance.classValue() == 2)
        {
            instance.setClassValue(1);
        }
    }

    bool changed;
    do
    {
        changed = false;
        double[] w = GetCount(trainInstances);

        double cost = m_tp / m_sl;
        string ws;
        if (w[1] == 0 || w[0] == 0)
        {
            ws = string.Empty;
        }
        else
        {
            // The weight string is handed to the Java learner, so format it
            // culture-independently and without group separators ("N2" can yield
            // "1,50" or "1,234.50" depending on culture and magnitude).
            ws = cost.ToString("F2", System.Globalization.CultureInfo.InvariantCulture) + " 1";
        }

        // Only LibLINEAR and LibSVM accept class weights; other learners are
        // trained unweighted.
        var linear = cls as weka.classifiers.functions.LibLINEAR;
        if (linear != null)
        {
            linear.setWeights(ws);
        }
        else
        {
            var svm = cls as weka.classifiers.functions.LibSVM;
            if (svm != null)
            {
                svm.setWeights(ws);
            }
        }

        cls.buildClassifier(trainInstances);

        foreach (Instance i in trainInstances)
        {
            // NOTE(review): instances whose class was set to missing above are not
            // equal to 0, so they are also evaluated here and can be relabelled
            // to 0 - confirm this is intended.
            if (i.classValue() == 0)
            {
                continue;
            }

            double v = cls.classifyInstance(i);
            if (v == 0 || cls.distributionForInstance(i)[1] < delta)
            {
                i.setClassValue(0);
                changed = true;
            }
        }
    }
    while (changed);

    return cls;
}
/// <summary>
/// Gets the index of the instance with the closest threshold value to the
/// desired target.
/// </summary>
/// <param name="tcurve">a set of instances that have been generated by this class</param>
/// <param name="threshold">the target threshold</param>
/// <returns>
/// the index of the instance that has threshold closest to the target, or -1 if
/// this could not be found (i.e. no data, or bad threshold target)
/// </returns>
public static int getThresholdInstance(Instances tcurve, double threshold)
{
    // Only curves produced under this class's relation name are accepted.
    if (!RELATION_NAME.Equals(tcurve.relationName()))
    {
        return -1;
    }
    if (tcurve.numInstances() == 0)
    {
        return -1;
    }
    if (threshold < 0 || threshold > 1.0)
    {
        return -1;
    }

    // A single point is trivially the closest one.
    if (tcurve.numInstances() == 1)
    {
        return 0;
    }

    // The threshold is stored in the last attribute; search it in sorted order.
    int thresholdColumn = tcurve.numAttributes() - 1;
    double[] thresholdValues = tcurve.attributeToDoubleArray(thresholdColumn);
    int[] order = Utils.sort(thresholdValues);
    return binarySearch(order, thresholdValues, threshold);
}
/// <summary>
/// Lets the user pick an ARFF file, lists its non-class attributes in the grid
/// (nominal attributes get a combo box with their values), then runs a percentage-split
/// evaluation of five classifiers (J48, Naive Bayes, IBk, multilayer perceptron, SMO)
/// and reports each accuracy and the best algorithm in the text boxes.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    using (OpenFileDialog file = new OpenFileDialog())
    {
        if (file.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        string filename = file.FileName;
        textBox1.Text = Path.GetFileName(filename) + " chosen succesfully!";

        // One grid row per feature attribute; the last attribute is the class.
        weka.core.Instances insts = LoadArffDataset(filename);
        int featureCount = insts.numAttributes() - 1;
        for (int i = 0; i < featureCount; i++)
        {
            weka.core.Attribute attribute = insts.attribute(i);

            // Use the index of the row that was actually added: indexing the grid
            // with the attribute number hits stale rows once a second file is opened.
            int rowIndex = dataGridView1.Rows.Add(attribute.name());
            if (attribute.isNumeric())
            {
                continue;
            }

            // Nominal attribute: offer its values in a drop-down.
            string[] valueNames = new string[attribute.numValues()];
            for (int j = 0; j < valueNames.Length; j++)
            {
                valueNames[j] = attribute.value(j);
            }
            DataGridViewComboBoxCell combo = new DataGridViewComboBoxCell();
            combo.DataSource = valueNames.ToList();
            dataGridView1.Rows[rowIndex].Cells[1] = combo;
        }

        // NOTE(review): every pipeline below uses
        // weka.filters.unsupervised.instance.Normalize, although the intent appears
        // to be normalising numeric attributes - confirm whether
        // unsupervised.attribute.Normalize was meant.

        double r1, r2, r3, r4, r5;

        // Decision tree
        cl = new weka.classifiers.trees.J48();
        textBox2.Text = "Performing " + percentSplit + "% split evaluation.";
        textBox3.Text = EvaluatePercentSplit(cl, filename, out r1,
            new weka.filters.unsupervised.attribute.ReplaceMissingValues(),
            new weka.filters.unsupervised.instance.Normalize());

        // Naive Bayes (numeric attributes are discretized first)
        cl2 = new weka.classifiers.bayes.NaiveBayes();
        textBox4.Text = EvaluatePercentSplit(cl2, filename, out r2,
            new weka.filters.unsupervised.attribute.ReplaceMissingValues(),
            new weka.filters.unsupervised.attribute.Discretize());

        // K-nearest neighbour (nominal attributes become dummy variables)
        cl3 = new weka.classifiers.lazy.IBk();
        textBox5.Text = EvaluatePercentSplit(cl3, filename, out r3,
            new weka.filters.unsupervised.attribute.ReplaceMissingValues(),
            new weka.filters.unsupervised.attribute.NominalToBinary(),
            new weka.filters.unsupervised.instance.Normalize());

        // Artificial neural network
        cl4 = new weka.classifiers.functions.MultilayerPerceptron();
        textBox6.Text = EvaluatePercentSplit(cl4, filename, out r4,
            new weka.filters.unsupervised.attribute.ReplaceMissingValues(),
            new weka.filters.unsupervised.attribute.NominalToBinary(),
            new weka.filters.unsupervised.instance.Normalize());

        // Support vector machine
        cl5 = new weka.classifiers.functions.SMO();
        textBox7.Text = EvaluatePercentSplit(cl5, filename, out r5,
            new weka.filters.unsupervised.attribute.ReplaceMissingValues(),
            new weka.filters.unsupervised.attribute.NominalToBinary(),
            new weka.filters.unsupervised.instance.Normalize());

        // Pick the first algorithm that reaches the maximum accuracy. The accuracies
        // are compared as computed rather than re-parsed from the text boxes.
        double[] max_array = new double[] { r1, r2, r3, r4, r5 };
        double max = max_array.Max();
        if (r1 == max)
        {
            textBox8.Text = "Best Algoritm is Decision Tree Algorithm ";
        }
        else if (r2 == max)
        {
            textBox8.Text = "Best Algoritm is Naive Bayes Algorithm ";
        }
        else if (r3 == max)
        {
            textBox8.Text = "Best Algoritm is K-Nearest Neighbour Algorithm ";
        }
        else if (r4 == max)
        {
            textBox8.Text = "Best Algoritm is Artificial Neural Network Algorithm ";
        }
        else if (r5 == max)
        {
            textBox8.Text = "Best Algoritm is Support Vector Machine Algorithm ";
        }
    }
}

// Reads an ARFF file, marks its last attribute as the class and closes the reader.
private static weka.core.Instances LoadArffDataset(string arffPath)
{
    java.io.FileReader reader = new java.io.FileReader(arffPath);
    try
    {
        weka.core.Instances dataset = new weka.core.Instances(reader);
        dataset.setClassIndex(dataset.numAttributes() - 1);
        return dataset;
    }
    finally
    {
        reader.close();
    }
}

// Loads the file, applies the given filters in order, shuffles, trains on the first
// percentSplit% and tests on the rest; returns the "x out of y correct (z%)" summary.
private string EvaluatePercentSplit(weka.classifiers.Classifier classifier, string arffPath,
    out double accuracy, params weka.filters.Filter[] preprocessing)
{
    weka.core.Instances dataset = LoadArffDataset(arffPath);

    foreach (weka.filters.Filter filter in preprocessing)
    {
        filter.setInputFormat(dataset);
        dataset = weka.filters.Filter.useFilter(dataset, filter);
    }

    // Randomize the order of the instances (shared last step of every pipeline).
    weka.filters.Filter shuffle = new weka.filters.unsupervised.instance.Randomize();
    shuffle.setInputFormat(dataset);
    dataset = weka.filters.Filter.useFilter(dataset, shuffle);

    int trainSize = dataset.numInstances() * percentSplit / 100;
    int testSize = dataset.numInstances() - trainSize;
    classifier.buildClassifier(new weka.core.Instances(dataset, 0, trainSize));

    int numCorrect = 0;
    for (int i = trainSize; i < dataset.numInstances(); i++)
    {
        weka.core.Instance currentInst = dataset.instance(i);
        if (classifier.classifyInstance(currentInst) == currentInst.classValue())
        {
            numCorrect++;
        }
    }

    accuracy = (double)numCorrect / (double)testSize * 100.0;
    return numCorrect + " out of " + testSize + " correct (" + accuracy + "%)";
}
/// <summary>
/// Trains the requested classifier on every instance of an ARFF file and reports how many
/// of those same instances it then classifies correctly ("use training set" evaluation).
/// </summary>
/// <param name="file">Path of the ARFF file to load; its last attribute is used as the class.</param>
/// <param name="classifier">
/// "J48", "MLP" or "NaiveBayes". Any other value (including null) falls back to J48.
/// </param>
/// <returns>
/// A two-line report ("Performing use training set evaluation." followed by the
/// "N out of M correct (P%)" line), or the literal "Error" when a java.lang.Exception is thrown.
/// </returns>
public static string classifyTest(string file, string classifier)
{
    string data = "No data";
    try
    {
        // Close the reader explicitly once the dataset is loaded; the original
        // left the underlying file handle open until finalization.
        weka.core.Instances insts;
        java.io.FileReader reader = new java.io.FileReader(file);
        try
        {
            insts = new weka.core.Instances(reader);
        }
        finally
        {
            reader.close();
        }
        insts.setClassIndex(insts.numAttributes() - 1);

        weka.classifiers.Classifier cl;
        switch (classifier)
        {
            case "MLP":
                cl = new weka.classifiers.functions.MultilayerPerceptron();
                break;
            case "NaiveBayes":
                cl = new weka.classifiers.bayes.NaiveBayes();
                break;
            default:
                // "J48" and every unrecognised name use the decision tree.
                cl = new weka.classifiers.trees.J48();
                break;
        }

        data = ("Performing use training set evaluation.\n");

        // Randomize the order of the instances in the dataset.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        // Train on the full dataset, then score the model against the same instances.
        cl.buildClassifier(insts);
        int total = insts.numInstances();
        int numCorrect = 0;
        for (int i = 0; i < total; i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = cl.classifyInstance(currentInst);
            if (predictedClass == currentInst.classValue())
            {
                numCorrect++;
            }
        }

        data = data + (numCorrect + " out of " + total + " correct (" +
                       (double)((double)numCorrect / (double)total * 100.0) + "%)");
    }
    catch (java.lang.Exception ex)
    {
        data = "Error";
        ex.printStackTrace();
    }
    return(data);
}
/// <summary>
/// Loads a dataset, rebuilds the attribute input controls on the form, starts one worker
/// thread per CalculateSuccessFor* method, waits for all of them and then records the entry
/// of <c>succesRates</c> with the highest value as <c>highestSuccessRate</c>.
/// </summary>
/// <param name="path">Path passed to <c>ReadFile</c> to load the instances.</param>
/// <remarks>
/// <c>readyToTest</c> is cleared on entry and only set again when the method runs to completion.
/// The thread joins happen on the calling thread, so the caller blocks until all five finish.
/// </remarks>
private void Classify(string path)
{
    readyToTest = false; // initialize flag

    // Try reading the file; bail out if that failed.
    insts = ReadFile(path);
    if (insts == null)
    {
        MessageBox.Show("Instances are null!", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // Form.ActiveForm is null when no form of this application is active (or the active form
    // is not a Form1); the original dereferenced it unconditionally and crashed.
    var form = Form.ActiveForm as Form1;
    if (form == null)
    {
        MessageBox.Show("Main form is not active!", "Error", MessageBoxButtons.OK, MessageBoxIcon.Error);
        return;
    }

    // Reset UI and lists.
    succesRates.Clear();
    classifiers.Clear();
    form.inputPanel.Controls.Clear();
    inputObjects.Clear();
    form.textMostSuccessful.Text = "";
    form.testResult.Text = "";

    // Place one label + input per non-class attribute, eight per row:
    // NumericUpDown for numeric attributes, ComboBox for everything else.
    int offsetV = 60;
    int offsetH = 10;
    int width = 75;
    int height = 30;
    for (int i = 0; i < insts.numAttributes() - 1; i++)
    {
        int x = (width * (i % 8)) + offsetH;
        int y = (height * (i / 8)) + (offsetV * (i / 8));

        Label label = new Label();
        label.Width = width;
        label.Height = height;
        label.Text = insts.attribute(i).name();
        label.Parent = form.inputPanel;
        label.Location = new Point(x, y);

        if (insts.attribute(i).isNumeric())
        {
            NumericUpDown nud = new NumericUpDown();
            nud.Width = width - 10;
            nud.Height = height;
            nud.Parent = form.inputPanel;
            nud.Location = new Point(x, y + height);
            inputObjects.Add(new UserInput(nud));
        }
        else
        {
            // The value list is taken from the text between '{' and '}' of the attribute's
            // string form. NOTE(review): this throws for attributes whose string form has no
            // braces (e.g. string/date attributes) - confirm only nominal ones reach here.
            string[] values = insts.attribute(i).toString().Split('{', '}')[1].Split(',');
            ComboBox comboBox = new ComboBox();
            comboBox.DataSource = values;
            comboBox.Width = width - 10;
            comboBox.Height = height;
            comboBox.Parent = form.inputPanel;
            comboBox.Location = new Point(x, y + height);
            inputObjects.Add(new UserInput(comboBox));
        }
    }

    // Set train and test sizes.
    trainSize = insts.numInstances() * percentSplit / 100;
    testSize = insts.numInstances() - trainSize;

    // Set target attribute.
    insts.setClassIndex(insts.numAttributes() - 1);

    // Randomize.
    weka.filters.Filter rndFilter = new weka.filters.unsupervised.instance.Randomize();
    rndFilter.setInputFormat(insts);
    insts = weka.filters.Filter.useFilter(insts, rndFilter);

    // Start one thread per method.
    Thread t_SuccessNb = new Thread(() => CalculateSuccessForNb(insts));
    t_SuccessNb.Start();
    Thread t_SuccessKn = new Thread(() => CalculateSuccessForKn(insts));
    t_SuccessKn.Start();
    Thread t_SuccessDt = new Thread(() => CalculateSuccessForDt(insts));
    t_SuccessDt.Start();
    Thread t_SuccessAnn = new Thread(() => CalculateSuccessForAnn(insts));
    t_SuccessAnn.Start();
    Thread t_SuccessSvm = new Thread(() => CalculateSuccessForSvm(insts));
    t_SuccessSvm.Start();

    // Wait for threads.
    t_SuccessNb.Join();
    t_SuccessKn.Join();
    t_SuccessDt.Join();
    t_SuccessAnn.Join();
    t_SuccessSvm.Join();

    // Forget the winner of any previous call; otherwise a stale entry with a higher rate
    // than anything in this run would be reported as the best algorithm for the new dataset.
    highestSuccessRate = default(KeyValuePair <Classifier, double>);

    // Find out which algorithm has the best success rate.
    foreach (var item in succesRates)
    {
        if (highestSuccessRate.Equals(default(KeyValuePair <Classifier, double>)) || highestSuccessRate.Value < item.Value)
        {
            highestSuccessRate = item;
        }
    }

    form.textMostSuccessful.Text = "Most successful algorithm is " + highestSuccessRate.Key + " and it will be used for testing.";
    readyToTest = true; // switch flag
}
/// <summary>
/// Computes the n-point precision of a threshold curve: precision is sampled at
/// <paramref name="n"/> recall targets spaced 1/(n-1) apart starting at 0, linearly
/// interpolated between curve points where possible, and the samples are averaged.
/// </summary>
/// <param name="tcurve">a previously extracted threshold curve Instances.</param>
/// <param name="n">the number of points to average over.</param>
/// <returns>
/// the n-point precision, or NaN when <paramref name="tcurve"/> does not carry the expected
/// relation name or contains no instances.
/// </returns>
public static double getNPointPrecision(Instances tcurve, int n)
{
    if (!RELATION_NAME.Equals(tcurve.relationName()))
    {
        return System.Double.NaN;
    }
    if (tcurve.numInstances() == 0)
    {
        return System.Double.NaN;
    }

    int recallColumn = tcurve.attribute(RECALL_NAME).index();
    int precisionColumn = tcurve.attribute(PRECISION_NAME).index();
    double[] recalls = tcurve.attributeToDoubleArray(recallColumn);
    int[] order = Utils.sort(recalls);

    double spacing = 1.0 / (n - 1);
    double total = 0;
    for (int point = 0; point < n; point++)
    {
        double target = point * spacing;
        int cursor = binarySearch(order, recalls, target);
        double baseRecall = recalls[order[cursor]];
        double sample = tcurve.instance(order[cursor]).value_Renamed(precisionColumn);

        // Interpolate only when the hit is not the first curve point: walk forward to the
        // next point with a different recall and evaluate the connecting line at the target.
        if (cursor != 0)
        {
            while (cursor < order.Length - 1)
            {
                cursor++;
                double nextRecall = recalls[order[cursor]];
                if (nextRecall == baseRecall)
                {
                    continue;
                }

                double nextPrecision = tcurve.instance(order[cursor]).value_Renamed(precisionColumn);
                double slope = (nextPrecision - sample) / (nextRecall - baseRecall);
                double offset = sample - baseRecall * slope;
                sample = target * slope + offset;
                break;
            }
        }

        total += sample;
    }

    return total / n;
}
/// <summary>
/// Runs an incremental (walk-forward) test. For each tested instance index a training set is
/// assembled from earlier instances, <c>IncrementTestTrainCheck</c> / <c>IncrementTestTrain</c>
/// are invoked on it, and one line per returned result row is appended to
/// <paramref name="resultFile"/>.
/// </summary>
/// <param name="allInstancesWithDate">
/// Full dataset. Attribute 0 is read as a date and attribute 1 as a time value through
/// <c>WekaUtils</c>; a class index must already be set.
/// </param>
/// <param name="clsCreator">Classifier factory forwarded to <c>IncrementTestTrain</c>.</param>
/// <param name="removeAttributes">
/// Attribute range passed to Weka's <c>Remove -R</c> filter to derive the dataset without date columns.
/// </param>
/// <param name="resultFile">
/// File the result lines are appended to. Outside real-time mode an existing file is deleted first.
/// </param>
/// <param name="step">
/// Stride between tested instance indices and number of result rows expected per tested index.
/// </param>
/// <returns>
/// The last summary line written to the log outside real-time mode; an empty string in
/// real-time mode or when no result row was produced.
/// </returns>
public static string IncrementTest(weka.core.Instances allInstancesWithDate, Func<weka.classifiers.Classifier> clsCreator, string removeAttributes, string resultFile, int step)
{
    int trainMinutes = TestParameters2.MinTrainPeriod * WekaUtils.GetMinuteofPeriod(TestParameters2.CandidateParameter.MainPeriod);

    string ret = string.Empty;

    // Experiment switches; with the values below only the "remove larger than median" filter is active.
    string sampleFile = null; // resultFile.Replace("Increment", "sample");
    bool useInstanceWeight = false;
    bool enablePerHour = false;
    bool enableDiffClass = false;
    int sameClassCount = -1; // TestParameters2.MaxTrainSize / 3;
    bool enableDiffHpTime = false;
    bool enableRemoveLittle = false;
    bool enableRemoveLargeThanMid = true;
    bool enableFilter = false;

    if (!TestParameters2.RealTimeMode && File.Exists(resultFile))
        File.Delete(resultFile);

    // Derive the dataset without the attributes listed in removeAttributes.
    weka.core.Instances allInstances;
    var filter = new weka.filters.MultiFilter();
    filter.setOptions(weka.core.Utils.splitOptions(string.Format("-F \"weka.filters.unsupervised.attribute.Remove -R {0} \"", removeAttributes)));
    filter.setInputFormat(allInstancesWithDate);
    allInstances = weka.filters.Filter.useFilter(allInstancesWithDate, filter);

    // Cache per-instance date/time/class values once; the action below reads them repeatedly.
    long[] jHpTimes = new long[allInstancesWithDate.numInstances()];
    DateTime[] jDates = new DateTime[allInstancesWithDate.numInstances()];
    DateTime[] jHpDates = new DateTime[allInstancesWithDate.numInstances()];
    int[] jHps = new int[allInstancesWithDate.numInstances()];
    for (int j = 0; j < jDates.Length; ++j)
    {
        jDates[j] = WekaUtils.GetDateValueFromInstances(allInstancesWithDate, 0, j);
        jHpTimes[j] = WekaUtils.GetTimeValueFromInstances(allInstancesWithDate, 1, j);
        jHpDates[j] = WekaUtils.GetDateFromTime(jHpTimes[j]);
        jHps[j] = (int)allInstances.instance(j).classValue();
    }

    #region "action"
    // Builds the training data for the instance at tuple.Item1. Returns null when the instance
    // lies outside the configured train period or not enough history could be collected;
    // otherwise (train set, train set with dates, max hp time, its hp value, its count).
    Func<Tuple<int, int>, Tuple<weka.core.Instances, weka.core.Instances, long, int, int>> action = (tuple) =>
    {
        var ai = tuple.Item1;

        DateTime nowDate = WekaUtils.GetDateValueFromInstances(allInstancesWithDate, 0, ai);
        if (nowDate < TestParameters2.TrainStartTime || nowDate > TestParameters2.TrainEndTime)
            return null;

        DateTime nowHpDate = WekaUtils.GetDateValueFromInstances(allInstancesWithDate, 1, ai);
        double nowClass = allInstancesWithDate.instance(ai).classValue();
        double preJClass = -1;
        List<weka.core.Instance> listTrainInstances = new List<weka.core.Instance>(ai / 2);
        List<weka.core.Instance> listTrainInstancesWithDate = new List<weka.core.Instance>(ai / 2);
        int[] counts = new int[allInstancesWithDate.numClasses()];
        long maxjHpTime = -1;
        int maxjHpTimeHp = 2;
        int maxjHpTimeCount = 0;
        bool enoughTrainMinutes = false;
        DateTime firstDate = nowDate.AddMinutes(-trainMinutes);
        int classIdxWithDate = allInstancesWithDate.classIndex();
        int classIdx = allInstances.classIndex();

        // Walk backwards from the instance just before ai until trainMinutes of history are covered.
        for (int j = ai - 1; j >= 0; --j)
        {
            long jHpTime = jHpTimes[j];
            DateTime jHpDate = jHpDates[j];
            DateTime jDate = jDates[j];
            int jHp = jHps[j];

            if (enablePerHour)
            {
                if (nowDate.Hour != jDate.Hour)
                    continue;
            }

            weka.core.Instance instInsert = null;
            weka.core.Instance instInsertWithDate = null;
            if (jHpDate <= nowDate)
            {
                // The instance's hp date is not after "now": use it unchanged.
                if (jHpTime > maxjHpTime)
                {
                    maxjHpTime = jHpTime;
                    maxjHpTimeHp = jHp;
                    maxjHpTimeCount = 1;
                }
                else if (jHpTime == maxjHpTime)
                {
                    maxjHpTimeCount++;
                }

                instInsert = new weka.core.DenseInstance(allInstances.instance(j));
                instInsertWithDate = new weka.core.DenseInstance(allInstancesWithDate.instance(j));
            }
            else
            {
                // The hp date lies after "now": only usable when partial hp data is enabled and
                // a value up to nowDate can be looked up.
                if (TestParameters2.UsePartialHpDataM1 || TestParameters2.UsePartialHpData)
                {
                    Tuple<int, long> hp = null;
                    if (TestParameters2.UsePartialHpDataM1)
                    {
                        hp = HpData.Instance.GetHpSumByM1(TestParameters2.CandidateParameter.MainSymbol, TestParameters2.CandidateParameter.MainPeriod,
                            WekaUtils.GetTimeFromDate(nowDate), WekaUtils.GetTimeFromDate(jDate));
                        if (hp.Item2 == 0)
                            hp = null;
                    }
                    else if (TestParameters2.UsePartialHpData)
                    {
                        var hps = HpData.Instance.GetHpSum(TestParameters2.CandidateParameter.MainSymbol, TestParameters2.CandidateParameter.MainPeriod,
                            WekaUtils.GetTimeFromDate(nowDate), WekaUtils.GetTimeFromDate(jDate));
                        if (hps.ContainsKey(jDate))
                        {
                            hp = hps[jDate];
                        }
                    }

                    if (hp != null)
                    {
                        if (WekaUtils.GetDateFromTime(hp.Item2) > nowDate)
                        {
                            throw new AssertException("hpdate should less than now");
                        }

                        jHp = hp.Item1;
                        jHpTime = hp.Item2;
                        if (jHpTime > maxjHpTime)
                        {
                            maxjHpTime = jHpTime;
                            maxjHpTimeHp = jHp;
                            // NOTE(review): the non-partial branch above starts this count at 1,
                            // this one at 0 - confirm the asymmetry is intended.
                            maxjHpTimeCount = 0;
                        }
                        else if (jHpTime == maxjHpTime)
                        {
                            maxjHpTimeCount++;
                        }

                        instInsert = new weka.core.DenseInstance(allInstances.instance(j));
                        instInsert.setValue(classIdx, jHp);

                        instInsertWithDate = new weka.core.DenseInstance(allInstancesWithDate.instance(j));
                        instInsertWithDate.setValue(classIdxWithDate, jHp);
                        instInsertWithDate.setValue(1, jHpTime * 1000);
                    }
                }
            }

            if (instInsert == null)
                continue;

            double jClass = jHp;
            if (enableDiffClass && jClass == preJClass)
                continue;
            if (sameClassCount > 0)
            {
                if (counts[(int)jClass] >= sameClassCount)
                    continue;
                counts[(int)jClass]++;
            }
            if (enableFilter && j > 0 && Filter(jDate, allInstancesWithDate.instance(j), allInstancesWithDate.instance(j - 1)))
                continue;

            if (useInstanceWeight)
                instInsert.setWeight((nowDate - jDate).TotalMinutes);
            listTrainInstances.Add(instInsert);
            listTrainInstancesWithDate.Add(instInsertWithDate);
            preJClass = jClass;

            if (jDate <= firstDate)
            {
                enoughTrainMinutes = true;
                break;
            }
        }

        if (!enoughTrainMinutes)
        {
            Console.WriteLine(string.Format("{0}, not enough trainMinutes", nowDate.ToString(Parameters.DateTimeFormat)));
            return null;
        }
        if (listTrainInstances.Count < TestParameters2.MinTrainSize)
        {
            Console.WriteLine(string.Format("{0}, numInstances {1} < minTrainSize {2}", nowDate.ToString(Parameters.DateTimeFormat), listTrainInstances.Count, TestParameters2.MinTrainSize));
            return null;
        }

        weka.core.Instances trainInstances = new weka.core.Instances(allInstances, listTrainInstances.Count);
        weka.core.Instances trainInstancesWithDate = new weka.core.Instances(allInstancesWithDate, listTrainInstancesWithDate.Count);
        WekaUtils.AddInstanceQuickly(trainInstances, listTrainInstances);
        WekaUtils.AddInstanceQuickly(trainInstancesWithDate, listTrainInstancesWithDate);

        if (enableRemoveLittle)
        {
            // Relabel runs of fewer than five equal non-2 classes with the previous accepted class.
            double preClass = 2;
            for (int ii = 0; ii < trainInstances.numInstances(); ++ii)
            {
                var iiClass = trainInstances.instance(ii).classValue();
                if (iiClass == 2)
                    continue;
                int jj = ii + 1;
                while (jj < trainInstances.numInstances())
                {
                    if (trainInstances.instance(jj).classValue() == iiClass)
                        jj++;
                    else
                        break;
                }
                int count = jj - ii;
                if (count < 5)
                {
                    for (jj = 0; jj < count; ++jj)
                    {
                        trainInstances.instance(ii + jj).setClassValue(preClass);
                    }
                }
                else
                {
                    preClass = iiClass;
                    ii += count;
                }
            }
        }

        if (enableDiffHpTime)
        {
            // Keep only the first instance seen for each distinct hp time.
            Dictionary<long, int> jDictHpTimes = new Dictionary<long, int>();
            int n = trainInstances.numInstances();
            List<weka.core.Instance> list1 = new List<weka.core.Instance>(n);
            List<weka.core.Instance> list2 = new List<weka.core.Instance>(n);
            for (int j = 0; j < n; ++j)
            {
                long jHpTime = WekaUtils.GetTimeValueFromInstances(trainInstancesWithDate, 1, j);
                if (jDictHpTimes.ContainsKey(jHpTime))
                {
                    continue;
                }
                else
                {
                    jDictHpTimes[jHpTime] = list1.Count;
                    list1.Add(trainInstances.instance(j));
                    list2.Add(trainInstancesWithDate.instance(j));
                }
            }
            weka.core.Instances newTrainInstances = new weka.core.Instances(trainInstances, list1.Count);
            weka.core.Instances newTrainInstancesWithDate = new weka.core.Instances(trainInstancesWithDate, list2.Count);
            WekaUtils.AddInstanceQuickly(newTrainInstances, list1);
            WekaUtils.AddInstanceQuickly(newTrainInstancesWithDate, list2);
            trainInstances = newTrainInstances;
            trainInstancesWithDate = newTrainInstancesWithDate;
        }

        if (enableRemoveLargeThanMid)
        {
            // Drop every instance whose (attribute 1 - attribute 0) duration exceeds the median.
            int n = trainInstances.numInstances();
            long[] lastTimes = new long[n];
            for (int j = 0; j < n; ++j)
            {
                long openTime = WekaUtils.GetTimeValueFromInstances(trainInstancesWithDate, 0, j);
                long closeTime = WekaUtils.GetTimeValueFromInstances(trainInstancesWithDate, 1, j);
                lastTimes[j] = (long)(closeTime - openTime);
            }

            // Sort a copy: sorting lastTimes itself would break the index correspondence between
            // lastTimes[j] and instance j that the filtering loop below relies on.
            long[] sortedLastTimes = (long[])lastTimes.Clone();
            Array.Sort(sortedLastTimes);
            long midLastTime = sortedLastTimes[sortedLastTimes.Length / 2];

            List<weka.core.Instance> list1 = new List<weka.core.Instance>(n);
            List<weka.core.Instance> list2 = new List<weka.core.Instance>(n);
            for (int j = 0; j < n; ++j)
            {
                if (lastTimes[j] > midLastTime)
                {
                    continue;
                }
                list1.Add(trainInstances.instance(j));
                list2.Add(trainInstancesWithDate.instance(j));
            }
            weka.core.Instances newTrainInstances = new weka.core.Instances(trainInstances, list1.Count);
            weka.core.Instances newTrainInstancesWithDate = new weka.core.Instances(trainInstancesWithDate, list2.Count);
            WekaUtils.AddInstanceQuickly(newTrainInstances, list1);
            WekaUtils.AddInstanceQuickly(newTrainInstancesWithDate, list2);
            trainInstances = newTrainInstances;
            trainInstancesWithDate = newTrainInstancesWithDate;
        }

        if (!string.IsNullOrEmpty(sampleFile))
        {
            // NOTE(review): locking on a string is fragile; currently unreachable because
            // sampleFile is always null. Use a dedicated lock object if this is re-enabled.
            lock (sampleFile)
            {
                if (!System.IO.File.Exists(sampleFile))
                {
                    WekaUtils.SaveInstances(trainInstancesWithDate, sampleFile);
                }
            }
        }

        return new Tuple<weka.core.Instances, weka.core.Instances, long, int, int>(trainInstances, trainInstancesWithDate, maxjHpTime, maxjHpTimeHp, maxjHpTimeCount);
    };
    #endregion

    if (!TestParameters2.RealTimeMode)
    {
        // Running totals over all result rows: tp/fp per direction (b = d 0, s = d 1) and counts per d.
        int tpb = 0, fpb = 0, tps = 0, fps = 0;
        int db = 0, ds = 0, dn = 0;

        int parallelStep = 1;
        if (TestParameters.EnableMultiThread)
        {
            parallelStep = 100;
        }
        int startIdx = TestParameters2.MinTrainPeriod * 2 / 3;
        for (int i0 = startIdx; i0 < allInstancesWithDate.numInstances() - step; i0 += step * parallelStep)
        {
            // Batch of (instance index, slot index in this batch).
            List<Tuple<int, int>> toTest = new List<Tuple<int, int>>();
            for (int i = i0; i < Math.Min(i0 + step * parallelStep, allInstancesWithDate.numInstances() - step); i += step)
            {
                toTest.Add(new Tuple<int, int>(i, toTest.Count));
            }

            var toRet0 = new Tuple<weka.core.Instances, weka.core.Instances, long, int, int>[toTest.Count];
            if (TestParameters.EnableMultiThread)
            {
                Parallel.ForEach(toTest, (tuple) =>
                {
                    // Item2 is the slot index. The former "Item1 - i0" equals slot * step and
                    // therefore addressed the wrong (or a non-existent) slot whenever step > 1.
                    int i = tuple.Item2;
                    toRet0[i] = action(tuple);
                });
            }
            else
            {
                for (int i = 0; i < toTest.Count; ++i)
                {
                    var r = action(toTest[i]);
                    toRet0[i] = r;
                }
            }

            object[, ,] toRet = new object[toTest.Count, step, 9];
            bool[] toRet1 = new bool[toTest.Count];
            for (int i = 0; i < toTest.Count; ++i)
            {
                // Default decision for every row is 2 until IncrementTestTrain supplies a result.
                for (int j = 0; j < step; ++j)
                    toRet[i, j, 0] = 2;

                if (toRet0[i] != null)
                {
                    var maxjHpTime = toRet0[i].Item3;
                    var maxjHpTimeHp = toRet0[i].Item4;
                    var maxjHpTimeCount = toRet0[i].Item5;

                    toRet1[i] = IncrementTestTrainCheck(resultFile, maxjHpTime, maxjHpTimeHp, maxjHpTimeCount, toTest[i]);
                }
            }

            if (TestParameters.EnableMultiThread)
            {
                Parallel.ForEach(toTest, (tuple) =>
                {
                    // Slot index, see above.
                    int i = tuple.Item2;
                    if (toRet0[i] != null)
                    {
                        var trainInstances = toRet0[i].Item1;
                        var trainInstancesWithDate = toRet0[i].Item2;
                        var maxjHpTime = toRet0[i].Item3;
                        var maxjHpTimeHp = toRet0[i].Item4;

                        var r = IncrementTestTrain(toRet1[i], clsCreator, resultFile, step,
                            trainInstances, trainInstancesWithDate,
                            allInstances, allInstancesWithDate, maxjHpTime, maxjHpTimeHp, toTest[i]);
                        if (r != null)
                        {
                            for (int j = 0; j < toRet.GetLength(1); ++j)
                                for (int k = 0; k < toRet.GetLength(2); ++k)
                                    toRet[i, j, k] = r[j, k];
                        }
                    }
                });
            }
            else
            {
                for (int i = 0; i < toTest.Count; ++i)
                {
                    if (toRet0[i] != null)
                    {
                        var trainInstances = toRet0[i].Item1;
                        var trainInstancesWithDate = toRet0[i].Item2;
                        var maxjHpTime = toRet0[i].Item3;
                        var maxjHpTimeHp = toRet0[i].Item4;

                        var r = IncrementTestTrain(toRet1[i], clsCreator, resultFile, step,
                            trainInstances, trainInstancesWithDate,
                            allInstances, allInstancesWithDate, maxjHpTime, maxjHpTimeHp, toTest[i]);
                        if (r != null)
                        {
                            for (int j = 0; j < toRet.GetLength(1); ++j)
                                for (int k = 0; k < toRet.GetLength(2); ++k)
                                    toRet[i, j, k] = r[j, k];
                        }
                    }
                }
            }

            // Score and persist every produced row, in batch order.
            for (int i = 0; i < toTest.Count; ++i)
            {
                for (int j = 0; j < step; ++j)
                {
                    int d = (int)toRet[i, j, 0];
                    if (d != 0 && d != 1 && d != 2)
                        throw new AssertException("d should be -1, 0, 1 or 2. but it's" + d.ToString());

                    // Column 6 stays null for rows IncrementTestTrain did not fill in.
                    if (toRet[i, j, 6] == null)
                    {
                        continue;
                    }
                    long maxJHpTime = (long)toRet[i, j, 6];

                    if (d == 0)
                        db++;
                    else if (d == 1)
                        ds++;
                    else
                        dn++;

                    int v = (int)toRet[i, j, 1];
                    if (d != 2)
                    {
                        if (v == 3 || d == v)
                        {
                            if (d == 0)
                            {
                                tpb++;
                            }
                            else if (d == 1)
                            {
                                tps++;
                            }
                        }
                        else
                        {
                            if (d == 0)
                            {
                                fpb++;
                            }
                            else if (d == 1)
                            {
                                fps++;
                            }
                        }
                    }

                    DateTime nowDate = WekaUtils.GetDateFromTime((long)toRet[i, j, 3]);
                    DateTime nextDate = WekaUtils.GetDateFromTime((long)toRet[i, j, 2]);

                    if (tpb + fpb + tps + fps != 0)
                    {
                        ret = (string.Format("{0}, tn={1},tc={2}, db={3},ds={4},dn={5}, tpb={6},fpb={7},tps={8},fps={9},p={10}",
                            nextDate.ToString(Parameters.DateTimeFormat), toRet[i, j, 4], toRet[i, j, 5],
                            db, ds, dn,
                            tpb, fpb, tps, fps, ((double)(tpb+tps) / (tpb + fpb + tps + fps)).ToString("F2")));
                    }
                    else
                    {
                        ret = (string.Format("{0}, tn={1},tc={2}, db={3},ds={4},dn={5}, tpb={6},fpb={7},tps={8},fps={9},p={10}",
                            nextDate.ToString(Parameters.DateTimeFormat), toRet[i, j, 4], toRet[i, j, 5],
                            db, ds, dn,
                            tpb, fpb, tps, fps, 0));
                    }
                    WekaUtils.Instance.WriteLog(ret);

                    using (StreamWriter sw = new StreamWriter(resultFile, true))
                    {
                        sw.WriteLine(string.Format("{1}, {2}, {6}, {3}, {4}, {5}", nowDate, nextDate.ToString(Parameters.DateTimeFormat),
                            d, v, (double)toRet[i, j, 7], WekaUtils.GetDateFromTime(maxJHpTime).ToString(Parameters.DateTimeFormat), toRet[i, j, 8]));
                    }
                }
            }
        }
    }
    else
    {
        // Real-time mode: only the last instance is processed (the loop body runs at most once).
        for (int i0 = allInstancesWithDate.numInstances() - 1; i0 < allInstancesWithDate.numInstances() && i0 >= 0; i0++)
        {
            List<Tuple<int, int>> toTest = new List<Tuple<int, int>>();
            toTest.Add(new Tuple<int, int>(i0, toTest.Count));

            var toRet0 = new Tuple<weka.core.Instances, weka.core.Instances, long, int, int>[toTest.Count];
            for (int i = 0; i < toTest.Count; ++i)
            {
                var r = action(toTest[i]);
                toRet0[i] = r;
            }

            object[, ,] toRet = new object[toTest.Count, step, 9];
            for (int i = 0; i < toTest.Count; ++i)
            {
                for (int j = 0; j < toRet.GetLength(1); ++j)
                    toRet[i, j, 0] = 2;

                if (toRet0[i] != null)
                {
                    var trainInstances = toRet0[i].Item1;
                    var trainInstancesWithDate = toRet0[i].Item2;
                    var maxjHpTime = toRet0[i].Item3;
                    var maxjHpTimeHp = toRet0[i].Item4;
                    var maxjHpTimeCount = toRet0[i].Item5;

                    bool b = IncrementTestTrainCheck(resultFile, maxjHpTime, maxjHpTimeHp, maxjHpTimeCount, toTest[i]);
                    var r = IncrementTestTrain(b, clsCreator, resultFile, step,
                        trainInstances, trainInstancesWithDate,
                        allInstances, allInstancesWithDate, maxjHpTime, maxjHpTimeHp, toTest[i]);
                    if (r != null)
                    {
                        for (int j = 0; j < toRet.GetLength(1); ++j)
                            for (int k = 0; k < toRet.GetLength(2); ++k)
                                toRet[i, j, k] = r[j, k];
                    }
                }
            }

            for (int j = 0; j < step; ++j)
            {
                int d = (int)toRet[0, j, 0];
                if (d != 0 && d != 1 && d != 2)
                    throw new AssertException("d should be -1, 0, 1 or 2.");

                if (toRet[0, j, 6] == null)
                    continue;
                long maxJHpTime = (long)toRet[0, j, 6];

                int v = (int)toRet[0, j, 1];

                DateTime nowDate = WekaUtils.GetDateFromTime((long)toRet[0, j, 3]);
                DateTime nextDate = WekaUtils.GetDateFromTime((long)toRet[0, j, 2]);

                using (StreamWriter sw = new StreamWriter(resultFile, true))
                {
                    sw.WriteLine(string.Format("{1}, {2}, {6}, {3}, {4}, {5}", nowDate, nextDate.ToString(Parameters.DateTimeFormat),
                        d, v, (double)toRet[0, j, 7], WekaUtils.GetDateFromTime(maxJHpTime).ToString(Parameters.DateTimeFormat), toRet[0, j, 8]));
                }
            }
        }
    }

    return ret;
}
/// <summary>
/// Applies this cost matrix to a set of instances. When a random number generator is
/// supplied the instances are resampled according to the cost-derived weights; otherwise a
/// copy of the data is returned with each instance reweighted.
/// Adapted from code once sitting in Instances.java.
/// </summary>
/// <param name="data">the instances to reweight.</param>
/// <param name="random">
/// a random number generator for resampling; if null the instances are reweighted instead.
/// </param>
/// <returns>a new dataset reflecting the cost of misclassification.</returns>
/// <exception cref="Exception">
/// if the data has no class index, the matrix size differs from the number of classes, or the
/// matrix contains a negative entry.
/// </exception>
public virtual Instances applyCostMatrix(Instances data, System.Random random)
{
    if (data.classIndex() < 0)
    {
        throw new System.Exception("Class index is not set!");
    }
    if (size() != data.numClasses())
    {
        throw new System.Exception("Misclassification cost matrix has " + "wrong format!");
    }

    double[] classFactor = new double[data.numClasses()];
    double[] classWeightTotal = new double[data.numClasses()];

    // Total instance weight per class, and overall.
    for (int row = 0; row < data.numInstances(); row++)
    {
        //UPGRADE_WARNING: narrowing conversion (double class value -> int index) kept from the Java original.
        classWeightTotal[(int) data.instance(row).classValue()] += data.instance(row).weight();
    }
    double totalWeight = Utils.sum(classWeightTotal);

    // A non-zero diagonal entry means the matrix is not normalized yet:
    // delegate to a normalized copy.
    for (int diag = 0; diag < size(); diag++)
    {
        if (Utils.eq(getXmlElement(diag, diag), 0))
        {
            continue;
        }
        CostMatrix normalized = new CostMatrix(this);
        normalized.normalize();
        return normalized.applyCostMatrix(data, random);
    }

    // Kai Ming Ting's formula for deriving class weights, with Breiman's heuristic
    // for multiclass problems.
    double factorNormalizer = 0;
    for (int actual = 0; actual < data.numClasses(); actual++)
    {
        double rowCost = 0;
        for (int predicted = 0; predicted < data.numClasses(); predicted++)
        {
            if (Utils.sm(getXmlElement(actual, predicted), 0))
            {
                throw new System.Exception("Neg. weights in misclassification " + "cost matrix!");
            }
            rowCost += getXmlElement(actual, predicted);
        }
        classFactor[actual] = rowCost * totalWeight;
        factorNormalizer += rowCost * classWeightTotal[actual];
    }
    for (int cls = 0; cls < data.numClasses(); cls++)
    {
        classFactor[cls] /= factorNormalizer;
    }

    // New weight of every instance: old weight scaled by the factor of its class.
    double[] newWeights = new double[data.numInstances()];
    for (int row = 0; row < data.numInstances(); row++)
    {
        //UPGRADE_WARNING: narrowing conversion (double class value -> int index) kept from the Java original.
        newWeights[row] = data.instance(row).weight() * classFactor[(int) data.instance(row).classValue()];
    }

    // Without a generator: reweight a copy. With one: resample.
    if (random == null)
    {
        Instances reweighted = new Instances(data);
        for (int row = 0; row < data.numInstances(); row++)
        {
            reweighted.instance(row).Weight = newWeights[row];
        }
        return reweighted;
    }

    return data.resampleWithWeights(random, newWeights);
}
/// <summary>
/// Pushes every instance of a dataset through a filter, signals the end of the batch and
/// collects everything the filter outputs into a new dataset.
/// </summary>
/// <param name="data">the data to be filtered</param>
/// <param name="filter">the filter to be used</param>
/// <returns>the filtered set of data</returns>
/// <exception cref="Exception">if the filter can't be used successfully</exception>
public static Instances useFilter(Instances data, Filter filter)
{
    // Feed the whole batch to the filter.
    int index = 0;
    while (index < data.numInstances())
    {
        filter.input(data.instance(index));
        index++;
    }
    filter.batchFinished();

    // Drain the filter's output queue into a dataset with the filter's output format.
    Instances filtered = filter.getOutputFormat();
    for (Instance next = filter.output(); next != null; next = filter.output())
    {
        filtered.add(next);
    }
    return filtered;
}
/// <summary>
/// Click handler that fills the attribute grid from the data file, trains
/// five Weka classifiers on a percentage split of that file and writes the
/// name and accuracy of the best one into <c>textBox1</c>.
/// </summary>
/// <remarks>
/// The data file is re-read for every classifier because each one gets its
/// own preprocessing chain. The trained classifiers stay available in the
/// <c>cl_*</c> members after the handler returns.
/// </remarks>
/// <param name="sender">event source (not used)</param>
/// <param name="e">event data (not used)</param>
private void result_Click(object sender, EventArgs e)
{
    // Order matters: scores[k] below belongs to algorithmNames[k].
    string[] algorithmNames =
    {
        "Naive Bayes",
        "K Nearest Neighbor",
        "Decision Tree",
        "Neural Network",
        "Support Vector Machine"
    };
    double[] scores = new double[algorithmNames.Length];

    // Naive Bayes: attributes are discretized first.
    weka.core.Instances bayesData = LoadResultInstances();
    PopulateAttributeGrid(bayesData);
    bayesData.setClassIndex(bayesData.numAttributes() - 1);
    cl_Naive = new weka.classifiers.bayes.NaiveBayes();
    bayesData = ApplyResultFilter(bayesData, new weka.filters.unsupervised.attribute.Discretize());
    scores[0] = ScoreOnPercentSplit(cl_Naive, bayesData);

    // k-nearest neighbour: nominal -> binary, then normalization.
    weka.core.Instances knnData = LoadResultInstances();
    knnData.setClassIndex(knnData.numAttributes() - 1);
    cl_Knn = new weka.classifiers.lazy.IBk();
    knnData = ApplyResultFilter(knnData, new weka.filters.unsupervised.attribute.NominalToBinary());
    knnData = ApplyResultFilter(knnData, new weka.filters.unsupervised.instance.Normalize());
    scores[1] = ScoreOnPercentSplit(cl_Knn, knnData);

    // Decision tree (J48): normalization only.
    weka.core.Instances treeData = LoadResultInstances();
    treeData.setClassIndex(treeData.numAttributes() - 1);
    cl_Tree = new weka.classifiers.trees.J48();
    treeData = ApplyResultFilter(treeData, new weka.filters.unsupervised.instance.Normalize());
    scores[2] = ScoreOnPercentSplit(cl_Tree, treeData);

    // Neural network: nominal -> binary, then normalization.
    weka.core.Instances networkData = LoadResultInstances();
    networkData.setClassIndex(networkData.numAttributes() - 1);
    cl_NN = new weka.classifiers.functions.MultilayerPerceptron();
    networkData = ApplyResultFilter(networkData, new weka.filters.unsupervised.attribute.NominalToBinary());
    networkData = ApplyResultFilter(networkData, new weka.filters.unsupervised.instance.Normalize());
    scores[3] = ScoreOnPercentSplit(cl_NN, networkData);

    // Support vector machine (SMO): nominal -> binary, then normalization.
    weka.core.Instances svmData = LoadResultInstances();
    svmData.setClassIndex(svmData.numAttributes() - 1);
    cl_SVM = new weka.classifiers.functions.SMO();
    svmData = ApplyResultFilter(svmData, new weka.filters.unsupervised.attribute.NominalToBinary());
    svmData = ApplyResultFilter(svmData, new weka.filters.unsupervised.instance.Normalize());
    scores[4] = ScoreOnPercentSplit(cl_SVM, svmData);

    // Pick the first strictly-best score. The search starts from local
    // values so that a result left in max/count by an earlier click cannot
    // hide the winner of this run.
    double bestScore = 0.0;
    int bestIndex = -1;
    for (int k = 0; k < scores.Length; k++)
    {
        if (scores[k] > bestScore)
        {
            bestScore = scores[k];
            bestIndex = k;
        }
    }

    // max and count are declared outside this method; keep them updated the
    // way the original code did (count is the 1-based position of the winner,
    // 0 when no score was above zero).
    max = bestScore;
    count = bestIndex + 1;

    string nameOfAlgo = bestIndex >= 0 ? algorithmNames[bestIndex] : "";
    textBox1.Text = nameOfAlgo + " is the most successful algorithm for this data set." + "(" + max + "%)\n";
}

/// <summary>
/// Reads the dataset at <c>fileDirectory</c> and closes the reader again,
/// whether or not parsing succeeded.
/// </summary>
/// <returns>the freshly loaded instances (class index not yet set)</returns>
private weka.core.Instances LoadResultInstances()
{
    java.io.FileReader reader = new java.io.FileReader(fileDirectory);
    try
    {
        return new weka.core.Instances(reader);
    }
    finally
    {
        reader.close();
    }
}

/// <summary>
/// Creates one grid row per attribute (two columns) and, for every attribute
/// except the last one, shows its name and offers its nominal values in a
/// combo box.
/// </summary>
/// <param name="data">dataset whose attributes are listed</param>
private void PopulateAttributeGrid(weka.core.Instances data)
{
    dataGridView1.ColumnCount = 2;
    dataGridView1.RowCount = data.numAttributes();

    // The last attribute is the one used as class index by the caller and is
    // left out here.
    for (int row = 0; row < data.numAttributes() - 1; row++)
    {
        var attribute = data.attribute(row);
        dataGridView1.Rows[row].Cells[0].Value = attribute.name();

        if (!attribute.isNominal())
        {
            continue;
        }

        // The value list is the text between the first '{' and the next '}'
        // of the attribute's string form, separated by commas.
        string declaration = attribute.toString();
        string valueList = declaration.Split('{')[1].Split('}')[0];

        DataGridViewComboBoxCell choices = new DataGridViewComboBoxCell();
        foreach (string value in valueList.Split(','))
        {
            choices.Items.Add(value);
        }
        dataGridView1.Rows[row].Cells[1] = choices;
    }
}

/// <summary>
/// Configures a filter for the given dataset and returns the filtered copy.
/// </summary>
/// <param name="data">dataset that defines the input format and is filtered</param>
/// <param name="filter">the filter to apply</param>
/// <returns>the dataset produced by the filter</returns>
private static weka.core.Instances ApplyResultFilter(weka.core.Instances data, weka.filters.Filter filter)
{
    filter.setInputFormat(data);
    return weka.filters.Filter.useFilter(data, filter);
}

/// <summary>
/// Shuffles the dataset, trains the classifier on the first
/// <c>percentSplit</c> percent and measures accuracy on the remainder.
/// </summary>
/// <param name="classifier">classifier to train; it stays trained afterwards</param>
/// <param name="data">preprocessed dataset with its class index set</param>
/// <returns>
/// percentage of correctly classified test instances; NaN when the split
/// leaves no test instances (0 / 0 in floating point)
/// </returns>
private double ScoreOnPercentSplit(weka.classifiers.Classifier classifier, weka.core.Instances data)
{
    // Randomize the order of the instances before splitting.
    weka.core.Instances shuffled = ApplyResultFilter(data, new weka.filters.unsupervised.instance.Randomize());

    int trainSize = shuffled.numInstances() * percentSplit / 100;
    int testSize = shuffled.numInstances() - trainSize;

    weka.core.Instances train = new weka.core.Instances(shuffled, 0, trainSize);
    classifier.buildClassifier(train);

    int numCorrect = 0;
    for (int i = trainSize; i < shuffled.numInstances(); i++)
    {
        weka.core.Instance current = shuffled.instance(i);
        if (classifier.classifyInstance(current) == current.classValue())
        {
            numCorrect++;
        }
    }

    return (double)numCorrect / (double)testSize * 100.0;
}
/// <summary>
/// Classifies an instance by letting all sufficiently close stored training
/// instances vote and returning the rounded mean of their class values.
/// </summary>
/// <remarks>
/// With n = (numAttributes - 1) / 2, the distance to a training instance is
/// accumulated over attributes 0 .. 2n - 1: every differing value costs 1 when
/// either side equals 2 and 4 otherwise. A training instance votes when its
/// distance is below the threshold and its class value is neither 2 nor 3.
/// The value 2 is returned when there is no training data, when nobody
/// votes, or when fewer than numInstances / 30 instances vote.
/// </remarks>
/// <param name="instance">the instance to classify</param>
/// <returns>the rounded mean class value of the voters, or 2 as described above</returns>
/// <exception cref="AssertException">
/// if the instance does not have the same number of attributes as the
/// training data
/// </exception>
public override double classifyInstance(weka.core.Instance instance)
{
    int trainingCount = m_instances.numInstances();
    if (trainingCount == 0)
    {
        return 2;
    }
    if (m_instances.numAttributes() != instance.numAttributes())
    {
        throw new AssertException("different attribute.");
    }

    int n = (instance.numAttributes() - 1) / 2;
    int comparedAttributes = 2 * n;

    // NOTE(review): n / 4 is an integer division, so this is 8 * floor(n / 4)
    // and not 2 * n for every n - kept as is, confirm that this is intended.
    int threshold = n / 4 * 2 * 4;

    // Sum and count do not depend on the visiting order, so the neighbours
    // are tallied in a single pass instead of being collected and sorted.
    int sum = 0;
    int count = 0;
    for (int i = 0; i < trainingCount; ++i)
    {
        weka.core.Instance neighbour = m_instances.instance(i);

        int distance = 0;
        for (int j = 0; j < comparedAttributes; ++j)
        {
            double theirs = neighbour.value(j);
            double mine = instance.value(j);
            if (theirs == mine)
            {
                continue;
            }
            // A mismatch involving the value 2 is penalized less (1) than
            // any other mismatch (4).
            distance += (mine == 2 || theirs == 2) ? 1 : 4;
        }

        int label = (int)neighbour.classValue();
        if (distance < threshold && label != 2 && label != 3)
        {
            sum += label;
            count++;
        }
    }

    if (count == 0)
    {
        return 2;
    }
    // Too few voters relative to the training set size: fall back to 2.
    if (count < trainingCount / 30)
    {
        return 2;
    }

    return (int)Math.Round((double)sum / count);
}