/// <summary>
/// Trains the classifier on <paramref name="instances"/> by exporting the
/// (re-weighted) data to <c>m_trainingFile</c>, running the external learner
/// and loading the resulting model file into <c>m_modelData</c>.
/// </summary>
/// <remarks>
/// If the count at index 0 or index 2 returned by
/// <c>MincostLiblinearClassifier.GetCount</c> is zero, no model is trained:
/// <c>m_mustValue</c> is set to 2 or 0 respectively, <c>m_delta</c> is reset
/// to 0 and the method returns immediately.
/// </remarks>
/// <param name="instances">Training data.</param>
/// <exception cref="InvalidOperationException">
/// The learner did not produce <c>m_modelFile</c>; the message is the learner output.
/// </exception>
public override void buildClassifier(Instances instances)
{
    m_mustValue = null;

    // NOTE(review): AddInstancesAccordWeight treats a null GetCount result as
    // possible; here a null would throw on the index below — confirm it cannot
    // happen for this caller.
    var weights = MincostLiblinearClassifier.GetCount(instances);
    if (weights[0] == 0)
    {
        m_mustValue = 2;
        m_delta = 0;
        return;
    }
    else if (weights[2] == 0)
    {
        m_mustValue = 0;
        m_delta = 0;
        return;
    }

    m_sampleInstances = new Instances(instances, 0);

    // can classifier handle the data?
    getCapabilities().testWithFail(instances);

    // Work on a copy so the duplicated instances never leak into the caller's data.
    Instances trainInstances = new Instances(instances, 0, instances.numInstances());
    AddInstancesAccordWeight(trainInstances);

    if (System.IO.File.Exists(m_trainingFile))
    {
        System.IO.File.Delete(m_trainingFile);
    }
    libsvmSaver.setInstances(trainInstances);
    libsvmSaver.setFile(new java.io.File(m_trainingFile));
    libsvmSaver.writeBatch();

    // Remove any stale model so its existence afterwards proves the learner succeeded.
    if (System.IO.File.Exists(m_modelFile))
    {
        System.IO.File.Delete(m_modelFile);
    }

    // Scale the -c option by (training set size / 100). The scaled value is kept
    // in a local so that m_trainArgs retains the configured value; writing it back
    // would compound the scaling every time the classifier is rebuilt.
    string trainArgs = m_trainArgs;
    string[] options = Utils.splitOptions(m_trainArgs);
    int idx = Utils.getOptionPos('c', options);
    if (idx != -1 && idx + 1 < options.Length)
    {
        // Invariant culture: the value is exchanged with an external tool and
        // must always use '.' as the decimal separator.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        double c = Convert.ToDouble(options[idx + 1], invariant);
        c = c * trainInstances.numInstances() / 100.0;
        options[idx + 1] = c.ToString(invariant);
        trainArgs = Utils.joinOptions(options);
    }

    learner.ExecuteLearner(s_learnerPath, m_trainingFile, m_modelFile, trainArgs);
    if (!System.IO.File.Exists(m_modelFile))
    {
        throw new InvalidOperationException(learner.Output);
    }
    m_modelData = System.IO.File.ReadAllBytes(m_modelFile);

    GetBestDelta(instances);

    // The training file is only deleted after GetBestDelta, as in the original order.
    if (System.IO.File.Exists(m_trainingFile))
    {
        System.IO.File.Delete(m_trainingFile);
    }
}
//private void ConvertNorminalToString(string fileName)
//{
//    List<string> list = new List<string>();
//    using (System.IO.StreamReader sr = new System.IO.StreamReader(fileName))
//    {
//        while (true)
//        {
//            if (sr.EndOfStream)
//                break;
//            string s = sr.ReadLine();
//            if (string.IsNullOrEmpty(s))
//                continue;
//            int idx = s.IndexOf(' ');
//            string c = idx == -1 ? s : s.Substring(0, idx);
//            if (Convert.ToDouble(c) == 0)
//            {
//                list.Add("-1.0 " + (idx == -1 ? string.Empty : s.Substring(idx + 1)));
//            }
//            else if (Convert.ToDouble(c) == 1)
//            {
//                list.Add("0.0 " + (idx == -1 ? string.Empty : s.Substring(idx + 1)));
//            }
//            else if (Convert.ToDouble(c) == 2)
//            {
//                list.Add("+1.0 " + (idx == -1 ? string.Empty : s.Substring(idx + 1)));
//            }
//            else
//            {
//                list.Add(s);
//            }
//        }
//    }
//    using (System.IO.StreamWriter sw = new System.IO.StreamWriter(fileName))
//    {
//        foreach (string s in list)
//        {
//            sw.WriteLine(s);
//        }
//    }
//}

//private Random m_randomGenerator;

/// <summary>
/// Oversamples one class of <paramref name="instances"/> in place by appending
/// copies of its instances, so that the class carries more weight in training.
/// </summary>
/// <remarks>
/// The number of instances to add (<c>n</c>) is derived from the ratio
/// <c>m_tp / m_sl</c> and the counts returned by
/// <c>MincostLiblinearClassifier.GetCount</c>. Whole multiples of the class are
/// added as exact copies; any remainder is filled with copies of the
/// attribute-wise average of that class. The method does nothing when the counts
/// are unavailable, when no instance needs to be added, or when the data holds
/// no instance of the class to duplicate.
/// </remarks>
/// <param name="instances">Data set to extend; modified in place.</param>
private void AddInstancesAccordWeight(Instances instances)
{
    // 0, 2
    // NOTE(review): buildClassifier reads weights[0] and weights[2], while this
    // method reads weights[0] and weights[1] yet duplicates class 2 — confirm
    // which index GetCount uses for class 2.
    double[] weights = MincostLiblinearClassifier.GetCount(instances);
    if (weights == null)
    {
        return;
    }

    // NOTE(review): presumably a cost ratio; if m_tp and m_sl are integers this
    // is an integer division — verify the field types.
    double c = m_tp / m_sl;
    if (c == 1 && weights[0] == weights[1])
    {
        return;
    }

    int n;
    int toCopyClass;
    if (c >= 1)
    {
        int shouldWeight1 = (int)(c * weights[1]);
        n = (int)(shouldWeight1 - weights[1]);
        toCopyClass = 2;
    }
    else
    {
        // NOTE(review): the original also computed (int)(1 / c * weights[0]) into
        // an unused local, suggesting n was meant to mirror the branch above.
        // The existing formula is kept unchanged here — confirm the intent.
        n = (int)(weights[1] - weights[0]);
        toCopyClass = 0;
    }

    // Nothing to add; the loops below would not have run for n <= 0 either.
    if (n <= 0)
    {
        return;
    }

    List<Instance> copyInstances = new List<Instance>();
    for (int i = 0; i < instances.numInstances(); ++i)
    {
        if (instances.instance(i).classValue() == toCopyClass)
        {
            copyInstances.Add(instances.instance(i));
        }
    }

    // Without this guard the integer division below throws DivideByZeroException
    // when the data contains no instance of the class to duplicate.
    if (copyInstances.Count == 0)
    {
        return;
    }

    // Add the whole class as many times as it fits into n.
    int fullCopies = n / copyInstances.Count;
    for (int j = 0; j < fullCopies; ++j)
    {
        for (int i = 0; i < copyInstances.Count; ++i)
        {
            Instance newInstance = new weka.core.DenseInstance(copyInstances[i]);
            instances.add(newInstance);
            newInstance.setDataset(instances);
        }
    }

    // Fill the remainder with copies of the class average instead of picking
    // random members, which keeps the resulting training set deterministic.
    int remainder = n - fullCopies * copyInstances.Count;
    if (remainder > 0)
    {
        Instance avgInstance = new weka.core.DenseInstance(instances.numAttributes());
        for (int i = 0; i < avgInstance.numAttributes(); ++i)
        {
            double sum = 0;
            for (int j = 0; j < copyInstances.Count; ++j)
            {
                sum += copyInstances[j].value(i);
            }
            avgInstance.setValue(i, sum / copyInstances.Count);
        }

        for (int j = 0; j < remainder; ++j)
        {
            Instance newInstance = new weka.core.DenseInstance(avgInstance);
            instances.add(newInstance);
        }
    }
}