/// <summary>
/// Classifies a single instance with a pre-trained MultilayerPerceptron model and
/// returns the predicted class index followed by the per-class probabilities (in percent).
/// </summary>
/// <param name="attributeArray">Names of the numeric input attributes.</param>
/// <param name="classNames">Labels of the nominal class attribute.</param>
/// <param name="dataValues">One value per input attribute, same order as <paramref name="attributeArray"/>.</param>
/// <param name="classHeader">Name of the class attribute.</param>
/// <param name="defaultclass">Placeholder class label assigned to the test instance.</param>
/// <param name="modelName">File name of the serialized model to load.</param>
/// <param name="hiddelLayers">Hidden-layer spec applied to the loaded network (name kept for interface compatibility).</param>
/// <param name="learningRate">Learning rate applied to the loaded network.</param>
/// <param name="momentum">Momentum applied to the loaded network.</param>
/// <param name="decimalPlaces">Number of decimal places the network reports.</param>
/// <param name="trainingTime">Training epochs setting applied to the loaded network.</param>
/// <returns>List whose first element is the predicted class index; remaining elements are class probabilities in percent.</returns>
public List<double> testMLPUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName, int hiddelLayers = 7, double learningRate = 0.03, double momentum = 0.4, int decimalPlaces = 2, int trainingTime = 1000)
{
    // Build the nominal class attribute from the supplied labels.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Numeric input attributes, with the class attribute appended last.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        attributeList.Add(new weka.core.Attribute(attribute));
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Populate a single instance with the supplied values plus a placeholder class.
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, dataValues[i]); // direct indexing instead of LINQ ElementAt
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    // Load the trained network and (re)apply the tuning parameters.
    java.io.File path = new java.io.File("/models/");
    weka.classifiers.functions.MultilayerPerceptron clRead = loadModel(modelName, path);
    clRead.setHiddenLayers(hiddelLayers.ToString());
    clRead.setLearningRate(learningRate);
    clRead.setMomentum(momentum);
    clRead.setNumDecimalPlaces(decimalPlaces);
    clRead.setTrainingTime(trainingTime);

    // NOTE(review): randomizing a single-instance set is a no-op; kept for parity with training.
    weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
    myRandom.setInputFormat(data);
    data = weka.filters.Filter.useFilter(data, myRandom);

    double classValue = clRead.classifyInstance(data.get(0));
    double[] predictionDistribution = clRead.distributionForInstance(data.get(0));

    // First element is the predicted class index; the rest are per-class probabilities
    // scaled to percent. (Removed an unused class-label lookup inside the loop.)
    List<double> prediction = new List<double>();
    prediction.Add(classValue);
    foreach (double probability in predictionDistribution)
    {
        prediction.Add(probability * 100);
    }
    return prediction;
}
// Classify the instance assembled from the form's input controls with the currently
// loaded model and show the predicted class label in a message box.
private void button1_Click(object sender, EventArgs e)
{
    // Reload the dataset (header + rows) from the ARFF file the model was built on.
    weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader(file));
    double[] Data = new double[insts.numAttributes()];
    // Collect one value per attribute from the dynamically created controls;
    // control order is assumed to match attribute order — TODO confirm.
    for (int i = 0; i < list.Count; i++)
    {
        if (list[i].GetType() == typeof(TextBox))
        {
            TextBox txt = (TextBox)list[i];
            // NOTE(review): '.' -> ',' plus Convert.ToDouble relies on a comma-decimal
            // current culture; this will throw or misparse under a dot-decimal culture — verify.
            string value = txt.Text.Replace('.', ',');
            Data[i] = Convert.ToDouble(value);
        }
        else
        {
            // Combo boxes contribute their selected index as the nominal value.
            ComboBox combobox = (ComboBox)list[i];
            Data[i] = Convert.ToDouble(combobox.SelectedIndex);
        }
    }
    // Data[(insts.numAttributes() - 1] = 0;
    insts.setClassIndex(insts.numAttributes() - 1);
    Instance newInsts = new Instance(1.0, Data);
    insts.add(newInsts);
    // Apply the same preprocessing the model type was trained with.
    string type = model.GetType().ToString();
    if (type == "weka.classifiers.bayes.NaiveBayes")
    {
        weka.filters.Filter myDiscretize = new weka.filters.unsupervised.attribute.Discretize();
        myDiscretize.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myDiscretize);
    }
    else if (type == "weka.classifiers.lazy.IBk")
    {
        weka.filters.Filter myDummy = new weka.filters.unsupervised.attribute.NominalToBinary();
        myDummy.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myDummy);
        weka.filters.Filter myNormalize = new weka.filters.unsupervised.instance.Normalize();
        myNormalize.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myNormalize);
    }
    // Classify the instance just appended and map the numeric class index back to its label.
    double index = model.classifyInstance(insts.lastInstance());
    string result = insts.attribute(insts.numAttributes() - 1).value(Convert.ToInt16(index));
    MessageBox.Show(result);
}
/// <summary>
/// Classifies a single instance with a pre-trained Bagging ensemble and returns
/// the predicted class label as a string.
/// </summary>
/// <param name="attributeArray">Names of the numeric input attributes.</param>
/// <param name="classNames">Labels of the nominal class attribute.</param>
/// <param name="dataValues">One value per input attribute, same order as <paramref name="attributeArray"/>.</param>
/// <param name="classHeader">Name of the class attribute.</param>
/// <param name="defaultclass">Placeholder class label assigned to the test instance.</param>
/// <param name="modelName">File name of the serialized Bagging model to load.</param>
/// <returns>The predicted class label.</returns>
public string testHybridEmotionUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName)
{
    // Nominal class attribute built from the supplied labels.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Numeric input attributes, class attribute appended last.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        attributeList.Add(new weka.core.Attribute(attribute));
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Single instance populated with the supplied values and a placeholder class.
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, dataValues[i]); // direct indexing instead of LINQ ElementAt
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    java.io.File path = new java.io.File("/models/");
    weka.classifiers.meta.Bagging clRead = loadBaggingModel(modelName, path);

    // NOTE(review): randomizing a single-instance set is a no-op; kept for parity with training.
    weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
    myRandom.setInputFormat(data);
    data = weka.filters.Filter.useFilter(data, myRandom);

    double classValue = clRead.classifyInstance(data.get(0));
    // classifyInstance returns the class index as a double; cast it directly instead of
    // the culture-sensitive double -> string -> Int32.Parse round-trip, which can throw
    // under cultures that format doubles with separators.
    return classLabel.get((int)classValue).ToString();
}
/// <summary>
/// Builds a Weka dataset without a class attribute from an extended table:
/// one numeric attribute per table column, one instance per row, all weights 1.0.
/// </summary>
/// <param name="Input">Table whose columns become attributes and rows become instances.</param>
/// <returns>The populated dataset.</returns>
public Instances CreateInstancesWithoutClass(cExtendedTable Input)
{
    // Declare a numeric attribute for each column of the table.
    weka.core.FastVector attributes = new FastVector();
    int totalColumns = 0;
    for (int colIdx = 0; colIdx < Input.Count; colIdx++)
    {
        attributes.addElement(new weka.core.Attribute(Input[colIdx].Name));
        totalColumns++;
    }

    Instances dataset = new Instances("MyRelation", attributes, 0);

    // Row count is taken from the first column; all columns are assumed parallel.
    int rowCount = Input[0].Count;
    for (int row = 0; row < rowCount; row++)
    {
        double[] rowValues = new double[dataset.numAttributes()];
        for (int col = 0; col < totalColumns; col++)
        {
            rowValues[col] = Input[col][row];
        }
        dataset.add(new DenseInstance(1.0, rowValues));
    }
    return dataset;
}
/// <summary>
/// Classifies a single instance with a pre-trained SMO model and returns the
/// predicted class index as a one-element list.
/// </summary>
/// <param name="attributeArray">Names of the numeric input attributes.</param>
/// <param name="classNames">Labels of the nominal class attribute.</param>
/// <param name="dataValues">One value per input attribute, same order as <paramref name="attributeArray"/>.</param>
/// <param name="classHeader">Name of the class attribute.</param>
/// <param name="defaultclass">Placeholder class label assigned to the test instance.</param>
/// <param name="modelName">File name (or path) of the serialized SMO model.</param>
/// <param name="hiddelLayers">Unused by SMO; kept for interface compatibility.</param>
/// <param name="learningRate">Unused by SMO; kept for interface compatibility.</param>
/// <param name="momentum">Unused by SMO; kept for interface compatibility.</param>
/// <param name="decimalPlaces">Unused by SMO; kept for interface compatibility.</param>
/// <param name="trainingTime">Unused by SMO; kept for interface compatibility.</param>
/// <returns>List containing the predicted class index.</returns>
public List<double> testSMOUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName, int hiddelLayers = 7, double learningRate = 0.03, double momentum = 0.4, int decimalPlaces = 2, int trainingTime = 1000)
{
    // Nominal class attribute built from the supplied labels.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Numeric input attributes, class attribute appended last.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        attributeList.Add(new weka.core.Attribute(attribute));
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Single test instance; values rounded to 5 decimals (matches original behavior,
    // presumably to mirror the precision used at training time — TODO confirm).
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, Math.Round(dataValues[i], 5));
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    // Load the serialized SMO model; if the primary loader fails, fall back to
    // deserializing directly from modelName. (Removed unused locals that computed
    // but never used a hosting-environment path.)
    weka.classifiers.functions.SMO clRead;
    try
    {
        java.io.File path = new java.io.File("/models/");
        clRead = loadSMOModel(modelName, path);
    }
    catch (Exception)
    {
        clRead = (weka.classifiers.functions.SMO)weka.core.SerializationHelper.read(modelName);
    }

    // Re-apply the SMO configuration expected by this pipeline.
    clRead.setBatchSize("100");
    clRead.setCalibrator(new weka.classifiers.functions.Logistic());
    clRead.setKernel(new weka.classifiers.functions.supportVector.PolyKernel());
    clRead.setEpsilon(1.02E-12);
    clRead.setC(1.0);
    clRead.setDebug(false);
    clRead.setChecksTurnedOff(false);
    clRead.setFilterType(new SelectedTag(weka.classifiers.functions.SMO.FILTER_NORMALIZE, weka.classifiers.functions.SMO.TAGS_FILTER));

    double classValue = clRead.classifyInstance(data.get(0));

    // Only the predicted class index is returned (the distribution was computed but
    // unused in the original; removed).
    List<double> prediction = new List<double>();
    prediction.Add(classValue);
    return prediction;
}
/// <summary>
/// Builds the "why" dataset from the secondary candidate tokens. Candidates
/// without a value are skipped; the last attribute is the class.
/// </summary>
/// <returns>The populated dataset with its class index set.</returns>
private Instances createWhyInstances()
{
    FastVector attributes = createWhyFastVector();
    Instances result = new Instances("WhyInstances", attributes, listSecondaryWhyCandidates.Count);
    foreach (Token token in listSecondaryWhyCandidates)
    {
        if (token.Value != null)
        {
            Instance row = createSingleWhyInstance(attributes, token);
            row.setDataset(result);
            result.add(row);
        }
    }
    result.setClassIndex(attributes.size() - 1);
    return result;
}
/// <summary>
/// Builds a two-attribute numeric Weka dataset on the fly from two parallel arrays.
/// </summary>
/// <param name="a">Values for attribute "att1".</param>
/// <param name="b">Values for attribute "att2"; must have at least as many elements as <paramref name="a"/>.</param>
/// <returns>Dataset with one instance per index of <paramref name="a"/>, each with weight 1.0.</returns>
/// <exception cref="System.ArgumentException">If <paramref name="b"/> is shorter than <paramref name="a"/>.</exception>
private static Instances CreateInstanceOnFly(double[] a, double[] b)
{
    // Fail fast: the original loop indexed b by a's length and crashed with an
    // IndexOutOfRangeException on mismatched inputs.
    if (b.Length < a.Length)
    {
        throw new System.ArgumentException("Input arrays must have the same length.");
    }

    // 1. set up the two numeric attributes
    FastVector atts = new FastVector();
    atts.addElement(new Attribute("att1"));
    atts.addElement(new Attribute("att2"));

    // 2. create the empty Instances object
    Instances data = new Instances("MyRelation", atts, 0);

    // 3. fill with data, one instance per index
    for (int i = 0; i < a.Length; ++i)
    {
        double[] vals = new double[data.numAttributes()];
        vals[0] = a[i];
        vals[1] = b[i];
        data.add(new weka.core.DenseInstance(1.0, vals));
    }
    return data;
}
/// <summary>
/// Runs a percentage-split evaluation of a BayesNet classifier on the Communication
/// dataset and writes the full Weka evaluation report to a text file.
/// </summary>
public static void Test()
{
    weka.core.Instances data = new weka.core.Instances(new java.io.FileReader("./data/Classification/Communication.arff"));
    data.setClassIndex(data.numAttributes() - 1);
    weka.classifiers.Classifier cls = new weka.classifiers.bayes.BayesNet();
    //Save BayesNet results in .txt file
    using (System.IO.StreamWriter file = new System.IO.StreamWriter("./data/Classification/Communication_Report.txt"))
    {
        file.WriteLine("Performing " + percentSplit + "% split evaluation.");
        int runs = 1;
        // One shuffled split per run (runs == 1, so a single evaluation pass).
        for (int i = 0; i < runs; i++)
        {
            // randomize data with a deterministic per-run seed
            int seed = i + 1;
            java.util.Random rand = new java.util.Random(seed);
            weka.core.Instances randData = new weka.core.Instances(data);
            randData.randomize(rand);
            int trainSize = (int)Math.Round((double)data.numInstances() * percentSplit / 100);
            int testSize = data.numInstances() - trainSize; // NOTE(review): computed but never used
            // Empty copies that inherit the dataset header only.
            weka.core.Instances train = new weka.core.Instances(data, 0, 0);
            weka.core.Instances test = new weka.core.Instances(data, 0, 0);
            train.setClassIndex(train.numAttributes() - 1);
            test.setClassIndex(test.numAttributes() - 1);
            //Print classifier analytics for all the dataset
            file.WriteLine("EVALUATION OF TEST DATASET.");
            // Split the shuffled data: first trainSize instances train, the rest test.
            for (int j = 0; j < data.numInstances(); j++)
            {
                weka.core.Instance currentInst = randData.instance(j);
                if (j < trainSize)
                {
                    train.add(currentInst);
                }
                else
                {
                    test.add(currentInst);
                }
            }
            // build and evaluate classifier
            cls.buildClassifier(train);
            // Test the model
            weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(randData);
            eval.evaluateModel(cls, test);
            // Print the results as in Weka explorer:
            //Print statistics
            String strSummaryTest = eval.toSummaryString();
            file.WriteLine(strSummaryTest);
            file.WriteLine();
            //Print detailed class statistics
            file.WriteLine(eval.toClassDetailsString());
            file.WriteLine();
            //Print confusion matrix
            file.WriteLine(eval.toMatrixString());
            file.WriteLine();
            // Get the confusion matrix (currently unused beyond retrieval)
            double[][] cmMatrixTest = eval.confusionMatrix();
            System.Console.WriteLine("Bayesian Network results saved in Communication_Report.txt file successfully.");
        }
    }
}
/// <summary> Method for testing this class: exercises the Instances API from
/// scratch-built data and, optionally, from an ARFF file.
/// </summary>
/// <param name="argv">should contain at most one element: the name of an ARFF file
/// </param>
//@ requires argv != null;
//@ requires argv.length == 1;
//@ requires argv[0] != null;
public static void test(System.String[] argv)
{
    Instances instances, secondInstances, train, test, empty;
    // Fixed seed for reproducible weights/shuffling.
    System.Random random = new System.Random((System.Int32) 2);
    System.IO.StreamReader reader;
    int start, num;
    FastVector testAtts, testVals;
    int i, j;
    try
    {
        if (argv.Length > 1)
        {
            throw (new System.Exception("Usage: Instances [<filename>]"));
        }
        // Creating set of instances from scratch: one nominal, one numeric attribute,
        // three all-missing instances.
        testVals = new FastVector(2);
        testVals.addElement("first_value");
        testVals.addElement("second_value");
        testAtts = new FastVector(2);
        testAtts.addElement(new Attribute("nominal_attribute", testVals));
        testAtts.addElement(new Attribute("numeric_attribute"));
        instances = new Instances("test_set", testAtts, 10);
        instances.add(new Instance(instances.numAttributes()));
        instances.add(new Instance(instances.numAttributes()));
        instances.add(new Instance(instances.numAttributes()));
        instances.ClassIndex = 0;
        System.Console.Out.WriteLine("\nSet of instances created from scratch:\n");
        System.Console.Out.WriteLine(instances);
        if (argv.Length == 1)
        {
            System.String filename = argv[0];
            reader = new System.IO.StreamReader(filename, System.Text.Encoding.Default);
            // Read first five instances and print them (incremental reading).
            System.Console.Out.WriteLine("\nFirst five instances from file:\n");
            instances = new Instances(reader, 1);
            instances.ClassIndex = instances.numAttributes() - 1;
            i = 0;
            while ((i < 5) && (instances.readInstance(reader)))
            {
                i++;
            }
            System.Console.Out.WriteLine(instances);
            // Read all the instances in the file in one go.
            reader = new System.IO.StreamReader(filename, System.Text.Encoding.Default);
            instances = new Instances(reader);
            // Make the last attribute be the class
            instances.ClassIndex = instances.numAttributes() - 1;
            // Print header and instances.
            System.Console.Out.WriteLine("\nDataset:\n");
            System.Console.Out.WriteLine(instances);
            System.Console.Out.WriteLine("\nClass index: " + instances.classIndex());
        }
        // Test basic methods based on class index.
        System.Console.Out.WriteLine("\nClass name: " + instances.classAttribute().name());
        System.Console.Out.WriteLine("\nClass index: " + instances.classIndex());
        System.Console.Out.WriteLine("\nClass is nominal: " + instances.classAttribute().Nominal);
        System.Console.Out.WriteLine("\nClass is numeric: " + instances.classAttribute().Numeric);
        System.Console.Out.WriteLine("\nClasses:\n");
        for (i = 0; i < instances.numClasses(); i++)
        {
            System.Console.Out.WriteLine(instances.classAttribute().value_Renamed(i));
        }
        System.Console.Out.WriteLine("\nClass values and labels of instances:\n");
        for (i = 0; i < instances.numInstances(); i++)
        {
            Instance inst = instances.instance(i);
            System.Console.Out.Write(inst.classValue() + "\t");
            System.Console.Out.Write(inst.toString(inst.classIndex()));
            if (instances.instance(i).classIsMissing())
            {
                System.Console.Out.WriteLine("\tis missing");
            }
            else
            {
                System.Console.Out.WriteLine();
            }
        }
        // Create random weights.
        System.Console.Out.WriteLine("\nCreating random weights for instances.");
        for (i = 0; i < instances.numInstances(); i++)
        {
            instances.instance(i).Weight = random.NextDouble();
        }
        // Print all instances and their weights (and the sum of weights).
        System.Console.Out.WriteLine("\nInstances and their weights:\n");
        System.Console.Out.WriteLine(instances.instancesAndWeights());
        System.Console.Out.Write("\nSum of weights: ");
        System.Console.Out.WriteLine(instances.sumOfWeights());
        // Insert an attribute at position 0 of a copy.
        secondInstances = new Instances(instances);
        Attribute testAtt = new Attribute("Inserted");
        secondInstances.insertAttributeAt(testAtt, 0);
        System.Console.Out.WriteLine("\nSet with inserted attribute:\n");
        System.Console.Out.WriteLine(secondInstances);
        System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name());
        // Delete the attribute again.
        secondInstances.deleteAttributeAt(0);
        System.Console.Out.WriteLine("\nSet with attribute deleted:\n");
        System.Console.Out.WriteLine(secondInstances);
        System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name());
        // Test if headers are equal after the insert/delete round-trip.
        System.Console.Out.WriteLine("\nHeaders equal: " + instances.equalHeaders(secondInstances) + "\n");
        // Print data in internal format ("?" for missing values).
        System.Console.Out.WriteLine("\nData (internal values):\n");
        for (i = 0; i < instances.numInstances(); i++)
        {
            for (j = 0; j < instances.numAttributes(); j++)
            {
                if (instances.instance(i).isMissing(j))
                {
                    System.Console.Out.Write("? ");
                }
                else
                {
                    System.Console.Out.Write(instances.instance(i).value_Renamed(j) + " ");
                }
            }
            System.Console.Out.WriteLine();
        }
        // Just print header of an empty copy.
        System.Console.Out.WriteLine("\nEmpty dataset:\n");
        empty = new Instances(instances, 0);
        System.Console.Out.WriteLine(empty);
        System.Console.Out.WriteLine("\nClass name: " + empty.classAttribute().name());
        // Create copy and rename an attribute and a value (if possible, i.e. nominal class).
        if (empty.classAttribute().Nominal)
        {
            Instances copy = new Instances(empty, 0);
            copy.renameAttribute(copy.classAttribute(), "new_name");
            copy.renameAttributeValue(copy.classAttribute(), copy.classAttribute().value_Renamed(0), "new_val_name");
            System.Console.Out.WriteLine("\nDataset with names changed:\n" + copy);
            System.Console.Out.WriteLine("\nOriginal dataset:\n" + empty);
        }
        // Create and prints subset of instances (middle half of the data).
        start = instances.numInstances() / 4;
        num = instances.numInstances() / 2;
        System.Console.Out.Write("\nSubset of dataset: ");
        System.Console.Out.WriteLine(num + " instances from " + (start + 1) + ". instance");
        secondInstances = new Instances(instances, start, num);
        System.Console.Out.WriteLine("\nClass name: " + secondInstances.classAttribute().name());
        // Print all instances and their weights (and the sum of weights).
        System.Console.Out.WriteLine("\nInstances and their weights:\n");
        System.Console.Out.WriteLine(secondInstances.instancesAndWeights());
        System.Console.Out.Write("\nSum of weights: ");
        System.Console.Out.WriteLine(secondInstances.sumOfWeights());
        // Create and print training and test sets for 3-fold cross-validation
        // (stratified when the class is nominal).
        System.Console.Out.WriteLine("\nTrain and test folds for 3-fold CV:");
        if (instances.classAttribute().Nominal)
        {
            instances.stratify(3);
        }
        for (j = 0; j < 3; j++)
        {
            train = instances.trainCV(3, j, new System.Random((System.Int32) 1));
            test = instances.testCV(3, j);
            // Print all instances and their weights (and the sum of weights).
            System.Console.Out.WriteLine("\nTrain: ");
            System.Console.Out.WriteLine("\nInstances and their weights:\n");
            System.Console.Out.WriteLine(train.instancesAndWeights());
            System.Console.Out.Write("\nSum of weights: ");
            System.Console.Out.WriteLine(train.sumOfWeights());
            System.Console.Out.WriteLine("\nClass name: " + train.classAttribute().name());
            System.Console.Out.WriteLine("\nTest: ");
            System.Console.Out.WriteLine("\nInstances and their weights:\n");
            System.Console.Out.WriteLine(test.instancesAndWeights());
            System.Console.Out.Write("\nSum of weights: ");
            System.Console.Out.WriteLine(test.sumOfWeights());
            System.Console.Out.WriteLine("\nClass name: " + test.classAttribute().name());
        }
        // Randomize instances and print them.
        System.Console.Out.WriteLine("\nRandomized dataset:");
        instances.randomize(random);
        System.Console.Out.WriteLine("\nInstances and their weights:\n");
        System.Console.Out.WriteLine(instances.instancesAndWeights());
        System.Console.Out.Write("\nSum of weights: ");
        System.Console.Out.WriteLine(instances.sumOfWeights());
        // Sort instances according to first attribute and print them.
        System.Console.Out.Write("\nInstances sorted according to first attribute:\n ");
        instances.sort(0);
        System.Console.Out.WriteLine("\nInstances and their weights:\n");
        System.Console.Out.WriteLine(instances.instancesAndWeights());
        System.Console.Out.Write("\nSum of weights: ");
        System.Console.Out.WriteLine(instances.sumOfWeights());
    }
    catch (System.Exception)
    {
        // NOTE(review): exception is swallowed silently (stack-trace print was
        // commented out in the original Java-to-C# conversion).
    }
}
/// <summary> Merges two sets of Instances together. The resulting set will have
/// all the attributes of the first set plus all the attributes of the
/// second set. The number of instances in both sets must be the same.
/// </summary>
/// <param name="first">the first set of Instances</param>
/// <param name="second">the second set of Instances</param>
/// <returns> the merged set of Instances</returns>
/// <exception cref="IllegalArgumentException">if the datasets are not the same size</exception>
public static Instances mergeInstances(Instances first, Instances second)
{
    if (first.numInstances() != second.numInstances())
    {
        throw new System.ArgumentException("Instance sets must be of the same size");
    }

    // Combined attribute list: all of first's attributes followed by all of second's.
    FastVector combined = new FastVector();
    int idx;
    for (idx = 0; idx < first.numAttributes(); idx++)
    {
        combined.addElement(first.attribute(idx));
    }
    for (idx = 0; idx < second.numAttributes(); idx++)
    {
        combined.addElement(second.attribute(idx));
    }

    // Merge row by row into a dataset named after both relations.
    Instances result = new Instances(first.relationName() + '_' + second.relationName(), combined, first.numInstances());
    for (idx = 0; idx < first.numInstances(); idx++)
    {
        result.add(first.instance(idx).mergeInstance(second.instance(idx)));
    }
    return result;
}
/// <summary> Copies instances from this set to the end of another one.
/// </summary>
/// <param name="from">the position of the first instance to be copied</param>
/// <param name="dest">the destination for the instances</param>
/// <param name="num">the number of instances to be copied</param>
//@ requires 0 <= from && from <= numInstances() - num;
//@ requires 0 <= num;
protected internal virtual void copyInstances(int from, Instances dest, int num)
{
    int copied = 0;
    while (copied < num)
    {
        dest.add(instance(from + copied));
        copied++;
    }
}
/// <summary> Creates a new dataset of the same size using random sampling
/// with replacement according to the given weight vector. The
/// weights of the instances in the new dataset are set to one.
/// The length of the weight vector has to be the same as the
/// number of instances in the dataset, and all weights have to
/// be positive.
/// </summary>
/// <param name="random">a random number generator</param>
/// <param name="weights">the weight vector</param>
/// <returns> the new dataset</returns>
/// <exception cref="IllegalArgumentException">if the weights array is of the wrong
/// length or contains negative weights.</exception>
public virtual Instances resampleWithWeights(System.Random random, double[] weights)
{
    if (weights.Length != numInstances())
    {
        throw new System.ArgumentException("weights.length != numInstances.");
    }
    Instances newData = new Instances(this, numInstances());
    if (numInstances() == 0)
    {
        return newData;
    }
    // Build an increasing vector of cumulative uniform randoms, then rescale it so
    // its range matches the total weight: these act as random sample "positions".
    double[] probabilities = new double[numInstances()];
    double sumProbs = 0, sumOfWeights = Utils.sum(weights);
    for (int i = 0; i < numInstances(); i++)
    {
        sumProbs += random.NextDouble();
        probabilities[i] = sumProbs;
    }
    Utils.normalize(probabilities, sumProbs / sumOfWeights);
    // Make sure that rounding errors don't mess things up
    probabilities[numInstances() - 1] = sumOfWeights;
    // Walk the cumulative weights: instance l is copied once for every sample
    // position that falls inside its weight interval (k indexes sample positions).
    int k = 0; int l = 0;
    sumProbs = 0;
    while ((k < numInstances() && (l < numInstances())))
    {
        if (weights[l] < 0)
        {
            throw new System.ArgumentException("Weights have to be positive.");
        }
        sumProbs += weights[l];
        while ((k < numInstances()) && (probabilities[k] <= sumProbs))
        {
            newData.add(instance(l));
            // Every resampled copy gets unit weight, per the contract above.
            newData.instance(k).Weight = 1;
            k++;
        }
        l++;
    }
    return newData;
}
/// <summary> Creates a new dataset of the same size using random sampling
/// with replacement.
/// </summary>
/// <param name="random">a random number generator</param>
/// <returns> the new dataset</returns>
public virtual Instances resample(System.Random random)
{
    int target = numInstances();
    Instances sampled = new Instances(this, target);
    // Draw with replacement until the sample reaches the original size.
    while (sampled.numInstances() < target)
    {
        int pick = random.Next(target);
        sampled.add(instance(pick));
    }
    return sampled;
}
// Test the classification result of each map that a user played,
// with the data available as if they were playing through it.
// Parses ARFF content from dataString and accumulates unique instances into the
// static allUniqueData set, counting duplicates globally and per player.
public static void classifyTest(String dataString, String playerID)
{
    try
    {
        // Parse the ARFF content supplied as a string.
        java.io.StringReader stringReader = new java.io.StringReader(dataString);
        java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);
        /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/
        weka.core.Instances thisData = new weka.core.Instances(buffReader);
        if (thisData.classIndex() == -1)
            thisData.setClassIndex(thisData.numAttributes() - 1);
        // Empty header-only copy that will collect this player's unique instances.
        weka.core.Instances thisUniqueData = new weka.core.Instances(thisData);
        if (thisUniqueData.classIndex() == -1)
            thisUniqueData.setClassIndex(thisUniqueData.numAttributes() - 1);
        thisUniqueData.delete();
        // Lazily initialize the static accumulator of unique instances across all players.
        if (allUniqueData == null)
        {
            allUniqueData = new weka.core.Instances(thisData);
            if (allUniqueData.classIndex() == -1)
                allUniqueData.setClassIndex(allUniqueData.numAttributes() - 1);
            allUniqueData.delete();
        }
        // Comparator that ignores instance weights (false = don't include class? —
        // see InstanceComparator(boolean) javadoc; TODO confirm flag semantics).
        weka.core.InstanceComparator com = new weka.core.InstanceComparator(false);
        // De-duplicate against the global set (pairwise O(n*m) comparison).
        for (int i = 0; i < thisData.numInstances(); i++)
        {
            bool dup = false;
            for (int j = 0; j < allUniqueData.numInstances(); j++)
            {
                if (com.compare(thisData.instance(i),allUniqueData.instance(j)) == 0)
                {
                    Debug.Log("Duplicate found!");
                    dup = true;
                    break;
                }
            }
            if (!dup)
                allUniqueData.add(thisData.instance(i));
            else
                dupInstances++;
        }
        // De-duplicate within this player's own data.
        for (int i = 0; i < thisData.numInstances(); i++)
        {
            bool dup = false;
            for (int j = 0; j < thisUniqueData.numInstances(); j++)
            {
                if (com.compare(thisData.instance(i),thisUniqueData.instance(j)) == 0)
                {
                    Debug.Log("Duplicate found!");
                    dup = true;
                    break;
                }
            }
            if (!dup)
                thisUniqueData.add(thisData.instance(i));
            else
                dupInstancesSamePlayer++;
        }
        // NOTE(review): playerID is currently unused — confirm whether it was
        // intended to tag the accumulated instances.
    }
    catch (java.lang.Exception ex)
    {
        Debug.LogError(ex.getMessage());
    }
}
/// <summary>
/// Create an instances structure with classes for supervised methods.
/// Wells whose current class equals <paramref name="NeutralClass"/> are excluded.
/// </summary>
/// <param name="InfoClass">Class metadata: number of classes and the correspondence table.</param>
/// <param name="NeutralClass">Class index to skip entirely.</param>
/// <returns>Dataset with one row per retained active well; the last attribute is the nominal class.</returns>
public Instances CreateInstancesWithClasses(cInfoClass InfoClass, int NeutralClass)
{
    // One numeric attribute per active descriptor.
    weka.core.FastVector atts = new FastVector();
    int columnNo = 0;
    for (int i = 0; i < ParentScreening.ListDescriptors.Count; i++)
    {
        if (ParentScreening.ListDescriptors[i].IsActive() == false) continue;
        atts.addElement(new weka.core.Attribute(ParentScreening.ListDescriptors[i].GetName()));
        columnNo++;
    }

    // Nominal class attribute with labels "Class__0", "Class__1", ...
    weka.core.FastVector attVals = new FastVector();
    for (int i = 0; i < InfoClass.NumberOfClass; i++)
        attVals.addElement("Class__" + (i).ToString());
    atts.addElement(new weka.core.Attribute("Class__", attVals));

    Instances data1 = new Instances("MyRelation", atts, 0);
    // One instance per active, non-neutral well. (Removed an unused well counter.)
    foreach (cWell CurrentWell in this.ListActiveWells)
    {
        if (CurrentWell.GetCurrentClassIdx() == NeutralClass) continue;
        double[] vals = new double[data1.numAttributes()];
        int IdxCol = 0;
        for (int Col = 0; Col < ParentScreening.ListDescriptors.Count; Col++)
        {
            if (ParentScreening.ListDescriptors[Col].IsActive() == false) continue;
            vals[IdxCol++] = CurrentWell.ListSignatures[Col].GetValue();
        }
        // Map the well's class index through the correspondence table into the class column.
        vals[columnNo] = InfoClass.CorrespondanceTable[CurrentWell.GetCurrentClassIdx()];
        data1.add(new DenseInstance(1.0, vals));
    }
    data1.setClassIndex(data1.numAttributes() - 1);
    return data1;
}
/// <summary> Sets distribution associated with model.
/// Rebuilds the split's class distribution from the given data, then redistributes
/// instances whose split-attribute value is unknown.</summary>
public override void resetDistribution(Instances data)
{
    // Keep only the instances this split can actually assign to a subset
    // (whichSubset(...) returning -1 presumably means the split value is missing — confirm).
    Instances insts = new Instances(data, data.numInstances());
    for (int i = 0; i < data.numInstances(); i++)
    {
        if (whichSubset(data.instance(i)) > - 1)
        {
            insts.add(data.instance(i));
        }
    }
    // Rebuild the distribution from the assignable instances, then fold the
    // unknown-valued instances back in via addInstWithUnknown.
    Distribution newD = new Distribution(insts, this);
    newD.addInstWithUnknown(data, m_attIndex);
    m_distribution = newD;
}
/// <summary> /// Create an instances structure with classes for supervised methods /// </summary> /// <param name="NumClass"></param> /// <returns></returns> public Instances CreateInstancesWithClasses(List<bool> ListClassSelected) { weka.core.FastVector atts = new FastVector(); int columnNo = 0; for (int i = 0; i < ParentScreening.ListDescriptors.Count; i++) { if (ParentScreening.ListDescriptors[i].IsActive() == false) continue; atts.addElement(new weka.core.Attribute(ParentScreening.ListDescriptors[i].GetName())); columnNo++; } weka.core.FastVector attVals = new FastVector(); foreach (var item in cGlobalInfo.ListWellClasses) { attVals.addElement(item.Name); } atts.addElement(new weka.core.Attribute("ClassAttribute", attVals)); Instances data1 = new Instances("MyRelation", atts, 0); int IdxWell = 0; foreach (cWell CurrentWell in this.ListActiveWells) { if (!ListClassSelected[CurrentWell.GetCurrentClassIdx()]) continue; double[] vals = new double[data1.numAttributes()]; int IdxCol = 0; for (int Col = 0; Col < ParentScreening.ListDescriptors.Count; Col++) { if (ParentScreening.ListDescriptors[Col].IsActive() == false) continue; vals[IdxCol++] = CurrentWell.ListSignatures[Col].GetValue(); } vals[columnNo] = CurrentWell.GetCurrentClassIdx(); data1.add(new DenseInstance(1.0, vals)); IdxWell++; } data1.setClassIndex((data1.numAttributes() - 1)); return data1; }
/// <summary>
/// Trains a BayesNet classifier on a percentage split of iris.arff
/// (<c>percentSplit</c>% train, remainder test), prints the per-instance
/// prediction distributions, the split accuracy, and the Weka-explorer-style
/// summary / class-detail / confusion-matrix reports for both the training
/// and the held-out test data. All output goes to the console.
/// </summary>
public static void BayesTest()
{
    try
    {
        // Load the data set; close the reader once Instances has consumed it
        // (the original leaked the FileReader).
        java.io.FileReader reader = new java.io.FileReader("iris.arff");
        weka.core.Instances insts;
        try
        {
            insts = new weka.core.Instances(reader);
        }
        finally
        {
            reader.close();
        }
        insts.setClassIndex(insts.numAttributes() - 1);

        weka.classifiers.Classifier cl = new weka.classifiers.bayes.BayesNet();
        System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

        // Randomize the order of the instances in the dataset.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        // Empty copy sharing the header; the test rows are appended in the loop below.
        weka.core.Instances test = new weka.core.Instances(insts, 0, 0);

        cl.buildClassifier(train);
        // Print the model.
        System.Console.WriteLine(cl);

        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = cl.classifyInstance(currentInst);
            test.add(currentInst);

            double[] prediction = cl.distributionForInstance(currentInst);
            for (int x = 0; x < prediction.Length; x++)
            {
                System.Console.WriteLine("Probability of class [{0}] for [{1}] is: {2}",
                    currentInst.classAttribute().value(x), currentInst, Math.Round(prediction[x], 4));
            }
            System.Console.WriteLine();

            // Nominal class values are exact attribute indices, so == is safe here.
            if (predictedClass == insts.instance(i).classValue())
            {
                numCorrect++;
            }
        }
        System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" +
            (double)((double)numCorrect / (double)testSize * 100.0) + "%)");

        // Evaluate on the training data and print the results as in the Weka explorer.
        weka.classifiers.Evaluation eTrain = new weka.classifiers.Evaluation(train);
        eTrain.evaluateModel(cl, train);
        System.Console.WriteLine(eTrain.toSummaryString());      // statistics
        System.Console.WriteLine(eTrain.toClassDetailsString()); // detailed class statistics
        System.Console.WriteLine(eTrain.toMatrixString());       // confusion matrix
        double[][] cmMatrixTrain = eTrain.confusionMatrix();     // raw matrix, kept for inspection

        // Evaluate on the held-out test data.
        weka.classifiers.Evaluation eTest = new weka.classifiers.Evaluation(test);
        eTest.evaluateModel(cl, test);
        System.Console.WriteLine(eTest.toSummaryString());
        System.Console.WriteLine(eTest.toClassDetailsString());
        System.Console.WriteLine(eTest.toMatrixString());
        double[][] cmMatrixTest = eTest.confusionMatrix();       // raw matrix, kept for inspection
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
}
/// <summary> /// Create an instances structure with classes for supervised methods /// </summary> /// <param name="NumClass"></param> /// <returns></returns> public Instances CreateInstancesWithClassesWithPlateBasedDescriptor(int NumberOfClass) { weka.core.FastVector atts = new FastVector(); int columnNo = 0; for (int i = 0; i < ParentScreening.ListPlateBaseddescriptorNames.Count; i++) { atts.addElement(new weka.core.Attribute(ParentScreening.ListPlateBaseddescriptorNames[i])); columnNo++; } weka.core.FastVector attVals = new FastVector(); for (int i = 0; i < NumberOfClass; i++) attVals.addElement("Class" + (i).ToString()); atts.addElement(new weka.core.Attribute("Class", attVals)); Instances data1 = new Instances("MyRelation", atts, 0); int IdxWell = 0; foreach (cWell CurrentWell in this.ListActiveWells) { if (CurrentWell.GetCurrentClassIdx() == -1) continue; double[] vals = new double[data1.numAttributes()]; int IdxCol = 0; for (int Col = 0; Col < ParentScreening.ListPlateBaseddescriptorNames.Count; Col++) { vals[IdxCol++] = CurrentWell.ListPlateBasedDescriptors[Col].GetValue(); } vals[columnNo] = CurrentWell.GetCurrentClassIdx(); data1.add(new DenseInstance(1.0, vals)); IdxWell++; } data1.setClassIndex((data1.numAttributes() - 1)); return data1; }
/// <summary>
/// Trains (or loads) the classifier and money-management model held in
/// <paramref name="classifierInfo"/> on the member training instances, then —
/// when testing is enabled — classifies the member test instances and records
/// simulated deals for every instance predicted as class 2.
/// Side effects: mutates classifierInfo (Classifier, MoneyManagement,
/// CurrentTestRet, CurrentClassValue, Deals).
/// NOTE(review): class values appear to encode 0/1/2 outcomes of a trade
/// (see the AddDeal branches below) — confirm against WekaUtils.
/// </summary>
/// <param name="classifierInfo">Candidate being trained/tested; updated in place.</param>
/// <param name="cp">Parameters naming the classifier and money-management types to create.</param>
public void TrainandTest(CandidateClassifier classifierInfo, CandidateParameter cp)
{
    //string dealType = classifierInfo.DealType;
    Classifier cls = null;
    if (TestParameters.UseTrain)
    {
        string modelFileName = GetModelFileName(classifierInfo.Name);
        if (TestParameters.SaveModel)
        {
            // Try to reuse a previously saved model before training a new one.
            cls = WekaUtils.TryLoadClassifier(modelFileName);
        }
        Instances trainInstancesNew, trainInstances;
        trainInstances = m_trainInstances;
        trainInstancesNew = m_trainInstancesNew;
        if (cls == null)
        {
            // No saved model: fall back to (or lazily create) the candidate's classifier.
            if (classifierInfo.Classifier == null)
            {
                classifierInfo.Classifier = WekaUtils.CreateClassifier(cp.ClassifierType, m_currentTp, m_currentSl);
            }
            cls = classifierInfo.Classifier;
        }
        else
        {
            if (TestParameters.EnableDetailLog)
            {
                System.Console.WriteLine("Model is loaded.");
            }
        }
        if (m_enableTrainSplitTest)
        {
            // Split the training data into a train part and a held-back test part.
            // Three strategies: fixed instance count, percentage, or time-based batches.
            Instances trainTrainInst, trainTestInst;
            DateTime splitTrainTimeEnd;
            if (m_trainSplitTestNums != -1)
            {
                // Strategy 1: hold back a fixed number of trailing instances.
                int trainTrainSize = trainInstancesNew.numInstances() - m_trainSplitTestNums;
                int trainTestSize = m_trainSplitTestNums;
                trainTrainInst = new Instances(trainInstancesNew, 0, trainTrainSize);
                trainTestInst = new Instances(trainInstancesNew, trainTrainSize, trainTestSize);
                splitTrainTimeEnd = WekaUtils.GetDateValueFromInstances(trainInstances, 0, trainTrainSize);
            }
            else if (m_trainSplitPercent != -1)
            {
                // Strategy 2: percentage split.
                if (m_trainSplitPercent == 100.0)
                {
                    // 100% means train and "test" on the same full set.
                    int trainTrainSize = trainInstancesNew.numInstances();
                    trainTrainInst = new Instances(trainInstancesNew, 0, trainTrainSize);
                    trainTestInst = new Instances(trainInstancesNew, 0, trainTrainSize);
                    splitTrainTimeEnd = WekaUtils.GetDateValueFromInstances(trainInstances, 0, trainTrainSize);
                }
                else
                {
                    int trainTrainSize = (int)Math.Round(trainInstancesNew.numInstances() * m_trainSplitPercent / 100);
                    int trainTestSize = trainInstancesNew.numInstances() - trainTrainSize;
                    trainTrainInst = new Instances(trainInstancesNew, 0, trainTrainSize);
                    trainTestInst = new Instances(trainInstancesNew, trainTrainSize, trainTestSize);
                    splitTrainTimeEnd = WekaUtils.GetDateValueFromInstances(trainInstances, 0, trainTrainSize);
                }
            }
            else
            {
                // Strategy 3: time-based split. Walk the split boundary back in
                // BatchTestMinutes steps until it is not after the last instance's
                // timestamp, then partition instances by that boundary.
                trainTrainInst = new Instances(trainInstancesNew, 0);
                trainTestInst = new Instances(trainInstancesNew, 0);
                DateTime dt = WekaUtils.GetDateValueFromInstances(trainInstances, 0, trainInstances.numInstances() - 1);
                splitTrainTimeEnd = m_trainTimeEnd.AddMinutes(-TestParameters.BatchTestMinutes);
                while (splitTrainTimeEnd > dt)
                {
                    splitTrainTimeEnd = splitTrainTimeEnd.AddMinutes(-TestParameters.BatchTestMinutes);
                }
                for (int i = 0; i < trainInstances.numInstances(); ++i)
                {
                    // Timestamps come from trainInstances; the copied rows come
                    // from the parallel trainInstancesNew set (same index i).
                    dt = WekaUtils.GetDateValueFromInstances(trainInstances, 0, i);
                    if (dt <= splitTrainTimeEnd)
                    {
                        var ins = new DenseInstance(trainInstancesNew.instance(i));
                        trainTrainInst.add(ins);
                    }
                    else
                    {
                        var ins = new DenseInstance(trainInstancesNew.instance(i));
                        trainTestInst.add(ins);
                    }
                }
            }
            // Train on the train portion only (model optionally saved to disk).
            cls = WekaUtils.TrainInstances(trainTrainInst, TestParameters.SaveModel ? modelFileName : null, cls);
            //m_classifierQueue[dealType].Enqueue(new ClassifierInfo(cls, splitTrainTimeEnd));
            //foreach (var i in m_classifierQueue[dealType])
            //{
            //    var e = WekaUtils.TestInstances(trainTestInst, i.Cls);
            //    i.TotalCost = i.TotalCost * m_classifierQueueFactor + e.totalCost();
            //    i.TotalNum = (int)(i.TotalNum * m_classifierQueueFactor) + (int)e.numInstances();
            //}
            //WriteEvalSummary(eval1, string.Format("Test Data from {0} to {1}", m_testTimeStart.ToString(Parameters.DateTimeFormat), m_testTimeEnd.ToString(Parameters.DateTimeFormat)));
        }
        else
        {
            // No split testing: train on the full training set.
            cls = WekaUtils.TrainInstances(trainInstancesNew, TestParameters.SaveModel ? modelFileName : null, cls);
            //m_classifierQueue[dealType].Enqueue(new ClassifierInfo(cls, m_trainTimeEnd));
            //foreach (var i in m_classifierQueue[dealType])
            //{
            //    var e = WekaUtils.TestInstances(trainInstancesNew, i.Cls);
            //    i.TotalCost = i.TotalCost * m_classifierQueueFactor + e.totalCost();
            //    i.TotalNum = (int)(i.TotalNum * m_classifierQueueFactor) + (int)e.numInstances();
            //}
        }
        if (TestParameters.EnableDetailLog)
        {
            System.Console.WriteLine("Model is trained.");
        }
        classifierInfo.Classifier = cls;
        //if (classifierInfo.CurrentTrainInstances1 != null)
        //{
        //    classifierInfo.CurrentTrainInstances1.clear();
        //}
        //if (classifierInfo.CurrentTrainInstancesNew1 != null)
        //{
        //    classifierInfo.CurrentTrainInstancesNew1.clear();
        //}
        //classifierInfo.CurrentTrainInstances = new Instances(trainInstances, 0, trainInstances.numInstances());
        //classifierInfo.CurrentTrainInstancesNew = new Instances(trainInstancesNew, 0, trainInstancesNew.numInstances());
        // Train the money-management model on the same (full) training set.
        if (classifierInfo.MoneyManagement == null)
        {
            classifierInfo.MoneyManagement = WekaUtils.CreateMoneyManagement(cp.MoneyManagementType, m_currentTp, m_currentSl);
        }
        IMoneyManagement mm = WekaUtils.TrainInstances4MM(trainInstancesNew, TestParameters.SaveModel ? modelFileName : null, classifierInfo.MoneyManagement);
        classifierInfo.MoneyManagement = mm;
    }
    else
    {
        // Training disabled: just make sure the candidate has (untrained or
        // previously trained) classifier and money-management objects.
        if (classifierInfo.Classifier == null)
        {
            classifierInfo.Classifier = WekaUtils.CreateClassifier(cp.ClassifierType, m_currentTp, m_currentSl);
        }
        cls = classifierInfo.Classifier;
        if (classifierInfo.MoneyManagement == null)
        {
            classifierInfo.MoneyManagement = WekaUtils.CreateMoneyManagement(cp.MoneyManagementType, m_currentTp, m_currentSl);
        }
    }
    if (m_enableTest)
    {
        Instances testInstancesNew, testInstances;
        testInstances = m_testInstances;
        testInstancesNew = m_testInstancesNew;
        // cv[i] is the predicted class value for test instance i.
        double[] cv = WekaUtils.ClassifyInstances(testInstancesNew, cls);
        if (TestParameters.EnableExcludeClassifier)
        {
            // Only bother loading/running the exclude classifier if at least
            // one instance was predicted as class 2 (a positive signal).
            bool hasPositive = false;
            for (int i = 0; i < cv.Length; ++i)
            {
                if (cv[i] == 2)
                {
                    hasPositive = true;
                    break;
                }
            }
            if (hasPositive)
            {
                // Exclude
                if (classifierInfo.ExcludeClassifier == null)
                {
                    string modelFileName4Exclude = GetExcludeModelFileName(classifierInfo.Name);
                    classifierInfo.ExcludeClassifier = WekaUtils.TryLoadClassifier(modelFileName4Exclude);
                }
                if (classifierInfo.ExcludeClassifier != null)
                {
                    // A positive survives only if both classifiers agree on class 2.
                    double[] cv2 = WekaUtils.ClassifyInstances(testInstancesNew, classifierInfo.ExcludeClassifier);
                    // cv2 == 0 -> is class, otherwise = double.NaN;
                    for (int i = 0; i < cv.Length; ++i)
                    {
                        cv[i] = cv[i] == 2 && cv2[i] == 2 ? 2 : 0;
                    }
                }
            }
        }
        classifierInfo.CurrentTestRet = cv;
        // Snapshot the actual class values alongside the predictions.
        classifierInfo.CurrentClassValue = new double[testInstances.numInstances()];
        for (int i = 0; i < testInstances.numInstances(); ++i)
        {
            classifierInfo.CurrentClassValue[i] = testInstances.instance(i).classValue();
        }
        // Record a simulated deal for every instance predicted as class 2.
        for (int i = 0; i < testInstances.numInstances(); i++)
        {
            if (cv[i] == 2)
            {
                double openPrice = testInstances.instance(i).value(testInstances.attribute("mainClose"));
                DateTime openTime = WekaUtils.GetDateValueFromInstances(testInstances, 0, i);
                if (testInstances.instance(i).classValue() == 2 || testInstances.instance(i).classValue() == 0)
                {
                    // Outcome known from the class value: book the deal with its
                    // realized profit/loss and its close time (attribute index 1).
                    classifierInfo.Deals.AddDeal(openTime, openPrice, classifierInfo.DealType, classifierInfo.MoneyManagement.GetVolume(testInstances.instance(i)), testInstances.instance(i).classValue() == 2 ? -classifierInfo.Tp : classifierInfo.Sl, WekaUtils.GetDateValueFromInstances(testInstances, 1, i));
                }
                else if (testInstances.instance(i).classValue() == 1)
                {
                    // Open outcome: book the deal with TP/SL price levels instead.
                    // 'B' = buy -> TP above / SL below the open price; otherwise mirrored.
                    double closePriceTp, closePriceSl;
                    if (classifierInfo.DealType == 'B')
                    {
                        closePriceTp = openPrice + classifierInfo.Tp * DealsInfo.GetPoint(0);
                        closePriceSl = openPrice - classifierInfo.Sl * DealsInfo.GetPoint(0);
                    }
                    else
                    {
                        closePriceTp = openPrice - classifierInfo.Tp * DealsInfo.GetPoint(0);
                        closePriceSl = openPrice + classifierInfo.Sl * DealsInfo.GetPoint(0);
                    }
                    classifierInfo.Deals.AddDeal(openTime, openPrice, classifierInfo.DealType, classifierInfo.MoneyManagement.GetVolume(testInstances.instance(i)), closePriceTp, closePriceSl);
                }
                else
                {
                    throw new AssertException("classValue should be 0,1,2.");
                }
            }
        }
    }
}
/// <summary> /// Create an instances structure without classes for unsupervised methods /// </summary> /// <returns>a weka Instances object</returns> public Instances CreateInstancesWithoutClass() { weka.core.FastVector atts = new FastVector(); int columnNo = 0; // Descriptors loop for (int i = 0; i < ParentScreening.ListDescriptors.Count; i++) { if (ParentScreening.ListDescriptors[i].IsActive() == false) continue; atts.addElement(new weka.core.Attribute(ParentScreening.ListDescriptors[i].GetName())); columnNo++; } weka.core.FastVector attVals = new FastVector(); Instances data1 = new Instances("MyRelation", atts, 0); foreach (cWell CurrentWell in this.ListActiveWells) { double[] vals = new double[data1.numAttributes()]; int IdxRealCol = 0; for (int Col = 0; Col < ParentScreening.ListDescriptors.Count; Col++) { if (ParentScreening.ListDescriptors[Col].IsActive() == false) continue; vals[IdxRealCol++] = CurrentWell.ListSignatures[Col].GetValue(); } data1.add(new DenseInstance(1.0, vals)); } return data1; }
/// <summary> /// Create a single instance for WEKA /// </summary> /// <param name="NClasses">Number of classes</param> /// <returns>the weka instances</returns> public Instances CreateInstanceForNClasses(cInfoClass InfoClass) { List<double> AverageList = new List<double>(); for (int i = 0; i < Parent.ListDescriptors.Count; i++) if (Parent.ListDescriptors[i].IsActive()) AverageList.Add(GetAverageValuesList()[i]); weka.core.FastVector atts = new FastVector(); List<string> NameList = Parent.ListDescriptors.GetListNameActives(); for (int i = 0; i < NameList.Count; i++) atts.addElement(new weka.core.Attribute(NameList[i])); weka.core.FastVector attVals = new FastVector(); for (int i = 0; i < InfoClass.NumberOfClass; i++) attVals.addElement("Class" + i); atts.addElement(new weka.core.Attribute("Class__", attVals)); Instances data1 = new Instances("SingleInstance", atts, 0); double[] newTable = new double[AverageList.Count + 1]; Array.Copy(AverageList.ToArray(), 0, newTable, 0, AverageList.Count); //newTable[AverageList.Count] = 1; data1.add(new DenseInstance(1.0, newTable)); data1.setClassIndex((data1.numAttributes() - 1)); return data1; }
/// <summary> Select only instances with weights that contribute to /// the specified quantile of the weight distribution /// /// </summary> /// <param name="data">the input instances /// </param> /// <param name="quantile">the specified quantile eg 0.9 to select /// 90% of the weight mass /// </param> /// <returns> the selected instances /// </returns> protected internal virtual Instances selectWeightQuantile(Instances data, double quantile) { int numInstances = data.numInstances(); Instances trainData = new Instances(data, numInstances); double[] weights = new double[numInstances]; double sumOfWeights = 0; for (int i = 0; i < numInstances; i++) { weights[i] = data.instance(i).weight(); sumOfWeights += weights[i]; } double weightMassToSelect = sumOfWeights * quantile; int[] sortedIndices = Utils.sort(weights); // Select the instances sumOfWeights = 0; for (int i = numInstances - 1; i >= 0; i--) { Instance instance = (Instance) data.instance(sortedIndices[i]).copy(); trainData.add(instance); sumOfWeights += weights[sortedIndices[i]]; if ((sumOfWeights > weightMassToSelect) && (i > 0) && (weights[sortedIndices[i]] != weights[sortedIndices[i - 1]])) { break; } } if (m_Debug) { System.Console.Error.WriteLine("Selected " + trainData.numInstances() + " out of " + numInstances); } return trainData; }