/// <summary>
/// Resolves each configured feature name to its column index in
/// Extractor.ArffAttributeLabels, then loads the ARFF data and trains a
/// J48 decision tree on it.
/// </summary>
public void Build()
{
    // Map every selected feature name onto its position in the extractor's label list.
    this.featureIndex = new int[this.features.Count];
    for (int f = 0; f < this.features.Count; f++)
    {
        string featureName = (string)this.features[f];
        for (int label = 0; label < Extractor.ArffAttributeLabels.Length; label++)
        {
            if (featureName.Equals(Extractor.ArffAttributeLabels[label]))
            {
                this.featureIndex[f] = label;
                break;
            }
        }
    }

    // Load the training data; the class attribute sits right after the features.
    instances = new Instances(new StreamReader(this.filename));
    instances.Class = instances.attribute(this.features.Count);

    // Train the decision tree.
    classifier = new J48();
    classifier.buildClassifier(instances);
}
/// <summary>
/// Loads the ARFF file into a weka instance set and delegates to the
/// Instances-based Train overload.
/// </summary>
/// <param name="arffFilePath">path of the ARFF training file</param>
/// <param name="confidenceLevel">pruning confidence passed through</param>
/// <param name="maxDepth">maximum tree depth; 0 means unlimited</param>
public ID3Node Train(string arffFilePath, double confidenceLevel, int maxDepth = 0)
{
    Instances examples = new weka.core.Instances(new java.io.FileReader(arffFilePath));
    return this.Train(examples, confidenceLevel, maxDepth);
}
/// <summary>
/// Prepares per-class subsets of the dataset for incremental-batch evaluation.
/// </summary>
/// <param name="batchSize">intended batch size (not yet used in this stage)</param>
public void EvaluateIncrementalBatches(int batchSize)
{
    // Randomize filter (prepared here; application to the data is still TODO).
    Randomize randomizeFilter = new Randomize();
    randomizeFilter.setInputFormat(this.data);

    // Number of class values = cardinality of the last (class) attribute.
    int numClasses = this.data.attribute(this.data.numAttributes() - 1).numValues();
    Instances[] classInstances = new Instances[numClasses];
    for (int i = 1; i <= numClasses; i++)
    {
        RemoveWithValues removeValuesFilter = new RemoveWithValues();
        removeValuesFilter.setInputFormat(this.data);
        removeValuesFilter.set_MatchMissingValues(false);
        // NOTE(review): the nominal index was hard-coded to "1-1" for every
        // class; presumably it should select class value i — TODO confirm.
        removeValuesFilter.set_NominalIndices("1-1");
        // BUG FIX: the original wrote to classInstances[i] (1-based), which
        // threw IndexOutOfRange at i == numClasses and left slot 0 empty.
        classInstances[i - 1] = Filter.useFilter(this.data, removeValuesFilter);
    }
}
/// <summary> Sets the format of the input instances.
/// </summary>
/// <param name="instanceInfo">an Instances object containing the input instance
/// structure (any instances contained in the object are ignored - only the
/// structure is required).
/// </param>
/// <returns> true if the outputFormat may be collected immediately
/// </returns>
/// <exception cref="Exception">if the inputFormat can't be set successfully
/// </exception>
public override bool setInputFormat(Instances instanceInfo)
{
    base.setInputFormat(instanceInfo);
    // Reset the lazily-built removal filter; it is rebuilt for the new format.
    m_removeFilter = null;
    // Output format is not known until the filter has been configured, hence false.
    return false;
}
/// <summary>
/// Applies the unsupervised NominalToBinary filter (dummy/indicator coding)
/// to the dataset and stores the filtered copy in this.Instance.
/// </summary>
/// <param name="instances">dataset to encode</param>
public void NominalToBinary(weka.core.Instances instances)
{
    var binarizer = new weka.filters.unsupervised.attribute.NominalToBinary();
    binarizer.setInputFormat(instances);
    this.Instance = weka.filters.Filter.useFilter(instances, binarizer);
}
/// <summary>
/// Loads the ARFF file and evaluates the given ID3 tree against it via the
/// Instances-based Test overload.
/// </summary>
/// <param name="arffFilePath">path of the ARFF test file</param>
/// <param name="root">root of the trained tree to evaluate</param>
public void Test(string arffFilePath, ID3Node root)
{
    Instances examples = new weka.core.Instances(new java.io.FileReader(arffFilePath));
    this.Test(examples, root);
}
/// <summary>
/// Shuffles the given dataset with the unsupervised Randomize filter and
/// stores the result in this.Instance.
/// </summary>
/// <param name="instances">dataset to shuffle</param>
public void Randomize(weka.core.Instances instances)
{
    weka.filters.Filter randomize = new weka.filters.unsupervised.instance.Randomize();
    // BUG FIX: the input format must come from the dataset being filtered.
    // The original initialized the filter from this.Instance, which may be a
    // different (or null) dataset than the one passed in; the sibling
    // NominalToBinary/Normalization helpers all use the parameter.
    randomize.setInputFormat(instances);
    instances = weka.filters.Filter.useFilter(instances, randomize);
    this.Instance = instances;
}
/// <summary> Generates the classifier.
/// </summary>
/// <param name="instances">set of instances serving as training data
/// </param>
/// <exception cref="Exception">if the classifier has not been generated successfully
/// </exception>
public override void buildClassifier(Instances instances)
{
    double sumOfWeights = 0;
    m_Class = instances.classAttribute();
    m_ClassValue = 0;
    switch (instances.classAttribute().type())
    {
        case weka.core.Attribute.NUMERIC:
            // Numeric class: no count array needed, predict the weighted mean.
            m_Counts = null;
            break;

        case weka.core.Attribute.NOMINAL:
            // Laplace-style initialization: start every class count at 1,
            // so sumOfWeights starts at the number of classes.
            m_Counts = new double[instances.numClasses()];
            for (int i = 0; i < m_Counts.Length; i++)
            {
                m_Counts[i] = 1;
            }
            sumOfWeights = instances.numClasses();
            break;

        default:
            throw new System.Exception("ZeroR can only handle nominal and numeric class" + " attributes.");
    }
    // Accumulate class weights (nominal) or weighted class values (numeric).
    // (Enumerator was auto-converted from java.util.Enumeration.)
    System.Collections.IEnumerator enu = instances.enumerateInstances();
    while (enu.MoveNext())
    {
        Instance instance = (Instance) enu.Current;
        if (!instance.classIsMissing())
        {
            if (instances.classAttribute().Nominal)
            {
                m_Counts[(int) instance.classValue()] += instance.weight();
            }
            else
            {
                m_ClassValue += instance.weight() * instance.classValue();
            }
            sumOfWeights += instance.weight();
        }
    }
    if (instances.classAttribute().Numeric)
    {
        // Predict the weighted mean of the observed class values.
        if (Utils.gr(sumOfWeights, 0))
        {
            m_ClassValue /= sumOfWeights;
        }
    }
    else
    {
        // Predict the majority class; normalize counts into a distribution.
        m_ClassValue = Utils.maxIndex(m_Counts);
        Utils.normalize(m_Counts, sumOfWeights);
    }
}
/// <summary>
/// Builds a GestureSample from the raw capture data and classifies it: either
/// with the Rubine recognizer, or with the trained weka classifier via a
/// temporary ARFF round-trip ("outfile.arff").
/// </summary>
/// <returns>name of the predicted gesture label</returns>
public static string Classify(bool useRubine, float duration, bool righthandedness, List<float> SpeakerAngles, PointCollection pointHist, StylusPointCollection S, List<List<int>> hist, List<List<int>> ihist)
{
    // Convert the capture structures into the GestureTests vector format.
    var interpreted = new List<Vector2>();
    var stylus = new List<Vector2>();
    var velocities = new List<Vector2>();
    var inverseVelocities = new List<Vector2>();
    foreach (Point p in pointHist)
        interpreted.Add(new Vector2((float)p.X, (float)p.Y));
    foreach (StylusPoint p in S)
        stylus.Add(new Vector2((float)p.X, (float)p.Y));
    for (int k = 0; k < hist[0].Count; k++)
    {
        velocities.Add(new Vector2(hist[0][k], hist[1][k]));
        inverseVelocities.Add(new Vector2(ihist[0][k], ihist[1][k]));
    }

    // Build the sample and derive its features.
    GS = new GestureSample(GestureTests.Types.GestureType.unknown, righthandedness, duration, SpeakerAngles, interpreted, stylus, velocities, inverseVelocities);
    GS.ComputeFeatures(GestureFeatures.PointsStroke);

    if (useRubine)
        return EC.Recognizer.Classify(GS).ToString();

    // Round-trip through an ARFF file so the weka classifier can read the sample.
    WriteARFF();
    Instances test = new Instances(new java.io.FileReader("outfile.arff"));
    test.setClassIndex(0);
    double clsLabel = cls.classifyInstance(test.instance(0));
    test.instance(0).setClassValue(clsLabel);

    // Labels are 1-based in GestureType2D.
    return ((GestureType2D)((int)clsLabel + 1)).ToString();
}
/// <summary>
/// Determines all categories --> categories
/// Determines category numbers of each attributes -->> categoryTypeNumber
/// Determines target numbers and amounts of each categories of each attributes -->> categoryTypeTargetNumber
/// [i][j][k] i means attributes, j means categories, k means targets
/// </summary>
/// <param name="insts">discretized dataset to analyze</param>
private void DataPreparation(weka.core.Instances insts)
{
    // Collect the value names of every attribute, class attribute included (last).
    for (int i = 0; i < insts.numAttributes(); i++)
    {
        string[] categoryType = new string[insts.attribute(i).numValues()];
        for (int j = 0; j < insts.attribute(i).numValues(); j++)
        {
            categoryType[j] = insts.attribute(i).value(j).ToString();
        }
        categories.Add(categoryType);
    }
    // Snapshot every instance as a row of string values.
    List<List<string>> lst = new List<List<string>>();
    for (int i = 0; i < insts.numInstances(); i++)
    {
        lst.Add(new List<string>());
        for (int j = 0; j < insts.instance(i).numValues(); j++)
        {
            lst[lst.Count - 1].Add(insts.instance(i).toString(j));
        }
    }
    // Per-attribute category counts, and per-attribute [category, target] counts.
    // The last attribute's categories are the target (class) values.
    List<int[]> categoryTypeNumber = new List<int[]>();
    List<int[,]> categoryTypeTargetNumber = new List<int[,]>();
    for (int i = 0; i < categories.Count; i++)
    {
        categoryTypeNumber.Add(new int[categories[i].Length]);
        categoryTypeTargetNumber.Add(new int[categories[i].Length, categories[categories.Count - 1].Length]);
    }
    for (int i = 0; i < lst.Count; i++) // rows (instances)
    {
        for (int j = 0; j < lst[i].Count; j++) // columns (attributes)
        {
            for (int k = 0; k < categories[j].Length; k++) // category count
            {
                string targetValue = lst[i][lst[i].Count - 1];
                if (lst[i][j].Contains(categories[j][k]))
                {
                    categoryTypeNumber[j][k] += 1;
                    for (int trgt = 0; trgt < categories[categories.Count - 1].Length; trgt++)
                    {
                        if (targetValue == categories[categories.Count - 1][trgt])
                        {
                            categoryTypeTargetNumber[j][k, trgt] += 1;
                        }
                    }
                }
            }
        }
    }
    Twoing(insts, categoryTypeNumber, categoryTypeTargetNumber);
    Gini(insts, categoryTypeNumber, categoryTypeTargetNumber);
    LogInfo("Dataset is saved.\r\n\r\n");
    LogInfo("TWOING : " + twoingPath + "\r\n\r\n");
    LogInfo("GINI : " + giniPath + "\r\n");
}
/// <summary>
/// Serializes the collected training vectors to ARFF in memory, then trains
/// one classifier per tag ("this.not" of them). For classifier i, every label
/// column except label i is removed before training, and the surviving label
/// column becomes the class attribute.
/// </summary>
public void EndTrainingSession()
{
    Stream s = new MemoryStream();
    TextWriter tw = new StreamWriter(s);
    AbstractBasicTextVector.WriteInstancesArff(tw, vectors, "c45recommender", tags, results);
    tw.Flush();
    // Rewind so the ARFF just written can be read back as a dataset.
    s.Position = 0;
    Instances source = new Instances(new InputStreamReader(new InputStreamWrapper(s)));
    tw.Close();
    s.Close();
    Instances[] derived = new Instances[this.not];
    classifiers = new AbstractClassifier[this.not];
    // args holds the 1-based indices of the label columns to REMOVE.
    // l = index of the first label column; initially remove labels 2..not,
    // keeping label 1 for the first classifier.
    int[] args = new int[this.not - 1];
    int l = source.numAttributes() - this.not;
    for (int i = 0; i < this.not - 1; i++)
    {
        args[i] = i + l + 1;
    }
    for (int i = 0; i < this.not; i++)
    {
        Remove rem = new Remove();
        rem.setAttributeIndicesArray(args);
        rem.setInputFormat(source);
        derived[i] = Filter.useFilter(source, rem);
        classifiers[i] = GenerateClassifier();
        derived[i].setClassIndex(derived[i].numAttributes() - 1);
        classifiers[i].buildClassifier(derived[i]);
        // Slide the removal window: put label i back into the removal set so
        // the next iteration keeps label i+1 instead.
        if (i < this.not - 1)
        {
            args[i] = l + i;
        }
    }
    datasets = derived;
}
/// <summary>
/// Normalizes the dataset with the unsupervised Normalize filter and stores
/// the result in this.Instance.
/// </summary>
/// <param name="instances">dataset to normalize</param>
public void Normalization(weka.core.Instances instances)
{
    var normalizer = new weka.filters.unsupervised.instance.Normalize();
    normalizer.setInputFormat(instances);
    this.Instance = weka.filters.Filter.useFilter(instances, normalizer);
}
/// <summary> Sets the format of the input instances.
/// </summary>
/// <param name="instanceInfo">an Instances object containing the input
/// instance structure (any instances contained in the object are
/// ignored - only the structure is required).
/// </param>
/// <returns> true if the outputFormat may be collected immediately
/// </returns>
/// <exception cref="Exception">if the input format can't be set
/// successfully
/// </exception>
public virtual bool setInputFormat(Instances instanceInfo)
{
    base.setInputFormat(instanceInfo);
    // Output structure is identical to the input structure for this filter.
    setOutputFormat(instanceInfo);
    // Modes/means are recomputed lazily from the next batch of input.
    m_ModesAndMeans = null;
    return true;
}
/// <summary>
/// Discretizes numeric attributes with the unsupervised Discretize filter and
/// stores the result in this.Instance. (Method name kept for compatibility.)
/// </summary>
/// <param name="instances">dataset to discretize</param>
public void Discreatization(weka.core.Instances instances)
{
    var discretizer = new weka.filters.unsupervised.attribute.Discretize();
    discretizer.setInputFormat(instances);
    this.Instance = weka.filters.Filter.useFilter(instances, discretizer);
}
/// <summary>
/// Writes the given dataset to disk as an ARFF file using the shared saver.
/// </summary>
/// <param name="instances">dataset to persist</param>
/// <param name="fileName">destination path</param>
public static void SaveInstances(weka.core.Instances instances, string fileName)
{
    var destination = new java.io.File(fileName);
    m_arffSaver.setInstances(instances);
    m_arffSaver.setFile(destination);
    m_arffSaver.writeBatch();
}
/// <summary>
/// Discretizes numeric attributes (bin count chosen automatically) and feeds
/// the filtered dataset into DataPreparation.
/// </summary>
/// <param name="insts">raw dataset to discretize</param>
private void Discreatization(weka.core.Instances insts)
{
    var discretizer = new weka.filters.unsupervised.attribute.Discretize();
    discretizer.setInputFormat(insts);
    discretizer.setFindNumBins(true);
    DataPreparation(weka.filters.Filter.useFilter(insts, discretizer));
}
/// <summary>
/// Builds a single-row dataset from the supplied attribute values, loads a
/// saved multilayer perceptron from "/models/", re-applies the network
/// hyper-parameters, and classifies the row.
/// </summary>
/// <returns>list: [predicted class value, per-class probabilities in %]</returns>
public List<double> testMLPUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName, int hiddelLayers = 7, double learningRate = 0.03, double momentum = 0.4, int decimalPlaces = 2, int trainingTime = 1000)
{
    // Class attribute with its nominal label set.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Numeric feature attributes, class attribute last.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        attributeList.Add(new weka.core.Attribute(attribute));
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // One instance carrying the supplied feature values plus a placeholder class.
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, dataValues.ElementAt(i));
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    // Load the persisted network and restore its hyper-parameters.
    java.io.File path = new java.io.File("/models/");
    weka.classifiers.functions.MultilayerPerceptron clRead = loadModel(modelName, path);
    clRead.setHiddenLayers(hiddelLayers.ToString());
    clRead.setLearningRate(learningRate);
    clRead.setMomentum(momentum);
    clRead.setNumDecimalPlaces(decimalPlaces);
    clRead.setTrainingTime(trainingTime);

    // Shuffle for parity with training (a no-op for a single row).
    weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
    myRandom.setInputFormat(data);
    data = weka.filters.Filter.useFilter(data, myRandom);

    double classValue = clRead.classifyInstance(data.get(0));
    double[] predictionDistribution = clRead.distributionForInstance(data.get(0));

    // Convert the distribution to percentages.
    // FIX: removed the unused classValueString1 lookup the original recomputed
    // on every iteration.
    List<double> predictionDistributions = new List<double>();
    for (int d = 0; d < predictionDistribution.Count(); d++)
    {
        predictionDistributions.Add(predictionDistribution[d] * 100);
    }

    List<double> prediction = new List<double>();
    prediction.Add(classValue);
    prediction.AddRange(predictionDistributions);
    return prediction;
}
/// <summary>
/// Bulk-adds instances by reflecting into the dataset's private m_Instances
/// backing list, bypassing weka's per-add bookkeeping for speed.
/// </summary>
/// <param name="instances">target dataset</param>
/// <param name="listInstances">instances to append</param>
public static void AddInstanceQuickly(weka.core.Instances instances, IList<weka.core.Instance> listInstances)
{
    var backingList = Feng.Utils.ReflectionHelper.GetObjectValue(instances, "m_Instances") as java.util.ArrayList;
    foreach (var instance in listInstances)
    {
        instance.setDataset(instances);
        backingList.add(instance);
    }
}
// Test the classification result of each map that a user played,
// with the data available as if they were playing through it
public static void classifyTest(String dataString, String playerID)
{
    String results = "";
    try
    {
        java.io.StringReader stringReader = new java.io.StringReader(dataString);
        java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);
        /* NOTE THAT FOR NAIVE BAYES ALL WEIGHTS CAN BE = 1*/
        //weka.core.converters.ConverterUtils.DataSource source = new weka.core.converters.ConverterUtils.DataSource("iris.arff");
        weka.core.Instances data = new weka.core.Instances(buffReader); //source.getDataSet();
        // setting class attribute if the data format does not provide this information
        // For example, the XRFF format saves the class attribute information as well
        if (data.classIndex() == -1)
            data.setClassIndex(data.numAttributes() - 1);
        weka.classifiers.Classifier cl;
        // Grow the training window one map at a time (starting at 3 maps) and
        // cross-validate a fresh classifier on each prefix.
        for (int i = 3; i < data.numInstances(); i++)
        {
            cl = new weka.classifiers.bayes.NaiveBayes();
            // Alternative classifiers tried during development:
            //cl = new weka.classifiers.trees.J48();
            //cl = new weka.classifiers.lazy.IB1();
            //cl = new weka.classifiers.functions.MultilayerPerceptron();
            //((weka.classifiers.functions.MultilayerPerceptron)cl).setHiddenLayers("12");
            weka.core.Instances subset = new weka.core.Instances(data, 0, i);
            cl.buildClassifier(subset);
            weka.classifiers.Evaluation eval = new weka.classifiers.Evaluation(subset);
            eval.crossValidateModel(cl, subset, 3, new java.util.Random(1));
            results = results + eval.pctCorrect(); // For accuracy measurement
            /* For Mathews Correlation Coefficient */
            //double TP = eval.numTruePositives(1);
            //double FP = eval.numFalsePositives(1);
            //double TN = eval.numTrueNegatives(1);
            //double FN = eval.numFalseNegatives(1);
            //double correlationCoeff = ((TP*TN)-(FP*FN))/Math.Sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN));
            //results = results + correlationCoeff;
            if (i != data.numInstances() - 1)
                results = results + ", ";
            if (i == data.numInstances() - 1)
                Debug.Log("Player: " + playerID + ", Num Maps: " + data.numInstances() + ", AUC: " + eval.areaUnderROC(1));
        }
    }
    catch (java.lang.Exception ex)
    {
        Debug.LogError(ex.getMessage());
    }
    // Write values to file for a matlab read
    // For accuracy
    StreamWriter writer = new StreamWriter("DataForMatlab/" + playerID + "_CrossFoldValidations_NeuralNet.txt");
    //StreamWriter writer = new StreamWriter("DataForMatlab/"+playerID+"_CrossFoldCorrCoeff.txt"); // For mathews cc
    writer.WriteLine(results);
    writer.Close();
    Debug.Log(playerID + " has been written to file");
}
/// <summary> Sets the format of the input instances.
/// </summary>
/// <param name="instanceInfo">an Instances object containing the input
/// instance structure (any instances contained in the object are
/// ignored - only the structure is required).
/// </param>
/// <returns> true if the outputFormat may be collected immediately
/// </returns>
/// <exception cref="Exception">if the input format can't be set
/// successfully
/// </exception>
public override bool setInputFormat(Instances instanceInfo)
{
    base.setInputFormat(instanceInfo);
    // Clamp the configured column range to the attributes actually present.
    m_Columns.Upper = instanceInfo.numAttributes() - 1;
    setOutputFormat();
    // Cached attribute indices are invalid for the new format.
    m_Indices = null;
    return true;
}
/// <summary>
/// Trains the given classifier on the first percentSplit% of the ARFF file
/// and returns the accuracy on the remaining instances.
/// </summary>
/// <param name="classifierFileName">path of the ARFF dataset</param>
/// <param name="_classifier">untrained classifier to build and evaluate</param>
/// <returns>percentage of correctly classified test instances (0 on error)</returns>
public static double classifyTrain_Test(string classifierFileName, Classifier _classifier)
{
    double performance = 0.0;
    try
    {
        FileReader javaFileReader = new FileReader(classifierFileName);
        weka.core.Instances insts = new weka.core.Instances(javaFileReader);
        javaFileReader.close();

        insts.setClassIndex(insts.numAttributes() - 1);
        System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        _classifier.buildClassifier(train);

        // FIX: removed unused per-iteration work from the original
        // (distributionForInstance result, getClass() call, dead counter).
        int numCorrect = 0;
        int numInstances = insts.numInstances();
        for (int i = trainSize; i < numInstances; i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictClass = _classifier.classifyInstance(currentInst);
            // Compare predicted and actual class labels by name.
            string actualClass = insts.classAttribute().value((int)insts.instance(i).classValue());
            string predictedClass = insts.classAttribute().value((int)predictClass);
            if (predictedClass == actualClass)
            {
                numCorrect++;
            }
        }
        // NOTE(review): divides by testSize — a 100% split (empty test set)
        // would divide by zero; callers are expected to use percentSplit < 100.
        performance = (double)numCorrect / (double)testSize * 100;
        // FIX: java-style performance.toString() replaced with C# ToString().
        System.Console.WriteLine(numCorrect + " out of " + testSize + " correct (" + performance.ToString() + "%)");
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
    return performance;
}
/// <summary>
/// Loads the ARFF file into the shared instances field and returns a fresh
/// (currently unpopulated) ArffStore.
/// </summary>
public ArffStore parse()
{
    var store = new ArffStore();
    instances = new Instances(new StreamReader(this.filename));
    return store;
}
/// <summary>
/// Builds a model-selection object from a serialized XML node, reading the
/// minimum-objects setting when the attribute is present.
/// </summary>
/// <param name="model">XML node holding persisted settings</param>
/// <param name="allData">full training dataset</param>
public BinC45ModelSelection(XmlNode model, Instances allData)
{
    foreach (XmlAttribute attribute in model.Attributes)
    {
        if (attribute.Name == Constants.MIN_NO_OBJ_ATTRIBUTE)
        {
            this.m_minNoObj = Convert.ToInt32(attribute.Value);
        }
    }
    m_allData = allData;
}
/// <summary>
/// Loads a dataset twice (once for the class index, once as the "test" set)
/// and dumps per-instance prediction distributions from an Evaluation.
/// NOTE(review): the evaluation calls inside the loop are commented out, so
/// no predictions are ever recorded and the J48 "model" is never trained —
/// this method is unfinished as written.
/// </summary>
public static void classifierTwo(string classifierFileName, string predictionModel)
{
    FileReader javaFileReader = new FileReader(classifierFileName);
    weka.core.Instances wekaInsts = new weka.core.Instances(javaFileReader);
    javaFileReader.close();
    wekaInsts.setClassIndex(wekaInsts.numAttributes() - 1);
    //Classifier nbTree = (Classifier)SerializationHelper.read(Model) as J48;
    Instances testDataSet = new Instances(new BufferedReader(new FileReader(classifierFileName)));
    testDataSet.setClassIndex(wekaInsts.numAttributes() - 1);
    //testDataSet.setClassIndex(10);
    Evaluation evaluation = new Evaluation(testDataSet);
    J48 model = new J48();
    //Classifier myClassifier = (Classifier)SerializationHelper.read(Model) as NaiveBayes;
    //Classifier myClassifier = new NaiveBayes();
    for (int i = 0; i < testDataSet.numInstances(); i++)
    {
        Instance instance = testDataSet.instance(i);
        // TODO: both evaluation calls are disabled; re-enable one of them.
        //evaluation.evaluateModelOnceAndRecordPrediction(myClassifier, instance);
        //evaluation.evaluateModelOnce(myClassifier, instance);
    }
    // With the loop above disabled, predictions() is empty and nothing prints.
    foreach (object o in evaluation.predictions().toArray())
    {
        NominalPrediction prediction = o as NominalPrediction;
        if (prediction != null)
        {
            double[] distribution = prediction.distribution();
            double predicted = prediction.predicted();
            for (int i = 0; i < distribution.Length; i++)
            {
                System.Console.WriteLine(distribution[i]);
            }
            System.Console.WriteLine(predicted);
        }
    }
    System.Console.WriteLine(evaluation);
    System.Console.ReadKey();
}
/// <summary>
/// Loads the ARFF file, marks the last attribute as the class, and caches the
/// example count and the number of class values.
/// </summary>
/// <param name="arffFile">path to the ARFF dataset</param>
public Evaluator(string arffFile)
{
    this.data = new Instances(new StreamReader(arffFile));
    this.data.ClassIndex = this.data.numAttributes() - 1;
    // Reads the dataset's backing list directly for a fast size lookup.
    this.numExamples = this.data.m_Instances.size();
    this.classCount = this.data.attribute(this.data.numAttributes() - 1).numValues();
    // this.trainingSizeMatrix=new double[
}
/// <summary>
/// Trains an SMO support-vector machine on a percentSplit% split of the data
/// (after dummy coding, normalization and shuffling) and returns the test
/// accuracy as a percentage (0 on error).
/// </summary>
/// <param name="insts">dataset; last attribute is used as the class</param>
public static double SupportVectorMachineTest(weka.core.Instances insts)
{
    try
    {
        insts.setClassIndex(insts.numAttributes() - 1);
        SupportVectorMachine = new weka.classifiers.functions.SMO();

        // Nominal -> binary indicator attributes.
        weka.filters.Filter myDummy = new weka.filters.unsupervised.attribute.NominalToBinary();
        myDummy.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myDummy);

        // Scale numeric attributes.
        weka.filters.Filter myNormalize = new weka.filters.unsupervised.instance.Normalize();
        myNormalize.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myNormalize);

        // Shuffle before splitting.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        SupportVectorMachine.buildClassifier(new weka.core.Instances(insts, 0, trainSize));

        // Count hits over the held-out tail.
        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            if (SupportVectorMachine.classifyInstance(currentInst) == insts.instance(i).classValue())
            {
                numCorrect++;
            }
        }
        return (double)numCorrect / (double)testSize * 100.0;
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
        return 0;
    }
}
/// <summary>
/// Runs 10-fold cross-validation of an SMO classifier over the dataset and
/// writes one "actual - predicted - revision - weight - margin - distribution"
/// line per prediction to "&lt;predictionModel&gt;NbCl.txt".
/// </summary>
public static void classifierOne(string classifierFileName, string predictionModel)
{
    FileReader javaFileReader = new FileReader(classifierFileName);
    weka.core.Instances wekaInsts = new weka.core.Instances(javaFileReader);
    javaFileReader.close();
    wekaInsts.setClassIndex(wekaInsts.numAttributes() - 1);

    Classifier cl = new SMO();
    java.util.Random random = new java.util.Random(1);
    Evaluation evaluation = new Evaluation(wekaInsts);
    evaluation.crossValidateModel(cl, wekaInsts, 10, random);

    // Kept from the original: iterates the metric names without using them.
    foreach (object o in evaluation.getMetricsToDisplay().toArray())
    {
    }

    int count = 0;
    StringBuilder sb = new StringBuilder();
    foreach (object o in evaluation.predictions().toArray())
    {
        NominalPrediction prediction = o as NominalPrediction;
        if (prediction != null)
        {
            double[] distribution = prediction.distribution();
            double predicted = prediction.predicted();
            double actual = prediction.actual();
            string revision = prediction.getRevision();
            double weight = prediction.weight();
            double margine = prediction.margin();

            // Concatenate the raw distribution values into one field.
            string distributions = String.Empty;
            for (int i = 0; i < distribution.Length; i++)
            {
                distributions += distribution[i];
            }
            sb.Append(String.Format("{0} - {1} - {2} - {3} - {4} - {5}\n", actual, predicted, revision, weight, margine, distributions));
        }
        count++;
    }
    File_Helper.WriteToFile(sb, predictionModel + "NbCl.txt");
    System.Console.WriteLine(count);
    System.Console.ReadKey();
}
/// <summary>
/// Loads a dataset from CSV or ARFF, sets the last attribute as class, and
/// builds one input control per feature attribute: a drop-down for nominal
/// attributes, a numeric-only text box for numeric ones.
/// </summary>
/// <param name="location">path of the dataset file</param>
private async Task loadFileAndMakeElements(string location)
{
    // CSV goes through the converter; anything else is read as ARFF.
    if (location.EndsWith(".csv"))
    {
        weka.core.converters.CSVLoader csvLoader = new weka.core.converters.CSVLoader();
        csvLoader.setSource(new java.io.File(location));
        insts = csvLoader.getDataSet();
        insts.setClassIndex(insts.numAttributes() - 1);
    }
    else
    {
        insts = new weka.core.Instances(new java.io.FileReader(location));
        insts.setClassIndex(insts.numAttributes() - 1);
    }

    flowLayoutPanel1.Controls.Clear();
    for (int i = 0; i < insts.numAttributes() - 1; i++)
    {
        if (insts.attribute(i).isNominal() == true)
        {
            // Only attributes with at least one declared value get a control.
            if (insts.attribute(i).numValues() > 0)
            {
                Label label = new Label();
                label.Text = insts.attribute(i).name().Trim();
                label.Enabled = true;
                ComboBox combo = new ComboBox();
                combo.Name = insts.attribute(i).name();
                for (int v = 0; v < insts.attribute(i).numValues(); v++)
                {
                    combo.Items.Add(insts.attribute(i).value(v));
                }
                combo.DropDownStyle = ComboBoxStyle.DropDownList;
                combo.Enabled = true;
                flowLayoutPanel1.Controls.Add(label);
                flowLayoutPanel1.Controls.Add(combo);
            }
        }
        else if (insts.attribute(i).isNumeric() == true)
        {
            Label label = new Label();
            label.Text = insts.attribute(i).name().Trim();
            TextBox textBox = new TextBox();
            textBox.Name = insts.attribute(i).name();
            // Restrict key input to numeric characters.
            textBox.KeyPress += new KeyPressEventHandler(txtBox_Keypress);
            flowLayoutPanel1.Controls.Add(label);
            flowLayoutPanel1.Controls.Add(textBox);
        }
    }
}
/// <summary>
/// Creates the base dataset: one string attribute for the todo text plus one
/// binary (0/1) nominal label attribute per tag.
/// </summary>
/// <param name="nbTags">number of label attributes to create</param>
public VectorClassif(int nbTags)
{
    tagsNb = nbTags;

    // Nominal domain shared by every label attribute.
    ArrayList binaryValues = new ArrayList();
    binaryValues.add("0");
    binaryValues.add("1");

    ArrayList attributes = new ArrayList();
    // Passing a null value list makes this a free-text (string) attribute.
    attributes.add(new weka.core.Attribute("todoString", (List)null));
    for (int tag = 1; tag <= nbTags; tag++)
    {
        attributes.add(new weka.core.Attribute("label" + tag, binaryValues));
    }
    oDataSet = new Instances("Todo-Instances", attributes, 500);
}
/// <summary>
/// Adds teta results of gini results to the list
/// Change the attributes of the arff file
/// Adds the attributes to arff file
/// </summary>
/// <param name="insts">source dataset (modified in place)</param>
/// <param name="result">per-attribute, per-category scores (teta values)</param>
/// <param name="path">destination ARFF path (overwritten if present)</param>
private void CreateNewDataset(weka.core.Instances insts, List <double[]> result, string path)
{
    // Map each instance's category values onto their computed teta scores.
    List <List <string> > lst = new List <List <string> >();
    for (int i = 0; i < insts.numInstances(); i++)
    {
        lst.Add(new List <string>());
        for (int j = 0; j < insts.instance(i).numValues() - 1; j++)
        {
            string value = insts.instance(i).toString(j);
            for (int k = 0; k < categories[j].Length; k++)
            {
                if (insts.instance(i).toString(j) == categories[j][k])
                {
                    lst[lst.Count - 1].Add(String.Format("{0:0.00}", result[j][k]));
                    break;
                }
            }
        }
    }
    // Replace each nominal attribute (class excluded) with a numeric attribute
    // of the same name, at the same position.
    for (int i = 0; i < insts.numAttributes() - 1; i++)
    {
        string name = insts.attribute(i).name().ToString();
        insts.deleteAttributeAt(i);
        weka.core.Attribute att = new weka.core.Attribute(name);
        insts.insertAttributeAt(att, i);
    }
    // Write the numeric teta values back into the instances.
    for (int i = 0; i < insts.numInstances(); i++)
    {
        for (int j = 0; j < insts.instance(i).numValues() - 1; j++)
        {
            insts.instance(i).setValue(j, Convert.ToDouble(lst[i][j]));
        }
    }
    if (File.Exists(path))
    {
        File.Delete(path);
    }
    var saver = new ArffSaver();
    saver.setInstances(insts);
    saver.setFile(new java.io.File(path));
    saver.writeBatch();
}
/// <summary>
/// Tallies how many instances fall into each of the three expected classes
/// into m_counts, indexed by class value.
/// </summary>
/// <param name="instances">dataset whose class distribution is counted</param>
public void Build(weka.core.Instances instances)
{
    WekaUtils.DebugAssert(instances.numClasses() == 3, "instance's numClasses should be 3.");
    m_counts = new int[instances.numClasses()];
    for (int c = 0; c < m_counts.Length; c++)
    {
        m_counts[c] = 0;
    }
    foreach (Instance instance in instances)
    {
        m_counts[(int)instance.classValue()]++;
    }
}
/// <summary>
/// Reads the feature values out of the dynamically generated controls,
/// appends them as a new instance, applies the preprocessing matching the
/// trained model's type, classifies the new row and shows the predicted label.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader(file));
    double[] values = new double[insts.numAttributes()];
    for (int i = 0; i < list.Count; i++)
    {
        if (list[i].GetType() == typeof(TextBox))
        {
            TextBox box = (TextBox)list[i];
            // Normalize the decimal separator for the current (comma) culture.
            values[i] = Convert.ToDouble(box.Text.Replace('.', ','));
        }
        else
        {
            ComboBox combo = (ComboBox)list[i];
            values[i] = Convert.ToDouble(combo.SelectedIndex);
        }
    }
    insts.setClassIndex(insts.numAttributes() - 1);
    Instance newInsts = new Instance(1.0, values);
    insts.add(newInsts);

    string type = model.GetType().ToString();
    if (type == "weka.classifiers.bayes.NaiveBayes")
    {
        // Naive Bayes was trained on discretized data.
        weka.filters.Filter discretize = new weka.filters.unsupervised.attribute.Discretize();
        discretize.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, discretize);
    }
    else if (type == "weka.classifiers.lazy.IBk")
    {
        // k-NN was trained on dummy-coded, normalized data.
        weka.filters.Filter dummyCode = new weka.filters.unsupervised.attribute.NominalToBinary();
        dummyCode.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, dummyCode);
        weka.filters.Filter normalize = new weka.filters.unsupervised.instance.Normalize();
        normalize.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, normalize);
    }

    double index = model.classifyInstance(insts.lastInstance());
    string result = insts.attribute(insts.numAttributes() - 1).value(Convert.ToInt16(index));
    MessageBox.Show(result);
}
/// <summary>
/// Trains a multilayer perceptron on the first percentSplit% of the ARFF
/// file (after shuffling) and persists the model under "/models/".
/// </summary>
public void trainMachineForEmotionUsingWeka(string wekaFile, string modelName, int hiddelLayers = 7, double learningRate = 0.03, double momentum = 0.4, int decimalPlaces = 2, int trainingTime = 1000)
{
    try
    {
        weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader(wekaFile));
        insts.setClassIndex(insts.numAttributes() - 1);

        // Configure the network from the supplied hyper-parameters.
        var mlp = new weka.classifiers.functions.MultilayerPerceptron();
        mlp.setHiddenLayers(hiddelLayers.ToString());
        mlp.setLearningRate(learningRate);
        mlp.setMomentum(momentum);
        mlp.setNumDecimalPlaces(decimalPlaces);
        mlp.setTrainingTime(trainingTime);

        System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

        // Shuffle the instances before taking the training prefix.
        weka.filters.Filter shuffle = new weka.filters.unsupervised.instance.Randomize();
        shuffle.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, shuffle);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;   // kept for parity with the split report
        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

        java.io.File path = new java.io.File("/models/");
        mlp.buildClassifier(train);
        saveModel(mlp, modelName, path);
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
    }
}
/// <summary>
/// Loads each per-class jackknife ARFF split ("{name}_{class}.arff") and sets
/// its class index to the last attribute.
/// NOTE(review): the loaded datasets and the classifier parameter are not yet
/// used — the evaluation stage appears unfinished.
/// </summary>
/// <param name="classifierFileName">base name of the per-class ARFF files</param>
/// <param name="baseClasses">number of class splits to load (1-based suffixes)</param>
/// <param name="_classifie">classifier reserved for the evaluation stage</param>
public static void JackKnife_Test_prepare(string classifierFileName, int baseClasses, Classifier _classifie)
{
    for (int singleClass = 1; singleClass <= baseClasses; singleClass++)
    {
        string eachFileName = String.Format("{0}_{1}.arff", classifierFileName, singleClass);
        FileReader javaFileReader = new FileReader(eachFileName);
        weka.core.Instances insts = new weka.core.Instances(javaFileReader);
        javaFileReader.close();
        insts.setClassIndex(insts.numAttributes() - 1);
        // FIX: removed the unused local that only stored numInstances().
    }
}
/// <summary>
/// "Browse" button handler: lets the user pick a data file, then loads it as a
/// WEKA data set and runs Discreatization on it.
/// Fix over the original: the file was loaded even when the dialog was
/// cancelled, reading a stale or empty txtPath.Text and crashing.
/// </summary>
private void BtnBrowse_Click(object sender, EventArgs e)
{
    categories = new List<string[]>();
    txtInfo.Text = "";
    btnTwoing.Enabled = false;
    btnGini.Enabled = false;

    DialogResult result = openFileDialog.ShowDialog();
    if (result != DialogResult.OK)
    {
        // User cancelled: keep the action buttons disabled and do not touch the file.
        return;
    }

    txtPath.Text = openFileDialog.FileName;
    btnTwoing.Enabled = true;
    btnGini.Enabled = true;

    weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader(txtPath.Text));
    Discreatization(insts);
}
/// <summary>
/// Evaluates NaiveBayes with a percentSplit% train / rest test split:
/// discretizes the attributes, shuffles the instances, trains on the first
/// percentSplit% and returns the accuracy (percent) on the remainder.
/// Fix over the original: returns 0 instead of a 0/0 (NaN) result when the
/// split leaves no test instances.
/// </summary>
/// <param name="insts">full data set; the last attribute is used as the class</param>
/// <returns>accuracy percentage on the held-out portion, or 0 on error / empty test set</returns>
public static double NaiveBayesTest(weka.core.Instances insts)
{
    try
    {
        insts.setClassIndex(insts.numAttributes() - 1);
        NaiveBayescl = new weka.classifiers.bayes.NaiveBayes();

        // Discretize numeric attributes before training.
        weka.filters.Filter myDiscretize = new weka.filters.unsupervised.attribute.Discretize();
        myDiscretize.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myDiscretize);

        // Shuffle before splitting so the split is not order-biased.
        weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
        myRandom.setInputFormat(insts);
        insts = weka.filters.Filter.useFilter(insts, myRandom);

        int trainSize = insts.numInstances() * percentSplit / 100;
        int testSize = insts.numInstances() - trainSize;
        if (testSize <= 0)
        {
            // Nothing to test on; the original returned NaN here (0/0).
            return 0;
        }

        weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
        NaiveBayescl.buildClassifier(train);

        // Count correct predictions on the held-out suffix.
        int numCorrect = 0;
        for (int i = trainSize; i < insts.numInstances(); i++)
        {
            weka.core.Instance currentInst = insts.instance(i);
            double predictedClass = NaiveBayescl.classifyInstance(currentInst);
            if (predictedClass == insts.instance(i).classValue())
            {
                numCorrect++;
            }
        }
        return ((double)numCorrect / (double)testSize * 100.0);
    }
    catch (java.lang.Exception ex)
    {
        ex.printStackTrace();
        return (0);
    }
}
/// <summary>
/// Finishes a training session: converts the collected text data set into a
/// bag-of-words representation and trains a RAkEL ensemble over Binary
/// Relevance with NaiveBayes as the base learner.
/// </summary>
public void EndTrainingSession()
{
    Console.WriteLine("End");

    // Turn free text into word-count vectors.
    stv = new StringToWordVector();
    stv.setAttributeNamePrefix("#");   // generated word attributes are prefixed with '#'
    stv.setLowerCaseTokens(true);
    stv.setOutputWordCounts(true);
    stv.setInputFormat(oDataSet);
    stv.setStemmer(new weka.core.stemmers.LovinsStemmer());
    stv.setIDFTransform(true);         // weight counts by inverse document frequency
    dataSet = Filter.useFilter(oDataSet, stv);

    // Wrap the filtered data with label metadata built from its last tagsNb attributes.
    MultiLabelInstances mli = new MultiLabelInstances(dataSet, loadLabelsMeta(dataSet, tagsNb));

    BinaryRelevance br = new mulan.classifier.transformation.BinaryRelevance(new NaiveBayes());
    lps = new mulan.classifier.meta.RAkEL(br);
    br.setDebug(true);
    lps.setDebug(true);
    lps.build(mli);
}
/// <summary>
/// Builds <paramref name="classifier"/> on the first 66% of
/// <paramref name="instances"/> and records the split sizes in a TrainModel,
/// which is also stored in this.Classifier.
/// </summary>
/// <param name="instances">the full data set</param>
/// <param name="classifier">classifier to build on the training portion</param>
/// <returns>the populated TrainModel</returns>
public TrainModel Train(weka.core.Instances instances, Classifier classifier)
{
    const int percentSplit = 66;

    int total = instances.numInstances();
    int trainSize = total * percentSplit / 100;
    int testSize = total - trainSize;

    weka.core.Instances trainingPortion = new weka.core.Instances(instances, 0, trainSize);
    classifier.buildClassifier(trainingPortion);

    TrainModel model = new TrainModel()
    {
        PercentSplit = percentSplit,
        classifier = classifier,
        TestSize = testSize,
        TrainSize = trainSize,
        Instance = instances
    };
    this.Classifier = model;
    return model;
}
/// <summary> Creates a distribution with a single bag that holds all
/// instances in <paramref name="source"/>.
///
/// Fix over the original: m_perClassPerBag[0] was first filled with an empty
/// array inside a one-iteration loop and then immediately overwritten; the
/// redundant initialization is removed.
/// </summary>
/// <exception cref="Exception">if something goes wrong
/// </exception>
public Distribution(Instances source)
{
    int numClasses = source.numClasses();

    // One bag; per-class weights both overall and within that bag.
    m_perClassPerBag = new double[1][];
    m_perClassPerBag[0] = new double[numClasses];
    m_perBag = new double[1];
    m_perClass = new double[numClasses];
    totaL = 0;

    // Weight every instance into bag 0.
    System.Collections.IEnumerator enu = source.enumerateInstances();
    while (enu.MoveNext())
    {
        add(0, (Instance) enu.Current);
    }
}
/// <summary>
/// Trains a Bagging ensemble from an ARFF file (last attribute is the class):
/// shuffles the instances, builds the model on the first percentSplit% of the
/// data, and saves it to "/models/" under <paramref name="modelName"/>.
/// Fix over the original: removed the unused local 'testSize'.
/// </summary>
/// <param name="wekaFile">path to the ARFF training file</param>
/// <param name="modelName">name under which the trained model is saved</param>
public void trainMachineForHybridUsingWeka(string wekaFile, string modelName)
{
    weka.core.Instances insts = new weka.core.Instances(new java.io.FileReader(wekaFile));
    insts.setClassIndex(insts.numAttributes() - 1);

    weka.classifiers.Classifier bagging = new weka.classifiers.meta.Bagging();

    System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

    // Randomize the order of the instances in the dataset.
    weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
    myRandom.setInputFormat(insts);
    insts = weka.filters.Filter.useFilter(insts, myRandom);

    // Train on the first percentSplit% of the shuffled data.
    int trainSize = insts.numInstances() * percentSplit / 100;
    weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);

    java.io.File path = new java.io.File("/models/");
    bagging.buildClassifier(train);
    saveModel(bagging, modelName, path);
}
/// <summary>
/// Mines Apriori association rules from the data set at <paramref name="dataPath"/>
/// and returns the brands associated with <paramref name="drugId"/>.
/// Fixes over the original: a null <paramref name="drugId"/> short-circuits to an
/// empty list, and an unknown brand id no longer adds a null entry to the result.
/// NOTE(review): only the first rule mentioning the drug is considered (the
/// 'break' fires on the first id match) — confirm that is intended.
/// </summary>
/// <param name="drugId">id of the drug to find associations for</param>
/// <param name="dataPath">path to the ARFF transaction data</param>
/// <returns>associated brands; empty when nothing matches</returns>
public List<DrugBrandInfo> AssociateDrugs(int? drugId, string dataPath)
{
    List<DrugBrandInfo> drugList = new List<DrugBrandInfo>();
    if (drugId == null)
    {
        // No drug to match against: nothing can be associated.
        return drugList;
    }

    weka.core.Instances data = new weka.core.Instances(new java.io.FileReader(dataPath));
    data.setClassIndex(data.numAttributes() - 1);

    Apriori apriori = new Apriori();
    apriori.setClassIndex(data.classIndex());
    apriori.buildAssociations(data);

    // vector[0] holds rule premises, vector[1] the corresponding consequences.
    FastVector[] vector = apriori.getAllTheRules();
    for (int i = 0; i < vector[0].size(); i++)
    {
        string value1 = ((AprioriItemSet)vector[0].elementAt(i)).toString(data);
        string value2 = ((AprioriItemSet)vector[1].elementAt(i)).toString(data);
        // Item sets print as "attr=value"; split into [attribute, value].
        string[] set1 = value1.Split(' ', '=');
        string[] set2 = value2.Split(' ', '=');
        if (set1[0].Equals(drugId.ToString()))
        {
            if (set1[1] == "1" && set2[1] == "1")
            {
                int brandId = Convert.ToInt32(set2[0]);
                var drug = db.DrugBrandInfos.SingleOrDefault(c => c.Id == brandId);
                if (drug != null)
                {
                    drugList.Add(drug);
                }
            }
            break;
        }
    }
    return (drugList);
}
/// <summary>
/// Twoing splitting criterion: for every attribute (except the last entry,
/// which holds the target counts) and each of its categories j, computes
/// teta = 2 * pLeft * pRight * sum_k |P(class k | left) - P(class k | right)|
/// where "left" is {category == j} and "right" is every other category.
/// The per-category teta values are then passed to CreateNewDataset, which
/// writes the Twoing data set to twoingPath.
/// </summary>
/// <param name="insts">the data set the counts were taken from</param>
/// <param name="categoryTypeNumber">per attribute: instance count of each category</param>
/// <param name="categoryTypeTargetNumber">per attribute: [category, targetClass] joint counts</param>
private void Twoing(weka.core.Instances insts, List<int[]> categoryTypeNumber, List<int[,]> categoryTypeTargetNumber)
{
    // teta value per attribute/category, same shape as categoryTypeNumber.
    List<double[]> categoryTetaNumber = new List<double[]>();
    for (int i = 0; i < categoryTypeNumber.Count; i++)
    {
        categoryTetaNumber.Add(new double[categoryTypeNumber[i].Length]);
    }

    // Last list entry is the target attribute itself — excluded from splitting.
    for (int i = 0; i < categoryTypeNumber.Count - 1; i++)
    {
        for (int j = 0; j < categoryTypeNumber[i].Length; j++)
        {
            // Probability of falling into the left (category == j) / right partition.
            Double pLeft = Convert.ToDouble(categoryTypeNumber[i][j]) / Convert.ToDouble(insts.numInstances());
            Double pRight = 1 - pLeft;
            Double sumFunction = 0;
            for (int k = 0; k < categoryTypeNumber[categoryTypeNumber.Count - 1].Length; k++)
            {
                // P(class k | left branch).
                Double PclassDivideTleft = Convert.ToDouble(categoryTypeTargetNumber[i][j, k]) / Convert.ToDouble(categoryTypeNumber[i][j]);
                // Count of class k on the right side ("sagtarafıntargetları" ≈ Turkish for "targets of the right side").
                int sagtarafıntargetları = 0;
                for (int h = 0; h < categoryTypeNumber[i].Length; h++)
                {
                    if (h != j)
                    {
                        sagtarafıntargetları += categoryTypeTargetNumber[i][h, k];
                    }
                }
                // P(class k | right branch).
                Double PclassDivideTRigt = Convert.ToDouble(sagtarafıntargetları) / Convert.ToDouble((insts.numInstances() - categoryTypeNumber[i][j]));
                sumFunction += Math.Abs(PclassDivideTleft - PclassDivideTRigt);
            }
            categoryTetaNumber[i][j] = 2 * pLeft * pRight * sumFunction;
        }
    }

    // Copy the data and emit the new data set with the computed teta values.
    Instances fileInst = new Instances(insts);
    CreateNewDataset(fileInst, categoryTetaNumber, twoingPath);
}
/// <summary>
/// Classifies a single feature vector with a previously saved Bagging model:
/// builds a one-instance data set from the given attribute names and values,
/// loads the model from "/models/", and returns the predicted class label.
/// </summary>
/// <param name="attributeArray">names of the numeric feature attributes</param>
/// <param name="classNames">all possible class labels</param>
/// <param name="dataValues">feature values, parallel to attributeArray</param>
/// <param name="classHeader">name of the class attribute</param>
/// <param name="defaultclass">placeholder class value assigned to the test instance</param>
/// <param name="modelName">name of the saved Bagging model to load</param>
/// <returns>the predicted class label</returns>
public string testHybridEmotionUsingWeka(string[] attributeArray, string[] classNames, double[] dataValues, string classHeader, string defaultclass, string modelName)
{
    // Nominal class attribute carrying all possible labels.
    java.util.ArrayList classLabel = new java.util.ArrayList();
    foreach (string className in classNames)
    {
        classLabel.Add(className);
    }
    weka.core.Attribute classHeaderName = new weka.core.Attribute(classHeader, classLabel);

    // Numeric feature attributes, with the class attribute appended last.
    java.util.ArrayList attributeList = new java.util.ArrayList();
    foreach (string attribute in attributeArray)
    {
        weka.core.Attribute newAttribute = new weka.core.Attribute(attribute);
        attributeList.Add(newAttribute);
    }
    attributeList.add(classHeaderName);

    weka.core.Instances data = new weka.core.Instances("TestInstances", attributeList, 0);
    data.setClassIndex(data.numAttributes() - 1);

    // Fill the single instance with the supplied feature values.
    weka.core.Instance inst_co = new DenseInstance(data.numAttributes());
    for (int i = 0; i < data.numAttributes() - 1; i++)
    {
        inst_co.setValue(i, dataValues.ElementAt(i));
    }
    inst_co.setValue(classHeaderName, defaultclass);
    data.add(inst_co);

    java.io.File path = new java.io.File("/models/");
    weka.classifiers.meta.Bagging clRead = loadBaggingModel(modelName, path);

    // NOTE(review): randomizing a one-instance dataset has no visible effect —
    // confirm whether this filter step can be removed.
    weka.filters.Filter myRandom = new weka.filters.unsupervised.instance.Randomize();
    myRandom.setInputFormat(data);
    data = weka.filters.Filter.useFilter(data, myRandom);

    // Map the numeric prediction back to its label via the class label list.
    double classValue = clRead.classifyInstance(data.get(0));
    string classValueString = classLabel.get(Int32.Parse(classValue.ToString())).ToString();
    return (classValueString);
}
/* Use when the player logs in to initially create the classifier with data from server */
/// <summary>
/// Parses the ARFF-formatted <paramref name="dataString"/> received from the
/// server into playerData, selects the class attribute (last attribute when the
/// data does not specify one), and builds a RandomForest on it. Java-side
/// exceptions are logged, not rethrown.
/// </summary>
public void InitializeClassifier(String dataString)
{
    try
    {
        java.io.StringReader stringReader = new java.io.StringReader(dataString);
        java.io.BufferedReader buffReader = new java.io.BufferedReader(stringReader);
        playerData = new weka.core.Instances(buffReader);

        /* State where in each Instance the class attribute is, if its not already specified by the file */
        if (playerData.classIndex() == -1)
            playerData.setClassIndex(playerData.numAttributes() - 1);

        /* Alternative classifiers tried during development: */
        /* NAIVE BAYES */
        //classifier = new weka.classifiers.bayes.NaiveBayes();

        /* NEURAL NET */
        //classifier = new weka.classifiers.functions.MultilayerPerceptron();
        //((weka.classifiers.functions.MultilayerPerceptron)classifier).setHiddenLayers("12");

        /* J48 TREE */
        //classifier = new weka.classifiers.trees.J48();

        /* IB1 NEAREST NEIGHBOUR */
        //classifier = new weka.classifiers.lazy.IB1();

        /* RANDOM FOREST */
        classifier = new weka.classifiers.trees.RandomForest();

        classifier.buildClassifier(playerData);
        Debug.Log("Initialized Classifier");
    }
    catch (java.lang.Exception ex)
    {
        Debug.LogError(ex.getMessage());
    }
}
/// <summary>
/// Create a single-instance WEKA data set holding the average value of each
/// active descriptor, plus a nominal "Class__" attribute with
/// InfoClass.NumberOfClass possible values (the class index is the last attribute).
/// Fix over the original: GetAverageValuesList() was re-evaluated on every
/// loop iteration; it is now computed once.
/// </summary>
/// <param name="InfoClass">carries the number of classes for the nominal class attribute</param>
/// <returns>the one-instance weka Instances object</returns>
public Instances CreateInstanceForNClasses(cInfoClass InfoClass)
{
    // Hoisted out of the loop (was an O(n) call per descriptor).
    List<double> averageValues = GetAverageValuesList();

    List<double> AverageList = new List<double>();
    for (int i = 0; i < Parent.ListDescriptors.Count; i++)
        if (Parent.ListDescriptors[i].IsActive())
            AverageList.Add(averageValues[i]);

    // One numeric attribute per active descriptor.
    weka.core.FastVector atts = new FastVector();
    List<string> NameList = Parent.ListDescriptors.GetListNameActives();
    for (int i = 0; i < NameList.Count; i++)
        atts.addElement(new weka.core.Attribute(NameList[i]));

    // Nominal class attribute "Class__" with values Class0..Class{N-1}.
    weka.core.FastVector attVals = new FastVector();
    for (int i = 0; i < InfoClass.NumberOfClass; i++)
        attVals.addElement("Class" + i);
    atts.addElement(new weka.core.Attribute("Class__", attVals));

    Instances data1 = new Instances("SingleInstance", atts, 0);

    // The trailing slot (class value) is left at its default.
    double[] newTable = new double[AverageList.Count + 1];
    Array.Copy(AverageList.ToArray(), 0, newTable, 0, AverageList.Count);
    //newTable[AverageList.Count] = 1;

    data1.add(new DenseInstance(1.0, newTable));
    data1.setClassIndex((data1.numAttributes() - 1));

    return data1;
}
/// <summary> Selects a model for the given train data using the given test data.
/// This base implementation is a stub: subclasses that support train/test
/// based selection must override it.
/// </summary>
/// <exception cref="Exception">always — model selection method not implemented here
/// </exception>
public virtual ClassifierSplitModel selectModel(Instances train, Instances test)
{
    throw new System.Exception("Model selection method not implemented");
}
/// <summary> Selects a model for the given dataset. Implemented by concrete
/// model-selection strategies (e.g. the binary C4.5 selection in this file).
/// </summary>
/// <param name="data">the training instances to choose a split model for</param>
/// <exception cref="Exception">if model can't be selected
/// </exception>
public abstract ClassifierSplitModel selectModel(Instances data);
/// <summary>
/// Builds multi-label metadata from the last <paramref name="numLabels"/>
/// attributes of <paramref name="data"/>: each such attribute becomes a root
/// label node named after the attribute.
/// </summary>
private LabelsMetaData loadLabelsMeta(Instances data, int numLabels)
{
    LabelsMetaDataImpl meta = new LabelsMetaDataImpl();
    int total = data.numAttributes();
    int firstLabel = total - numLabels;
    for (int i = firstLabel; i < total; i++)
    {
        meta.addRootNode(new LabelNodeImpl(data.attribute(i).name()));
    }
    return meta;
}
// ---- OPERATIONS ----

/// <summary> Analyze the time series data. Two similarity matrices are filled:
/// m_distancesFreq with Euclidean distances computed on the Fourier
/// coefficients, and m_distancesTime with plain Euclidean distances — the
/// latter only for dimension pairs the frequency-domain distance already
/// found similar (&lt;= m_epsilon), i.e. the FT acts as a cheap pre-filter.
/// Only the lower triangle (j &lt; i) is computed; pairs where both dimensions
/// are template sequences are skipped.
/// </summary>
/// <param name="data">data to be analyzed</param>
public override void analyze(Instances data)
{
    data.setClassIndex(data.numAttributes() - 1);
    m_data = data;
    m_rangeTemplates.setUpper(data.numAttributes());

    //Date startFT = new Date();
    // compute fourier transform
    FourierTransform dftFilter = new FourierTransform();
    dftFilter.setInputFormat(data);
    dftFilter.setNumCoeffs(getNumCoeffs());
    dftFilter.setUseFFT(getUseFFT());
    Instances fourierdata = Filter.useFilter(data, dftFilter);
    Date endFT = new Date(); // NOTE(review): unused since the timing code below is commented out
    // time taken for FT
    //m_DFTTime = new Date(endFT.getTime() - startFT.getTime());

    int numdim = data.numAttributes();
    //ORIGINAL LINE: m_distancesFreq = new double[numdim][numdim];
    //JAVA TO VB & C# CONVERTER NOTE: RectangularArrays reproduces Java's rectangular array initialization:
    m_distancesFreq = RectangularArrays.ReturnRectangularDoubleArray(numdim, numdim);
    //ORIGINAL LINE: m_distancesTime = new double[numdim][numdim];
    m_distancesTime = RectangularArrays.ReturnRectangularDoubleArray(numdim, numdim);

    //long ftDistTime = 0;
    //long tDistTime = 0;

    // compute similarity matrices (lower triangle only)
    for (int i = 0; i < data.numAttributes(); ++i)
    {
        for (int j = 0; j < i; j++)
        {
            // not for template sequences
            if (m_rangeTemplates.isInRange(i) && m_rangeTemplates.isInRange(j))
            {
                continue;
            }
            //Date startFTDist = new Date();

            // Compute the Euclidean distance between 2 dims using FT.
            // Assumes attribute 2*k of the filtered data holds the real part and
            // 2*k+1 the imaginary part of dimension k — TODO confirm against
            // the FourierTransform filter's output layout.
            double[] reCT = fourierdata.attributeToDoubleArray(2 * i);
            double[] imCT = fourierdata.attributeToDoubleArray(2 * i + 1);
            double[] reCS = fourierdata.attributeToDoubleArray(2 * j);
            double[] imCS = fourierdata.attributeToDoubleArray(2 * j + 1);
            m_distancesFreq[i][j] = computeEuclidean(reCT, imCT, reCS, imCS);

            // if found similar using FT
            if (m_distancesFreq[i][j] <= m_epsilon)
            {
                // then compute normal Euclidean distances between the 2 dims
                double[] x = data.attributeToDoubleArray(i);
                double[] y = data.attributeToDoubleArray(j);
                m_distancesTime[i][j] = computeEuclidean(x, y);
            }
            //Date endFTDist = new Date();
            // time taken for computing similarity based on FT
            //ftDistTime += (endFTDist.getTime() - startFTDist.getTime());

            // Date startDist = new Date();
            //// compute similarity matrices (brute force)
            // double[] x1 = data.attributeToDoubleArray(i);
            // double[] y1 = data.attributeToDoubleArray(j);
            // computeEuclidean(x1, y1);
            // Date endDist = new Date();
            //// time taken for computing similarity based brute force method
            // tDistTime += (endDist.getTime() - startDist.getTime());
        }
    }
    //m_FTEuclideanTime = new Date(ftDistTime);
    //m_EuclideanTime = new Date(tDistTime);
}
/// <summary>
/// Builds a two-attribute ("att1", "att2") numeric WEKA data set from two
/// parallel arrays: instance i holds (a[i], b[i]) with weight 1.0.
/// Fix over the original: a length mismatch used to surface as an
/// IndexOutOfRangeException deep inside the fill loop.
/// </summary>
/// <param name="a">values for att1</param>
/// <param name="b">values for att2, parallel to <paramref name="a"/></param>
/// <returns>the populated Instances object</returns>
/// <exception cref="System.ArgumentException">if the arrays have different lengths</exception>
private static Instances CreateInstanceOnFly(double[] a, double[] b)
{
    if (a.Length != b.Length)
    {
        throw new System.ArgumentException("Input arrays must have the same length.");
    }

    // 1. set up the two numeric attributes
    FastVector atts = new FastVector();
    atts.addElement(new Attribute("att1"));
    atts.addElement(new Attribute("att2"));

    // 2. create the empty Instances object
    Instances data = new Instances("MyRelation", atts, 0);

    // 3. fill with data, one instance per array index
    for (int i = 0; i < a.Length; ++i)
    {
        double[] vals = new double[data.numAttributes()];
        vals[0] = a[i];
        vals[1] = b[i];
        data.add(new weka.core.DenseInstance(1.0, vals));
    }
    return data;
}
/// <summary>
/// Creates a manager wrapping the given data set; the instances are kept in
/// the Instance property for later use.
/// </summary>
/// <param name="insts">the data set to manage</param>
public ClassifierManager(weka.core.Instances insts)
{
    this.Instance = insts;
}
/// <summary> Builds the "no-split" model: a single subset whose class
/// distribution is that of all the given instances.
/// </summary>
/// <exception cref="Exception">if the distribution can't be computed
/// </exception>
public override void buildClassifier(Instances instances)
{
    m_numSubsets = 1;
    m_distribution = new Distribution(instances);
}
/// <summary> Does nothing because no condition has to be satisfied:
/// a no-split model has an empty left-hand side in printed rules.</summary>
public override System.String leftSide(Instances instances)
{
    return "";
}
/// <summary> Does nothing because no condition has to be satisfied:
/// a no-split model has an empty right-hand side for every subset index.</summary>
public override System.String rightSide(int index, Instances instances)
{
    return "";
}
/// <summary> Selects C4.5-type split for the given dataset. The separate test
/// set is ignored — selection is delegated to the train-only overload.</summary>
public override ClassifierSplitModel selectModel(Instances train, Instances test)
{
    return selectModel(train);
}
/// <summary> Selects C4.5-type (binary) split for the given dataset: builds a
/// BinC45Split for every non-class attribute, keeps those whose info gain is
/// at least (average info gain - 1e-3), and among them returns the one with
/// the highest gain ratio. Falls back to a NoSplit model when the data is too
/// small, already pure, or no useful split exists. Returns null only if an
/// unexpected exception occurs (it is printed, not rethrown).
/// </summary>
public override ClassifierSplitModel selectModel(Instances data)
{
    double minResult; // best gain ratio found so far (despite the name)
    //double currentResult;
    BinC45Split[] currentModel;
    BinC45Split bestModel = null;
    NoSplit noSplitModel = null;
    double averageInfoGain = 0;
    int validModels = 0;
    bool multiVal = true; // stays true only if every attribute is nominal with many values
    Distribution checkDistribution;
    double sumOfWeights;
    int i;

    try
    {
        // Check if all Instances belong to one class or if not
        // enough Instances to split.
        checkDistribution = new Distribution(data);
        noSplitModel = new NoSplit(checkDistribution);
        if (Utils.sm(checkDistribution.total(), 2 * m_minNoObj) || Utils.eq(checkDistribution.total(), checkDistribution.perClass(checkDistribution.maxClass())))
            return noSplitModel;

        // Check if all attributes are nominal and have a
        // lot of values (>= 30% of the full training set size).
        System.Collections.IEnumerator enu = data.enumerateAttributes();
        //UPGRADE_TODO: java.util.Enumeration.hasMoreElements was converted to IEnumerator.MoveNext (behavior differs).
        while (enu.MoveNext())
        {
            //UPGRADE_TODO: java.util.Enumeration.nextElement was converted to IEnumerator.Current (behavior differs).
            weka.core.Attribute attribute = (weka.core.Attribute) enu.Current;
            if ((attribute.Numeric) || (Utils.sm((double) attribute.numValues(), (0.3 * (double) m_allData.numInstances()))))
            {
                multiVal = false;
                break;
            }
        }

        currentModel = new BinC45Split[data.numAttributes()];
        sumOfWeights = data.sumOfWeights();

        // For each attribute.
        for (i = 0; i < data.numAttributes(); i++)
        {
            // Apart from class attribute.
            if (i != (data).classIndex())
            {
                // Get models for current attribute.
                currentModel[i] = new BinC45Split(i, m_minNoObj, sumOfWeights);
                currentModel[i].buildClassifier(data);

                // Check if useful split for current attribute
                // exists and check for enumerated attributes with
                // a lot of values.
                if (currentModel[i].checkModel())
                    if ((data.attribute(i).Numeric) || (multiVal || Utils.sm((double) data.attribute(i).numValues(), (0.3 * (double) m_allData.numInstances()))))
                    {
                        averageInfoGain = averageInfoGain + currentModel[i].infoGain();
                        validModels++;
                    }
            }
            else
                currentModel[i] = null;
        }

        // Check if any useful split was found.
        if (validModels == 0)
            return noSplitModel;
        averageInfoGain = averageInfoGain / (double) validModels;

        // Find "best" attribute to split on: highest gain ratio among splits
        // whose info gain is close to (or above) the average.
        minResult = 0;
        for (i = 0; i < data.numAttributes(); i++)
        {
            if ((i != (data).classIndex()) && (currentModel[i].checkModel()))
                // Use 1E-3 here to get a closer approximation to the original
                // implementation.
                if ((currentModel[i].infoGain() >= (averageInfoGain - 1e-3)) && Utils.gr(currentModel[i].gainRatio(), minResult))
                {
                    bestModel = currentModel[i];
                    minResult = currentModel[i].gainRatio();
                }
        }

        // Check if useful split was found.
        if (Utils.eq(minResult, 0))
            return noSplitModel;

        // Add all Instances with unknown values for the corresponding
        // attribute to the distribution for the model, so that
        // the complete distribution is stored with the model.
        bestModel.distribution().addInstWithUnknown(data, bestModel.attIndex());

        // Set the split point analogue to C45 if attribute numeric.
        // NOTE(review): the setter receives the FULL training data — presumably
        // it recomputes the numeric split point from it; confirm against
        // BinC45Split.SplitPoint.
        bestModel.SplitPoint = m_allData;
        return bestModel;
    }
    catch (System.Exception e)
    {
        // Exception is swallowed (matching the original port); callers must handle null.
        System.Console.WriteLine(e.StackTrace + " " + e.Message);
    }
    return null;
}
/// <summary> Returns a string containing java source code equivalent to the test
/// made at this node. The instance being tested is called "i".
/// A no-split model performs no test, so the generated expression is the
/// constant "true" (everything falls into the single subset).
/// </summary>
/// <param name="index">index of the nominal value tested
/// </param>
/// <param name="data">the data containing instance structure info
/// </param>
/// <returns> a value of type 'String'
/// </returns>
public override System.String sourceExpression(int index, Instances data)
{
    return "true"; // or should this be false??
}
/// <summary> Initializes the split selection method with the given parameters.
///
/// </summary>
/// <param name="minNoObj">minimum number of instances that have to occur in
/// at least two subsets induced by split
/// </param>
/// <param name="allData">FULL training dataset (necessary for selection of
/// split points); held by reference — call cleanup() to release it when done.
/// </param>
public BinC45ModelSelection(int minNoObj, Instances allData)
{
    m_minNoObj = minNoObj;
    m_allData = allData;
}
/// <summary> Sets reference to training data to null, so the full data set
/// held since construction can be garbage-collected once selection is done.</summary>
public virtual void cleanup()
{
    m_allData = null;
}
/// <summary>
/// Loads an ARFF file and evaluates a J48 tree with 10-fold cross-validation,
/// printing the per-class details, the confusion matrix and the cumulative
/// margin distribution, then waits for a key press.
/// NOTE(review): the manual fold loop and the percent-split training below
/// appear to be dead work — their results are never used and
/// crossValidateModel does its own training; confirm before removing.
/// </summary>
public static void Test_predictClass(string classifierFileName)
{
    FileReader javaFileReader = new FileReader(classifierFileName);
    weka.core.Instances insts = new weka.core.Instances(javaFileReader);
    javaFileReader.close();

    insts.setClassIndex(insts.numAttributes() - 1);

    weka.classifiers.Classifier cl = new weka.classifiers.trees.J48();
    System.Console.WriteLine("Performing " + percentSplit + "% split evaluation.");

    #region Manual Cross Fold
    // Folds are created but never evaluated.
    Instances foldsData = new Instances(insts);
    int folds = 10;
    for (int n = 0; n < folds; n++)
    {
        Instances trainFold = foldsData.trainCV(folds, n);
        Instances testFold = foldsData.testCV(folds, n);
    }
    #endregion

    #region
    // Percent-split training; testSize is unused and the built tree is not
    // what crossValidateModel reports on.
    int trainSize = insts.numInstances() * percentSplit / 100;
    int testSize = insts.numInstances() - trainSize;
    weka.core.Instances train = new weka.core.Instances(insts, 0, trainSize);
    cl.buildClassifier(train);
    #endregion

    //Classifier cls = new J48();
    Evaluation eval = new Evaluation(insts);
    java.util.Random rand = new java.util.Random(1); // using seed = 1
    int fold = 10;
    eval.crossValidateModel(cl, insts, fold, rand);
    System.Console.WriteLine("toClassDetailsString" + eval.toClassDetailsString());
    System.Console.WriteLine("toMatrixString\n" + eval.toMatrixString());
    System.Console.WriteLine("toCumulativeMarginDistributionString\n" + eval.toCumulativeMarginDistributionString());
    //System.Console.WriteLine("predictions\n" + eval.predictions());
    System.Console.ReadKey();

    //var numnerOfInst = insts.numInstances();
    //for (int i = trainSize; i < numnerOfInst; i++)
    //{
    //    weka.core.Instance currentInst = insts.instance(i);
    //    double pred = cl.classifyInstance(currentInst);
    //    System.Console.WriteLine("class Index: " + insts.instance(i).classIndex());
    //    System.Console.WriteLine(", class value: " + insts.instance(i).classValue());
    //    System.Console.WriteLine(", ID: " + insts.instance(i).value(0));
    //    System.Console.WriteLine(", actual: " + insts.classAttribute().value((int)insts.instance(i).classValue()));
    //    System.Console.WriteLine(", predicted: " + insts.classAttribute().value((int)pred));
    //}
}