/// <summary>
/// Builds a J48 decision tree from a complete ARFF document supplied as text.
/// The last attribute declared in the ARFF header is used as the class attribute.
/// </summary>
/// <param name="ARFF">The ARFF text to parse (header followed by data rows).</param>
/// <param name="count">Capacity handed to the ARFF reader, given as a string and parsed with Convert.ToInt32.</param>
/// <returns>A J48 classifier built on every instance read from <paramref name="ARFF"/>.</returns>
public static J48 ByARFF(string ARFF, string count)
{
    java.io.StringReader source = new java.io.StringReader(ARFF);
    int capacity = Convert.ToInt32(count);
    ArffLoader.ArffReader parser = new ArffLoader.ArffReader(source, capacity, true);

    // Start from the header-only dataset and mark the final attribute as the class.
    Instances dataset = parser.getStructure();
    int lastAttribute = dataset.numAttributes() - 1;
    dataset.setClassIndex(lastAttribute);

    // Pull rows one at a time until the reader reports there are none left.
    for (Instance row = parser.readInstance(dataset); row != null; row = parser.readInstance(dataset))
    {
        dataset.add(row);
    }

    // Default J48 options are used.
    J48 tree = new J48();
    tree.buildClassifier(dataset);
    return tree;
}
/// <summary>
/// Builds a J48 decision tree from an ARFF header and a separately supplied block of data rows.
/// The last attribute declared in the header is used as the class attribute.
/// </summary>
/// <param name="header">ARFF header text; any data rows that follow the header are also included.</param>
/// <param name="dataARFF">
/// Data rows matching the header. NOTE(review): parsed with the template-based ArffReader,
/// which expects rows without a header section - confirm against callers.
/// </param>
/// <returns>A J48 classifier built on all rows read.</returns>
public static J48 ByHeaderAndData(string header, string dataARFF)
{
    java.io.StringReader headerReader = new java.io.StringReader(header);
    ArffLoader.ArffReader headerParser = new ArffLoader.ArffReader(headerReader, 100, false);
    Instances structure = headerParser.getStructure();
    structure.setClassIndex(structure.numAttributes() - 1);

    Instance inst;

    // Keep the previous behaviour for callers whose header text also carries rows.
    while ((inst = headerParser.readInstance(structure)) != null)
    {
        structure.add(inst);
    }

    // The earlier code only re-pointed a local variable at dataARFF, so the parser kept
    // reading the (already exhausted) header text and dataARFF was never consumed.
    // A second reader, bound to dataARFF and using the header as its template, fixes that.
    java.io.StringReader dataReader = new java.io.StringReader(dataARFF);
    ArffLoader.ArffReader dataParser = new ArffLoader.ArffReader(dataReader, structure, 0, 100);
    while ((inst = dataParser.readInstance(structure)) != null)
    {
        structure.add(inst);
    }

    J48 tree = new J48();             // new instance of tree
    tree.buildClassifier(structure);  // build classifier
    return tree;
}
/// <summary>
/// Learns a J48 decision tree that separates target nodes from the other nodes.
/// Each node in allNodes becomes one training instance with a 0/1 value per attribute
/// (1 when the node was returned by the XPath query of the same name) and a "yes"/"no"
/// class value. The trained tree is stored in classifierTree and the features found in
/// its graph are stored in FeaturesUsed.
/// </summary>
public void LearnModel()
{
    Init();

    // Record, for every known node, which selector features (XPath queries) match it.
    foreach (Feature currFeature in DomPool.SelectorFeatures)
    {
        String featureString = currFeature.ToString();
        HashSet<HtmlNode> resNodes = DomPool.RunXpathQuery(featureString);
        foreach (HtmlNode nd in resNodes)
        {
            if (!allNodes.Contains(nd))
            {
                continue;
            }
            nodeFeatures[nd].Add(featureString);
        }
    }

    FastVector fvWekaAttributes = GetDataSetAtts();
    int classIndex = fvWekaAttributes.size() - 1; // the class is the last attribute
    Instances trainingSet = new Instances("TS", fvWekaAttributes, 100);
    trainingSet.setClassIndex(classIndex);

    // Each target node is added several times (non-target count / target count),
    // presumably to balance the two classes. The integer division yields 0 when targets
    // outnumber non-targets, which previously dropped every target instance from the
    // training set; clamp to at least one copy.
    int targetCount = DomPool.TargetNodes.Count();
    int targetCopies = 1;
    if (targetCount > 0)
    {
        targetCopies = DomPool.NonTargetNodes.Count() / targetCount;
        if (targetCopies < 1)
        {
            targetCopies = 1;
        }
    }

    foreach (HtmlNode currNode in allNodes)
    {
        bool isTarget = DomPool.TargetNodes.Contains(currNode);

        Instance item = new SparseInstance(fvWekaAttributes.size());
        for (int i = 0; i < classIndex; i++)
        {
            weka.core.Attribute currFeature = (weka.core.Attribute)fvWekaAttributes.elementAt(i);
            if (nodeFeatures[currNode].Contains(currFeature.name()))
            {
                item.setValue(currFeature, 1);
            }
            else
            {
                item.setValue(currFeature, 0);
            }
        }

        // set the class
        weka.core.Attribute classFeature = (weka.core.Attribute)fvWekaAttributes.elementAt(classIndex);
        item.setValue(classFeature, (isTarget ? "yes" : "no"));
        item.setDataset(trainingSet);

        if (isTarget)
        {
            for (int t = 0; t < targetCopies; t++)
            {
                trainingSet.add(new SparseInstance(item));
            }
        }
        else
        {
            trainingSet.add(item);
        }
    }

    // "-C 0.1" sets J48's pruning confidence threshold (the unpruned flag would be "-U").
    String[] options = new String[2];
    options[0] = "-C";
    options[1] = "0.1";
    J48 tree = new J48();              // new instance of tree
    tree.setOptions(options);          // set the options
    tree.buildClassifier(trainingSet); // build classifier

    // save the resulting classifier
    classifierTree = tree;

    // Parse the tree's graph output to find which features it actually uses.
    Reader treeDot = new StringReader(tree.graph());
    TreeBuild treeBuild = new TreeBuild();
    Node treeRoot = treeBuild.create(treeDot);
    FeaturesUsed = getTreeFeatures(treeRoot);
}