/// <summary>
/// Generates the classifier. For a nominal class the per-class weight totals are
/// collected (every class starts with a count of 1), the index of the largest
/// total becomes the predicted class value and the counts are normalised. For a
/// numeric class the weighted mean of the observed class values is stored.
/// </summary>
/// <param name="instances">set of instances serving as training data</param>
/// <exception cref="Exception">if the class attribute is neither nominal nor numeric</exception>
public override void buildClassifier(Instances instances)
{
    double totalWeight = 0;

    m_Class = instances.classAttribute();
    m_ClassValue = 0;

    switch (instances.classAttribute().type())
    {
        case weka.core.Attribute.NUMERIC:
            m_Counts = null;
            break;

        case weka.core.Attribute.NOMINAL:
            // Every class starts with a count of one, so the running weight
            // total starts at the number of classes.
            m_Counts = new double[instances.numClasses()];
            for (int c = 0; c < m_Counts.Length; c++)
            {
                m_Counts[c] = 1;
            }
            totalWeight = instances.numClasses();
            break;

        default:
            throw new System.Exception("ZeroR can only handle nominal and numeric class" + " attributes.");
    }

    // Accumulate the statistics over all instances whose class is known.
    for (System.Collections.IEnumerator cursor = instances.enumerateInstances(); cursor.MoveNext(); )
    {
        Instance current = (Instance) cursor.Current;
        if (current.classIsMissing())
        {
            continue;
        }

        if (instances.classAttribute().Nominal)
        {
            m_Counts[(int) current.classValue()] += current.weight();
        }
        else
        {
            m_ClassValue += current.weight() * current.classValue();
        }
        totalWeight += current.weight();
    }

    if (!instances.classAttribute().Numeric)
    {
        // Nominal class: predict the class with the largest count.
        m_ClassValue = Utils.maxIndex(m_Counts);
        Utils.normalize(m_Counts, totalWeight);
        return;
    }

    // Numeric class: turn the weighted sum into a weighted mean (if any weight was seen).
    if (Utils.gr(totalWeight, 0))
    {
        m_ClassValue /= totalWeight;
    }
}
/// <summary>
/// Counts how many of the given instances carry each class value and stores the
/// tallies in <c>m_counts</c>, indexed by class value.
/// </summary>
/// <param name="instances">
/// instances to tally; the number of classes is checked to be 3 through
/// <c>WekaUtils.DebugAssert</c>
/// </param>
public void Build(weka.core.Instances instances)
{
    WekaUtils.DebugAssert(instances.numClasses() == 3, "instance's numClasses should be 3.");

    // A freshly allocated int[] is already zero-filled, so no explicit reset loop is required.
    m_counts = new int[instances.numClasses()];

    foreach (Instance instance in instances)
    {
        int classIndex = (int)instance.classValue();
        m_counts[classIndex]++;
    }
}
/// <summary>
/// Initializes all the counters for the evaluation and also takes a cost matrix
/// as parameter. Use <c>useNoPriors()</c> if the dataset is the test set and you
/// can't initialize with the priors from the training set via
/// <c>setPriors(Instances)</c>.
/// </summary>
/// <param name="data">
/// set of training instances, to get some header information and prior class
/// distribution information
/// </param>
/// <param name="costMatrix">the cost matrix; if null, default costs will be used</param>
/// <exception cref="System.Exception">
/// if a cost matrix is given and the class is not nominal, or the size of the
/// cost matrix differs from the number of classes
/// </exception>
/// <seealso cref="useNoPriors()"/>
/// <seealso cref="setPriors(Instances)"/>
public Evaluation(Instances data, CostMatrix costMatrix)
{
    m_NumClasses = data.numClasses();
    m_NumFolds = 1;
    m_ClassIsNominal = data.classAttribute().Nominal;

    if (m_ClassIsNominal)
    {
        // Square confusion matrix plus the label of every class.
        double[][] confusion = new double[m_NumClasses][];
        System.String[] labels = new System.String[m_NumClasses];
        for (int c = 0; c < m_NumClasses; c++)
        {
            confusion[c] = new double[m_NumClasses];
            labels[c] = data.classAttribute().value_Renamed(c);
        }
        m_ConfusionMatrix = confusion;
        m_ClassNames = labels;
    }

    m_CostMatrix = costMatrix;
    bool hasCostMatrix = m_CostMatrix != null;
    if (hasCostMatrix && !m_ClassIsNominal)
    {
        throw new System.Exception("Class has to be nominal if cost matrix " + "given!");
    }
    if (hasCostMatrix && m_CostMatrix.size() != m_NumClasses)
    {
        throw new System.Exception("Cost matrix not compatible with data!");
    }

    m_ClassPriors = new double[m_NumClasses];
    Priors = data;
    m_MarginCounts = new double[k_MarginResolution + 1];
}
/// <summary>
/// Creates split on numeric attribute. Every candidate split point between two
/// neighbouring instances with distinct attribute values is scored by
/// information gain; the best one determines the split point, the final
/// distribution and the gain ratio.
/// </summary>
/// <param name="trainInstances">
/// the training instances.
/// NOTE(review): the scan stops at the first missing value and only compares
/// neighbouring instances, so the data is presumably sorted on the attribute
/// with missing values last - confirm against the caller.
/// </param>
/// <exception cref="Exception">if something goes wrong</exception>
private void handleNumericAttribute(Instances trainInstances)
{
    // Current attribute is a numeric attribute.
    m_distribution = new Distribution(2, trainInstances.numClasses());

    // Only instances with known values are relevant: put them all into bag 1
    // and remember the index of the first instance with a missing value.
    int firstMiss = 0;
    System.Collections.IEnumerator cursor = trainInstances.enumerateInstances();
    while (cursor.MoveNext())
    {
        Instance current = (Instance) cursor.Current;
        if (current.isMissing(m_attIndex))
        {
            break;
        }
        m_distribution.add(1, current);
        firstMiss++;
    }

    // Compute minimum number of instances required in each subset:
    // at least m_minNoObj, at most 25.
    double minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses());
    if (Utils.smOrEq(minSplit, m_minNoObj))
    {
        minSplit = m_minNoObj;
    }
    else if (Utils.gr(minSplit, 25))
    {
        minSplit = 25;
    }

    // Enough instances with known values?
    if (Utils.sm((double) firstMiss, 2 * minSplit))
    {
        return;
    }

    // Compute values of criteria for all possible split indices.
    double defaultEnt = infoGainCrit.oldEnt(m_distribution);
    int splitIndex = -1;
    int last = 0;
    for (int next = 1; next < firstMiss; next++)
    {
        double lowerValue = trainInstances.instance(next - 1).value_Renamed(m_attIndex);
        double upperValue = trainInstances.instance(next).value_Renamed(m_attIndex);

        // Values closer than 1e-5 are not considered distinct, so no split
        // point lies between them.
        if (!(lowerValue + 1e-5 < upperValue))
        {
            continue;
        }

        // Move class values for all instances up to next possible split point.
        m_distribution.shiftRange(1, 0, trainInstances, last, next);

        // Check if enough instances in each subset and compute values for criteria.
        if (Utils.grOrEq(m_distribution.perBag(0), minSplit) && Utils.grOrEq(m_distribution.perBag(1), minSplit))
        {
            double currentInfoGain = infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights, defaultEnt);
            if (Utils.gr(currentInfoGain, m_infoGain))
            {
                m_infoGain = currentInfoGain;
                splitIndex = next - 1;
            }
            m_index++;
        }
        last = next;
    }

    // Was there any useful split?
    if (m_index == 0)
    {
        return;
    }

    // Compute modified information gain for best split.
    m_infoGain = m_infoGain - (Utils.log2(m_index) / m_sumOfWeights);
    if (Utils.smOrEq(m_infoGain, 0))
    {
        return;
    }

    // Set instance variables' values to values for best split.
    m_numSubsets = 2;
    double aboveSplit = trainInstances.instance(splitIndex + 1).value_Renamed(m_attIndex);
    double belowSplit = trainInstances.instance(splitIndex).value_Renamed(m_attIndex);
    m_splitPoint = (aboveSplit + belowSplit) / 2;

    // In case we have a numerical precision problem we need to choose the
    // smaller value.
    if (m_splitPoint == aboveSplit)
    {
        m_splitPoint = belowSplit;
    }

    // Restore distribution for best split.
    m_distribution = new Distribution(2, trainInstances.numClasses());
    m_distribution.addRange(0, trainInstances, 0, splitIndex + 1);
    m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss);

    // Compute modified gain ratio for best split.
    m_gainRatio = gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain);
}
/// <summary>
/// Creates split on enumerated attribute: one bag per attribute value
/// (<c>m_complexityIndex</c> bags in total). Information gain and gain ratio
/// are only computed when the distribution passes the <c>m_minNoObj</c> check.
/// </summary>
/// <param name="trainInstances">the training instances</param>
/// <exception cref="Exception">if something goes wrong</exception>
private void handleEnumeratedAttribute(Instances trainInstances)
{
    m_distribution = new Distribution(m_complexityIndex, trainInstances.numClasses());

    // Only instances with known values are relevant.
    for (System.Collections.IEnumerator cursor = trainInstances.enumerateInstances(); cursor.MoveNext(); )
    {
        Instance current = (Instance) cursor.Current;
        if (current.isMissing(m_attIndex))
        {
            continue;
        }

        // The attribute value doubles as the index of the bag.
        int bag = (int) current.value_Renamed(m_attIndex);
        m_distribution.add(bag, current);
    }

    // Check if minimum number of instances in at least two subsets.
    if (!m_distribution.check(m_minNoObj))
    {
        return;
    }

    m_numSubsets = m_complexityIndex;
    m_infoGain = infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights);
    m_gainRatio = gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain);
}
/// <summary>
/// Creates split on enumerated attribute. Each attribute value is tried as the
/// basis of a two-way split; the candidate with the highest gain ratio (or the
/// candidate for value index 0, which is always accepted when it qualifies) is
/// kept.
/// </summary>
/// <param name="trainInstances">the training instances</param>
/// <exception cref="Exception">if something goes wrong</exception>
private void handleEnumeratedAttribute(Instances trainInstances)
{
    int numAttValues = trainInstances.attribute(m_attIndex).numValues();
    Distribution perValue = new Distribution(numAttValues, trainInstances.numClasses());

    // Only instances with known values are relevant.
    for (System.Collections.IEnumerator cursor = trainInstances.enumerateInstances(); cursor.MoveNext(); )
    {
        Instance current = (Instance) cursor.Current;
        if (current.isMissing(m_attIndex))
        {
            continue;
        }
        perValue.add((int) current.value_Renamed(m_attIndex), current);
    }
    m_distribution = perValue;

    // For all values
    for (int valueIndex = 0; valueIndex < numAttValues; valueIndex++)
    {
        if (!Utils.grOrEq(perValue.perBag(valueIndex), m_minNoObj))
        {
            continue;
        }

        Distribution candidate = new Distribution(perValue, valueIndex);

        // Check if minimum number of instances in the two subsets.
        if (!candidate.check(m_minNoObj))
        {
            continue;
        }

        m_numSubsets = 2;
        double currIG = m_infoGainCrit.splitCritValue(candidate, m_sumOfWeights);
        double currGR = m_gainRatioCrit.splitCritValue(candidate, m_sumOfWeights, currIG);
        if ((valueIndex == 0) || Utils.gr(currGR, m_gainRatio))
        {
            m_gainRatio = currGR;
            m_infoGain = currIG;
            m_splitPoint = (double) valueIndex;
            m_distribution = candidate;
        }
    }
}
/// <summary>
/// Builds the classifier: the training data is copied, filtered (missing
/// classes removed, missing values replaced, nominal attributes binarised) and
/// shuffled, then up to <c>m_NumIterations</c> passes are made over it. Every
/// misclassified instance is recorded as a new perceptron; training stops as
/// soon as <c>m_MaxK</c> perceptrons have been stored.
/// </summary>
/// <param name="insts">the training data</param>
/// <exception cref="Exception">
/// if the data contains string attributes, has more than two classes or has a
/// numeric class
/// </exception>
public override void buildClassifier(Instances insts)
{
    if (insts.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (insts.numClasses() > 2)
    {
        throw new System.Exception("Can only handle two-class datasets!");
    }
    if (insts.classAttribute().Numeric)
    {
        throw new Exception("Can't handle a numeric class!");
    }

    // Filter data
    m_Train = new Instances(insts);
    m_Train.deleteWithMissingClass();

    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_ReplaceMissingValues);

    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(m_Train);
    m_Train = Filter.useFilter(m_Train, m_NominalToBinary);

    // Randomize training data
    m_Train.randomize(new System.Random((System.Int32) m_Seed));

    // Make space to store perceptrons
    int capacity = m_MaxK + 1;
    m_Additions = new int[capacity];
    m_IsAddition = new bool[capacity];
    m_Weights = new int[capacity];

    // Compute perceptrons
    m_K = 0;
    for (int pass = 0; pass < m_NumIterations; pass++)
    {
        for (int row = 0; row < m_Train.numInstances(); row++)
        {
            Instance inst = m_Train.instance(row);
            if (inst.classIsMissing())
            {
                continue;
            }

            int predicted = makePrediction(m_K, inst);
            int actual = (int) inst.classValue();
            if (predicted != actual)
            {
                // Mistake: store this instance as a new perceptron and move on to it.
                m_IsAddition[m_K] = (actual == 1);
                m_Additions[m_K] = row;
                m_K++;
            }
            // Credit the perceptron that is current after the (possible) update.
            m_Weights[m_K]++;

            if (m_K == m_MaxK)
            {
                // Perceptron storage is exhausted: nothing follows the loops, so leave right away.
                return;
            }
        }
    }
}
/// <summary>
/// Creates a distribution according to given instances and split model. Each
/// instance is added to the bag returned by <c>modelToUse.whichSubset</c>; when
/// that is -1 the instance is spread over the bags using the weights supplied
/// by <c>modelToUse.GetWeights</c>.
/// </summary>
/// <param name="source">the instances to count</param>
/// <param name="modelToUse">the split model that assigns instances to bags</param>
/// <exception cref="Exception">if something goes wrong</exception>
public Distribution(Instances source, ClassifierSplitModel modelToUse)
{
    int bagCount = modelToUse.numSubsets();
    int classCount = source.numClasses();

    // One row of class counters per bag. The rows are allocated once at their
    // final size (no throw-away zero-length placeholders).
    m_perClassPerBag = new double[bagCount][];
    for (int bag = 0; bag < bagCount; bag++)
    {
        m_perClassPerBag[bag] = new double[classCount];
    }
    m_perBag = new double[bagCount];
    totaL = 0;
    m_perClass = new double[classCount];

    System.Collections.IEnumerator enu = source.enumerateInstances();
    while (enu.MoveNext())
    {
        Instance instance = (Instance) enu.Current;
        int index = modelToUse.whichSubset(instance);
        if (index != -1)
        {
            add(index, instance);
        }
        else
        {
            // No single bag applies: distribute the instance by the model's weights.
            double[] weights = modelToUse.GetWeights(instance);
            addWeights(instance, weights);
        }
    }
}
/// <summary>
/// Creates a distribution with only one bag according to instances in source.
/// </summary>
/// <param name="source">the instances to count; all of them are added to bag 0</param>
/// <exception cref="Exception">if something goes wrong</exception>
public Distribution(Instances source)
{
    int classCount = source.numClasses();

    // Single bag, allocated directly at its final size (no throw-away
    // zero-length placeholder row).
    m_perClassPerBag = new double[1][];
    m_perClassPerBag[0] = new double[classCount];
    m_perBag = new double[1];
    totaL = 0;
    m_perClass = new double[classCount];

    System.Collections.IEnumerator enu = source.enumerateInstances();
    while (enu.MoveNext())
    {
        add(0, (Instance) enu.Current);
    }
}
/// <summary>
/// Builds the boosted classifier. When <c>m_NumFolds</c> is greater than 1 the
/// number of boosting iterations is first selected by cross-validation
/// (<c>m_NumRuns</c> runs); the final model is then built on all the data and
/// stops early once the average log-likelihood changes by less than
/// <c>m_Precision</c>.
/// </summary>
/// <param name="data">
/// the training data; the method works on a copy created with
/// <c>new Instances(data)</c>
/// </param>
/// <exception cref="Exception">
/// if the class is numeric, no base classifier has been specified or the data
/// contains string attributes
/// </exception>
public virtual void buildClassifier(Instances data)
{
    m_RandomInstance = new Random(m_Seed);
    int classIndex = data.classIndex();

    if (data.classAttribute().Numeric)
    {
        throw new Exception("LogitBoost can't handle a numeric class!");
    }
    if (m_Classifier == null)
    {
        throw new System.Exception("A base classifier has not been specified!");
    }
    // A base learner that is not a WeightedInstancesHandler forces resampling.
    if (!(m_Classifier is WeightedInstancesHandler) && !m_UseResampling)
    {
        m_UseResampling = true;
    }
    if (data.checkForStringAttributes())
    {
        throw new Exception("Cannot handle string attributes!");
    }
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating copy of the training data");
    }

    m_NumClasses = data.numClasses();
    m_ClassAttribute = data.classAttribute();

    // Create a copy of the data
    data = new Instances(data);
    data.deleteWithMissingClass();

    // Create the base classifiers
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Creating base classifiers");
    }
    m_Classifiers = new Classifier[m_NumClasses][];
    for (int j = 0; j < m_NumClasses; j++)
    {
        m_Classifiers[j] = Classifier.makeCopies(m_Classifier, this.NumIterations);
    }

    // Target value for the true class and for every other class.
    double hitTarget = 1.0 - m_Offset;
    double missTarget = 0.0 + (m_Offset / (double) m_NumClasses);

    // Do we want to select the appropriate number of iterations
    // using cross-validation?
    int bestNumIterations = this.NumIterations;
    if (m_NumFolds > 1)
    {
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Processing first fold.");
        }

        // results[j] accumulates the score obtained after j + 1 iterations.
        double[] results = new double[this.NumIterations];

        // Iterate through the cv-runs
        for (int run = 0; run < m_NumRuns; run++)
        {
            // Stratify the data
            data.randomize(m_RandomInstance);
            data.stratify(m_NumFolds);

            // Perform the cross-validation
            for (int fold = 0; fold < m_NumFolds; fold++)
            {
                // Get train and test folds
                Instances train = data.trainCV(m_NumFolds, fold, m_RandomInstance);
                Instances test = data.testCV(m_NumFolds, fold);

                // Make class numeric
                Instances trainN = new Instances(train);
                trainN.ClassIndex = -1;
                trainN.deleteAttributeAt(classIndex);
                trainN.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
                trainN.ClassIndex = classIndex;
                m_NumericClassData = new Instances(trainN, 0);

                // Get class values
                int numInstances = train.numInstances();
                double[][] trainFs = allocateMatrix(numInstances, m_NumClasses);
                double[][] trainYs = allocateMatrix(numInstances, m_NumClasses);
                for (int j = 0; j < m_NumClasses; j++)
                {
                    for (int k = 0; k < numInstances; k++)
                    {
                        trainYs[k][j] = (train.instance(k).classValue() == j) ? hitTarget : missTarget;
                    }
                }

                // Perform iterations, scoring the model after each one
                double[][] probs = initialProbs(numInstances);
                m_NumGenerated = 0;
                double sumOfWeights = train.sumOfWeights();
                for (int j = 0; j < this.NumIterations; j++)
                {
                    performIteration(trainYs, trainFs, probs, trainN, sumOfWeights);
                    Evaluation eval = new Evaluation(train);
                    eval.evaluateModel(this, test);
                    results[j] += eval.correct();
                }
            }
        }

        // Find the number of iterations with the best result. results[j] was
        // recorded after the (j + 1)-th call to performIteration, so index j
        // corresponds to j + 1 iterations; using j itself would train the final
        // model with one iteration too few (none at all when index 0 wins).
        double bestResult = -System.Double.MaxValue;
        for (int j = 0; j < this.NumIterations; j++)
        {
            if (results[j] > bestResult)
            {
                bestResult = results[j];
                bestNumIterations = j + 1;
            }
        }
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Best result for " + bestNumIterations + " iterations: " + bestResult);
        }
    }

    // Build classifier on all the data
    int numInstances2 = data.numInstances();
    double[][] trainFs2 = allocateMatrix(numInstances2, m_NumClasses);
    double[][] trainYs2 = allocateMatrix(numInstances2, m_NumClasses);
    for (int j = 0; j < m_NumClasses; j++)
    {
        for (int k = 0; k < numInstances2; k++)
        {
            trainYs2[k][j] = (data.instance(k).classValue() == j) ? hitTarget : missTarget;
        }
    }

    // Make class numeric
    data.ClassIndex = -1;
    data.deleteAttributeAt(classIndex);
    data.insertAttributeAt(new weka.core.Attribute("'pseudo class'"), classIndex);
    data.ClassIndex = classIndex;
    m_NumericClassData = new Instances(data, 0);

    // Perform iterations
    double[][] probs2 = initialProbs(numInstances2);
    double logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
    m_NumGenerated = 0;
    if (m_Debug)
    {
        System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
    }
    double sumOfWeights2 = data.sumOfWeights();
    for (int j = 0; j < bestNumIterations; j++)
    {
        double previousLoglikelihood = logLikelihood;
        performIteration(trainYs2, trainFs2, probs2, data, sumOfWeights2);
        logLikelihood = CalculateLogLikelihood(trainYs2, probs2);
        if (m_Debug)
        {
            System.Console.Error.WriteLine("Avg. log-likelihood: " + logLikelihood);
        }
        // Converged: the log-likelihood no longer changes noticeably.
        if (System.Math.Abs(previousLoglikelihood - logLikelihood) < m_Precision)
        {
            return;
        }
    }
}

/// <summary>Allocates a zero-filled jagged matrix with the given dimensions.</summary>
private static double[][] allocateMatrix(int rows, int columns)
{
    double[][] matrix = new double[rows][];
    for (int r = 0; r < rows; r++)
    {
        matrix[r] = new double[columns];
    }
    return matrix;
}
/// <summary>
/// Generates the classifier. Every attribute other than the class is evaluated
/// with <c>findSplitNominal</c> or <c>findSplitNumeric</c>; the attribute with
/// the smallest criterion value is kept together with its split point and its
/// three class distributions. For a nominal class the distributions are
/// normalised afterwards.
/// </summary>
/// <param name="instances">set of instances serving as training data</param>
/// <exception cref="Exception">if the data contains string attributes</exception>
/// <exception cref="System.ArgumentException">
/// if no instance has a class value, or the data has only one attribute
/// </exception>
public override void buildClassifier(Instances instances)
{
    if (instances.checkForStringAttributes())
    {
        throw new Exception("Can't handle string attributes!");
    }

    // Buffer that keeps the distribution of the best split seen so far.
    double[][] bestDist = new double[3][];
    for (int b = 0; b < 3; b++)
    {
        bestDist[b] = new double[instances.numClasses()];
    }

    m_Instances = new Instances(instances);
    m_Instances.deleteWithMissingClass();

    if (m_Instances.numInstances() == 0)
    {
        throw new System.ArgumentException("No instances without missing " + "class values in training file!");
    }
    if (instances.numAttributes() == 1)
    {
        throw new System.ArgumentException("Attribute missing. Need at least one " + "attribute other than class attribute!");
    }

    // A non-nominal class only needs a single slot per distribution.
    int numClasses = m_Instances.classAttribute().Nominal ? m_Instances.numClasses() : 1;

    double bestVal = System.Double.MaxValue;
    double bestPoint = -System.Double.MaxValue;
    int bestAtt = -1;

    // For each attribute
    bool first = true;
    for (int att = 0; att < m_Instances.numAttributes(); att++)
    {
        if (att == m_Instances.classIndex())
        {
            continue;
        }

        // Reserve space for distribution.
        double[][] scratch = new double[3][];
        for (int b = 0; b < 3; b++)
        {
            scratch[b] = new double[numClasses];
        }
        m_Distribution = scratch;

        // Compute value of criterion for best split on attribute
        double currVal;
        if (m_Instances.attribute(att).Nominal)
        {
            currVal = findSplitNominal(att);
        }
        else
        {
            currVal = findSplitNumeric(att);
        }

        // The first attribute investigated is always accepted.
        if (first || (currVal < bestVal))
        {
            bestVal = currVal;
            bestAtt = att;
            bestPoint = m_SplitPoint;
            for (int b = 0; b < 3; b++)
            {
                Array.Copy(m_Distribution[b], 0, bestDist[b], 0, numClasses);
            }
        }

        // First attribute has been investigated
        first = false;
    }

    // Set attribute, split point and distribution.
    m_AttIndex = bestAtt;
    m_SplitPoint = bestPoint;
    m_Distribution = bestDist;

    if (m_Instances.classAttribute().Nominal)
    {
        for (int b = 0; b < m_Distribution.Length; b++)
        {
            double sumCounts = Utils.sum(m_Distribution[b]);
            if (sumCounts != 0)
            {
                Utils.normalize(m_Distribution[b], sumCounts);
                continue;
            }

            // This means there were only missing attribute values:
            // fall back to the distribution stored at index 2.
            Array.Copy(m_Distribution[2], 0, m_Distribution[b], 0, m_Distribution[2].Length);
            Utils.normalize(m_Distribution[b]);
        }
    }

    // Save memory
    m_Instances = new Instances(m_Instances, 0);
}