/**
 * <summary> Removes one occurrence of each item, checks the remaining counts,
 * then re-adds the items so the shared fixture is left unchanged.</summary>
 */
public void TestRemoveItem()
{
    string[] items = {"item1", "item2", "item3"};
    foreach (var item in items)
    {
        smallDistribution.RemoveItem(item);
    }
    Assert.AreEqual(2, smallDistribution.GetCount("item1"));
    Assert.AreEqual(1, smallDistribution.GetCount("item2"));
    // Restore the distribution to its Setup() state for the other tests.
    foreach (var item in items)
    {
        smallDistribution.AddItem(item);
    }
}
/**
 * <summary> Subtracts a second distribution (item1 x2, item2 x1) from the fixture,
 * checks the resulting counts, then adds it back to restore the fixture.</summary>
 */
public void TestRemoveDistribution()
{
    var toRemove = new DiscreteDistribution();
    toRemove.AddItem("item1");
    toRemove.AddItem("item1");
    toRemove.AddItem("item2");
    smallDistribution.RemoveDistribution(toRemove);
    Assert.AreEqual(1, smallDistribution.GetCount("item1"));
    Assert.AreEqual(1, smallDistribution.GetCount("item2"));
    Assert.AreEqual(1, smallDistribution.GetCount("item3"));
    // Undo the removal so the shared fixture is unchanged.
    smallDistribution.AddDistribution(toRemove);
}
/**
 * <summary> Statistical test: after one million random draws from a range of one
 * million values, the fraction of distinct items observed should be close to
 * 1 - 1/e ≈ 0.632 (within the given tolerance).</summary>
 */
public void TestAddItem3()
{
    var random = new Random();
    var distribution = new DiscreteDistribution();
    for (var i = 0; i < 1000; i++)
    {
        distribution.AddItem($"{random.Next(1000)}");
    }
    for (var i = 0; i < 1000000; i++)
    {
        distribution.AddItem($"{random.Next(1000000)}");
    }
    Assert.AreEqual(distribution.Count / 1000000.0, 0.632, 0.001);
}
/**
 * <summary> Collects the class prediction of every tree in the forest into one
 * distribution and converts the vote counts into class probabilities.</summary>
 *
 * <param name="instance">Instance whose class probabilities are predicted.</param>
 * <returns>Mapping from class label to predicted probability.</returns>
 */
public override Dictionary<string, double> PredictProbability(Instance.Instance instance)
{
    var votes = new DiscreteDistribution();
    foreach (var tree in _forest)
    {
        votes.AddItem(tree.Predict(instance));
    }
    return votes.GetProbabilityDistribution();
}
/**
 * <summary> Predicts the class of the given instance by majority vote: every
 * {@link DecisionTree} in the forest casts one prediction, and the label with
 * the highest vote count wins.</summary>
 *
 * <param name="instance">Instance to make prediction for.</param>
 * <returns>The most frequently predicted class label.</returns>
 */
public override string Predict(Instance.Instance instance)
{
    var votes = new DiscreteDistribution();
    foreach (var tree in _forest)
    {
        votes.AddItem(tree.Predict(instance));
    }
    return votes.GetMaxItem();
}
/**
 * <summary> Builds the frequency distribution of class labels over all
 * instances in the list.</summary>
 *
 * <returns>Distribution of the class labels.</returns>
 */
public DiscreteDistribution ClassDistribution()
{
    var labelCounts = new DiscreteDistribution();
    foreach (var instance in _list)
    {
        labelCounts.AddItem(instance.GetClassLabel());
    }
    return labelCounts;
}
/**
 * <summary> With 1000 distinct single-occurrence items, every item's
 * probability must be exactly 1/1000.</summary>
 */
public void TestGetProbability1()
{
    var random = new Random();
    var distribution = new DiscreteDistribution();
    for (var i = 0; i < 1000; i++)
    {
        distribution.AddItem($"{i}");
    }
    Assert.AreEqual(0.001, distribution.GetProbability($"{random.Next(1000)}"), 0.0);
}
/**
 * <summary> After adding 1000 random items (with repeats), the total count
 * returned by GetSum must equal the number of additions.</summary>
 */
public void TestGetSum2()
{
    var random = new Random();
    var distribution = new DiscreteDistribution();
    for (var i = 0; i < 1000; i++)
    {
        distribution.AddItem($"{random.Next(1000)}");
    }
    Assert.AreEqual(1000, distribution.GetSum(), 0.0);
}
/**
 * <summary> Builds the class-label distribution restricted to the instances
 * whose discrete indexed attribute at the given index has the given value.</summary>
 *
 * <param name="attributeIndex">Index of the attribute.</param>
 * <param name="attributeValue">Value of the attribute to match.</param>
 * <returns>Distribution of the class labels of matching instances.</returns>
 */
public DiscreteDistribution DiscreteIndexedAttributeClassDistribution(int attributeIndex, int attributeValue)
{
    var labelCounts = new DiscreteDistribution();
    foreach (var instance in _list)
    {
        // Only count instances whose indexed attribute matches the requested value.
        if (instance.GetAttribute(attributeIndex) is DiscreteIndexedAttribute indexed
            && indexed.GetIndex() == attributeValue)
        {
            labelCounts.AddItem(instance.GetClassLabel());
        }
    }
    return labelCounts;
}
/**
 * <summary> With 1000 distinct single-occurrence items, Laplace smoothing gives
 * (1+1)/(1000+1001) = 2/2001 for a seen item and 1/2001 for an unseen one.</summary>
 */
public void TestGetProbabilityLaplaceSmoothing1()
{
    var random = new Random();
    var distribution = new DiscreteDistribution();
    for (var i = 0; i < 1000; i++)
    {
        distribution.AddItem($"{i}");
    }
    Assert.AreEqual(2.0 / 2001, distribution.GetProbabilityLaplaceSmoothing($"{random.Next(1000)}"), 0.0);
    // "item0" was never added, so it gets the unseen-item probability.
    Assert.AreEqual(1.0 / 2001, distribution.GetProbabilityLaplaceSmoothing("item0"), 0.0);
}
/**
 * <summary> The attributeDistribution method takes an index as an input and if the attribute of the instances at given index is
 * discrete, it returns the distribution of the attribute values over all instances. For a non-discrete attribute, or an
 * empty instance list, an empty distribution is returned.</summary>
 *
 * <param name="index">Index of the attribute.</param>
 * <returns>Distribution of the attribute values.</returns>
 */
public DiscreteDistribution AttributeDistribution(int index)
{
    var distribution = new DiscreteDistribution();
    // Guard: an empty list has no first instance to probe for the attribute type;
    // without this check _list[0] would throw.
    if (_list.Count == 0)
    {
        return distribution;
    }
    // The first instance's attribute type is assumed representative of the whole list.
    if (_list[0].GetAttribute(index) is DiscreteAttribute)
    {
        foreach (var instance in _list)
        {
            distribution.AddItem(((DiscreteAttribute)instance.GetAttribute(index)).GetValue());
        }
    }
    return distribution;
}
/**
 * <summary> Builds the shared fixture: item1 occurs three times, item2 twice,
 * item3 once. Insertion order is preserved exactly, since tie-breaking in the
 * distribution may depend on it.</summary>
 */
public void Setup()
{
    smallDistribution = new DiscreteDistribution();
    foreach (var item in new[] {"item1", "item2", "item3", "item1", "item2", "item1"})
    {
        smallDistribution.AddItem(item);
    }
}
/**
 * <summary> Merging a 500-distinct-item distribution (keys 1500..1999) into a
 * 1000-distinct-item distribution (keys 0..999) yields 1500 distinct items.</summary>
 */
public void TestAddDistribution2()
{
    var first = new DiscreteDistribution();
    for (var i = 0; i < 1000; i++)
    {
        first.AddItem($"{i}");
    }
    var second = new DiscreteDistribution();
    for (var i = 500; i < 1000; i++)
    {
        second.AddItem($"{1000 + i}");
    }
    first.AddDistribution(second);
    Assert.AreEqual(1500, first.Count);
}
/**
 * <summary> Adds 1000 random items in the range [0, 1000); summing the counts
 * of every key that appears must give back exactly 1000 additions.</summary>
 */
public void TestAddItem2()
{
    var random = new Random();
    var distribution = new DiscreteDistribution();
    for (var i = 0; i < 1000; i++)
    {
        distribution.AddItem($"{random.Next(1000)}");
    }
    var total = 0;
    for (var i = 0; i < 1000; i++)
    {
        var key = $"{i}";
        if (distribution.ContainsItem(key))
        {
            total += distribution.GetCount(key);
        }
    }
    Assert.AreEqual(1000, total);
}
/**
 * <summary> The DecisionNode constructor takes {@link InstanceList} data as input. It first sets the class label to the most
 * frequent class label of the data and marks the node as a leaf. If the data is pure (one distinct class label), or this is a
 * stump that already has a condition, construction stops there.
 * <p/>
 * Otherwise it searches the first <c>size</c> attributes for the split that minimizes the weighted entropy of the resulting
 * partition, where <c>size</c> is the attribute subset size from the parameter (or all attributes if no parameter is given):
 * <p/>
 * - For a {@link DiscreteIndexedAttribute}, each possible index value k is tried as a one-vs-rest split: the class distribution
 *   of the matching instances is temporarily removed from the overall class distribution, the combined entropy is computed, and
 *   the distribution is added back afterwards.
 * <p/>
 * - For a {@link DiscreteAttribute}, the entropy is computed directly via EntropyForDiscreteAttribute.
 * <p/>
 * - For a {@link ContinuousAttribute}, the data is sorted on that attribute and candidate split points (midpoints between
 *   consecutive distinct values) are swept left to right, moving each instance's class label from the left distribution to the
 *   right one as the sweep advances.
 * <p/>
 * If a split better than the entropy of the unsplit data is found, the node becomes internal and children are created
 * according to the type of the best attribute.
 * <p/>
 * NOTE(review): indexList is filled with ordered attribute indices but never shuffled, although attribute-subset selection in a
 * random forest (and an earlier version of this doc) suggests a random shuffle was intended — only the first <c>size</c>
 * attributes in order are ever considered. Confirm against the reference implementation.</summary>
 *
 * <param name="data">     {@link InstanceList} input.</param>
 * <param name="condition">{@link DecisionCondition} to check.</param>
 * <param name="parameter">RandomForestParameter like seed, ensembleSize, attributeSubsetSize.</param>
 * <param name="isStump">  Refers to decision trees with only 1 splitting rule.</param>
 */
public DecisionNode(InstanceList.InstanceList data, DecisionCondition condition, RandomForestParameter parameter, bool isStump)
{
    int bestAttribute = -1, size;
    double bestSplitValue = 0;
    this._condition = condition;
    this._data = data;
    // Default: a leaf predicting the majority class of the data.
    _classLabel = Classifier.Classifier.GetMaximum(data.GetClassLabels());
    _leaf = true;
    var classLabels = data.GetDistinctClassLabels();
    // Pure node: nothing to split.
    if (classLabels.Count == 1)
    {
        return;
    }
    // A stump grows only one level: if a condition already exists, stop.
    if (isStump && condition != null)
    {
        return;
    }
    var indexList = new List<int>();
    for (var i = 0; i < data.Get(0).AttributeSize(); i++)
    {
        indexList.Add(i);
    }
    // size = number of candidate attributes to examine (attribute subset for random forests).
    if (parameter != null && parameter.GetAttributeSubsetSize() < data.Get(0).AttributeSize())
    {
        size = parameter.GetAttributeSubsetSize();
    }
    else
    {
        size = data.Get(0).AttributeSize();
    }
    var classDistribution = data.ClassDistribution();
    // Baseline: entropy of the unsplit data; a split must beat this to be accepted.
    var bestEntropy = data.ClassDistribution().Entropy();
    for (var j = 0; j < size; j++)
    {
        var index = indexList[j];
        double entropy;
        if (data.Get(0).GetAttribute(index) is DiscreteIndexedAttribute)
        {
            // Try each index value k as a one-vs-rest split.
            for (var k = 0; k < ((DiscreteIndexedAttribute)data.Get(0).GetAttribute(index)).GetMaxIndex(); k++)
            {
                var distribution = data.DiscreteIndexedAttributeClassDistribution(index, k);
                if (distribution.GetSum() > 0)
                {
                    // Temporarily carve the k-partition out of the overall class distribution,
                    // score the split, then restore the distribution below.
                    classDistribution.RemoveDistribution(distribution);
                    entropy = (classDistribution.Entropy() * classDistribution.GetSum() + distribution.Entropy() * distribution.GetSum()) / data.Size();
                    if (entropy < bestEntropy)
                    {
                        bestEntropy = entropy;
                        bestAttribute = index;
                        bestSplitValue = k;
                    }
                    classDistribution.AddDistribution(distribution);
                }
            }
        }
        else
        {
            if (data.Get(0).GetAttribute(index) is DiscreteAttribute)
            {
                entropy = EntropyForDiscreteAttribute(index);
                if (entropy < bestEntropy)
                {
                    bestEntropy = entropy;
                    bestAttribute = index;
                }
            }
            else
            {
                if (data.Get(0).GetAttribute(index) is ContinuousAttribute)
                {
                    // Sweep candidate thresholds in sorted order, moving instances from the
                    // left distribution to the right one as the threshold advances.
                    data.Sort(index);
                    var previousValue = double.MinValue;
                    var leftDistribution = data.ClassDistribution();
                    var rightDistribution = new DiscreteDistribution();
                    for (var k = 0; k < data.Size(); k++)
                    {
                        var instance = data.Get(k);
                        if (k == 0)
                        {
                            previousValue = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                        }
                        else
                        {
                            // Only values strictly different from the previous one yield a new
                            // candidate split (the midpoint between the two values).
                            if (((ContinuousAttribute)instance.GetAttribute(index)).GetValue() != previousValue)
                            {
                                var splitValue = (previousValue + ((ContinuousAttribute)instance.GetAttribute(index))
                                                 .GetValue()) / 2;
                                previousValue = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                                entropy = (leftDistribution.GetSum() / data.Size()) * leftDistribution.Entropy() +
                                          (rightDistribution.GetSum() / data.Size()) * rightDistribution.Entropy();
                                if (entropy < bestEntropy)
                                {
                                    bestEntropy = entropy;
                                    bestSplitValue = splitValue;
                                    bestAttribute = index;
                                }
                            }
                        }
                        // Move the current instance from the "left of threshold" side to the right.
                        leftDistribution.RemoveItem(instance.GetClassLabel());
                        rightDistribution.AddItem(instance.GetClassLabel());
                    }
                }
            }
        }
    }
    if (bestAttribute != -1)
    {
        // A split improved on the baseline entropy: become an internal node and
        // build children according to the type of the winning attribute.
        _leaf = false;
        if (data.Get(0).GetAttribute(bestAttribute) is DiscreteIndexedAttribute)
        {
            CreateChildrenForDiscreteIndexed(bestAttribute, (int)bestSplitValue, parameter, isStump);
        }
        else
        {
            if (data.Get(0).GetAttribute(bestAttribute) is DiscreteAttribute)
            {
                CreateChildrenForDiscrete(bestAttribute, parameter, isStump);
            }
            else
            {
                if (data.Get(0).GetAttribute(bestAttribute) is ContinuousAttribute)
                {
                    CreateChildrenForContinuous(bestAttribute, bestSplitValue, parameter, isStump);
                }
            }
        }
    }
}