Ejemplo n.º 1
0
		/// <summary> Generates the classifier.
		/// 
		/// </summary>
		/// <param name="instances">set of instances serving as training data 
		/// </param>
		/// <exception cref="Exception">if the classifier has not been generated successfully
		/// </exception>
		public override void  buildClassifier(Instances instances)
		{
			
			double sumOfWeights = 0;
			
			m_Class = instances.classAttribute();
			m_ClassValue = 0;
			switch (instances.classAttribute().type())
			{
				
				case weka.core.Attribute.NUMERIC: 
					m_Counts = null;
					break;

                case weka.core.Attribute.NOMINAL: 
					m_Counts = new double[instances.numClasses()];
					for (int i = 0; i < m_Counts.Length; i++)
					{
						m_Counts[i] = 1;
					}
					sumOfWeights = instances.numClasses();
					break;
				
				default: 
					throw new System.Exception("ZeroR can only handle nominal and numeric class" + " attributes.");
				
			}
			System.Collections.IEnumerator enu = instances.enumerateInstances();
			//UPGRADE_TODO: Method 'java.util.Enumeration.hasMoreElements' was converted to 'System.Collections.IEnumerator.MoveNext' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationhasMoreElements'"
			while (enu.MoveNext())
			{
				//UPGRADE_TODO: Method 'java.util.Enumeration.nextElement' was converted to 'System.Collections.IEnumerator.Current' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationnextElement'"
				Instance instance = (Instance) enu.Current;
				if (!instance.classIsMissing())
				{
					if (instances.classAttribute().Nominal)
					{
						//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
						m_Counts[(int) instance.classValue()] += instance.weight();
					}
					else
					{
						m_ClassValue += instance.weight() * instance.classValue();
					}
					sumOfWeights += instance.weight();
				}
			}
			if (instances.classAttribute().Numeric)
			{
				if (Utils.gr(sumOfWeights, 0))
				{
					m_ClassValue /= sumOfWeights;
				}
			}
			else
			{
				m_ClassValue = Utils.maxIndex(m_Counts);
				Utils.normalize(m_Counts, sumOfWeights);
			}
		}
Ejemplo n.º 2
0
		/// <summary> Creates split on numeric attribute.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		private void  handleNumericAttribute(Instances trainInstances)
		{
			
			int firstMiss;
			int next = 1;
			int last = 0;
			int splitIndex = - 1;
			double currentInfoGain;
			double defaultEnt;
			double minSplit;
			Instance instance;
			int i;
			
			// Current attribute is a numeric attribute.
			m_distribution = new Distribution(2, trainInstances.numClasses());
			
			// Only Instances with known values are relevant.
			System.Collections.IEnumerator enu = trainInstances.enumerateInstances();
			i = 0;
			//UPGRADE_TODO: Method 'java.util.Enumeration.hasMoreElements' was converted to 'System.Collections.IEnumerator.MoveNext' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationhasMoreElements'"
			while (enu.MoveNext())
			{
				//UPGRADE_TODO: Method 'java.util.Enumeration.nextElement' was converted to 'System.Collections.IEnumerator.Current' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationnextElement'"
				instance = (Instance) enu.Current;
				if (instance.isMissing(m_attIndex))
					break;
				m_distribution.add(1, instance);
				i++;
			}
			firstMiss = i;
			
			// Compute minimum number of Instances required in each
			// subset.
			minSplit = 0.1 * (m_distribution.total()) / ((double) trainInstances.numClasses());
			if (Utils.smOrEq(minSplit, m_minNoObj))
				minSplit = m_minNoObj;
			else if (Utils.gr(minSplit, 25))
				minSplit = 25;
			
			// Enough Instances with known values?
			if (Utils.sm((double) firstMiss, 2 * minSplit))
				return ;
			
			// Compute values of criteria for all possible split
			// indices.
			defaultEnt = infoGainCrit.oldEnt(m_distribution);
			while (next < firstMiss)
			{
				
				if (trainInstances.instance(next - 1).value_Renamed(m_attIndex) + 1e-5 < trainInstances.instance(next).value_Renamed(m_attIndex))
				{
					
					// Move class values for all Instances up to next 
					// possible split point.
					m_distribution.shiftRange(1, 0, trainInstances, last, next);
					
					// Check if enough Instances in each subset and compute
					// values for criteria.
					if (Utils.grOrEq(m_distribution.perBag(0), minSplit) && Utils.grOrEq(m_distribution.perBag(1), minSplit))
					{
						currentInfoGain = infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights, defaultEnt);
						if (Utils.gr(currentInfoGain, m_infoGain))
						{
							m_infoGain = currentInfoGain;
							splitIndex = next - 1;
						}
						m_index++;
					}
					last = next;
				}
				next++;
			}
			
			// Was there any useful split?
			if (m_index == 0)
				return ;
			
			// Compute modified information gain for best split.
			m_infoGain = m_infoGain - (Utils.log2(m_index) / m_sumOfWeights);
			if (Utils.smOrEq(m_infoGain, 0))
				return ;
			
			// Set instance variables' values to values for
			// best split.
			m_numSubsets = 2;
			m_splitPoint = (trainInstances.instance(splitIndex + 1).value_Renamed(m_attIndex) + trainInstances.instance(splitIndex).value_Renamed(m_attIndex)) / 2;
			
			// In case we have a numerical precision problem we need to choose the
			// smaller value
			if (m_splitPoint == trainInstances.instance(splitIndex + 1).value_Renamed(m_attIndex))
			{
				m_splitPoint = trainInstances.instance(splitIndex).value_Renamed(m_attIndex);
			}
			
			// Restore distributioN for best split.
			m_distribution = new Distribution(2, trainInstances.numClasses());
			m_distribution.addRange(0, trainInstances, 0, splitIndex + 1);
			m_distribution.addRange(1, trainInstances, splitIndex + 1, firstMiss);
			
			// Compute modified gain ratio for best split.
			m_gainRatio = gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain);
		}
Ejemplo n.º 3
0
		/// <summary> Creates split on enumerated attribute.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		private void  handleEnumeratedAttribute(Instances trainInstances)
		{
			
			Instance instance;
			
			m_distribution = new Distribution(m_complexityIndex, trainInstances.numClasses());
			
			// Only Instances with known values are relevant.
			System.Collections.IEnumerator enu = trainInstances.enumerateInstances();
			//UPGRADE_TODO: Method 'java.util.Enumeration.hasMoreElements' was converted to 'System.Collections.IEnumerator.MoveNext' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationhasMoreElements'"
			while (enu.MoveNext())
			{
				//UPGRADE_TODO: Method 'java.util.Enumeration.nextElement' was converted to 'System.Collections.IEnumerator.Current' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationnextElement'"
				instance = (Instance) enu.Current;
				if (!instance.isMissing(m_attIndex))
				{
					//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
					m_distribution.add((int) instance.value_Renamed(m_attIndex), instance);
				}
			}
			
			// Check if minimum number of Instances in at least two
			// subsets.
			if (m_distribution.check(m_minNoObj))
			{
				m_numSubsets = m_complexityIndex;
				m_infoGain = infoGainCrit.splitCritValue(m_distribution, m_sumOfWeights);
				m_gainRatio = gainRatioCrit.splitCritValue(m_distribution, m_sumOfWeights, m_infoGain);
			}
		}
Ejemplo n.º 4
0
		/// <summary> Creates split on enumerated attribute.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		private void  handleEnumeratedAttribute(Instances trainInstances)
		{
			
			Distribution newDistribution, secondDistribution;
			int numAttValues;
			double currIG, currGR;
			Instance instance;
			int i;
			
			numAttValues = trainInstances.attribute(m_attIndex).numValues();
			newDistribution = new Distribution(numAttValues, trainInstances.numClasses());
			
			// Only Instances with known values are relevant.
			System.Collections.IEnumerator enu = trainInstances.enumerateInstances();
			//UPGRADE_TODO: Method 'java.util.Enumeration.hasMoreElements' was converted to 'System.Collections.IEnumerator.MoveNext' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationhasMoreElements'"
			while (enu.MoveNext())
			{
				//UPGRADE_TODO: Method 'java.util.Enumeration.nextElement' was converted to 'System.Collections.IEnumerator.Current' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationnextElement'"
				instance = (Instance) enu.Current;
				if (!instance.isMissing(m_attIndex))
				{
					//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
					newDistribution.add((int) instance.value_Renamed(m_attIndex), instance);
				}
			}
			m_distribution = newDistribution;
			
			// For all values
			for (i = 0; i < numAttValues; i++)
			{
				
				if (Utils.grOrEq(newDistribution.perBag(i), m_minNoObj))
				{
					secondDistribution = new Distribution(newDistribution, i);
					
					// Check if minimum number of Instances in the two
					// subsets.
					if (secondDistribution.check(m_minNoObj))
					{
						m_numSubsets = 2;
						currIG = m_infoGainCrit.splitCritValue(secondDistribution, m_sumOfWeights);
						currGR = m_gainRatioCrit.splitCritValue(secondDistribution, m_sumOfWeights, currIG);
						if ((i == 0) || Utils.gr(currGR, m_gainRatio))
						{
							m_gainRatio = currGR;
							m_infoGain = currIG;
							m_splitPoint = (double) i;
							m_distribution = secondDistribution;
						}
					}
				}
			}
		}
Ejemplo n.º 5
0
		/// <summary> Adds all instances with unknown values for given attribute, weighted
		/// according to frequency of instances in each bag.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		public void  addInstWithUnknown(Instances source, int attIndex)
		{
			
			double[] probs;
			double weight, newWeight;
			int classIndex;
			Instance instance;
			int j;
			
			probs = new double[m_perBag.Length];
			for (j = 0; j < m_perBag.Length; j++)
			{
				if (Utils.eq(totaL, 0))
				{
					probs[j] = 1.0 / probs.Length;
				}
				else
				{
					probs[j] = m_perBag[j] / totaL;
				}
			}
			System.Collections.IEnumerator enu = source.enumerateInstances();
			//UPGRADE_TODO: Method 'java.util.Enumeration.hasMoreElements' was converted to 'System.Collections.IEnumerator.MoveNext' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationhasMoreElements'"
			while (enu.MoveNext())
			{
				//UPGRADE_TODO: Method 'java.util.Enumeration.nextElement' was converted to 'System.Collections.IEnumerator.Current' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationnextElement'"
				instance = (Instance) enu.Current;
				if (instance.isMissing(attIndex))
				{
					//UPGRADE_WARNING: Data types in Visual C# might be different.  Verify the accuracy of narrowing conversions. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1042'"
					classIndex = (int) instance.classValue();
					weight = instance.weight();
					m_perClass[classIndex] = m_perClass[classIndex] + weight;
					totaL = totaL + weight;
					for (j = 0; j < m_perBag.Length; j++)
					{
						newWeight = probs[j] * weight;
						m_perClassPerBag[j][classIndex] = m_perClassPerBag[j][classIndex] + newWeight;
						m_perBag[j] = m_perBag[j] + newWeight;
					}
				}
			}
		}
Ejemplo n.º 6
0
		/// <summary> Creates a distribution according to given instances and
		/// split model.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		
		public Distribution(Instances source, ClassifierSplitModel modelToUse)
		{
			
			int index;
			Instance instance;
			double[] weights;
			
			m_perClassPerBag = new double[modelToUse.numSubsets()][];
			for (int i = 0; i < modelToUse.numSubsets(); i++)
			{
				m_perClassPerBag[i] = new double[0];
			}
			m_perBag = new double[modelToUse.numSubsets()];
			totaL = 0;
			m_perClass = new double[source.numClasses()];
			for (int i = 0; i < modelToUse.numSubsets(); i++)
				m_perClassPerBag[i] = new double[source.numClasses()];
			System.Collections.IEnumerator enu = source.enumerateInstances();
			//UPGRADE_TODO: Method 'java.util.Enumeration.hasMoreElements' was converted to 'System.Collections.IEnumerator.MoveNext' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationhasMoreElements'"
			while (enu.MoveNext())
			{
				//UPGRADE_TODO: Method 'java.util.Enumeration.nextElement' was converted to 'System.Collections.IEnumerator.Current' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationnextElement'"
				instance = (Instance) enu.Current;
				index = modelToUse.whichSubset(instance);
				if (index != - 1)
					add(index, instance);
				else
				{
					weights = modelToUse.GetWeights(instance);
					addWeights(instance, weights);
				}
			}
		}
Ejemplo n.º 7
0
		/// <summary> Creates a distribution with only one bag according
		/// to instances in source.
		/// 
		/// </summary>
		/// <exception cref="Exception">if something goes wrong
		/// </exception>
		public Distribution(Instances source)
		{
			
			m_perClassPerBag = new double[1][];
			for (int i = 0; i < 1; i++)
			{
				m_perClassPerBag[i] = new double[0];
			}
			m_perBag = new double[1];
			totaL = 0;
			m_perClass = new double[source.numClasses()];
			m_perClassPerBag[0] = new double[source.numClasses()];
			System.Collections.IEnumerator enu = source.enumerateInstances();
			//UPGRADE_TODO: Method 'java.util.Enumeration.hasMoreElements' was converted to 'System.Collections.IEnumerator.MoveNext' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationhasMoreElements'"
			while (enu.MoveNext())
			{
				//UPGRADE_TODO: Method 'java.util.Enumeration.nextElement' was converted to 'System.Collections.IEnumerator.Current' which has a different behavior. "ms-help://MS.VSCC.v80/dv_commoner/local/redirect.htm?index='!DefaultContextWindowIndex'&keyword='jlca1073_javautilEnumerationnextElement'"
				add(0, (Instance) enu.Current);
			}
		}