Esempio n. 1
0
		/// <summary> Signify that this batch of input to the filter is finished.
		/// On the first call (while no internal remove filter exists yet) the
		/// input format is scanned for attributes to drop: non-class attributes
		/// with fewer than two distinct values, and nominal attributes whose
		/// distinct-value percentage exceeds <c>m_maxVariancePercentage</c>.
		/// The input data is then run through a <c>Remove</c> filter configured
		/// with those attribute indices and the results are queued for output.
		/// </summary>
		/// <returns> true if there are instances pending output
		/// </returns>
		/// <exception cref="System.InvalidOperationException"> if no input
		/// instance format has been defined
		/// </exception>
		public override bool batchFinished()
		{
			Instances toFilter = getInputFormat();
			if (toFilter == null)
			{
				// InvalidOperationException derives from SystemException, so
				// callers catching the previously thrown type still match.
				throw new System.InvalidOperationException("No input instance format defined");
			}

			// Once m_removeFilter has been built it is kept, so the attribute
			// selection below is only established from the first batch.
			if (m_removeFilter == null)
			{
				int attributeCount = toFilter.numAttributes();
				int classIndex = toFilter.classIndex();

				// Worst case every attribute is removed; the array is trimmed
				// to the real count afterwards.
				int[] attsToDelete = new int[attributeCount];
				int numToDelete = 0;
				for (int i = 0; i < attributeCount; i++)
				{
					if (i == classIndex)
					{
						continue; // never remove the class attribute
					}

					AttributeStats stats = toFilter.attributeStats(i);
					if (stats.distinctCount < 2)
					{
						// remove constant attributes
						attsToDelete[numToDelete++] = i;
					}
					else if (toFilter.attribute(i).Nominal)
					{
						// remove nominal attributes that vary too much:
						// distinct values as a percentage of the total count
						double variancePercent = (double) stats.distinctCount / (double) stats.totalCount * 100.0;
						if (variancePercent > m_maxVariancePercentage)
						{
							attsToDelete[numToDelete++] = i;
						}
					}
				}

				int[] finalAttsToDelete = new int[numToDelete];
				Array.Copy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete);

				// Configure the delegate filter to drop exactly the selected
				// attributes (selection not inverted).
				m_removeFilter = new Remove();
				m_removeFilter.SetAttributeIndicesArray(finalAttsToDelete);
				m_removeFilter.set_InvertSelection(false);
				m_removeFilter.setInputFormat(toFilter);

				// Feed every instance held in the input format through the
				// delegate filter, then close its batch.
				int instanceCount = toFilter.numInstances();
				for (int i = 0; i < instanceCount; i++)
				{
					m_removeFilter.input(toFilter.instance(i));
				}
				m_removeFilter.batchFinished();

				Instances outputDataset = m_removeFilter.getOutputFormat();

				// restore old relation name to hide attribute filter stamp
				outputDataset.RelationName = toFilter.relationName();

				setOutputFormat(outputDataset);

				// Move the delegate's results into this filter's output queue,
				// attaching each instance to the renamed output dataset.
				Instance processed;
				while ((processed = m_removeFilter.output()) != null)
				{
					processed.Dataset = outputDataset;
					push(processed);
				}
			}

			// NOTE(review): flushInput() presumably discards the buffered input
			// instances; confirm against the base class.
			flushInput();

			m_NewBatch = true;
			return (numPendingOutput() != 0);
		}