Пример #1
0
		/// <summary>
		/// Registers the structure of the incoming instances with the base
		/// implementation and discards any attribute-removal filter kept in
		/// <c>m_removeFilter</c>.
		/// </summary>
		/// <param name="instanceInfo">an Instances object describing the input
		/// structure; it is handed unchanged to the base implementation.
		/// </param>
		/// <returns> always false, i.e. the output format is never reported as
		/// available immediately after this call
		/// </returns>
		/// <remarks>
		/// Nothing is caught here, so any exception raised by the base
		/// implementation propagates to the caller.
		/// </remarks>
		public override bool setInputFormat(Instances instanceInfo)
		{
			// The base call goes first: if it throws, the existing removal filter is left untouched.
			base.setInputFormat(instanceInfo);

			// A null filter is what batchFinished() checks for before building a new one.
			m_removeFilter = null;

			return false;
		}
        /// <summary>
        /// Ends the training session: writes the collected vectors as ARFF into an
        /// in-memory stream, loads that text back as an <c>Instances</c> set, and
        /// builds one classifier per target attribute.
        /// </summary>
        /// <remarks>
        /// The last <c>this.not</c> attributes of the loaded data are treated as the
        /// targets (presumably one per tag - confirm against the ARFF writer). For
        /// target <c>i</c> a <c>Remove</c> filter is given the indices of every other
        /// target, the last attribute of the filtered set is made the class
        /// attribute, and a classifier from <c>GenerateClassifier</c> is trained on
        /// it. The results are stored in <c>classifiers</c> and <c>datasets</c>.
        /// </remarks>
        public void EndTrainingSession()
        {
            Instances source;

            // The using blocks release the writer and the stream even when writing
            // or parsing throws; previously they were closed only on success.
            using (Stream s = new MemoryStream ())
            using (TextWriter tw = new StreamWriter (s)) {
                AbstractBasicTextVector.WriteInstancesArff (tw, vectors, "c45recommender", tags, results);
                tw.Flush ();
                // Rewind so the reader starts at the beginning of the ARFF text.
                s.Position = 0;
                source = new Instances (new InputStreamReader (new InputStreamWrapper (s)));
            }

            int targetCount = this.not;
            Instances[] derived = new Instances[targetCount];
            classifiers = new AbstractClassifier[targetCount];
            int[] removed = new int[targetCount - 1];

            // Index of the first target attribute in the loaded data.
            int firstTarget = source.numAttributes () - targetCount;

            // Start with every target except the first one marked for removal.
            for (int i = 0; i < targetCount - 1; i++) {
                removed [i] = firstTarget + i + 1;
            }

            for (int i = 0; i < targetCount; i++) {
                Remove rem = new Remove ();
                rem.setAttributeIndicesArray (removed);
                rem.setInputFormat (source);
                derived [i] = Filter.useFilter (source, rem);

                classifiers [i] = GenerateClassifier ();
                derived [i].setClassIndex (derived [i].numAttributes () - 1);
                classifiers [i].buildClassifier (derived [i]);

                // Rotate the removal list for the next pass: slot i switches from
                // target i+1 to target i, so target i+1 is the one that is kept.
                if (i < targetCount - 1) {
                    removed [i] = firstTarget + i;
                }
            }
            datasets = derived;
        }
Пример #3
0
		/// <summary> Signify that this batch of input to the filter is finished.
		/// 
		/// When no removal filter exists yet, the attributes to delete are chosen
		/// from the current input format: every non-class attribute with fewer than
		/// two distinct values, and every nominal attribute whose ratio of distinct
		/// values to total values (as a percentage) exceeds
		/// <c>m_maxVariancePercentage</c>. A <c>Remove</c> filter for those
		/// attributes is then built, fed all instances of the input format, and its
		/// output is pushed onto this filter's output queue.
		/// </summary>
		/// <returns> true if there are instances pending output
		/// </returns>
		/// <exception cref="System.InvalidOperationException">if no input format has
		/// been defined
		/// </exception>
		public override bool batchFinished()
		{
			if (getInputFormat() == null)
			{
				// InvalidOperationException derives from SystemException, so callers
				// that catch SystemException keep working.
				throw new System.InvalidOperationException("No input instance format defined");
			}

			if (m_removeFilter == null)
			{
				// establish attributes to remove from first batch
				Instances toFilter = getInputFormat();

				// Sized for the worst case; only the first numToDelete slots are used.
				int[] attsToDelete = new int[toFilter.numAttributes()];
				int numToDelete = 0;
				for (int i = 0; i < toFilter.numAttributes(); i++)
				{
					if (i == toFilter.classIndex())
					{
						continue; // skip class
					}

					AttributeStats stats = toFilter.attributeStats(i);
					if (stats.distinctCount < 2)
					{
						// remove constant attributes
						attsToDelete[numToDelete++] = i;
					}
					else if (toFilter.attribute(i).Nominal)
					{
						// remove nominal attributes that vary too much
						double variancePercent = (double) stats.distinctCount / (double) stats.totalCount * 100.0;
						if (variancePercent > m_maxVariancePercentage)
						{
							attsToDelete[numToDelete++] = i;
						}
					}
				}

				// Trim the index list to the entries that were actually filled in.
				int[] finalAttsToDelete = new int[numToDelete];
				Array.Copy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete);

				m_removeFilter = new Remove();
				m_removeFilter.SetAttributeIndicesArray(finalAttsToDelete);
				m_removeFilter.set_InvertSelection(false);
				m_removeFilter.setInputFormat(toFilter);

				// Run every buffered instance through the removal filter.
				for (int i = 0; i < toFilter.numInstances(); i++)
				{
					m_removeFilter.input(toFilter.instance(i));
				}
				m_removeFilter.batchFinished();

				Instance processed;
				Instances outputDataset = m_removeFilter.getOutputFormat();

				// restore old relation name to hide attribute filter stamp
				outputDataset.RelationName = toFilter.relationName();

				setOutputFormat(outputDataset);

				// Drain the removal filter, attaching each instance to the output
				// dataset before queueing it.
				while ((processed = m_removeFilter.output()) != null)
				{
					processed.Dataset = outputDataset;
					push(processed);
				}
			}
			flushInput();

			m_NewBatch = true;
			return (numPendingOutput() != 0);
		}