/// <summary>
/// Registers the structure of the instances this filter is about to receive.
/// </summary>
/// <param name="instanceInfo">
/// An Instances object describing the input structure. Only the structure is
/// needed; any instances it happens to contain are ignored.
/// </param>
/// <returns>
/// true if the output format may be collected immediately. This override
/// always returns false.
/// </returns>
/// <exception cref="Exception">if the input format can't be set successfully</exception>
public override bool setInputFormat(Instances instanceInfo)
{
    // This override never reports the output format as immediately available.
    const bool outputFormatAvailable = false;

    base.setInputFormat(instanceInfo);

    // Drop any previously built attribute-removal filter; batchFinished()
    // builds a new one whenever this field is null.
    m_removeFilter = null;

    return outputFormatAvailable;
}
/// <summary>
/// Ends the training session by building one classifier for each of the
/// last <c>this.not</c> attributes of the training data.
/// </summary>
/// <remarks>
/// The collected vectors are written as ARFF text into an in-memory stream
/// and read back as a single <c>Instances</c> set. For every one of the last
/// <c>this.not</c> attributes, a copy of that set is derived in which all the
/// other trailing attributes are removed; the last attribute of the derived
/// copy is used as the class and a fresh classifier is trained on it.
/// On success the derived sets are stored in <c>datasets</c> and the trained
/// models in <c>classifiers</c>.
/// </remarks>
public void EndTrainingSession ()
{
    Instances source;

    // The using statements guarantee that the writer and the stream are
    // released even when serialising or parsing the ARFF text throws.
    using (Stream buffer = new MemoryStream ())
    using (TextWriter writer = new StreamWriter (buffer)) {
        AbstractBasicTextVector.WriteInstancesArff (writer, vectors, "c45recommender", tags, results);
        writer.Flush ();
        // Rewind so the reader sees the text that was just written.
        buffer.Position = 0;
        source = new Instances (new InputStreamReader (new InputStreamWrapper (buffer)));
    }

    Instances[] derived = new Instances[this.not];
    classifiers = new AbstractClassifier[this.not];

    // Index of the first of the trailing attributes that are modelled one at a time.
    int firstTarget = source.numAttributes () - this.not;

    // Indices handed to the Remove filter: every trailing attribute except
    // the one currently being modelled. Initially that is all but the first.
    int[] removed = new int[this.not - 1];
    for (int i = 0; i < this.not - 1; i++) {
        removed [i] = firstTarget + i + 1;
    }

    for (int i = 0; i < this.not; i++) {
        Remove rem = new Remove ();
        rem.setAttributeIndicesArray (removed);
        rem.setInputFormat (source);
        derived [i] = Filter.useFilter (source, rem);

        classifiers [i] = GenerateClassifier ();
        // NOTE(review): presumably the last attribute is the one trailing
        // attribute that was kept - this relies on Remove preserving order.
        derived [i].setClassIndex (derived [i].numAttributes () - 1);
        classifiers [i].buildClassifier (derived [i]);

        // Slide the gap forward: the attribute just modelled is removed next
        // time, which leaves attribute firstTarget + i + 1 as the kept one.
        if (i < this.not - 1) {
            removed [i] = firstTarget + i;
        }
    }

    datasets = derived;
}
/// <summary>
/// Signals that the current batch of input to the filter is finished.
/// When no removal filter has been built yet, the attributes to drop are
/// determined from the input format's data, the removal filter is created,
/// and every instance is passed through it and queued for output.
/// </summary>
/// <returns>true if there are instances pending output</returns>
/// <exception cref="System.InvalidOperationException">
/// if no input instance format has been defined
/// </exception>
public override bool batchFinished()
{
    if (getInputFormat() == null)
    {
        // InvalidOperationException derives from SystemException, so callers
        // that catch SystemException continue to catch this.
        throw new System.InvalidOperationException("No input instance format defined");
    }

    if (m_removeFilter == null)
    {
        // Establish the attributes to remove from the first batch.
        Instances toFilter = getInputFormat();
        int attributeCount = toFilter.numAttributes();
        int classIndex = toFilter.classIndex();

        // Worst case every attribute is removed, so size for all of them
        // and trim to the used prefix afterwards.
        int[] attsToDelete = new int[attributeCount];
        int numToDelete = 0;

        for (int i = 0; i < attributeCount; i++)
        {
            if (i == classIndex)
            {
                continue; // never remove the class attribute
            }

            AttributeStats stats = toFilter.attributeStats(i);
            if (stats.distinctCount < 2)
            {
                // Constant attribute: fewer than two distinct values.
                attsToDelete[numToDelete++] = i;
            }
            else if (toFilter.attribute(i).Nominal)
            {
                // Nominal attribute that varies too much: distinct values
                // as a percentage of the total count exceed the threshold.
                double variancePercent = (double) stats.distinctCount / (double) stats.totalCount * 100.0;
                if (variancePercent > m_maxVariancePercentage)
                {
                    attsToDelete[numToDelete++] = i;
                }
            }
        }

        int[] finalAttsToDelete = new int[numToDelete];
        Array.Copy(attsToDelete, 0, finalAttsToDelete, 0, numToDelete);

        m_removeFilter = new Remove();
        m_removeFilter.SetAttributeIndicesArray(finalAttsToDelete);
        m_removeFilter.set_InvertSelection(false);
        m_removeFilter.setInputFormat(toFilter);

        // Feed every instance held by the input format through the new filter.
        for (int i = 0; i < toFilter.numInstances(); i++)
        {
            m_removeFilter.input(toFilter.instance(i));
        }
        m_removeFilter.batchFinished();

        Instances outputDataset = m_removeFilter.getOutputFormat();
        // Restore the old relation name to hide the attribute filter stamp.
        outputDataset.RelationName = toFilter.relationName();
        setOutputFormat(outputDataset);

        // Drain the inner filter, re-homing each instance on the renamed
        // output dataset before queueing it.
        for (Instance processed = m_removeFilter.output();
             processed != null;
             processed = m_removeFilter.output())
        {
            processed.Dataset = outputDataset;
            push(processed);
        }
    }

    flushInput();
    m_NewBatch = true;
    return (numPendingOutput() != 0);
}