Ejemplo n.º 1
0
		/// <summary> Installs the format of the output instances and replaces the
		/// output queue with a new, empty one. Derived classes call this once they
		/// have determined what their output looks like.
		/// 
		/// </summary>
		/// <param name="outputFormat">the new output format; passing null clears the
		/// stored format
		/// </param>
		protected internal virtual void  setOutputFormat(Instances outputFormat)
		{
			if (outputFormat == null)
			{
				m_OutputFormat = null;
			}
			else
			{
				// Only the string-free structure of the supplied format is stored.
				m_OutputFormat = outputFormat.stringFreeStructure();
				m_OutputStringAtts = getStringIndices(m_OutputFormat);
				
				// Rename the relation: original name, a dash, then the full type
				// name of the concrete class this method is running on.
				m_OutputFormat.RelationName = System.String.Concat(outputFormat.relationName(), "-", this.GetType().FullName);
			}
			
			// The queue is replaced whether or not a format was supplied.
			m_OutputQueue = new Queue();
		}
Ejemplo n.º 2
0
		/// <summary> Sets the format of the input instances and derives the output
		/// format from the currently selected columns.
		/// 
		/// </summary>
		/// <param name="instanceInfo">an Instances object describing the input
		/// structure; only the attribute information, relation name and class index
		/// are read here.
		/// </param>
		/// <returns> always true: the output format is set before this method returns
		/// </returns>
		/// <exception cref="Exception">if the format couldn't be set successfully
		/// </exception>
		public override bool setInputFormat(Instances instanceInfo)
		{
			base.setInputFormat(instanceInfo);
			
			// The column range must know the largest valid index before it is
			// asked for the resolved selection.
			m_SelectCols.Upper = instanceInfo.numAttributes() - 1;
			
			FastVector keptAttributes = new FastVector();
			int newClassIndex = - 1;
			m_SelectedAttributes = m_SelectCols.Selection;
			
			// Scratch buffer sized for the worst case (every kept column is a string).
			int[] stringColumns = new int[m_SelectedAttributes.Length];
			int stringColumnCount = 0;
			
			foreach (int sourceIndex in m_SelectedAttributes)
			{
				// The class attribute ends up at whatever position it is appended to.
				if (sourceIndex == instanceInfo.classIndex())
				{
					newClassIndex = keptAttributes.size();
				}
				
				Attribute duplicate = (Attribute) instanceInfo.attribute(sourceIndex).copy();
				if (duplicate.type() == Attribute.STRING)
				{
					// Remember the INPUT index of each string attribute that is kept.
					stringColumns[stringColumnCount] = sourceIndex;
					stringColumnCount++;
				}
				keptAttributes.addElement(duplicate);
			}
			
			// Trim the scratch buffer down to the entries actually used.
			m_InputStringIndex = new int[stringColumnCount];
			Array.Copy(stringColumns, m_InputStringIndex, stringColumnCount);
			
			// Empty dataset carrying the selected attributes under the input's relation name.
			Instances selectedStructure = new Instances(instanceInfo.relationName(), keptAttributes, 0);
			selectedStructure.ClassIndex = newClassIndex;
			setOutputFormat(selectedStructure);
			return true;
		}
Ejemplo n.º 3
0
		/// <summary> Calculates the area under the ROC curve using the trapezoid rule.
		/// This is normalised so that 0.5 is random, 1.0 is perfect and 0.0 is bizarre.
		/// 
		/// </summary>
		/// <param name="tcurve">a previously extracted threshold curve Instances.
		/// </param>
		/// <returns> the ROC area, or Double.NaN if the relation name is not that of a
		/// ThresholdCurve or the curve contains no instances.
		/// </returns>
		public static double getROCArea(Instances tcurve)
		{
			int pointCount = tcurve.numInstances();
			bool usable = RELATION_NAME.Equals(tcurve.relationName()) && (pointCount != 0);
			if (!usable)
			{
				return System.Double.NaN;
			}
			
			int truePosColumn = tcurve.attribute(TRUE_POS_NAME).index();
			int falsePosColumn = tcurve.attribute(FALSE_POS_NAME).index();
			double[] truePositives = tcurve.attributeToDoubleArray(truePosColumn);
			double[] falsePositives = tcurve.attributeToDoubleArray(falsePosColumn);
			
			// Counts in the first row are the normalisers that turn the remaining
			// rows into rates.
			double truePosScale = truePositives[0];
			double falsePosScale = falsePositives[0];
			
			// The curve is walked from (1, 1) downwards.
			double previousX = 1.0;
			double previousY = 1.0;
			double total = 0.0;
			for (int row = 1; row < pointCount; row++)
			{
				double currentX = falsePositives[row] / falsePosScale;
				double currentY = truePositives[row] / truePosScale;
				
				// Trapezoid between the previous point and this one.
				total += (currentY + previousY) * (previousX - currentX) / 2.0;
				
				previousX = currentX;
				previousY = currentY;
			}
			
			// If the last point is not on the y axis, close the curve with a
			// triangle down to (0, 0).
			if (previousX > 0.0)
			{
				total += previousY * previousX / 2.0;
			}
			return total;
		}
Ejemplo n.º 4
0
		/// <summary> Gets the index of the instance with the closest threshold value to the
		/// desired target
		/// 
		/// </summary>
		/// <param name="tcurve">a set of instances that have been generated by this class
		/// </param>
		/// <param name="threshold">the target threshold; must lie in [0, 1]
		/// </param>
		/// <returns> the index of the instance that has threshold closest to
		/// the target, or -1 if this could not be found (i.e. wrong relation name,
		/// no data, or a threshold that is NaN or outside [0, 1])
		/// </returns>
		public static int getThresholdInstance(Instances tcurve, double threshold)
		{
			
			// NaN compares false against both range bounds, so without the explicit
			// IsNaN test it would slip through and be handed to the search.
			if (!RELATION_NAME.Equals(tcurve.relationName()) || (tcurve.numInstances() == 0) || System.Double.IsNaN(threshold) || (threshold < 0) || (threshold > 1.0))
			{
				return - 1;
			}
			// With a single instance there is nothing to search.
			if (tcurve.numInstances() == 1)
			{
				return 0;
			}
			// The threshold values are read from the last attribute of the curve.
			double[] tvals = tcurve.attributeToDoubleArray(tcurve.numAttributes() - 1);
			// NOTE(review): Utils.sort presumably returns the index order that sorts
			// tvals ascending -- confirm against Utils.
			int[] sorted = Utils.sort(tvals);
			return binarySearch(sorted, tvals, threshold);
		}
Ejemplo n.º 5
0
		/// <summary> Calculates the n point precision result, which is the precision averaged
		/// over n evenly spaced (w.r.t recall) samples of the curve. The samples are
		/// taken at recall 0, 1/(n-1), 2/(n-1), ..., 1.
		/// 
		/// </summary>
		/// <param name="tcurve">a previously extracted threshold curve Instances.
		/// </param>
		/// <param name="n">the number of points to average over; at least 2 are needed
		/// to define the spacing.
		/// </param>
		/// <returns> the n-point precision, or Double.NaN if the relation name is not
		/// that of a ThresholdCurve, the curve contains no instances, or n is
		/// smaller than 2.
		/// </returns>
		public static double getNPointPrecision(Instances tcurve, int n)
		{
			
			if (!RELATION_NAME.Equals(tcurve.relationName()) || (tcurve.numInstances() == 0))
			{
				return System.Double.NaN;
			}
			// The sample spacing is 1 / (n - 1). For n == 1 that is a division by
			// zero (the recall target becomes 0 * Infinity = NaN), and for n below 1
			// no sample is taken at all, so no meaningful average exists.
			if (n < 2)
			{
				return System.Double.NaN;
			}
			int recallInd = tcurve.attribute(RECALL_NAME).index();
			int precisInd = tcurve.attribute(PRECISION_NAME).index();
			double[] recallVals = tcurve.attributeToDoubleArray(recallInd);
			// NOTE(review): Utils.sort presumably returns the index order that sorts
			// recallVals ascending -- confirm against Utils.
			int[] sorted = Utils.sort(recallVals);
			double isize = 1.0 / (n - 1);
			double psum = 0;
			for (int i = 0; i < n; i++)
			{
				// Recall value this sample is aiming for.
				double target = i * isize;
				int pos = binarySearch(sorted, recallVals, target);
				double recall = recallVals[sorted[pos]];
				double precis = tcurve.instance(sorted[pos]).value_Renamed(precisInd);
				
				// Interpolate figures for non-endpoints: step forward through the
				// sorted order to the first entry with a different recall and put a
				// straight line through the two points.
				while ((pos != 0) && (pos < sorted.Length - 1))
				{
					pos++;
					double recall2 = recallVals[sorted[pos]];
					if (recall2 != recall)
					{
						double precis2 = tcurve.instance(sorted[pos]).value_Renamed(precisInd);
						double slope = (precis2 - precis) / (recall2 - recall);
						double offset = precis - recall * slope;
						// Evaluate that line at the target recall.
						precis = target * slope + offset;
						break;
					}
				}
				psum += precis;
			}
			return psum / n;
		}