/// <summary>
/// Verifies that every specified operator, applied to the given data
/// and sort direction, returns the expected sorted data and
/// sorted indexes.
/// </summary>
/// <typeparam name="TData">The type of the data.</typeparam>
/// <param name="operators">The operators to test.</param>
/// <param name="data">The data to operate on.</param>
/// <param name="sortDirection">The direction passed to each operator.</param>
/// <param name="expected">The expected results.</param>
static void Succeed<TData>(
    Func<TData, SortDirection, SortIndexResults>[] operators,
    TData data,
    SortDirection sortDirection,
    SortIndexResults expected)
    where TData : class
{
    foreach (var sortOperator in operators)
    {
        SortIndexResults actual = sortOperator(data, sortDirection);

        // Sorted values are compared up to the accuracy used by the
        // matrix tests.
        DoubleMatrixAssert.AreEqual(
            expected: expected.SortedData,
            actual: actual.SortedData,
            delta: DoubleMatrixTest.Accuracy);

        // Sorted indexes are compared without a tolerance argument.
        IndexCollectionAssert.AreEqual(
            expected: expected.SortedIndexes,
            actual: actual.SortedIndexes);
    }
}
/// <summary>
/// Groups numerical data into bins of equal values and records,
/// for each bin, how many times every target code occurs in it.
/// </summary>
/// <param name="numericalData">The numerical data.</param>
/// <param name="targetData">The target data.</param>
/// <returns>The list of <see cref="NumericalBin"/> instances
/// built from the specified numerical and target data, in the order
/// given by an ascending sort of the numerical data.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="numericalData"/> is <b>null</b>. <br/>
/// -or- <br/>
/// <paramref name="targetData"/> is <b>null</b>.
/// </exception>
/// <exception cref="ArgumentException">
/// Parameter <paramref name="targetData"/> has not the
/// same <see cref="DoubleMatrix.Count"/> of
/// parameter <paramref name="numericalData"/>.
/// </exception>
public static List<NumericalBin> GetNumericalBins(
    DoubleMatrix numericalData,
    DoubleMatrix targetData)
{
    #region Input validation

    if (numericalData is null)
    {
        throw new ArgumentNullException(nameof(numericalData));
    }

    if (targetData is null)
    {
        throw new ArgumentNullException(nameof(targetData));
    }

    if (numericalData.Count != targetData.Count)
    {
        string message = string.Format(
            CultureInfo.InvariantCulture,
            ImplementationServices.GetResourceString(
                "STR_EXCEPT_PAR_MUST_HAVE_SAME_COUNT"),
            nameof(numericalData));

        throw new ArgumentException(message, nameof(targetData));
    }

    #endregion

    // A single observation yields exactly one bin covering position 0.
    if (numericalData.Count == 1)
    {
        var onlyBin = new NumericalBin(
            0,
            numericalData[0],
            targetData.GetStorage());
        onlyBin.lastPosition = 0;
        onlyBin.targetFrequencyDistribution[targetData[0]]++;

        return new List<NumericalBin>(1) { onlyBin };
    }

    var bins = new List<NumericalBin>();

    // Sort the numerical values and reorder the targets accordingly.
    SortIndexResults sorted = Stat.SortIndex(
        numericalData,
        SortDirection.Ascending);
    var orderedValues = sorted.SortedData;
    var orderedTargets = targetData.Vec(sorted.SortedIndexes);

    // Distinct target codes, in ascending order.
    var targetCodes = orderedTargets
        .Distinct()
        .OrderBy(code => code)
        .ToArray();

    // The loop stops at the second-to-last position because each
    // iteration also inspects the position that follows it.
    int finalLoopPosition = orderedValues.Count - 2;
    bool startNewBin = true;
    NumericalBin bin = null;

    // A bin collects consecutive positions in the ordering that
    // hold the same numerical value.
    for (int position = 0; position <= finalLoopPosition; position++)
    {
        double value = orderedValues[position];

        if (startNewBin)
        {
            bin = new NumericalBin(position, value, targetCodes);
            startNewBin = false;
        }

        // Count the target code observed at this position.
        bin.targetFrequencyDistribution[orderedTargets[position]]++;

        int following = position + 1;
        double followingValue = orderedValues[following];
        bool isCutPoint = value != followingValue;

        if (position < finalLoopPosition)
        {
            // Interior position: close the bin only at a cut point.
            if (isCutPoint)
            {
                bin.lastPosition = position;
                bins.Add(bin);
                startNewBin = true;
            }

            continue;
        }

        // Final iteration: the last position must be consumed here.
        if (isCutPoint)
        {
            // Close the current bin, then open a bin made of the
            // last position only.
            bin.lastPosition = position;
            bins.Add(bin);

            bin = new NumericalBin(following, followingValue, targetCodes);
        }

        // Either way, the bin now ends at the last position and
        // accounts for its target code.
        bin.lastPosition = following;
        bin.targetFrequencyDistribution[orderedTargets[following]]++;
        bins.Add(bin);
    }

    return bins;
}