Esempio n. 1
0
 /// <summary>
 /// Runs every operator on the given data and sort direction, asserting
 /// that each one yields the expected sorted data and sorted indexes.
 /// </summary>
 /// <typeparam name="TData">The type of the data to sort.</typeparam>
 /// <param name="operators">The operators under test.</param>
 /// <param name="data">The data passed to each operator.</param>
 /// <param name="sortDirection">The direction passed to each operator.</param>
 /// <param name="expected">The results each operator must reproduce.</param>
 static void Succeed<TData>(
     Func<TData, SortDirection, SortIndexResults>[] operators,
     TData data,
     SortDirection sortDirection,
     SortIndexResults expected)
     where TData : class
 {
     foreach (var sortOperator in operators)
     {
         SortIndexResults actual = sortOperator(data, sortDirection);

         // Sorted values are compared up to the shared test accuracy.
         DoubleMatrixAssert.AreEqual(
             expected: expected.SortedData,
             actual: actual.SortedData,
             delta: DoubleMatrixTest.Accuracy);

         // Sorted indexes are compared through the index collection assert.
         IndexCollectionAssert.AreEqual(
             expected: expected.SortedIndexes,
             actual: actual.SortedIndexes);
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Builds the bins induced by a sequence of numerical data and
        /// the corresponding target data.
        /// </summary>
        /// <remarks>
        /// The numerical data are sorted in ascending order, and each maximal
        /// run of consecutive sorted positions sharing the same numerical
        /// value becomes a bin. For every position in a run, the target
        /// frequency distribution of the bin is incremented at the target
        /// value observed at that position.
        /// </remarks>
        /// <param name="numericalData">The numerical data.</param>
        /// <param name="targetData">The target data.</param>
        /// <returns>The list of <see cref="NumericalBin"/> instances
        /// corresponding to the specified numerical and target data.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="numericalData"/> is <b>null</b>. <br/>
        /// -or- <br/>
        /// <paramref name="targetData"/> is <b>null</b>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Parameter <paramref name="targetData"/> does not have the
        /// same <see cref="DoubleMatrix.Count"/> as
        /// parameter <paramref name="numericalData"/>.
        /// </exception>
        public static List<NumericalBin> GetNumericalBins(
            DoubleMatrix numericalData,
            DoubleMatrix targetData)
        {
            // Input validation.
            if (numericalData is null)
            {
                throw new ArgumentNullException(nameof(numericalData));
            }

            if (targetData is null)
            {
                throw new ArgumentNullException(nameof(targetData));
            }

            if (numericalData.Count != targetData.Count)
            {
                string message = string.Format(
                    CultureInfo.InvariantCulture,
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_PAR_MUST_HAVE_SAME_COUNT"),
                    nameof(numericalData));

                throw new ArgumentException(message, nameof(targetData));
            }

            // A single entry yields a single bin, whose target codes are
            // taken directly from the storage of the target data.
            if (numericalData.Count == 1)
            {
                var singleBin = new NumericalBin(
                    0,
                    numericalData[0],
                    targetData.GetStorage())
                {
                    lastPosition = 0
                };
                singleBin.targetFrequencyDistribution[targetData[0]]++;

                return new List<NumericalBin>(1) { singleBin };
            }

            // Sort the numerical data and align the target data
            // with the resulting ordering.
            SortIndexResults sorted = Stat.SortIndex(
                numericalData, SortDirection.Ascending);
            var sortedValues = sorted.SortedData;
            var sortedTargets = targetData.Vec(sorted.SortedIndexes);

            // Distinct target codes, in ascending order.
            var targetCodes = sortedTargets
                .Distinct()
                .OrderBy(code => code)
                .ToArray();

            var bins = new List<NumericalBin>();
            int finalPosition = sortedValues.Count - 1;
            NumericalBin openBin = null;

            // A bin collects the consecutive positions in the ordering
            // that are occupied by the same numerical value.
            for (int position = 0; position <= finalPosition; position++)
            {
                double value = sortedValues[position];

                // Open a new bin when the previous one has been closed.
                if (openBin is null)
                {
                    openBin = new NumericalBin(position, value, targetCodes);
                }

                // Update the target distribution of the open bin.
                openBin.targetFrequencyDistribution[sortedTargets[position]]++;

                // The bin ends at the final position, or wherever the
                // next sorted value differs from the current one.
                bool binEndsHere =
                    position == finalPosition
                    || value != sortedValues[position + 1];

                if (binEndsHere)
                {
                    openBin.lastPosition = position;
                    bins.Add(openBin);
                    openBin = null;
                }
            }

            return bins;
        }