Exemple #1
0
        /// <summary>
        /// Builds the numerical blocks induced by a sequence of
        /// numerical observations and their associated target values.
        /// </summary>
        /// <param name="numericalData">The numerical data.</param>
        /// <param name="targetData">The target data.</param>
        /// <returns>
        /// The list of <see cref="NumericalBlock"/> instances obtained by
        /// first computing the bins of the data and then merging every
        /// run of adjacent bins whose target distribution is not
        /// heterogeneous and whose target mode is the same.
        /// </returns>
        /// <remarks>
        /// Input validation is performed by
        /// <see cref="NumericalBin.GetNumericalBins"/>, which is called
        /// before any other operation.
        /// </remarks>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="numericalData"/> is <b>null</b>.<br/>
        /// -or-<br/>
        /// <paramref name="targetData"/> is <b>null</b>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Parameter <paramref name="targetData"/> has not the
        /// same <see cref="DoubleMatrix.Count"/> of
        /// parameter <paramref name="numericalData"/>.
        /// </exception>
        public static List<NumericalBlock> GetNumericalBlocks(
            DoubleMatrix numericalData,
            DoubleMatrix targetData)
        {
            List<NumericalBin> bins =
                NumericalBin.GetNumericalBins(numericalData, targetData);

            int binCount = bins.Count;
            var blocks = new List<NumericalBlock>(binCount);

            // A single bin has no neighbors to merge with: it is the only block.
            if (binCount == 1)
            {
                blocks.Add(bins[0]);
                return blocks;
            }

            // Every bin is built on the same target codes, so those of the
            // first bin can be reused when creating merged blocks.
            var targetCodes = bins[0].targetFrequencyDistribution.Keys.ToArray();

            // A bin is kept as a block as is when its target distribution is
            // heterogeneous (two or more target values have nonzero
            // frequency), or when it is not heterogeneous but the bin that
            // follows it is heterogeneous or has a different target mode.
            // Otherwise it starts a run of bins to be merged in one block.
            int index = 0;
            while (index < binCount)
            {
                var head = bins[index];

                if (head.IsTargetDistributionHeterogeneous)
                {
                    blocks.Add(head);
                    index++;
                    continue;
                }

                // The head bin is not heterogeneous: extend the run as long
                // as the following bins are not heterogeneous either and
                // share the head's mode.
                double sharedMode = head.Mode;
                int pooledFrequency =
                    head.targetFrequencyDistribution[sharedMode];
                int runEnd = index;

                for (int probe = index + 1; probe < binCount; probe++)
                {
                    NumericalBlock candidate = bins[probe];

                    if (candidate.IsTargetDistributionHeterogeneous ||
                        candidate.Mode != sharedMode)
                    {
                        break;
                    }

                    pooledFrequency +=
                        candidate.targetFrequencyDistribution[sharedMode];
                    runEnd = probe;
                }

                if (runEnd == index)
                {
                    // Nothing to merge: the head bin is a block by itself.
                    blocks.Add(head);
                }
                else
                {
                    // The merged block spans from the first position of the
                    // head bin to the last position of the run's final bin,
                    // and concentrates the pooled frequency on the shared mode.
                    NumericalBlock tail = bins[runEnd];

                    var merged = new NumericalBlock(
                        head.firstPosition,
                        tail.lastValue,
                        targetCodes)
                    {
                        firstValue = head.firstValue,
                        lastPosition = tail.lastPosition
                    };
                    merged.targetFrequencyDistribution[sharedMode] =
                        pooledFrequency;

                    blocks.Add(merged);
                }

                // Resume right after the last bin consumed by this step.
                index = runEnd + 1;
            }

            return blocks;
        }
Exemple #2
0
        /// <summary>
        /// Builds the bins induced by a sequence of numerical observations
        /// and their associated target values.
        /// </summary>
        /// <param name="numericalData">The numerical data.</param>
        /// <param name="targetData">The target data.</param>
        /// <returns>
        /// The list of <see cref="NumericalBin"/> instances obtained by
        /// sorting the numerical data in ascending order and grouping the
        /// sorted positions occupied by a same numerical value. Each bin
        /// records the frequencies of the target values observed at its
        /// positions.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="numericalData"/> is <b>null</b>. <br/>
        /// -or- <br/>
        /// <paramref name="targetData"/> is <b>null</b>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Parameter <paramref name="targetData"/> has not the
        /// same <see cref="DoubleMatrix.Count"/> of
        /// parameter <paramref name="numericalData"/>.
        /// </exception>
        public static List<NumericalBin> GetNumericalBins(
            DoubleMatrix numericalData,
            DoubleMatrix targetData)
        {
            #region Input validation

            if (numericalData is null)
            {
                throw new ArgumentNullException(nameof(numericalData));
            }

            if (targetData is null)
            {
                throw new ArgumentNullException(nameof(targetData));
            }

            if (numericalData.Count != targetData.Count)
            {
                string message = string.Format(
                    CultureInfo.InvariantCulture,
                    ImplementationServices.GetResourceString(
                        "STR_EXCEPT_PAR_MUST_HAVE_SAME_COUNT"),
                    nameof(numericalData));

                throw new ArgumentException(message, nameof(targetData));
            }

            #endregion

            // A lone observation needs no sorting: it forms the only bin.
            if (numericalData.Count == 1)
            {
                var onlyBin = new NumericalBin(
                    0,
                    numericalData[0],
                    targetData.GetStorage())
                {
                    lastPosition = 0
                };
                onlyBin.targetFrequencyDistribution[targetData[0]]++;

                return new List<NumericalBin>(1) { onlyBin };
            }

            // Sort the numerical values in ascending order and rearrange
            // the target values accordingly.
            var ordering = Stat.SortIndex(
                numericalData, SortDirection.Ascending);
            var orderedValues = ordering.SortedData;
            var orderedTargets = targetData.Vec(ordering.SortedIndexes);

            // Distinct target values, in ascending order.
            var targetCodes = orderedTargets
                .Distinct()
                .OrderBy(code => code)
                .ToArray();

            var bins = new List<NumericalBin>();
            int finalPosition = orderedValues.Count - 1;
            NumericalBin openBin = null;

            // Walk the sorted positions: a bin collects the consecutive
            // positions occupied by a same numerical value, and is closed
            // as soon as the next value differs or the data are exhausted.
            for (int position = 0; position <= finalPosition; position++)
            {
                double value = orderedValues[position];

                if (openBin is null)
                {
                    openBin = new NumericalBin(position, value, targetCodes);
                }

                // Account for the target value observed at this position.
                openBin.targetFrequencyDistribution[orderedTargets[position]]++;

                bool binEndsHere =
                    position == finalPosition ||
                    value != orderedValues[position + 1];

                if (binEndsHere)
                {
                    openBin.lastPosition = position;
                    bins.Add(openBin);
                    openBin = null;
                }
            }

            return bins;
        }