Exemplo n.º 1
0
        /// <summary>
        /// Builds the classifier from a fixed data table: one distribution for the class column
        /// and one distribution for every (class, attribute column) pair.
        /// </summary>
        /// <param name="data">Table to learn from; the reference is also stored in <c>_data</c>.</param>
        public NaiveBayesClassifierOld(TableFixedData data)
        {
            _data = data;

            var converter   = new DoubleConverter();
            var classCount  = data.ClassesValue.Length;
            var columnCount = data.Attributes.Length;

            // Rows are classes, columns are attribute columns; the slot of the class column stays unset.
            _distribution = new IDistribution[classCount, columnCount];

            for (int column = 0; column < columnCount; column++)
            {
                // The class column feeds the class distribution instead of a per-class one.
                if (data.Attributes[column] == TableData.ClassAttributeName)
                {
                    var classColumn = data.GetColumn<int>(column);
                    _classesProbablityDistribution = new CategoricalDistribution(classColumn, classCount);
                    continue;
                }

                // The column kind is decided from the value in the first row only.
                if (data[0, column].IsNumeric())
                {
                    var numbersPerClass = GetDataPerClass<double>(data, column, converter);
                    for (int classId = 0; classId < classCount; classId++)
                    {
                        _distribution[classId, column] = new GaussianDistribution(numbersPerClass[classId]);
                    }
                }
                else
                {
                    var textsPerClass = GetDataPerClass<string>(data, column);
                    for (int classId = 0; classId < classCount; classId++)
                    {
                        // Map every textual value of this class to its symbol for this column.
                        var symbols = textsPerClass[classId].Select(text => data.GetSymbol(text, column)).ToArray();

                        // NOTE(review): the class column above passes the number of classes as the second
                        // argument, while here it is the number of samples for this class - confirm that
                        // this is what CategoricalDistribution expects.
                        _distribution[classId, column] = new CategoricalDistribution(symbols, symbols.Length);
                    }
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Converts every row of the table into a <see cref="DataSample"/> holding the row's class id
        /// and one <see cref="DataPoint"/> per non-class column.
        /// </summary>
        /// <param name="tableFixedData">Table whose rows are converted.</param>
        /// <returns>An array with one sample per table row, in row order.</returns>
        public static DataSample[] ToSample(TableFixedData tableFixedData)
        {
            var result = new DataSample[tableFixedData.Count];

            for (int row = 0; row < tableFixedData.Count; row++)
            {
                // The point array has room for all columns but one
                // (presumably exactly one class column exists - confirm).
                var sample = new DataSample
                {
                    DataPoints = new DataPoint[tableFixedData.Attributes.Length - 1],
                    ClassId    = tableFixedData.Class(row)
                };

                int slot = 0;
                for (int column = 0; column < tableFixedData.Attributes.Length; column++)
                {
                    // The class column is carried by ClassId, not by a data point.
                    if (tableFixedData.Attributes[column] == TableData.ClassAttributeName)
                    {
                        continue;
                    }

                    var cell = tableFixedData[row, column];

                    // Discrete columns are stored as the symbol of the cell's text; others are converted directly.
                    double numericValue;
                    if (tableFixedData.IsDiscreteColumn(column))
                    {
                        numericValue = Convert.ToDouble(tableFixedData.GetSymbol(cell.ToString(), column));
                    }
                    else
                    {
                        numericValue = Convert.ToDouble(cell);
                    }

                    var point = new DataPoint
                    {
                        ColumnId = column,
                        Value    = numericValue
                    };

                    sample.DataPoints[slot] = point;
                    slot++;
                }

                result[row] = sample;
            }

            return result;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Predicts the class of a row: for every class, the class log-probability and the
        /// log-probabilities of all feature values are summed, and the class with the highest
        /// sum is returned.
        /// </summary>
        /// <param name="datarow">
        /// Row to classify. It is indexed by the attribute names of the training table, so it must
        /// provide a value for every non-class attribute; a class attribute, if present, is ignored.
        /// </param>
        /// <returns>The entry of <c>_data.ClassesValue</c> with the highest score (the first one on ties).</returns>
        public string Compute(IDataRow datarow)
        {
            var probabilities       = new double[_data.ClassesValue.Length];
            var doubleConverter     = new DoubleConverter();
            var maxProbabilityIndex = 0;

            for (int index = 0; index < probabilities.Length; index++)
            {
                // Seed the score with the class log-probability exactly once. Previously the score was
                // overwritten when the class column was reached, which discarded the contribution of
                // every feature column located before it.
                probabilities[index] = _classesProbablityDistribution.GetLogProbability(index);

                for (int columnIndex = 0; columnIndex < _data.Attributes.Length; columnIndex++)
                {
                    // Use the training table's attribute names so that columnIndex always matches
                    // the column the distribution in _distribution was built for.
                    var attributeName = _data.Attributes[columnIndex];
                    if (attributeName == TableData.ClassAttributeName)
                    {
                        // The class column has no per-class distribution; its share is the seed above.
                        continue;
                    }

                    var value = datarow[attributeName];
                    if (!value.IsNumeric())
                    {
                        // Non-numeric values are looked up by their symbol for this column.
                        probabilities[index] += _distribution[index, columnIndex].GetLogProbability(_data.GetSymbol((string)value, columnIndex));
                    }
                    else
                    {
                        probabilities[index] += _distribution[index, columnIndex].GetLogProbability(doubleConverter.Convert(value));
                    }
                }

                // Strict comparison keeps the earliest class when scores are equal.
                if (probabilities[maxProbabilityIndex] < probabilities[index])
                {
                    maxProbabilityIndex = index;
                }
            }

            return _data.ClassesValue[maxProbabilityIndex];
        }