Пример #1
0
        /// <summary>
        /// Builds the classifier from labelled samples: one categorical distribution over
        /// the sample class ids, plus one distribution per (class, column) pair.
        /// </summary>
        /// <param name="samples">Training samples; each sample's <c>ClassId</c> is read.</param>
        /// <param name="classes">Number of classes; sizes the first dimension of the distribution table.</param>
        /// <param name="columnDataTypes">
        /// Per-column descriptors. A column flagged <c>IsDiscrete</c> gets a
        /// <c>CategoricalDistribution</c>; any other column gets a <c>GaussianDistribution</c>.
        /// </param>
        public NaiveBayesClassifier(DataSample[] samples, int classes, ColumnDataType[] columnDataTypes)
        {
            _classes = classes;

            var columnCount = columnDataTypes.Length;
            _distribution = new IDistribution[classes, columnCount];

            // Distribution over the class ids observed in the samples.
            var sampleClassIds = samples.Select(sample => sample.ClassId).ToArray();
            _classesProbablityDistribution = new CategoricalDistribution(sampleClassIds, classes);

            var dataByColumnAndClass = SplitDataPerClass(samples, _classes, columnCount);

            // NOTE(review): presumably per-class sample counts or groupings; confirm against GetClassGroups.
            var classGroups = GetClassGroups(samples, _classes);

            for (int column = 0; column < columnCount; column++)
            {
                // Gather this column's values for every class, noting whether any class has data.
                var valuesPerClass = new double[classes][];
                var anyClassHasData = false;
                for (int classId = 0; classId < classes; classId++)
                {
                    valuesPerClass[classId] = dataByColumnAndClass[column, classId];
                    if (valuesPerClass[classId] != null)
                    {
                        anyClassHasData = true;
                    }
                }

                // No class has values for this column: leave its distribution entries unassigned.
                if (!anyClassHasData)
                {
                    continue;
                }

                for (int classId = 0; classId < classes; classId++)
                {
                    // A class without data for this column is fitted on an empty array.
                    var classValues = valuesPerClass[classId] ?? new double[0];

                    if (columnDataTypes[column].IsDiscrete)
                    {
                        var categories = classValues.Select(value => Convert.ToInt32(value)).ToArray();
                        _distribution[classId, column] = new CategoricalDistribution(
                            categories,
                            columnDataTypes[column].NumberOfCategories,
                            classGroups[classId]);
                    }
                    else
                    {
                        _distribution[classId, column] = new GaussianDistribution(classValues);
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Rebuilds the per-column label dictionaries and <c>ColumnDataTypes</c>.
        /// A column whose first-row value reports <c>IsNumeric()</c> gets a default
        /// <c>ColumnDataType</c> and an empty dictionary. Every other column is marked
        /// discrete, and each distinct string value is assigned an integer id in order
        /// of first appearance, starting at zero.
        /// </summary>
        private void BuildSymbols()
        {
            var columnCount = Attributes.Length;

            _labelsDictionary = new Dictionary<string, int>[columnCount];
            ColumnDataTypes = new ColumnDataType[columnCount];

            var rowCount = Count;

            for (int column = 0; column < columnCount; column++)
            {
                // Every column gets a dictionary, even numeric ones (which keep it empty).
                var labelIds = new Dictionary<string, int>();
                _labelsDictionary[column] = labelIds;

                // Only the first row is inspected to decide whether the column is numeric.
                if (this[0, column].IsNumeric())
                {
                    ColumnDataTypes[column] = new ColumnDataType();
                }
                else
                {
                    for (int row = 0; row < rowCount; row++)
                    {
                        var label = this[row, column].ToString();
                        if (!labelIds.ContainsKey(label))
                        {
                            // The next id equals the number of labels seen so far.
                            labelIds.Add(label, labelIds.Count);
                        }
                    }

                    ColumnDataTypes[column] = new ColumnDataType
                    {
                        IsDiscrete = true,
                        NumberOfCategories = labelIds.Count
                    };
                }
            }
        }