/// <summary>
        /// Picks the class whose score from <c>GetLikelyhood</c> is the largest.
        /// </summary>
        /// <param name="sample">Sample to classify.</param>
        /// <returns>
        /// Index of the highest-scoring class. The lowest index wins ties, and 0 is
        /// returned when there are no scores at all.
        /// </returns>
        public int Compute(DataSample sample)
        {
            var scores = GetLikelyhood(sample);
            var best = 0;

            // Slot 0 is the initial candidate, so the scan can begin at slot 1.
            for (var candidate = 1; candidate < scores.Length; candidate++)
            {
                if (scores[candidate] > scores[best])
                {
                    best = candidate;
                }
            }

            return best;
        }
예제 #2
0
        /// <summary>
        /// Classifies a sample by taking the arg-max over its per-class scores.
        /// </summary>
        /// <param name="sample">Sample to classify.</param>
        /// <returns>
        /// Position of the largest score returned by <c>GetLikelyhood</c>; 0 when the
        /// score array is empty.
        /// </returns>
        public int Compute(DataSample sample)
        {
            var classScores = GetLikelyhood(sample);

            var winner = 0;
            var position = 0;

            while (position < classScores.Length)
            {
                // Strict comparison: an equal score never displaces an earlier class.
                if (classScores[winner] < classScores[position])
                {
                    winner = position;
                }

                position++;
            }

            return winner;
        }
        /// <summary>
        /// Fills <paramref name="result"/> with one log-score per slot: the class prior's
        /// log-probability plus the log-probability of every data point in the sample
        /// under the distribution stored for that class and column.
        /// </summary>
        /// <param name="sample">Sample whose data points are scored.</param>
        /// <param name="result">
        /// Output buffer, overwritten in place; slot <c>i</c> is used as class index <c>i</c>.
        /// </param>
        public void GetLikelyhood(DataSample sample, double[] result)
        {
            for (var classId = 0; classId < result.Length; classId++)
            {
                // Seed the slot with the class prior, then add each feature's contribution.
                result[classId] = _classesProbablityDistribution.GetLogProbability(classId);

                foreach (var point in sample.DataPoints)
                {
                    var observed = Convert.ToDouble(point.Value);
                    var columnModel = _distribution[classId, point.ColumnId];

                    result[classId] += columnModel.GetLogProbability(observed);
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Builds the classifier from labelled samples: a categorical distribution over the
        /// class ids, plus one distribution per (class, column) pair.
        /// </summary>
        /// <param name="samples">Training samples; each supplies a class id and data points.</param>
        /// <param name="classes">Number of classes; sizes the first dimension of the model table.</param>
        /// <param name="columnDataTypes">
        /// Per-column descriptors. Discrete columns get a categorical distribution, all
        /// other columns a Gaussian one.
        /// </param>
        public NaiveBayesClassifier(DataSample[] samples, int classes, ColumnDataType[] columnDataTypes)
        {
            var columnCount = columnDataTypes.Length;

            _classes = classes;
            _distribution = new IDistribution[classes, columnCount];

            var classIds = samples.Select(sample => sample.ClassId).ToArray();
            _classesProbablityDistribution = new CategoricalDistribution(classIds, classes);

            var valuesByColumnAndClass = SplitDataPerClass(samples, _classes, columnCount);
            var samplesPerClass = GetClassGroups(samples, _classes);

            for (var column = 0; column < columnCount; column++)
            {
                // Gather this column's values for every class and note whether any exist.
                var valuesByClass = new double[classes][];
                var columnHasData = false;

                for (var classId = 0; classId < classes; classId++)
                {
                    var gathered = valuesByColumnAndClass[column, classId];
                    valuesByClass[classId] = gathered;

                    if (gathered != null)
                    {
                        columnHasData = true;
                    }
                }

                // A column that no sample populated keeps its null entries in the model table.
                if (!columnHasData)
                {
                    continue;
                }

                var columnType = columnDataTypes[column];

                for (var classId = 0; classId < classes; classId++)
                {
                    // Classes without values for this column are modelled from an empty set.
                    var classValues = valuesByClass[classId] ?? new double[0];

                    if (columnType.IsDiscrete)
                    {
                        var categories = classValues.Select(Convert.ToInt32).ToArray();

                        _distribution[classId, column] = new CategoricalDistribution(
                            categories, columnType.NumberOfCategories, samplesPerClass[classId]);
                    }
                    else
                    {
                        _distribution[classId, column] = new GaussianDistribution(classValues);
                    }
                }
            }
        }
        /// <summary>
        /// Allocates a score array with one slot per class and lets the
        /// two-argument <c>GetLikelyhood</c> overload fill it.
        /// </summary>
        /// <param name="sample">Sample to score.</param>
        /// <returns>A new array of length <c>_classes</c> holding the per-class scores.</returns>
        public Double[] GetLikelyhood(DataSample sample)
        {
            var scores = new double[_classes];

            GetLikelyhood(sample, scores);

            return scores;
        }
예제 #6
0
        /// <summary>
        /// Converts every row of a fixed table into a <see cref="DataSample"/>, turning each
        /// non-class column into a numeric data point.
        /// </summary>
        /// <param name="tableFixedData">Table to convert.</param>
        /// <returns>One sample per table row, in row order.</returns>
        public static DataSample[] ToSample(TableFixedData tableFixedData)
        {
            var rowCount = tableFixedData.Count;
            var attributes = tableFixedData.Attributes;
            var result = new DataSample[rowCount];

            for (var row = 0; row < rowCount; row++)
            {
                var sample = new DataSample
                {
                    // Sized for all attributes but one; presumably exactly one of them
                    // is the class attribute (not verifiable from this method alone).
                    DataPoints = new DataPoint[attributes.Length - 1],
                    ClassId = tableFixedData.Class(row)
                };

                var nextSlot = 0;

                for (var column = 0; column < attributes.Length; column++)
                {
                    // The class column is carried by ClassId, not stored as a data point.
                    if (attributes[column] == TableData.ClassAttributeName)
                    {
                        continue;
                    }

                    var cell = tableFixedData[row, column];

                    double numeric;
                    if (tableFixedData.IsDiscreteColumn(column))
                    {
                        // Discrete cells go through the table's symbol lookup before conversion.
                        numeric = Convert.ToDouble(tableFixedData.GetSymbol(cell.ToString(), column));
                    }
                    else
                    {
                        numeric = Convert.ToDouble(cell);
                    }

                    sample.DataPoints[nextSlot] = new DataPoint
                    {
                        ColumnId = column,
                        Value = numeric
                    };
                    nextSlot++;
                }

                result[row] = sample;
            }

            return result;
        }
예제 #7
0
        /// <summary>
        /// Counts how many samples belong to each class.
        /// </summary>
        /// <param name="samples">Samples to tally.</param>
        /// <param name="classes">Number of classes; also the length of the returned array.</param>
        /// <returns>
        /// Array in which slot <c>c</c> holds the number of samples whose class id is <c>c</c>.
        /// </returns>
        public int[] GetClassGroups(DataSample[] samples, int classes)
        {
            var counts = new int[classes];

            foreach (var sample in samples)
            {
                counts[sample.ClassId] += 1;
            }

            return counts;
        }
예제 #8
0
        /// <summary>
        /// Groups the data-point values of all samples by column and class.
        /// </summary>
        /// <param name="samples">Samples whose data points are distributed into groups.</param>
        /// <param name="classes">Size of the class dimension of the result.</param>
        /// <param name="columns">Size of the column dimension of the result.</param>
        /// <returns>
        /// A table indexed as <c>[column, class]</c>. Each cell holds that pair's values in
        /// the order they were encountered, or <c>null</c> when no value was seen for it.
        /// </returns>
        public Double[,][] SplitDataPerClass(DataSample[] samples, int classes, int columns)
        {
            // Growable buckets first; cells stay null until a value arrives for them.
            var buckets = new List<Double>[columns, classes];

            foreach (var sample in samples)
            {
                foreach (var point in sample.DataPoints)
                {
                    var bucket = buckets[point.ColumnId, sample.ClassId];

                    if (bucket == null)
                    {
                        bucket = new List<double>();
                        buckets[point.ColumnId, sample.ClassId] = bucket;
                    }

                    bucket.Add(point.Value);
                }
            }

            // Freeze every populated bucket into an array; untouched cells remain null.
            var split = new double[columns, classes][];

            for (var column = 0; column < columns; column++)
            {
                for (var classId = 0; classId < classes; classId++)
                {
                    var filled = buckets[column, classId];

                    if (filled == null)
                    {
                        continue;
                    }

                    split[column, classId] = filled.ToArray();
                }
            }

            return split;
        }
예제 #9
0
        /// <summary>
        /// Writes a log-score for each class into <paramref name="result"/>.
        /// </summary>
        /// <remarks>
        /// Each slot starts from the log-probability of the class itself and then has the
        /// log-probability of every data point of the sample added, looked up in the
        /// distribution table by class index and the point's column id.
        /// </remarks>
        /// <param name="sample">Sample providing the data points.</param>
        /// <param name="result">Caller-supplied buffer; its length decides how many classes are scored.</param>
        public void GetLikelyhood(DataSample sample, double[] result)
        {
            var slotCount = result.Length;

            for (var k = 0; k < slotCount; k++)
            {
                result[k] = _classesProbablityDistribution.GetLogProbability(k);

                foreach (var feature in sample.DataPoints)
                {
                    result[k] += _distribution[k, feature.ColumnId]
                        .GetLogProbability(Convert.ToDouble(feature.Value));
                }
            }
        }
예제 #10
0
        /// <summary>
        /// Convenience overload: creates the per-class output buffer and hands it to
        /// <c>GetLikelyhood(DataSample, double[])</c> to be filled.
        /// </summary>
        /// <param name="sample">Sample to score.</param>
        /// <returns>Freshly allocated array with <c>_classes</c> entries.</returns>
        public Double[] GetLikelyhood(DataSample sample)
        {
            var perClass = new double[_classes];
            GetLikelyhood(sample, perClass);
            return perClass;
        }