/// <summary>
/// Trains the classifier from <paramref name="data"/> by fitting one distribution per
/// (class, attribute) pair plus a categorical distribution over the class column.
/// </summary>
/// <param name="data">
/// Training table. The column whose name equals <c>TableData.ClassAttributeName</c> is treated
/// as the class column; every other column is treated as a feature.
/// </param>
public NaiveBayesClassifierOld(TableFixedData data)
{
    _data = data;
    var toDouble = new DoubleConverter();
    var classCount = data.ClassesValue.Length;
    _distribution = new IDistribution[classCount, data.Attributes.Length];

    for (int column = 0; column < data.Attributes.Length; column++)
    {
        // The class column feeds the class distribution and gets no per-class entry.
        if (data.Attributes[column] == TableData.ClassAttributeName)
        {
            var classColumn = data.GetColumn<int>(column);
            _classesProbablityDistribution = new CategoricalDistribution(classColumn, classCount);
            continue;
        }

        // Column kind is decided from the value in the first row only.
        if (data[0, column].IsNumeric())
        {
            var numericPerClass = GetDataPerClass<double>(data, column, toDouble);
            for (int cls = 0; cls < classCount; cls++)
            {
                _distribution[cls, column] = new GaussianDistribution(numericPerClass[cls]);
            }

            continue;
        }

        var textPerClass = GetDataPerClass<string>(data, column);
        for (int cls = 0; cls < classCount; cls++)
        {
            // Map each raw string to its symbol for this column.
            var symbols = textPerClass[cls]
                .Select(item => data.GetSymbol(item, column))
                .ToArray();

            // NOTE(review): the second argument here is the number of samples, whereas the class
            // distribution above passes the number of classes - confirm which one
            // CategoricalDistribution expects.
            _distribution[cls, column] = new CategoricalDistribution(symbols, symbols.Length);
        }
    }
}
/// <summary>
/// Converts every row of <paramref name="tableFixedData"/> into a <see cref="DataSample"/>.
/// </summary>
/// <param name="tableFixedData">Table whose rows are converted.</param>
/// <returns>
/// One sample per row. Each sample carries the row's class id and one <see cref="DataPoint"/>
/// for every column whose name differs from <c>TableData.ClassAttributeName</c>.
/// </returns>
/// <remarks>
/// The data point array is sized to <c>Attributes.Length - 1</c>, so the code relies on exactly
/// one column being the class column (assumption - confirm against callers).
/// </remarks>
public static DataSample[] ToSample(TableFixedData tableFixedData)
{
    var rowCount = tableFixedData.Count;
    var attributeNames = tableFixedData.Attributes;
    var result = new DataSample[rowCount];

    for (int row = 0; row < rowCount; row++)
    {
        var sample = new DataSample
        {
            DataPoints = new DataPoint[attributeNames.Length - 1],
            ClassId = tableFixedData.Class(row)
        };

        int nextSlot = 0;
        for (int column = 0; column < attributeNames.Length; column++)
        {
            // The class column is carried by ClassId, not as a data point.
            if (attributeNames[column] == TableData.ClassAttributeName)
            {
                continue;
            }

            var cell = tableFixedData[row, column];

            double numericValue;
            if (tableFixedData.IsDiscreteColumn(column))
            {
                // Discrete columns are stored as the numeric form of their symbol.
                numericValue = Convert.ToDouble(tableFixedData.GetSymbol(cell.ToString(), column));
            }
            else
            {
                numericValue = Convert.ToDouble(cell);
            }

            sample.DataPoints[nextSlot] = new DataPoint { ColumnId = column, Value = numericValue };
            nextSlot++;
        }

        result[row] = sample;
    }

    return result;
}
/// <summary>
/// Predicts the class of <paramref name="datarow"/> by scoring every known class in log space
/// and returning the one with the highest score.
/// </summary>
/// <param name="datarow">
/// Row to classify. Its attributes are read positionally, so they are assumed to be in the same
/// order as the training table's columns (assumption - confirm against callers).
/// </param>
/// <returns>The entry of <c>_data.ClassesValue</c> with the highest score; ties keep the earliest class.</returns>
/// <remarks>
/// Each class score is seeded with the class log-probability and every non-class attribute adds
/// its own log-probability. Previously the score was seeded with <c>1</c> and the class
/// log-probability was assigned only when the loop reached the class column, which discarded
/// the contribution of every column placed before it and omitted the prior altogether when the
/// row had no class column.
/// </remarks>
public string Compute(IDataRow datarow)
{
    var attributes = datarow.Attributes.ToArray();
    var doubleConverter = new DoubleConverter();
    var classCount = _data.ClassesValue.Length;
    var logScores = new double[classCount];
    var bestClassIndex = 0;

    for (int classIndex = 0; classIndex < classCount; classIndex++)
    {
        // Seed with the class prior so the result does not depend on where the class column sits.
        double logScore = _classesProbablityDistribution.GetLogProbability(classIndex);

        for (int columnIndex = 0; columnIndex < _data.Attributes.Length; columnIndex++)
        {
            // The class column carries no feature evidence; its prior is already included.
            if (attributes[columnIndex] == TableData.ClassAttributeName)
            {
                continue;
            }

            var value = datarow[attributes[columnIndex]];
            if (!value.IsNumeric())
            {
                // Non-numeric values are looked up by their symbol for this column.
                logScore += _distribution[classIndex, columnIndex]
                    .GetLogProbability(_data.GetSymbol((string)value, columnIndex));
            }
            else
            {
                logScore += _distribution[classIndex, columnIndex]
                    .GetLogProbability(doubleConverter.Convert(value));
            }
        }

        logScores[classIndex] = logScore;

        // Strict comparison: the first class reaching the maximum wins ties.
        if (logScores[bestClassIndex] < logScores[classIndex])
        {
            bestClassIndex = classIndex;
        }
    }

    return _data.ClassesValue[bestClassIndex];
}