/// <summary>
/// Picks the class whose score, as returned by <c>GetLikelyhood(sample)</c>, is the largest.
/// </summary>
/// <param name="sample">The sample to classify.</param>
/// <returns>
/// The index of the highest score. When several entries share the highest score the
/// lowest index is returned; 0 is returned when the score array is empty.
/// </returns>
public int Compute(DataSample sample)
{
    var scores = GetLikelyhood(sample);
    var best = 0;

    // Position 0 is the initial candidate, so scanning can begin at 1.
    for (var candidate = 1; candidate < scores.Length; candidate++)
    {
        // Strict comparison: an equal score never displaces the earlier winner.
        if (scores[candidate] > scores[best])
        {
            best = candidate;
        }
    }

    return best;
}
/// <summary>
/// Returns the position of the maximum value in the array produced by
/// <c>GetLikelyhood(sample)</c>.
/// </summary>
/// <param name="sample">The sample to classify.</param>
/// <returns>
/// The position of the first maximum value, or 0 when the array has no elements.
/// </returns>
public int Compute(DataSample sample)
{
    var classScores = GetLikelyhood(sample);
    int winner = 0;
    int position = 0;

    foreach (var score in classScores)
    {
        // Only a strictly greater score replaces the current winner (first maximum wins).
        if (score > classScores[winner])
        {
            winner = position;
        }

        position++;
    }

    return winner;
}
/// <summary>
/// Fills <paramref name="result"/> with one score per class index: the value of
/// <c>_classesProbablityDistribution.GetLogProbability(classIndex)</c> plus, for every
/// data point of <paramref name="sample"/>, the value of
/// <c>GetLogProbability</c> from the distribution stored for that class and the
/// data point's column.
/// </summary>
/// <param name="sample">The sample whose data points are scored.</param>
/// <param name="result">
/// Output buffer; every element is overwritten. Its length determines how many class
/// indices are scored (callers are assumed to size it to the number of classes).
/// </param>
public void GetLikelyhood(DataSample sample, double[] result)
{
    for (var classId = 0; classId < result.Length; classId++)
    {
        // Start from the class-level term, then add one term per data point.
        result[classId] = _classesProbablityDistribution.GetLogProbability(classId);

        foreach (var point in sample.DataPoints)
        {
            var observed = Convert.ToDouble(point.Value);
            var columnModel = _distribution[classId, point.ColumnId];
            result[classId] += columnModel.GetLogProbability(observed);
        }
    }
}
/// <summary>
/// Builds the classifier from training samples: one class-level categorical distribution
/// over the samples' class ids, and one distribution per (class, column) pair.
/// </summary>
/// <param name="samples">Training samples; their <c>ClassId</c> and data points are read.</param>
/// <param name="classes">Number of classes; sizes the first dimension of the distribution table.</param>
/// <param name="columnDataTypes">
/// One entry per column. Non-discrete columns get a <c>GaussianDistribution</c>; discrete
/// columns get a <c>CategoricalDistribution</c> built with the column's <c>NumberOfCategories</c>.
/// </param>
public NaiveBayesClassifier(DataSample[] samples, int classes, ColumnDataType[] columnDataTypes)
{
    _classes = classes;
    _distribution = new IDistribution[classes, columnDataTypes.Length];

    var classIds = samples.Select(item => item.ClassId).ToArray();
    _classesProbablityDistribution = new CategoricalDistribution(classIds, classes);

    var columnCount = columnDataTypes.Length;
    var dataByColumnAndClass = SplitDataPerClass(samples, _classes, columnCount);
    var samplesPerClass = GetClassGroups(samples, _classes);

    for (var column = 0; column < columnCount; column++)
    {
        // A column for which no class collected any value is skipped entirely,
        // leaving its entries in _distribution unset.
        var anyClassHasData = false;
        for (var classId = 0; classId < classes; classId++)
        {
            if (dataByColumnAndClass[column, classId] != null)
            {
                anyClassHasData = true;
                break;
            }
        }

        if (!anyClassHasData)
        {
            continue;
        }

        var columnType = columnDataTypes[column];
        for (var classId = 0; classId < classes; classId++)
        {
            // Classes without values for this column are fitted on an empty array.
            var observed = dataByColumnAndClass[column, classId] ?? new double[0];

            if (columnType.IsDiscrete)
            {
                var categories = observed.Select(item => Convert.ToInt32(item)).ToArray();
                _distribution[classId, column] = new CategoricalDistribution(
                    categories,
                    columnType.NumberOfCategories,
                    samplesPerClass[classId]);
            }
            else
            {
                _distribution[classId, column] = new GaussianDistribution(observed);
            }
        }
    }
}
/// <summary>
/// Allocates a score array with <c>_classes</c> elements and lets the
/// <c>GetLikelyhood(sample, result)</c> overload fill it.
/// </summary>
/// <param name="sample">The sample to score.</param>
/// <returns>A newly allocated array holding one score per class.</returns>
public Double[] GetLikelyhood(DataSample sample)
{
    var logScores = new double[_classes];
    GetLikelyhood(sample, logScores);
    return logScores;
}
/// <summary>
/// Converts every row of <paramref name="tableFixedData"/> into a <c>DataSample</c>.
/// </summary>
/// <remarks>
/// Columns whose attribute name equals <c>TableData.ClassAttributeName</c> are not turned
/// into data points. Cells of discrete columns are mapped through <c>GetSymbol</c> on their
/// string form before being converted to a double; other cells are converted directly.
/// The data point array is sized to the attribute count minus one, which assumes the
/// table has exactly one class attribute column.
/// </remarks>
/// <param name="tableFixedData">The table to convert.</param>
/// <returns>One sample per table row, in row order.</returns>
public static DataSample[] ToSample(TableFixedData tableFixedData)
{
    var converted = new DataSample[tableFixedData.Count];

    for (var row = 0; row < tableFixedData.Count; row++)
    {
        var attributes = tableFixedData.Attributes;
        var points = new DataPoint[attributes.Length - 1];
        var current = new DataSample
        {
            DataPoints = points,
            ClassId = tableFixedData.Class(row)
        };

        var next = 0;
        for (var column = 0; column < attributes.Length; column++)
        {
            // The class column is carried by ClassId, not by a data point.
            if (attributes[column] == TableData.ClassAttributeName)
            {
                continue;
            }

            var cell = tableFixedData[row, column];

            double numeric;
            if (tableFixedData.IsDiscreteColumn(column))
            {
                numeric = Convert.ToDouble(tableFixedData.GetSymbol(cell.ToString(), column));
            }
            else
            {
                numeric = Convert.ToDouble(cell);
            }

            // ColumnId keeps the original table column index, not the compacted position.
            points[next] = new DataPoint { ColumnId = column, Value = numeric };
            next++;
        }

        converted[row] = current;
    }

    return converted;
}
/// <summary>
/// Counts how many of the given samples belong to each class.
/// </summary>
/// <param name="samples">Samples whose <c>ClassId</c> is tallied.</param>
/// <param name="classes">Number of classes; the length of the returned array.</param>
/// <returns>An array where element <c>i</c> is the number of samples with <c>ClassId == i</c>.</returns>
public int[] GetClassGroups(DataSample[] samples, int classes)
{
    var counts = new int[classes];

    foreach (var sample in samples)
    {
        counts[sample.ClassId] += 1;
    }

    return counts;
}
/// <summary>
/// Groups the data point values of all samples by column and by class.
/// </summary>
/// <param name="samples">Samples whose data points are distributed into groups.</param>
/// <param name="classes">Size of the class dimension of the result.</param>
/// <param name="columns">Size of the column dimension of the result.</param>
/// <returns>
/// A <c>[columns, classes]</c> table. Each cell holds the values seen for that column and
/// class, in the order they were encountered, or <c>null</c> when no value was seen.
/// </returns>
public Double[,][] SplitDataPerClass(DataSample[] samples, int classes, int columns)
{
    var buckets = new List<double>[columns, classes];

    foreach (var sample in samples)
    {
        foreach (var point in sample.DataPoints)
        {
            // Buckets are created on first use so that untouched cells stay null.
            var bucket = buckets[point.ColumnId, sample.ClassId];
            if (bucket == null)
            {
                bucket = new List<double>();
                buckets[point.ColumnId, sample.ClassId] = bucket;
            }

            bucket.Add(point.Value);
        }
    }

    var split = new double[columns, classes][];
    for (var column = 0; column < columns; column++)
    {
        for (var classId = 0; classId < classes; classId++)
        {
            var collected = buckets[column, classId];
            split[column, classId] = collected == null ? null : collected.ToArray();
        }
    }

    return split;
}
/// <summary>
/// Scores <paramref name="sample"/> against every class by delegating to the
/// <c>GetLikelyhood(sample, result)</c> overload with a fresh buffer of <c>_classes</c> elements.
/// </summary>
/// <param name="sample">The sample to score.</param>
/// <returns>The filled buffer, one score per class.</returns>
public Double[] GetLikelyhood(DataSample sample)
{
    double[] perClass = new double[_classes];
    this.GetLikelyhood(sample, perClass);
    return perClass;
}