/// <summary>
/// Groups the values of a single column by the class index of the row they belong to.
/// </summary>
/// <typeparam name="T">Element type of the returned arrays.</typeparam>
/// <param name="data">Table to read; <c>data.Class(row)</c> supplies the class index of each row.</param>
/// <param name="columnIndex">Index of the column whose values are collected.</param>
/// <param name="converter">
/// Optional converter applied to every cell. When <c>null</c>, each cell is cast directly to <typeparamref name="T"/>.
/// </param>
/// <returns>
/// A jagged array with one entry per element of <c>data.ClassesValue</c>. Entry <c>i</c> contains the column
/// values of all rows whose class index is <c>i</c>, in row order. A class that no row belongs to yields an
/// empty array.
/// </returns>
private T[][] GetDataPerClass<T>(TableFixedData data, int columnIndex, IValueConverter<T> converter = null)
{
    var classCount = data.ClassesValue.Length;

    // Allocate every bucket up front: a class index that no row maps to must produce an empty
    // array rather than a null entry that blows up on ToArray().
    var buckets = new List<T>[classCount];
    for (int classIndex = 0; classIndex < classCount; classIndex++)
    {
        buckets[classIndex] = new List<T>();
    }

    var rows = data.Count;
    for (int rowIndex = 0; rowIndex < rows; rowIndex++)
    {
        var bucket = buckets[data.Class(rowIndex)];
        if (converter != null)
        {
            bucket.Add(converter.Convert(data[rowIndex, columnIndex]));
        }
        else
        {
            bucket.Add((T)data[rowIndex, columnIndex]);
        }
    }

    var ret = new T[classCount][];
    for (int classIndex = 0; classIndex < classCount; classIndex++)
    {
        ret[classIndex] = buckets[classIndex].ToArray();
    }

    return ret;
}
/// <summary>
/// Builds a <see cref="TableFixedData"/> from <paramref name="tableData"/>, storing cells in a
/// <c>[row, column]</c> array and replacing each class label with an integer id.
/// </summary>
/// <param name="tableData">Source table; its attributes define the column order.</param>
/// <returns>
/// A new table whose <c>Attributes</c> array lists the attribute names by column position and whose
/// <c>ClassesValue</c> array maps each class id back to its label. Ids are assigned in order of first
/// appearance while scanning rows from first to last.
/// </returns>
public static TableFixedData FromTableData(ITableData tableData)
{
    var tableFixedData = new TableFixedData();
    var attributesNo = tableData.Attributes.Count();
    var rowsNumber = tableData.Count;
    tableFixedData._data = new object[rowsNumber, attributesNo];

    // Assign each attribute a column position and remember where the class column sits.
    var index = 0;
    var columns = new Dictionary<string, int>();
    foreach (var attribute in tableData.Attributes)
    {
        columns[attribute] = index;
        if (attribute == TableData.ClassAttributeName)
        {
            tableFixedData._classIndex = index;
        }

        index++;
    }

    var attributes = new string[columns.Count];
    foreach (var item in columns)
    {
        attributes[item.Value] = item.Key;
    }

    tableFixedData.Attributes = attributes;

    // Class label -> integer id. The next free id is always classes.Count.
    var classes = new Dictionary<string, int>();
    for (int rowIndex = 0; rowIndex < rowsNumber; rowIndex++)
    {
        // Fetch the row once instead of once per cell.
        var row = tableData[rowIndex];
        for (int columnIndex = 0; columnIndex < attributes.Length; columnIndex++)
        {
            var attribute = attributes[columnIndex];
            var currentValue = row[attribute];
            if (attribute == TableData.ClassAttributeName)
            {
                // Single lookup instead of ContainsKey + Add + indexer.
                var className = (string)currentValue;
                int classId;
                if (!classes.TryGetValue(className, out classId))
                {
                    classId = classes.Count;
                    classes.Add(className, classId);
                }

                currentValue = classId;
            }

            tableFixedData._data[rowIndex, columnIndex] = currentValue;
        }
    }

    // Invert the label -> id map so that ClassesValue[id] gives the label.
    tableFixedData.ClassesValue = new string[classes.Count];
    foreach (var item in classes)
    {
        tableFixedData.ClassesValue[item.Value] = item.Key;
    }

    return tableFixedData;
}
/// <summary>
/// Creates the classifier and fits one distribution per (class, attribute) pair from <paramref name="data"/>.
/// </summary>
/// <remarks>
/// The class column is fitted as a <c>CategoricalDistribution</c> over the class ids. For every other column
/// the first row decides the kind: if that cell reports <c>IsNumeric()</c> the column gets a
/// <c>GaussianDistribution</c> per class, otherwise a <c>CategoricalDistribution</c> over the column's symbols.
/// </remarks>
/// <param name="data">Training table.</param>
public NaiveBayesClassifierOld(TableFixedData data)
{
    _data = data;
    var toDouble = new DoubleConverter();

    var classCount = data.ClassesValue.Length;
    var attributeCount = data.Attributes.Length;
    _distribution = new IDistribution[classCount, attributeCount];

    for (int attributeIndex = 0; attributeIndex < attributeCount; attributeIndex++)
    {
        // Class column: distribution of the class ids themselves.
        if (data.Attributes[attributeIndex] == TableData.ClassAttributeName)
        {
            var classIds = data.GetColumn<int>(attributeIndex);
            _classesProbablityDistribution = new CategoricalDistribution(classIds, classCount);
            continue;
        }

        // Column kind is decided from the first row only.
        if (data[0, attributeIndex].IsNumeric())
        {
            var numericPerClass = GetDataPerClass<double>(data, attributeIndex, toDouble);
            for (int c = 0; c < classCount; c++)
            {
                _distribution[c, attributeIndex] = new GaussianDistribution(numericPerClass[c]);
            }

            continue;
        }

        var nominalPerClass = GetDataPerClass<string>(data, attributeIndex);
        for (int c = 0; c < classCount; c++)
        {
            var symbols = nominalPerClass[c]
                .Select(item => data.GetSymbol(item, attributeIndex))
                .ToArray();

            // NOTE(review): the second argument here is the number of samples, while the class column
            // above passes the number of distinct classes. Confirm which one CategoricalDistribution expects.
            _distribution[c, attributeIndex] = new CategoricalDistribution(symbols, symbols.Length);
        }
    }
}
/// <summary>
/// Converts every row of <paramref name="tableFixedData"/> into a <see cref="DataSample"/>.
/// </summary>
/// <remarks>
/// Each sample carries the row's class id and one <see cref="DataPoint"/> per column other than the class
/// column. Values from columns reported as discrete are replaced by their symbol (looked up from the value's
/// string form) before being converted to <c>double</c>; all other values are converted directly.
/// </remarks>
/// <param name="tableFixedData">Table to convert.</param>
/// <returns>One sample per row, in row order.</returns>
public static DataSample[] ToSample(TableFixedData tableFixedData)
{
    var result = new DataSample[tableFixedData.Count];

    for (int row = 0; row < tableFixedData.Count; row++)
    {
        // Sized for all columns but one — assumes exactly one column is the class column.
        var sample = new DataSample
        {
            DataPoints = new DataPoint[tableFixedData.Attributes.Length - 1],
            ClassId = tableFixedData.Class(row)
        };

        int nextSlot = 0;
        for (int column = 0; column < tableFixedData.Attributes.Length; column++)
        {
            if (tableFixedData.Attributes[column] == TableData.ClassAttributeName)
            {
                continue;
            }

            var cell = tableFixedData[row, column];

            double numeric;
            if (tableFixedData.IsDiscreteColumn(column))
            {
                numeric = Convert.ToDouble(tableFixedData.GetSymbol(cell.ToString(), column));
            }
            else
            {
                numeric = Convert.ToDouble(cell);
            }

            sample.DataPoints[nextSlot] = new DataPoint { ColumnId = column, Value = numeric };
            nextSlot++;
        }

        result[row] = sample;
    }

    return result;
}
/// <summary>
/// Builds a <see cref="TableFixedData"/> from <paramref name="tableData"/>, storing cells in a
/// <c>[column, row]</c> array, replacing class labels with integer ids and converting the cells of
/// numeric columns to <c>double</c> where possible.
/// </summary>
/// <param name="tableData">Source table; its attributes define the column order.</param>
/// <returns>
/// A new table whose <c>Attributes</c> array lists the attribute names by column position and whose
/// <c>ClassesValue</c> array maps each class id back to its label (ids are assigned in order of first
/// appearance, scanning rows from first to last). <c>BuildSymbols()</c> has been called on the result.
/// </returns>
public static TableFixedData FromTableData(ITableData tableData)
{
    var tableFixedData = new TableFixedData();
    var attributesNo = tableData.Attributes.Count();
    var rowsNumber = tableData.Count;
    tableFixedData._data = new object[attributesNo, rowsNumber];

    // Assign each attribute a column position and remember where the class column sits.
    var index = 0;
    var columns = new Dictionary<string, int>();
    foreach (var attribute in tableData.Attributes)
    {
        columns[attribute] = index;
        if (attribute == TableData.ClassAttributeName)
        {
            tableFixedData._classIndex = index;
        }

        index++;
    }

    var attributes = new string[columns.Count];
    foreach (var item in columns)
    {
        attributes[item.Value] = item.Key;
    }

    tableFixedData.Attributes = attributes;

    // Class label -> integer id. The next free id is always classes.Count.
    var classes = new Dictionary<string, int>();
    for (int columnIndex = 0; columnIndex < attributes.Length; columnIndex++)
    {
        // Per-column facts are computed once here rather than once per cell.
        var attribute = attributes[columnIndex];
        var isClassColumn = attribute == TableData.ClassAttributeName;
        var isColumnNumeric = tableData[attribute].IsNumeric;

        for (int rowIndex = 0; rowIndex < rowsNumber; rowIndex++)
        {
            var currentValue = tableData[rowIndex][attribute];
            if (isClassColumn)
            {
                // Single lookup instead of ContainsKey + Add + indexer.
                var className = (string)currentValue;
                int classId;
                if (!classes.TryGetValue(className, out classId))
                {
                    classId = classes.Count;
                    classes.Add(className, classId);
                }

                currentValue = classId;
            }
            else if (isColumnNumeric)
            {
                // Best effort: a cell that cannot be converted is stored unchanged.
                double numericValue;
                if (currentValue.TryConvertToNumeric(out numericValue))
                {
                    currentValue = numericValue;
                }
            }

            tableFixedData._data[columnIndex, rowIndex] = currentValue;
        }
    }

    // Invert the label -> id map so that ClassesValue[id] gives the label.
    tableFixedData.ClassesValue = new string[classes.Count];
    foreach (var item in classes)
    {
        tableFixedData.ClassesValue[item.Value] = item.Key;
    }

    tableFixedData.BuildSymbols();
    return tableFixedData;
}
/// <summary>
/// Creates the algorithm instance and stores <paramref name="data"/> for later use.
/// </summary>
/// <param name="data">Table kept by reference in the <c>_data</c> field; it is not copied or validated here.</param>
public C45AlgorithmDataOptimized(TableFixedData data)
{
    this._data = data;
}