/// <summary>
/// For every row of <c>_originalValidationDataTable</c>, parses the integer in column
/// <paramref name="colIndex"/> and sets to <c>true</c> the matching numeric-range
/// sub-column in the same row of <c>_validationDataTable</c>.
/// </summary>
/// <remarks>
/// The sub-columns of <paramref name="currentCol"/> are addressed as the last
/// <c>NumberOfSubColumns</c> columns of <c>_validationDataTable</c>. A value goes into the
/// sub-column of the first range key that is greater than or equal to it; a value that
/// matches none of the usable keys goes into the last column of the table.
/// NOTE(review): this presumes <c>NumericRanges</c> is sorted ascending - confirm against the
/// code that populates it.
/// </remarks>
/// <param name="currentCol">Column description supplying <c>NumericRanges</c> and <c>NumberOfSubColumns</c>.</param>
/// <param name="colIndex">Index of the source column in <c>_originalValidationDataTable</c>.</param>
/// <exception cref="System.FormatException">A cell's text is not a valid integer.</exception>
private void FillValidationNumricValues(ColumnInformation currentCol, int colIndex)
{
    // Nothing in this method adds columns, so these values are identical for every row
    // and are computed once instead of once per row.
    int subColumnCount = currentCol.NumberOfSubColumns;
    int totalColumnCount = _validationDataTable.Columns.Count;
    int firstSubColumnIndex = totalColumnCount - subColumnCount;
    int lastColumnIndex = totalColumnCount - 1;

    for (int rowCounter = 0; rowCounter < _originalValidationDataTable.Rows.Count; rowCounter++)
    {
        int originalValue = int.Parse(_originalValidationDataTable.Rows[rowCounter][colIndex].ToString());

        // Default to the last column; overwritten below when a range key matches.
        int targetColumnIndex = lastColumnIndex;
        int subColumnOffset = 0;

        foreach (int upperBound in currentCol.NumericRanges)
        {
            // Never walk past the sub-columns that actually exist: if there are more range
            // keys than sub-columns, the surplus keys would otherwise produce an index equal
            // to or beyond Columns.Count and make the row indexer throw.
            if (subColumnOffset >= subColumnCount)
            {
                break;
            }

            if (originalValue <= upperBound)
            {
                targetColumnIndex = firstSubColumnIndex + subColumnOffset;
                break;
            }

            subColumnOffset++;
        }

        _validationDataTable.Rows[rowCounter][targetColumnIndex] = true;
    }
}
/// <summary>
/// Expands one column of <c>_originalDataTable</c> into sub-columns according to
/// <paramref name="colType"/> and appends the resulting column description to
/// <c>_columnsInformationList</c>.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item><description>"bool": adds a "_False" and a "_True" column.</description></item>
/// <item><description>"numeric": adds at most <c>_range</c> columns, named after the sorted keys
/// returned by <c>TransformIntDicToRangeColumn</c>.</description></item>
/// <item><description>"nominal": adds one column per distinct value found in the source column.</description></item>
/// </list>
/// Any other <paramref name="colType"/> adds no sub-columns, but the column description is
/// still recorded.
/// NOTE(review): a second definition of this method with the same signature appears in this
/// file; confirm which one is meant to be kept.
/// </remarks>
/// <param name="col">The source column; its name is used as the prefix of every sub-column name.</param>
/// <param name="colIndex">Index of the source column in <c>_originalDataTable</c>.</param>
/// <param name="colType">One of "bool", "numeric" or "nominal".</param>
private void AnalyzeAndAddColumn(DataColumn col, int colIndex, string colType)
{
    ColumnInformation newColumn = new ColumnInformation();
    newColumn.OriginalColumnName = col.ColumnName;

    // Count how often each distinct cell value occurs. No value is dereferenced here,
    // so a table without rows simply yields an empty dictionary instead of a crash.
    Dictionary<object, int> distinctValuesDic = new Dictionary<object, int>();
    foreach (DataRow row in _originalDataTable.Rows)
    {
        object rowValue = row[colIndex];
        int occurrences;
        // TryGetValue leaves occurrences at 0 for a value that has not been seen yet.
        distinctValuesDic.TryGetValue(rowValue, out occurrences);
        distinctValuesDic[rowValue] = occurrences + 1;
    }

    switch (colType)
    {
        case "bool":
        {
            string falseColumnName = col.ColumnName + "_False";
            string trueColumnName = col.ColumnName + "_True";

            AddNewColumn(falseColumnName);
            AddNewColumn(trueColumnName);
            FillDoubleBooleanValues(col, colIndex);

            newColumn.NumberOfSubColumns = 2;
            newColumn.SubColumnsNamesList.Add(falseColumnName);
            newColumn.SubColumnsNamesList.Add(trueColumnName);
            // Same value the (n + 1) / (2n) formula below gives for n = 2.
            newColumn.R_Index = 0.75;
            break;
        }

        case "numeric":
        {
            int range = _range;
            Dictionary<int, int> rangeBuckets = TransformIntDicToRangeColumn(distinctValuesDic, range);

            List<int> sortedKeys = new List<int>(rangeBuckets.Keys);
            sortedKeys.Sort();
            newColumn.NumericRanges = sortedKeys;

            // Fewer keys than requested ranges: create only as many sub-columns as there are keys.
            if (sortedKeys.Count < range)
            {
                range = sortedKeys.Count;
            }

            newColumn.NumberOfSubColumns = range;

            List<string> columnsNames = new List<string>();
            for (int i = 0; i < range; i++)
            {
                string columnName = col.ColumnName + "_" + sortedKeys[i].ToString();
                columnsNames.Add(columnName);
                AddNewColumn(columnName);
                newColumn.SubColumnsNamesList.Add(columnName);
            }

            FillNumricValues(sortedKeys, columnsNames, colIndex);

            // With zero sub-columns this is a floating-point division by zero (positive infinity).
            newColumn.R_Index = (double)(range + 1) / (double)(range * 2);
            break;
        }

        case "nominal":
        {
            int range = distinctValuesDic.Count;
            newColumn.NumberOfSubColumns = range;

            foreach (KeyValuePair<object, int> pair in distinctValuesDic)
            {
                string valueText = pair.Key.ToString();
                string columnName = col.ColumnName + "_" + valueText;

                AddNewColumn(columnName);
                FillNominalValues(valueText, colIndex);
                newColumn.SubColumnsNamesList.Add(columnName);
            }

            // With zero sub-columns this is a floating-point division by zero (positive infinity).
            newColumn.R_Index = (double)(range + 1) / (double)(range * 2);
            break;
        }
    }

    _columnsInformationList.Add(newColumn);
}
/// <summary>
/// Expands one column of <c>_originalDataTable</c> into sub-columns according to
/// <paramref name="colType"/> and appends the resulting column description to
/// <c>_columnsInformationList</c>.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item><description>"bool": adds a "_False" and a "_True" column.</description></item>
/// <item><description>"numeric": adds at most <c>_range</c> columns, named after the sorted keys
/// returned by <c>TransformIntDicToRangeColumn</c>.</description></item>
/// <item><description>"nominal": adds one column per distinct value found in the source column.</description></item>
/// </list>
/// Any other <paramref name="colType"/> adds no sub-columns, but the column description is
/// still recorded.
/// NOTE(review): another definition of this method with the same signature appears in this
/// file; confirm which one is meant to be kept.
/// </remarks>
/// <param name="col">The source column; its name is used as the prefix of every sub-column name.</param>
/// <param name="colIndex">Index of the source column in <c>_originalDataTable</c>.</param>
/// <param name="colType">One of "bool", "numeric" or "nominal".</param>
private void AnalyzeAndAddColumn(DataColumn col, int colIndex, string colType)
{
    ColumnInformation newColumn = new ColumnInformation();
    newColumn.OriginalColumnName = col.ColumnName;

    // Count how often each distinct cell value occurs. No value is dereferenced here,
    // so a table without rows simply yields an empty dictionary instead of a crash.
    Dictionary<object, int> distinctValuesDic = new Dictionary<object, int>();
    foreach (DataRow row in _originalDataTable.Rows)
    {
        object rowValue = row[colIndex];
        int occurrences;
        // TryGetValue leaves occurrences at 0 for a value that has not been seen yet.
        distinctValuesDic.TryGetValue(rowValue, out occurrences);
        distinctValuesDic[rowValue] = occurrences + 1;
    }

    switch (colType)
    {
        case "bool":
        {
            string falseColumnName = col.ColumnName + "_False";
            string trueColumnName = col.ColumnName + "_True";

            AddNewColumn(falseColumnName);
            AddNewColumn(trueColumnName);
            FillDoubleBooleanValues(col, colIndex);

            newColumn.NumberOfSubColumns = 2;
            newColumn.SubColumnsNamesList.Add(falseColumnName);
            newColumn.SubColumnsNamesList.Add(trueColumnName);
            // Same value the (n + 1) / (2n) formula below gives for n = 2.
            newColumn.R_Index = 0.75;
            break;
        }

        case "numeric":
        {
            int range = _range;
            Dictionary<int, int> rangeBuckets = TransformIntDicToRangeColumn(distinctValuesDic, range);

            List<int> sortedKeys = new List<int>(rangeBuckets.Keys);
            sortedKeys.Sort();
            newColumn.NumericRanges = sortedKeys;

            // Fewer keys than requested ranges: create only as many sub-columns as there are keys.
            if (sortedKeys.Count < range)
            {
                range = sortedKeys.Count;
            }

            newColumn.NumberOfSubColumns = range;

            List<string> columnsNames = new List<string>();
            for (int i = 0; i < range; i++)
            {
                string columnName = col.ColumnName + "_" + sortedKeys[i].ToString();
                columnsNames.Add(columnName);
                AddNewColumn(columnName);
                newColumn.SubColumnsNamesList.Add(columnName);
            }

            FillNumricValues(sortedKeys, columnsNames, colIndex);

            // With zero sub-columns this is a floating-point division by zero (positive infinity).
            newColumn.R_Index = (double)(range + 1) / (double)(range * 2);
            break;
        }

        case "nominal":
        {
            int range = distinctValuesDic.Count;
            newColumn.NumberOfSubColumns = range;

            foreach (KeyValuePair<object, int> pair in distinctValuesDic)
            {
                string valueText = pair.Key.ToString();
                string columnName = col.ColumnName + "_" + valueText;

                AddNewColumn(columnName);
                FillNominalValues(valueText, colIndex);
                newColumn.SubColumnsNamesList.Add(columnName);
            }

            // With zero sub-columns this is a floating-point division by zero (positive infinity).
            newColumn.R_Index = (double)(range + 1) / (double)(range * 2);
            break;
        }
    }

    _columnsInformationList.Add(newColumn);
}
/// <summary>
/// Adds the sub-columns for one column of <c>_originalValidationDataTable</c> according to
/// <paramref name="colType"/> and fills them through the matching <c>FillValidation*</c> helper.
/// </summary>
/// <remarks>
/// <list type="bullet">
/// <item><description>"bool": adds a "_False" and a "_True" column.</description></item>
/// <item><description>"numeric": re-uses the sub-column names recorded in
/// <c>_columnsInformationList</c> for the column with the same name.</description></item>
/// <item><description>"nominal": adds one column per distinct value found in the validation
/// column itself.</description></item>
/// </list>
/// Any other <paramref name="colType"/> does nothing.
/// Every column is added through <c>AddNewColumn(name, true)</c>; the flag presumably selects
/// the validation table - confirm against that overload.
/// </remarks>
/// <param name="col">The source column; its name is used as the prefix of generated column names.</param>
/// <param name="colIndex">Index of the source column in <c>_originalValidationDataTable</c>.</param>
/// <param name="colType">One of "bool", "numeric" or "nominal".</param>
/// <exception cref="System.InvalidOperationException">
/// <paramref name="colType"/> is "numeric" and <c>_columnsInformationList</c> holds no entry
/// whose <c>OriginalColumnName</c> equals the name of <paramref name="col"/>.
/// </exception>
private void AnalyzeAndAddColumnToValidationSet(DataColumn col, int colIndex, string colType)
{
    switch (colType)
    {
        case "bool":
        {
            AddNewColumn(col.ColumnName + "_False", true);
            AddNewColumn(col.ColumnName + "_True", true);
            FillValidationDoubleBooleanValues(col, colIndex);
            break;
        }

        case "numeric":
        {
            ColumnInformation currentCol = null;
            foreach (ColumnInformation trainingCol in _columnsInformationList)
            {
                if (trainingCol.OriginalColumnName == col.ColumnName)
                {
                    // Deliberately no break: when several entries share the name, the last one wins.
                    currentCol = trainingCol;
                }
            }

            // Fail with a clear message instead of a NullReferenceException further down.
            if (currentCol == null)
            {
                throw new InvalidOperationException(
                    "No column information was recorded for numeric column '" + col.ColumnName + "'.");
            }

            for (int i = 0; i < currentCol.NumberOfSubColumns; i++)
            {
                AddNewColumn(currentCol.SubColumnsNamesList[i], true);
            }

            FillValidationNumricValues(currentCol, colIndex);
            break;
        }

        case "nominal":
        {
            // Only this branch needs the distinct values, so the table is scanned here rather
            // than for every column type. An empty table yields an empty dictionary.
            Dictionary<object, int> distinctValuesDic = new Dictionary<object, int>();
            foreach (DataRow row in _originalValidationDataTable.Rows)
            {
                object rowValue = row[colIndex];
                int occurrences;
                // TryGetValue leaves occurrences at 0 for a value that has not been seen yet.
                distinctValuesDic.TryGetValue(rowValue, out occurrences);
                distinctValuesDic[rowValue] = occurrences + 1;
            }

            // NOTE(review): the columns are derived from the values present in the validation
            // data, not from the layout recorded in _columnsInformationList, so their set and
            // order may differ from what was generated for the other table - confirm this is intended.
            foreach (KeyValuePair<object, int> pair in distinctValuesDic)
            {
                string valueText = pair.Key.ToString();
                AddNewColumn(col.ColumnName + "_" + valueText, true);
                FillValidationNominalValues(valueText, colIndex);
            }

            break;
        }
    }
}