/// <summary>
/// Builds a row classifier node through the factory, hands that node to <c>_SetNode</c>
/// and then applies the node's output size via <c>SetNewSize</c>
/// </summary>
/// <param name="classifier">The row classifier that the new node will wrap</param>
/// <param name="dataTable">The data table forwarded to the factory</param>
/// <param name="analysis">Optional data table analysis, forwarded to the factory unchanged (may be null)</param>
/// <param name="name">Optional name to give the node</param>
/// <returns>Whatever <c>SetNewSize</c> returns for the classifier node's output size</returns>
public WireBuilder AddClassifier(IRowClassifier classifier, IDataTable dataTable, IDataTableAnalysis analysis = null, string name = null)
{
    var (classifierNode, outputSize) = _factory.CreateClassifier(classifier, dataTable, analysis, name);
    _SetNode(classifierNode);
    return SetNewSize(outputSize);
}
/// <summary>
/// Walks the table's column analysis and records, for every analysed column, how many
/// vector slots it occupies, the category/index lookups for categorical columns, the
/// names of the input slots and the resulting input and output sizes
/// </summary>
/// <param name="table">The data table whose columns and analysis are read</param>
/// <param name="useTargetColumnIndex">
/// True to use the table's target column index as the class column; false stores -1
/// (presumably so that no column is treated as the target - confirm that column indices are never negative)
/// </param>
public DataTableVectoriser(IDataTable table, bool useTargetColumnIndex)
{
    _column = table.Columns;
    _classColumnIndex = useTargetColumnIndex ? table.TargetColumnIndex : -1;
    _analysis = table.GetAnalysis();

    foreach (var info in _analysis.ColumnInfo)
    {
        var tableColumn = table.Columns[info.ColumnIndex];
        var isTarget = info.ColumnIndex == _classColumnIndex;
        var continuous = false;
        int width;

        if (info is IIndexColumnInfo indexed)
        {
            // index columns need one slot for every index from 0 to MaxIndex inclusive
            width = Convert.ToInt32(indexed.MaxIndex + 1);
        }
        else if (tableColumn.IsContinuous || !info.NumDistinct.HasValue)
        {
            // continuous columns (or columns without a distinct count) occupy a single slot
            continuous = true;
            width = 1;
            if (!isTarget)
            {
                _columnName.Add(tableColumn.Name);
            }
        }
        else
        {
            // categorical column: one slot per distinct value, ordered by the value's string form
            width = info.NumDistinct.Value;
            var ordered = info.DistinctValues
                .Select(value => value.ToString())
                .OrderBy(label => label)
                .Select((label, position) => new { Label = label, Position = position })
                .ToList();
            var labelToIndex = ordered.ToDictionary(entry => entry.Label, entry => entry.Position);
            var indexToLabel = ordered.ToDictionary(entry => entry.Position, entry => entry.Label);
            _columnMap.Add(info.ColumnIndex, labelToIndex);
            _reverseColumnMap.Add(info.ColumnIndex, indexToLabel);

            if (!isTarget)
            {
                for (var slot = 0; slot < width; slot++)
                {
                    _columnName.Add(tableColumn.Name + ":" + indexToLabel[slot]);
                }
            }
        }

        if (!isTarget)
        {
            _inputSize += width;
            continue;
        }

        // the target column defines the output rather than contributing to the input
        _outputSize = width;
        _isTargetContinuous = continuous;
        _hasTarget = true;
    }
}
//readonly List<IRow> _data = new List<IRow>();

/// <summary>
/// Creates a row classifier node and builds the lookup from each distinct value of the
/// table's target column (as a string) to its position in the analysis' distinct value list
/// </summary>
/// <param name="lap">Linear algebra provider stored for later use</param>
/// <param name="classifier">The row classifier stored for later use</param>
/// <param name="dataTable">The data table whose columns are inspected to find the target column</param>
/// <param name="analysis">Analysis of <paramref name="dataTable"/> that supplies the target column's distinct values</param>
/// <param name="name">Optional name passed to the base constructor</param>
/// <exception cref="System.ArgumentNullException"><paramref name="dataTable"/> or <paramref name="analysis"/> is null</exception>
/// <exception cref="System.InvalidOperationException">None of the analysed columns is marked as the target</exception>
public RowClassifier(ILinearAlgebraProvider lap, IRowClassifier classifier, IDataTable dataTable, IDataTableAnalysis analysis, string name = null) : base(name)
{
    // both arguments are dereferenced below; fail with the parameter name rather than a bare NullReferenceException
    if (dataTable == null)
    {
        throw new System.ArgumentNullException(nameof(dataTable));
    }
    if (analysis == null)
    {
        throw new System.ArgumentNullException(nameof(analysis));
    }

    _lap = lap;
    _dataTable = dataTable;
    _classifier = classifier;

    // FirstOrDefault + explicit check keeps the InvalidOperationException that First() raised,
    // but with a message that says what is actually wrong
    var targetColumn = analysis.ColumnInfo.FirstOrDefault(ci => dataTable.Columns[ci.ColumnIndex].IsTarget);
    if (targetColumn == null)
    {
        throw new System.InvalidOperationException("The data table analysis does not contain a target column; a row classifier needs a target column to build its label table.");
    }

    // NOTE(review): assumes DistinctValues is non-null for the target column - confirm against the analysis implementation
    _targetLabel = targetColumn.DistinctValues
        .Select((v, i) => (v.ToString(), i))
        .ToDictionary(d => d.Item1, d => d.Item2);
}
/// <summary>
/// Returns the analysis for this data table. The first call creates a
/// <c>DataTableAnalysis</c>, runs it through <c>Process</c> and stores it;
/// later calls return the stored instance
/// </summary>
/// <returns>The stored analysis</returns>
public IDataTableAnalysis GetAnalysis()
{
    if (_analysis != null)
    {
        return _analysis;
    }

    var freshAnalysis = new DataTableAnalysis(this);
    Process(freshAnalysis);
    _analysis = freshAnalysis;
    return _analysis;
}
/// <summary>
/// Create a row classifier node
/// </summary>
/// <param name="classifier">The classifier for each row</param>
/// <param name="dataTable">The data table that contains the rows to classify (linked by mini batch index)</param>
/// <param name="analysis">Optional data table analysis data; when null it is obtained from <paramref name="dataTable"/></param>
/// <param name="name">Optional name to give the node</param>
/// <returns>The new row classifier node together with its output size</returns>
public (INode RowClassifier, int OutputSize) CreateClassifier(IRowClassifier classifier, IDataTable dataTable, IDataTableAnalysis analysis = null, string name = null)
{
    // fall back to the table's own analysis when the caller did not supply one
    var tableAnalysis = analysis ?? dataTable.GetAnalysis();
    var classifierNode = new RowClassifier(_lap, classifier, dataTable, tableAnalysis, name);
    return (classifierNode, classifierNode.OutputSize);
}