/// <summary>
/// Initializes a new <see cref="DatasetColumnInfo"/> by storing each argument in the
/// member of the matching name.
/// </summary>
/// <param name="name">Value stored in <c>Name</c>.</param>
/// <param name="type">Value stored in <c>Type</c>.</param>
/// <param name="purpose">Value stored in <c>Purpose</c>.</param>
/// <param name="dimensions">Value stored in <c>Dimensions</c>.</param>
public DatasetColumnInfo(string name, DataViewType type, ColumnPurpose purpose, ColumnDimensions dimensions)
{
    Name = name;
    Type = type;
    Purpose = purpose;
    Dimensions = dimensions;
}
/// <summary>
/// Initializes a new <see cref="IntermediateColumn"/> from the supplied values.
/// Note that <paramref name="name"/> is stored in <c>ColumnName</c>; the remaining
/// arguments go to the members of the matching name.
/// </summary>
/// <param name="name">Value stored in <c>ColumnName</c>.</param>
/// <param name="type">Value stored in <c>Type</c>.</param>
/// <param name="purpose">Value stored in <c>Purpose</c>.</param>
/// <param name="dimensions">Value stored in <c>Dimensions</c>.</param>
public IntermediateColumn(string name, DataViewType type, ColumnPurpose purpose, ColumnDimensions dimensions)
{
    ColumnName = name;
    Type = type;
    Purpose = purpose;
    Dimensions = dimensions;
}
/// <summary>
/// Builds one <see cref="ColumnDimensions"/> entry per schema column of <paramref name="data"/>.
/// Only the rows returned by <c>context.Data.TakeRows(data, MaxRowsToRead)</c> are inspected.
/// </summary>
/// <param name="context">ML context whose data catalog is used to limit the rows that are read.</param>
/// <param name="data">Data view whose columns are measured.</param>
/// <param name="purposes">
/// Column purposes, indexed in parallel with the schema of <paramref name="data"/>
/// (element <c>i</c> is used for schema column <c>i</c>).
/// </param>
/// <returns>
/// An array with one element per schema column, in schema order. Each element carries a
/// cardinality (null unless the column's item type is text and its purpose is
/// <c>ColumnPurpose.CategoricalFeature</c>) and a has-missing flag (null unless the column's
/// item type is <c>NumberDataViewType.Single</c>).
/// </returns>
public static ColumnDimensions[] CalcColumnDimensions(MLContext context, IDataView data, PurposeInference.Column[] purposes)
{
    // Work on the row-limited view from here on; the full input is never scanned directly.
    var sampledData = context.Data.TakeRows(data, MaxRowsToRead);
    var schema = sampledData.Schema;
    var dimensions = new ColumnDimensions[schema.Count];

    for (var index = 0; index < schema.Count; index++)
    {
        var schemaColumn = schema[index];
        var inferredPurpose = purposes[index];
        var elementType = schemaColumn.Type.GetItemType();

        // Cardinality stays null unless this is a text column used as a categorical feature.
        int? distinctCount = null;
        if (elementType.IsText() && inferredPurpose.Purpose == ColumnPurpose.CategoricalFeature)
        {
            distinctCount = DatasetDimensionsUtil.GetTextColumnCardinality(sampledData, schemaColumn);
        }

        // The missing-value flag stays null unless the item type is Single; vector and
        // scalar columns are checked by different helpers.
        bool? anyMissing = null;
        if (elementType == NumberDataViewType.Single)
        {
            if (schemaColumn.Type.IsVector())
            {
                anyMissing = DatasetDimensionsUtil.HasMissingNumericVector(sampledData, schemaColumn);
            }
            else
            {
                anyMissing = DatasetDimensionsUtil.HasMissingNumericSingleValue(sampledData, schemaColumn);
            }
        }

        dimensions[index] = new ColumnDimensions(distinctCount, anyMissing);
    }

    return dimensions;
}