/// <summary>
/// Scans every column of <paramref name="schema"/> and returns the largest value stored under
/// <paramref name="metadataKind"/>. Only columns whose metadata type for that kind is a key type
/// with raw kind U4 are examined; all other columns are ignored.
/// </summary>
/// <param name="schema">The schema whose columns are scanned.</param>
/// <param name="colMax">
/// Receives the index of the first column holding the returned maximum, or -1 when no examined
/// column has a value greater than zero (the comparison is strict and the running maximum starts at zero).
/// </param>
/// <param name="metadataKind">The metadata kind to read from each column.</param>
/// <param name="filterFunc">
/// Optional predicate receiving the schema and a column index; returning false skips that column.
/// It is only invoked for columns that already passed the metadata type check. When null, no
/// additional filtering is applied.
/// </param>
/// <returns>The largest metadata value found, or zero when <paramref name="colMax"/> is -1.</returns>
public static uint GetMaxMetadataKind(this Schema schema, out int colMax, string metadataKind, Func<Schema, int, bool> filterFunc = null)
{
    colMax = -1;
    uint best = 0;

    for (int index = 0; index < schema.Count; index++)
    {
        var metaType = schema.GetMetadataTypeOrNull(metadataKind, index);
        bool isU4Key = metaType != null && metaType.IsKey && metaType.RawKind == DataKind.U4;

        // The type check runs before the filter so the filter never sees columns lacking the metadata.
        if (isU4Key && (filterFunc == null || filterFunc(schema, index)))
        {
            uint current = 0;
            schema.GetMetadata(metadataKind, index, ref current);

            // Strictly greater: on ties the earliest column keeps the title.
            if (current > best)
            {
                best = current;
                colMax = index;
            }
        }
    }

    return best;
}
/// <summary>
/// Attempts to read the <c>CategoricalSlotRanges</c> metadata of a column and validate it
/// against the column's vector size.
/// </summary>
/// <param name="schema">The schema to inspect; checked for null.</param>
/// <param name="colIndex">Index of the column to inspect; checked to be non-negative.</param>
/// <param name="categoricalFeatures">
/// On success, a copy of the densified metadata values: a flat array of (start, end) index pairs.
/// Set to null whenever the method returns false.
/// </param>
/// <returns>
/// True when the column is a vector of known (positive) size, the metadata is a
/// <c>VBuffer&lt;int&gt;</c> of non-zero even length with no more pairs than slots, and every
/// pair satisfies: start &lt;= end, start greater than the previous pair's end, and both
/// indices below the slot count. False otherwise.
/// </returns>
internal static bool TryGetCategoricalFeatureIndices(Schema schema, int colIndex, out int[] categoricalFeatures)
{
    Contracts.CheckValue(schema, nameof(schema));
    Contracts.Check(colIndex >= 0, nameof(colIndex));

    categoricalFeatures = null;

    if (!(schema.GetColumnType(colIndex) is VectorType vecType && vecType.Size > 0))
    {
        return false;
    }

    var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex);
    if (type?.RawType != typeof(VBuffer<int>))
    {
        return false;
    }

    VBuffer<int> catIndices = default(VBuffer<int>);
    schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex, ref catIndices);
    VBufferUtils.Densify(ref catIndices);

    int columnSlotsCount = vecType.Size;

    // Compare the number of pairs (Length / 2) with the slot count rather than Length with
    // columnSlotsCount * 2: the multiplication overflows int once the vector size exceeds
    // int.MaxValue / 2, which would reject valid metadata (or throw in a checked build).
    // The division is exact because the length is verified to be even first.
    if (catIndices.Length <= 0 || catIndices.Length % 2 != 0 || catIndices.Length / 2 > columnSlotsCount)
    {
        return false;
    }

    var catIndicesValues = catIndices.GetValues();
    int previousEndIndex = -1;
    for (int i = 0; i < catIndicesValues.Length; i += 2)
    {
        int start = catIndicesValues[i];
        int end = catIndicesValues[i + 1];

        // start <= previousEndIndex also rejects negative starts, since previousEndIndex begins at -1.
        if (start > end || start <= previousEndIndex || start >= columnSlotsCount || end >= columnSlotsCount)
        {
            return false;
        }

        previousEndIndex = end;
    }

    categoricalFeatures = catIndicesValues.ToArray();
    return true;
}
/// <summary>
/// Lazily enumerates the indices of all columns whose <paramref name="metadataKind"/> metadata
/// is text-typed and compares equal to <paramref name="value"/> according to
/// <c>ReadOnlyMemoryUtils.EqualsStr</c>.
/// </summary>
/// <param name="schema">The schema whose columns are scanned.</param>
/// <param name="metadataKind">The metadata kind to read from each column.</param>
/// <param name="value">The string the metadata value is compared against.</param>
/// <returns>Matching column indices, in ascending order. Evaluation is deferred until enumeration.</returns>
internal static IEnumerable<int> GetColumnSet(this Schema schema, string metadataKind, string value)
{
    for (int index = 0; index < schema.Count; index++)
    {
        var metaType = schema.GetMetadataTypeOrNull(metadataKind, index);
        if (metaType == null || !metaType.IsText)
        {
            // Column either lacks this metadata kind or stores it as something other than text.
            continue;
        }

        ReadOnlyMemory<char> text = default;
        schema.GetMetadata(metadataKind, index, ref text);

        if (!ReadOnlyMemoryUtils.EqualsStr(value, text))
        {
            continue;
        }

        yield return index;
    }
}
/// <summary>
/// Lazily enumerates the indices of all columns whose <paramref name="metadataKind"/> metadata
/// is a key type with raw kind U4 and whose stored value equals <paramref name="value"/>.
/// </summary>
/// <param name="schema">The schema whose columns are scanned.</param>
/// <param name="metadataKind">The metadata kind to read from each column.</param>
/// <param name="value">The value the metadata is compared against.</param>
/// <returns>Matching column indices, in ascending order. Evaluation is deferred until enumeration.</returns>
internal static IEnumerable<int> GetColumnSet(this Schema schema, string metadataKind, uint value)
{
    for (int index = 0; index < schema.Count; index++)
    {
        var metaType = schema.GetMetadataTypeOrNull(metadataKind, index);
        bool isU4Key = metaType != null && metaType.IsKey && metaType.RawKind == DataKind.U4;
        if (!isU4Key)
        {
            // Column either lacks this metadata kind or stores it as something other than a U4 key.
            continue;
        }

        uint stored = 0;
        schema.GetMetadata(metadataKind, index, ref stored);

        if (stored != value)
        {
            continue;
        }

        yield return index;
    }
}