/// <summary>
/// Scans the columns of <paramref name="schema"/> and returns the largest value stored under
/// <paramref name="metadataKind"/>. A column takes part only when its metadata type for that kind
/// is a key type whose raw kind is U4, and when <paramref name="filterFunc"/> (if supplied) accepts it.
/// </summary>
/// <param name="schema">The schema whose columns are inspected.</param>
/// <param name="colMax">
/// Receives the index of the first column holding the returned maximum. It stays -1 when no
/// participating column holds a value greater than zero, which includes the case where no
/// column has the metadata at all.
/// </param>
/// <param name="metadataKind">The metadata kind to read from each column.</param>
/// <param name="filterFunc">
/// Optional predicate, invoked with the schema and the column index for every column that has
/// suitable metadata; returning false skips the column.
/// </param>
/// <returns>The maximum metadata value found, or zero when <paramref name="colMax"/> is -1.</returns>
public static uint GetMaxMetadataKind(this Schema schema, out int colMax, string metadataKind, Func<Schema, int, bool> filterFunc = null)
{
    colMax = -1;
    uint best = 0;

    for (int index = 0; index < schema.Count; index++)
    {
        var metadataType = schema.GetMetadataTypeOrNull(metadataKind, index);
        bool isU4Key = metadataType != null && metadataType.IsKey && metadataType.RawKind == DataKind.U4;

        // The type check runs first, so the filter is only consulted for columns that carry the metadata.
        if (isU4Key && (filterFunc == null || filterFunc(schema, index)))
        {
            uint current = 0;
            schema.GetMetadata(metadataKind, index, ref current);

            // Strict comparison keeps the earliest column when several share the maximum.
            if (current > best)
            {
                best = current;
                colMax = index;
            }
        }
    }

    return best;
}
/// <summary>
/// Returns the type of the metadata of the given kind for a column, as reported by the input schema,
/// or <c>null</c> when the metadata is suppressed for that column.
/// </summary>
/// <param name="kind">The metadata kind.</param>
/// <param name="col">The column index; must lie in [0, ColumnCount).</param>
/// <returns>
/// <c>null</c> if <c>IsPivot(col)</c> holds and <c>ShouldPreserveMetadata(kind)</c> does not;
/// otherwise whatever the input schema returns for the same kind and column.
/// </returns>
public ColumnType GetMetadataTypeOrNull(string kind, int col)
{
    _ectx.Check(col >= 0 && col < ColumnCount);

    // Metadata on pivot columns is only exposed for the kinds that are explicitly preserved.
    bool suppressed = IsPivot(col) && !ShouldPreserveMetadata(kind);
    return suppressed ? null : _inputSchema.GetMetadataTypeOrNull(kind, col);
}
/// <summary>
/// Attempts to read the metadata of a given kind from a column, succeeding only when the
/// metadata type of the column equals the expected type.
/// </summary>
/// <typeparam name="T">The raw type of the metadata; should correspond to <paramref name="type"/>.</typeparam>
/// <param name="schema">The schema to query.</param>
/// <param name="type">The expected type of the metadata.</param>
/// <param name="kind">The metadata kind.</param>
/// <param name="col">The column index.</param>
/// <param name="value">Receives the metadata value on success; left untouched otherwise.</param>
/// <returns>True if metadata of exactly the expected type exists and was fetched, false otherwise.</returns>
public static bool TryGetMetadata<T>(this Schema schema, PrimitiveType type, string kind, int col, ref T value)
{
    Contracts.CheckValue(schema, nameof(schema));
    Contracts.CheckValue(type, nameof(type));

    var actualType = schema.GetMetadataTypeOrNull(kind, col);
    if (type.Equals(actualType))
    {
        schema.GetMetadata(kind, col, ref value);
        return true;
    }

    // Missing metadata (null type) or a type mismatch both count as failure.
    return false;
}
/// <summary>
/// Determines whether a column carries KeyValues metadata that is a text vector whose size
/// equals <paramref name="keyCount"/>.
/// </summary>
/// <param name="schema">The schema to query.</param>
/// <param name="col">The column index.</param>
/// <param name="keyCount">The expected number of key values; zero always yields false.</param>
/// <returns>True if matching KeyValues metadata is present, false otherwise.</returns>
public static bool HasKeyValues(this Schema schema, int col, int keyCount)
{
    if (keyCount == 0)
    {
        return false;
    }

    var keyValuesType = schema.GetMetadataTypeOrNull(Kinds.KeyValues, col);
    if (keyValuesType == null || !keyValuesType.IsVector)
    {
        return false;
    }

    return keyValuesType.VectorSize == keyCount && keyValuesType.ItemType.IsText;
}
/// <summary>
/// Returns <c>true</c> if the specified column has SlotNames metadata whose type is a text vector
/// (VBuffer&lt;ReadOnlyMemory&lt;char&gt;&gt;) of size <paramref name="vectorSize"/>.
/// A <paramref name="vectorSize"/> of zero always yields <c>false</c>.
/// </summary>
/// <param name="schema">The schema to query.</param>
/// <param name="col">The column index.</param>
/// <param name="vectorSize">The size the slot names vector is required to have.</param>
public static bool HasSlotNames(this Schema schema, int col, int vectorSize)
{
    if (vectorSize == 0)
    {
        return false;
    }

    var slotNamesType = schema.GetMetadataTypeOrNull(Kinds.SlotNames, col);
    bool isVectorOfExpectedSize =
        slotNamesType != null
        && slotNamesType.IsVector
        && slotNamesType.VectorSize == vectorSize;

    return isVectorOfExpectedSize && slotNamesType.ItemType.IsText;
}
/// <summary>
/// Reads and validates the CategoricalSlotRanges metadata of a vector column.
/// The metadata is a flat list of slot indices that must be read as consecutive (start, end) pairs,
/// so it always has an even number of elements. For example 0,2,3,4,8,9 means [0,2],[3,4],[8,9]:
/// slots 0, 1 and 2 form one categorical feature, slots 3 and 4 form another, slots 8 and 9 a third,
/// and slots 5, 6 and 7 are not categorical.
/// </summary>
/// <param name="schema">The schema to query.</param>
/// <param name="colIndex">The column index; must be non-negative.</param>
/// <param name="categoricalFeatures">
/// Receives a copy of the range boundaries when the metadata is present and valid; <c>null</c> otherwise.
/// </param>
/// <returns>
/// True when the column is a vector of known positive size, the metadata is a VBuffer&lt;int&gt; with a
/// non-empty even number of entries (at most twice the slot count), and every pair satisfies
/// start &lt;= end, starts after the previous pair's end, and lies within the slot count.
/// </returns>
public static bool TryGetCategoricalFeatureIndices(Schema schema, int colIndex, out int[] categoricalFeatures)
{
    Contracts.CheckValue(schema, nameof(schema));
    Contracts.Check(colIndex >= 0, nameof(colIndex));

    categoricalFeatures = null;

    // Only vector columns with a positive size can carry categorical ranges.
    if (!(schema.GetColumnType(colIndex) is VectorType vectorType) || vectorType.Size <= 0)
    {
        return false;
    }

    var rangesType = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex);
    if (rangesType?.RawType != typeof(VBuffer<int>))
    {
        return false;
    }

    var ranges = default(VBuffer<int>);
    schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex, ref ranges);
    VBufferUtils.Densify(ref ranges);

    int slotCount = vectorType.Size;
    int boundaryCount = ranges.Length;
    if (boundaryCount <= 0 || boundaryCount % 2 != 0 || boundaryCount > slotCount * 2)
    {
        return false;
    }

    var boundaries = ranges.GetValues();
    int lastEnd = -1;
    for (int i = 0; i < boundaries.Length; i += 2)
    {
        int start = boundaries[i];
        int end = boundaries[i + 1];

        // Ranges must be well-formed, strictly increasing and inside the column's slots.
        if (start > end || start <= lastEnd || start >= slotCount || end >= slotCount)
        {
            return false;
        }

        lastEnd = end;
    }

    categoricalFeatures = boundaries.ToArray();
    return true;
}
/// <summary>
/// Lazily enumerates the indices of the columns whose metadata of kind
/// <paramref name="metadataKind"/> is of text type and equals <paramref name="value"/>.
/// Columns without the metadata, or with non-text metadata, are skipped.
/// </summary>
/// <param name="schema">The schema whose columns are inspected.</param>
/// <param name="metadataKind">The metadata kind to read from each column.</param>
/// <param name="value">The text value to match.</param>
/// <returns>The matching column indices, in increasing order.</returns>
public static IEnumerable<int> GetColumnSet(this Schema schema, string metadataKind, string value)
{
    for (int index = 0; index < schema.Count; index++)
    {
        var metadataType = schema.GetMetadataTypeOrNull(metadataKind, index);
        if (metadataType == null || !metadataType.IsText)
        {
            continue;
        }

        ReadOnlyMemory<char> text = default;
        schema.GetMetadata(metadataKind, index, ref text);
        if (!ReadOnlyMemoryUtils.EqualsStr(value, text))
        {
            continue;
        }

        yield return index;
    }
}
/// <summary>
/// Lazily enumerates the indices of the columns whose metadata of kind
/// <paramref name="metadataKind"/> is a key type with raw kind U4 and equals <paramref name="value"/>.
/// Columns without the metadata, or with metadata of another type, are skipped.
/// </summary>
/// <param name="schema">The schema whose columns are inspected.</param>
/// <param name="metadataKind">The metadata kind to read from each column.</param>
/// <param name="value">The key value to match.</param>
/// <returns>The matching column indices, in increasing order.</returns>
public static IEnumerable<int> GetColumnSet(this Schema schema, string metadataKind, uint value)
{
    for (int index = 0; index < schema.Count; index++)
    {
        var metadataType = schema.GetMetadataTypeOrNull(metadataKind, index);
        bool isU4Key = metadataType != null && metadataType.IsKey && metadataType.RawKind == DataKind.U4;
        if (!isU4Key)
        {
            continue;
        }

        uint current = 0;
        schema.GetMetadata(metadataKind, index, ref current);
        if (current == value)
        {
            yield return index;
        }
    }
}
/// <summary>
/// Writes a textual listing of <paramref name="schema"/> to <paramref name="writer"/>: a header with the
/// column count, then one line per column with its name and type (suffixed with a marker when
/// <paramref name="tschema"/> reports a slot type for it). Depending on <paramref name="args"/>, each
/// column is followed either by its metadata (via ShowMetadata) or by its slot names.
/// </summary>
/// <param name="writer">The destination writer.</param>
/// <param name="args">Options selecting what is shown (metadata values/types, slots, verbosity).</param>
/// <param name="schema">The schema to print.</param>
/// <param name="tschema">Optional transpose schema; may be null.</param>
private static void PrintSchema(TextWriter writer, Arguments args, Schema schema, ITransposeSchema tschema)
{
    Contracts.AssertValue(writer);
    Contracts.AssertValue(args);
    Contracts.AssertValue(schema);
    Contracts.AssertValueOrNull(tschema);

#if !CORECLR
    if (args.ShowJson)
    {
        writer.WriteLine("Json Schema not supported.");
        return;
    }
#endif

    int columnCount = schema.ColumnCount;
    var indented = new IndentedTextWriter(writer, " ");
    indented.WriteLine("{0} columns:", columnCount);

    using (indented.Nest())
    {
        // Buffer reused across columns when fetching slot names.
        var slotNames = default(VBuffer<ReadOnlyMemory<char>>);

        for (int index = 0; index < columnCount; index++)
        {
            var columnName = schema.GetColumnName(index);
            var columnType = schema.GetColumnType(index);
            var slotType = tschema?.GetSlotType(index);
            string transposeMarker = slotType == null ? "" : " (T)";
            indented.WriteLine("{0}: {1}{2}", columnName, columnType, transposeMarker);

            // Metadata display takes precedence over slot display.
            bool showValues = args.ShowMetadataValues;
            if (showValues || args.ShowMetadataTypes)
            {
                ShowMetadata(indented, schema, index, showValues);
                continue;
            }

            if (!args.ShowSlots || !columnType.IsKnownSizeVector)
            {
                continue;
            }

            ColumnType slotNamesType = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, index);
            if (slotNamesType == null)
            {
                continue;
            }

            bool slotNamesTypeOk = slotNamesType.VectorSize == columnType.VectorSize && slotNamesType.ItemType.IsText;
            if (!slotNamesTypeOk)
            {
                Contracts.Assert(false, "Unexpected slot names type");
                continue;
            }

            schema.GetMetadata(MetadataUtils.Kinds.SlotNames, index, ref slotNames);
            if (slotNames.Length != columnType.VectorSize)
            {
                Contracts.Assert(false, "Unexpected length of slot names vector");
                continue;
            }

            using (indented.Nest())
            {
                // In verbose mode every slot is listed; otherwise only slots with a non-empty name.
                bool verbose = args.Verbose ?? false;
                foreach (var slot in slotNames.Items(all: verbose))
                {
                    if (verbose || !slot.Value.IsEmpty)
                    {
                        indented.WriteLine("{0}:{1}", slot.Key, slot.Value);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Returns the type of the metadata of the given kind for a column by forwarding the request to
/// the input, using the input column index that <c>Sources</c> maps <paramref name="col"/> to.
/// </summary>
/// <param name="kind">The metadata kind; must be non-empty.</param>
/// <param name="col">The column index; must lie in [0, ColumnCount).</param>
/// <returns>Whatever the input reports for the mapped column and the given kind.</returns>
public ColumnType GetMetadataTypeOrNull(string kind, int col)
{
    Contracts.CheckNonEmpty(kind, nameof(kind));
    Contracts.CheckParam(col >= 0 && col < ColumnCount, nameof(col));

    var sourceColumn = Sources[col];
    return _input.GetMetadataTypeOrNull(kind, sourceColumn);
}