/// <summary>
/// Finds the largest value of the given metadata kind across the columns of <paramref name="schema"/>.
/// Only columns whose metadata type for <paramref name="metadataKind"/> is a key type with raw kind U4
/// are examined.
/// </summary>
/// <param name="schema">The schema whose columns are scanned.</param>
/// <param name="colMax">Receives the index of the first column holding the largest value, or -1 when
/// no examined column holds a value greater than zero.</param>
/// <param name="metadataKind">The metadata kind to read from each column.</param>
/// <param name="filterFunc">Optional predicate that receives the schema and a column index; columns for
/// which it returns false are skipped. When null, no column is filtered out.</param>
/// <returns>The largest value found, or zero when no column contributed a value.</returns>
public static uint GetMaxMetadataKind(this ISchema schema, out int colMax, string metadataKind, Func<ISchema, int, bool> filterFunc = null)
{
    uint best = 0;
    colMax = -1;

    for (int col = 0; col < schema.ColumnCount; col++)
    {
        var metaType = schema.GetMetadataTypeOrNull(metadataKind, col);
        bool isU4Key = metaType != null && metaType.IsKey && metaType.RawKind == DataKind.U4;

        // The filter is consulted only for columns that carry metadata of the expected type.
        if (isU4Key && (filterFunc == null || filterFunc(schema, col)))
        {
            uint current = 0;
            schema.GetMetadata(metadataKind, col, ref current);

            // Strict comparison keeps the first column when several share the maximum.
            if (current > best)
            {
                best = current;
                colMax = col;
            }
        }
    }

    return best;
}
/// <summary>
/// Decides whether <see cref="LabelNameBindableMapper"/> can be used to wrap <paramref name="mapper"/>.
/// This never throws for an unsuitable mapper: the caller is expected to fall back to the original,
/// unwrapped bound mapper when the answer is <c>false</c>.
/// </summary>
/// <param name="mapper">The bound mapper that is a candidate for wrapping.</param>
/// <param name="labelNameType">The type of the label names taken from metadata (either the key value
/// metadata of the training label column, or deserialized from the model of a bindable mapper).</param>
/// <returns>Whether <see cref="LabelNameBindableMapper.CreateBound{T}"/> can be called with this mapper
/// and be expected to succeed.</returns>
public static bool CanWrap(ISchemaBoundMapper mapper, ColumnType labelNameType)
{
    Contracts.AssertValue(mapper);
    Contracts.AssertValue(labelNameType);

    // Only row mappers are handled; other mapper kinds are deliberately not covered.
    if (!(mapper is ISchemaBoundRowMapper))
    {
        return false;
    }

    ISchema schema = mapper.Schema;

    // Without a score column there is nothing to attach the label names to.
    int scoreCol;
    if (!schema.TryGetColumnIndex(MetadataUtils.Const.ScoreValueKind.Score, out scoreCol))
    {
        return false;
    }

    // A score column that already publishes its own slot names is left alone.
    if (schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, scoreCol) != null)
    {
        return false;
    }

    // The label names must be a vector whose size agrees with the score output.
    var scoreType = schema.GetColumnType(scoreCol);
    if (!labelNameType.IsVector)
    {
        return false;
    }
    return labelNameType.VectorSize == scoreType.VectorSize;
}
// Builds, for every column pair of the parent, the output column type and the matching
// key-to-value map. Each source column must carry KeyValues metadata whose vector size
// equals the key count of the source item type; otherwise the host check fails.
private void ComputeKvMaps(ISchema schema, out ColumnType[] types, out KeyToValueMap[] kvMaps)
{
    int pairCount = _parent.ColumnPairs.Length;
    types = new ColumnType[pairCount];
    kvMaps = new KeyToValueMap[pairCount];

    for (int i = 0; i < types.Length; i++)
    {
        Contracts.Assert(types[i] == null);

        var srcCol = ColMapNewToOld[i];
        var srcType = schema.GetColumnType(srcCol);
        var keyValuesType = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, srcCol);
        Host.Check(keyValuesType != null, "Metadata KeyValues does not exist");
        Host.Check(keyValuesType.VectorSize == srcType.ItemType.KeyCount, "KeyValues metadata size does not match column type key count");

        // Scalar sources map to the key value item type; vector sources keep the source's vector shape.
        if (srcType.IsVector)
        {
            types[i] = new VectorType(keyValuesType.ItemType.AsPrimitive, srcType.AsVector);
        }
        else
        {
            types[i] = keyValuesType.ItemType;
        }

        // GetKeyMetadata has two generic parameters, so it is closed over the raw key type
        // and the raw value type via reflection and then invoked on this instance.
        Func<int, ColumnType, ColumnType, KeyToValueMap> template = GetKeyMetadata<int, int>;
        var openMethod = template.GetMethodInfo().GetGenericMethodDefinition();
        var closedMethod = openMethod.MakeGenericMethod(
            new Type[] { srcType.ItemType.RawType, types[i].ItemType.RawType });
        kvMaps[i] = (KeyToValueMap)closedMethod.Invoke(this, new object[] { i, srcType, keyValuesType });
    }
}
// Determines the output vector type for a key column, reports through 'concat' whether
// several indicator vectors have to be concatenated, and registers the output metadata
// (slot names, categorical slot ranges, is-normalized) with the dispatcher.
// No source column metadata is passed through.
private static void ComputeType(KeyToVectorTransform trans, ISchema input, int iinfo, ColInfo info, bool bag, MetadataDispatcher md, out VectorType type, out bool concat)
{
    Contracts.AssertValue(trans);
    Contracts.AssertValue(input);
    Contracts.AssertValue(info);
    Contracts.Assert(info.TypeSrc.ItemType.IsKey);
    Contracts.AssertValue(md);

    int keyCount = info.TypeSrc.ItemType.KeyCount;
    Contracts.Assert(keyCount > 0);

    // Key names are usable only when they form a known-size text vector with one entry per key.
    var keyNamesType = input.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, info.Source);
    bool hasKeyNames = keyNamesType != null
        && keyNamesType.IsKnownSizeVector
        && keyNamesType.ItemType.IsText
        && keyNamesType.VectorSize == keyCount;

    int valueCount = info.TypeSrc.ValueCount;

    using (var bldr = md.BuildMetadata(iinfo))
    {
        concat = !(bag || valueCount == 1);
        if (concat)
        {
            // One indicator vector per source value, laid out back to back.
            type = new VectorType(NumberType.Float, valueCount, keyCount);
            if (hasKeyNames && type.VectorSize > 0)
            {
                bldr.AddGetter<VBuffer<DvText>>(MetadataUtils.Kinds.SlotNames,
                    new VectorType(TextType.Instance, type), trans.GetSlotNames);
            }
        }
        else
        {
            // A single vector of keyCount slots (bagged, or exactly one source value).
            type = new VectorType(NumberType.Float, keyCount);
            if (hasKeyNames)
            {
                bldr.AddGetter<VBuffer<DvText>>(MetadataUtils.Kinds.SlotNames, keyNamesType, trans.GetKeyNames);
            }
        }

        if (!bag && valueCount > 0)
        {
            bldr.AddGetter<VBuffer<DvInt4>>(MetadataUtils.Kinds.CategoricalSlotRanges,
                MetadataUtils.GetCategoricalType(valueCount), trans.GetCategoricalSlotRanges);
        }

        if (!bag || valueCount == 1)
        {
            bldr.AddPrimitive(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, DvBool.True);
        }
    }
}
// Reports the stored type for the SlotNames metadata of the tracked column;
// every other (kind, column) combination is answered by the parent schema.
public ColumnType GetMetadataTypeOrNull(string kind, int col)
{
    if (col != _col || kind != MetadataUtils.Kinds.SlotNames)
    {
        return _parent.GetMetadataTypeOrNull(kind, col);
    }
    return _type;
}
// Reports the label name type for the configured metadata kind on the score column;
// all other requests are forwarded to the parent schema.
public ColumnType GetMetadataTypeOrNull(string kind, int col)
{
    bool isLabelNameRequest = col == _scoreCol && kind == _metadataKind;
    return isLabelNameRequest
        ? _labelNameType
        : _parent.GetMetadataTypeOrNull(kind, col);
}
// Returns the input schema's metadata type for the column, except that pivot columns
// expose only the metadata kinds that ShouldPreserveMetadata accepts (null otherwise).
// The column index is checked against ColumnCount first.
public ColumnType GetMetadataTypeOrNull(string kind, int col)
{
    _ectx.Check(0 <= col && col < ColumnCount);

    bool suppressed = IsPivot(col) && !ShouldPreserveMetadata(kind);
    if (suppressed)
    {
        return null;
    }

    return _inputSchema.GetMetadataTypeOrNull(kind, col);
}
// Columns below the input schema's column count forward to the input schema;
// columns at or beyond that count have no metadata, so null is returned.
public DataViewType GetMetadataTypeOrNull(string kind, int col)
{
    if (col >= _schemaInput.ColumnCount)
    {
        return null;
    }
    return _schemaInput.GetMetadataTypeOrNull(kind, col);
}
/// <summary>
/// Tries to get the metadata kind of the specified type for a column.
/// </summary>
/// <typeparam name="T">The raw type of the metadata, should match the PrimitiveType type</typeparam>
/// <param name="schema">The schema</param>
/// <param name="type">The type of the metadata</param>
/// <param name="kind">The metadata kind</param>
/// <param name="col">The column</param>
/// <param name="value">The value to return, if successful. Left untouched when the method returns false.</param>
/// <returns>True if the metadata of the right type exists, false otherwise</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="schema"/> or
/// <paramref name="type"/> is null.</exception>
public static bool TryGetMetadata<T>(this ISchema schema, PrimitiveType type, string kind, int col, ref T value)
{
    // Fail with a named argument instead of an anonymous NullReferenceException further down.
    if (schema == null)
    {
        throw new System.ArgumentNullException(nameof(schema));
    }
    if (type == null)
    {
        throw new System.ArgumentNullException(nameof(type));
    }

    // A missing metadata entry yields null, which never equals the requested type.
    var metadataType = schema.GetMetadataTypeOrNull(kind, col);
    if (!type.Equals(metadataType))
    {
        return false;
    }

    schema.GetMetadata(kind, col, ref value);
    return true;
}
/// <summary>
/// Determines the output vector type for a key column, the number of slots used per source value
/// (<paramref name="bitsPerColumn"/>), and whether several per-value vectors must be concatenated.
/// Also registers the output metadata with the dispatcher; no source column metadata is passed through.
/// </summary>
private static void ComputeType(KeyToBinaryVectorTransform trans, ISchema input, int iinfo, ColInfo info, MetadataDispatcher md, out VectorType type, out bool concat, out int bitsPerColumn)
{
    Contracts.AssertValue(trans);
    Contracts.AssertValue(input);
    Contracts.AssertValue(info);
    Contracts.Assert(info.TypeSrc.ItemType.IsKey);

    int keyCount = info.TypeSrc.ItemType.KeyCount;
    Contracts.Assert(keyCount > 0);

    // An additional bit is reserved so that all 1s can represent missing values.
    bitsPerColumn = Utils.IbitHigh((uint)keyCount) + 2;
    Contracts.Assert(bitsPerColumn > 0);

    // Key names are usable only when they form a known-size text vector with one entry per key.
    var keyNamesType = input.GetMetadataTypeOrNull(MetadataUtils.Kinds.KeyValues, info.Source);
    bool hasKeyNames = keyNamesType != null
        && keyNamesType.IsKnownSizeVector
        && keyNamesType.ItemType.IsText
        && keyNamesType.VectorSize == keyCount;

    int valueCount = info.TypeSrc.ValueCount;

    using (var bldr = md.BuildMetadata(iinfo))
    {
        concat = valueCount != 1;
        if (concat)
        {
            // One block of bitsPerColumn slots per source value, laid out back to back.
            type = new VectorType(NumberType.Float, valueCount, bitsPerColumn);
            if (hasKeyNames && type.VectorSize > 0)
            {
                bldr.AddGetter<VBuffer<DvText>>(MetadataUtils.Kinds.SlotNames,
                    new VectorType(TextType.Instance, type), trans.GetSlotNames);
            }
        }
        else
        {
            // Exactly one source value: a single vector of bitsPerColumn slots.
            type = new VectorType(NumberType.Float, bitsPerColumn);
            if (hasKeyNames)
            {
                bldr.AddGetter<VBuffer<DvText>>(MetadataUtils.Kinds.SlotNames,
                    new VectorType(TextType.Instance, type), trans.GetKeyNames);
            }
            bldr.AddPrimitive(MetadataUtils.Kinds.IsNormalized, BoolType.Instance, DvBool.True);
        }
    }
}
// Wraps the metadata named 'metadataName' of column 'col' as a column that is published
// under the KeyValues kind. The metadata must exist and have raw type T (asserted).
private static IColumn KeyValueMetadataFromMetadata<T>(ISchema schema, int col, string metadataName)
{
    Contracts.AssertValue(schema);
    Contracts.Assert(0 <= col && col < schema.ColumnCount);

    var metadataType = schema.GetMetadataTypeOrNull(metadataName, col);
    Contracts.AssertValue(metadataType);
    Contracts.Assert(metadataType.RawType == typeof(T));

    // The getter reads the source metadata each time it is invoked.
    ValueGetter<T> fetch = (ref T dst) =>
    {
        schema.GetMetadata(metadataName, col, ref dst);
    };

    return RowColumnUtils.GetColumn(MetadataUtils.Kinds.KeyValues, metadataType, fetch);
}
// Lists the metadata kinds and types available on an output column.
// Group columns (index below _groupCount) report everything the corresponding input column has;
// the remaining columns report only the preserved metadata kinds that exist on their input column.
public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
{
    CheckColumnInRange(col);

    if (col < _groupCount)
    {
        return _input.GetMetadataTypes(GroupIds[col]);
    }

    int keepIndex = col - _groupCount;
    var preserved = new List<KeyValuePair<string, ColumnType>>();
    foreach (var kind in _preservedMetadata)
    {
        var metadataType = _input.GetMetadataTypeOrNull(kind, KeepIds[keepIndex]);
        if (metadataType == null)
        {
            continue;
        }
        preserved.Add(metadataType.GetPair(kind));
    }
    return preserved;
}
/// <summary>
/// Reads a column's metadata value, but only when the metadata exists and its type equals
/// <paramref name="type"/>.
/// </summary>
/// <typeparam name="T">The raw type of the metadata; should match the raw type of <paramref name="type"/>.</typeparam>
/// <param name="schema">The schema to query. Checked for null.</param>
/// <param name="type">The metadata type that is expected. Checked for null.</param>
/// <param name="kind">The metadata kind.</param>
/// <param name="col">The column index.</param>
/// <param name="value">Receives the metadata value on success; left untouched otherwise.</param>
/// <returns>True if metadata of the expected type exists and was read, false otherwise.</returns>
public static bool TryGetMetadata<T>(this ISchema schema, PrimitiveType type, string kind, int col, ref T value)
{
    Contracts.CheckValue(schema, nameof(schema));
    Contracts.CheckValue(type, nameof(type));

    var actualType = schema.GetMetadataTypeOrNull(kind, col);
    if (type.Equals(actualType))
    {
        schema.GetMetadata(kind, col, ref value);
        return true;
    }

    return false;
}
// Resolves the metadata type for an output column. The column index is first mapped either
// to an input column (answered by Input) or to an info index (answered by GetMetadataTypeCore).
public ColumnType GetMetadataTypeOrNull(string kind, int col)
{
    Contracts.CheckNonEmpty(kind, nameof(kind));
    Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col));

    bool fromInput;
    int mapped = MapColumnIndex(out fromInput, col);

    if (!fromInput)
    {
        Contracts.Assert(0 <= mapped && mapped < InfoCount);
        return GetMetadataTypeCore(kind, mapped);
    }

    return Input.GetMetadataTypeOrNull(kind, mapped);
}
// Returns true when the column has KeyValues metadata that is a text vector
// with exactly keyCount entries. A keyCount of zero always yields false.
public static bool HasKeyNames(this ISchema schema, int col, int keyCount)
{
    if (keyCount == 0)
    {
        return false;
    }

    var keyValuesType = schema.GetMetadataTypeOrNull(Kinds.KeyValues, col);
    if (keyValuesType == null || !keyValuesType.IsVector)
    {
        return false;
    }

    return keyValuesType.VectorSize == keyCount && keyValuesType.ItemType.IsText;
}
/// <summary>
/// Returns <c>true</c> if the specified column has SlotNames metadata whose type is a vector of
/// text with exactly <paramref name="vectorSize"/> entries.
/// A <paramref name="vectorSize"/> of zero always yields <c>false</c>.
/// </summary>
/// <param name="schema">The schema to query.</param>
/// <param name="col">The column index.</param>
/// <param name="vectorSize">The number of slot names the metadata must contain.</param>
public static bool HasSlotNames(this ISchema schema, int col, int vectorSize)
{
    if (vectorSize == 0)
    {
        return false;
    }

    var slotNamesType = schema.GetMetadataTypeOrNull(Kinds.SlotNames, col);
    if (slotNamesType == null || !slotNamesType.IsVector)
    {
        return false;
    }

    return slotNamesType.VectorSize == vectorSize && slotNamesType.ItemType.IsText;
}
/// <summary>
/// Lazily enumerates the indices of all columns whose metadata of kind
/// <paramref name="metadataKind"/> is of text type and equals <paramref name="value"/>.
/// Columns without that metadata, or with a non-text metadata type, are skipped.
/// </summary>
public static IEnumerable<int> GetColumnSet(this ISchema schema, string metadataKind, string value)
{
    for (int col = 0; col < schema.ColumnCount; col++)
    {
        var metadataType = schema.GetMetadataTypeOrNull(metadataKind, col);
        if (metadataType == null || !metadataType.IsText)
        {
            continue;
        }

        ReadOnlyMemory<char> text = default;
        schema.GetMetadata(metadataKind, col, ref text);
        if (!ReadOnlyMemoryUtils.EqualsStr(value, text))
        {
            continue;
        }

        yield return col;
    }
}
/// <summary>
/// Lazily enumerates the indices of all columns whose metadata of kind
/// <paramref name="metadataKind"/> is a key type with raw kind U4 and equals <paramref name="value"/>.
/// Columns without that metadata, or with a different metadata type, are skipped.
/// </summary>
public static IEnumerable<int> GetColumnSet(this ISchema schema, string metadataKind, uint value)
{
    for (int col = 0; col < schema.ColumnCount; col++)
    {
        var metadataType = schema.GetMetadataTypeOrNull(metadataKind, col);
        bool isU4Key = metadataType != null && metadataType.IsKey && metadataType.RawKind == DataKind.U4;
        if (!isU4Key)
        {
            continue;
        }

        uint stored = 0;
        schema.GetMetadata(metadataKind, col, ref stored);
        if (stored != value)
        {
            continue;
        }

        yield return col;
    }
}
/// <summary>
/// Reads and validates the categorical slot ranges of a known-size vector column.
/// The categoricalFeatures is a vector of the indices of categorical features slots.
/// This vector should always have an even number of elements, and the elements should be parsed in
/// groups of two consecutive numbers. So if its value is the range of numbers: 0,2,3,4,8,9
/// look at it as [0,2],[3,4],[8,9].
/// The way to interpret that is: features with indices 0, 1, and 2 are one categorical.
/// Features with indices 3 and 4 are another categorical. Features 5 and 6 don't appear there, so
/// they are not categoricals.
/// </summary>
/// <param name="schema">The schema to query. Checked for null.</param>
/// <param name="colIndex">The column index; must be non-negative.</param>
/// <param name="categoricalFeatures">Receives a copy of the validated range boundaries, or null when
/// the method returns false.</param>
/// <returns>True when the column is a known-size vector carrying CategoricalSlotRanges metadata of
/// integer-vector type whose ranges are well formed: non-empty, an even number of entries, each
/// range start not greater than its end, each start greater than the previous range's end, and every
/// index below the column's slot count.</returns>
public static bool TryGetCategoricalFeatureIndices(ISchema schema, int colIndex, out int[] categoricalFeatures)
{
    Contracts.CheckValue(schema, nameof(schema));
    Contracts.Check(colIndex >= 0, nameof(colIndex));

    categoricalFeatures = null;

    var columnType = schema.GetColumnType(colIndex);
    if (!columnType.IsKnownSizeVector)
    {
        return false;
    }

    var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex);
    if (type?.RawType != typeof(VBuffer<int>))
    {
        return false;
    }

    VBuffer<int> catIndices = default(VBuffer<int>);
    schema.GetMetadata(MetadataUtils.Kinds.CategoricalSlotRanges, colIndex, ref catIndices);
    VBufferUtils.Densify(ref catIndices);

    int columnSlotsCount = columnType.AsVector.VectorSizeCore;
    int length = catIndices.Length;

    // length is even when the last test runs, so length / 2 <= count is the same condition as
    // length <= count * 2 but cannot overflow for very large slot counts.
    if (length <= 0 || length % 2 != 0 || length / 2 > columnSlotsCount)
    {
        return false;
    }

    // The backing array of a VBuffer may be longer than its logical length, so everything below is
    // bounded by 'length' rather than by the array size; slots past 'length' are not part of the value.
    var values = catIndices.Values;
    int previousEndIndex = -1;
    for (int i = 0; i < length; i += 2)
    {
        int start = values[i];
        int end = values[i + 1];
        if (start > end || start <= previousEndIndex || start >= columnSlotsCount || end >= columnSlotsCount)
        {
            return false;
        }
        previousEndIndex = end;
    }

    categoricalFeatures = values.Take(length).ToArray();
    return true;
}
// Captures the parent schema, the column index, the vector type, and the metadata kind, and
// adapts the value getter to the metadata getter shape (the column argument is ignored).
// The parent must not already expose metadata of the given kind on that column (asserted).
public SchemaImpl(ISchema parent, int col, VectorType type, ValueGetter<VBuffer<T>> getter, string metadataKind)
{
    Contracts.AssertValue(parent);
    Contracts.Assert(0 <= col && col < parent.ColumnCount);
    Contracts.AssertValue(type);
    Contracts.AssertValue(getter);
    Contracts.Assert(type.ItemType.RawType == typeof(T));
    Contracts.AssertNonEmpty(metadataKind);
    Contracts.Assert(parent.GetMetadataTypeOrNull(metadataKind, col) == null);

    _parent = parent;
    _scoreCol = col;
    _metadataKind = metadataKind;
    _labelNameType = type;

    // The getter is stored in its metadata variant so that the marshal call works.
    _labelNameGetter = (int ignoredCol, ref VBuffer<T> dst) =>
    {
        getter(ref dst);
    };
}
/// <summary>
/// Returns the type of the requested metadata kind for a column, or null when there is none.
/// Columns below the input schema's column count (zero when there is no input schema) are answered
/// by the input schema. For the remaining columns only SlotNames is reported, and only when the
/// column type is a one-dimensional vector with a positive size; the result is then a text vector
/// of that size.
/// </summary>
public ColumnType GetMetadataTypeOrNull(string kind, int col)
{
    int inputCount = _schemaInput == null ? 0 : _schemaInput.ColumnCount;
    if (col < inputCount)
    {
        return _schemaInput.GetMetadataTypeOrNull(kind, col);
    }

    if (kind != MetadataUtils.Kinds.SlotNames)
    {
        return null;
    }

    var columnType = GetColumnType(col);
    if (!columnType.IsVector())
    {
        return null;
    }
    if (columnType.AsVector().DimCount() != 1)
    {
        return null;
    }
    if (columnType.AsVector().GetDim(0) <= 0)
    {
        return null;
    }

    return new VectorType(TextType.Instance, columnType.AsVector().GetDim(0));
}
// Creates a getter that produces 'length' slot names for an output column.
// When the given schema column already carries SlotNames metadata that is a text vector of exactly
// 'length' entries, the getter forwards to that metadata. Otherwise it generates the names
// "{prefix}_0" ... "{prefix}_{length-1}".
private ValueGetter<VBuffer<ReadOnlyMemory<char>>> CreateSlotNamesGetter(ISchema schema, int column, int length, string prefix)
{
    var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, column);

    // The getter below reads the metadata into a VBuffer, so the metadata type has to be a vector
    // whose items are text. Testing IsText on the metadata type itself describes a scalar text
    // value, which this getter cannot read. The size test keeps the forwarded names consistent
    // with the requested length.
    bool hasUsableSlotNames = type != null
        && type.IsVector
        && type.VectorSize == length
        && type.ItemType.IsText;

    if (hasUsableSlotNames)
    {
        return (ref VBuffer<ReadOnlyMemory<char>> dst) =>
            schema.GetMetadata(MetadataUtils.Kinds.SlotNames, column, ref dst);
    }

    return (ref VBuffer<ReadOnlyMemory<char>> dst) =>
    {
        // Reuse the destination's array when it is already large enough.
        var values = dst.Values;
        if (Utils.Size(values) < length)
        {
            values = new ReadOnlyMemory<char>[length];
        }
        for (int i = 0; i < length; i++)
        {
            values[i] = string.Format("{0}_{1}", prefix, i).AsMemory();
        }
        dst = new VBuffer<ReadOnlyMemory<char>>(length, values);
    };
}
// Creates a metadata getter that produces 'length' slot names for an output column.
// When the given schema column already carries SlotNames metadata that is a text vector of exactly
// 'length' entries, the getter forwards to that metadata. Otherwise it generates the names
// "{prefix}_0" ... "{prefix}_{length-1}". The column argument passed to the getter is ignored.
private MetadataUtils.MetadataGetter<VBuffer<DvText>> CreateSlotNamesGetter(ISchema schema, int column, int length, string prefix)
{
    var type = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, column);

    // The getter below reads the metadata into a VBuffer, so the metadata type has to be a vector
    // whose items are text. Testing IsText on the metadata type itself describes a scalar text
    // value, which this getter cannot read. The size test keeps the forwarded names consistent
    // with the requested length.
    bool hasUsableSlotNames = type != null
        && type.IsVector
        && type.VectorSize == length
        && type.ItemType.IsText;

    if (hasUsableSlotNames)
    {
        return (int col, ref VBuffer<DvText> dst) =>
            schema.GetMetadata(MetadataUtils.Kinds.SlotNames, column, ref dst);
    }

    return (int col, ref VBuffer<DvText> dst) =>
    {
        // Reuse the destination's array when it is already large enough.
        var values = dst.Values;
        if (Utils.Size(values) < length)
        {
            values = new DvText[length];
        }
        for (int i = 0; i < length; i++)
        {
            values[i] = new DvText(string.Format("{0}_{1}", prefix, i));
        }
        dst = new VBuffer<DvText>(length, values);
    };
}
// Validates the arguments, translates the column index through ColMap,
// and asks the input schema for the metadata type of the translated column.
public ColumnType GetMetadataTypeOrNull(string kind, int col)
{
    Contracts.CheckNonEmpty(kind, nameof(kind));
    Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col));

    var inputCol = ColMap[col];
    return Input.GetMetadataTypeOrNull(kind, inputCol);
}
// Writes a textual description of the schema to the writer: the column count, then one line per
// column with its name and type (followed by " (T)" when the transpose schema reports a slot type).
// Depending on the arguments, each column is followed either by its metadata (via ShowMetadata)
// or by its slot names. 'tschema' may be null.
private static void PrintSchema(TextWriter writer, Arguments args, ISchema schema, ITransposeSchema tschema)
{
    Contracts.AssertValue(writer);
    Contracts.AssertValue(args);
    Contracts.AssertValue(schema);
    Contracts.AssertValueOrNull(tschema);
    // When CORECLR is not defined, a JSON request is answered with a fixed message and nothing else.
#if !CORECLR
    if (args.ShowJson) { writer.WriteLine("Json Schema not supported."); return; }
#endif
    int colLim = schema.ColumnCount;
    writer.WriteLine("{0} columns:", colLim);
    var itw = IndentingTextWriter.Wrap(writer);
    using (itw.Nest())
    {
        // Buffer reused across columns when reading slot names.
        var names = default(VBuffer <ReadOnlyMemory <char> >);
        for (int col = 0; col < colLim; col++)
        {
            var name = schema.GetColumnName(col);
            var type = schema.GetColumnType(col);
            var slotType = tschema == null ? null : tschema.GetSlotType(col);
            itw.WriteLine("{0}: {1}{2}", name, type, slotType == null ? "" : " (T)");
            // Metadata output takes precedence: when it is shown, slot names are not printed separately.
            bool metaVals = args.ShowMetadataValues;
            if (metaVals || args.ShowMetadataTypes) { ShowMetadata(itw, schema, col, metaVals); continue; }
            if (!args.ShowSlots) { continue; }
            // Slot names are printed only for known-size vector columns that have SlotNames metadata.
            if (!type.IsKnownSizeVector) { continue; }
            ColumnType typeNames;
            if ((typeNames = schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, col)) == null) { continue; }
            // Slot names of an unexpected shape trip an assert and the column's slots are skipped.
            if (typeNames.VectorSize != type.VectorSize || !typeNames.ItemType.IsText) { Contracts.Assert(false, "Unexpected slot names type"); continue; }
            schema.GetMetadata(MetadataUtils.Kinds.SlotNames, col, ref names);
            if (names.Length != type.VectorSize) { Contracts.Assert(false, "Unexpected length of slot names vector"); continue; }
            using (itw.Nest())
            {
                // In verbose mode every slot is listed; otherwise only slots with a non-empty name.
                bool verbose = args.Verbose ?? false;
                foreach (var kvp in names.Items(all: verbose))
                {
                    if (verbose || !kvp.Value.IsEmpty) { itw.WriteLine("{0}:{1}", kvp.Key, kvp.Value); }
                }
            }
        }
    }
}
// Translates the column index through _revmapping and forwards the request to the wrapped schema.
public ColumnType GetMetadataTypeOrNull(string kind, int col)
{
    var mappedCol = _revmapping[col];
    return _schema.GetMetadataTypeOrNull(kind, mappedCol);
}