/// <summary>
/// Base constructor: records the owning transform, the output type and the index of the
/// column info this map is responsible for.
/// </summary>
/// <param name="trans">The owning transform; asserted to be non-null and stored in Parent.</param>
/// <param name="typeVal">The output type; asserted to be non-null and stored in TypeOutput.</param>
/// <param name="iinfo">Index into Parent.Infos; asserted to be within bounds and stored in InfoIndex.</param>
protected KeyToValueMap(KeyToValueTransform trans, PrimitiveType typeVal, int iinfo)
{
    // REVIEW: Is there a better way to perform this first assert value?
    // The parent's host is not available until the parent itself has been validated,
    // so the very first check has to go through the static Contracts class.
    Contracts.AssertValue(trans);
    Parent = trans;

    // From here on the parent's host is used for the remaining argument checks.
    Parent.Host.AssertValue(typeVal);
    Parent.Host.Assert(iinfo >= 0 && iinfo < Parent.Infos.Length);

    InfoIndex = iinfo;
    TypeOutput = typeVal;
}
/// <summary>
/// Entry point that applies a <see cref="KeyToValueTransform"/> to the data referenced by
/// <paramref name="input"/> and returns the transform both as a model and as the output data.
/// </summary>
/// <param name="env">The host environment; checked for null and used to register the "KeyToValue" host.</param>
/// <param name="input">The transform arguments; checked for null and validated with EntryPointUtils.CheckInputArgs.</param>
/// <returns>A transform output whose Model wraps the new transform and whose OutputData is the transform itself.</returns>
public static CommonOutputs.TransformOutput KeyToText(IHostEnvironment env, KeyToValueTransform.Arguments input)
{
    Contracts.CheckValue(env, nameof(env));

    // All further validation is reported through a host registered for this entry point.
    var host = env.Register("KeyToValue");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    var transform = new KeyToValueTransform(host, input, input.Data);
    var model = new TransformModel(env, transform, input.Data);

    return new CommonOutputs.TransformOutput
    {
        Model = model,
        OutputData = transform
    };
}
/// <summary>
/// Appends the per-instance data views of the individual folds into a single data view.
/// </summary>
/// <remarks>
/// Hidden columns are dropped from every view. Using the first view as the baseline, the method
/// collects key-typed columns that carry key names (scalar and vector) and the slot names of
/// vector columns. Vector columns of later views for which VerifyVectorColumnsMatch reports a
/// mismatch are collected as well. When neither scalar key columns nor mismatching vector columns
/// were found, the views are appended as they are. Otherwise key values are reconciled across the
/// views, the key columns are converted with a KeyToValueTransform, and each mismatching vector
/// column is replaced through AddVarLengthColumn before the views are appended.
/// </remarks>
/// <param name="foldDataViews">The per-fold data views, enumerated exactly once.</param>
/// <param name="ch">Channel that receives a warning when mismatching vector columns are detected.</param>
/// <returns>The data view created by AppendRowsDataView.Create over the processed views.</returns>
private IDataView AppendPerInstanceDataViews(IEnumerable<IDataView> foldDataViews, IChannel ch)
{
    // Make sure there are no variable size vector columns.
    // Maps a vector column name to the vector size recorded the first time the name was seen.
    var sizeByColumn = new Dictionary<string, int>();
    // Slot names are only ever recorded from the first view; they are the baseline for later views.
    var baselineSlotNames = new Dictionary<string, VBuffer<DvText>>();
    var scalarKeyColumns = new List<string>();
    var vectorKeyColumns = new List<string>();
    var mismatchedVectorColumns = new List<string>();
    var visibleViews = new List<IDataView>();

    foreach (var foldView in foldDataViews)
    {
        // One view is appended per iteration, so an empty list identifies the first view.
        bool isFirstView = visibleViews.Count == 0;
        var schema = foldView.Schema;
        var hiddenColumns = new List<int>();

        for (int col = 0; col < schema.ColumnCount; col++)
        {
            if (schema.IsHidden(col))
            {
                hiddenColumns.Add(col);
                continue;
            }

            var colType = schema.GetColumnType(col);
            var colName = schema.GetColumnName(col);

            if (!colType.IsVector)
            {
                // The label column can be a key. Such columns get their key values reconciled
                // and are wrapped with a KeyToValue transform further below.
                if (isFirstView && schema.HasKeyNames(col, colType.KeyCount))
                {
                    scalarKeyColumns.Add(colName);
                }
                continue;
            }

            if (isFirstView)
            {
                if (schema.HasKeyNames(col, colType.ItemType.KeyCount))
                {
                    vectorKeyColumns.Add(colName);
                }

                // Store the slot names of the first view and use them as the baseline.
                if (schema.HasSlotNames(col, colType.VectorSize))
                {
                    VBuffer<DvText> firstSlotNames = default(VBuffer<DvText>);
                    schema.GetMetadata(MetadataUtils.Kinds.SlotNames, col, ref firstSlotNames);
                    baselineSlotNames.Add(colName, firstSlotNames);
                }
            }

            int knownSize;
            if (!sizeByColumn.TryGetValue(colName, out knownSize))
            {
                // First occurrence of this vector column: just remember its size.
                sizeByColumn.Add(colName, colType.VectorSize);
                continue;
            }

            // If no slot names were recorded for this column, TryGetValue leaves the default,
            // length 0 vector in expectedSlotNames.
            VBuffer<DvText> expectedSlotNames;
            baselineSlotNames.TryGetValue(colName, out expectedSlotNames);
            if (!VerifyVectorColumnsMatch(knownSize, col, foldView, colType, ref expectedSlotNames))
            {
                mismatchedVectorColumns.Add(colName);
            }
        }

        IDataView visibleView = foldView;
        if (hiddenColumns.Count > 0)
        {
            var dropHiddenArgs = new ChooseColumnsByIndexTransform.Arguments
            {
                Drop = true,
                Index = hiddenColumns.ToArray()
            };
            visibleView = new ChooseColumnsByIndexTransform(Host, dropHiddenArgs, visibleView);
        }
        visibleViews.Add(visibleView);
    }

    var views = visibleViews.ToArray();

    // NOTE(review): vector key columns alone do not prevent this shortcut - confirm that is intended.
    bool needsPostProcessing = mismatchedVectorColumns.Count > 0 || scalarKeyColumns.Count > 0;
    if (!needsPostProcessing)
    {
        return AppendRowsDataView.Create(Host, null, views);
    }

    foreach (var scalarKeyColumn in scalarKeyColumns)
    {
        EvaluateUtils.ReconcileKeyValues(Host, views, scalarKeyColumn);
    }
    foreach (var vectorKeyColumn in vectorKeyColumns)
    {
        EvaluateUtils.ReconcileVectorKeyValues(Host, views, vectorKeyColumn);
    }

    if (mismatchedVectorColumns.Count > 0)
    {
        ch.Warning("Detected columns of variable length: {0}. Consider setting collateMetrics- for meaningful per-Folds results.", string.Join(", ", mismatchedVectorColumns));
    }

    // Scalar key columns are converted first, followed by the vector key columns.
    var allKeyColumns = scalarKeyColumns.Concat(vectorKeyColumns).ToArray();
    var processedViews = new IDataView[views.Length];
    for (int v = 0; v < views.Length; v++)
    {
        IDataView current = views[v];

        // Convert every key column to its values, then drop the columns reported by
        // FindHiddenColumns for that column name.
        foreach (var keyColumn in allKeyColumns)
        {
            var keyToValueArgs = new KeyToValueTransform.Arguments()
            {
                Column = new[] { new KeyToValueTransform.Column() { Name = keyColumn }, }
            };
            current = new KeyToValueTransform(Host, keyToValueArgs, current);

            var shadowed = FindHiddenColumns(current.Schema, keyColumn);
            var dropShadowedArgs = new ChooseColumnsByIndexTransform.Arguments()
            {
                Drop = true,
                Index = shadowed.ToArray()
            };
            current = new ChooseColumnsByIndexTransform(Host, dropShadowedArgs, current);
        }

        foreach (var vectorColumn in mismatchedVectorColumns)
        {
            // NOTE(review): the lookup result is ignored; on failure columnIndex keeps whatever
            // TryGetColumnIndex assigned - confirm the column is guaranteed to exist in every view.
            int columnIndex;
            current.Schema.TryGetColumnIndex(vectorColumn, out columnIndex);
            var vectorType = current.Schema.GetColumnType(columnIndex);

            current = Utils.MarshalInvoke(AddVarLengthColumn<int>, vectorType.ItemType.RawType, Host, current, vectorColumn, vectorType);

            // Drop the old column that does not have variable length.
            var dropOldArgs = new DropColumnsTransform.Arguments()
            {
                Column = new[] { vectorColumn }
            };
            current = new DropColumnsTransform(Host, dropOldArgs, current);
        }

        processedViews[v] = current;
    }

    return AppendRowsDataView.Create(Host, null, processedViews);
}