예제 #1
0
 /// <summary>
 /// Initializes the mapper with its owning transform, its output type and the index of the
 /// column info it is responsible for.
 /// </summary>
 /// <param name="trans">The owning transform; stored in <c>Parent</c>.</param>
 /// <param name="typeVal">The output type; stored in <c>TypeOutput</c>.</param>
 /// <param name="iinfo">Index into <c>Parent.Infos</c>; stored in <c>InfoIndex</c>.</param>
 protected KeyToValueMap(KeyToValueTransform trans, PrimitiveType typeVal, int iinfo)
 {
     // REVIEW: Is there a better way to perform this first assert value?
     // The parent's host cannot be used for this check because the parent itself is
     // the value being validated, so the static Contracts entry point is used instead.
     Contracts.AssertValue(trans);
     Parent = trans;

     // From here on every check goes through the parent's host.
     Parent.Host.AssertValue(typeVal);
     Parent.Host.Assert(iinfo >= 0 && iinfo < Parent.Infos.Length);

     TypeOutput = typeVal;
     InfoIndex = iinfo;
 }
        /// <summary>
        /// Builds a <see cref="KeyToValueTransform"/> over <c>input.Data</c> and returns it both as the
        /// output data and wrapped in a <see cref="TransformModel"/>.
        /// </summary>
        /// <param name="env">The host environment; checked for null and used to register the "KeyToValue" host.</param>
        /// <param name="input">The transform arguments, including the input data.</param>
        /// <returns>A <see cref="CommonOutputs.TransformOutput"/> holding the model and the transformed data.</returns>
        public static CommonOutputs.TransformOutput KeyToText(IHostEnvironment env, KeyToValueTransform.Arguments input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register("KeyToValue");

            // Validate the arguments through the freshly registered host.
            host.CheckValue(input, nameof(input));
            EntryPointUtils.CheckInputArgs(host, input);

            var transform = new KeyToValueTransform(host, input, input.Data);

            // The transform is exposed twice: wrapped in a model, and directly as the output data.
            var output = new CommonOutputs.TransformOutput();
            output.Model = new TransformModel(env, transform, input.Data);
            output.OutputData = transform;
            return output;
        }
        /// <summary>
        /// Appends the per-instance data views of all folds into a single data view.
        /// </summary>
        /// <remarks>
        /// Hidden columns are dropped from every fold. For each vector column, the size seen the first time
        /// the column name is encountered is cached and later folds are compared against it (together with
        /// the slot names recorded from the first fold) through <c>VerifyVectorColumnsMatch</c>. Columns that
        /// fail that comparison are rewritten through <c>AddVarLengthColumn</c> and reported in a warning.
        /// Key columns of the first fold that have key names are reconciled across folds and wrapped with a
        /// <see cref="KeyToValueTransform"/>.
        /// </remarks>
        /// <param name="foldDataViews">The per-instance data views, one per fold.</param>
        /// <param name="ch">Channel that receives the warning about mismatching vector columns.</param>
        /// <returns>The data view produced by <c>AppendRowsDataView.Create</c> over the processed folds.</returns>
        private IDataView AppendPerInstanceDataViews(IEnumerable<IDataView> foldDataViews, IChannel ch)
        {
            // Make sure there are no variable size vector columns.
            // This is a dictionary from the column name to its vector size.
            var vectorSizes = new Dictionary<string, int>();
            var firstDvSlotNames = new Dictionary<string, VBuffer<DvText>>();
            var firstDvKeyColumns = new List<string>();
            var firstDvVectorKeyColumns = new List<string>();
            var variableSizeVectorColumnNames = new List<string>();
            var list = new List<IDataView>();
            int dvNumber = 0;

            foreach (var dv in foldDataViews)
            {
                var hidden = new List<int>();
                for (int i = 0; i < dv.Schema.ColumnCount; i++)
                {
                    if (dv.Schema.IsHidden(i))
                    {
                        hidden.Add(i);
                        continue;
                    }

                    var type = dv.Schema.GetColumnType(i);
                    var name = dv.Schema.GetColumnName(i);
                    if (type.IsVector)
                    {
                        if (dvNumber == 0)
                        {
                            if (dv.Schema.HasKeyNames(i, type.ItemType.KeyCount))
                            {
                                firstDvVectorKeyColumns.Add(name);
                            }

                            // Store the slot names of the 1st idv and use them as baseline.
                            if (dv.Schema.HasSlotNames(i, type.VectorSize))
                            {
                                VBuffer<DvText> slotNames = default(VBuffer<DvText>);
                                dv.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, i, ref slotNames);
                                firstDvSlotNames.Add(name, slotNames);
                            }
                        }

                        int cachedSize;
                        if (vectorSizes.TryGetValue(name, out cachedSize))
                        {
                            VBuffer<DvText> slotNames;
                            // In the event that no slot names were recorded here, then slotNames will be
                            // the default, length 0 vector.
                            firstDvSlotNames.TryGetValue(name, out slotNames);
                            if (!VerifyVectorColumnsMatch(cachedSize, i, dv, type, ref slotNames))
                            {
                                // Record each mismatching column only once. With three or more folds the
                                // same column can mismatch repeatedly; a duplicate entry would repeat the
                                // name in the warning and make the rewrite loop below run again for a
                                // column that its previous iteration has already dropped.
                                if (!variableSizeVectorColumnNames.Contains(name))
                                {
                                    variableSizeVectorColumnNames.Add(name);
                                }
                            }
                        }
                        else
                        {
                            vectorSizes.Add(name, type.VectorSize);
                        }
                    }
                    else if (dvNumber == 0 && dv.Schema.HasKeyNames(i, type.KeyCount))
                    {
                        // The label column can be a key. Reconcile the key values, and wrap with a KeyToValue transform.
                        firstDvKeyColumns.Add(name);
                    }
                }

                // Drop the hidden columns of this fold, if it has any.
                var idv = dv;
                if (hidden.Count > 0)
                {
                    var args = new ChooseColumnsByIndexTransform.Arguments();
                    args.Drop = true;
                    args.Index = hidden.ToArray();
                    idv = new ChooseColumnsByIndexTransform(Host, args, idv);
                }
                list.Add(idv);
                dvNumber++;
            }

            // NOTE(review): this shortcut does not look at firstDvVectorKeyColumns, so vector key columns
            // are only reconciled when one of the other two lists is non-empty - confirm this is intended.
            if (variableSizeVectorColumnNames.Count == 0 && firstDvKeyColumns.Count == 0)
            {
                return AppendRowsDataView.Create(Host, null, list.ToArray());
            }

            var views = list.ToArray();

            foreach (var keyCol in firstDvKeyColumns)
            {
                EvaluateUtils.ReconcileKeyValues(Host, views, keyCol);
            }
            foreach (var vectorKeyCol in firstDvVectorKeyColumns)
            {
                EvaluateUtils.ReconcileVectorKeyValues(Host, views, vectorKeyCol);
            }

            // Wraps every key column (scalar and vector) with a KeyToValue transform and then drops the
            // columns reported by FindHiddenColumns for that name. The index argument is not used; it only
            // matches the indexed Select overload used at the end of this method.
            Func<IDataView, int, IDataView> keyToValue =
                (idv, i) =>
                {
                    foreach (var keyCol in firstDvKeyColumns.Concat(firstDvVectorKeyColumns))
                    {
                        idv = new KeyToValueTransform(Host, new KeyToValueTransform.Arguments()
                        {
                            Column = new[] { new KeyToValueTransform.Column() { Name = keyCol }, }
                        }, idv);
                        var hidden = FindHiddenColumns(idv.Schema, keyCol);
                        idv = new ChooseColumnsByIndexTransform(Host, new ChooseColumnsByIndexTransform.Arguments()
                        {
                            Drop = true,
                            Index = hidden.ToArray()
                        }, idv);
                    }
                    return idv;
                };

            // For every mismatching vector column: add its replacement through AddVarLengthColumn and drop
            // the original column.
            Func<IDataView, IDataView> selectDropNonVarLengthCol =
                (idv) =>
                {
                    foreach (var variableSizeVectorColumnName in variableSizeVectorColumnNames)
                    {
                        // NOTE(review): the result of TryGetColumnIndex is ignored, so a view that lacks
                        // this column would continue with whatever index the call leaves behind - verify
                        // that every fold is guaranteed to contain the column.
                        int index;
                        idv.Schema.TryGetColumnIndex(variableSizeVectorColumnName, out index);
                        var type = idv.Schema.GetColumnType(index);

                        idv = Utils.MarshalInvoke(AddVarLengthColumn<int>, type.ItemType.RawType, Host, idv,
                            variableSizeVectorColumnName, type);

                        // Drop the old column that does not have variable length.
                        idv = new DropColumnsTransform(Host, new DropColumnsTransform.Arguments()
                        {
                            Column = new[] { variableSizeVectorColumnName }
                        }, idv);
                    }
                    return idv;
                };

            if (variableSizeVectorColumnNames.Count > 0)
            {
                ch.Warning("Detected columns of variable length: {0}. Consider setting collateMetrics- for meaningful per-Folds results.", string.Join(", ", variableSizeVectorColumnNames));
            }
            return AppendRowsDataView.Create(Host, null, views.Select(keyToValue).Select(selectDropNonVarLengthCol).ToArray());
        }