Example #1
0
        /// <summary>
        /// Public constructor corresponding to SignatureDataTransform.
        /// Checks the arguments, creates the bindings and one <see cref="ColInfoEx"/> per column, and,
        /// when at least one column has a positive verified invert-hash max count, iterates over every
        /// row of <paramref name="input"/> through a decorated cursor before asking the helper for the
        /// slot names metadata.
        /// </summary>
        /// <param name="env">Host environment handed to the base constructor.</param>
        /// <param name="args">Transform arguments; checked for a value and for at least one column.</param>
        /// <param name="input">Data view handed to the base constructor and cursored over when invert hashes are built.</param>
        public NgramHashingTransformer(IHostEnvironment env, Arguments args, IDataView input)
            : base(env, RegistrationName, input)
        {
            Host.CheckValue(args, nameof(args));
            Host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));

            _bindings = new Bindings(args, Source.Schema, this);
            _exes = new ColInfoEx[args.Column.Length];

            // Stays null unless Utils.Add appends at least one column index to it.
            List<int> columnsToInvert = null;
            // Indexed by column; an entry stays 0 for a column whose verified max count is not positive.
            int[] maxCountPerColumn = new int[args.Column.Length];

            for (int col = 0; col < _exes.Length; col++)
            {
                var column = args.Column[col];
                _exes[col] = new ColInfoEx(column, args);

                var maxCount = GetAndVerifyInvertHashMaxCount(args, column, _exes[col]);
                if (maxCount <= 0)
                {
                    continue;
                }

                Utils.Add(ref columnsToInvert, col);
                maxCountPerColumn[col] = maxCount;
            }

            InitColumnTypes();

            // Nothing more to do when no column asked for an invert hash.
            if (Utils.Size(columnsToInvert) <= 0)
            {
                return;
            }

            // Build the invert hashes, restricted to the columns that requested them.
            var invertedCols = new HashSet<int>(columnsToInvert.Select(i => _bindings.MapIinfoToCol(i)));
            var inputPred = _bindings.GetDependencies(invertedCols.Contains);
            var active = _bindings.GetActive(invertedCols.Contains);
            string[][] friendlyNames = args.Column.Select(c => c.FriendlyNames).ToArray();
            var helper = new InvertHashHelper(this, friendlyNames, inputPred, maxCountPerColumn);

            using (IRowCursor srcCursor = input.GetRowCursor(inputPred))
            {
                using (var dstCursor = new RowCursor(this, srcCursor, active, helper.Decorate))
                {
                    // Invoke every getter on every row; the cursor was built with helper.Decorate.
                    var callAllGetters = InvertHashHelper.CallAllGetters(dstCursor);
                    while (dstCursor.MoveNext())
                    {
                        callAllGetters();
                    }
                }
            }

            _slotNames = helper.SlotNamesMetadata(out _slotNamesTypes);
        }
Example #2
0
        /// <summary>
        /// Constructs the transform from user arguments. Rejects a hash bit count outside
        /// [NumBitsMin, NumBitsLim - 1], creates one <see cref="ColInfoEx"/> per column, initializes
        /// the column types and, for columns with a positive verified invert-hash max count, iterates
        /// over every row of <paramref name="input"/> to produce key-value metadata. Finishes by
        /// calling SetMetadata.
        /// </summary>
        /// <param name="env">Host environment; reference-checked before being passed to the base constructor.</param>
        /// <param name="args">Transform arguments; reference-checked, and its columns are passed to the base constructor.</param>
        /// <param name="input">Data view passed to the base constructor and cursored over when invert hashes are built.</param>
        public HashTransform(IHostEnvironment env, Arguments args, IDataView input)
            : base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column,
                   input, TestType)
        {
            bool hashBitsInRange = args.HashBits >= NumBitsMin && args.HashBits < NumBitsLim;
            if (!hashBitsInRange)
            {
                throw Host.ExceptUserArg(nameof(args.HashBits), "hashBits should be between {0} and {1} inclusive", NumBitsMin, NumBitsLim - 1);
            }

            _exes = new ColInfoEx[Infos.Length];

            // Parallel lists: requestedMaxCounts[k] belongs to column requestedColumns[k].
            // Both stay null unless Utils.Add appends to them.
            List<int> requestedColumns = null;
            List<int> requestedMaxCounts = null;

            for (int col = 0; col < Infos.Length; col++)
            {
                var column = args.Column[col];
                _exes[col] = new ColInfoEx(args, column);

                int maxCount = GetAndVerifyInvertHashMaxCount(args, column, _exes[col]);
                if (maxCount <= 0)
                {
                    continue;
                }

                Utils.Add(ref requestedColumns, col);
                Utils.Add(ref requestedMaxCounts, maxCount);
            }

            _types = InitColumnTypes();

            if (Utils.Size(requestedColumns) > 0)
            {
                // Build the invert hashes for all columns for which it was requested.
                var srcs = new HashSet<int>(requestedColumns.Select(i => Infos[i].Source));
                using (IRowCursor srcCursor = input.GetRowCursor(srcs.Contains))
                using (var ch = Host.Start("Invert hash building"))
                {
                    // One helper per requested column, each wired to that column's getter.
                    InvertHashHelper[] helpers = new InvertHashHelper[requestedColumns.Count];
                    for (int k = 0; k < helpers.Length; ++k)
                    {
                        int iinfo = requestedColumns[k];
                        Host.Assert(_types[iinfo].ItemType.KeyCount > 0);

                        Action disposer;
                        var dstGetter = GetGetterCore(ch, srcCursor, iinfo, out disposer);
                        Host.Assert(disposer == null);

                        helpers[k] = InvertHashHelper.Create(srcCursor, Infos[iinfo], _exes[iinfo], requestedMaxCounts[k], dstGetter);
                    }

                    // Let every helper process every row of the source cursor.
                    while (srcCursor.MoveNext())
                    {
                        for (int k = 0; k < helpers.Length; ++k)
                        {
                            helpers[k].Process();
                        }
                    }

                    // Sized per column; only entries for requested columns are filled in below.
                    _keyValues = new VBuffer<DvText>[_exes.Length];
                    _kvTypes = new ColumnType[_exes.Length];
                    for (int k = 0; k < helpers.Length; ++k)
                    {
                        int iinfo = requestedColumns[k];
                        _keyValues[iinfo] = helpers[k].GetKeyValuesMetadata();
                        Host.Assert(_keyValues[iinfo].Length == _types[iinfo].ItemType.KeyCount);
                        _kvTypes[iinfo] = new VectorType(TextType.Instance, _keyValues[iinfo].Length);
                    }

                    ch.Done();
                }
            }

            SetMetadata();
        }