/// <summary>
/// Public constructor corresponding to SignatureDataTransform.
/// Validates the arguments, creates the bindings and the per-column info, and, when at
/// least one column reports a positive invert hash max count, makes a full pass over
/// <paramref name="input"/> to collect the slot names metadata.
/// </summary>
/// <param name="env">Host environment forwarded to the base constructor.</param>
/// <param name="args">Transform arguments; must be non-null and contain at least one column.</param>
/// <param name="input">Input data view; also the data scanned when invert hashes are built.</param>
public NgramHashingTransformer(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, input)
{
    Host.CheckValue(args, nameof(args));
    Host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));

    int columnCount = args.Column.Length;
    _bindings = new Bindings(args, Source.Schema, this);
    _exes = new ColInfoEx[columnCount];

    // Indexed by iinfo; entries stay at zero for columns that do not request invert hashing.
    var maxCountsPerColumn = new int[columnCount];
    // Stays null until the first column that requests invert hashing is seen.
    List<int> requestedIinfos = null;

    for (int iinfo = 0; iinfo < _exes.Length; iinfo++)
    {
        var column = args.Column[iinfo];
        var exInfo = new ColInfoEx(column, args);
        _exes[iinfo] = exInfo;

        var maxCount = GetAndVerifyInvertHashMaxCount(args, column, exInfo);
        if (maxCount > 0)
        {
            Utils.Add(ref requestedIinfos, iinfo);
            maxCountsPerColumn[iinfo] = maxCount;
        }
    }

    InitColumnTypes();

    // Nothing more to do unless at least one column asked for invert hashes.
    if (Utils.Size(requestedIinfos) <= 0)
    {
        return;
    }

    // Build the invert hashes, restricted to the columns that requested them.
    var requestedCols = new HashSet<int>(requestedIinfos.Select(iinfo => _bindings.MapIinfoToCol(iinfo)));
    var inputPred = _bindings.GetDependencies(requestedCols.Contains);
    var active = _bindings.GetActive(requestedCols.Contains);
    string[][] friendlyNames = args.Column.Select(col => col.FriendlyNames).ToArray();
    var helper = new InvertHashHelper(this, friendlyNames, inputPred, maxCountsPerColumn);

    using (IRowCursor srcCursor = input.GetRowCursor(inputPred))
    using (var dstCursor = new RowCursor(this, srcCursor, active, helper.Decorate))
    {
        // Invoke every getter on every row of the decorated cursor.
        var callAllGetters = InvertHashHelper.CallAllGetters(dstCursor);
        while (dstCursor.MoveNext())
        {
            callAllGetters();
        }
    }

    _slotNames = helper.SlotNamesMetadata(out _slotNamesTypes);
}
/// <summary>
/// Creates the transform from <paramref name="args"/>, checks the requested number of hash
/// bits, creates the per-column info, and, for every column that reports a positive invert
/// hash max count, makes a pass over <paramref name="input"/> to build the key values
/// metadata before the final call to SetMetadata.
/// </summary>
/// <param name="env">Host environment; null-checked before being passed to the base constructor.</param>
/// <param name="args">Transform arguments; null-checked, with its columns passed to the base constructor.</param>
/// <param name="input">Input data view; also the data scanned when invert hashes are built.</param>
public HashTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(Contracts.CheckRef(env, nameof(env)), RegistrationName, env.CheckRef(args, nameof(args)).Column, input, TestType)
{
    bool hashBitsInRange = args.HashBits >= NumBitsMin && args.HashBits < NumBitsLim;
    if (!hashBitsInRange)
    {
        throw Host.ExceptUserArg(nameof(args.HashBits), "hashBits should be between {0} and {1} inclusive", NumBitsMin, NumBitsLim - 1);
    }

    _exes = new ColInfoEx[Infos.Length];

    // Parallel lists: requestedIinfos[k] is a column index and requestedMaxCounts[k] its max count.
    // Both stay null until the first column that requests invert hashing is seen.
    List<int> requestedIinfos = null;
    List<int> requestedMaxCounts = null;

    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
    {
        var column = args.Column[iinfo];
        var exInfo = new ColInfoEx(args, column);
        _exes[iinfo] = exInfo;

        int maxCount = GetAndVerifyInvertHashMaxCount(args, column, exInfo);
        if (maxCount > 0)
        {
            Utils.Add(ref requestedIinfos, iinfo);
            Utils.Add(ref requestedMaxCounts, maxCount);
        }
    }

    _types = InitColumnTypes();

    if (Utils.Size(requestedIinfos) > 0)
    {
        // Build the invert hashes for all columns for which it was requested.
        var neededSources = new HashSet<int>(requestedIinfos.Select(iinfo => Infos[iinfo].Source));

        using (IRowCursor srcCursor = input.GetRowCursor(neededSources.Contains))
        using (var ch = Host.Start("Invert hash building"))
        {
            InvertHashHelper[] helpers = new InvertHashHelper[requestedIinfos.Count];

            // Create one helper per requesting column, each fed by that column's getter.
            for (int k = 0; k < helpers.Length; ++k)
            {
                int iinfo = requestedIinfos[k];
                Host.Assert(_types[iinfo].ItemType.KeyCount > 0);

                Action disposer;
                var dstGetter = GetGetterCore(ch, srcCursor, iinfo, out disposer);
                Host.Assert(disposer == null);

                helpers[k] = InvertHashHelper.Create(srcCursor, Infos[iinfo], _exes[iinfo], requestedMaxCounts[k], dstGetter);
            }

            // Single pass over the input: every helper processes every row.
            while (srcCursor.MoveNext())
            {
                for (int k = 0; k < helpers.Length; ++k)
                {
                    helpers[k].Process();
                }
            }

            // Sized to all columns; only the entries of requesting columns are assigned.
            _keyValues = new VBuffer<DvText>[_exes.Length];
            _kvTypes = new ColumnType[_exes.Length];
            for (int k = 0; k < helpers.Length; ++k)
            {
                int iinfo = requestedIinfos[k];
                _keyValues[iinfo] = helpers[k].GetKeyValuesMetadata();
                Host.Assert(_keyValues[iinfo].Length == _types[iinfo].ItemType.KeyCount);
                _kvTypes[iinfo] = new VectorType(TextType.Instance, _keyValues[iinfo].Length);
            }

            ch.Done();
        }
    }

    SetMetadata();
}