/// <summary>
/// Sets up the builder: records the n-gram configuration and allocates the working
/// n-gram array, the fixed-size queue and the output buffer builder.
/// </summary>
/// <param name="ngramLength">N-gram length; asserted to be positive.</param>
/// <param name="skipLength">Skip length; asserted to be non-negative.</param>
/// <param name="slotLim">Slot limit; asserted to be non-negative.</param>
/// <param name="finder">The n-gram id finder; stored as given, with no validation here.</param>
public NgramBufferBuilder(int ngramLength, int skipLength, int slotLim, NgramIdFinder finder)
{
    // Argument sanity checks. The third one states that ngramLength + skipLength
    // must not exceed MaxSkipNgramLength.
    Contracts.Assert(ngramLength > 0);
    Contracts.Assert(skipLength >= 0);
    Contracts.Assert(ngramLength <= MaxSkipNgramLength - skipLength);
    Contracts.Assert(slotLim >= 0);

    // Plain configuration state.
    _finder = finder;
    _slotLim = slotLim;
    _skipLength = skipLength;
    _ngramLength = ngramLength;

    // Working storage: the n-gram array holds ngramLength entries and the queue
    // is created with a capacity of ngramLength + skipLength.
    _ngram = new uint[ngramLength];
    _queue = new FixedSizeQueue<uint>(ngramLength + skipLength);
    _bldr = BufferBuilder<Float>.CreateDefault();
}
/// <summary>
/// Creates the getter for the bagging case: the source column is a vector of keys and
/// the outputs for all of its entries should be added into a single result vector.
/// </summary>
/// <param name="input">The row to read the source column from.</param>
/// <param name="iinfo">Index into <c>Infos</c> selecting the column being produced.</param>
/// <returns>
/// A getter that, on each call, reads the source key vector and feeds a value of 1 into
/// the buffer builder at index (key - 1) for every key that falls inside the key range.
/// </returns>
private ValueGetter<VBuffer<Float>> MakeGetterBag(IRow input, int iinfo)
{
    Host.AssertValue(input);

    var info = Infos[iinfo];
    Host.Assert(info.TypeSrc.IsVector);
    Host.Assert(info.TypeSrc.ItemType.IsKey);
    Host.Assert(_bag[iinfo]);
    Host.Assert(info.TypeSrc.ItemType.KeyCount == _types[iinfo].VectorSize);

    // The output length equals the key count of the source item type.
    int keyCount = info.TypeSrc.ItemType.KeyCount;
    Host.Assert(keyCount > 0);

    // Declared source vector size; zero disables the length check in the getter below.
    int srcVecSize = info.TypeSrc.VectorSize;
    Host.Assert(srcVecSize >= 0);

    var fetchKeys = RowCursorUtils.GetVecGetterAs<uint>(NumberType.U4, input, info.Source);
    var keys = default(VBuffer<uint>);
    var builder = BufferBuilder<float>.CreateDefault();

    ValueGetter<VBuffer<Float>> getter =
        (ref VBuffer<Float> dst) =>
        {
            builder.Reset(keyCount, false);
            fetchKeys(ref keys);
            Host.Check(srcVecSize == 0 || keys.Length == srcVecSize);

            // The indices are irrelevant in the bagging case.
            var raw = keys.Values;
            int n = keys.Count;
            for (int i = 0; i < n; i++)
            {
                // Shift the raw key down by one. With unchecked uint arithmetic a raw
                // value of 0 wraps to uint.MaxValue and is rejected by the range test.
                uint key = raw[i] - 1;
                if (key >= keyCount)
                {
                    continue;
                }

                builder.AddFeature((int)key, 1);
            }

            builder.GetResult(ref dst);
        };

    return getter;
}
/// <summary>
/// Creates the column over a jagged array of values and prepares the buffer builder
/// that this column uses, constructed from the supplied combiner.
/// </summary>
/// <param name="itemType">The item type, forwarded to the base constructor.</param>
/// <param name="combiner">The combiner handed to the internal <c>BufferBuilder</c>.</param>
/// <param name="values">The per-row value arrays, forwarded to the base constructor.</param>
public ArrayToSparseVBufferColumn(PrimitiveType itemType, Combiner<T> combiner, T[][] values)
    : base(itemType, values, Utils.Size)
{
    // Utils.Size is passed to the base as a method group, alongside the values.
    _bldr = new BufferBuilder<T>(combiner);
}