/// <summary>
        /// Sets up a builder: records the n-gram configuration, stores the id finder, and
        /// allocates the scratch n-gram array, the fixed-size queue and the output buffer builder.
        /// </summary>
        /// <param name="ngramLength">Asserted to be positive; also the length of the scratch n-gram array.</param>
        /// <param name="skipLength">Asserted to be non-negative.</param>
        /// <param name="slotLim">Asserted to be non-negative; stored as-is.</param>
        /// <param name="finder">Stored as-is; it is not validated here.</param>
        public NgramBufferBuilder(int ngramLength, int skipLength, int slotLim, NgramIdFinder finder)
        {
            // Argument sanity checks; together the last-but-one enforces
            // ngramLength + skipLength <= MaxSkipNgramLength without risking overflow.
            Contracts.Assert(0 < ngramLength);
            Contracts.Assert(0 <= skipLength);
            Contracts.Assert(MaxSkipNgramLength - skipLength >= ngramLength);
            Contracts.Assert(0 <= slotLim);

            // Plain configuration state.
            _finder      = finder;
            _slotLim     = slotLim;
            _skipLength  = skipLength;
            _ngramLength = ngramLength;

            // Working storage: the queue is constructed with ngramLength + skipLength,
            // the scratch array holds exactly ngramLength entries.
            _ngram = new uint[ngramLength];
            _queue = new FixedSizeQueue<uint>(ngramLength + skipLength);
            _bldr  = BufferBuilder<Float>.CreateDefault();
        }
        /// <summary>
        /// Creates the getter for the bagging case: the source column is a vector of keys, and
        /// for each in-range key the getter calls <c>AddFeature</c> with value 1 at the key's
        /// zero-based index, so repeated keys are fed to the same output slot.
        /// </summary>
        /// <param name="input">The row the source column is read from.</param>
        /// <param name="iinfo">Index into <c>Infos</c> (and <c>_bag</c>, <c>_types</c>) of the column to produce.</param>
        /// <returns>
        /// A getter that, on every call, resets a shared builder with the source key count,
        /// reads the source vector, feeds the keys into the builder and writes the result into <c>dst</c>.
        /// </returns>
        private ValueGetter <VBuffer <Float> > MakeGetterBag(IRow input, int iinfo)
        {
            Host.AssertValue(input);

            var column   = Infos[iinfo];
            var srcType  = column.TypeSrc;
            var itemType = srcType.ItemType;

            Host.Assert(srcType.IsVector);
            Host.Assert(itemType.IsKey);
            Host.Assert(_bag[iinfo]);
            Host.Assert(itemType.KeyCount == _types[iinfo].VectorSize);

            int keyCount = itemType.KeyCount;
            Host.Assert(keyCount > 0);

            // A declared size of 0 disables the per-row length check below.
            int declaredLength = srcType.VectorSize;
            Host.Assert(declaredLength >= 0);

            // These three are captured by the returned delegate and reused across calls.
            var readKeys  = RowCursorUtils.GetVecGetterAs<uint>(NumberType.U4, input, column.Source);
            var keyBuffer = default(VBuffer<uint>);
            var builder   = BufferBuilder<float>.CreateDefault();

            return (ref VBuffer<Float> dst) =>
            {
                builder.Reset(keyCount, false);

                readKeys(ref keyBuffer);
                Host.Check(declaredLength == 0 || keyBuffer.Length == declaredLength);

                // Only the values matter when bagging; where they sit in the source vector does not.
                var rawKeys = keyBuffer.Values;
                int valueCount = keyBuffer.Count;
                for (int i = 0; i < valueCount; i++)
                {
                    // Shift the raw key down by one. With unsigned arithmetic a raw value of 0
                    // wraps to uint.MaxValue (in the default unchecked context) and is then
                    // rejected by the range test, as is anything at or beyond keyCount.
                    uint zeroBased = rawKeys[i] - 1;
                    if (zeroBased >= keyCount)
                    {
                        continue;
                    }

                    builder.AddFeature((int)zeroBased, 1);
                }

                builder.GetResult(ref dst);
            };
        }
// Example #3
// 0
 /// <summary>
 /// Creates the column. <paramref name="itemType"/> and <paramref name="values"/> are handed to
 /// the base constructor together with <c>Utils.Size</c>, and a <c>BufferBuilder</c> is created
 /// from <paramref name="combiner"/> and kept on the instance.
 /// </summary>
 /// <param name="itemType">Item type forwarded to the base constructor.</param>
 /// <param name="combiner">Combiner passed to the <c>BufferBuilder</c> constructor.</param>
 /// <param name="values">Jagged array of values forwarded to the base constructor.</param>
 public ArrayToSparseVBufferColumn(PrimitiveType itemType, Combiner<T> combiner, T[][] values)
     : base(itemType, values, Utils.Size)
 {
     // Built once here; the field is not otherwise touched in this constructor.
     _bldr = new BufferBuilder<T>(combiner);
 }