Example #1
        /// <summary>
        /// Copy from this buffer to the given destination, explicitly representing the first
        /// count indices listed in indicesInclude. Note that indicesInclude must be sorted,
        /// with each index less than this.Length. Note that this can cause the destination to
        /// become dense even if "this" is sparse.
        /// </summary>
        public void CopyTo(ref VBuffer <T> dst, int[] indicesInclude, int count)
        {
            Contracts.CheckParam(count >= 0, nameof(count));
            Contracts.CheckParam(Utils.Size(indicesInclude) >= count, nameof(indicesInclude));
            Contracts.CheckParam(Utils.Size(indicesInclude) <= Length, nameof(indicesInclude));

            // REVIEW: Ideally we should Check that indicesInclude is sorted and in range. Would that
            // check be too expensive?
#if DEBUG
            int prev = -1;
            for (int i = 0; i < count; i++)
            {
                Contracts.Assert(prev < indicesInclude[i]);
                prev = indicesInclude[i];
            }
            Contracts.Assert(prev < Length);
#endif

            if (IsDense || count == 0)
            {
                CopyTo(ref dst);
                return;
            }

            if (count >= Length / 2 || Count >= Length / 2)
            {
                CopyToDense(ref dst);
                return;
            }

            var indices = dst.Indices;
            var values  = dst.Values;
            if (Count == 0)
            {
                // No values in "this".
                if (Utils.Size(indices) < count)
                {
                    indices = new int[count];
                }
                Array.Copy(indicesInclude, indices, count);
                if (Utils.Size(values) < count)
                {
                    values = new T[count];
                }
                else
                {
                    Array.Clear(values, 0, count);
                }
                dst = new VBuffer <T>(Length, count, values, indices);
                return;
            }

            int size = 0;
            int max  = count + Count;
            Contracts.Assert(max < Length);
            int ii1;
            int ii2;
            if (max >= Length / 2 || Utils.Size(values) < max || Utils.Size(indices) < max)
            {
                // Compute the needed size.
                ii1 = 0;
                ii2 = 0;
                for (; ;)
                {
                    Contracts.Assert(ii1 < Count);
                    Contracts.Assert(ii2 < count);
                    size++;
                    int diff = Indices[ii1] - indicesInclude[ii2];
                    if (diff == 0)
                    {
                        ii1++;
                        ii2++;
                        if (ii1 >= Count)
                        {
                            size += count - ii2;
                            break;
                        }
                        if (ii2 >= count)
                        {
                            size += Count - ii1;
                            break;
                        }
                    }
                    else if (diff < 0)
                    {
                        if (++ii1 >= Count)
                        {
                            size += count - ii2;
                            break;
                        }
                    }
                    else
                    {
                        if (++ii2 >= count)
                        {
                            size += Count - ii1;
                            break;
                        }
                    }
                }
                Contracts.Assert(size >= count && size >= Count);

                if (size == Count)
                {
                    CopyTo(ref dst);
                    return;
                }

                if (size >= Length / 2)
                {
                    CopyToDense(ref dst);
                    return;
                }

                if (Utils.Size(values) < size)
                {
                    values = new T[size];
                }
                if (Utils.Size(indices) < size)
                {
                    indices = new int[size];
                }
                max = size;
            }

            int ii = 0;
            ii1 = 0;
            ii2 = 0;
            for (; ;)
            {
                Contracts.Assert(ii < max);
                Contracts.Assert(ii1 < Count);
                Contracts.Assert(ii2 < count);
                int i1 = Indices[ii1];
                int i2 = indicesInclude[ii2];
                if (i1 <= i2)
                {
                    indices[ii] = i1;
                    values[ii]  = Values[ii1];
                    ii++;
                    if (i1 == i2)
                    {
                        ii2++;
                    }
                    if (++ii1 >= Count)
                    {
                        if (ii2 >= count)
                        {
                            break;
                        }
                        Array.Clear(values, ii, count - ii2);
                        Array.Copy(indicesInclude, ii2, indices, ii, count - ii2);
                        ii += count - ii2;
                        break;
                    }
                    if (ii2 >= count)
                    {
                        Array.Copy(Values, ii1, values, ii, Count - ii1);
                        Array.Copy(Indices, ii1, indices, ii, Count - ii1);
                        ii += Count - ii1;
                        break;
                    }
                }
                else
                {
                    indices[ii] = i2;
                    values[ii]  = default(T);
                    ii++;
                    if (++ii2 >= count)
                    {
                        Array.Copy(Values, ii1, values, ii, Count - ii1);
                        Array.Copy(Indices, ii1, indices, ii, Count - ii1);
                        ii += Count - ii1;
                        break;
                    }
                }
            }
            Contracts.Assert(size == ii || size == 0);

            dst = new VBuffer <T>(Length, ii, values, indices);
        }
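
A minimal usage sketch of this overload follows; the buffer contents are made up purely for illustration. Per the summary above, the include list must be sorted, each entry must be below Length, and only its first count entries are consulted.

// Hypothetical data: a sparse buffer of logical length 10 with explicit values at slots 2 and 7.
var src = new VBuffer<float>(10, 2, new[] { 1.5f, -0.5f }, new[] { 2, 7 });
var dst = default(VBuffer<float>);

// Force slots 0, 4 and 7 to be explicitly represented in the copy; slots the source does not
// hold come out as default(float), i.e. 0.
int[] indicesInclude = { 0, 4, 7 };   // sorted, each index < src.Length
src.CopyTo(ref dst, indicesInclude, indicesInclude.Length);
// dst now carries explicit entries for slots { 0, 2, 4, 7 } and remains sparse,
// since 4 explicit slots is still well under half of the logical length.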
Example #2
 private void GetKeyNames(int col, ref VBuffer <DvText> dst)
 {
     Contracts.Assert(col == 0);
     Contracts.AssertValue(_keyNamesType);
     _keyNames.CopyTo(ref dst);
 }
 private void GetTerms(int iinfo, ref VBuffer <ReadOnlyMemory <char> > dst)
 {
     Host.Assert(0 <= iinfo && iinfo < _exes.Length);
     Host.Assert(_slotNames[iinfo].Length > 0);
     _slotNames[iinfo].CopyTo(ref dst);
 }
 protected override void CopyValue(ref VBuffer <TItem> src, ref VBuffer <TItem> dst)
 {
     src.CopyTo(ref dst);
 }
Example #5
        private static void FillValues(IExceptionContext ectx, ref VBuffer <Float> src, ref VBuffer <Float> dst, Float divisor, Float scale, Float offset = 0)
        {
            int count  = src.Count;
            int length = src.Length;

            ectx.Assert(Utils.Size(src.Values) >= count);
            ectx.Assert(divisor >= 0);

            if (count == 0)
            {
                dst = new VBuffer <Float>(length, 0, dst.Values, dst.Indices);
                return;
            }
            ectx.Assert(count > 0);
            ectx.Assert(length > 0);

            Float normScale = scale;

            if (divisor > 0)
            {
                normScale /= divisor;
            }

            // Don't normalize small values.
            if (normScale < MinScale)
            {
                normScale = 1;
            }

            if (offset == 0)
            {
                var dstValues = dst.Values;
                if (Utils.Size(dstValues) < count)
                {
                    dstValues = new Float[count];
                }
                var dstIndices = dst.Indices;
                if (!src.IsDense)
                {
                    if (Utils.Size(dstIndices) < count)
                    {
                        dstIndices = new int[count];
                    }
                    Array.Copy(src.Indices, dstIndices, count);
                }

                SseUtils.Scale(normScale, src.Values, dstValues, count);
                dst = new VBuffer <Float>(length, count, dstValues, dstIndices);

                return;
            }

            // Subtracting the mean requires a dense representation.
            src.CopyToDense(ref dst);

            if (normScale != 1)
            {
                SseUtils.ScaleAdd(normScale, -offset, dst.Values, length);
            }
            else
            {
                SseUtils.Add(-offset, dst.Values, length);
            }
        }
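
Putting the branches above together: with offset == 0 each explicit value is simply multiplied by normScale = scale / divisor (reset to 1 when it falls below MinScale), and with a non-zero offset the buffer is first densified so every slot can be shifted. Below is a hedged sketch of the per-element arithmetic, assuming SseUtils.ScaleAdd/Add do what they are used for here; the helper name is illustrative, not ML.NET API.

static float NormalizeOne(float value, float scale, float divisor, float offset, float minScale)
{
    float normScale = scale;
    if (divisor > 0)
        normScale /= divisor;            // e.g. divide by a norm or standard deviation
    if (normScale < minScale)
        normScale = 1;                   // don't rescale by a (near-)zero factor
    return (value - offset) * normScale; // offset == 0 reduces to plain scaling
}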
        public override Delegate[] CreateGetters(IRow input, Func <int, bool> activeOutput, out Action disposer)
        {
            disposer = null;

            var getters = new Delegate[3];

            if (!activeOutput(ClusterIdCol) && !activeOutput(SortedClusterCol) && !activeOutput(SortedClusterScoreCol))
            {
                return(getters);
            }

            long             cachedPosition = -1;
            VBuffer <Single> scores         = default(VBuffer <Single>);
            var scoresArr = new Single[_numClusters];

            int[] sortedIndices = new int[_numClusters];

            var    scoreGetter         = input.GetGetter <VBuffer <Single> >(ScoreIndex);
            Action updateCacheIfNeeded =
                () =>
            {
                if (cachedPosition != input.Position)
                {
                    scoreGetter(ref scores);
                    scores.CopyTo(scoresArr);
                    int j = 0;
                    foreach (var index in Enumerable.Range(0, scoresArr.Length).OrderBy(i => scoresArr[i]))
                    {
                        sortedIndices[j++] = index;
                    }
                    cachedPosition = input.Position;
                }
            };

            if (activeOutput(ClusterIdCol))
            {
                ValueGetter <uint> assignedFn =
                    (ref uint dst) =>
                {
                    updateCacheIfNeeded();
                    dst = (uint)sortedIndices[0] + 1;
                };
                getters[ClusterIdCol] = assignedFn;
            }

            if (activeOutput(SortedClusterScoreCol))
            {
                ValueGetter <VBuffer <Single> > topKScoresFn =
                    (ref VBuffer <Single> dst) =>
                {
                    updateCacheIfNeeded();
                    var values = dst.Values;
                    if (Utils.Size(values) < _numClusters)
                    {
                        values = new Single[_numClusters];
                    }
                    for (int i = 0; i < _numClusters; i++)
                    {
                        values[i] = scores.GetItemOrDefault(sortedIndices[i]);
                    }
                    dst = new VBuffer <Single>(_numClusters, values);
                };
                getters[SortedClusterScoreCol] = topKScoresFn;
            }

            if (activeOutput(SortedClusterCol))
            {
                ValueGetter <VBuffer <uint> > topKClassesFn =
                    (ref VBuffer <uint> dst) =>
                {
                    updateCacheIfNeeded();
                    var values = dst.Values;
                    if (Utils.Size(values) < _numClusters)
                    {
                        values = new uint[_numClusters];
                    }
                    for (int i = 0; i < _numClusters; i++)
                    {
                        values[i] = (uint)sortedIndices[i] + 1;
                    }
                    dst = new VBuffer <uint>(_numClusters, values);
                };
                getters[SortedClusterCol] = topKClassesFn;
            }
            return(getters);
        }
Example #7
        protected override Delegate GetGetterCore(IChannel ch, IRow input, int iinfo, out Action disposer)
        {
            Host.AssertValueOrNull(ch);
            Host.AssertValue(input);
            Host.Assert(0 <= iinfo && iinfo < Infos.Length);
            Host.Assert(Infos[iinfo].TypeSrc.IsVector);
            Host.Assert(Infos[iinfo].TypeSrc.ItemType.IsKey);

            disposer = null;

            var getSrc = RowCursorUtils.GetVecGetterAs <uint>(NumberType.U4, input, Infos[iinfo].Source);
            var src    = default(VBuffer <uint>);
            var bldr   = new NgramBufferBuilder(_exes[iinfo].NgramLength, _exes[iinfo].SkipLength,
                                                _ngramMaps[iinfo].Count, GetNgramIdFinder(iinfo));
            var keyCount = (uint)Infos[iinfo].TypeSrc.ItemType.KeyCount;

            if (keyCount == 0)
            {
                keyCount = uint.MaxValue;
            }

            ValueGetter <VBuffer <Float> > del;

            switch (_exes[iinfo].Weighting)
            {
            case WeightingCriteria.TfIdf:
                Host.AssertValue(_invDocFreqs[iinfo]);
                del =
                    (ref VBuffer <Float> dst) =>
                {
                    getSrc(ref src);
                    if (!bldr.IsEmpty)
                    {
                        bldr.Reset();
                        bldr.AddNgrams(ref src, 0, keyCount);
                        bldr.GetResult(ref dst);
                        VBufferUtils.Apply(ref dst, (int i, ref Float v) => v = (Float)(v * _invDocFreqs[iinfo][i]));
                    }
                    else
                    {
                        dst = new VBuffer <Float>(0, dst.Values, dst.Indices);
                    }
                };
                break;

            case WeightingCriteria.Idf:
                Host.AssertValue(_invDocFreqs[iinfo]);
                del =
                    (ref VBuffer <Float> dst) =>
                {
                    getSrc(ref src);
                    if (!bldr.IsEmpty)
                    {
                        bldr.Reset();
                        bldr.AddNgrams(ref src, 0, keyCount);
                        bldr.GetResult(ref dst);
                        VBufferUtils.Apply(ref dst, (int i, ref Float v) => v = v >= 1 ? (Float)_invDocFreqs[iinfo][i] : 0);
                    }
                    else
                    {
                        dst = new VBuffer <Float>(0, dst.Values, dst.Indices);
                    }
                };
                break;

            case WeightingCriteria.Tf:
                del =
                    (ref VBuffer <Float> dst) =>
                {
                    getSrc(ref src);
                    if (!bldr.IsEmpty)
                    {
                        bldr.Reset();
                        bldr.AddNgrams(ref src, 0, keyCount);
                        bldr.GetResult(ref dst);
                    }
                    else
                    {
                        dst = new VBuffer <Float>(0, dst.Values, dst.Indices);
                    }
                };
                break;

            default:
                throw Host.Except("Unsupported weighting criteria");
            }

            return(del);
        }
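
The three weighting modes above reduce to the standard term-weighting formulas; the inverse document frequencies are precomputed as log(totalDocs / docFreq) during dictionary building (see the Train method in Example #19). A hedged sketch of the per-slot arithmetic, using an illustrative helper rather than ML.NET API:

static float Weight(WeightingCriteria mode, float tf, double invDocFreq)
{
    switch (mode)
    {
    case WeightingCriteria.Tf:    return tf;                              // raw n-gram count
    case WeightingCriteria.Idf:   return tf >= 1 ? (float)invDocFreq : 0; // presence times idf
    case WeightingCriteria.TfIdf: return (float)(tf * invDocFreq);        // count times idf
    default: throw new ArgumentOutOfRangeException(nameof(mode));
    }
}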
Example #8
 protected override void UpdateCore(Float label, ref VBuffer <Float> score, ref VBuffer <Double> loss, Float weight)
 {
     AddL1AndL2Loss(label, ref score, weight);
     AddCustomLoss(weight, ref loss);
 }
Example #9
 protected override void ApplyLossFunction(ref VBuffer <float> score, float label, ref VBuffer <Double> loss)
 {
     VBufferUtils.PairManipulator <Float, Double> lossFn =
         (int slot, Float src, ref Double dst) => dst = LossFunction.Loss(src, label);
     VBufferUtils.ApplyWith(ref score, ref loss, lossFn);
 }
Example #10
        private static void Load(IChannel ch, ModelLoadContext ctx, CodecFactory factory, ref VBuffer <ReadOnlyMemory <char> > values)
        {
            Contracts.AssertValue(ch);
            ch.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // *** Binary format ***
            // Codec parameterization: A codec parameterization that should be a ReadOnlyMemory codec
            // int: n, the number of bytes used to write the values
            // byte[n]: As encoded using the codec

            // Get the codec from the factory, and from the stream. We have to
            // attempt to read the codec from the stream, since codecs can potentially
            // be versioned based on their parameterization.
            IValueCodec codec;

            // This *could* happen if we have an old version attempt to read a new version.
            // Enabling this sort of binary backward compatibility is why we also need to write the
            // codec specification.
            if (!factory.TryReadCodec(ctx.Reader.BaseStream, out codec))
            {
                throw ch.ExceptDecode();
            }
            ch.AssertValue(codec);
            ch.CheckDecode(codec.Type.IsVector);
            ch.CheckDecode(codec.Type.ItemType.IsText);
            var textCodec = (IValueCodec <VBuffer <ReadOnlyMemory <char> > >)codec;

            var bufferLen = ctx.Reader.ReadInt32();

            ch.CheckDecode(bufferLen >= 0);
            using (var stream = new SubsetStream(ctx.Reader.BaseStream, bufferLen))
            {
                using (var reader = textCodec.OpenReader(stream, 1))
                {
                    reader.MoveNext();
                    values = default(VBuffer <ReadOnlyMemory <char> >);
                    reader.Get(ref values);
                }
                ch.CheckDecode(stream.ReadByte() == -1);
            }
        }
Example #11
        private static void Save(IChannel ch, ModelSaveContext ctx, CodecFactory factory, ref VBuffer <ReadOnlyMemory <char> > values)
        {
            Contracts.AssertValue(ch);
            ch.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel();
            ctx.SetVersionInfo(GetVersionInfo());

            // *** Binary format ***
            // Codec parameterization: A codec parameterization that should be a ReadOnlyMemory codec
            // int: n, the number of bytes used to write the values
            // byte[n]: As encoded using the codec

            // Get the codec from the factory
            IValueCodec codec;
            var         result = factory.TryGetCodec(new VectorType(TextType.Instance), out codec);

            ch.Assert(result);
            ch.Assert(codec.Type.IsVector);
            ch.Assert(codec.Type.VectorSize == 0);
            ch.Assert(codec.Type.ItemType.RawType == typeof(ReadOnlyMemory <char>));
            IValueCodec <VBuffer <ReadOnlyMemory <char> > > textCodec = (IValueCodec <VBuffer <ReadOnlyMemory <char> > >)codec;

            factory.WriteCodec(ctx.Writer.BaseStream, codec);
            using (var mem = new MemoryStream())
            {
                using (var writer = textCodec.OpenWriter(mem))
                {
                    writer.Write(ref values);
                    writer.Commit();
                }
                ctx.Writer.WriteByteArray(mem.ToArray());
            }

            // Make this resemble, more or less, the auxiliary output from the TermTransform.
            // It will differ somewhat due to the vector being possibly sparse. To distinguish
            // between missing and empty, empties are not written at all, while missings are.
            var v = values;

            char[] buffer = null;
            ctx.SaveTextStream("Terms.txt",
                               writer =>
            {
                writer.WriteLine("# Number of terms = {0} of length {1}", v.Count, v.Length);
                foreach (var pair in v.Items())
                {
                    var text = pair.Value;
                    if (text.IsEmpty)
                    {
                        continue;
                    }
                    writer.Write("{0}\t", pair.Key);
                    // REVIEW: What about escaping this, *especially* for linebreaks?
                    // Do C# and .NET really have no equivalent to Python's "repr"? :(
                    if (text.IsEmpty)
                    {
                        writer.WriteLine();
                        continue;
                    }
                    Utils.EnsureSize(ref buffer, text.Length);

                    var span = text.Span;
                    for (int i = 0; i < text.Length; i++)
                    {
                        buffer[i] = span[i];
                    }

                    writer.WriteLine(buffer, 0, text.Length);
                }
            });
        }
Example #12
        private void DropNAs <TDst>(ref VBuffer <TDst> src, ref VBuffer <TDst> dst, InPredicate <TDst> isNA)
        {
            Host.AssertValue(isNA);

            int newCount = 0;

            for (int i = 0; i < src.Count; i++)
            {
                if (!isNA(in src.Values[i]))
                {
                    newCount++;
                }
            }
            Host.Assert(newCount <= src.Count);

            if (newCount == 0)
            {
                dst = new VBuffer <TDst>(src.Length - src.Count, 0, dst.Values, dst.Indices);
                return;
            }

            if (newCount == src.Count)
            {
                Utils.Swap(ref src, ref dst);
                return;
            }

            var values = dst.Values;

            if (Utils.Size(values) < newCount)
            {
                values = new TDst[newCount];
            }

            int iDst = 0;

            if (src.IsDense)
            {
                for (int i = 0; i < src.Count; i++)
                {
                    if (!isNA(in src.Values[i]))
                    {
                        values[iDst] = src.Values[i];
                        iDst++;
                    }
                }
                Host.Assert(iDst == newCount);
                dst = new VBuffer <TDst>(newCount, values, dst.Indices);
            }
            else
            {
                var indices = dst.Indices;
                if (Utils.Size(indices) < newCount)
                {
                    indices = new int[newCount];
                }

                int offset = 0;
                for (int i = 0; i < src.Count; i++)
                {
                    if (!isNA(in src.Values[i]))
                    {
                        values[iDst]  = src.Values[i];
                        indices[iDst] = src.Indices[i] - offset;
                        iDst++;
                    }
                    else
                    {
                        offset++;
                    }
                }
                // NOTE: the listing was truncated here; the lines below are a plausible completion
                // that mirrors the dense branch: assert the count and emit the shrunken sparse buffer.
                Host.Assert(iDst == newCount);
                dst = new VBuffer <TDst>(src.Length - (src.Count - newCount), newCount, values, indices);
            }
        }
Example #13
 public void Conv(ref byte[] src, ref VBuffer <Byte> dst) => dst = src != null ? new VBuffer <byte>(src.Length, src) : new VBuffer <byte>(0, new byte[0]);
Example #14
 public static void Copy(ref VBuffer <T> src, ref VBuffer <T> dst)
 {
     src.CopyTo(ref dst);
 }
Example #15
        private static Float[] Train(IHost host, ColInfo[] infos, Arguments args, IDataView trainingData)
        {
            Contracts.AssertValue(host, "host");
            host.AssertNonEmpty(infos);

            var       avgDistances  = new Float[infos.Length];
            const int reservoirSize = 5000;

            bool[] activeColumns = new bool[trainingData.Schema.ColumnCount];
            for (int i = 0; i < infos.Length; i++)
            {
                activeColumns[infos[i].Source] = true;
            }

            var reservoirSamplers = new ReservoirSamplerWithReplacement <VBuffer <Float> > [infos.Length];

            using (var cursor = trainingData.GetRowCursor(col => activeColumns[col]))
            {
                var rng = args.Seed.HasValue ? RandomUtils.Create(args.Seed) : host.Rand;
                for (int i = 0; i < infos.Length; i++)
                {
                    if (infos[i].TypeSrc.IsVector)
                    {
                        var get = cursor.GetGetter <VBuffer <Float> >(infos[i].Source);
                        reservoirSamplers[i] = new ReservoirSamplerWithReplacement <VBuffer <Float> >(rng, reservoirSize, get);
                    }
                    else
                    {
                        var   getOne = cursor.GetGetter <Float>(infos[i].Source);
                        Float val    = 0;
                        ValueGetter <VBuffer <Float> > get =
                            (ref VBuffer <Float> dst) =>
                        {
                            getOne(ref val);
                            dst = new VBuffer <float>(1, new[] { val });
                        };
                        reservoirSamplers[i] = new ReservoirSamplerWithReplacement <VBuffer <Float> >(rng, reservoirSize, get);
                    }
                }

                while (cursor.MoveNext())
                {
                    for (int i = 0; i < infos.Length; i++)
                    {
                        reservoirSamplers[i].Sample();
                    }
                }
                for (int i = 0; i < infos.Length; i++)
                {
                    reservoirSamplers[i].Lock();
                }
            }

            for (int iinfo = 0; iinfo < infos.Length; iinfo++)
            {
                var instanceCount = reservoirSamplers[iinfo].NumSampled;

                // If the number of pairs is at most the maximum reservoir size / 2, we go over all the pairs,
                // so we get all the examples. Otherwise, get a sample with replacement.
                VBuffer <Float>[] res;
                int resLength;
                if (instanceCount < reservoirSize && instanceCount * (instanceCount - 1) <= reservoirSize)
                {
                    res       = reservoirSamplers[iinfo].GetCache();
                    resLength = reservoirSamplers[iinfo].Size;
                    Contracts.Assert(resLength == instanceCount);
                }
                else
                {
                    res       = reservoirSamplers[iinfo].GetSample().ToArray();
                    resLength = res.Length;
                }

                // If the dataset contains only one valid Instance, then we can't learn anything anyway, so just return 1.
                if (instanceCount <= 1)
                {
                    avgDistances[iinfo] = 1;
                }
                else
                {
                    Float[] distances;
                    var     sub = args.Column[iinfo].MatrixGenerator;
                    if (sub == null)
                    {
                        sub = args.MatrixGenerator;
                    }
                    // create a dummy generator in order to get its type.
                    // REVIEW this should be refactored. See https://github.com/dotnet/machinelearning/issues/699
                    var  matrixGenerator = sub.CreateComponent(host, 1);
                    bool gaussian        = matrixGenerator is GaussianFourierSampler;

                    // If the number of pairs is at most the maximum reservoir size / 2, go over all the pairs.
                    if (resLength < reservoirSize)
                    {
                        distances = new Float[instanceCount * (instanceCount - 1) / 2];
                        int count = 0;
                        for (int i = 0; i < instanceCount; i++)
                        {
                            for (int j = i + 1; j < instanceCount; j++)
                            {
                                distances[count++] = gaussian ? VectorUtils.L2DistSquared(ref res[i], ref res[j])
                                    : VectorUtils.L1Distance(ref res[i], ref res[j]);
                            }
                        }
                        host.Assert(count == distances.Length);
                    }
                    else
                    {
                        distances = new Float[reservoirSize / 2];
                        for (int i = 0; i < reservoirSize - 1; i += 2)
                        {
                            // For Gaussian kernels, we scale by the L2 distance squared, since the kernel function is exp(-gamma ||x-y||^2).
                            // For Laplacian kernels, we scale by the L1 distance, since the kernel function is exp(-gamma ||x-y||_1).
                            distances[i / 2] = gaussian ? VectorUtils.L2DistSquared(ref res[i], ref res[i + 1]) :
                                               VectorUtils.L1Distance(ref res[i], ref res[i + 1]);
                        }
                    }

                    // If by chance, in the random permutation all the pairs are the same instance we return 1.
                    Float median = MathUtils.GetMedianInPlace(distances, distances.Length);
                    avgDistances[iinfo] = median == 0 ? 1 : median;
                }
            }
            return(avgDistances);
        }
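
The per-column value returned here is a bandwidth heuristic: since the kernels are exp(-gamma * ||x-y||^2) (Gaussian) and exp(-gamma * ||x-y||_1) (Laplacian), scaling by the median pairwise distance, with a fallback to 1 in degenerate cases, puts gamma on a data-driven scale. A hedged sketch of that final step, using a simple stand-in for MathUtils.GetMedianInPlace:

static float MedianOrOne(float[] pairwiseDistances)
{
    if (pairwiseDistances.Length == 0)
        return 1;
    Array.Sort(pairwiseDistances);                        // simple stand-in for GetMedianInPlace
    float median = pairwiseDistances[pairwiseDistances.Length / 2];
    return median == 0 ? 1 : median;                      // all-identical samples fall back to 1
}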
Example #16
 protected override bool IsNaN(ref VBuffer <Float> score)
 {
     return(VBufferUtils.HasNaNs(ref score));
 }
Example #17
        private static void TransformFeatures(IExceptionContext ectx, ref VBuffer <Float> src, ref VBuffer <Float> dst, TransformInfo transformInfo)
        {
            ectx.Check(src.Length == transformInfo.Dimension);

            var values = dst.Values;

            if (Utils.Size(values) < transformInfo.Rank)
            {
                values = new Float[transformInfo.Rank];
            }

            for (int i = 0; i < transformInfo.Rank; i++)
            {
                values[i] = VectorUtils.DotProductWithOffset(transformInfo.Eigenvectors[i], 0, ref src) -
                            (transformInfo.MeanProjected == null ? 0 : transformInfo.MeanProjected[i]);
            }

            dst = new VBuffer <Float>(transformInfo.Rank, values, dst.Indices);
        }
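
TransformFeatures is a rank-reducing linear projection: each output slot is the dot product of the input with one learned direction, minus that direction's projected mean when the transform was centered. A hedged sketch of the same arithmetic on plain arrays, for illustration only:

static float[] Project(float[][] eigenvectors, float[] meanProjected, float[] x, int rank)
{
    var y = new float[rank];
    for (int i = 0; i < rank; i++)
    {
        float dot = 0;
        for (int j = 0; j < x.Length; j++)
            dot += eigenvectors[i][j] * x[j];            // <eigenvector_i, x>
        y[i] = dot - (meanProjected == null ? 0 : meanProjected[i]);
    }
    return y;
}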
Example #18
        public override Delegate[] CreateGetters(IRow input, Func <int, bool> activeCols, out Action disposer)
        {
            Host.Assert(LabelIndex >= 0);
            Host.Assert(ScoreIndex >= 0);

            disposer = null;

            long  cachedPosition = -1;
            Float label          = 0;
            var   score          = default(VBuffer <Float>);
            var   l1             = VBufferUtils.CreateDense <Double>(_scoreSize);

            ValueGetter <Float> nanGetter = (ref Float value) => value = Single.NaN;
            var labelGetter = activeCols(L1Col) || activeCols(L2Col) ? RowCursorUtils.GetLabelGetter(input, LabelIndex) : nanGetter;
            ValueGetter <VBuffer <Float> > scoreGetter;

            if (activeCols(L1Col) || activeCols(L2Col))
            {
                scoreGetter = input.GetGetter <VBuffer <Float> >(ScoreIndex);
            }
            else
            {
                scoreGetter = (ref VBuffer <Float> dst) => dst = default(VBuffer <Float>);
            }
            Action updateCacheIfNeeded =
                () =>
            {
                if (cachedPosition != input.Position)
                {
                    labelGetter(ref label);
                    scoreGetter(ref score);
                    var lab = (Double)label;
                    foreach (var s in score.Items(all: true))
                    {
                        l1.Values[s.Key] = Math.Abs(lab - s.Value);
                    }
                    cachedPosition = input.Position;
                }
            };

            var getters = new Delegate[2];

            if (activeCols(L1Col))
            {
                ValueGetter <VBuffer <Double> > l1Fn =
                    (ref VBuffer <Double> dst) =>
                {
                    updateCacheIfNeeded();
                    l1.CopyTo(ref dst);
                };
                getters[L1Col] = l1Fn;
            }
            if (activeCols(L2Col))
            {
                VBufferUtils.PairManipulator <Double, Double> sqr =
                    (int slot, Double x, ref Double y) => y = x * x;

                ValueGetter <VBuffer <Double> > l2Fn =
                    (ref VBuffer <Double> dst) =>
                {
                    updateCacheIfNeeded();
                    dst = new VBuffer <Double>(_scoreSize, 0, dst.Values, dst.Indices);
                    VBufferUtils.ApplyWith(ref l1, ref dst, sqr);
                };
                getters[L2Col] = l2Fn;
            }
            return(getters);
        }
Example #19
        private SequencePool[] Train(Arguments args, IDataView trainingData, out double[][] invDocFreqs)
        {
            // Contains the maximum number of grams to store in the dictionary, for each level of ngrams,
            // from 1 (in position 0) up to ngramLength (in position ngramLength-1)
            var lims = new int[Infos.Length][];

            for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
            {
                var all         = args.Column[iinfo].AllLengths ?? args.AllLengths;
                var ngramLength = _exes[iinfo].NgramLength;
                var maxNumTerms = Utils.Size(args.Column[iinfo].MaxNumTerms) > 0 ? args.Column[iinfo].MaxNumTerms : args.MaxNumTerms;
                if (!all)
                {
                    Host.CheckUserArg(Utils.Size(maxNumTerms) == 0 ||
                                      Utils.Size(maxNumTerms) == 1 && maxNumTerms[0] > 0, nameof(args.MaxNumTerms));
                    lims[iinfo] = new int[ngramLength];
                    lims[iinfo][ngramLength - 1] = Utils.Size(maxNumTerms) == 0 ? Arguments.DefaultMaxTerms : maxNumTerms[0];
                }
                else
                {
                    Host.CheckUserArg(Utils.Size(maxNumTerms) <= ngramLength, nameof(args.MaxNumTerms));
                    Host.CheckUserArg(Utils.Size(maxNumTerms) == 0 || maxNumTerms.All(i => i >= 0) && maxNumTerms[maxNumTerms.Length - 1] > 0, nameof(args.MaxNumTerms));
                    var extend = Utils.Size(maxNumTerms) == 0 ? Arguments.DefaultMaxTerms : maxNumTerms[maxNumTerms.Length - 1];
                    lims[iinfo] = Utils.BuildArray(ngramLength,
                                                   i => i < Utils.Size(maxNumTerms) ? maxNumTerms[i] : extend);
                }
            }

            var helpers = new NgramBufferBuilder[Infos.Length];
            var getters = new ValueGetter <VBuffer <uint> > [Infos.Length];
            var src     = new VBuffer <uint> [Infos.Length];

            // Keep track of how many grams are in the pool for each value of n. Position
            // i in _counts counts how many (i+1)-grams are in the pool for column iinfo.
            var counts    = new int[Infos.Length][];
            var ngramMaps = new SequencePool[Infos.Length];

            bool[] activeInput = new bool[trainingData.Schema.ColumnCount];
            foreach (var info in Infos)
            {
                activeInput[info.Source] = true;
            }
            using (var cursor = trainingData.GetRowCursor(col => activeInput[col]))
                using (var pch = Host.StartProgressChannel("Building n-gram dictionary"))
                {
                    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                    {
                        Host.Assert(Infos[iinfo].TypeSrc.IsVector && Infos[iinfo].TypeSrc.ItemType.IsKey);
                        var ngramLength = _exes[iinfo].NgramLength;
                        var skipLength  = _exes[iinfo].SkipLength;

                        getters[iinfo]   = RowCursorUtils.GetVecGetterAs <uint>(NumberType.U4, cursor, Infos[iinfo].Source);
                        src[iinfo]       = default(VBuffer <uint>);
                        counts[iinfo]    = new int[ngramLength];
                        ngramMaps[iinfo] = new SequencePool();

                        // Note: GetNgramIdFinderAdd will control how many ngrams of a specific length will
                        // be added (using lims[iinfo]), therefore we set slotLim to the maximum
                        helpers[iinfo] = new NgramBufferBuilder(ngramLength, skipLength, Utils.ArrayMaxSize,
                                                                GetNgramIdFinderAdd(counts[iinfo], lims[iinfo], ngramMaps[iinfo], _exes[iinfo].RequireIdf(), Host));
                    }

                    int    cInfoFull = 0;
                    bool[] infoFull  = new bool[Infos.Length];

                    invDocFreqs = new double[Infos.Length][];

                    long   totalDocs = 0;
                    Double rowCount  = trainingData.GetRowCount(true) ?? Double.NaN;
                    var    buffers   = new VBuffer <float> [Infos.Length];
                    pch.SetHeader(new ProgressHeader(new[] { "Total n-grams" }, new[] { "documents" }),
                                  e => e.SetProgress(0, totalDocs, rowCount));
                    while (cInfoFull < Infos.Length && cursor.MoveNext())
                    {
                        totalDocs++;
                        for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                        {
                            getters[iinfo](ref src[iinfo]);
                            var keyCount = (uint)Infos[iinfo].TypeSrc.ItemType.KeyCount;
                            if (keyCount == 0)
                            {
                                keyCount = uint.MaxValue;
                            }
                            if (!infoFull[iinfo])
                            {
                                if (_exes[iinfo].RequireIdf())
                                {
                                    helpers[iinfo].Reset();
                                }

                                helpers[iinfo].AddNgrams(ref src[iinfo], 0, keyCount);
                                if (_exes[iinfo].RequireIdf())
                                {
                                    int totalNgrams = counts[iinfo].Sum();
                                    Utils.EnsureSize(ref invDocFreqs[iinfo], totalNgrams);
                                    helpers[iinfo].GetResult(ref buffers[iinfo]);
                                    foreach (var pair in buffers[iinfo].Items())
                                    {
                                        if (pair.Value >= 1)
                                        {
                                            invDocFreqs[iinfo][pair.Key] += 1;
                                        }
                                    }
                                }
                            }
                            AssertValid(counts[iinfo], lims[iinfo], ngramMaps[iinfo]);
                        }
                    }

                    pch.Checkpoint(counts.Sum(c => c.Sum()), totalDocs);
                    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                    {
                        for (int i = 0; i < Utils.Size(invDocFreqs[iinfo]); i++)
                        {
                            if (invDocFreqs[iinfo][i] != 0)
                            {
                                invDocFreqs[iinfo][i] = Math.Log(totalDocs / invDocFreqs[iinfo][i]);
                            }
                        }
                    }

                    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                    {
                        AssertValid(counts[iinfo], lims[iinfo], ngramMaps[iinfo]);

                        int ngramLength = _exes[iinfo].NgramLength;
                        for (int i = 0; i < ngramLength; i++)
                        {
                            _exes[iinfo].NonEmptyLevels[i] = counts[iinfo][i] > 0;
                        }
                    }

                    return(ngramMaps);
                }
        }
Example #20
        public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer <DvText> slotNames)
        {
            Contracts.CheckValueOrNull(schema);
            Contracts.CheckValue(role.Value, nameof(role));
            Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize));

            IReadOnlyList <ColumnInfo> list;

            if ((list = schema?.GetColumns(role)) == null || list.Count != 1 || !schema.Schema.HasSlotNames(list[0].Index, vectorSize))
            {
                slotNames = new VBuffer <DvText>(vectorSize, 0, slotNames.Values, slotNames.Indices);
            }
            else
            {
                schema.Schema.GetMetadata(Kinds.SlotNames, list[0].Index, ref slotNames);
            }
        }
 protected override void GetMissing(ref VBuffer <TItem> dst)
 {
     dst = new VBuffer <TItem>(Type.VectorSize, 0, dst.Values, dst.Indices);
 }
Example #22
        /// <summary>
        /// Getter generator for inputs of type <typeparamref name="TSrc"/>, where output type is a vector of hashes
        /// </summary>
        /// <typeparam name="TSrc">Input type. Must be a vector</typeparam>
        /// <param name="input">Row input</param>
        /// <param name="iinfo">Index of the getter</param>
        private ValueGetter <VBuffer <uint> > ComposeGetterVecToVec <TSrc>(IRow input, int iinfo)
        {
            Host.AssertValue(input);
            Host.Assert(Infos[iinfo].TypeSrc.IsVector);

            var getSrc            = GetSrcGetter <VBuffer <TSrc> >(input, iinfo);
            var hashFunction      = ComposeHashDelegate <TSrc>();
            var src               = default(VBuffer <TSrc>);
            int n                 = _exes[iinfo].OutputValueCount;
            int expectedSrcLength = Infos[iinfo].TypeSrc.VectorSize;

            int[][] slotMap = _exes[iinfo].SlotMap;
            // REVIEW: consider adding a fix-zero functionality (subtract emptyTextHash from all hashes)
            var  mask     = (1U << _exes[iinfo].HashBits) - 1;
            var  hashSeed = _exes[iinfo].HashSeed;
            bool ordered  = _exes[iinfo].Ordered;

            TSrc[] denseValues = null;
            return
                ((ref VBuffer <uint> dst) =>
            {
                getSrc(ref src);
                Host.Check(src.Length == expectedSrcLength);
                TSrc[] values;

                // force-densify the input
                // REVIEW: this performs poorly if only a fraction of sparse vector is used for hashing.
                // This scenario was unlikely at the time of writing. Regardless of performance, the hash value
                // needs to be consistent across equivalent representations - sparse vs dense.
                if (src.IsDense)
                {
                    values = src.Values;
                }
                else
                {
                    if (denseValues == null)
                    {
                        denseValues = new TSrc[expectedSrcLength];
                    }
                    values = denseValues;
                    src.CopyTo(values);
                }

                var hashes = dst.Values;
                if (Utils.Size(hashes) < n)
                {
                    hashes = new uint[n];
                }

                for (int i = 0; i < n; i++)
                {
                    uint hash = hashSeed;

                    foreach (var srcSlot in slotMap[i])
                    {
                        // REVIEW: some legacy code hashes 0 for srcSlot in ord- case, do we need to preserve this behavior?
                        if (ordered)
                        {
                            hash = Hashing.MurmurRound(hash, (uint)srcSlot);
                        }
                        hash = hashFunction(ref values[srcSlot], hash);
                    }

                    hashes[i] = (Hashing.MixHash(hash) & mask) + 1;     // +1 to offset from zero, which has special meaning for KeyType
                }

                dst = new VBuffer <uint>(n, hashes, dst.Indices);
            });
        }
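
Each output slot hashes the source slots listed in slotMap[i], optionally folding the slot index in first (the ordered case), and then squeezes the mixed hash into the key range: the low HashBits bits are kept and 1 is added so that 0 stays reserved as the KeyType missing value. A hedged sketch of that last fold, for illustration only:

static uint FoldToKey(uint mixedHash, int hashBits)
{
    uint mask = (1U << hashBits) - 1;
    return (mixedHash & mask) + 1;        // valid keys are 1 .. 2^hashBits
}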
Example #23
            private ValueGetter <ReadOnlyMemory <char> > GetTextValueGetter <TSrc>(Row input, int colSrc, VBuffer <ReadOnlyMemory <char> > slotNames)
            {
                Contracts.AssertValue(input);
                Contracts.AssertValue(Predictor);

                var featureGetter = input.GetGetter <TSrc>(colSrc);
                var map           = Predictor.GetFeatureContributionMapper <TSrc, VBuffer <float> >(_topContributionsCount, _bottomContributionsCount, _normalize);

                var features      = default(TSrc);
                var contributions = default(VBuffer <float>);

                return
                    ((ref ReadOnlyMemory <char> dst) =>
                {
                    featureGetter(ref features);
                    map(in features, ref contributions);
                    // Back the spans with arrays sized to the number of explicit values; copying
                    // into an empty Span<T> would throw as soon as contributions is non-empty.
                    var count = contributions.GetValues().Length;
                    Span<int> indices = new int[count];
                    Span<float> values = new float[count];
                    if (contributions.IsDense)
                    {
                        Utils.GetIdentityPermutation(contributions.Length).AsSpan().CopyTo(indices);
                    }
                    else
                    {
                        contributions.GetIndices().CopyTo(indices);
                    }
                    contributions.GetValues().CopyTo(values);
                    var sb = new StringBuilder();
                    GenericSpanSortHelper <int> .Sort(indices, values, 0, count);

                    for (var i = 0; i < count; i++)
                    {
                        var val = values[i];
                        var ind = indices[i];
                        var name = GetSlotName(ind, slotNames);
                        sb.AppendFormat("{0}: {1}, ", name, val);
                    }

                    if (sb.Length > 0)
                    {
                        _env.Assert(sb.Length >= 2);
                        sb.Remove(sb.Length - 2, 2);
                    }

                    dst = new ReadOnlyMemory <char>(sb.ToString().ToCharArray());
                });
            }
Example #24
        private static DropSlotsTransform.Column CreateDropSlotsColumn(Arguments args, ref VBuffer <Single> scores, out int selectedCount)
        {
            // Not checking the scores.Length, because:
            // 1. If it's the same as the features column length, we should be constructing the right DropSlots arguments.
            // 2. If it's less, we assume that the rest of the scores are zero and we drop the slots.
            // 3. If it's greater, the DropSlots transform ignores ranges that fall outside the valid range of indices for the column.
            Contracts.Assert(args.Threshold.HasValue != args.NumSlotsToKeep.HasValue);
            var col = new DropSlotsTransform.Column();

            col.Source    = args.FeatureColumn;
            selectedCount = 0;

            // Degenerate case, dropping all slots.
            if (scores.Count == 0)
            {
                var range = new DropSlotsTransform.Range();
                col.Slots = new DropSlotsTransform.Range[] { range };
                return(col);
            }

            int   tiedScoresToKeep;
            float threshold;

            if (args.Threshold.HasValue)
            {
                threshold        = args.Threshold.Value;
                tiedScoresToKeep = threshold > 0 ? int.MaxValue : 0;
            }
            else
            {
                Contracts.Assert(args.NumSlotsToKeep.HasValue);
                threshold = ComputeThreshold(scores.Values, scores.Count, args.NumSlotsToKeep.Value, out tiedScoresToKeep);
            }

            var slots = new List <DropSlotsTransform.Range>();

            for (int i = 0; i < scores.Count; i++)
            {
                var score = Math.Abs(scores.Values[i]);
                if (score > threshold)
                {
                    selectedCount++;
                    continue;
                }
                if (score == threshold && tiedScoresToKeep > 0)
                {
                    tiedScoresToKeep--;
                    selectedCount++;
                    continue;
                }

                var range = new DropSlotsTransform.Range();
                range.Min = i;
                while (++i < scores.Count)
                {
                    score = Math.Abs(scores.Values[i]);
                    if (score > threshold)
                    {
                        selectedCount++;
                        break;
                    }
                    if (score == threshold && tiedScoresToKeep > 0)
                    {
                        tiedScoresToKeep--;
                        selectedCount++;
                        break;
                    }
                }
                range.Max = i - 1;
                slots.Add(range);
            }

            if (!scores.IsDense)
            {
                int ii    = 0;
                var count = slots.Count;
                for (int i = 0; i < count; i++)
                {
                    var range = slots[i];
                    Contracts.Assert(range.Max != null);
                    var min = range.Min;
                    var max = range.Max.Value;
                    Contracts.Assert(min <= max);
                    Contracts.Assert(max < scores.Count);

                    range.Min = min == 0 ? 0 : scores.Indices[min - 1] + 1;
                    range.Max = max == scores.Count - 1 ? scores.Length - 1 : scores.Indices[max + 1] - 1;

                    // Add the gaps before this range.
                    for (; ii < min; ii++)
                    {
                        var gapMin = ii == 0 ? 0 : scores.Indices[ii - 1] + 1;
                        var gapMax = scores.Indices[ii] - 1;
                        if (gapMin <= gapMax)
                        {
                            var gap = new DropSlotsTransform.Range();
                            gap.Min = gapMin;
                            gap.Max = gapMax;
                            slots.Add(gap);
                        }
                    }
                    ii = max;
                }

                // Add the gaps after the last range.
                for (; ii <= scores.Count; ii++)
                {
                    var gapMin = ii == 0 ? 0 : scores.Indices[ii - 1] + 1;
                    var gapMax = ii == scores.Count ? scores.Length - 1 : scores.Indices[ii] - 1;
                    if (gapMin <= gapMax)
                    {
                        var gap = new DropSlotsTransform.Range();
                        gap.Min = gapMin;
                        gap.Max = gapMax;
                        slots.Add(gap);
                    }
                }

                // Remove all slots past scores.Length.
                var lastRange = new DropSlotsTransform.Range();
                lastRange.Min = scores.Length;
                slots.Add(lastRange);
            }

            if (slots.Count > 0)
            {
                col.Slots = slots.ToArray();
                return(col);
            }

            return(null);
        }
Example #25
        public static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer <ReadOnlyMemory <char> > slotNames)
        {
            Contracts.CheckValueOrNull(schema);
            Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize));

            IReadOnlyList <ColumnInfo> list;

            if ((list = schema?.GetColumns(role)) == null || list.Count != 1 || !schema.Schema.HasSlotNames(list[0].Index, vectorSize))
            {
                VBufferUtils.Resize(ref slotNames, vectorSize, 0);
            }
            else
            {
                schema.Schema.GetMetadata(Kinds.SlotNames, list[0].Index, ref slotNames);
            }
        }
Example #26
        /// <summary>
        /// Returns a score for each slot of the features column.
        /// </summary>
        public static void Train(IHostEnvironment env, IDataView input, Arguments args, ref VBuffer <Single> scores)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(RegistrationName);

            host.CheckValue(args, nameof(args));
            host.CheckValue(input, nameof(input));
            args.Check(host);

            TrainCore(host, input, args, ref scores);
        }
        // The multi-output regression evaluator prints only the per-label metrics for each fold.
        protected override void PrintFoldResultsCore(IChannel ch, Dictionary <string, IDataView> metrics)
        {
            IDataView fold;

            if (!metrics.TryGetValue(MetricKinds.OverallMetrics, out fold))
            {
                throw ch.Except("No overall metrics found");
            }

            int  isWeightedCol;
            bool needWeighted = fold.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.IsWeighted, out isWeightedCol);

            int  stratCol;
            bool hasStrats = fold.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratCol, out stratCol);
            int  stratVal;
            bool hasStratVals = fold.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.StratVal, out stratVal);

            ch.Assert(hasStrats == hasStratVals);

            var colCount       = fold.Schema.ColumnCount;
            var vBufferGetters = new ValueGetter <VBuffer <double> > [colCount];

            using (var cursor = fold.GetRowCursor(col => true))
            {
                bool isWeighted = false;
                ValueGetter <bool> isWeightedGetter;
                if (needWeighted)
                {
                    isWeightedGetter = cursor.GetGetter <bool>(isWeightedCol);
                }
                else
                {
                    isWeightedGetter = (ref bool dst) => dst = false;
                }

                ValueGetter <uint> stratGetter;
                if (hasStrats)
                {
                    var type = cursor.Schema.GetColumnType(stratCol);
                    stratGetter = RowCursorUtils.GetGetterAs <uint>(type, cursor, stratCol);
                }
                else
                {
                    stratGetter = (ref uint dst) => dst = 0;
                }

                int labelCount = 0;
                for (int i = 0; i < fold.Schema.ColumnCount; i++)
                {
                    if (fold.Schema.IsHidden(i) || (needWeighted && i == isWeightedCol) ||
                        (hasStrats && (i == stratCol || i == stratVal)))
                    {
                        continue;
                    }

                    var type = fold.Schema.GetColumnType(i);
                    if (type.IsKnownSizeVector && type.ItemType == NumberType.R8)
                    {
                        vBufferGetters[i] = cursor.GetGetter <VBuffer <double> >(i);
                        if (labelCount == 0)
                        {
                            labelCount = type.VectorSize;
                        }
                        else
                        {
                            ch.Check(labelCount == type.VectorSize, "All vector metrics should contain the same number of slots");
                        }
                    }
                }
                var labelNames = new ReadOnlyMemory <char> [labelCount];
                for (int j = 0; j < labelCount; j++)
                {
                    labelNames[j] = string.Format("Label_{0}", j).AsMemory();
                }

                var sb = new StringBuilder();
                sb.AppendLine("Per-label metrics:");
                sb.AppendFormat("{0,12} ", " ");
                for (int i = 0; i < labelCount; i++)
                {
                    sb.AppendFormat(" {0,20}", labelNames[i]);
                }
                sb.AppendLine();

                VBuffer <Double> metricVals      = default(VBuffer <Double>);
                bool             foundWeighted   = !needWeighted;
                bool             foundUnweighted = false;
                uint             strat           = 0;
                while (cursor.MoveNext())
                {
                    isWeightedGetter(ref isWeighted);
                    if (foundWeighted && isWeighted || foundUnweighted && !isWeighted)
                    {
                        throw ch.Except("Multiple {0} rows found in overall metrics data view",
                                        isWeighted ? "weighted" : "unweighted");
                    }
                    if (isWeighted)
                    {
                        foundWeighted = true;
                    }
                    else
                    {
                        foundUnweighted = true;
                    }

                    stratGetter(ref strat);
                    if (strat > 0)
                    {
                        continue;
                    }

                    for (int i = 0; i < colCount; i++)
                    {
                        if (vBufferGetters[i] != null)
                        {
                            vBufferGetters[i](ref metricVals);
                            ch.Assert(metricVals.Length == labelCount);

                            sb.AppendFormat("{0}{1,12}:", isWeighted ? "Weighted " : "", fold.Schema.GetColumnName(i));
                            foreach (var metric in metricVals.Items(all: true))
                            {
                                sb.AppendFormat(" {0,20:G20}", metric.Value);
                            }
                            sb.AppendLine();
                        }
                    }
                    if (foundUnweighted && foundWeighted)
                    {
                        break;
                    }
                }
                ch.Assert(foundUnweighted && foundWeighted);
                ch.Info(sb.ToString());
            }
        }
Example #28
            private static void EnsureCachedResultValueMapper(ValueMapper <VBuffer <Float>, Float, Float> mapper,
                                                              ref long cachedPosition, ValueGetter <VBuffer <Float> > featureGetter, ref VBuffer <Float> features,
                                                              ref Float score, ref Float prob, Row input)
            {
                Contracts.AssertValue(mapper);
                if (cachedPosition != input.Position)
                {
                    if (featureGetter != null)
                    {
                        featureGetter(ref features);
                    }

                    mapper(in features, ref score, ref prob);
                    cachedPosition = input.Position;
                }
            }
Example #29
 protected override void Copy(ref VBuffer <T> src, ref VBuffer <T> dst)
 {
     src.CopyTo(ref dst);
 }
Example #30
 protected abstract bool AcceptColumnValue(ref VBuffer <TFloat> buffer);