Beispiel #1
0
            /// <summary>
            /// Refreshes the cached <paramref name="score"/> and <paramref name="prob"/> when the cached
            /// position no longer matches the position of <paramref name="input"/>; otherwise does nothing.
            /// </summary>
            /// <param name="mapper">Mapper invoked with the features to produce the score and probability.</param>
            /// <param name="cachedPosition">Position the cached values were computed for; updated after a refresh.</param>
            /// <param name="featureGetter">Optional getter used to reload <paramref name="features"/>; skipped when null.</param>
            /// <param name="features">Feature buffer passed to <paramref name="mapper"/>.</param>
            /// <param name="score">Receives the first output of <paramref name="mapper"/>.</param>
            /// <param name="prob">Receives the second output of <paramref name="mapper"/>.</param>
            /// <param name="input">Row whose <c>Position</c> decides whether the cache is stale.</param>
            private static void EnsureCachedResultValueMapper(ValueMapper <VBuffer <Float>, Float, Float> mapper,
                                                              ref long cachedPosition, ValueGetter <VBuffer <Float> > featureGetter, ref VBuffer <Float> features,
                                                              ref Float score, ref Float prob, Row input)
            {
                Contracts.AssertValue(mapper);

                // Cache hit: the values already correspond to the row's current position.
                if (cachedPosition == input.Position)
                {
                    return;
                }

                // Reload the features only when a getter was supplied.
                featureGetter?.Invoke(ref features);

                mapper(in features, ref score, ref prob);
                cachedPosition = input.Position;
            }
Beispiel #2
0
            /// <summary>
            /// Builds a getter that renders the feature contributions of the current row as a single text
            /// value of the form <c>name: value, name: value</c>.
            /// </summary>
            /// <typeparam name="TSrc">Type of the source feature column.</typeparam>
            /// <param name="input">Row the features are read from.</param>
            /// <param name="colSrc">Index of the feature column in <paramref name="input"/>.</param>
            /// <param name="slotNames">Slot names passed to <c>GetSlotName</c> to label each contribution.</param>
            /// <returns>A getter producing the formatted contribution text.</returns>
            private ValueGetter <ReadOnlyMemory <char> > GetTextValueGetter <TSrc>(Row input, int colSrc, VBuffer <ReadOnlyMemory <char> > slotNames)
            {
                Contracts.AssertValue(input);
                Contracts.AssertValue(Predictor);

                var readFeatures = input.GetGetter <TSrc>(colSrc);
                var contributionMapper = Predictor.GetFeatureContributionMapper <TSrc, VBuffer <float> >(_topContributionsCount, _bottomContributionsCount, _normalize);

                // Captured by the lambda below, so the same buffers are handed back on every call.
                var rowFeatures      = default(TSrc);
                var rowContributions = default(VBuffer <float>);

                ValueGetter <ReadOnlyMemory <char> > textGetter = (ref ReadOnlyMemory <char> dst) =>
                {
                    readFeatures(ref rowFeatures);
                    contributionMapper(in rowFeatures, ref rowContributions);

                    var buffer = VBufferEditor.CreateFromBuffer(ref rowContributions);
                    // A dense buffer carries no explicit indices, so an identity permutation stands in for them.
                    var slotIndices = rowContributions.IsDense ? Utils.GetIdentityPermutation(rowContributions.Length) : buffer.Indices;
                    var slotValues = buffer.Values;
                    var total = slotValues.Length;

                    // NOTE(review): presumably sorts the values as keys and permutes the indices alongside - confirm against GenericSpanSortHelper.
                    GenericSpanSortHelper <float> .Sort(slotValues, slotIndices, 0, total);

                    var text = new StringBuilder();
                    for (var pos = 0; pos < total; pos++)
                    {
                        var slotName = GetSlotName(slotIndices[pos], slotNames);
                        text.AppendFormat("{0}: {1}, ", slotName, slotValues[pos]);
                    }

                    // Drop the trailing ", " left behind by the last appended entry.
                    if (text.Length > 0)
                    {
                        _env.Assert(text.Length >= 2);
                        text.Length -= 2;
                    }

                    var chars = text.ToString().ToCharArray();
                    dst = new ReadOnlyMemory <char>(chars);
                };

                return textGetter;
            }
        /// <summary>
        /// Fetches the slot names of the single column carrying <paramref name="role"/>. When
        /// <paramref name="schema"/> is null, the role does not map to exactly one column, or that column
        /// does not report slot names for <paramref name="vectorSize"/>, <paramref name="slotNames"/> is
        /// instead resized to <paramref name="vectorSize"/> with zero explicit values.
        /// </summary>
        /// <param name="schema">Role-mapped schema to look the column up in; may be null.</param>
        /// <param name="role">Role identifying the column of interest.</param>
        /// <param name="vectorSize">Expected vector size; must be non-negative.</param>
        /// <param name="slotNames">Buffer receiving the slot names.</param>
        internal static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer <ReadOnlyMemory <char> > slotNames)
        {
            Contracts.CheckValueOrNull(schema);
            Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize));

            IReadOnlyList <Schema.Column> roleColumns = schema?.GetColumns(role);

            // Slot names are usable only for exactly one column that reports names of the right size.
            bool hasUsableNames = roleColumns != null &&
                                  roleColumns.Count == 1 &&
                                  schema.Schema[roleColumns[0].Index].HasSlotNames(vectorSize);

            if (hasUsableNames)
            {
                schema.Schema[roleColumns[0].Index].Metadata.GetValue(Kinds.SlotNames, ref slotNames);
                return;
            }

            VBufferUtils.Resize(ref slotNames, vectorSize, 0);
        }
 /// <summary>
 /// Reads the <c>Kinds.SlotNames</c> metadata value of <paramref name="column"/> into
 /// <paramref name="slotNames"/>.
 /// </summary>
 /// <param name="column">Column whose metadata is queried.</param>
 /// <param name="slotNames">Buffer receiving the slot names.</param>
 public static void GetSlotNames(this Schema.Column column, ref VBuffer <ReadOnlyMemory <char> > slotNames)
 {
     column.Metadata.GetValue(Kinds.SlotNames, ref slotNames);
 }
 /// <summary>
 /// Returns a <see cref="VBufferEditor{T}"/> obtained from <paramref name="destination"/>, matching
 /// its shape (length and density).
 /// </summary>
 /// <typeparam name="T">Element type of the buffer.</typeparam>
 /// <param name="destination">The buffer to edit. The returned <see cref="VBufferEditor{T}"/> is assumed to take
 /// ownership of it, so the <see cref="VBuffer{T}"/> passed here should not be used again: its underlying
 /// buffers may be reused by the editor.</param>
 /// <returns>The editor returned by <c>destination.GetEditor()</c>.</returns>
 public static VBufferEditor <T> CreateFromBuffer <T>(
     ref VBuffer <T> destination)
     => destination.GetEditor();
Beispiel #6
0
        /// <summary>
        /// Returns the schema of a sequence predictor's output. The single output column is always named
        /// <see cref="AnnotationUtils.Const.ScoreValueKind.PredictedLabel"/>.
        /// </summary>
        /// <param name="scoreType">Type of the score column produced by the sequence predictor.</param>
        /// <param name="scoreColumnKindValue">Annotation value of the score column, stored under the key
        /// <see cref="AnnotationUtils.Kinds.ScoreColumnKind"/>.</param>
        /// <param name="keyNames">Sequence predictors usually produce integer outputs; this buffer holds the tag of each
        /// possible output value. For example, output integer 0 could be mapped to "Sell" and 1 to "Buy" when predicting
        /// a stock trend. Key-value annotations are added only when this buffer has a non-zero length.</param>
        /// <returns>The <see cref="DataViewSchema"/> of the sequence predictor's output.</returns>
        public static DataViewSchema CreateSequencePredictionSchema(DataViewType scoreType, string scoreColumnKindValue, VBuffer <ReadOnlyMemory <char> > keyNames = default)
        {
            Contracts.CheckValue(scoreType, nameof(scoreType));
            Contracts.CheckValue(scoreColumnKindValue, nameof(scoreColumnKindValue));

            var annotations = new DataViewSchema.Annotations.Builder();

            // Annotations are registered together with their getters, starting with the names of the
            // predicted keys when any were supplied.
            int keyCount = keyNames.Length;
            if (keyCount > 0)
            {
                annotations.AddKeyValues(keyCount, TextDataViewType.Instance,
                                         (ref VBuffer <ReadOnlyMemory <char> > value) => { value = keyNames; });
            }

            annotations.Add(AnnotationUtils.Kinds.ScoreColumnKind, TextDataViewType.Instance,
                            (ref ReadOnlyMemory <char> value) => { value = scoreColumnKindValue.AsMemory(); });
            annotations.Add(AnnotationUtils.Kinds.ScoreValueKind, TextDataViewType.Instance,
                            (ref ReadOnlyMemory <char> value) => { value = AnnotationUtils.Const.ScoreValueKind.PredictedLabel.AsMemory(); });

            // The resulting schema consists of exactly one column.
            var schema = new DataViewSchema.Builder();
            schema.AddColumn(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, scoreType, annotations.ToAnnotations());

            return schema.ToSchema();
        }
Beispiel #7
0
        /// <summary>
        /// Writes <paramref name="data"/> to <paramref name="stream"/> as SVM-light text, one line per row:
        /// the label, an optional <c>qid:</c> token, an optional <c>cost:</c> token, then the non-zero
        /// features as <c>index:value</c> pairs.
        /// </summary>
        /// <param name="stream">Destination stream. It is wrapped in a <see cref="StreamWriter"/> that is disposed
        /// before this method returns, which also closes the stream.</param>
        /// <param name="data">Data view providing the label and feature columns, plus the group and weight
        /// columns when those are configured.</param>
        /// <param name="cols">Column indices requested by the caller. They are only used to warn about columns
        /// that will not be saved; the saved columns are always the configured label, feature, group and weight columns.</param>
        /// <remarks>
        /// All numbers are formatted with the invariant culture, so the output does not depend on the culture
        /// of the calling thread (for example a comma decimal separator or a non-ASCII minus sign).
        /// </remarks>
        public void SaveData(Stream stream, IDataView data, params int[] cols)
        {
            _host.CheckValue(stream, nameof(stream));
            _host.CheckValue(data, nameof(data));
            _host.CheckValueOrNull(cols);

            if (cols == null)
            {
                cols = new int[0];
            }

            // SVM-light is a machine-readable format: numbers must not be rendered with the current
            // culture, which TextWriter.Write(format, ...) and TextWriter.Write(int) would otherwise use.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (var ch = _host.Start("Saving"))
            {
                var labelCol = data.Schema.GetColumnOrNull(_labelCol);
                if (!labelCol.HasValue)
                {
                    throw ch.Except($"Column {_labelCol} not found in data");
                }

                var featureCol = data.Schema.GetColumnOrNull(_featureCol);
                if (!featureCol.HasValue)
                {
                    throw ch.Except($"Column {_featureCol} not found in data");
                }

                // Group and weight columns are optional: they are looked up only when a name is configured.
                var groupCol = !string.IsNullOrWhiteSpace(_groupCol) ? data.Schema.GetColumnOrNull(_groupCol) : default;
                if (!string.IsNullOrWhiteSpace(_groupCol) && !groupCol.HasValue)
                {
                    throw ch.Except($"Column {_groupCol} not found in data");
                }

                var weightCol = !string.IsNullOrWhiteSpace(_weightCol) ? data.Schema.GetColumnOrNull(_weightCol) : default;
                if (!string.IsNullOrWhiteSpace(_weightCol) && !weightCol.HasValue)
                {
                    throw ch.Except($"Column {_weightCol} not found in data");
                }

                // Warn about every requested column that is not one of the columns this saver writes.
                foreach (var col in cols)
                {
                    _host.Check(col < data.Schema.Count);
                    var column = data.Schema[col];
                    if (column.Name != _labelCol && column.Name != _featureCol && column.Name != _groupCol && column.Name != _weightCol)
                    {
                        ch.Warning($"Column {column.Name} will not be saved. SVM-light saver saves the label column, feature column, optional group column and optional weight column.");
                    }
                }

                var columns = new List <DataViewSchema.Column>()
                {
                    labelCol.Value, featureCol.Value
                };
                if (groupCol.HasValue)
                {
                    columns.Add(groupCol.Value);
                }
                if (weightCol.HasValue)
                {
                    columns.Add(weightCol.Value);
                }

                using (var writer = new StreamWriter(stream))
                using (var cursor = data.GetRowCursor(columns))
                {
                    // Getting the getters will fail with type errors if the types are not correct,
                    // so we rely on those messages.
                    var             labelGetter    = cursor.GetGetter <float>(labelCol.Value);
                    var             featuresGetter = cursor.GetGetter <VBuffer <float> >(featureCol.Value);
                    var             groupGetter    = groupCol.HasValue ? cursor.GetGetter <ulong>(groupCol.Value) : null;
                    var             weightGetter   = weightCol.HasValue ? cursor.GetGetter <float>(weightCol.Value) : null;
                    VBuffer <float> features       = default;
                    while (cursor.MoveNext())
                    {
                        float lab = default;
                        labelGetter(ref lab);
                        if (_binary)
                        {
                            // Binary mode collapses the label to 0 (NaN), 1 (positive) or -1 (otherwise).
                            int binaryLabel = float.IsNaN(lab) ? 0 : (lab > 0 ? 1 : -1);
                            writer.Write(binaryLabel.ToString(invariant));
                        }
                        else
                        {
                            writer.Write(string.Format(invariant, "{0:R}", lab));
                        }
                        if (groupGetter != null)
                        {
                            ulong groupId = default;
                            groupGetter(ref groupId);
                            // A group id of 0 produces no qid token; other ids are written minus one.
                            if (groupId > 0)
                            {
                                writer.Write(string.Format(invariant, " qid:{0}", groupId - 1));
                            }
                        }
                        if (weightGetter != null)
                        {
                            float weight = default;
                            weightGetter(ref weight);
                            // A weight of exactly 1 is omitted from the output.
                            if (weight != 1)
                            {
                                writer.Write(string.Format(invariant, " cost:{0:R}", weight));
                            }
                        }

                        featuresGetter(ref features);
                        bool any = false;
                        foreach (var pair in features.Items().Where(p => p.Value != 0))
                        {
                            // Feature indices are written as-is when _zero is set, otherwise shifted by one.
                            writer.Write(string.Format(invariant, " {0}:{1}", _zero ? pair.Key : (pair.Key + 1), pair.Value));
                            any = true;
                        }
                        // If there were no non-zero items, write a dummy item. Some parsers can handle
                        // empty arrays correctly, but some assume there is at least one defined item.
                        if (!any)
                        {
                            writer.Write(_zero ? " 0:0" : " 1:0");
                        }
                        writer.WriteLine();
                    }
                }
            }
        }