/// <summary>
/// Makes sure <paramref name="score"/> and <paramref name="prob"/> hold the mapper output for the
/// row <paramref name="input"/> is currently positioned on. The mapper is only invoked when
/// <paramref name="cachedPosition"/> differs from the row's position.
/// </summary>
/// <param name="mapper">Mapper producing a score and a probability from a feature vector. Must not be null.</param>
/// <param name="cachedPosition">Position for which the cached outputs were computed; updated on recomputation.</param>
/// <param name="featureGetter">Optional getter used to refresh <paramref name="features"/>; skipped when null.</param>
/// <param name="features">Feature buffer passed to the mapper.</param>
/// <param name="score">Receives the score written by the mapper.</param>
/// <param name="prob">Receives the probability written by the mapper.</param>
/// <param name="input">Row whose position determines whether the cache is still valid.</param>
private static void EnsureCachedResultValueMapper(ValueMapper<VBuffer<Float>, Float, Float> mapper, ref long cachedPosition, ValueGetter<VBuffer<Float>> featureGetter, ref VBuffer<Float> features, ref Float score, ref Float prob, Row input)
{
    Contracts.AssertValue(mapper);

    // Cached outputs already belong to this position: nothing to do.
    if (cachedPosition == input.Position)
    {
        return;
    }

    // Refresh the features only when a getter was supplied; otherwise the caller's
    // current buffer contents are fed to the mapper unchanged.
    featureGetter?.Invoke(ref features);

    mapper(in features, ref score, ref prob);
    cachedPosition = input.Position;
}
/// <summary>
/// Builds a getter that renders the feature contributions of the current row as text of the
/// form <c>name: value, name: value</c>.
/// </summary>
/// <typeparam name="TSrc">Type of the source feature column.</typeparam>
/// <param name="input">Row to read the features from.</param>
/// <param name="colSrc">Index of the feature column in <paramref name="input"/>.</param>
/// <param name="slotNames">Slot names handed to <c>GetSlotName</c> to label each contribution.</param>
/// <returns>A getter producing the formatted contribution string for the current row.</returns>
private ValueGetter<ReadOnlyMemory<char>> GetTextValueGetter<TSrc>(Row input, int colSrc, VBuffer<ReadOnlyMemory<char>> slotNames)
{
    Contracts.AssertValue(input);
    Contracts.AssertValue(Predictor);

    var sourceGetter = input.GetGetter<TSrc>(colSrc);
    var contributionMapper = Predictor.GetFeatureContributionMapper<TSrc, VBuffer<float>>(_topContributionsCount, _bottomContributionsCount, _normalize);

    // Scratch state captured by the getter below and reused from call to call.
    var sourceValue = default(TSrc);
    var contributionBuffer = default(VBuffer<float>);

    return (ref ReadOnlyMemory<char> dst) =>
    {
        sourceGetter(ref sourceValue);
        contributionMapper(in sourceValue, ref contributionBuffer);

        var editor = VBufferEditor.CreateFromBuffer(ref contributionBuffer);

        // A dense buffer has no explicit indices, so use the identity permutation for it.
        var slotIndices = contributionBuffer.IsDense
            ? Utils.GetIdentityPermutation(contributionBuffer.Length)
            : editor.Indices;
        var slotValues = editor.Values;
        var total = slotValues.Length;
        var text = new StringBuilder();

        // Sort the contribution values, carrying their slot indices along.
        GenericSpanSortHelper<float>.Sort(slotValues, slotIndices, 0, total);

        for (var pos = 0; pos < total; pos++)
        {
            var contribution = slotValues[pos];
            var slot = slotIndices[pos];
            text.AppendFormat("{0}: {1}, ", GetSlotName(slot, slotNames), contribution);
        }

        // Drop the trailing ", " left behind by the last appended entry.
        if (text.Length > 0)
        {
            _env.Assert(text.Length >= 2);
            text.Length -= 2;
        }

        dst = new ReadOnlyMemory<char>(text.ToString().ToCharArray());
    };
}
/// <summary>
/// Fills <paramref name="slotNames"/> with the slot names of the single column playing
/// <paramref name="role"/> in <paramref name="schema"/>. If the schema is null, the role does not
/// map to exactly one column, or that column has no slot names of size <paramref name="vectorSize"/>,
/// the buffer is instead resized to <paramref name="vectorSize"/> with zero explicit values.
/// </summary>
/// <param name="schema">Role-mapped schema to look in; may be null.</param>
/// <param name="role">Role of the column whose slot names are requested.</param>
/// <param name="vectorSize">Expected number of slots; must be non-negative.</param>
/// <param name="slotNames">Buffer receiving the slot names.</param>
internal static void GetSlotNames(RoleMappedSchema schema, RoleMappedSchema.ColumnRole role, int vectorSize, ref VBuffer<ReadOnlyMemory<char>> slotNames)
{
    Contracts.CheckValueOrNull(schema);
    Contracts.CheckParam(vectorSize >= 0, nameof(vectorSize));

    IReadOnlyList<Schema.Column> roleColumns = schema?.GetColumns(role);
    if (roleColumns != null && roleColumns.Count == 1)
    {
        var column = schema.Schema[roleColumns[0].Index];
        if (column.HasSlotNames(vectorSize))
        {
            column.Metadata.GetValue(Kinds.SlotNames, ref slotNames);
            return;
        }
    }

    // No usable slot names: hand back a buffer of the right length with no explicit entries.
    VBufferUtils.Resize(ref slotNames, vectorSize, 0);
}
/// <summary>
/// Reads the <c>Kinds.SlotNames</c> metadata value of <paramref name="column"/> into
/// <paramref name="slotNames"/>.
/// </summary>
/// <param name="column">Column whose slot-names metadata is read.</param>
/// <param name="slotNames">Buffer receiving the slot names.</param>
public static void GetSlotNames(this Schema.Column column, ref VBuffer<ReadOnlyMemory<char>> slotNames)
{
    column.Metadata.GetValue(Kinds.SlotNames, ref slotNames);
}
/// <summary>
/// Returns a <see cref="VBufferEditor{T}"/> for <paramref name="destination"/>, having the same
/// shape (length and density) as that buffer.
/// </summary>
/// <typeparam name="T">Element type of the buffer.</typeparam>
/// <param name="destination">The buffer to edit. The returned <see cref="VBufferEditor{T}"/> is assumed
/// to take ownership of it: because the underlying buffers may be reused, the <see cref="VBuffer{T}"/>
/// passed here should not be used again afterwards.</param>
/// <returns>The editor obtained from <paramref name="destination"/>.</returns>
public static VBufferEditor<T> CreateFromBuffer<T>(ref VBuffer<T> destination)
    => destination.GetEditor();
/// <summary>
/// Returns the schema of a sequence predictor's output. The schema has a single column, always
/// named <see cref="AnnotationUtils.Const.ScoreValueKind.PredictedLabel"/>.
/// </summary>
/// <param name="scoreType">Type of the score column produced by the sequence predictor.</param>
/// <param name="scoreColumnKindValue">Annotation value of the score column, stored under the key
/// <see cref="AnnotationUtils.Kinds.ScoreColumnKind"/>.</param>
/// <param name="keyNames">Sequence predictors usually produce integer outputs; this gives the tag of
/// every possible output value. For example, output 0 could be mapped to "Sell" and 1 to "Buy" when
/// predicting a stock trend. When empty, no key-value annotation is added.</param>
/// <returns><see cref="DataViewSchema"/> of the sequence predictor's output.</returns>
public static DataViewSchema CreateSequencePredictionSchema(DataViewType scoreType, string scoreColumnKindValue, VBuffer<ReadOnlyMemory<char>> keyNames = default)
{
    Contracts.CheckValue(scoreType, nameof(scoreType));
    Contracts.CheckValue(scoreColumnKindValue, nameof(scoreColumnKindValue));

    var annotations = new DataViewSchema.Annotations.Builder();

    // Annotations are registered together with their getters. The names of the
    // predicted keys come first, but only when the caller supplied any.
    if (keyNames.Length > 0)
    {
        annotations.AddKeyValues(
            keyNames.Length,
            TextDataViewType.Instance,
            (ref VBuffer<ReadOnlyMemory<char>> value) => { value = keyNames; });
    }

    annotations.Add(
        AnnotationUtils.Kinds.ScoreColumnKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => { value = scoreColumnKindValue.AsMemory(); });

    annotations.Add(
        AnnotationUtils.Kinds.ScoreValueKind,
        TextDataViewType.Instance,
        (ref ReadOnlyMemory<char> value) => { value = AnnotationUtils.Const.ScoreValueKind.PredictedLabel.AsMemory(); });

    // The resulting schema consists of exactly one column carrying the annotations above.
    var schema = new DataViewSchema.Builder();
    schema.AddColumn(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, scoreType, annotations.ToAnnotations());
    return schema.ToSchema();
}
/// <summary>
/// Writes <paramref name="data"/> to <paramref name="stream"/> as SVM-light text, one line per row:
/// the label, an optional <c>qid:</c> group id, an optional <c>cost:</c> weight, and then the
/// non-zero features as <c>index:value</c> pairs.
/// </summary>
/// <remarks>
/// All numbers are formatted with the invariant culture so that the output does not depend on the
/// current thread culture (decimal separator, negative sign). Floating-point values use the
/// round-trip ("R") format.
/// </remarks>
/// <param name="stream">Stream to write to. It is wrapped in a <see cref="StreamWriter"/> that is
/// disposed before this method returns.</param>
/// <param name="data">Data view that must contain the configured label and feature columns, and the
/// group and weight columns when those are configured.</param>
/// <param name="cols">Indices of the columns the caller asked to save. They are only used to warn
/// about columns that this saver does not write; null is treated as empty.</param>
public void SaveData(Stream stream, IDataView data, params int[] cols)
{
    _host.CheckValue(stream, nameof(stream));
    _host.CheckValue(data, nameof(data));
    _host.CheckValueOrNull(cols);
    if (cols == null)
    {
        cols = new int[0];
    }

    // SVM-light is a machine-readable format, so never format numbers with the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    using (var ch = _host.Start("Saving"))
    {
        var labelCol = data.Schema.GetColumnOrNull(_labelCol);
        if (!labelCol.HasValue)
        {
            throw ch.Except($"Column {_labelCol} not found in data");
        }

        var featureCol = data.Schema.GetColumnOrNull(_featureCol);
        if (!featureCol.HasValue)
        {
            throw ch.Except($"Column {_featureCol} not found in data");
        }

        // Group and weight columns are optional: they are only looked up when a name is configured.
        var groupCol = !string.IsNullOrWhiteSpace(_groupCol) ? data.Schema.GetColumnOrNull(_groupCol) : default;
        if (!string.IsNullOrWhiteSpace(_groupCol) && !groupCol.HasValue)
        {
            throw ch.Except($"Column {_groupCol} not found in data");
        }

        var weightCol = !string.IsNullOrWhiteSpace(_weightCol) ? data.Schema.GetColumnOrNull(_weightCol) : default;
        if (!string.IsNullOrWhiteSpace(_weightCol) && !weightCol.HasValue)
        {
            throw ch.Except($"Column {_weightCol} not found in data");
        }

        foreach (var col in cols)
        {
            // Reject negative indices as well as indices past the end of the schema.
            _host.Check(0 <= col && col < data.Schema.Count);
            var column = data.Schema[col];
            if (column.Name != _labelCol && column.Name != _featureCol && column.Name != _groupCol && column.Name != _weightCol)
            {
                ch.Warning($"Column {column.Name} will not be saved. SVM-light saver saves the label column, feature column, optional group column and optional weight column.");
            }
        }

        var columns = new List<DataViewSchema.Column>() { labelCol.Value, featureCol.Value };
        if (groupCol.HasValue)
        {
            columns.Add(groupCol.Value);
        }
        if (weightCol.HasValue)
        {
            columns.Add(weightCol.Value);
        }

        using (var writer = new StreamWriter(stream))
        using (var cursor = data.GetRowCursor(columns))
        {
            // Getting the getters will fail with type errors if the types are not correct,
            // so we rely on those messages.
            var labelGetter = cursor.GetGetter<float>(labelCol.Value);
            var featuresGetter = cursor.GetGetter<VBuffer<float>>(featureCol.Value);
            var groupGetter = groupCol.HasValue ? cursor.GetGetter<ulong>(groupCol.Value) : null;
            var weightGetter = weightCol.HasValue ? cursor.GetGetter<float>(weightCol.Value) : null;
            VBuffer<float> features = default;

            while (cursor.MoveNext())
            {
                float lab = default;
                labelGetter(ref lab);
                if (_binary)
                {
                    // Binary mode collapses the label to its sign; NaN is written as 0.
                    int binaryLabel = float.IsNaN(lab) ? 0 : (lab > 0 ? 1 : -1);
                    writer.Write(binaryLabel.ToString(invariant));
                }
                else
                {
                    writer.Write(lab.ToString("R", invariant));
                }

                if (groupGetter != null)
                {
                    ulong groupId = default;
                    groupGetter(ref groupId);

                    // A group id of 0 is not written; other ids are written shifted down by one.
                    if (groupId > 0)
                    {
                        writer.Write(" qid:");
                        writer.Write((groupId - 1).ToString(invariant));
                    }
                }

                if (weightGetter != null)
                {
                    float weight = default;
                    weightGetter(ref weight);

                    // A weight of exactly 1 is left out of the output.
                    if (weight != 1)
                    {
                        writer.Write(" cost:");
                        writer.Write(weight.ToString("R", invariant));
                    }
                }

                featuresGetter(ref features);
                bool any = false;
                foreach (var pair in features.Items().Where(p => p.Value != 0))
                {
                    // Feature indices are written zero-based or one-based depending on _zero.
                    int index = _zero ? pair.Key : (pair.Key + 1);
                    writer.Write(string.Format(invariant, " {0}:{1:R}", index, pair.Value));
                    any = true;
                }

                // If there were no non-zero items, write a dummy item. Some parsers can handle
                // empty arrays correctly, but some assume there is at least one defined item.
                if (!any)
                {
                    writer.Write(_zero ? " 0:0" : " 1:0");
                }
                writer.WriteLine();
            }
        }
    }
}