/// <summary>
/// Checks that when side data is supplied to the one-hot encoding estimator, the slot names of the
/// output column are taken from that side data, whatever the values of the input column are.
/// </summary>
public void CategoricalOneHotEncodingFromSideData()
{
    var mlContext = new MLContext();

    // Input rows: none of these values appear among the side data terms below.
    var rows = new[]
    {
        new TestStringClass() { A = "Stay" },
        new TestStringClass() { A = "awhile and listen" }
    };
    var input = mlContext.Data.ReadFromEnumerable(rows);

    // Side data: a single column holding the terms the mapping is expected to use.
    var termsBuilder = new ArrayDataViewBuilder(mlContext);
    termsBuilder.AddColumn("Hello", "hello", "my", "friend");
    var terms = termsBuilder.GetDataView();

    // Encode input column "A" into output column "CatA", handing the side data to the estimator.
    var columns = new[]
    {
        new OneHotEncodingEstimator.ColumnInfo("CatA", "A", OneHotEncodingTransformer.OutputKind.Bag)
    };
    var estimator = mlContext.Transforms.Categorical.OneHotEncoding(columns, terms);
    var transformed = estimator.Fit(input).Transform(input);

    VBuffer<ReadOnlyMemory<char>> slotNames = default;
    transformed.Schema["CatA"].GetSlotNames(ref slotNames);

    // The slot names must be exactly the side data terms, in the same order.
    string[] expectedSlotNames = { "hello", "my", "friend" };
    Assert.Equal(expectedSlotNames.Length, slotNames.Length);
    for (int slot = 0; slot < expectedSlotNames.Length; slot++)
    {
        Assert.Equal(expectedSlotNames[slot], slotNames.GetItemOrDefault(slot).ToString());
    }

    Done();
}
/// <summary>
/// Builds a single <see cref="OneHotEncodingEstimator"/> covering every requested output column.
/// </summary>
/// <param name="env">Host environment passed to the estimator constructor.</param>
/// <param name="toOutput">Columns to produce; each one is cast to <c>ICategoricalCol</c>.</param>
/// <param name="inputNames">Lookup from a pipeline column to the name used as the source column.</param>
/// <param name="outputNames">Lookup from a pipeline column to the name used for the produced column.</param>
/// <param name="usedNames">Not used by this implementation.</param>
/// <returns>The configured estimator, with any per-column on-fit callbacks attached.</returns>
public override IEstimator<ITransformer> Reconcile(
    IHostEnvironment env,
    PipelineColumn[] toOutput,
    IReadOnlyDictionary<PipelineColumn, string> inputNames,
    IReadOnlyDictionary<PipelineColumn, string> outputNames,
    IReadOnlyCollection<string> usedNames)
{
    var infos = new OneHotEncodingEstimator.ColumnInfo[toOutput.Length];
    Action<ValueToKeyMappingTransformer> onFit = null;

    for (int i = 0; i < toOutput.Length; ++i)
    {
        var tcol = (ICategoricalCol)toOutput[i];

        // ColumnInfo takes the output (new) column name first and the source column name second.
        // The previous code passed them the other way round, which swaps the roles of the columns.
        infos[i] = new OneHotEncodingEstimator.ColumnInfo(
            outputNames[toOutput[i]],
            inputNames[tcol.Input],
            (OneHotEncodingTransformer.OutputKind)tcol.Config.OutputKind,
            tcol.Config.Max,
            (ValueToKeyMappingTransformer.SortOrder)tcol.Config.Order);

        if (tcol.Config.OnFit != null)
        {
            // Copy the index: capturing i directly would observe toOutput.Length when the delegate runs.
            int ii = i;
            onFit += tt => tcol.Config.OnFit(tt.GetTermMap(ii));
        }
    }

    var est = new OneHotEncodingEstimator(env, infos);
    if (onFit != null)
    {
        // Only wrap the estimator when at least one column registered an on-fit callback.
        est.WrapTermWithDelegate(onFit);
    }

    return est;
}