/// <summary>
/// Round-trips a key-to-binary-vector pipeline through the legacy model format:
/// the transformed data is saved with <c>TrainUtils.SaveModel</c> into a memory
/// stream and the transforms are read back with <c>ModelFileUtils.LoadTransforms</c>.
/// No assertions are made on the reloaded view; the test passes when neither call throws.
/// </summary>
public void TestOldSavingAndLoading()
{
    // Two rows with three columns each.
    var rows = new[]
    {
        new TestClass() { A = 1, B = 2, C = 3 },
        new TestClass() { A = 4, B = 5, C = 6 }
    };
    var dataView = ComponentCreation.CreateDataView(Env, rows);

    // Map A, B and C to key columns; only TermB asks for text key values.
    var termEstimator = new ValueToKeyMappingEstimator(Env, new[]
    {
        new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
        new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB", textKeyValues: true),
        new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC")
    });
    var termTransformer = termEstimator.Fit(dataView);
    dataView = termTransformer.Transform(dataView);

    // Convert each key column to a binary vector column.
    var binaryEstimator = new KeyToBinaryVectorMappingEstimator(
        Env,
        new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermA", "CatA"),
        new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermB", "CatB"),
        new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermC", "CatC"));
    var binaryTransformer = binaryEstimator.Fit(dataView);
    var result = binaryTransformer.Transform(dataView);
    var resultRoles = new RoleMappedData(result);

    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);

        // Rewind so the loader reads from the start of what was just written.
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
    }
}
/// <summary>
/// Builds a four-column data view (string vector, string scalar, int vector, int scalar),
/// maps every column to keys, converts the keys to binary vectors and hands the result
/// to <c>ValidateMetadata</c>.
/// </summary>
public void TestMetadataPropagation()
{
    var rows = new[]
    {
        new TestMeta() { A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6 },
        new TestMeta() { A = new[] { "A", "B" }, B = "C", C = new[] { 5, 3 }, D = 1 },
        new TestMeta() { A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6 }
    };
    var dataView = ComponentCreation.CreateDataView(Env, rows);

    // The two string-sourced columns request text key values; the int-sourced ones do not.
    var keyColumns = new[]
    {
        new ValueToKeyMappingTransformer.ColumnInfo("A", "TA", textKeyValues: true),
        new ValueToKeyMappingTransformer.ColumnInfo("B", "TB", textKeyValues: true),
        new ValueToKeyMappingTransformer.ColumnInfo("C", "TC"),
        new ValueToKeyMappingTransformer.ColumnInfo("D", "TD")
    };
    var termEst = new ValueToKeyMappingEstimator(Env, keyColumns);
    var termTransformer = termEst.Fit(dataView);
    dataView = termTransformer.Transform(dataView);

    var binaryEstimator = new KeyToBinaryVectorMappingEstimator(
        Env,
        new KeyToBinaryVectorMappingTransformer.ColumnInfo("TA", "CatA"),
        new KeyToBinaryVectorMappingTransformer.ColumnInfo("TB", "CatB"),
        new KeyToBinaryVectorMappingTransformer.ColumnInfo("TC", "CatC"),
        new KeyToBinaryVectorMappingTransformer.ColumnInfo("TD", "CatD"));
    var binaryTransformer = binaryEstimator.Fit(dataView);
    var result = binaryTransformer.Transform(dataView);

    ValidateMetadata(result);
    Done();
}
/// <summary>
/// Runs <c>TestEstimatorCore</c> on a <c>KeyToBinaryVectorMappingEstimator</c> that converts
/// the key columns TermA and TermC (TermC carries text key values) to CatA and CatC.
/// </summary>
public void KeyToBinaryVectorWorkout()
{
    var rows = new[]
    {
        new TestClass() { A = 1, B = 2, C = 3 },
        new TestClass() { A = 4, B = 5, C = 6 }
    };
    var dataView = ComponentCreation.CreateDataView(Env, rows);

    // Produce the key columns the estimator under test consumes.
    // TermB is produced as well but is not converted below.
    var termEstimator = new ValueToKeyMappingEstimator(Env, new[]
    {
        new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
        new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB"),
        new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC", textKeyValues: true)
    });
    var termTransformer = termEstimator.Fit(dataView);
    dataView = termTransformer.Transform(dataView);

    var pipe = new KeyToBinaryVectorMappingEstimator(
        Env,
        new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermA", "CatA"),
        new KeyToBinaryVectorMappingTransformer.ColumnInfo("TermC", "CatC"));

    TestEstimatorCore(pipe, dataView);
    Done();
}
/// <summary>
/// Creates the estimator: a <c>HashingEstimator</c> over every column's <c>HashInfo</c>,
/// plus an optional follow-up estimator that converts the hashed key columns to vectors
/// according to each column's <c>OutputKind</c>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="columns">Column descriptions; must not be null.</param>
/// <remarks>
/// Columns with <c>OutputKind.Key</c> get no follow-up conversion. When every column is
/// of that kind the follow-up estimator (<c>_toSomething</c>) is left null.
/// </remarks>
public OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
    Contracts.CheckValue(env, nameof(env));
    // Fail with the parameter's own name instead of a failure from the LINQ call below.
    Contracts.CheckValue(columns, nameof(columns));

    // Register the host under this estimator's own name (it was previously registered
    // under ValueToKeyMappingEstimator's name, unlike the channel opened below).
    _host = env.Register(nameof(OneHotHashEncodingEstimator));
    _hash = new HashingEstimator(_host, columns.Select(x => x.HashInfo).ToArray());

    using (var ch = _host.Start(nameof(OneHotHashEncodingEstimator)))
    {
        // Columns are converted in place: input and output are both the hash output name.
        var binaryCols = new List<(string input, string output)>();
        var cols = new List<(string input, string output, bool bag)>();

        foreach (var column in columns)
        {
            switch (column.OutputKind)
            {
                case CategoricalTransform.OutputKind.Key:
                    // No follow-up estimator is queued for this column.
                    break;

                case CategoricalTransform.OutputKind.Bin:
                    if (column.HashInfo.InvertHash != 0)
                    {
                        ch.Warning("Invert hashing is being used with binary encoding.");
                    }

                    binaryCols.Add((column.HashInfo.Output, column.HashInfo.Output));
                    break;

                case CategoricalTransform.OutputKind.Ind:
                    cols.Add((column.HashInfo.Output, column.HashInfo.Output, false));
                    break;

                case CategoricalTransform.OutputKind.Bag:
                    cols.Add((column.HashInfo.Output, column.HashInfo.Output, true));
                    break;

                default:
                    throw _host.ExceptUserArg(nameof(column.OutputKind));
            }
        }

        IEstimator<ITransformer> toBinVector = null;
        IEstimator<ITransformer> toVector = null;

        if (binaryCols.Count > 0)
        {
            toBinVector = new KeyToBinaryVectorMappingEstimator(
                _host,
                binaryCols.Select(x => new KeyToBinaryVectorTransform.ColumnInfo(x.input, x.output)).ToArray());
        }

        if (cols.Count > 0)
        {
            toVector = new KeyToVectorMappingEstimator(
                _host,
                cols.Select(x => new KeyToVectorTransform.ColumnInfo(x.input, x.output, x.bag)).ToArray());
        }

        if (toBinVector != null && toVector != null)
        {
            // Both kinds requested: run the vector conversion first, then the binary one.
            _toSomething = toVector.Append(toBinVector);
        }
        else
        {
            // At most one is set; this is null when neither was requested.
            _toSomething = toBinVector ?? toVector;
        }
    }
}
/// <summary>
/// Creates the estimator: a <c>ValueToKeyMappingEstimator</c> over all columns, plus an
/// optional follow-up estimator that converts the resulting key columns to vectors
/// according to each column's <c>OutputKind</c>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="columns">Column descriptions, forwarded to the value-to-key estimator.</param>
/// <param name="file">Forwarded unchanged to the value-to-key estimator.</param>
/// <param name="termsColumn">Forwarded unchanged to the value-to-key estimator.</param>
/// <param name="loaderFactory">Forwarded unchanged to the value-to-key estimator.</param>
/// <remarks>
/// Columns with <c>OutputKind.Key</c> get no follow-up conversion. When every column is
/// of that kind the follow-up estimator (<c>_toSomething</c>) is left null.
/// </remarks>
public OneHotEncodingEstimator(IHostEnvironment env, ColumnInfo[] columns, string file = null, string termsColumn = null, IComponentFactory<IMultiStreamSource, IDataLoader> loaderFactory = null)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(nameof(OneHotEncodingEstimator));
    _term = new ValueToKeyMappingEstimator(_host, columns, file, termsColumn, loaderFactory);

    // Columns are converted in place: source and destination are both the column's Output name.
    var binaryTargets = new List<(string input, string output)>();
    var vectorTargets = new List<(string input, string output, bool bag)>();

    foreach (var column in columns)
    {
        switch (column.OutputKind)
        {
            case OneHotEncodingTransformer.OutputKind.Key:
                // No follow-up estimator is queued for this column.
                continue;

            case OneHotEncodingTransformer.OutputKind.Ind:
                vectorTargets.Add((column.Output, column.Output, false));
                break;

            case OneHotEncodingTransformer.OutputKind.Bag:
                vectorTargets.Add((column.Output, column.Output, true));
                break;

            case OneHotEncodingTransformer.OutputKind.Bin:
                binaryTargets.Add((column.Output, column.Output));
                break;

            default:
                throw _host.ExceptUserArg(nameof(column.OutputKind));
        }
    }

    IEstimator<ITransformer> toBinVector = null;
    if (binaryTargets.Count > 0)
    {
        var binaryInfos = binaryTargets
            .Select(t => new KeyToBinaryVectorMappingTransformer.ColumnInfo(t.input, t.output))
            .ToArray();
        toBinVector = new KeyToBinaryVectorMappingEstimator(_host, binaryInfos);
    }

    IEstimator<ITransformer> toVector = null;
    if (vectorTargets.Count > 0)
    {
        var vectorInfos = vectorTargets
            .Select(t => new KeyToVectorMappingTransformer.ColumnInfo(t.input, t.output, t.bag))
            .ToArray();
        toVector = new KeyToVectorMappingEstimator(_host, vectorInfos);
    }

    if (toBinVector != null && toVector != null)
    {
        // Both kinds requested: run the vector conversion first, then the binary one.
        _toSomething = toVector.Append(toBinVector);
    }
    else
    {
        // At most one is set; this is null when neither was requested.
        _toSomething = toBinVector ?? toVector;
    }
}