/// <summary>
/// Builds a single <see cref="TermEstimator"/> covering every column in <paramref name="toOutput"/>.
/// Each output column is cast to <see cref="ITermCol"/> and turned into one
/// <see cref="TermTransform.ColumnInfo"/>; any per-column OnFit callbacks are combined into one
/// delegate that is attached to the estimator.
/// </summary>
/// <param name="env">Host environment handed to the estimator.</param>
/// <param name="toOutput">Columns to produce; each must be castable to <see cref="ITermCol"/>.</param>
/// <param name="inputNames">Lookup from a pipeline column to its input column name.</param>
/// <param name="outputNames">Lookup from a pipeline column to its output column name.</param>
/// <param name="usedNames">Not read by this method.</param>
/// <returns>The estimator itself, or the estimator wrapped with the combined on-fit delegate.</returns>
public override IEstimator<ITransformer> Reconcile(IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary<PipelineColumn, string> inputNames, IReadOnlyDictionary<PipelineColumn, string> outputNames, IReadOnlyCollection<string> usedNames)
{
    int count = toOutput.Length;
    var columnInfos = new TermTransform.ColumnInfo[count];
    Action<TermTransform> combinedOnFit = null;

    for (int index = 0; index < count; ++index)
    {
        var termCol = (ITermCol)toOutput[index];

        string inputName = inputNames[termCol.Input];
        string outputName = outputNames[toOutput[index]];
        columnInfos[index] = new TermTransform.ColumnInfo(inputName, outputName, termCol.Config.Max, (TermTransform.SortOrder)termCol.Config.Order);

        if (termCol.Config.OnFit == null)
        {
            continue;
        }

        // Copy the loop counter: the lambda runs after the loop has finished, when
        // 'index' itself would already equal toOutput.Length.
        int capturedIndex = index;
        combinedOnFit += fitted => termCol.Config.OnFit(fitted.GetTermMap(capturedIndex));
    }

    var estimator = new TermEstimator(env, columnInfos);
    if (combinedOnFit != null)
    {
        return estimator.WithOnFitDelegate(combinedOnFit);
    }

    return estimator;
}
/// <summary>
/// Fits the categorical pipeline on <paramref name="input"/> and stores the resulting chain.
/// </summary>
/// <param name="term">Term estimator that always forms the first stage.</param>
/// <param name="toVector">Optional second stage appended after <paramref name="term"/>; may be null.</param>
/// <param name="input">Data the estimators are fitted on.</param>
public CategoricalTransform(TermEstimator term, IEstimator<ITransformer> toVector, IDataView input)
{
    if (toVector == null)
    {
        // No second stage: the chain consists of the fitted term transformer alone.
        var fittedTerm = term.Fit(input);
        _transformer = new TransformerChain<ITransformer>(fittedTerm);
        return;
    }

    var pipeline = term.Append(toVector);
    _transformer = pipeline.Fit(input);
}
/// <summary>
/// Creates the term estimator over <paramref name="columns"/> and, for every column whose output
/// kind is not Key, the follow-up stage that consumes the produced keys.
/// </summary>
/// <remarks>
/// Bin columns go to a <see cref="KeyToBinaryVectorEstimator"/>; Ind and Bag columns go to a
/// <see cref="KeyToVectorEstimator"/>. Each estimator is built once, after all columns have been
/// classified. Previously a single sticky flag routed every column to the binary estimator as soon
/// as one Bin column had been seen, and the estimator was rebuilt on every loop iteration.
/// When no column needs a follow-up stage, _keyToSomething is left null.
/// </remarks>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="columns">Column descriptions, each carrying its requested output kind.</param>
public CategoricalEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(nameof(TermEstimator));
    _term = new TermEstimator(_host, columns);

    var binaryCols = new List<(string input, string output)>();
    var cols = new List<(string input, string output, bool bag)>();
    for (int i = 0; i < columns.Length; i++)
    {
        var column = columns[i];
        CategoricalTransform.OutputKind kind = column.OutputKind;
        switch (kind)
        {
            default:
                throw _host.ExceptUserArg(nameof(column.OutputKind));
            case CategoricalTransform.OutputKind.Key:
                // Key output needs nothing beyond the term stage.
                continue;
            case CategoricalTransform.OutputKind.Bin:
                binaryCols.Add((column.Output, column.Output));
                break;
            case CategoricalTransform.OutputKind.Ind:
                cols.Add((column.Output, column.Output, false));
                break;
            case CategoricalTransform.OutputKind.Bag:
                cols.Add((column.Output, column.Output, true));
                break;
        }
    }

    IEstimator<ITransformer> toBinVector = null;
    IEstimator<ITransformer> toVector = null;
    if (binaryCols.Count > 0)
    {
        toBinVector = new KeyToBinaryVectorEstimator(_host, binaryCols.Select(x => new KeyToBinaryVectorTransform.ColumnInfo(x.input, x.output)).ToArray());
    }
    if (cols.Count > 0)
    {
        toVector = new KeyToVectorEstimator(_host, cols.Select(x => new KeyToVectorTransform.ColumnInfo(x.input, x.output, x.bag)).ToArray());
    }

    if (toBinVector != null && toVector != null)
    {
        // Both kinds requested: run the vector stage, then the binary-vector stage.
        // NOTE(review): assumes _keyToSomething is declared as IEstimator<ITransformer> so it can hold the chain; confirm.
        _keyToSomething = toVector.Append(toBinVector);
    }
    else
    {
        _keyToSomething = toBinVector ?? toVector;
    }
}
/// <summary>
/// Replaces the stored term estimator with the result of attaching <paramref name="onFit"/>
/// to it via WithOnFitDelegate.
/// </summary>
/// <param name="onFit">Callback passed to the term estimator's WithOnFitDelegate.</param>
internal void WrapTermWithDelegate(Action<TermTransform> onFit)
{
    var wrapped = _term.WithOnFitDelegate(onFit);
    // The result is cast back to TermEstimator, the type the field is assigned as.
    _term = (TermEstimator)wrapped;
}
/// <summary>
/// Creates the term estimator over <paramref name="columns"/> and the optional follow-up stage
/// that converts the produced keys into vectors.
/// </summary>
/// <remarks>
/// Columns are sorted by output kind: Key columns need no follow-up, Bin columns are collected for
/// a <see cref="KeyToBinaryVectorEstimator"/>, and Ind/Bag columns are collected for a
/// <see cref="KeyToVectorEstimator"/> (Bag sets the bag flag). If both estimators are needed the
/// vector estimator runs first and the binary one is appended; if neither is needed the follow-up
/// stage is null.
/// </remarks>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="columns">Column descriptions, each carrying its requested output kind.</param>
public CategoricalEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(nameof(TermEstimator));
    _term = new TermEstimator(_host, columns);

    var binaryTargets = new List<(string input, string output)>();
    var vectorTargets = new List<(string input, string output, bool bag)>();

    foreach (var col in columns)
    {
        switch (col.OutputKind)
        {
            case CategoricalTransform.OutputKind.Key:
                // Nothing to add after the term stage for this column.
                break;
            case CategoricalTransform.OutputKind.Ind:
                vectorTargets.Add((col.Output, col.Output, false));
                break;
            case CategoricalTransform.OutputKind.Bag:
                vectorTargets.Add((col.Output, col.Output, true));
                break;
            case CategoricalTransform.OutputKind.Bin:
                binaryTargets.Add((col.Output, col.Output));
                break;
            default:
                throw _host.ExceptUserArg(nameof(col.OutputKind));
        }
    }

    IEstimator<ITransformer> binaryStage = null;
    if (binaryTargets.Count > 0)
    {
        var binaryInfos = binaryTargets
            .Select(t => new KeyToBinaryVectorTransform.ColumnInfo(t.input, t.output))
            .ToArray();
        binaryStage = new KeyToBinaryVectorEstimator(_host, binaryInfos);
    }

    IEstimator<ITransformer> vectorStage = null;
    if (vectorTargets.Count > 0)
    {
        var vectorInfos = vectorTargets
            .Select(t => new KeyToVectorTransform.ColumnInfo(t.input, t.output, t.bag))
            .ToArray();
        vectorStage = new KeyToVectorEstimator(_host, vectorInfos);
    }

    if (binaryStage == null)
    {
        // Either only the vector stage exists, or there is no follow-up stage at all (null).
        _toSomething = vectorStage;
    }
    else if (vectorStage == null)
    {
        _toSomething = binaryStage;
    }
    else
    {
        _toSomething = vectorStage.Append(binaryStage);
    }
}
/// <summary>
/// Fits the categorical pipeline on <paramref name="input"/> and stores the resulting chain.
/// </summary>
/// <remarks>
/// <paramref name="toVector"/> is null when no column needs a stage after the term estimator
/// (the estimator constructors leave that stage unset when every column has Key output).
/// In that case only <paramref name="term"/> is fitted and wrapped in a one-element chain,
/// instead of passing null to Append.
/// </remarks>
/// <param name="term">Term estimator that always forms the first stage.</param>
/// <param name="toVector">Optional stage appended after <paramref name="term"/>; may be null.</param>
/// <param name="input">Data the estimators are fitted on.</param>
public CategoricalTransform(TermEstimator term, IEstimator<ITransformer> toVector, IDataView input)
{
    if (toVector != null)
    {
        var chain = term.Append(toVector);
        _transformer = chain.Fit(input);
    }
    else
    {
        // No follow-up stage: the chain is just the fitted term transformer.
        _transformer = new TransformerChain<ITransformer>(term.Fit(input));
    }
}