/// <summary>
/// Trains one scalar predictor for the class <paramref name="cls"/> and, when
/// <c>Args.UseProbabilities</c> is set, passes it through calibration.
/// </summary>
/// <param name="ch">Channel handed to the calibration utility.</param>
/// <param name="trainer">Trainer used to fit the predictor.</param>
/// <param name="data">Source data whose label binding is replaced by the mapped label column.</param>
/// <param name="cls">Class identifier forwarded to <c>MapLabels</c>.</param>
/// <returns>The trained predictor, or the result of calibration when probabilities are requested.</returns>
private TScalarPredictor TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls)
{
    // MapLabels yields the view to train on and the name of the column to bind as label.
    string mappedLabel;
    var mappedView = MapLabels(data, cls, out mappedLabel);

    // Keep every role binding except the label, then put the mapped label binding first.
    var nonLabelRoles = data.Schema.GetColumnRoleNames()
        .Where(binding => binding.Key.Value != CR.Label.Value);
    var trainRoles = nonLabelRoles.Prepend(CR.Label.Bind(mappedLabel));
    var trainData = new RoleMappedData(mappedView, trainRoles);

    // REVIEW: In principle we could support validation sets and the like via the train context, but
    // this is currently unsupported.
    var trained = trainer.Train(trainData);
    if (!Args.UseProbabilities)
    {
        return trained;
    }

    // A calibrator trainer is only instantiated when the configured component is usable.
    ICalibratorTrainer calibratorTrainer = Args.Calibrator.IsGood()
        ? Args.Calibrator.CreateInstance(Host)
        : null;

    var calibrated = CalibratorUtils.TrainCalibratorIfNeeded(
        Host, ch, calibratorTrainer, Args.MaxCalibrationExamples, trainer, trained, trainData) as TScalarPredictor;
    Host.Check(calibrated != null, "Calibrated predictor does not implement the expected interface");
    return calibrated;
}
/// <summary>
/// Trains a predictor for the pair of classes (<paramref name="cls1"/>, <paramref name="cls2"/>)
/// and runs it through calibration, returning it as a <c>TDistPredictor</c>.
/// </summary>
/// <param name="ch">Channel handed to the calibration utility.</param>
/// <param name="trainer">Trainer used to fit and then create the predictor.</param>
/// <param name="data">Source data whose label binding is replaced by the mapped label column.</param>
/// <param name="cls1">First class identifier forwarded to <c>MapLabels</c>.</param>
/// <param name="cls2">Second class identifier forwarded to <c>MapLabels</c>.</param>
/// <returns>The calibration result; it is checked to be a <c>TDistPredictor</c> and an <c>IValueMapperDist</c>.</returns>
private TDistPredictor TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls1, int cls2)
{
    // MapLabels yields the view to train on and the name of the column to bind as label.
    string mappedLabel;
    var pairView = MapLabels(data, cls1, cls2, out mappedLabel);

    // Keep every role binding except the label, then put the mapped label binding first.
    var nonLabelRoles = data.Schema.GetColumnRoleNames()
        .Where(binding => binding.Key.Value != CR.Label.Value);
    var trainData = RoleMappedData.Create(pairView, nonLabelRoles.Prepend(CR.Label.Bind(mappedLabel)));

    trainer.Train(trainData);

    // A calibrator trainer is only instantiated when the configured component is usable.
    ICalibratorTrainer calibratorTrainer = Args.Calibrator.IsGood()
        ? Args.Calibrator.CreateInstance(Host)
        : null;

    // The predictor is obtained from the trainer after training, then calibrated.
    TScalarPredictor rawPredictor = trainer.CreatePredictor();
    var distPredictor = CalibratorUtils.TrainCalibratorIfNeeded(
        Host, ch, calibratorTrainer, Args.MaxCalibrationExamples, trainer, rawPredictor, trainData) as TDistPredictor;

    Host.Check(distPredictor != null, "Calibrated predictor does not implement the expected interface");
    Host.Check(distPredictor is IValueMapperDist, "Calibrated predictor does not implement the IValueMapperDist interface");
    return distPredictor;
}
/// <summary>
/// Trains one scalar predictor for a single class, optionally inserting the configured
/// cache transform before training and calibrating the result afterwards.
/// </summary>
/// <param name="ch">Channel forwarded to <c>MapLabels</c> and to the calibration utility.</param>
/// <param name="trainer">Trainer used to fit the predictor.</param>
/// <param name="data">Source data whose label binding is replaced by the mapped label column.</param>
/// <param name="cls">The "class id", zero-based.</param>
/// <returns>The trained predictor, or the result of calibration when <c>_args.useProbabilities</c> is set.</returns>
private TScalarPredictor TrainOne(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int cls)
{
    // MapLabels yields the view to train on and the name of the column to bind as label.
    string mappedLabel;
    var trainView = MapLabels(data, cls, out mappedLabel, ch);

    // When a cache transform is configured, it is applied on top of the mapped view.
    if (_args.cacheTransform != null)
    {
        trainView = ScikitSubComponent<IDataTransform, SignatureDataTransform>
            .AsSubComponent(_args.cacheTransform)
            .CreateInstance(Host, trainView);
    }

    // Keep every role binding except the label, then put the mapped label binding first.
    var nonLabelRoles = data.Schema.GetColumnRoleNames()
        .Where(binding => binding.Key.Value != CR.Label.Value);
    var trainData = new RoleMappedData(trainView, nonLabelRoles.Prepend(CR.Label.Bind(mappedLabel)));

    var trained = trainer.Train(trainData);
    if (!_args.useProbabilities)
    {
        return trained;
    }

    // Build the calibrator trainer from the configured calibrator type and calibrate.
    var calibratorTrainer = ScikitSubComponent<ICalibratorTrainer, SignatureCalibrator>
        .AsSubComponent(_args.calibratorType)
        .CreateInstance(Host);
    var calibrated = CalibratorUtils.TrainCalibratorIfNeeded(
        Host, ch, calibratorTrainer, _args.maxCalibrationExamples, trainer, trained, trainData) as TScalarPredictor;
    Host.Check(calibrated != null, "Calibrated predictor does not implement the expected interface");
    return calibrated;
}
/// <summary>
/// Trains a single scalar predictor on a reshaped dataset and wraps it with
/// <c>CreateFinalPredictor</c>. The reshaped dataset gets a generated group column,
/// a new feature column concatenating the original features with the label column
/// <c>labName</c>, and key-typed label and group columns.
/// When <c>_args.reclassicationPredictor</c> is set, a reclassification predictor is
/// trained as well before the final predictor is built.
/// </summary>
/// <param name="ch">Channel used for informational messages and exceptions.</param>
/// <param name="trainer">Trainer fitted once on the reshaped dataset.</param>
/// <param name="data">Input data; reassigned inside the method to a version holding the group column.</param>
/// <param name="count">Number of classes; when not positive (and the label is not a key),
/// the vector size is obtained from <c>MinMaxLabelOverDataSet</c>.</param>
/// <returns>The predictor produced by <c>CreateFinalPredictor</c>.</returns>
protected override TVectorPredictor TrainPredictor(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int count)
{
    // Keep the incoming data (before the group column is added); it is what the
    // reclassification training at the end of the method receives.
    var data0 = data;

    #region adding group ID

    // We insert a group Id.
    string groupColumnTemp = DataViewUtils.GetTempColumnName(data.Schema.Schema) + "GR";
    var groupArgs = new GenerateNumberTransform.Options
    {
        Columns = new[] { GenerateNumberTransform.Column.Parse(groupColumnTemp) },
        UseCounter = true
    };

    var withGroup = new GenerateNumberTransform(Host, groupArgs, data.Data);
    // From here on 'data' refers to the view with the group column, with the same role bindings.
    data = new RoleMappedData(withGroup, data.Schema.GetColumnRoleNames());

    #endregion

    #region preparing the training dataset

    string dstName, labName;
    var trans = MapLabelsAndInsertTransform(ch, data, out dstName, out labName, count, true, _args);
    var newFeatures = trans.Schema.GetTempColumnName() + "NF";

    // We check the label is not boolean.
    int indexLab = SchemaHelper.GetColumnIndex(trans.Schema, dstName);
    var typeLab = trans.Schema[indexLab].Type;
    if (typeLab.RawKind() == DataKind.Boolean)
    {
        throw Host.Except("Column '{0}' has an unexpected type {1}.", dstName, typeLab.RawKind());
    }

    // NOTE(review): the exact effect of DescribeTransform is not visible here; it is
    // configured on the two columns labName and dstName with oneRowPerColumn = true.
    var args3 = new DescribeTransform.Arguments { columns = new string[] { labName, dstName }, oneRowPerColumn = true };
    var desc = new DescribeTransform(Host, args3, trans);

    // Choose the declared type of labName before it is concatenated with the features:
    //  - single-column mode with a Single label: the view is used as is;
    //  - single-column mode otherwise: labName is re-declared as Single;
    //  - key label: labName is re-declared as a Single vector sized by the key count;
    //  - any other label: labName is re-declared as a Single vector sized by 'count',
    //    or by the value returned by MinMaxLabelOverDataSet when count <= 0.
    IDataView viewI;
    if (_args.singleColumn && data.Schema.Label.Value.Type.RawKind() == DataKind.Single)
    {
        viewI = desc;
    }
    else if (_args.singleColumn)
    {
        var sch = new TypeReplacementSchema(desc.Schema, new[] { labName }, new[] { NumberDataViewType.Single });
        viewI = new TypeReplacementDataView(desc, sch);
        #region debug
        #if (DEBUG)
        DebugChecking0(viewI, labName, false);
        #endif
        #endregion
    }
    else if (data.Schema.Label.Value.Type.IsKey())
    {
        ulong nb = data.Schema.Label.Value.Type.AsKey().GetKeyCount();
        // NOTE(review): the ulong key count is narrowed to int here — confirm it always fits.
        var sch = new TypeReplacementSchema(desc.Schema, new[] { labName }, new[] { new VectorDataViewType(NumberDataViewType.Single, (int)nb) });
        viewI = new TypeReplacementDataView(desc, sch);
        #region debug
        #if (DEBUG)
        int nb_;
        MinMaxLabelOverDataSet(trans, labName, out nb_);
        int count3;
        data.CheckMulticlassLabel(out count3);
        // Debug-only consistency check between the key count and the multiclass label count.
        if ((ulong)count3 != nb)
        {
            throw ch.Except("Count mismatch (KeyCount){0} != {1}", nb, count3);
        }
        DebugChecking0(viewI, labName, true);
        DebugChecking0Vfloat(viewI, labName, nb);
        #endif
        #endregion
    }
    else
    {
        int nb;
        if (count <= 0)
        {
            MinMaxLabelOverDataSet(trans, labName, out nb);
        }
        else
        {
            nb = count;
        }
        var sch = new TypeReplacementSchema(desc.Schema, new[] { labName }, new[] { new VectorDataViewType(NumberDataViewType.Single, nb) });
        viewI = new TypeReplacementDataView(desc, sch);
        #region debug
        #if (DEBUG)
        DebugChecking0(viewI, labName, true);
        #endif
        #endregion
    }

    ch.Info("Merging column label '{0}' with features '{1}'", labName, data.Schema.Feature.Value.Name);
    // Builds a "Concat{col=<newFeatures>:<features>,<labName>}" transform specification.
    var args = string.Format("Concat{{col={0}:{1},{2}}}", newFeatures, data.Schema.Feature.Value.Name, labName);
    var after_concatenation_ = ComponentCreation.CreateTransform(Host, args, viewI);

    #endregion

    #region converting label and group into keys

    // We need to convert the label into a Key.
    // The converted column is named dstName + "k".
    // NOTE(review): the key count is hard-coded to 4 — confirm this matches the values
    // produced for dstName by MapLabelsAndInsertTransform.
    var convArgs = new MulticlassConvertTransform.Arguments
    {
        column = new[] { MulticlassConvertTransform.Column.Parse(string.Format("{0}k:{0}", dstName)) },
        keyCount = new KeyCount(4),
        resultType = DataKind.UInt32
    };
    IDataView after_concatenation_key_label = new MulticlassConvertTransform(Host, convArgs, after_concatenation_);

    // The group must be a key too!
    // The converted column is named groupColumnTemp + "k"; its raw type depends on _args.groupIsU4.
    convArgs = new MulticlassConvertTransform.Arguments
    {
        column = new[] { MulticlassConvertTransform.Column.Parse(string.Format("{0}k:{0}", groupColumnTemp)) },
        keyCount = new KeyCount(),
        resultType = _args.groupIsU4 ? DataKind.UInt32 : DataKind.UInt64
    };
    after_concatenation_key_label = new MulticlassConvertTransform(Host, convArgs, after_concatenation_key_label);

    #endregion

    #region preparing the RoleMapData view

    // Names of the key-typed group and label columns created above.
    string groupColumn = groupColumnTemp + "k";
    dstName += "k";

    var roles = data.Schema.GetColumnRoleNames();
    // NOTE(review): this first array is overwritten below before it is ever read.
    var rolesArray = roles.ToArray();
    // Drop the existing label and feature bindings.
    // NOTE(review): the last two filters compare the role name (kvp.Key.Value) with
    // column names; kvp.Value may have been intended — confirm.
    roles = roles
        .Where(kvp => kvp.Key.Value != RoleMappedSchema.ColumnRole.Label.Value)
        .Where(kvp => kvp.Key.Value != RoleMappedSchema.ColumnRole.Feature.Value)
        .Where(kvp => kvp.Key.Value != groupColumn)
        .Where(kvp => kvp.Key.Value != groupColumnTemp);
    rolesArray = roles.ToArray();
    // NOTE(review): only the first remaining binding is inspected for a duplicated group column.
    if (rolesArray.Any() && rolesArray[0].Value == groupColumnTemp)
    {
        throw ch.Except("Duplicated group.");
    }
    // Bind the new feature, label and group columns ahead of the remaining roles.
    roles = roles
        .Prepend(RoleMappedSchema.ColumnRole.Feature.Bind(newFeatures))
        .Prepend(RoleMappedSchema.ColumnRole.Label.Bind(dstName))
        .Prepend(RoleMappedSchema.ColumnRole.Group.Bind(groupColumn));
    var trainer_input = new RoleMappedData(after_concatenation_key_label, roles);

    #endregion

    ch.Info("New Features: {0}:{1}", trainer_input.Schema.Feature.Value.Name, trainer_input.Schema.Feature.Value.Type);
    ch.Info("New Label: {0}:{1}", trainer_input.Schema.Label.Value.Name, trainer_input.Schema.Label.Value.Type);

    // We train the unique binary classifier.
    var trainedPredictor = trainer.Train(trainer_input);
    var predictors = new TScalarPredictor[] { trainedPredictor };

    // We train the reclassification classifier.
    if (_args.reclassicationPredictor != null)
    {
        // An intermediate final predictor (no reclassifier) is built and passed,
        // together with the original data, to the reclassification training.
        var pred = CreateFinalPredictor(ch, data, trans, count, _args, predictors, null);
        TrainReclassificationPredictor(data0, pred, ScikitSubComponent <ITrainer, SignatureTrainer> .AsSubComponent(_args.reclassicationPredictor));
    }
    // NOTE(review): _reclassPredictor is presumably set by TrainReclassificationPredictor — confirm.
    return(CreateFinalPredictor(ch, data, trans, count, _args, predictors, _reclassPredictor));
}
/// <summary>
/// Trains a single scalar predictor on a reshaped dataset and wraps it with
/// <c>CreateFinalPredictor</c>. The reshaped dataset has a new feature column that
/// concatenates the original features with the label column <c>labName</c>, and uses
/// <c>dstName</c> as its label.
/// When <c>_args.reclassicationPredictor</c> is set, a reclassification predictor is
/// trained as well before the final predictor is built.
/// </summary>
/// <param name="ch">Channel used for informational messages and exceptions.</param>
/// <param name="trainer">Trainer fitted once on the reshaped dataset.</param>
/// <param name="data">Input data providing the feature and label role bindings.</param>
/// <param name="count">Number of classes; when not positive (and the label is not a key),
/// the vector size is obtained from <c>MinMaxLabelOverDataSet</c>.</param>
/// <returns>The predictor produced by <c>CreateFinalPredictor</c>.</returns>
protected override TVectorPredictor TrainPredictor(IChannel ch, TScalarTrainer trainer, RoleMappedData data, int count)
{
    // Alias of the incoming data, passed to the reclassification training at the end.
    var data0 = data;

    string dstName, labName;
    var trans = MapLabelsAndInsertTransform(ch, data, out dstName, out labName, count, true, _args);
    var newFeatures = trans.Schema.GetTempColumnName() + "NF";

    // NOTE(review): the exact effect of DescribeTransform is not visible here; it is
    // configured on the two columns labName and dstName with oneRowPerColumn = true.
    var args3 = new DescribeTransform.Arguments { columns = new string[] { labName, dstName }, oneRowPerColumn = true };
    var desc = new DescribeTransform(Host, args3, trans);

    // Choose the declared type of labName before it is concatenated with the features:
    //  - single-column mode with an R4 label: the view is used as is;
    //  - single-column mode otherwise: labName is re-declared as R4;
    //  - key label: labName is re-declared as an R4 vector sized by the key count;
    //  - any other label: labName is re-declared as an R4 vector sized by 'count',
    //    or by the value returned by MinMaxLabelOverDataSet when count <= 0.
    IDataView viewI;
    if (_args.singleColumn && data.Schema.Label.Value.Type.RawKind() == DataKind.R4)
    {
        viewI = desc;
    }
    else if (_args.singleColumn)
    {
        var sch = new TypeReplacementSchema(desc.Schema, new[] { labName }, new[] { NumberType.R4 });
        viewI = new TypeReplacementDataView(desc, sch);
        #region debug
        #if (DEBUG)
        DebugChecking0(viewI, labName, false);
        #endif
        #endregion
    }
    else if (data.Schema.Label.Value.Type.IsKey())
    {
        int nb = data.Schema.Label.Value.Type.AsKey().KeyCount();
        var sch = new TypeReplacementSchema(desc.Schema, new[] { labName }, new[] { new VectorType(NumberType.R4, nb) });
        viewI = new TypeReplacementDataView(desc, sch);
        #region debug
        #if (DEBUG)
        int nb_;
        MinMaxLabelOverDataSet(trans, labName, out nb_);
        int count3;
        data.CheckMultiClassLabel(out count3);
        // Debug-only consistency check between the key count and the multiclass label count.
        if (count3 != nb)
        {
            throw ch.Except("Count mismatch (KeyCount){0} != {1}", nb, count3);
        }
        DebugChecking0(viewI, labName, true);
        DebugChecking0Vfloat(viewI, labName, nb);
        #endif
        #endregion
    }
    else
    {
        int nb;
        if (count <= 0)
        {
            MinMaxLabelOverDataSet(trans, labName, out nb);
        }
        else
        {
            nb = count;
        }
        var sch = new TypeReplacementSchema(desc.Schema, new[] { labName }, new[] { new VectorType(NumberType.R4, nb) });
        viewI = new TypeReplacementDataView(desc, sch);
        #region debug
        #if (DEBUG)
        DebugChecking0(viewI, labName, true);
        #endif
        #endregion
    }

    ch.Info("Merging column label '{0}' with features '{1}'", labName, data.Schema.Feature.Value.Name);
    // Builds a "Concat{col=<newFeatures>:<features>,<labName>}" transform specification.
    var args = string.Format("Concat{{col={0}:{1},{2}}}", newFeatures, data.Schema.Feature.Value.Name, labName);
    IDataView after_concatenation = ComponentCreation.CreateTransform(Host, args, viewI);

    // Replace the label and feature bindings with dstName and the concatenated column;
    // every other role binding of the input is kept.
    var roles = data.Schema.GetColumnRoleNames()
        .Where(kvp => kvp.Key.Value != RoleMappedSchema.ColumnRole.Label.Value)
        .Where(kvp => kvp.Key.Value != RoleMappedSchema.ColumnRole.Feature.Value)
        .Prepend(RoleMappedSchema.ColumnRole.Feature.Bind(newFeatures))
        .Prepend(RoleMappedSchema.ColumnRole.Label.Bind(dstName));
    var trainer_input = new RoleMappedData(after_concatenation, roles);

    ch.Info("New Features: {0}:{1}", trainer_input.Schema.Feature.Value.Name, trainer_input.Schema.Feature.Value.Type);
    ch.Info("New Label: {0}:{1}", trainer_input.Schema.Label.Value.Name, trainer_input.Schema.Label.Value.Type);

    // We train the unique binary classifier.
    var trainedPredictor = trainer.Train(trainer_input);
    var predictors = new TScalarPredictor[] { trainedPredictor };

    // We train the reclassification classifier.
    if (_args.reclassicationPredictor != null)
    {
        // An intermediate final predictor (no reclassifier) is built and passed,
        // together with the original data, to the reclassification training.
        var pred = CreateFinalPredictor(ch, data, trans, count, _args, predictors, null);
        TrainReclassificationPredictor(data0, pred, ScikitSubComponent <ITrainer, SignatureTrainer> .AsSubComponent(_args.reclassicationPredictor));
    }
    // NOTE(review): _reclassPredictor is presumably set by TrainReclassificationPredictor — confirm.
    return(CreateFinalPredictor(ch, data, trans, count, _args, predictors, _reclassPredictor));
}