private void FetchWorker(BlockingCollection<Block> toCompress, IDataView data, ColumnCodec[] activeColumns,
    int rowsPerBlock, Stopwatch sw, IChannel ch, IProgressChannel pch, ExceptionMarshaller exMarshaller)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValueOrNull(pch);
    ch.AssertValue(exMarshaller);
    try
    {
        ch.AssertValue(toCompress);
        ch.AssertValue(data);
        ch.AssertValue(activeColumns);
        ch.AssertValue(sw);
        ch.Assert(rowsPerBlock > 0);

        // The main thread handles fetching from the cursor, and storing it into blocks passed to toCompress.
        HashSet<int> activeSet = new HashSet<int>(activeColumns.Select(col => col.SourceIndex));
        long blockIndex = 0;
        int remainingInBlock = rowsPerBlock;
        using (RowCursor cursor = data.GetRowCursor(activeSet.Contains))
        {
            WritePipe[] pipes = new WritePipe[activeColumns.Length];
            for (int c = 0; c < activeColumns.Length; ++c)
            {
                pipes[c] = WritePipe.Create(this, cursor, activeColumns[c]);
            }
            for (int c = 0; c < pipes.Length; ++c)
            {
                pipes[c].BeginBlock();
            }

            long rows = 0;
            if (pch != null)
            {
                pch.SetHeader(new ProgressHeader(new[] { "rows" }), e => e.SetProgress(0, rows));
            }

            while (cursor.MoveNext())
            {
                for (int c = 0; c < pipes.Length; ++c)
                {
                    pipes[c].FetchAndWrite();
                }
                if (--remainingInBlock == 0)
                {
                    for (int c = 0; c < pipes.Length; ++c)
                    {
                        // REVIEW: It may be better if EndBlock got moved to a different worker thread.
                        toCompress.Add(new Block(pipes[c].EndBlock(), c, blockIndex), exMarshaller.Token);
                        pipes[c].BeginBlock();
                    }
                    remainingInBlock = rowsPerBlock;
                    blockIndex++;
                }
                rows++;
            }

            if (remainingInBlock < rowsPerBlock)
            {
                for (int c = 0; c < pipes.Length; ++c)
                {
                    toCompress.Add(new Block(pipes[c].EndBlock(), c, blockIndex), exMarshaller.Token);
                }
            }

            Contracts.Assert(rows == (blockIndex + 1) * rowsPerBlock - remainingInBlock);
            _rowCount = rows;
            if (pch != null)
            {
                pch.Checkpoint(rows);
            }
        }

        toCompress.CompleteAdding();
    }
    catch (Exception ex)
    {
        exMarshaller.Set("cursoring", ex);
    }
}
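// A minimal, self-contained sketch (not the actual ML.NET compression worker) of the
// producer/consumer handoff used by FetchWorker above: the producer adds items and calls
// CompleteAdding(), and consumers drain the collection with GetConsumingEnumerable, which
// unblocks once adding is complete and the queue is empty. The int payload and the
// "compressing" step are placeholders for the real Block type and compression logic.
using System;
using System.Collections.Concurrent;
using System.Threading.Tasks;

internal static class ProducerConsumerSketch
{
    public static void Run()
    {
        var queue = new BlockingCollection<int>(boundedCapacity: 4);
        var consumer = Task.Run(() =>
        {
            foreach (int block in queue.GetConsumingEnumerable())
                Console.WriteLine($"compressing block {block}");
        });

        for (int i = 0; i < 10; i++)
            queue.Add(i);           // corresponds to toCompress.Add(new Block(...), token)
        queue.CompleteAdding();     // corresponds to toCompress.CompleteAdding() in FetchWorker

        consumer.Wait();
    }
}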
private FieldAwareFactorizationMachineModelParameters TrainCore(IChannel ch, IProgressChannel pch, RoleMappedData data,
    RoleMappedData validData = null, FieldAwareFactorizationMachineModelParameters predictor = null)
{
    _host.AssertValue(ch);
    _host.AssertValue(pch);
    data.CheckBinaryLabel();

    var featureColumns = data.Schema.GetColumns(RoleMappedSchema.ColumnRole.Feature);
    int fieldCount = featureColumns.Count;
    int totalFeatureCount = 0;
    int[] fieldColumnIndexes = new int[fieldCount];
    for (int f = 0; f < fieldCount; f++)
    {
        var col = featureColumns[f];
        _host.Assert(!col.IsHidden);
        if (!(col.Type is VectorDataViewType vectorType) || !vectorType.IsKnownSize || vectorType.ItemType != NumberDataViewType.Single)
        {
            throw ch.ExceptParam(nameof(data), "Training feature column '{0}' must be a known-size vector of Single, but has type: {1}.", col.Name, col.Type);
        }
        _host.Assert(vectorType.Size > 0);
        fieldColumnIndexes[f] = col.Index;
        totalFeatureCount += vectorType.Size;
    }
    ch.Check(checked(totalFeatureCount * fieldCount * _latentDimAligned) <= Utils.ArrayMaxSize, "Latent dimension or the number of fields too large");

    if (predictor != null)
    {
        ch.Check(predictor.FeatureCount == totalFeatureCount, "Input model's feature count mismatches training feature count");
        ch.Check(predictor.LatentDimension == _latentDim, "Input model's latent dimension mismatches trainer's");
    }
    if (validData != null)
    {
        validData.CheckBinaryLabel();
        var validFeatureColumns = validData.Schema.GetColumns(RoleMappedSchema.ColumnRole.Feature);
        _host.Assert(fieldCount == validFeatureColumns.Count);
        for (int f = 0; f < fieldCount; f++)
        {
            var featCol = featureColumns[f];
            var validFeatCol = validFeatureColumns[f];
            _host.Assert(featCol.Name == validFeatCol.Name);
            _host.Assert(featCol.Type == validFeatCol.Type);
        }
    }

    bool shuffle = _shuffle;
    if (shuffle && !data.Data.CanShuffle)
    {
        ch.Warning("Training data does not support shuffling, so ignoring request to shuffle");
        shuffle = false;
    }
    var rng = shuffle ? _host.Rand : null;

    var featureGetters = new ValueGetter<VBuffer<float>>[fieldCount];
    var featureBuffer = new VBuffer<float>();
    var featureValueBuffer = new float[totalFeatureCount];
    var featureIndexBuffer = new int[totalFeatureCount];
    var featureFieldBuffer = new int[totalFeatureCount];
    var latentSum = new AlignedArray(fieldCount * fieldCount * _latentDimAligned, 16);

    var metricNames = new List<string>() { "Training-loss" };
    if (validData != null)
    {
        metricNames.Add("Validation-loss");
    }

    int iter = 0;
    long exampleCount = 0;
    long badExampleCount = 0;
    long validBadExampleCount = 0;
    double loss = 0;
    double validLoss = 0;
    pch.SetHeader(new ProgressHeader(metricNames.ToArray(), new string[] { "iterations", "examples" }), entry =>
    {
        entry.SetProgress(0, iter, _numIterations);
        entry.SetProgress(1, exampleCount);
    });

    var columns = data.Schema.Schema.Where(x => fieldColumnIndexes.Contains(x.Index)).ToList();
    columns.Add(data.Schema.Label.Value);
    if (data.Schema.Weight != null)
    {
        columns.Add(data.Schema.Weight.Value);
    }

    InitializeTrainingState(fieldCount, totalFeatureCount, predictor, out float[] linearWeights,
        out AlignedArray latentWeightsAligned, out float[] linearAccSqGrads, out AlignedArray latentAccSqGradsAligned);

    // refer to Algorithm 3 in https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
    while (iter++ < _numIterations)
    {
        using (var cursor = data.Data.GetRowCursor(columns, rng))
        {
            var labelGetter = RowCursorUtils.GetLabelGetter(cursor, data.Schema.Label.Value.Index);
            var weightGetter = data.Schema.Weight?.Index is int weightIdx
                ? RowCursorUtils.GetGetterAs<float>(NumberDataViewType.Single, cursor, weightIdx)
                : null;
            for (int i = 0; i < fieldCount; i++)
            {
                featureGetters[i] = cursor.GetGetter<VBuffer<float>>(cursor.Schema[fieldColumnIndexes[i]]);
            }

            loss = 0;
            exampleCount = 0;
            badExampleCount = 0;
            while (cursor.MoveNext())
            {
                float label = 0;
                float weight = 1;
                int count = 0;
                float modelResponse = 0;
                labelGetter(ref label);
                weightGetter?.Invoke(ref weight);

                // A NaN or infinite label/weight propagates into annihilation, so a single finiteness check covers both.
                float annihilation = label - label + weight - weight;
                if (!FloatUtils.IsFinite(annihilation))
                {
                    badExampleCount++;
                    continue;
                }
                if (!FieldAwareFactorizationMachineUtils.LoadOneExampleIntoBuffer(featureGetters, featureBuffer, _norm,
                    ref count, featureFieldBuffer, featureIndexBuffer, featureValueBuffer))
                {
                    badExampleCount++;
                    continue;
                }

                // refer to Algorithm 1 in [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
                FieldAwareFactorizationMachineInterface.CalculateIntermediateVariables(fieldCount, _latentDimAligned, count,
                    featureFieldBuffer, featureIndexBuffer, featureValueBuffer, linearWeights, latentWeightsAligned, latentSum, ref modelResponse);
                var slope = CalculateLossSlope(label, modelResponse);

                // refer to Algorithm 2 in [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
                FieldAwareFactorizationMachineInterface.CalculateGradientAndUpdate(_lambdaLinear, _lambdaLatent, _learningRate,
                    fieldCount, _latentDimAligned, weight, count, featureFieldBuffer, featureIndexBuffer, featureValueBuffer,
                    latentSum, slope, linearWeights, latentWeightsAligned, linearAccSqGrads, latentAccSqGradsAligned);

                loss += weight * CalculateLoss(label, modelResponse);
                exampleCount++;
            }
            loss /= exampleCount;
        }

        if (_verbose)
        {
            if (validData == null)
            {
                pch.Checkpoint(loss, iter, exampleCount);
            }
            else
            {
                validLoss = CalculateAvgLoss(ch, validData, _norm, linearWeights, latentWeightsAligned, _latentDimAligned,
                    latentSum, featureFieldBuffer, featureIndexBuffer, featureValueBuffer, featureBuffer, ref validBadExampleCount);
                pch.Checkpoint(loss, validLoss, iter, exampleCount);
            }
        }
    }

    if (badExampleCount != 0)
    {
        ch.Warning($"Skipped {badExampleCount} examples with bad label/weight/features in training set");
    }
    if (validBadExampleCount != 0)
    {
        ch.Warning($"Skipped {validBadExampleCount} examples with bad label/weight/features in validation set");
    }

    return new FieldAwareFactorizationMachineModelParameters(_host, _norm, fieldCount, totalFeatureCount, _latentDim, linearWeights, latentWeightsAligned);
}
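// A small, self-contained illustration of the `annihilation` check used in TrainCore above:
// for finite inputs, label - label + weight - weight is exactly 0, but NaN and infinity
// propagate through the arithmetic, so one finiteness test covers both values. The explicit
// IsNaN/IsInfinity test here merely stands in for ML.NET's internal FloatUtils.IsFinite.
using System;

internal static class AnnihilationSketch
{
    public static bool HasFiniteLabelAndWeight(float label, float weight)
    {
        float annihilation = label - label + weight - weight;
        return !float.IsNaN(annihilation) && !float.IsInfinity(annihilation);
    }

    public static void Run()
    {
        Console.WriteLine(HasFiniteLabelAndWeight(1f, 2f));                     // True
        Console.WriteLine(HasFiniteLabelAndWeight(float.NaN, 2f));              // False
        Console.WriteLine(HasFiniteLabelAndWeight(1f, float.PositiveInfinity)); // False
    }
}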
/// <summary>
/// Train and return a booster.
/// </summary>
public static Booster Train(IChannel ch, IProgressChannel pch, Dictionary<string, object> parameters, Dataset dtrain,
    Dataset dvalid = null, int numIteration = 100, bool verboseEval = true, int earlyStoppingRound = 0)
{
    // create Booster.
    Booster bst = new Booster(parameters, dtrain, dvalid);

    // Disable early stopping if we don't have validation data.
    if (dvalid == null && earlyStoppingRound > 0)
    {
        earlyStoppingRound = 0;
        ch.Warning("Validation dataset not present, early stopping will be disabled.");
    }

    int bestIter = 0;
    double bestScore = double.MaxValue;
    double factorToSmallerBetter = 1.0;
    var metric = (string)parameters["metric"];
    if (earlyStoppingRound > 0 && (metric == "auc" || metric == "ndcg" || metric == "map"))
    {
        factorToSmallerBetter = -1.0;
    }

    const int evalFreq = 50;
    var metrics = new List<string>() { "Iteration" };
    var units = new List<string>() { "iterations" };
    if (verboseEval)
    {
        ch.Assert(parameters.ContainsKey("metric"));
        metrics.Add("Training-" + parameters["metric"]);
        if (dvalid != null)
        {
            metrics.Add("Validation-" + parameters["metric"]);
        }
    }
    var header = new ProgressHeader(metrics.ToArray(), units.ToArray());

    int iter = 0;
    double trainError = double.NaN;
    double validError = double.NaN;
    pch.SetHeader(header, e =>
    {
        e.SetProgress(0, iter, numIteration);
        if (verboseEval)
        {
            e.SetProgress(1, trainError);
            if (dvalid != null)
            {
                e.SetProgress(2, validError);
            }
        }
    });

    for (iter = 0; iter < numIteration; ++iter)
    {
        if (bst.Update())
        {
            break;
        }

        if (earlyStoppingRound > 0)
        {
            validError = bst.EvalValid();
            if (validError * factorToSmallerBetter < bestScore)
            {
                bestScore = validError * factorToSmallerBetter;
                bestIter = iter;
            }
            if (iter - bestIter >= earlyStoppingRound)
            {
                ch.Info($"Met early stopping, best iteration: {bestIter + 1}, best score: {bestScore / factorToSmallerBetter}");
                break;
            }
        }

        if ((iter + 1) % evalFreq == 0)
        {
            if (verboseEval)
            {
                trainError = bst.EvalTrain();
                if (dvalid == null)
                {
                    pch.Checkpoint(new double?[] { iter + 1, trainError });
                }
                else
                {
                    if (earlyStoppingRound == 0)
                    {
                        validError = bst.EvalValid();
                    }
                    pch.Checkpoint(new double?[] { iter + 1, trainError, validError });
                }
            }
            else
            {
                pch.Checkpoint(new double?[] { iter + 1 });
            }
        }
    }

    // Set the BestIteration.
    if (iter != numIteration && earlyStoppingRound > 0)
    {
        bst.BestIteration = bestIter + 1;
    }
    return bst;
}
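// A condensed, self-contained sketch of the early-stopping bookkeeping in Train above:
// multiplying the validation score by -1 for larger-is-better metrics (auc, ndcg, map)
// lets the same smaller-is-better comparison and patience counter handle both kinds of
// metric. The score array passed to Run is made up purely to exercise the logic.
using System;

internal static class EarlyStoppingSketch
{
    public static int Run(double[] validScores, int earlyStoppingRound, bool largerIsBetter)
    {
        double factor = largerIsBetter ? -1.0 : 1.0;
        double bestScore = double.MaxValue;
        int bestIter = 0;

        for (int iter = 0; iter < validScores.Length; ++iter)
        {
            double adjusted = validScores[iter] * factor;
            if (adjusted < bestScore)
            {
                bestScore = adjusted;
                bestIter = iter;
            }
            // Stop once no improvement has been seen for earlyStoppingRound iterations.
            if (iter - bestIter >= earlyStoppingRound)
            {
                Console.WriteLine($"Stopping at iteration {iter + 1}, best iteration {bestIter + 1}, best score {bestScore / factor}");
                break;
            }
        }
        return bestIter;
    }
}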
/// <inheritdoc/>
private protected override bool CheckConvergence(IProgressChannel pch, int iter, FloatLabelCursor.Factory cursorFactory,
    DualsTableBase duals, IdToIdxLookup idToIdx, VBuffer<float>[] weights, VBuffer<float>[] bestWeights,
    float[] biasUnreg, float[] bestBiasUnreg, float[] biasReg, float[] bestBiasReg, long count,
    Double[] metrics, ref Double bestPrimalLoss, ref int bestIter)
{
    Contracts.AssertValue(weights);
    Contracts.AssertValue(duals);
    int numClasses = weights.Length;
    Contracts.Assert(duals.Length >= numClasses * count);
    Contracts.AssertValueOrNull(idToIdx);
    Contracts.Assert(Utils.Size(weights) == numClasses);
    Contracts.Assert(Utils.Size(biasReg) == numClasses);
    Contracts.Assert(Utils.Size(biasUnreg) == numClasses);
    Contracts.Assert(Utils.Size(metrics) == 6);

    var reportedValues = new Double?[metrics.Length + 1];
    reportedValues[metrics.Length] = iter;
    var lossSum = new CompensatedSum();
    var dualLossSum = new CompensatedSum();
    int numFeatures = weights[0].Length;

    using (var cursor = cursorFactory.Create())
    {
        long row = 0;
        Func<DataViewRowId, long, long> getIndexFromIdAndRow = GetIndexFromIdAndRowGetter(idToIdx, biasReg.Length);
        // Iterates through data to compute loss function.
        while (cursor.MoveNext())
        {
            var instanceWeight = GetInstanceWeight(cursor);
            var features = cursor.Features;
            var label = (int)cursor.Label;
            var labelOutput = WDot(in features, in weights[label], biasReg[label] + biasUnreg[label]);
            Double subLoss = 0;
            Double subDualLoss = 0;
            long idx = getIndexFromIdAndRow(cursor.Id, row);
            long dualIndex = idx * numClasses;
            for (int iClass = 0; iClass < numClasses; iClass++)
            {
                if (iClass == label)
                {
                    dualIndex++;
                    continue;
                }
                var currentClassOutput = WDot(in features, in weights[iClass], biasReg[iClass] + biasUnreg[iClass]);
                subLoss += _loss.Loss(labelOutput - currentClassOutput, 1);
                Contracts.Assert(dualIndex == iClass + idx * numClasses);
                var dual = duals[dualIndex++];
                subDualLoss += _loss.DualLoss(1, dual);
            }
            lossSum.Add(subLoss * instanceWeight);
            dualLossSum.Add(subDualLoss * instanceWeight);
            row++;
        }
        Host.Assert(idToIdx == null || row * numClasses == duals.Length);
    }

    Contracts.Assert(SdcaTrainerOptions.L2Regularization.HasValue);
    Contracts.Assert(SdcaTrainerOptions.L1Threshold.HasValue);
    Double l2Const = SdcaTrainerOptions.L2Regularization.Value;
    Double l1Threshold = SdcaTrainerOptions.L1Threshold.Value;

    Double weightsL1Norm = 0;
    Double weightsL2NormSquared = 0;
    Double biasRegularizationAdjustment = 0;
    for (int iClass = 0; iClass < numClasses; iClass++)
    {
        weightsL1Norm += VectorUtils.L1Norm(in weights[iClass]) + Math.Abs(biasReg[iClass]);
        weightsL2NormSquared += VectorUtils.NormSquared(weights[iClass]) + biasReg[iClass] * biasReg[iClass];
        biasRegularizationAdjustment += biasReg[iClass] * biasUnreg[iClass];
    }

    Double l1Regularizer = l1Threshold * l2Const * weightsL1Norm;
    var l2Regularizer = l2Const * weightsL2NormSquared * 0.5;
    var newLoss = lossSum.Sum / count + l2Regularizer + l1Regularizer;
    var newDualLoss = dualLossSum.Sum / count - l2Regularizer - l2Const * biasRegularizationAdjustment;
    var dualityGap = newLoss - newDualLoss;

    metrics[(int)MetricKind.Loss] = newLoss;
    metrics[(int)MetricKind.DualLoss] = newDualLoss;
    metrics[(int)MetricKind.DualityGap] = dualityGap;
    metrics[(int)MetricKind.BiasUnreg] = biasUnreg[0];
    metrics[(int)MetricKind.BiasReg] = biasReg[0];
    // Cast the count of non-zero weights to Double before dividing, so the sparsity ratio is not truncated by integer division.
    metrics[(int)MetricKind.L1Sparsity] = SdcaTrainerOptions.L1Threshold == 0
        ? 1
        : (Double)weights.Sum(weight => weight.GetValues().Count(w => w != 0)) / (numClasses * numFeatures);

    bool converged = dualityGap / newLoss < SdcaTrainerOptions.ConvergenceTolerance;

    if (metrics[(int)MetricKind.Loss] < bestPrimalLoss)
    {
        for (int iClass = 0; iClass < numClasses; iClass++)
        {
            // Maintain a copy of weights and bias with best primal loss thus far.
            // This is some extra work and uses extra memory, but it seems worth doing it.
            // REVIEW: Sparsify bestWeights?
            weights[iClass].CopyTo(ref bestWeights[iClass]);
            bestBiasReg[iClass] = biasReg[iClass];
            bestBiasUnreg[iClass] = biasUnreg[iClass];
        }
        bestPrimalLoss = metrics[(int)MetricKind.Loss];
        bestIter = iter;
    }

    for (int i = 0; i < metrics.Length; i++)
    {
        reportedValues[i] = metrics[i];
    }
    if (pch != null)
    {
        pch.Checkpoint(reportedValues);
    }
    return converged;
}
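// A self-contained sketch of the convergence test at the end of CheckConvergence above:
// SDCA is considered converged when the duality gap (primal loss minus dual loss), taken
// relative to the primal loss, falls below the configured tolerance. The numbers in Run
// are illustrative only.
using System;

internal static class DualityGapSketch
{
    public static bool Converged(double primalLoss, double dualLoss, double tolerance)
    {
        double dualityGap = primalLoss - dualLoss;
        return dualityGap / primalLoss < tolerance;
    }

    public static void Run()
    {
        Console.WriteLine(Converged(primalLoss: 0.4230, dualLoss: 0.4226, tolerance: 0.001)); // True
        Console.WriteLine(Converged(primalLoss: 0.5000, dualLoss: 0.4000, tolerance: 0.001)); // False
    }
}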
/// <summary>
/// Trains and returns a booster.
/// </summary>
/// <param name="ch">IChannel</param>
/// <param name="pch">IProgressChannel</param>
/// <param name="numberOfTrees">Number of trained trees</param>
/// <param name="parameters">Parameters; see <see cref="XGBoostArguments"/></param>
/// <param name="dtrain">Training set</param>
/// <param name="numBoostRound">Number of trees to train</param>
/// <param name="obj">Custom objective</param>
/// <param name="maximize">Whether to maximize feval.</param>
/// <param name="verboseEval">Requires at least one item in evals.
/// If "verbose_eval" is true, the evaluation metric on the validation set is
/// printed at each boosting stage.</param>
/// <param name="xgbModel">For continued training.</param>
/// <param name="saveBinaryDMatrix">Save the DMatrix in binary format (for debugging purposes).</param>
public static Booster Train(IChannel ch, IProgressChannel pch, out int numberOfTrees,
    Dictionary<string, string> parameters, DMatrix dtrain, int numBoostRound = 10,
    Booster.FObjType obj = null, bool maximize = false, bool verboseEval = true,
    Booster xgbModel = null, string saveBinaryDMatrix = null)
{
#if (!XGBOOST_RABIT)
    if (WrappedXGBoostInterface.RabitIsDistributed() == 1)
    {
        var pname = WrappedXGBoostInterface.RabitGetProcessorName();
        ch.Info("[WrappedXGBoostTraining.Train] start {0}:{1}", pname, WrappedXGBoostInterface.RabitGetRank());
    }
#endif

    if (!string.IsNullOrEmpty(saveBinaryDMatrix))
    {
        dtrain.SaveBinary(saveBinaryDMatrix);
    }

    Booster bst = new Booster(parameters, dtrain, xgbModel);
    int numParallelTree = 1;
    int nboost = 0;

    if (parameters != null && parameters.ContainsKey("num_parallel_tree"))
    {
        numParallelTree = Convert.ToInt32(parameters["num_parallel_tree"]);
        nboost /= numParallelTree;
    }
    if (parameters.ContainsKey("num_class"))
    {
        int numClass = Convert.ToInt32(parameters["num_class"]);
        nboost /= numClass;
    }

    var prediction = new VBuffer<Float>();
    var grad = new VBuffer<Float>();
    var hess = new VBuffer<Float>();
    var start = DateTime.Now;

#if (!XGBOOST_RABIT)
    int version = bst.LoadRabitCheckpoint();
    ch.Check(WrappedXGBoostInterface.RabitGetWorldSize() != 1 || version == 0);
#else
    int version = 0;
#endif

    int startIteration = version / 2;
    nboost += startIteration;

    // Compute a power of ten derived from numBoostRound; it is used below to throttle
    // how often the training error is re-evaluated.
    int logten = 0;
    int temp = numBoostRound * 5;
    while (temp > 0)
    {
        logten += 1;
        temp /= 10;
    }
    temp = Math.Max(logten - 2, 0);
    logten = 1;
    while (temp-- > 0)
    {
        logten *= 10;
    }

    var metrics = new List<string>() { "Iteration", "Training Time" };
    var units = new List<string>() { "iterations", "seconds" };
    if (verboseEval)
    {
        metrics.Add("Training Error");
        metrics.Add(parameters["objective"]);
    }
    var header = new ProgressHeader(metrics.ToArray(), units.ToArray());

    int iter = 0;
    double trainTime = 0;
    double trainError = double.NaN;
    pch.SetHeader(header, e =>
    {
        e.SetProgress(0, iter, numBoostRound - startIteration);
        e.SetProgress(1, trainTime);
        if (verboseEval)
        {
            e.SetProgress(2, trainError);
        }
    });

    for (iter = startIteration; iter < numBoostRound; ++iter)
    {
        if (version % 2 == 0)
        {
            bst.Update(dtrain, iter, ref grad, ref hess, ref prediction, obj);
#if (!XGBOOST_RABIT)
            bst.SaveRabitCheckpoint();
#endif
            version += 1;
        }

#if (!XGBOOST_RABIT)
        ch.Check(WrappedXGBoostInterface.RabitGetWorldSize() == 1 || version == WrappedXGBoostInterface.RabitVersionNumber());
#endif
        nboost += 1;
        trainTime = (DateTime.Now - start).TotalMilliseconds;

        if (verboseEval)
        {
            pch.Checkpoint(new double?[] { iter, trainTime, trainError });
            if (iter == startIteration || iter == numBoostRound - 1 || iter % logten == 0 ||
                (DateTime.Now - start) > TimeSpan.FromMinutes(2))
            {
                string strainError = bst.EvalSet(new[] { dtrain }, new[] { "Train" }, iter);
                // Example: "[0]\tTrain-error:0.028612"
                if (!string.IsNullOrEmpty(strainError) && strainError.Contains(":"))
                {
                    double val;
                    if (double.TryParse(strainError.Split(':').Last(), out val))
                    {
                        trainError = val;
                    }
                }
            }
        }
        else
        {
            pch.Checkpoint(new double?[] { iter, trainTime });
        }
        version += 1;
    }

    numberOfTrees = numBoostRound * numParallelTree;
    if (WrappedXGBoostInterface.RabitIsDistributed() == 1)
    {
        var pname = WrappedXGBoostInterface.RabitGetProcessorName();
        ch.Info("[WrappedXGBoostTraining.Train] end {0}:{1}", pname, WrappedXGBoostInterface.RabitGetRank());
    }
    return bst;
}
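// A self-contained sketch of the metric parsing done in Train above: XGBoost's EvalSet
// returns a string such as "[0]\tTrain-error:0.028612", and the training error is the
// number after the last ':'. Unlike the code above, this sketch parses with
// CultureInfo.InvariantCulture so the result does not depend on the machine's locale.
using System;
using System.Globalization;
using System.Linq;

internal static class EvalStringSketch
{
    public static double? ParseTrainError(string evalResult)
    {
        if (string.IsNullOrEmpty(evalResult) || !evalResult.Contains(":"))
            return null;
        string last = evalResult.Split(':').Last();
        return double.TryParse(last, NumberStyles.Float, CultureInfo.InvariantCulture, out double val)
            ? val
            : (double?)null;
    }

    public static void Run()
    {
        Console.WriteLine(ParseTrainError("[0]\tTrain-error:0.028612"));  // 0.028612
        Console.WriteLine(ParseTrainError("no metric here") == null);     // True
    }
}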