Example #1
        private void FetchWorker(BlockingCollection<Block> toCompress, IDataView data,
                                 ColumnCodec[] activeColumns, int rowsPerBlock, Stopwatch sw, IChannel ch, IProgressChannel pch, ExceptionMarshaller exMarshaller)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValueOrNull(pch);
            ch.AssertValue(exMarshaller);
            try
            {
                ch.AssertValue(toCompress);
                ch.AssertValue(data);
                ch.AssertValue(activeColumns);
                ch.AssertValue(sw);
                ch.Assert(rowsPerBlock > 0);

                // The main thread handles fetching from the cursor, and storing it into blocks passed to toCompress.
                HashSet<int> activeSet        = new HashSet<int>(activeColumns.Select(col => col.SourceIndex));
                long         blockIndex       = 0;
                int          remainingInBlock = rowsPerBlock;
                using (RowCursor cursor = data.GetRowCursor(activeSet.Contains))
                {
                    WritePipe[] pipes = new WritePipe[activeColumns.Length];
                    for (int c = 0; c < activeColumns.Length; ++c)
                    {
                        pipes[c] = WritePipe.Create(this, cursor, activeColumns[c]);
                    }
                    for (int c = 0; c < pipes.Length; ++c)
                    {
                        pipes[c].BeginBlock();
                    }

                    long rows = 0;
                    if (pch != null)
                    {
                        pch.SetHeader(new ProgressHeader(new[] { "rows" }), e => e.SetProgress(0, rows));
                    }

                    while (cursor.MoveNext())
                    {
                        for (int c = 0; c < pipes.Length; ++c)
                        {
                            pipes[c].FetchAndWrite();
                        }
                        if (--remainingInBlock == 0)
                        {
                            for (int c = 0; c < pipes.Length; ++c)
                            {
                                // REVIEW: It may be better if EndBlock got moved to a different worker thread.
                                toCompress.Add(new Block(pipes[c].EndBlock(), c, blockIndex), exMarshaller.Token);
                                pipes[c].BeginBlock();
                            }
                            remainingInBlock = rowsPerBlock;
                            blockIndex++;
                        }

                        rows++;
                    }
                    if (remainingInBlock < rowsPerBlock)
                    {
                        for (int c = 0; c < pipes.Length; ++c)
                        {
                            toCompress.Add(new Block(pipes[c].EndBlock(), c, blockIndex), exMarshaller.Token);
                        }
                    }

                    Contracts.Assert(rows == (blockIndex + 1) * rowsPerBlock - remainingInBlock);
                    _rowCount = rows;
                    if (pch != null)
                    {
                        pch.Checkpoint(rows);
                    }
                }

                toCompress.CompleteAdding();
            }
            catch (Exception ex)
            {
                exMarshaller.Set("cursoring", ex);
            }
        }
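
The worker above is one half of a producer/consumer pipeline: it fills a BlockingCollection from the data cursor, marshals any exception instead of letting the thread die, and calls CompleteAdding so downstream compressors can drain and exit. Below is a minimal standalone sketch of that shape, using a hypothetical Block record and an inline consumer rather than the ML.NET types:

using System;
using System.Collections.Concurrent;
using System.Threading.Tasks;

internal sealed record Block(byte[] Payload, int Column, long Index);

internal static class ProducerConsumerSketch
{
    public static void Run()
    {
        var toCompress = new BlockingCollection<Block>(boundedCapacity: 8);
        Exception marshalled = null;

        var producer = Task.Run(() =>
        {
            try
            {
                for (long i = 0; i < 100; i++)
                    toCompress.Add(new Block(new byte[64], 0, i));
            }
            catch (Exception ex)
            {
                // Marshal the exception instead of crashing the worker silently.
                marshalled = ex;
            }
            finally
            {
                // Always unblock the consumer, mirroring CompleteAdding above.
                toCompress.CompleteAdding();
            }
        });

        foreach (var block in toCompress.GetConsumingEnumerable())
        {
            // A real consumer would compress and write the block here.
        }

        producer.Wait();
        if (marshalled != null)
            throw new AggregateException("cursoring", marshalled);
    }
}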
Example #2
        private FieldAwareFactorizationMachineModelParameters TrainCore(IChannel ch, IProgressChannel pch, RoleMappedData data,
                                                                        RoleMappedData validData = null, FieldAwareFactorizationMachineModelParameters predictor = null)
        {
            _host.AssertValue(ch);
            _host.AssertValue(pch);

            data.CheckBinaryLabel();
            var featureColumns    = data.Schema.GetColumns(RoleMappedSchema.ColumnRole.Feature);
            int fieldCount        = featureColumns.Count;
            int totalFeatureCount = 0;

            int[] fieldColumnIndexes = new int[fieldCount];
            for (int f = 0; f < fieldCount; f++)
            {
                var col = featureColumns[f];
                _host.Assert(!col.IsHidden);
                if (!(col.Type is VectorDataViewType vectorType) ||
                    !vectorType.IsKnownSize ||
                    vectorType.ItemType != NumberDataViewType.Single)
                {
                    throw ch.ExceptParam(nameof(data), "Training feature column '{0}' must be a known-size vector of Single, but has type: {1}.", col.Name, col.Type);
                }
                _host.Assert(vectorType.Size > 0);
                fieldColumnIndexes[f] = col.Index;
                totalFeatureCount    += vectorType.Size;
            }
            ch.Check(checked(totalFeatureCount * fieldCount * _latentDimAligned) <= Utils.ArrayMaxSize, "Latent dimension or the number of fields too large");
            if (predictor != null)
            {
                ch.Check(predictor.FeatureCount == totalFeatureCount, "Input model's feature count mismatches training feature count");
                ch.Check(predictor.LatentDimension == _latentDim, "Input model's latent dimension mismatches trainer's");
            }
            if (validData != null)
            {
                validData.CheckBinaryLabel();
                var validFeatureColumns = validData.Schema.GetColumns(RoleMappedSchema.ColumnRole.Feature);
                _host.Assert(fieldCount == validFeatureColumns.Count);
                for (int f = 0; f < fieldCount; f++)
                {
                    var featCol      = featureColumns[f];
                    var validFeatCol = validFeatureColumns[f];
                    _host.Assert(featCol.Name == validFeatCol.Name);
                    _host.Assert(featCol.Type == validFeatCol.Type);
                }
            }
            bool shuffle = _shuffle;

            if (shuffle && !data.Data.CanShuffle)
            {
                ch.Warning("Training data does not support shuffling, so ignoring request to shuffle");
                shuffle = false;
            }
            var rng                = shuffle ? _host.Rand : null;
            var featureGetters     = new ValueGetter<VBuffer<float>>[fieldCount];
            var featureBuffer      = new VBuffer<float>();
            var featureValueBuffer = new float[totalFeatureCount];
            var featureIndexBuffer = new int[totalFeatureCount];
            var featureFieldBuffer = new int[totalFeatureCount];
            var latentSum          = new AlignedArray(fieldCount * fieldCount * _latentDimAligned, 16);
            var metricNames        = new List<string>()
            {
                "Training-loss"
            };

            if (validData != null)
            {
                metricNames.Add("Validation-loss");
            }
            int    iter                 = 0;
            long   exampleCount         = 0;
            long   badExampleCount      = 0;
            long   validBadExampleCount = 0;
            double loss                 = 0;
            double validLoss            = 0;

            pch.SetHeader(new ProgressHeader(metricNames.ToArray(), new string[] { "iterations", "examples" }), entry =>
            {
                entry.SetProgress(0, iter, _numIterations);
                entry.SetProgress(1, exampleCount);
            });

            var columns = data.Schema.Schema.Where(x => fieldColumnIndexes.Contains(x.Index)).ToList();

            columns.Add(data.Schema.Label.Value);
            if (data.Schema.Weight != null)
            {
                columns.Add(data.Schema.Weight.Value);
            }

            InitializeTrainingState(fieldCount, totalFeatureCount, predictor, out float[] linearWeights,
                                    out AlignedArray latentWeightsAligned, out float[] linearAccSqGrads, out AlignedArray latentAccSqGradsAligned);

            // refer to Algorithm 3 in https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
            while (iter++ < _numIterations)
            {
                using (var cursor = data.Data.GetRowCursor(columns, rng))
                {
                    var labelGetter  = RowCursorUtils.GetLabelGetter(cursor, data.Schema.Label.Value.Index);
                    var weightGetter = data.Schema.Weight?.Index is int weightIdx ? RowCursorUtils.GetGetterAs<float>(NumberDataViewType.Single, cursor, weightIdx) : null;

                    for (int i = 0; i < fieldCount; i++)
                    {
                        featureGetters[i] = cursor.GetGetter<VBuffer<float>>(cursor.Schema[fieldColumnIndexes[i]]);
                    }
                    loss            = 0;
                    exampleCount    = 0;
                    badExampleCount = 0;
                    while (cursor.MoveNext())
                    {
                        float label         = 0;
                        float weight        = 1;
                        int   count         = 0;
                        float modelResponse = 0;
                        labelGetter(ref label);
                        weightGetter?.Invoke(ref weight);
                        float annihilation = label - label + weight - weight;
                        if (!FloatUtils.IsFinite(annihilation))
                        {
                            badExampleCount++;
                            continue;
                        }
                        if (!FieldAwareFactorizationMachineUtils.LoadOneExampleIntoBuffer(featureGetters, featureBuffer, _norm, ref count,
                                                                                          featureFieldBuffer, featureIndexBuffer, featureValueBuffer))
                        {
                            badExampleCount++;
                            continue;
                        }

                        // refer to Algorithm 1 in [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
                        FieldAwareFactorizationMachineInterface.CalculateIntermediateVariables(fieldCount, _latentDimAligned, count,
                                                                                               featureFieldBuffer, featureIndexBuffer, featureValueBuffer, linearWeights, latentWeightsAligned, latentSum, ref modelResponse);
                        var slope = CalculateLossSlope(label, modelResponse);

                        // refer to Algorithm 2 in [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
                        FieldAwareFactorizationMachineInterface.CalculateGradientAndUpdate(_lambdaLinear, _lambdaLatent, _learningRate, fieldCount, _latentDimAligned, weight, count,
                                                                                           featureFieldBuffer, featureIndexBuffer, featureValueBuffer, latentSum, slope, linearWeights, latentWeightsAligned, linearAccSqGrads, latentAccSqGradsAligned);
                        loss += weight * CalculateLoss(label, modelResponse);
                        exampleCount++;
                    }
                    loss /= exampleCount;
                }

                if (_verbose)
                {
                    if (validData == null)
                    {
                        pch.Checkpoint(loss, iter, exampleCount);
                    }
                    else
                    {
                        validLoss = CalculateAvgLoss(ch, validData, _norm, linearWeights, latentWeightsAligned, _latentDimAligned, latentSum,
                                                     featureFieldBuffer, featureIndexBuffer, featureValueBuffer, featureBuffer, ref validBadExampleCount);
                        pch.Checkpoint(loss, validLoss, iter, exampleCount);
                    }
                }
            }
            if (badExampleCount != 0)
            {
                ch.Warning($"Skipped {badExampleCount} examples with bad label/weight/features in training set");
            }
            if (validBadExampleCount != 0)
            {
                ch.Warning($"Skipped {validBadExampleCount} examples with bad label/weight/features in validation set");
            }

            return new FieldAwareFactorizationMachineModelParameters(_host, _norm, fieldCount, totalFeatureCount, _latentDim, linearWeights, latentWeightsAligned);
        }
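
The `annihilation` line in the training loop is a compact finiteness check: for finite inputs each `x - x` term is exactly zero, while a NaN or infinity turns the whole sum into NaN. A standalone sketch of the trick, using explicit float.IsNaN/IsInfinity in place of ML.NET's FloatUtils.IsFinite:

using System;

internal static class FiniteCheckSketch
{
    // True when both label and weight are finite; NaN and +/-infinity
    // poison the (x - x) terms and make the sum NaN.
    public static bool IsGoodExample(float label, float weight)
    {
        float annihilation = label - label + weight - weight;
        return !float.IsNaN(annihilation) && !float.IsInfinity(annihilation);
    }

    public static void Main()
    {
        Console.WriteLine(IsGoodExample(1f, 2f));                      // True
        Console.WriteLine(IsGoodExample(float.NaN, 2f));               // False
        Console.WriteLine(IsGoodExample(1f, float.PositiveInfinity));  // False
    }
}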
Example #3
        /// <summary>
        /// Train and return a booster.
        /// </summary>
        public static Booster Train(IChannel ch, IProgressChannel pch,
                                    Dictionary<string, object> parameters, Dataset dtrain, Dataset dvalid = null, int numIteration = 100,
                                    bool verboseEval = true, int earlyStoppingRound = 0)
        {
            // create Booster.
            Booster bst = new Booster(parameters, dtrain, dvalid);

            // Disable early stopping if we don't have validation data.
            if (dvalid == null && earlyStoppingRound > 0)
            {
                earlyStoppingRound = 0;
                ch.Warning("Validation dataset not present, early stopping will be disabled.");
            }

            int    bestIter              = 0;
            double bestScore             = double.MaxValue;
            double factorToSmallerBetter = 1.0;

            var metric = (string)parameters["metric"];

            if (earlyStoppingRound > 0 && (metric == "auc" || metric == "ndcg" || metric == "map"))
            {
                factorToSmallerBetter = -1.0;
            }

            const int evalFreq = 50;

            var metrics = new List<string>()
            {
                "Iteration"
            };
            var units = new List<string>()
            {
                "iterations"
            };

            if (verboseEval)
            {
                ch.Assert(parameters.ContainsKey("metric"));
                metrics.Add("Training-" + parameters["metric"]);
                if (dvalid != null)
                {
                    metrics.Add("Validation-" + parameters["metric"]);
                }
            }

            var header = new ProgressHeader(metrics.ToArray(), units.ToArray());

            int    iter       = 0;
            double trainError = double.NaN;
            double validError = double.NaN;

            pch.SetHeader(header, e =>
            {
                e.SetProgress(0, iter, numIteration);
                if (verboseEval)
                {
                    e.SetProgress(1, trainError);
                    if (dvalid != null)
                    {
                        e.SetProgress(2, validError);
                    }
                }
            });
            for (iter = 0; iter < numIteration; ++iter)
            {
                if (bst.Update())
                {
                    break;
                }

                if (earlyStoppingRound > 0)
                {
                    validError = bst.EvalValid();
                    if (validError * factorToSmallerBetter < bestScore)
                    {
                        bestScore = validError * factorToSmallerBetter;
                        bestIter  = iter;
                    }
                    if (iter - bestIter >= earlyStoppingRound)
                    {
                        ch.Info($"Met early stopping, best iteration: {bestIter + 1}, best score: {bestScore / factorToSmallerBetter}");
                        break;
                    }
                }
                if ((iter + 1) % evalFreq == 0)
                {
                    if (verboseEval)
                    {
                        trainError = bst.EvalTrain();
                        if (dvalid == null)
                        {
                            pch.Checkpoint(new double?[] { iter + 1, trainError });
                        }
                        else
                        {
                            if (earlyStoppingRound == 0)
                            {
                                validError = bst.EvalValid();
                            }
                            pch.Checkpoint(new double?[] { iter + 1,
                                                           trainError, validError });
                        }
                    }
                    else
                    {
                        pch.Checkpoint(new double?[] { iter + 1 });
                    }
                }
            }
            // Set the BestIteration.
            if (iter != numIteration && earlyStoppingRound > 0)
            {
                bst.BestIteration = bestIter + 1;
            }
            return bst;
        }
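
The early-stopping logic above normalizes every metric to "smaller is better" by multiplying auc/ndcg/map scores by -1, so a single comparison covers both directions. Isolated as a sketch, with a caller-supplied evaluation delegate standing in for bst.EvalValid():

using System;

internal static class EarlyStoppingSketch
{
    // Returns the 1-based best iteration, as the log message above reports it.
    public static int Run(Func<int, double> evalValid, int numIterations,
                          int earlyStoppingRound, bool largerIsBetter)
    {
        double factor = largerIsBetter ? -1.0 : 1.0; // flip sign so smaller is better
        double bestScore = double.MaxValue;
        int bestIter = 0;

        for (int iter = 0; iter < numIterations; ++iter)
        {
            double score = evalValid(iter) * factor;
            if (score < bestScore)
            {
                bestScore = score;
                bestIter = iter;
            }
            // Stop after earlyStoppingRound iterations without improvement.
            if (iter - bestIter >= earlyStoppingRound)
                break;
        }
        return bestIter + 1;
    }
}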
Example #4
        /// <inheritdoc/>
        private protected override bool CheckConvergence(
            IProgressChannel pch,
            int iter,
            FloatLabelCursor.Factory cursorFactory,
            DualsTableBase duals,
            IdToIdxLookup idToIdx,
            VBuffer<float>[] weights,
            VBuffer<float>[] bestWeights,
            float[] biasUnreg,
            float[] bestBiasUnreg,
            float[] biasReg,
            float[] bestBiasReg,
            long count,
            Double[] metrics,
            ref Double bestPrimalLoss,
            ref int bestIter)
        {
            Contracts.AssertValue(weights);
            Contracts.AssertValue(duals);
            int numClasses = weights.Length;

            Contracts.Assert(duals.Length >= numClasses * count);
            Contracts.AssertValueOrNull(idToIdx);
            Contracts.Assert(Utils.Size(weights) == numClasses);
            Contracts.Assert(Utils.Size(biasReg) == numClasses);
            Contracts.Assert(Utils.Size(biasUnreg) == numClasses);
            Contracts.Assert(Utils.Size(metrics) == 6);
            var reportedValues = new Double?[metrics.Length + 1];

            reportedValues[metrics.Length] = iter;
            var lossSum     = new CompensatedSum();
            var dualLossSum = new CompensatedSum();
            int numFeatures = weights[0].Length;

            using (var cursor = cursorFactory.Create())
            {
                long row = 0;
                Func<DataViewRowId, long, long> getIndexFromIdAndRow = GetIndexFromIdAndRowGetter(idToIdx, biasReg.Length);
                // Iterates through data to compute loss function.
                while (cursor.MoveNext())
                {
                    var    instanceWeight = GetInstanceWeight(cursor);
                    var    features       = cursor.Features;
                    var    label          = (int)cursor.Label;
                    var    labelOutput    = WDot(in features, in weights[label], biasReg[label] + biasUnreg[label]);
                    Double subLoss        = 0;
                    Double subDualLoss    = 0;
                    long   idx            = getIndexFromIdAndRow(cursor.Id, row);
                    long   dualIndex      = idx * numClasses;
                    for (int iClass = 0; iClass < numClasses; iClass++)
                    {
                        if (iClass == label)
                        {
                            dualIndex++;
                            continue;
                        }

                        var currentClassOutput = WDot(in features, in weights[iClass], biasReg[iClass] + biasUnreg[iClass]);
                        subLoss += _loss.Loss(labelOutput - currentClassOutput, 1);
                        Contracts.Assert(dualIndex == iClass + idx * numClasses);
                        var dual = duals[dualIndex++];
                        subDualLoss += _loss.DualLoss(1, dual);
                    }

                    lossSum.Add(subLoss * instanceWeight);
                    dualLossSum.Add(subDualLoss * instanceWeight);

                    row++;
                }
                Host.Assert(idToIdx == null || row * numClasses == duals.Length);
            }

            Contracts.Assert(SdcaTrainerOptions.L2Regularization.HasValue);
            Contracts.Assert(SdcaTrainerOptions.L1Threshold.HasValue);
            Double l2Const     = SdcaTrainerOptions.L2Regularization.Value;
            Double l1Threshold = SdcaTrainerOptions.L1Threshold.Value;

            Double weightsL1Norm                = 0;
            Double weightsL2NormSquared         = 0;
            Double biasRegularizationAdjustment = 0;

            for (int iClass = 0; iClass < numClasses; iClass++)
            {
                weightsL1Norm                += VectorUtils.L1Norm(in weights[iClass]) + Math.Abs(biasReg[iClass]);
                weightsL2NormSquared         += VectorUtils.NormSquared(weights[iClass]) + biasReg[iClass] * biasReg[iClass];
                biasRegularizationAdjustment += biasReg[iClass] * biasUnreg[iClass];
            }

            Double l1Regularizer = SdcaTrainerOptions.L1Threshold.Value * l2Const * weightsL1Norm;
            var    l2Regularizer = l2Const * weightsL2NormSquared * 0.5;

            var newLoss     = lossSum.Sum / count + l2Regularizer + l1Regularizer;
            var newDualLoss = dualLossSum.Sum / count - l2Regularizer - l2Const * biasRegularizationAdjustment;
            var dualityGap  = newLoss - newDualLoss;

            metrics[(int)MetricKind.Loss]       = newLoss;
            metrics[(int)MetricKind.DualLoss]   = newDualLoss;
            metrics[(int)MetricKind.DualityGap] = dualityGap;
            metrics[(int)MetricKind.BiasUnreg]  = biasUnreg[0];
            metrics[(int)MetricKind.BiasReg]    = biasReg[0];
            metrics[(int)MetricKind.L1Sparsity] = SdcaTrainerOptions.L1Threshold == 0 ? 1 : (Double)weights.Sum(
                weight => weight.GetValues().Count(w => w != 0)) / (numClasses * numFeatures);

            bool converged = dualityGap / newLoss < SdcaTrainerOptions.ConvergenceTolerance;

            if (metrics[(int)MetricKind.Loss] < bestPrimalLoss)
            {
                for (int iClass = 0; iClass < numClasses; iClass++)
                {
                    // Maintain a copy of weights and bias with best primal loss thus far.
                    // This is some extra work and uses extra memory, but it seems worth doing it.
                    // REVIEW: Sparsify bestWeights?
                    weights[iClass].CopyTo(ref bestWeights[iClass]);
                    bestBiasReg[iClass]   = biasReg[iClass];
                    bestBiasUnreg[iClass] = biasUnreg[iClass];
                }

                bestPrimalLoss = metrics[(int)MetricKind.Loss];
                bestIter       = iter;
            }

            for (int i = 0; i < metrics.Length; i++)
            {
                reportedValues[i] = metrics[i];
            }
            if (pch != null)
            {
                pch.Checkpoint(reportedValues);
            }

            return converged;
        }
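
The convergence criterion above is the relative duality gap: the primal loss (regularized training loss) minus the dual loss, divided by the primal loss. By weak duality the gap is non-negative and shrinks toward zero as SDCA approaches the optimum. Reduced to a sketch with placeholder values rather than the ML.NET metrics array:

internal static class DualityGapSketch
{
    public static bool Converged(double primalLoss, double dualLoss, double tolerance)
    {
        // Weak duality: dualLoss <= primalLoss, so the gap is non-negative.
        double dualityGap = primalLoss - dualLoss;
        return dualityGap / primalLoss < tolerance;
    }
}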
Example #5
        /// <summary>
        /// Trains and returns a booster.
        /// </summary>
        /// <param name="ch">IChannel</param>
        /// <param name="pch">IProgressChannel</param>
        /// <param name="numberOfTrees">Number of trained trees</param>
        /// <param name="parameters">Parameters see <see cref="XGBoostArguments"/></param>
        /// <param name="dtrain">Training set</param>
        /// <param name="numBoostRound">Number of trees to train</param>
        /// <param name="obj">Custom objective</param>
        /// <param name="maximize">Whether to maximize feval.</param>
        /// <param name="verboseEval">Requires at least one item in evals.
        ///     If "verbose_eval" is True then the evaluation metric on the validation set is
        ///     printed at each boosting stage.</param>
        /// <param name="xgbModel">For continuous training.</param>
        /// <param name="saveBinaryDMatrix">Save DMatrix in binary format (for debugging purpose).</param>
        public static Booster Train(IChannel ch, IProgressChannel pch, out int numberOfTrees,
                                    Dictionary<string, string> parameters, DMatrix dtrain, int numBoostRound = 10,
                                    Booster.FObjType obj     = null, bool maximize    = false,
                                    bool verboseEval         = true, Booster xgbModel = null,
                                    string saveBinaryDMatrix = null)
        {
#if (!XGBOOST_RABIT)
            if (WrappedXGBoostInterface.RabitIsDistributed() == 1)
            {
                var pname = WrappedXGBoostInterface.RabitGetProcessorName();
                ch.Info("[WrappedXGBoostTraining.Train] start {0}:{1}", pname, WrappedXGBoostInterface.RabitGetRank());
            }
#endif

            if (!string.IsNullOrEmpty(saveBinaryDMatrix))
            {
                dtrain.SaveBinary(saveBinaryDMatrix);
            }

            Booster bst             = new Booster(parameters, dtrain, xgbModel);
            int     numParallelTree = 1;
            int     nboost          = 0;

            if (parameters != null && parameters.ContainsKey("num_parallel_tree"))
            {
                numParallelTree = Convert.ToInt32(parameters["num_parallel_tree"]);
                nboost         /= numParallelTree;
            }
            if (parameters.ContainsKey("num_class"))
            {
                int numClass = Convert.ToInt32(parameters["num_class"]);
                nboost /= numClass;
            }

            var prediction = new VBuffer<Float>();
            var grad       = new VBuffer<Float>();
            var hess       = new VBuffer<Float>();
            var start      = DateTime.Now;

#if (!XGBOOST_RABIT)
            int version = bst.LoadRabitCheckpoint();
            ch.Check(WrappedXGBoostInterface.RabitGetWorldSize() != 1 || version == 0);
#else
            int version = 0;
#endif
            int startIteration = version / 2;
            nboost += startIteration;
            // Derive a power-of-ten logging period from numBoostRound, so the
            // detailed evaluation below runs less often on long trainings.
            int logten = 0;
            int temp   = numBoostRound * 5;
            while (temp > 0)
            {
                logten += 1;
                temp   /= 10;
            }
            temp   = Math.Max(logten - 2, 0);
            logten = 1;
            while (temp-- > 0)
            {
                logten *= 10;
            }

            var metrics = new List<string>()
            {
                "Iteration", "Training Time"
            };
            var units = new List<string>()
            {
                "iterations", "seconds"
            };
            if (verboseEval)
            {
                metrics.Add("Training Error");
                metrics.Add(parameters["objective"]);
            }
            var header = new ProgressHeader(metrics.ToArray(), units.ToArray());

            int    iter       = 0;
            double trainTime  = 0;
            double trainError = double.NaN;

            pch.SetHeader(header, e =>
            {
                e.SetProgress(0, iter, numBoostRound - startIteration);
                e.SetProgress(1, trainTime);
                if (verboseEval)
                {
                    e.SetProgress(2, trainError);
                }
            });
            for (iter = startIteration; iter < numBoostRound; ++iter)
            {
                if (version % 2 == 0)
                {
                    bst.Update(dtrain, iter, ref grad, ref hess, ref prediction, obj);
#if (!XGBOOST_RABIT)
                    bst.SaveRabitCheckpoint();
#endif
                    version += 1;
                }

#if (!XGBOOST_RABIT)
                ch.Check(WrappedXGBoostInterface.RabitGetWorldSize() == 1 ||
                         version == WrappedXGBoostInterface.RabitVersionNumber());
#endif
                nboost += 1;

                trainTime = (DateTime.Now - start).TotalMilliseconds;

                if (verboseEval)
                {
                    pch.Checkpoint(new double?[] { iter, trainTime, trainError });
                    if (iter == startIteration || iter == numBoostRound - 1 || iter % logten == 0 ||
                        (DateTime.Now - start) > TimeSpan.FromMinutes(2))
                    {
                        string strainError = bst.EvalSet(new[] { dtrain }, new[] { "Train" }, iter);
                        // Example: "[0]\tTrain-error:0.028612"
                        if (!string.IsNullOrEmpty(strainError) && strainError.Contains(":"))
                        {
                            double val;
                            if (double.TryParse(strainError.Split(':').Last(), out val))
                            {
                                trainError = val;
                            }
                        }
                    }
                }
                else
                {
                    pch.Checkpoint(new double?[] { iter, trainTime });
                }

                version += 1;
            }
            numberOfTrees = numBoostRound * numParallelTree;
            if (WrappedXGBoostInterface.RabitIsDistributed() == 1)
            {
                var pname = WrappedXGBoostInterface.RabitGetProcessorName();
                ch.Info("[WrappedXGBoostTraining.Train] end {0}:{1}", pname, WrappedXGBoostInterface.RabitGetRank());
            }
            return bst;
        }
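
The inner block above recovers the training error by splitting XGBoost's evaluation string (e.g. "[0]\tTrain-error:0.028612") on ':' and parsing the tail. A standalone sketch of that parsing, with invariant-culture parsing added so the result does not depend on the machine's locale:

using System;
using System.Globalization;
using System.Linq;

internal static class EvalParseSketch
{
    // Returns the numeric value after the last ':' in an XGBoost eval string,
    // or null when the string carries no parsable metric.
    public static double? ParseTrainError(string evalOutput)
    {
        if (string.IsNullOrEmpty(evalOutput) || !evalOutput.Contains(":"))
            return null;
        string tail = evalOutput.Split(':').Last();
        return double.TryParse(tail, NumberStyles.Float, CultureInfo.InvariantCulture, out double val)
            ? val
            : (double?)null;
    }

    public static void Main()
    {
        Console.WriteLine(ParseTrainError("[0]\tTrain-error:0.028612")); // 0.028612
    }
}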