Пример #1
0
        /// <summary>
        /// Returns the number of columns of the underlying native DMatrix.
        /// </summary>
        public uint GetNumCols()
        {
            // The native API writes the count through a ref parameter.
            uint columnCount = 0;
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGDMatrixNumCol(_handle, ref columnCount));
            return columnCount;
        }
Пример #2
0
        /// <summary>
        /// Builds a dense DMatrix for XGBoost from a row-major float array,
        /// then attaches the optional per-row metadata (labels, weights, groups).
        /// </summary>
        /// <param name="data">Matrix as a Float array</param>
        /// <param name="nrow">Number of rows</param>
        /// <param name="ncol">Number of columns</param>
        /// <param name="labels">Labels</param>
        /// <param name="missing">Missing value</param>
        /// <param name="weights">Vector of weights (can be null)</param>
        /// <param name="groups">Vector of groups (can be null)</param>
        /// <param name="featureNames">Set names for features.</param>
        /// <param name="featureTypes">Set types for features.</param>
        public DMatrix(Float[] data, uint nrow, uint ncol, Float[] labels = null, Float missing = Float.NaN,
                       Float[] weights = null, uint[] groups = null,
                       IEnumerable <string> featureNames = null, IEnumerable <string> featureTypes = null)
        {
#if (DEBUG)
            // Keep managed references alive in debug builds while the native
            // side may still be reading the buffers.
            _gcKeep = new GcKeep()
            {
                data = data,
                labels = labels,
                weights = weights,
                groups = groups
            };
#endif
            WrappedXGBoostInterface.Check(
                WrappedXGBoostInterface.XGDMatrixCreateFromMat(data, nrow, ncol, missing, ref _handle));

            // Optional per-row metadata; each setter forwards to the native library.
            if (labels != null)
            {
                SetLabel(labels, nrow);
            }
            if (weights != null)
            {
                SetWeight(weights, nrow);
            }
            if (groups != null)
            {
                SetGroups(groups, nrow);
            }

            _featureNames = featureNames == null ? null : featureNames.ToArray();
            _featureTypes = featureTypes == null ? null : featureTypes.ToArray();
        }
Пример #3
0
        /// <summary>
        /// Initialize the Booster over a training DMatrix, optionally resuming
        /// from an already-trained model.
        /// </summary>
        /// <param name="parameters">Parameters for boosters. See <see cref="XGBoostArguments"/>.</param>
        /// <param name="data">training data<see cref="DMatrix"/></param>
        /// <param name="continuousTraining">Start from a trained model</param>
        public Booster(Dictionary <string, string> parameters, DMatrix data, Booster continuousTraining)
        {
            _featureNames = null;
            _featureTypes = null;
            // Feature count is taken from the training matrix and must be positive.
            _numFeatures  = (int)data.GetNumCols();
            Contracts.Assert(_numFeatures > 0);

            _handle = IntPtr.Zero;
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterCreate(new[] { data.Handle }, 1, ref _handle));
            if (continuousTraining != null)
            {
                // There should be another way than serialized then loading the model.
                // Continuous training is implemented by round-tripping the previous
                // booster through its serialized byte representation.
                var saved = continuousTraining.SaveRaw();
                unsafe
                {
                    // Pin the managed buffer so the native load can read it safely.
                    fixed(byte *buf = saved)
                    WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterLoadModelFromBuffer(_handle, buf, (uint)saved.Length));
                }
            }

            if (parameters != null)
            {
                SetParam(parameters);
            }
            // Force the native booster to complete its deferred initialization now.
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterLazyInit(_handle));
        }
Пример #4
0
 /// <summary>
 /// Boost the booster for one iteration, with customized gradient statistics.
 /// </summary>
 /// <param name="dtrain">DMatrix (training set)</param>
 /// <param name="grad">Gradient as a vector of floats (can be null).</param>
 /// <param name="hess">Hessian as a vector of floats (can be null).</param>
 private void Boost(DMatrix dtrain, ref VBuffer <Float> grad, ref VBuffer <Float> hess)
 {
     // Gradient and hessian are paired per observation, hence must match in length.
     Contracts.Assert(grad.Length == hess.Length, string.Format("grad / hess length mismatch: {0} / {1}", grad.Length, hess.Length));
     ValidateFeatures(dtrain);
     // The native call below reads the raw backing arrays, so both buffers must be dense.
     Contracts.Assert(grad.IsDense, "grad");
     Contracts.Assert(hess.IsDense, "hess");
     WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterBoostOneIter(_handle, dtrain.Handle, grad.Values, hess.Values, (uint)grad.Length));
 }
Пример #5
0
        /// <summary>
        /// Initialize the model by loading from a rabit checkpoint.
        /// </summary>
        /// <returns>The checkpoint version reported by the native call, or 0 when the call is compiled out.</returns>
        public int LoadRabitCheckpoint()
        {
            int version = 0;

            // NOTE(review): the native rabit call is compiled only when XGBOOST_RABIT
            // is NOT defined, which looks inverted for a rabit-specific API —
            // confirm the intended meaning of the symbol.
#if (!XGBOOST_RABIT)
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterLoadRabitCheckpoint(_handle, ref version));
#endif
            return(version);
        }
Пример #6
0
 /// <summary>
 /// Save the model to an in-memory buffer representation.
 /// </summary>
 /// <returns>The serialized model as a managed byte array.</returns>
 public byte[] SaveRaw()
 {
     unsafe
     {
         byte *buffer;
         uint  size = 0;
         // The native library hands back a pointer to a buffer it owns; copy it
         // into a managed array before returning (presumably the pointer is only
         // valid until the next native call — TODO confirm against the C API).
         WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterGetModelRaw(_handle, ref size, out buffer));
         byte[] content = new byte[size];
         Marshal.Copy((IntPtr)buffer, content, 0, content.Length);
         return(content);
     }
 }
Пример #7
0
        /// <summary>
        /// Initialize the booster with a byte string obtained by serializing a Booster
        /// (see <see cref="SaveRaw"/>).
        /// </summary>
        /// <param name="content">Serialized model bytes.</param>
        /// <param name="numFeatures">Number of features the model expects; must be positive.</param>
        public Booster(byte[] content, int numFeatures)
        {
            Contracts.Assert(numFeatures > 0);
            // Create an empty booster (no cached DMatrix), then load the model into it.
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterCreate(new IntPtr[] { }, 0, ref _handle));
            unsafe
            {
                // Pin the managed buffer so the native load can read it safely.
                fixed(byte *p = content)
                WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterLoadModelFromBuffer(_handle, p, (uint)content.Length));

                WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterLazyInit(_handle));
            }
            _numFeatures = numFeatures;
        }
Пример #8
0
        /// <summary>
        /// Evaluates a set of data and returns a string as a result.
        /// Used by the training function to display intermediate results on each iteration.
        /// </summary>
        /// <param name="dmats">Datasets to evaluate.</param>
        /// <param name="names">One display name per dataset.</param>
        /// <param name="iteration">Iteration number reported in the result string.</param>
        public string EvalSet(DMatrix[] dmats, string[] names, int iteration = 0)
        {
            // Every dataset must be compatible with the booster's features.
            foreach (var dmat in dmats)
            {
                ValidateFeatures(dmat);
            }

            var handles = dmats.Select(c => c.Handle).ToArray();
            IntPtr outResult;
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterEvalOneIter(Handle, iteration,
                                                                                       handles, names,
                                                                                       (uint)dmats.Length, out outResult));
            return WrappedXGBoostInterface.CastString(outResult);
        }
Пример #9
0
        /// <summary>
        /// Predict with data. Calls the official API of XGBoost library.
        /// The function is protected against concurrent calls.
        /// </summary>
        /// <param name="data">Data as DMatrix</param>
        /// <param name="predictedValues">Results of the prediction</param>
        /// <param name="outputMargin">Whether to output the raw untransformed margin value.</param>
        /// <param name="ntreeLimit">Limit number of trees in the prediction; defaults to 0 (use all trees).</param>
        public void PredictN(DMatrix data, ref VBuffer <Float> predictedValues, bool outputMargin = true, int ntreeLimit = 0)
        {
            // Bit 0x01 of the option mask asks the native library for raw margins.
            int optionMask = 0x00;

            if (outputMargin)
            {
                optionMask |= 0x01;
            }

            // REVIEW xadupre: see review in function PredictOneOff.

            ValidateFeatures(data);
            uint   length = 0;
            IntPtr ppreds = IntPtr.Zero;

            unsafe
            {
                // XGBoost uses OMP to parallelize the computation
                // of the output, each observation will be computed in a separate thread
                // and will use thread specific context.
                // Read https://blogs.msdn.microsoft.com/oldnewthing/20101122-00/?p=12233.
                // This function is called from multiple threads in C# for the evaluation with an iterator,
                // XGBoost parallelizes the computation for each evaluation (even if it is one in this case).
                // It chooses the number of thread with: nthread = omp_get_num_threads() (gbtree.cc)
                // The lock nullifies the parallelization done by Microsoft.ML.
                // There is no parallelization done by XGBoost on one observation.
                // Without the lock, the program fails (null pointer or something similar).
                // This item is a request: https://github.com/dmlc/xgboost/issues/1449.
                // As a consequence, this function is only used during training to evaluate the model on a batch of observations.
                // The reason is XGBoost is using caches in many places assuming XGBoost is called from one unique thread.
                // That explains this lock.
                // That function only relies on the offical API of XGBoost.
                // NOTE(review): lock(this) is discouraged (callers holding a reference
                // could deadlock it); a private gate object would be safer — confirm
                // no external code locks this instance before changing.
                lock (this)
                {
                    int t = WrappedXGBoostInterface.XGBoosterPredict(_handle, data.Handle,
                                                                     optionMask, (uint)ntreeLimit,
                                                                     ref length, ref ppreds);
                    WrappedXGBoostInterface.Check(t);
                }
                // ppreds points at a native buffer of `length` floats owned by XGBoost.
                Float *preds = (Float *)ppreds;
                Contracts.Assert(0 < length && length < Int32.MaxValue);
                // Grow the destination buffer only when it is too small; otherwise
                // reuse it and overwrite the first `length` entries.
                if (length > (ulong)predictedValues.Length)
                {
                    predictedValues = new VBuffer <Float>((int)length, new Float[length]);
                }
                WrappedXGBoostInterface.Copy((IntPtr)preds, 0, predictedValues.Values, (int)length);
            }
        }
Пример #10
0
        /// <summary>
        /// Update for one iteration, with objective function calculated internally.
        /// </summary>
        /// <param name="dtrain">Training data</param>
        /// <param name="iteration">Iteration number</param>
        /// <param name="grad">Gradient (used if fobj != null)</param>
        /// <param name="hess">Hessian (used if fobj != null)</param>
        /// <param name="prediction">Predictions (used if fobj != null)</param>
        /// <param name="fobj">Custom objective function, it returns gradient and hessian for this objective.</param>
        public void Update(DMatrix dtrain, int iteration,
                           ref VBuffer <Float> grad, ref VBuffer <Float> hess, ref VBuffer <Float> prediction,
                           FObjType fobj = null)
        {
            ValidateFeatures(dtrain);

            if (fobj != null)
            {
                // Custom objective: predict, let fobj fill gradient/hessian, boost manually.
                PredictN(dtrain, ref prediction);
                fobj(ref prediction, dtrain, ref grad, ref hess);
                Boost(dtrain, ref grad, ref hess);
                return;
            }

            // Built-in objective: the native library runs the whole iteration.
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterUpdateOneIter(_handle, iteration, dtrain.Handle));
        }
Пример #11
0
        /// <summary>
        /// Create a sparse matrix used in XGBoost, from CSR-format inputs.
        /// </summary>
        /// <param name="numColumn">number of features or columns</param>
        /// <param name="indptr">Pointer to row headers</param>
        /// <param name="indices">column indices</param>
        /// <param name="data">Matrix as a Float array</param>
        /// <param name="nrow">Rows in the matix</param>
        /// <param name="nelem">Number of nonzero elements in the matrix</param>
        /// <param name="labels">Labels</param>
        /// <param name="weights">Vector of weights (can be null)</param>
        /// <param name="groups">Vector of groups (can be null)</param>
        /// <param name="featureNames">Set names for features.</param>
        /// <param name="featureTypes">Set types for features.</param>
        public DMatrix(/*bst_ulong*/ uint numColumn, /*size_t*/ ulong[] indptr, uint[] indices, Float[] data,
                       uint nrow, uint nelem, Float[] labels = null,
                       Float[] weights = null, uint[] groups = null,
                       IEnumerable <string> featureNames = null, IEnumerable <string> featureTypes = null)
        {
            // CSR convention: the row-pointer array has one entry per row plus a
            // trailing sentinel, hence nrow + 1 entries.
            Contracts.Assert(nrow + 1 == indptr.Length);
#if (DEBUG)
            // Keep managed references alive in debug builds while the native side
            // may still be reading the buffers.
            _gcKeep = new GcKeep()
            {
                indptr  = indptr,
                indices = indices,
                data    = data,
                labels  = labels,
                weights = weights,
                groups  = groups
            };
#endif

            // Two native entry points depending on the XGBoost build: the extended
            // variant also takes the column count explicitly.
#if (XGB_EXTENDED)
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGDMatrixCreateFromCSREx(indptr,
                                                                                           indices, data, (ulong)indptr.Length, nelem, numColumn, ref _handle));
#else
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGDMatrixCreateFromCSR(indptr,
                                                                                         indices, data, (uint)indptr.Length, nelem, ref _handle));
#endif

            // Optional per-row metadata; each setter forwards to the native library.
            if (labels != null)
            {
                SetLabel(labels, nrow);
            }
            if (weights != null)
            {
                SetWeight(weights, nrow);
            }
            if (groups != null)
            {
                SetGroups(groups, nrow);
            }

            _featureNames = featureNames == null ? null : featureNames.ToArray();
            _featureTypes = featureTypes == null ? null : featureTypes.ToArray();

            // Sanity-check that the native matrix agrees with the declared shape.
            Contracts.Assert(nrow == (int)GetNumRows());
            Contracts.Assert((int)GetNumCols() == numColumn);
        }
Пример #12
0
        /// <summary>
        /// Check the buffer can hold the current input, resize it otherwise.
        /// </summary>
        /// <param name="numSparseFeatures">number of sparsed features (VBuffer.Count), can be different for every observation</param>
        /// <param name="numFeatures">number of features (VBuffer.Length), same for all observations</param>
        public void ResizeEntries(uint numSparseFeatures, int numFeatures)
        {
            // Each sparse entry is serialized as one float (value) plus one uint (index).
            uint xgboostEntriesSize = numSparseFeatures * (sizeof(float) + sizeof(uint));

            // Grow the byte buffer when it cannot hold the current observation.
            // NOTE(review): the original also tested
            // `xgboostEntriesSize > _xgboostEntries.Length * 2`, which is unreachable
            // (it implies `_xgboostEntries.Length < xgboostEntriesSize`); removed as
            // dead code. If a shrink-when-oversized policy was intended, the test
            // would have been `_xgboostEntries.Length > xgboostEntriesSize * 2`.
            if (_xgboostEntries == null || _xgboostEntries.Length < xgboostEntriesSize)
            {
                _xgboostEntries = new byte[xgboostEntriesSize];
            }

#if (XGB_EXTENDED)
            // Same grow-only policy for the native feature vector used by prediction;
            // the previous allocation is released before a larger one is created.
            // NOTE(review): `numFeatures > _regTreeFVecLength * 2` was likewise
            // unreachable and has been removed.
            if (_regTreeFVec == IntPtr.Zero || _regTreeFVecLength < numFeatures)
            {
                if (_regTreeFVec != IntPtr.Zero)
                {
                    WrappedXGBoostInterface.XGBoosterPredictNoInsideCacheFree(_regTreeFVec);
                }
                WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterPredictNoInsideCacheAllocate(numFeatures, ref _regTreeFVec));
                _regTreeFVecLength = numFeatures;
            }
#endif
        }
Пример #13
0
 /// <summary>
 /// Forces the native booster to complete its deferred initialization.
 /// </summary>
 public void LazyInit() => WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterLazyInit(_handle));
Пример #14
0
 /// <summary>
 /// Set float type property into the DMatrix.
 /// </summary>
 /// <param name="field">The field name of the information</param>
 /// <param name="data">The array of data to be set</param>
 /// <param name="nrow">Number of rows</param>
 private void SetFloatInfo(string field, IEnumerable <Float> data, uint nrow)
 {
     // Materialize once: the native call needs a contiguous array.
     var values = data.ToArray();
     WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGDMatrixSetFloatInfo(_handle, field, values, nrow));
 }
Пример #15
0
        /// <summary>
        /// Set group size of DMatrix (used for ranking).
        /// </summary>
        /// <param name="group">Group sizes; materialized into a contiguous array for the native call.</param>
        /// <param name="nrow">Count passed through to the native API.</param>
        public void SetGroups(IEnumerable <uint> group, uint nrow)
        {
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGDMatrixSetGroup(_handle, group.ToArray(), nrow));
        }
Пример #16
0
        /// <summary>
        /// Save the current booster to rabit checkpoint.
        /// </summary>
        public void SaveRabitCheckpoint()
        {
            // NOTE(review): the native rabit call is compiled only when XGBOOST_RABIT
            // is NOT defined, which looks inverted for a rabit-specific API —
            // confirm the intended meaning of the symbol (same pattern as
            // LoadRabitCheckpoint).
#if (!XGBOOST_RABIT)
            WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGBoosterSaveRabitCheckpoint(_handle));
#endif
        }
Пример #17
0
 /// <summary>
 /// Saves the DMatrix to a binary file via the native library.
 /// </summary>
 /// <param name="name">Destination file path.</param>
 /// <param name="silent">Verbosity flag forwarded to the native call (0 by default).</param>
 public void SaveBinary(string name, int silent = 0)
     => WrappedXGBoostInterface.Check(WrappedXGBoostInterface.XGDMatrixSaveBinary(_handle, name, silent));