Example #1
            protected TrainStateBase(IChannel ch, int numFeatures, LinearPredictor predictor, OnlineLinearTrainer<TTransformer, TModel> parent)
            {
                Contracts.CheckValue(ch, nameof(ch));
                ch.Check(numFeatures > 0, "Cannot train with zero features!");
                ch.AssertValueOrNull(predictor);
                ch.AssertValue(parent);
                ch.Assert(Iteration == 0);
                ch.Assert(Bias == 0);

                ParentHost = parent.Host;

                ch.Trace("{0} Initializing {1} on {2} features", DateTime.UtcNow, parent.Name, numFeatures);

                // We want a dense vector, to prevent memory creation during training
                // unless we have a lot of features.
                if (predictor != null)
                {
                    predictor.GetFeatureWeights(ref Weights);
                    VBufferUtils.Densify(ref Weights);
                    Bias = predictor.Bias;
                }
                else if (!string.IsNullOrWhiteSpace(parent.Args.InitialWeights))
                {
                    ch.Info("Initializing weights and bias to " + parent.Args.InitialWeights);
                    string[] weightStr = parent.Args.InitialWeights.Split(',');
                    if (weightStr.Length != numFeatures + 1)
                    {
                        throw ch.Except(
                                  "Could not initialize weights from 'initialWeights': expecting {0} values to initialize {1} weights and the intercept",
                                  numFeatures + 1, numFeatures);
                    }

                    var weightValues = new float[numFeatures];
                    for (int i = 0; i < numFeatures; i++)
                    {
                        weightValues[i] = float.Parse(weightStr[i], CultureInfo.InvariantCulture);
                    }
                    Weights = new VBuffer<float>(numFeatures, weightValues);
                    Bias = float.Parse(weightStr[numFeatures], CultureInfo.InvariantCulture);
                }
                else if (parent.Args.InitWtsDiameter > 0)
                {
                    var weightValues = new float[numFeatures];
                    for (int i = 0; i < numFeatures; i++)
                    {
                        weightValues[i] = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - 0.5f);
                    }
                    Weights = new VBuffer<float>(numFeatures, weightValues);
                    Bias = parent.Args.InitWtsDiameter * (parent.Host.Rand.NextSingle() - 0.5f);
                }
                else if (numFeatures <= 1000)
                {
                    Weights = VBufferUtils.CreateDense<float>(numFeatures);
                }
                else
                {
                    Weights = VBufferUtils.CreateEmpty<float>(numFeatures);
                }
                WeightsScale = 1;
            }
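Both examples accept initial weights through the same InitialWeights contract: a comma-separated string of numFeatures weight values followed by one intercept value, parsed with the invariant culture so "." is always the decimal separator. A minimal standalone sketch of that parsing contract (hypothetical helper names, not part of the trainer API):

using System;
using System.Globalization;
using System.Linq;

static class InitialWeightsDemo
{
    // Mirrors the contract above: numFeatures weights, then the bias,
    // comma-separated; InvariantCulture makes parsing locale-independent.
    public static (float[] Weights, float Bias) Parse(string initialWeights, int numFeatures)
    {
        string[] parts = initialWeights.Split(',');
        if (parts.Length != numFeatures + 1)
            throw new ArgumentException(
                $"Expected {numFeatures + 1} values: {numFeatures} weights plus the intercept.");

        float[] weights = parts.Take(numFeatures)
            .Select(s => float.Parse(s, CultureInfo.InvariantCulture))
            .ToArray();
        float bias = float.Parse(parts[numFeatures], CultureInfo.InvariantCulture);
        return (weights, bias);
    }

    static void Main()
    {
        // "0.5,-1.25,2" initializes two weights and an intercept of 2.
        var (w, b) = Parse("0.5,-1.25,2", numFeatures: 2);
        Console.WriteLine($"w = [{string.Join(", ", w)}], bias = {b}");
    }
}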
Example #2
        protected virtual void InitCore(IChannel ch, int numFeatures, LinearPredictor predictor)
        {
            Contracts.Check(numFeatures > 0, "Can't train with zero features!");
            Contracts.Check(NumFeatures == 0, "Can't re-use trainer!");
            Contracts.Assert(Iteration == 0);
            Contracts.Assert(Bias == 0);

            ch.Trace("{0} Initializing {1} on {2} features", DateTime.UtcNow, Name, numFeatures);
            NumFeatures = numFeatures;

            // We want a dense vector, to prevent memory creation during training
            // unless we have a lot of features.
            // REVIEW: make a setting
            if (predictor != null)
            {
                predictor.GetFeatureWeights(ref Weights);
                VBufferUtils.Densify(ref Weights);
                Bias = predictor.Bias;
            }
            else if (!string.IsNullOrWhiteSpace(Args.InitialWeights))
            {
                ch.Info("Initializing weights and bias to " + Args.InitialWeights);
                string[] weightStr = Args.InitialWeights.Split(',');
                if (weightStr.Length != NumFeatures + 1)
                {
                    throw Contracts.Except(
                              "Could not initialize weights from 'initialWeights': expecting {0} values to initialize {1} weights and the intercept",
                              NumFeatures + 1, NumFeatures);
                }

                Weights = VBufferUtils.CreateDense<Float>(NumFeatures);
                for (int i = 0; i < NumFeatures; i++)
                {
                    Weights.Values[i] = Float.Parse(weightStr[i], CultureInfo.InvariantCulture);
                }
                Bias = Float.Parse(weightStr[NumFeatures], CultureInfo.InvariantCulture);
            }
            else if (Args.InitWtsDiameter > 0)
            {
                Weights = VBufferUtils.CreateDense<Float>(NumFeatures);
                for (int i = 0; i < NumFeatures; i++)
                {
                    Weights.Values[i] = Args.InitWtsDiameter * (Host.Rand.NextSingle() - (Float)0.5);
                }
                Bias = Args.InitWtsDiameter * (Host.Rand.NextSingle() - (Float)0.5);
            }
            else if (NumFeatures <= 1000)
            {
                Weights = VBufferUtils.CreateDense<Float>(NumFeatures);
            }
            else
            {
                Weights = VBufferUtils.CreateEmpty<Float>(NumFeatures);
            }
            WeightsScale = 1;
        }
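In both examples, the InitWtsDiameter branch draws every weight and the bias uniformly from a window of width InitWtsDiameter centered at zero: NextSingle() yields a value in [0, 1), so diameter * (NextSingle() - 0.5) lands in [-diameter/2, +diameter/2). A small self-contained sketch of that arithmetic, substituting System.Random for the host's RNG:

using System;

static class InitWtsDiameterDemo
{
    // diameter * (u - 0.5) for u in [0, 1) is uniform on [-diameter/2, +diameter/2).
    public static float[] RandomWeights(int numFeatures, float diameter, Random rand)
    {
        var weights = new float[numFeatures];
        for (int i = 0; i < numFeatures; i++)
            weights[i] = diameter * ((float)rand.NextDouble() - 0.5f);
        return weights;
    }

    static void Main()
    {
        // With diameter 1.0, every initial weight falls in [-0.5, 0.5).
        var w = RandomWeights(4, 1.0f, new Random(42));
        Console.WriteLine(string.Join(", ", w));
    }
}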
        private TPredictor TrainCore(IChannel ch, RoleMappedData data, LinearPredictor predictor, int weightSetCount)
        {
            int numFeatures = data.Schema.Feature.Type.VectorSize;
            var cursorFactory = new FloatLabelCursor.Factory(data, CursOpt.Label | CursOpt.Features | CursOpt.Weight);
            int numThreads = 1;

            ch.CheckUserArg(numThreads > 0, nameof(_args.NumberOfThreads),
                            "The number of threads must be either null or a positive integer.");

            var positiveInstanceWeight = _args.PositiveInstanceWeight;
            VBuffer<float> weights = default;
            float bias = 0.0f;

            if (predictor != null)
            {
                predictor.GetFeatureWeights(ref weights);
                VBufferUtils.Densify(ref weights);
                bias = predictor.Bias;
            }
            else
            {
                weights = VBufferUtils.CreateDense<float>(numFeatures);
            }

            // Reference: Parasail. SymSGD.
            bool tuneLR = _args.LearningRate == null;
            var lr = _args.LearningRate ?? 1.0f;

            bool tuneNumLocIter = _args.UpdateFrequency == null;
            var numLocIter = _args.UpdateFrequency ?? 1;

            var l2Const = _args.L2Regularization;
            var piw = _args.PositiveInstanceWeight;

            // This is state of the learner that is shared with the native code.
            State state = new State();
            GCHandle stateGCHandle = default;

            try
            {
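                // Pinning keeps the State object at a fixed address so the native
                // SymSGD code can hold a raw pointer to it across the P/Invoke calls below.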
                stateGCHandle = GCHandle.Alloc(state, GCHandleType.Pinned);

                state.TotalInstancesProcessed = 0;
                using (InputDataManager inputDataManager = new InputDataManager(this, cursorFactory, ch))
                {
                    bool shouldInitialize = true;
                    using (var pch = Host.StartProgressChannel("Preprocessing"))
                        inputDataManager.LoadAsMuchAsPossible();

                    int iter = 0;
                    if (inputDataManager.IsFullyLoaded)
                    {
                        ch.Info("Data fully loaded into memory.");
                    }
                    using (var pch = Host.StartProgressChannel("Training"))
                    {
                        if (inputDataManager.IsFullyLoaded)
                        {
                            pch.SetHeader(new ProgressHeader(new[] { "iterations" }),
                                          entry => entry.SetProgress(0, state.PassIteration, _args.NumberOfIterations));
                            // If fully loaded, call the SymSGDNative and do not come back until learned for all iterations.
                            Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weights.Values, ref bias, numFeatures,
                                            _args.NumberOfIterations, numThreads, tuneNumLocIter, ref numLocIter, _args.Tolerance, _args.Shuffle, shouldInitialize, stateGCHandle);
                            shouldInitialize = false;
                        }
                        else
                        {
                            pch.SetHeader(new ProgressHeader(new[] { "iterations" }),
                                          entry => entry.SetProgress(0, iter, _args.NumberOfIterations));

                            // Since we loaded data in batch sizes, multiple passes over the loaded data is feasible.
                            int numPassesForABatch = inputDataManager.Count / 10000;
                            while (iter < _args.NumberOfIterations)
                            {
                                // We want to train on the final passes thoroughly (without learning on the same batch multiple times)
                                // This is for fine tuning the AUC. Experimentally, we found that 1 or 2 passes is enough
                                int numFinalPassesToTrainThoroughly = 2;
                                // We also do not want to learn for more passes than what the user asked
                                int numPassesForThisBatch = Math.Min(numPassesForABatch, _args.NumberOfIterations - iter - numFinalPassesToTrainThoroughly);
                                // If all of this leaves us with 0 passes, then set numPassesForThisBatch to 1
                                numPassesForThisBatch = Math.Max(1, numPassesForThisBatch);
                                state.PassIteration = iter;
                                Native.LearnAll(inputDataManager, tuneLR, ref lr, l2Const, piw, weights.Values, ref bias, numFeatures,
                                                numPassesForThisBatch, numThreads, tuneNumLocIter, ref numLocIter, _args.Tolerance, _args.Shuffle, shouldInitialize, stateGCHandle);
                                shouldInitialize = false;

                                // Check if we are done with going through the data
                                if (inputDataManager.FinishedTheLoad)
                                {
                                    iter += numPassesForThisBatch;
                                    // Check if more passes are left
                                    if (iter < _args.NumberOfIterations)
                                    {
                                        inputDataManager.RestartLoading(_args.Shuffle, Host);
                                    }
                                }

                                // If more passes are left, load as much as possible
                                if (iter < _args.NumberOfIterations)
                                {
                                    inputDataManager.LoadAsMuchAsPossible();
                                }
                            }
                        }

                        // Maps back the dense features that are mislocated
                        if (numThreads > 1)
                        {
                            Native.MapBackWeightVector(weights.Values, stateGCHandle);
                        }
                        Native.DeallocateSequentially(stateGCHandle);
                    }
                }
            }
            finally
            {
                if (stateGCHandle.IsAllocated)
                {
                    stateGCHandle.Free();
                }
            }
            return CreatePredictor(weights, bias);
        }
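In the streaming path above, the pass scheduling works out to roughly one pass per 10,000 loaded rows, capped so that the last two requested iterations remain for clean fine-tuning passes, and floored at one pass per batch. A standalone sketch of that arithmetic (hypothetical helper, same constants as the code above):

using System;

static class PassSchedulingDemo
{
    // One pass per ~10,000 loaded rows, reserving the final two requested
    // iterations for thorough fine-tuning passes, and never fewer than one pass.
    public static int PassesForBatch(int loadedCount, int totalIterations, int iterSoFar)
    {
        int numPassesForABatch = loadedCount / 10000;
        int numFinalPassesToTrainThoroughly = 2;
        int passes = Math.Min(numPassesForABatch,
            totalIterations - iterSoFar - numFinalPassesToTrainThoroughly);
        return Math.Max(1, passes);
    }

    static void Main()
    {
        // 50,000 loaded rows, 20 total iterations, none done yet -> 5 passes.
        Console.WriteLine(PassesForBatch(50000, 20, 0));
        // Near the end (19 of 20 done), the reserve clamps this to a single pass.
        Console.WriteLine(PassesForBatch(50000, 20, 19));
    }
}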