Example #1
            internal ColumnInfo(Column item, Arguments args)
            {
                Contracts.CheckValue(item, nameof(item));
                Contracts.CheckValue(args, nameof(args));

                Input  = item.Source ?? item.Name;
                Output = item.Name;

                if (item.UseAlpha ?? args.UseAlpha)
                {
                    Colors |= ColorBits.Alpha; Planes++;
                }
                if (item.UseRed ?? args.UseRed)
                {
                    Colors |= ColorBits.Red; Planes++;
                }
                if (item.UseGreen ?? args.UseGreen)
                {
                    Colors |= ColorBits.Green; Planes++;
                }
                if (item.UseBlue ?? args.UseBlue)
                {
                    Colors |= ColorBits.Blue; Planes++;
                }
                Contracts.CheckUserArg(Planes > 0, nameof(item.UseRed), "Need to use at least one color plane");

                Interleave = item.InterleaveArgb ?? args.InterleaveArgb;

                AsFloat = item.Convert ?? args.Convert;
                if (!AsFloat)
                {
                    Offset = Defaults.Offset;
                    Scale  = Defaults.Scale;
                }
                else
                {
                    Offset = item.Offset ?? args.Offset ?? Defaults.Offset;
                    Scale  = item.Scale ?? args.Scale ?? Defaults.Scale;
                    Contracts.CheckUserArg(FloatUtils.IsFinite(Offset), nameof(item.Offset));
                    Contracts.CheckUserArg(FloatUtils.IsFiniteNonZero(Scale), nameof(item.Scale));
                }
            }
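One pattern used throughout this constructor is worth calling out: each per-column setting is nullable, and item.UseAlpha ?? args.UseAlpha means "use the per-column value when explicitly set, otherwise fall back to the transform-wide default". A minimal standalone sketch of the idea (the type and member names are illustrative stand-ins, not the ML.NET types):

    using System;

    class GlobalArgs { public bool UseAlpha; }          // transform-wide default
    class ColumnArgs { public bool? UseAlpha; }         // null means "inherit the global value"

    static class FallbackDemo
    {
        static void Main()
        {
            var args = new GlobalArgs { UseAlpha = true };
            var col  = new ColumnArgs();                // per-column value left unset
            bool useAlpha = col.UseAlpha ?? args.UseAlpha;
            Console.WriteLine(useAlpha);                // True: inherited from the global args
        }
    }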
Example #2
        protected bool TryNormalize(VBuffer <Single>[] values)
        {
            if (!Normalize)
            {
                return(true);
            }

            for (int i = 0; i < values.Length; i++)
            {
                // Leave a zero vector as all zeros. Otherwise, make the L1 norm equal to 1.
                var sum = VectorUtils.L1Norm(in values[i]);
                if (!FloatUtils.IsFinite(sum))
                {
                    return(false);
                }
                if (sum > 0)
                {
                    VectorUtils.ScaleBy(ref values[i], 1 / sum);
                }
            }
            return(true);
        }
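Assuming VectorUtils.L1Norm returns the sum of absolute values, the same normalization can be sketched over plain arrays. This hypothetical version keeps the two behaviors that matter above: bail out on a non-finite norm, and leave zero vectors untouched:

    using System;

    static class L1NormalizeSketch
    {
        public static bool TryNormalize(float[][] vectors)
        {
            foreach (var v in vectors)
            {
                float sum = 0;
                foreach (var x in v)
                    sum += Math.Abs(x);                      // L1 norm of the vector
                if (float.IsNaN(sum) || float.IsInfinity(sum))
                    return false;                            // reject data with bad values
                if (sum > 0)                                 // leave a zero vector as all zeros
                {
                    for (int i = 0; i < v.Length; i++)
                        v[i] *= 1 / sum;                     // make the L1 norm equal to 1
                }
            }
            return true;
        }
    }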
Example #3
            public override void ProcessRow()
            {
                float label = 0;

                _labelGetter(ref label);
                _scoreGetter(ref Score);

                if (float.IsNaN(label))
                {
                    NumUnlabeledInstances++;
                    return;
                }

                if (IsNaN(ref Score))
                {
                    NumBadScores++;
                    return;
                }

                float weight = 1;

                if (_weightGetter != null)
                {
                    _weightGetter(ref weight);
                    if (!FloatUtils.IsFinite(weight))
                    {
                        NumBadWeights++;
                        weight = 1;
                    }
                }

                ApplyLossFunction(ref Score, label, ref Loss);
                UnweightedCounters.Update(ref Score, label, 1, ref Loss);
                if (WeightedCounters != null)
                {
                    WeightedCounters.Update(ref Score, label, weight, ref Loss);
                }
            }
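ProcessRow is a typical evaluator guard ladder: a NaN label counts the row as unlabeled, a NaN score counts it as a bad score, and a non-finite weight is counted but reset to 1 so the row still contributes. A condensed standalone sketch of just the screening logic (the names here are illustrative, not the ML.NET types):

    using System;

    static class RowScreening
    {
        // Returns the effective weight, or null if the row should be skipped entirely.
        static float? Screen(float label, float score, float weight,
                             ref long unlabeled, ref long badScores, ref long badWeights)
        {
            if (float.IsNaN(label)) { unlabeled++; return null; }
            if (float.IsNaN(score)) { badScores++; return null; }
            if (float.IsNaN(weight) || float.IsInfinity(weight)) { badWeights++; weight = 1; }
            return weight;
        }

        static void Main()
        {
            long unlabeled = 0, badScores = 0, badWeights = 0;
            var w = Screen(1f, 0.3f, float.PositiveInfinity, ref unlabeled, ref badScores, ref badWeights);
            Console.WriteLine($"weight={w}, badWeights={badWeights}");   // weight=1, badWeights=1
        }
    }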
Example #4
        protected override float AccumulateOneGradient(ref VBuffer <float> feat, float label, float weight,
                                                       ref VBuffer <float> x, ref VBuffer <float> grad, ref float[] scores)
        {
            if (Utils.Size(scores) < _numClasses)
            {
                scores = new float[_numClasses];
            }

            float bias = 0;

            for (int c = 0, start = _numClasses; c < _numClasses; c++, start += NumFeatures)
            {
                x.GetItemOrDefault(c, ref bias);
                scores[c] = bias + VectorUtils.DotProductWithOffset(ref x, start, ref feat);
            }

            float logZ      = MathUtils.SoftMax(scores, _numClasses);
            float datumLoss = logZ;

            int lab = (int)label;

            Contracts.Assert(0 <= lab && lab < _numClasses);
            for (int c = 0, start = _numClasses; c < _numClasses; c++, start += NumFeatures)
            {
                float probLabel = lab == c ? 1 : 0;
                datumLoss -= probLabel * scores[c];

                float modelProb = MathUtils.ExpSlow(scores[c] - logZ);
                float mult      = weight * (modelProb - probLabel);
                VectorUtils.AddMultWithOffset(ref feat, mult, ref grad, start);
                // Due to the call to EnsureBiases, we know this region is dense.
                Contracts.Assert(grad.Count >= BiasCount && (grad.IsDense || grad.Indices[BiasCount - 1] == BiasCount - 1));
                grad.Values[c] += mult;
            }

            Contracts.Check(FloatUtils.IsFinite(datumLoss), "Data contain bad values.");
            return(weight * datumLoss);
        }
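For reference, the loop above computes the standard multiclass softmax (logistic) loss and its gradient with respect to the per-class scores; this assumes MathUtils.SoftMax returns the log-partition log Z. With label y and scores s_c, datumLoss and mult in the code correspond to:

    \ell = \log Z - s_y, \qquad Z = \sum_{c=1}^{K} e^{s_c}, \qquad
    \frac{\partial \ell}{\partial s_c} = p_c - \mathbb{1}[c = y], \qquad
    p_c = \frac{e^{s_c}}{Z}

The example weight scales the score gradient, and AddMultWithOffset then distributes it over that class's slice of the flattened parameter vector.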
Example #5
        /// <summary>
        /// This should be overridden by derived classes. This implementation simply increments
        /// _numIterExamples and dumps debug information to the console.
        /// </summary>
        protected virtual void ProcessDataInstance(IChannel ch, ref VBuffer <Float> feat, Float label, Float weight)
        {
            Contracts.Assert(FloatUtils.IsFinite(feat.Values, feat.Count));

            ++NumIterExamples;
#if OLD_TRACING // REVIEW: How should this be ported?
            if (DebugLevel > 2)
            {
                Vector features = instance.Features;
                Host.StdOut.Write("Instance has label {0} and {1} features:", instance.Label, features.Length);
                for (int i = 0; i < features.Length; i++)
                {
                    Host.StdOut.Write('\t');
                    Host.StdOut.Write(features[i]);
                }
                Host.StdOut.WriteLine();
            }

            if (DebugLevel > 1)
            {
                if (_numIterExamples % 5000 == 0)
                {
                    Host.StdOut.Write('.');
                    if (_numIterExamples % 500000 == 0)
                    {
                        Host.StdOut.Write(" ");
                        Host.StdOut.Write(_numIterExamples);
                        if (_numIterExamples % 5000000 == 0)
                        {
                            Host.StdOut.Write(" ");
                            Host.StdOut.Write(DateTime.UtcNow);
                        }
                        Host.StdOut.WriteLine();
                    }
                }
            }
#endif
        }
Example #6
        /// <summary>
        /// Finds an approximate minimum of the function
        /// </summary>
        /// <param name="function">Function to minimize</param>
        /// <param name="initial">Initial point</param>
        /// <param name="result">Approximate minimum</param>
        public void Minimize(DifferentiableFunction function, ref VBuffer <Float> initial, ref VBuffer <Float> result)
        {
            Contracts.Check(FloatUtils.IsFinite(initial.Values, initial.Count), "The initial vector contains NaNs or infinite values.");
            LineFunc        lineFunc = new LineFunc(function, ref initial, UseCG);
            VBuffer <Float> prev     = default(VBuffer <Float>);

            initial.CopyTo(ref prev);

            for (int n = 0; _maxSteps == 0 || n < _maxSteps; ++n)
            {
                Float step         = LineSearch.Minimize(lineFunc.Eval, lineFunc.Value, lineFunc.Deriv);
                var   newPoint     = lineFunc.NewPoint;
                bool  terminateNow = n > 0 && TerminateTester.ShouldTerminate(ref newPoint, ref prev);
                if (terminateNow || Terminate(ref newPoint))
                {
                    break;
                }
                newPoint.CopyTo(ref prev);
                lineFunc.ChangeDir();
            }

            lineFunc.NewPoint.CopyTo(ref result);
        }
Example #7
            public void Aggregate(double diff, int line, int col)
            {
                if (diff == 0)
                {
                    return;
                }

                if (!FloatUtils.IsFinite(diff))
                {
                    InfCount++;
                    return;
                }

                if (DiffMax < diff)
                {
                    DiffMax = diff;
                    LineMax = line;
                    ColMax  = col;
                }

                DiffTot += diff;
                DiffCount++;
            }
Example #8
            public void Save(ModelSaveContext ctx)
            {
                Contracts.AssertValue(ctx);

                // *** Binary format ***
                // int: Dimension
                // int: Rank
                // for i=0,..,Rank-1:
                //   float[]: the i'th eigenvector
                // int: the size of MeanProjected (0 if it is null)
                // float[]: MeanProjected

                Contracts.Assert(0 < Rank && Rank <= Dimension);
                ctx.Writer.Write(Dimension);
                ctx.Writer.Write(Rank);
                for (int i = 0; i < Rank; i++)
                {
                    Contracts.Assert(FloatUtils.IsFinite(Eigenvectors[i]));
                    ctx.Writer.WriteSinglesNoCount(Eigenvectors[i].AsSpan(0, Dimension));
                }
                Contracts.Assert(MeanProjected == null || (MeanProjected.Length == Rank && FloatUtils.IsFinite(MeanProjected)));
                ctx.Writer.WriteSingleArray(MeanProjected);
            }
Example #9
        protected override TModel TrainModelCore(TrainContext context)
        {
            Host.CheckValue(context, nameof(context));
            var initPredictor  = context.InitialPredictor;
            var initLinearPred = initPredictor as LinearPredictor ?? (initPredictor as CalibratedPredictorBase)?.SubPredictor as LinearPredictor;

            Host.CheckParam(initPredictor == null || initLinearPred != null, nameof(context), "Not a linear predictor.");
            var data = context.TrainingSet;

            data.CheckFeatureFloatVector(out int numFeatures);
            CheckLabel(data);

            using (var ch = Host.Start("Training"))
            {
                InitCore(ch, numFeatures, initLinearPred);
                // InitCore should set the number of features field.
                Contracts.Assert(NumFeatures > 0);

                TrainCore(ch, data);

                if (NumBad > 0)
                {
                    ch.Warning(
                        "Skipped {0} instances with missing features during training (over {1} iterations; {2} inst/iter)",
                        NumBad, Args.NumIterations, NumBad / Args.NumIterations);
                }

                Contracts.Assert(WeightsScale == 1);
                Float maxNorm = Math.Max(VectorUtils.MaxNorm(ref Weights), Math.Abs(Bias));
                Contracts.Check(FloatUtils.IsFinite(maxNorm),
                                "The weights/bias contain invalid values (NaN or Infinite). Potential causes: high learning rates, no normalization, high initial weights, etc.");

                ch.Done();
            }

            return(CreatePredictor());
        }
Example #10
        public void GetFeatures(int iCol, int iSlot, Random rand, long key, Span <float> features)
        {
            _host.Assert(features.Length == NumFeatures);

            // get counts from count table in the first _labelBinCount indices.
            var countsBuffer = features.Slice(0, _labelBinCount);
            var countTable   = _countTables[iCol, iSlot];

            countTable.GetCounts(key, countsBuffer);

            // check if it's garbage and replace with garbage counts if true
            float sum = 0;

            foreach (var feat in countsBuffer)
            {
                sum += feat;
            }
            bool isGarbage = sum < countTable.GarbageThreshold;

            if (isGarbage)
            {
                int i = 0;
                foreach (var count in countTable.GarbageCounts)
                {
                    countsBuffer[i++] = count;
                }
            }

            sum = AddLaplacianNoisePerLabel(iCol, rand, countsBuffer);

            // add log odds in the next _logOddsCount indices.
            GenerateLogOdds(iCol, countTable, countsBuffer, features.Slice(_labelBinCount, _logOddsCount), sum);
            _host.Assert(FloatUtils.IsFinite(features));

            // Add the last feature: an indicator for isGarbage.
            features[NumFeatures - 1] = isGarbage ? 1 : 0;
        }
Example #11
        /// <summary>
        /// Samples new hyperparameters for the trainer, and sets them.
        /// Returns true on success (new hyperparameters were suggested and set); otherwise, returns false.
        /// </summary>
        private static bool SampleHyperparameters(MLContext context, SuggestedTrainer trainer,
                                                  IEnumerable <SuggestedPipelineRunDetail> history, bool isMaximizingMetric, IChannel logger)
        {
            try
            {
                var sps     = ConvertToValueGenerators(trainer.SweepParams);
                var sweeper = new SmacSweeper(context,
                                              new SmacSweeper.Arguments
                {
                    SweptParameters = sps
                });

                IEnumerable <SuggestedPipelineRunDetail> historyToUse = history
                                                                        .Where(r => r.RunSucceeded && r.Pipeline.Trainer.TrainerName == trainer.TrainerName &&
                                                                               r.Pipeline.Trainer.HyperParamSet != null &&
                                                                               r.Pipeline.Trainer.HyperParamSet.Any() &&
                                                                               FloatUtils.IsFinite(r.Score));

                // get new set of hyperparameter values
                var proposedParamSet = sweeper.ProposeSweeps(1, historyToUse.Select(h => h.ToRunResult(isMaximizingMetric))).FirstOrDefault();
                if (!proposedParamSet.Any())
                {
                    return(false);
                }

                // associate proposed parameter set with trainer, so that smart hyperparameter
                // sweepers (like KDO) can map them back.
                trainer.SetHyperparamValues(proposedParamSet);

                return(true);
            }
            catch (Exception ex)
            {
                logger.Error($"SampleHyperparameters failed with exception: {ex}");
                throw;
            }
        }
Example #12
        /// <summary>
        /// Initialize predictor from a binary file.
        /// </summary>
        /// <param name="ctx">The load context</param>
        /// <param name="env">The host environment</param>
        private KMeansModelParameters(IHostEnvironment env, ModelLoadContext ctx)
            : base(env, LoaderSignature, ctx)
        {
            // *** Binary format ***
            // int: k, number of clusters
            // int: dimensionality, length of the centroid vectors
            // for each cluster, then:
            //     int: count of this centroid vector (sparse iff count < dimensionality)
            //     int[count]: only present if sparse, in order indices
            //     Float[count]: centroid vector values

            _k = ctx.Reader.ReadInt32();
            Host.CheckDecode(_k > 0);
            _dimensionality = ctx.Reader.ReadInt32();
            Host.CheckDecode(_dimensionality > 0);

            _centroidL2s = new Float[_k];
            _centroids   = new VBuffer <Float> [_k];
            for (int i = 0; i < _k; i++)
            {
                // Prior to allowing sparse vectors, count was not written and was implicitly
                // always equal to dimensionality, and no indices were written either.
                int count = ctx.Header.ModelVerWritten >= 0x00010002 ? ctx.Reader.ReadInt32() : _dimensionality;
                Host.CheckDecode(0 <= count && count <= _dimensionality);
                var indices = count < _dimensionality ? ctx.Reader.ReadIntArray(count) : null;

                var values = ctx.Reader.ReadFloatArray(count);
                Host.CheckDecode(FloatUtils.IsFinite(values));
                _centroids[i] = new VBuffer <Float>(_dimensionality, count, values, indices);
            }
            WarnOnOldNormalizer(ctx, GetType(), Host);

            InitPredictor();

            _inputType  = new VectorType(NumberType.Float, _dimensionality);
            _outputType = new VectorType(NumberType.Float, _k);
        }
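The decode convention is that a centroid is sparse exactly when count < dimensionality, and only then is an index array present. A hypothetical single-centroid reader over a plain BinaryReader (not the actual ModelLoadContext API) follows the same branch:

    using System.IO;

    static class CentroidFormat
    {
        // Returns (indices, values); indices is null for a dense centroid.
        public static (int[] Indices, float[] Values) ReadCentroid(BinaryReader reader, int dimensionality)
        {
            int count = reader.ReadInt32();
            int[] indices = null;
            if (count < dimensionality)                  // sparse iff count < dimensionality
            {
                indices = new int[count];
                for (int i = 0; i < count; i++)
                    indices[i] = reader.ReadInt32();     // in-order indices
            }
            var values = new float[count];
            for (int i = 0; i < count; i++)
                values[i] = reader.ReadSingle();         // centroid vector values
            return (indices, values);
        }
    }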
Example #13
        protected sealed override TModel TrainModelCore(TrainContext context)
        {
            Host.CheckValue(context, nameof(context));
            var initPredictor  = context.InitialPredictor;
            var initLinearPred = initPredictor as LinearPredictor ?? (initPredictor as CalibratedPredictorBase)?.SubPredictor as LinearPredictor;

            Host.CheckParam(initPredictor == null || initLinearPred != null, nameof(context), "Not a linear predictor.");
            var data = context.TrainingSet;

            data.CheckFeatureFloatVector(out int numFeatures);
            CheckLabels(data);

            using (var ch = Host.Start("Training"))
            {
                var state = MakeState(ch, numFeatures, initLinearPred);
                TrainCore(ch, data, state);

                ch.Assert(state.WeightsScale == 1);
                Float maxNorm = Math.Max(VectorUtils.MaxNorm(ref state.Weights), Math.Abs(state.Bias));
                ch.Check(FloatUtils.IsFinite(maxNorm),
                         "The weights/bias contain invalid values (NaN or Infinite). Potential causes: high learning rates, no normalization, high initial weights, etc.");
                return(state.CreatePredictor());
            }
        }
Example #14
            public ColInfoEx(Column item, Arguments args)
            {
                if (item.UseAlpha ?? args.UseAlpha)
                {
                    Colors |= ColorBits.Alpha; Planes++;
                }
                if (item.UseRed ?? args.UseRed)
                {
                    Colors |= ColorBits.Red; Planes++;
                }
                if (item.UseGreen ?? args.UseGreen)
                {
                    Colors |= ColorBits.Green; Planes++;
                }
                if (item.UseBlue ?? args.UseBlue)
                {
                    Colors |= ColorBits.Blue; Planes++;
                }
                Contracts.CheckUserArg(Planes > 0, nameof(item.UseRed), "Need to use at least one color plane");

                Interleave = item.InterleaveArgb ?? args.InterleaveArgb;

                Convert = item.Convert ?? args.Convert;
                if (!Convert)
                {
                    Offset = 0;
                    Scale  = 1;
                }
                else
                {
                    Offset = item.Offset ?? args.Offset ?? 0;
                    Scale  = item.Scale ?? args.Scale ?? 1;
                    Contracts.CheckUserArg(FloatUtils.IsFinite(Offset), nameof(item.Offset));
                    Contracts.CheckUserArg(FloatUtils.IsFiniteNonZero(Scale), nameof(item.Scale));
                }
            }
Example #15
        internal OlsLinearRegressionPredictor(IHostEnvironment env, ref VBuffer <Float> weights, Float bias,
                                              Double[] standardErrors, Double[] tValues, Double[] pValues, Double rSquared, Double rSquaredAdjusted)
            : base(env, RegistrationName, ref weights, bias)
        {
            Contracts.AssertValueOrNull(standardErrors);
            Contracts.AssertValueOrNull(tValues);
            Contracts.AssertValueOrNull(pValues);
            // If r-squared is NaN then the other statistics must be null; however, if r-squared is not NaN,
            // then the statistics may still be null if creation of statistics was suppressed.
            Contracts.Assert(!Double.IsNaN(rSquaredAdjusted) || standardErrors == null);
            // Nullity or not must be consistent between the statistics.
            Contracts.Assert((standardErrors == null) == (tValues == null) && (tValues == null) == (pValues == null));
            Contracts.Assert(0 <= rSquared & rSquared <= 1);
            Contracts.Assert(Double.IsNaN(rSquaredAdjusted) | (0 <= rSquaredAdjusted & rSquaredAdjusted <= 1));
            if (standardErrors != null)
            {
                // If not null, the input arrays must have one value for each parameter.
                Contracts.Assert(Utils.Size(standardErrors) == weights.Length + 1);
                Contracts.Assert(Utils.Size(tValues) == weights.Length + 1);
                Contracts.Assert(Utils.Size(pValues) == weights.Length + 1);
#if DEBUG
                for (int i = 0; i <= weights.Length; ++i)
                {
                    Contracts.Assert(FloatUtils.IsFinite(standardErrors[i]));
                    Contracts.Assert(FloatUtils.IsFinite(tValues[i]));
                    Contracts.Assert(FloatUtils.IsFinite(pValues[i]));
                }
#endif
            }

            _standardErrors   = standardErrors;
            _tValues          = tValues;
            _pValues          = pValues;
            _rSquared         = rSquared;
            _rSquaredAdjusted = rSquaredAdjusted;
        }
Example #16
        private FieldAwareFactorizationMachineModelParameters TrainCore(IChannel ch, IProgressChannel pch, RoleMappedData data,
                                                                        RoleMappedData validData = null, FieldAwareFactorizationMachineModelParameters predictor = null)
        {
            _host.AssertValue(ch);
            _host.AssertValue(pch);

            data.CheckBinaryLabel();
            var featureColumns    = data.Schema.GetColumns(RoleMappedSchema.ColumnRole.Feature);
            int fieldCount        = featureColumns.Count;
            int totalFeatureCount = 0;

            int[] fieldColumnIndexes = new int[fieldCount];
            for (int f = 0; f < fieldCount; f++)
            {
                var col = featureColumns[f];
                _host.Assert(!col.IsHidden);
                if (!(col.Type is VectorDataViewType vectorType) ||
                    !vectorType.IsKnownSize ||
                    vectorType.ItemType != NumberDataViewType.Single)
                {
                    throw ch.ExceptParam(nameof(data), "Training feature column '{0}' must be a known-size vector of Single, but has type: {1}.", col.Name, col.Type);
                }
                _host.Assert(vectorType.Size > 0);
                fieldColumnIndexes[f] = col.Index;
                totalFeatureCount    += vectorType.Size;
            }
            ch.Check(checked (totalFeatureCount * fieldCount * _latentDimAligned) <= Utils.ArrayMaxSize, "Latent dimension or the number of fields too large");
            if (predictor != null)
            {
                ch.Check(predictor.FeatureCount == totalFeatureCount, "Input model's feature count mismatches training feature count");
                ch.Check(predictor.LatentDimension == _latentDim, "Input model's latent dimension mismatches trainer's");
            }
            if (validData != null)
            {
                validData.CheckBinaryLabel();
                var validFeatureColumns = data.Schema.GetColumns(RoleMappedSchema.ColumnRole.Feature);
                _host.Assert(fieldCount == validFeatureColumns.Count);
                for (int f = 0; f < fieldCount; f++)
                {
                    var featCol      = featureColumns[f];
                    var validFeatCol = validFeatureColumns[f];
                    _host.Assert(featCol.Name == validFeatCol.Name);
                    _host.Assert(featCol.Type == validFeatCol.Type);
                }
            }
            bool shuffle = _shuffle;

            if (shuffle && !data.Data.CanShuffle)
            {
                ch.Warning("Training data does not support shuffling, so ignoring request to shuffle");
                shuffle = false;
            }
            var rng                = shuffle ? _host.Rand : null;
            var featureGetters     = new ValueGetter <VBuffer <float> > [fieldCount];
            var featureBuffer      = new VBuffer <float>();
            var featureValueBuffer = new float[totalFeatureCount];
            var featureIndexBuffer = new int[totalFeatureCount];
            var featureFieldBuffer = new int[totalFeatureCount];
            var latentSum          = new AlignedArray(fieldCount * fieldCount * _latentDimAligned, 16);
            var metricNames        = new List <string>()
            {
                "Training-loss"
            };

            if (validData != null)
            {
                metricNames.Add("Validation-loss");
            }
            int    iter                 = 0;
            long   exampleCount         = 0;
            long   badExampleCount      = 0;
            long   validBadExampleCount = 0;
            double loss                 = 0;
            double validLoss            = 0;

            pch.SetHeader(new ProgressHeader(metricNames.ToArray(), new string[] { "iterations", "examples" }), entry =>
            {
                entry.SetProgress(0, iter, _numIterations);
                entry.SetProgress(1, exampleCount);
            });

            var columns = data.Schema.Schema.Where(x => fieldColumnIndexes.Contains(x.Index)).ToList();

            columns.Add(data.Schema.Label.Value);
            if (data.Schema.Weight != null)
            {
                columns.Add(data.Schema.Weight.Value);
            }

            InitializeTrainingState(fieldCount, totalFeatureCount, predictor, out float[] linearWeights,
                                    out AlignedArray latentWeightsAligned, out float[] linearAccSqGrads, out AlignedArray latentAccSqGradsAligned);

            // refer to Algorithm 3 in https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
            while (iter++ < _numIterations)
            {
                using (var cursor = data.Data.GetRowCursor(columns, rng))
                {
                    var labelGetter  = RowCursorUtils.GetLabelGetter(cursor, data.Schema.Label.Value.Index);
                    var weightGetter = data.Schema.Weight?.Index is int weightIdx ? RowCursorUtils.GetGetterAs <float>(NumberDataViewType.Single, cursor, weightIdx) : null;

                    for (int i = 0; i < fieldCount; i++)
                    {
                        featureGetters[i] = cursor.GetGetter <VBuffer <float> >(cursor.Schema[fieldColumnIndexes[i]]);
                    }
                    loss            = 0;
                    exampleCount    = 0;
                    badExampleCount = 0;
                    while (cursor.MoveNext())
                    {
                        float label         = 0;
                        float weight        = 1;
                        int   count         = 0;
                        float modelResponse = 0;
                        labelGetter(ref label);
                        weightGetter?.Invoke(ref weight);
                        float annihilation = label - label + weight - weight;
                        if (!FloatUtils.IsFinite(annihilation))
                        {
                            badExampleCount++;
                            continue;
                        }
                        if (!FieldAwareFactorizationMachineUtils.LoadOneExampleIntoBuffer(featureGetters, featureBuffer, _norm, ref count,
                                                                                          featureFieldBuffer, featureIndexBuffer, featureValueBuffer))
                        {
                            badExampleCount++;
                            continue;
                        }

                        // refer to Algorithm 1 in [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
                        FieldAwareFactorizationMachineInterface.CalculateIntermediateVariables(fieldCount, _latentDimAligned, count,
                                                                                               featureFieldBuffer, featureIndexBuffer, featureValueBuffer, linearWeights, latentWeightsAligned, latentSum, ref modelResponse);
                        var slope = CalculateLossSlope(label, modelResponse);

                        // refer to Algorithm 2 in [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf
                        FieldAwareFactorizationMachineInterface.CalculateGradientAndUpdate(_lambdaLinear, _lambdaLatent, _learningRate, fieldCount, _latentDimAligned, weight, count,
                                                                                           featureFieldBuffer, featureIndexBuffer, featureValueBuffer, latentSum, slope, linearWeights, latentWeightsAligned, linearAccSqGrads, latentAccSqGradsAligned);
                        loss += weight * CalculateLoss(label, modelResponse);
                        exampleCount++;
                    }
                    loss /= exampleCount;
                }

                if (_verbose)
                {
                    if (validData == null)
                    {
                        pch.Checkpoint(loss, iter, exampleCount);
                    }
                    else
                    {
                        validLoss = CalculateAvgLoss(ch, validData, _norm, linearWeights, latentWeightsAligned, _latentDimAligned, latentSum,
                                                     featureFieldBuffer, featureIndexBuffer, featureValueBuffer, featureBuffer, ref validBadExampleCount);
                        pch.Checkpoint(loss, validLoss, iter, exampleCount);
                    }
                }
            }
            if (badExampleCount != 0)
            {
                ch.Warning($"Skipped {badExampleCount} examples with bad label/weight/features in training set");
            }
            if (validBadExampleCount != 0)
            {
                ch.Warning($"Skipped {validBadExampleCount} examples with bad label/weight/features in validation set");
            }

            return(new FieldAwareFactorizationMachineModelParameters(_host, _norm, fieldCount, totalFeatureCount, _latentDim, linearWeights, latentWeightsAligned));
        }
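The float annihilation = label - label + weight - weight line above is a compact finiteness check for two values at once: the expression is exactly 0 when both are finite, and NaN whenever either is NaN or +/- Infinity, since Inf - Inf yields NaN and NaN propagates through addition. A minimal demonstration:

    using System;

    static class AnnihilationCheck
    {
        static bool BothFinite(float label, float weight)
        {
            float annihilation = label - label + weight - weight;  // 0 if both finite, NaN otherwise
            return !float.IsNaN(annihilation);
        }

        static void Main()
        {
            Console.WriteLine(BothFinite(1f, 2f));                      // True
            Console.WriteLine(BothFinite(float.NaN, 2f));               // False
            Console.WriteLine(BothFinite(1f, float.PositiveInfinity));  // False
        }
    }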
Example #17
        /// <summary>
        /// Finds the bins.
        /// </summary>
        private void FindBinsFromDistinctCounts(double[] distinctValues, int[] counts, int numValues, int maxBins, out double[] binUpperBounds, out int firstBinCount)
        {
            Contracts.Assert(0 <= numValues && numValues <= distinctValues.Length);
            Contracts.Assert(numValues <= counts.Length);

#if DEBUG
            int inv  = 0;
            int bad  = 0;
            var prev = double.NegativeInfinity;
            for (int i = 0; i < numValues; i++)
            {
                var v = distinctValues[i];
                if (!FloatUtils.IsFinite(v))
                {
                    bad++;
                }
                else
                {
                    if (!(prev < v))
                    {
                        inv++;
                    }
                    prev = v;
                }
            }
            Contracts.Assert(bad == 0, "distinctValues passed to FindBinsFromDistinctCounts contains non-finite values");
            Contracts.Assert(inv == 0, "distinctValues passed to FindBinsFromDistinctCounts is not sorted");
#endif

            if (numValues <= maxBins)
            {
                binUpperBounds = new double[Math.Max(1, numValues)];
                for (int i = 1; i < binUpperBounds.Length; i++)
                {
                    binUpperBounds[i - 1] = GetSplitValue(distinctValues[i - 1], distinctValues[i]);
                }
                binUpperBounds[binUpperBounds.Length - 1] = double.PositiveInfinity;

                firstBinCount = numValues > 0 ? counts[0] : 0;
                return;
            }

            var path = new int[maxBins + 1];
            _finder.FindBinsWithCounts(counts, numValues, maxBins, path);
            binUpperBounds = new double[maxBins];
            for (int i = 1; i < binUpperBounds.Length; i++)
            {
                binUpperBounds[i - 1] = GetSplitValue(distinctValues[path[i] - 1], distinctValues[path[i]]);
            }
            binUpperBounds[binUpperBounds.Length - 1] = double.PositiveInfinity;

            // Compute the first bin count.
            firstBinCount = 0;
            var firstBinUpperBound = binUpperBounds[0];
            for (int v = 0; v < numValues; ++v)
            {
                if (distinctValues[v] > firstBinUpperBound)
                {
                    break;
                }
                firstBinCount += counts[v];
            }
        }
Example #18
        private OlsLinearRegressionPredictor TrainCore(IChannel ch, FloatLabelCursor.Factory cursorFactory, int featureCount)
        {
            Host.AssertValue(ch);
            ch.AssertValue(cursorFactory);

            int m = featureCount + 1;

            // Check for memory conditions first.
            if ((long)m * (m + 1) / 2 > int.MaxValue)
            {
                throw ch.Except("Cannot hold covariance matrix in memory with {0} features", m - 1);
            }

            // Track the number of examples.
            long n = 0;
            // Since we are accumulating over many values, we use Double even for the single precision build.
            var xty = new Double[m];
            // The layout of this algorithm is a packed row-major lower triangular matrix.
            var xtx = new Double[m * (m + 1) / 2];

            // Build X'X (lower triangular) and X'y incrementally (X'X+=X'X_i; X'y+=X'y_i):
            using (var cursor = cursorFactory.Create())
            {
                while (cursor.MoveNext())
                {
                    var yi = cursor.Label;
                    // Increment first element of X'y
                    xty[0] += yi;
                    // Increment first element of lower triangular X'X
                    xtx[0] += 1;
                    var values = cursor.Features.GetValues();

                    if (cursor.Features.IsDense)
                    {
                        int ioff = 1;
                        ch.Assert(values.Length + 1 == m);
                        // Increment rest of first column of lower triangular X'X
                        for (int i = 1; i < m; i++)
                        {
                            ch.Assert(ioff == i * (i + 1) / 2);
                            var val = values[i - 1];
                            // Add the implicit first bias term to X'X
                            xtx[ioff++] += val;
                            // Add the remainder of X'X
                            for (int j = 0; j < i; j++)
                            {
                                xtx[ioff++] += val * values[j];
                            }
                            // X'y
                            xty[i] += val * yi;
                        }
                        ch.Assert(ioff == xtx.Length);
                    }
                    else
                    {
                        var fIndices = cursor.Features.GetIndices();
                        for (int ii = 0; ii < values.Length; ++ii)
                        {
                            int i    = fIndices[ii] + 1;
                            int ioff = i * (i + 1) / 2;
                            var val  = values[ii];
                            // Add the implicit first bias term to X'X
                            xtx[ioff++] += val;
                            // Add the remainder of X'X
                            for (int jj = 0; jj <= ii; jj++)
                            {
                                xtx[ioff + fIndices[jj]] += val * values[jj];
                            }
                            // X'y
                            xty[i] += val * yi;
                        }
                    }
                    n++;
                }
                ch.Check(n > 0, "No training examples in dataset.");
                if (cursor.BadFeaturesRowCount > 0)
                {
                    ch.Warning("Skipped {0} instances with missing features/label during training", cursor.SkippedRowCount);
                }

                if (_l2Weight > 0)
                {
                    // Skip the bias term for regularization, in the ridge regression case.
                    // So start at [1,1] instead of [0,0].

                    // REVIEW: There are two ways to view this. Firstly, it is more
                    // user-friendly to make this scaling factor behave similarly regardless
                    // of data size, so that with the same parameters you get the same
                    // model whether you feed in your data once or duplicate it 10 times.
                    // This is what we have now. The alternate point of view is to treat this
                    // L2 regularization parameter as providing some sort of prior, in which
                    // case duplicating the data 10 times should in fact be treated differently!
                    // (That is, we should not multiply by n below.) Both interpretations seem
                    // correct, in their way.
                    Double squared = _l2Weight * _l2Weight * n;
                    int    ioff    = 0;
                    for (int i = 1; i < m; ++i)
                    {
                        xtx[ioff += i + 1] += squared;
                    }
                    ch.Assert(ioff == xtx.Length - 1);
                }
            }

            if (!(_l2Weight > 0) && n < m)
            {
                throw ch.Except("Ordinary least squares requires more examples than parameters. There are {0} parameters, but {1} examples. To enable training, use a positive L2 weight so this behaves as ridge regression.", m, n);
            }

            Double yMean = n == 0 ? 0 : xty[0] / n;

            ch.Info("Trainer solving for {0} parameters across {1} examples", m, n);
            // Cholesky Decomposition of X'X into LL'
            try
            {
                Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, m, xtx);
            }
            catch (DllNotFoundException)
            {
                // REVIEW: Is there no better way?
                throw ch.ExceptNotSupp("The MKL library (libMklImports) or one of its dependencies is missing.");
            }
            // Solve for beta in (LL')beta = X'y:
            Mkl.Pptrs(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, m, 1, xtx, xty, 1);
            // Note that the solver overwrote xty so it contains the solution. To be more clear,
            // we effectively change its name (through reassignment) so we don't get confused that
            // this is somehow xty in the remaining calculation.
            var beta = xty;

            xty = null;
            // Check that the solution is valid.
            for (int i = 0; i < beta.Length; ++i)
            {
                ch.Check(FloatUtils.IsFinite(beta[i]), "Non-finite values detected in OLS solution");
            }

            var weights = VBufferUtils.CreateDense <float>(beta.Length - 1);

            for (int i = 1; i < beta.Length; ++i)
            {
                weights.Values[i - 1] = (float)beta[i];
            }
            var bias = (float)beta[0];

            if (!(_l2Weight > 0) && m == n)
            {
                // We would expect the solution to the problem to be exact in this case.
                ch.Info("Number of examples equals number of parameters, solution is exact but no statistics can be derived");
                return(new OlsLinearRegressionPredictor(Host, in weights, bias, null, null, null, 1, float.NaN));
            }

            Double rss = 0; // residual sum of squares
            Double tss = 0; // total sum of squares

            using (var cursor = cursorFactory.Create())
            {
                var   lrPredictor = new LinearRegressionPredictor(Host, in weights, bias);
                var   lrMap       = lrPredictor.GetMapper <VBuffer <float>, float>();
                float yh          = default;
                while (cursor.MoveNext())
                {
                    var features = cursor.Features;
                    lrMap(in features, ref yh);
                    var e = cursor.Label - yh;
                    rss += e * e;
                    var ydm = cursor.Label - yMean;
                    tss += ydm * ydm;
                }
            }
            var rSquared = ProbClamp(1 - (rss / tss));
            // R^2 adjusted differs from the normal formula on account of the bias term, by Said's reckoning.
            double rSquaredAdjusted;

            if (n > m)
            {
                rSquaredAdjusted = ProbClamp(1 - (1 - rSquared) * (n - 1) / (n - m));
                ch.Info("Coefficient of determination R2 = {0:g}, or {1:g} (adjusted)",
                        rSquared, rSquaredAdjusted);
            }
            else
            {
                rSquaredAdjusted = Double.NaN;
            }

            // The per-parameter significance is compute-intensive and may not be required by all practitioners.
            // Also, we can't estimate it unless we can estimate the variance, which requires more examples than
            // parameters.
            if (!_perParameterSignificance || m >= n)
            {
                return(new OlsLinearRegressionPredictor(Host, in weights, bias, null, null, null, rSquared, rSquaredAdjusted));
            }

            ch.Assert(!Double.IsNaN(rSquaredAdjusted));
            var standardErrors = new Double[m];
            var tValues        = new Double[m];
            var pValues        = new Double[m];

            // Invert X'X:
            Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, m, xtx);
            var s2 = rss / (n - m); // estimate of variance of y

            for (int i = 0; i < m; i++)
            {
                // Initialize with inverse Hessian.
                standardErrors[i] = (Single)xtx[i * (i + 1) / 2 + i];
            }

            if (_l2Weight > 0)
            {
                // Iterate through all entries of inverse Hessian to make adjustment to variance.
                int   ioffset = 1;
                float reg     = _l2Weight * _l2Weight * n;
                for (int iRow = 1; iRow < m; iRow++)
                {
                    for (int iCol = 0; iCol <= iRow; iCol++)
                    {
                        var entry      = (Single)xtx[ioffset];
                        var adjustment = -reg * entry * entry;
                        standardErrors[iRow] -= adjustment;
                        if (0 < iCol && iCol < iRow)
                        {
                            standardErrors[iCol] -= adjustment;
                        }
                        ioffset++;
                    }
                }

                Contracts.Assert(ioffset == xtx.Length);
            }

            for (int i = 0; i < m; i++)
            {
                // sqrt of diagonal entries of s2 * inverse(X'X + reg * I) * X'X * inverse(X'X + reg * I).
                standardErrors[i] = Math.Sqrt(s2 * standardErrors[i]);
                ch.Check(FloatUtils.IsFinite(standardErrors[i]), "Non-finite standard error detected from OLS solution");
                tValues[i] = beta[i] / standardErrors[i];
                pValues[i] = (float)MathUtils.TStatisticToPValue(tValues[i], n - m);
                ch.Check(0 <= pValues[i] && pValues[i] <= 1, "p-Value calculated outside expected [0,1] range");
            }

            return(new OlsLinearRegressionPredictor(Host, in weights, bias, standardErrors, tValues, pValues, rSquared, rSquaredAdjusted));
        }
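In matrix form, this routine solves the (optionally ridge-regularized) normal equations, with the bias handled as an implicit leading column of ones that is excluded from regularization; Mkl.Pptrf and Mkl.Pptrs are the packed Cholesky factorization and solve, which is why index arithmetic like i * (i + 1) / 2 appears throughout. Reading lambda for _l2Weight, m for the parameter count, and RSS for the residual sum of squares, the solution and the standard-error sandwich described in the final loop's comment are:

    \hat{\beta} = \left(X^\top X + \lambda^2 n D\right)^{-1} X^\top y,
    \qquad D = \operatorname{diag}(0, 1, \dots, 1)

    \widehat{\operatorname{Var}}\!\left(\hat{\beta}\right)
    = s^2 \left(X^\top X + \lambda^2 n D\right)^{-1} X^\top X \left(X^\top X + \lambda^2 n D\right)^{-1},
    \qquad s^2 = \frac{\mathrm{RSS}}{n - m}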
Example #19
        /// <summary>
        /// Minimize the function represented by <paramref name="f"/>.
        /// </summary>
        /// <param name="f">Stochastic gradients of function to minimize</param>
        /// <param name="initial">Initial point</param>
        /// <param name="result">Approximate minimum of <paramref name="f"/></param>
        public void Minimize(DStochasticGradient f, ref VBuffer <Float> initial, ref VBuffer <Float> result)
        {
            Contracts.Check(FloatUtils.IsFinite(initial.Values, initial.Count), "The initial vector contains NaNs or infinite values.");
            int dim = initial.Length;

            VBuffer <Float> grad = VBufferUtils.CreateEmpty <Float>(dim);
            VBuffer <Float> step = VBufferUtils.CreateEmpty <Float>(dim);
            VBuffer <Float> x    = default(VBuffer <Float>);

            initial.CopyTo(ref x);
            VBuffer <Float> prev = default(VBuffer <Float>);
            VBuffer <Float> avg  = VBufferUtils.CreateEmpty <Float>(dim);

            for (int n = 0; _maxSteps == 0 || n < _maxSteps; ++n)
            {
                if (_momentum == 0)
                {
                    step = new VBuffer <Float>(step.Length, 0, step.Values, step.Indices);
                }
                else
                {
                    VectorUtils.ScaleBy(ref step, _momentum);
                }

                Float stepSize;
                switch (_rateSchedule)
                {
                case RateScheduleType.Constant:
                    stepSize = 1 / _t0;
                    break;

                case RateScheduleType.Sqrt:
                    stepSize = 1 / (_t0 + MathUtils.Sqrt(n));
                    break;

                case RateScheduleType.Linear:
                    stepSize = 1 / (_t0 + n);
                    break;

                default:
                    throw Contracts.Except();
                }

                Float scale = (1 - _momentum) / _batchSize;
                for (int i = 0; i < _batchSize; ++i)
                {
                    f(ref x, ref grad);
                    VectorUtils.AddMult(ref grad, scale, ref step);
                }

                if (_averaging)
                {
                    Utils.Swap(ref avg, ref prev);
                    VectorUtils.ScaleBy(prev, ref avg, (Float)n / (n + 1));
                    VectorUtils.AddMult(ref step, -stepSize, ref x);
                    VectorUtils.AddMult(ref x, (Float)1 / (n + 1), ref avg);

                    if ((n > 0 && TerminateTester.ShouldTerminate(ref avg, ref prev)) || _terminate(ref avg))
                    {
                        result = avg;
                        return;
                    }
                }
                else
                {
                    Utils.Swap(ref x, ref prev);
                    VectorUtils.AddMult(ref step, -stepSize, ref prev, ref x);
                    if ((n > 0 && TerminateTester.ShouldTerminate(ref x, ref prev)) || _terminate(ref x))
                    {
                        result = x;
                        return;
                    }
                }
            }

            result = _averaging ? avg : x;
        }
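The three rate schedules and the iterate averaging in Minimize reduce to simple closed forms; with n the step index and t_0 the tuning constant:

    \eta_n = \frac{1}{t_0} \;\;(\text{constant}), \qquad
    \eta_n = \frac{1}{t_0 + \sqrt{n}} \;\;(\text{sqrt}), \qquad
    \eta_n = \frac{1}{t_0 + n} \;\;(\text{linear})

    \bar{x}_{n+1} = \frac{n}{n+1}\,\bar{x}_n + \frac{1}{n+1}\,x_{n+1} \;\;(\text{averaging})

The averaging branch implements the running mean incrementally: ScaleBy shrinks the previous average by n/(n+1) and AddMult folds in the new point with weight 1/(n+1).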
Example #20
        /// <summary>
        /// Test whether the optimization should terminate. Returns true if x contains NaN or +/-Inf values, or if x equals xprev.
        /// </summary>
        /// <param name="x">The current value.</param>
        /// <param name="xprev">The value from the previous iteration.</param>
        /// <returns>True if the optimization routine should terminate at this iteration.</returns>
        internal static bool ShouldTerminate(ref VBuffer <Float> x, ref VBuffer <Float> xprev)
        {
            Contracts.Assert(x.Length == xprev.Length, "Vectors must have the same dimensionality.");
            Contracts.Assert(FloatUtils.IsFinite(xprev.Values, xprev.Count));

            if (!FloatUtils.IsFinite(x.Values, x.Count))
            {
                return(true);
            }

            if (x.IsDense && xprev.IsDense)
            {
                for (int i = 0; i < x.Length; i++)
                {
                    if (x.Values[i] != xprev.Values[i])
                    {
                        return(false);
                    }
                }
            }
            else if (xprev.IsDense)
            {
                int j = 0;
                for (int ii = 0; ii < x.Count; ii++)
                {
                    int i = x.Indices[ii];
                    while (j < i)
                    {
                        if (xprev.Values[j++] != 0)
                        {
                            return(false);
                        }
                    }
                    Contracts.Assert(i == j);
                    if (x.Values[ii] != xprev.Values[j++])
                    {
                        return(false);
                    }
                }

                while (j < xprev.Length)
                {
                    if (xprev.Values[j++] != 0)
                    {
                        return(false);
                    }
                }
            }
            else if (x.IsDense)
            {
                int i = 0;
                for (int jj = 0; jj < xprev.Count; jj++)
                {
                    int j = xprev.Indices[jj];
                    while (i < j)
                    {
                        if (x.Values[i++] != 0)
                        {
                            return(false);
                        }
                    }
                    Contracts.Assert(j == i);
                    if (x.Values[i++] != xprev.Values[jj])
                    {
                        return(false);
                    }
                }

                while (i < x.Length)
                {
                    if (x.Values[i++] != 0)
                    {
                        return(false);
                    }
                }
            }
            else
            {
                // Both sparse.
                int ii = 0;
                int jj = 0;
                while (ii < x.Count && jj < xprev.Count)
                {
                    int i = x.Indices[ii];
                    int j = xprev.Indices[jj];
                    if (i == j)
                    {
                        if (x.Values[ii++] != xprev.Values[jj++])
                        {
                            return(false);
                        }
                    }
                    else if (i < j)
                    {
                        if (x.Values[ii++] != 0)
                        {
                            return(false);
                        }
                    }
                    else
                    {
                        if (xprev.Values[jj++] != 0)
                        {
                            return(false);
                        }
                    }
                }

                while (ii < x.Count)
                {
                    if (x.Values[ii++] != 0)
                    {
                        return(false);
                    }
                }

                while (jj < xprev.Count)
                {
                    if (xprev.Values[jj++] != 0)
                    {
                        return(false);
                    }
                }
            }

            return(true);
        }
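All four branches answer the same question, whether x equals xprev element-wise, without densifying either vector. A much simpler standalone sketch of the sparse-versus-dense case (O(length) rather than the merge-style walk above, and assuming sorted indices):

    using System;

    static class SparseCompare
    {
        // Compare a sparse vector (sorted indices + values) against a dense array.
        public static bool SparseEqualsDense(int length, int[] indices, float[] values, float[] dense)
        {
            int k = 0;
            for (int i = 0; i < length; i++)
            {
                float sparseValue = (k < indices.Length && indices[k] == i) ? values[k++] : 0f;
                if (sparseValue != dense[i])
                    return false;                        // first mismatch wins
            }
            return true;
        }

        static void Main()
        {
            // {0, 2.5, 0, 1} in sparse form versus the same vector densely:
            Console.WriteLine(SparseEqualsDense(4, new[] { 1, 3 }, new[] { 2.5f, 1f },
                                                new[] { 0f, 2.5f, 0f, 1f }));  // True
        }
    }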
Example #21
            public TransformInfo(ModelLoadContext ctx)
            {
                Contracts.AssertValue(ctx);

                // *** Binary format ***
                // int: Dimension
                // int: Rank
                // for i=0,..,Rank-1:
                //   float[]: the i'th eigenvector
                // int: the size of MeanProjected (0 if it is null)
                // float[]: MeanProjected

                Dimension = ctx.Reader.ReadInt32();
                Rank      = ctx.Reader.ReadInt32();
                Contracts.CheckDecode(0 < Rank && Rank <= Dimension);

                Eigenvectors = new float[Rank][];
                for (int i = 0; i < Rank; i++)
                {
                    Eigenvectors[i] = ctx.Reader.ReadFloatArray(Dimension);
                    Contracts.CheckDecode(FloatUtils.IsFinite(Eigenvectors[i]));
                }

                MeanProjected = ctx.Reader.ReadFloatArray();
                Contracts.CheckDecode(MeanProjected == null || (MeanProjected.Length == Rank && FloatUtils.IsFinite(MeanProjected)));
            }
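This constructor is the mirror image of the Save method in Example #8: the reads occur in exactly the order of the writes. A round-trip of the same layout over plain BCL streams (rather than the actual ModelSaveContext/ModelLoadContext APIs) might look like:

    using System;
    using System.IO;

    static class PcaLayoutRoundTrip
    {
        static void Main()
        {
            int dimension = 3, rank = 2;
            var eigenvectors  = new[] { new float[] { 1, 0, 0 }, new float[] { 0, 1, 0 } };
            var meanProjected = new[] { 0.5f, -0.5f };

            using var stream = new MemoryStream();
            using (var w = new BinaryWriter(stream, System.Text.Encoding.UTF8, leaveOpen: true))
            {
                w.Write(dimension);                          // int: Dimension
                w.Write(rank);                               // int: Rank
                foreach (var vector in eigenvectors)
                    foreach (var f in vector) w.Write(f);    // float[]: eigenvector, no count prefix
                w.Write(meanProjected.Length);               // int: size of MeanProjected (0 if null)
                foreach (var f in meanProjected) w.Write(f); // float[]: MeanProjected
            }

            stream.Position = 0;
            using var r = new BinaryReader(stream);
            Console.WriteLine($"Dimension={r.ReadInt32()}, Rank={r.ReadInt32()}");
        }
    }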
Example #22
        protected LinearPredictor(IHostEnvironment env, string name, ModelLoadContext ctx)
            : base(env, name, ctx)
        {
            // *** Binary format ***
            // Float: bias
            // int: number of features (weights)
            // int: number of indices
            // int[]: indices
            // int: number of weights
            // Float[]: weights
            // bool: has model stats
            // (Conditional) LinearModelStatistics: stats

            Bias = ctx.Reader.ReadFloat();
            Host.CheckDecode(FloatUtils.IsFinite(Bias));

            int len = ctx.Reader.ReadInt32();

            Host.Assert(len > 0);

            int cind = ctx.Reader.ReadInt32();

            Host.CheckDecode(0 <= cind & cind < len);
            var indices = ctx.Reader.ReadIntArray(cind);

            // Verify monotonicity of indices.
            int prev = -1;

            for (int i = 0; i < cind; i++)
            {
                Host.CheckDecode(prev < indices[i]);
                prev = indices[i];
            }
            Host.CheckDecode(prev < len);

            int cwht = ctx.Reader.ReadInt32();

            // Either there are as many weights as there are indices (in the
            // sparse case), or (in the dense case) there are no indices and the
            // number of weights is the length of the vector. Note that for the
            // trivial predictor it is quite legal to have 0 in both counts.
            Host.CheckDecode(cwht == cind || (cind == 0 && cwht == len));

            var weights = ctx.Reader.ReadFloatArray(cwht);

            Host.CheckDecode(Utils.Size(weights) == 0 || weights.All(x => FloatUtils.IsFinite(x)));

            if (cwht == 0)
            {
                Weight = VBufferUtils.CreateEmpty <Float>(len);
            }
            else
            {
                Weight = new VBuffer <Float>(len, Utils.Size(weights), weights, indices);
            }

            InputType = new VectorType(NumberType.Float, Weight.Length);
            WarnOnOldNormalizer(ctx, GetType(), Host);

            if (Weight.IsDense)
            {
                _weightsDense = Weight;
            }
            else
            {
                _weightsDenseLock = new object();
            }
        }
Example #23
        // Project the covariance matrix A onto Omega: Y <- A * Omega
        // A = X' * X / n, where X = data - mean
        // Note that the covariance matrix is not computed explicitly
        private void Project(IDataView trainingData, Float[][] mean, Float[][][] omega, Float[][][] y, TransformInfo[] transformInfos)
        {
            Host.Assert(mean.Length == omega.Length && omega.Length == y.Length && y.Length == Infos.Length);
            for (int i = 0; i < omega.Length; i++)
            {
                Contracts.Assert(omega[i].Length == y[i].Length);
            }

            // set y to be all zeros
            for (int iinfo = 0; iinfo < y.Length; iinfo++)
            {
                for (int i = 0; i < y[iinfo].Length; i++)
                {
                    Array.Clear(y[iinfo][i], 0, y[iinfo][i].Length);
                }
            }

            bool[] center = Enumerable.Range(0, mean.Length).Select(i => mean[i] != null).ToArray();

            Double[] totalColWeight = new Double[Infos.Length];

            bool[] activeColumns = new bool[Source.Schema.ColumnCount];
            for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
            {
                activeColumns[Infos[iinfo].Source] = true;
                if (_weightColumnIndex[iinfo] >= 0)
                {
                    activeColumns[_weightColumnIndex[iinfo]] = true;
                }
            }
            using (var cursor = trainingData.GetRowCursor(col => activeColumns[col]))
            {
                var weightGetters = new ValueGetter <Float> [Infos.Length];
                var columnGetters = new ValueGetter <VBuffer <Float> > [Infos.Length];
                for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                {
                    if (_weightColumnIndex[iinfo] >= 0)
                    {
                        weightGetters[iinfo] = cursor.GetGetter <Float>(_weightColumnIndex[iinfo]);
                    }
                    columnGetters[iinfo] = cursor.GetGetter <VBuffer <Float> >(Infos[iinfo].Source);
                }

                var features = default(VBuffer <Float>);
                while (cursor.MoveNext())
                {
                    for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                    {
                        Contracts.Check(Infos[iinfo].TypeSrc.IsVector && Infos[iinfo].TypeSrc.ItemType.IsNumber,
                                        "PCA transform can only be performed on numeric columns of dimension > 1");

                        Float weight = 1;
                        if (weightGetters[iinfo] != null)
                        {
                            weightGetters[iinfo](ref weight);
                        }
                        columnGetters[iinfo](ref features);

                        if (FloatUtils.IsFinite(weight) && weight >= 0 && (features.Count == 0 || FloatUtils.IsFinite(features.Values, features.Count)))
                        {
                            totalColWeight[iinfo] += weight;

                            if (center[iinfo])
                            {
                                VectorUtils.AddMult(ref features, mean[iinfo], weight);
                            }

                            for (int i = 0; i < omega[iinfo].Length; i++)
                            {
                                VectorUtils.AddMult(ref features, y[iinfo][i], weight * VectorUtils.DotProductWithOffset(omega[iinfo][i], 0, ref features));
                            }
                        }
                    }
                }

                for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                {
                    if (totalColWeight[iinfo] <= 0)
                    {
                        throw Host.Except("Empty data in column '{0}'", Source.Schema.GetColumnName(Infos[iinfo].Source));
                    }
                }

                for (int iinfo = 0; iinfo < Infos.Length; iinfo++)
                {
                    var invn = (Float)(1 / totalColWeight[iinfo]);

                    for (var i = 0; i < omega[iinfo].Length; ++i)
                    {
                        VectorUtils.ScaleBy(y[iinfo][i], invn);
                    }

                    if (center[iinfo])
                    {
                        VectorUtils.ScaleBy(mean[iinfo], invn);
                        for (int i = 0; i < omega[iinfo].Length; i++)
                        {
                            VectorUtils.AddMult(mean[iinfo], y[iinfo][i], -VectorUtils.DotProduct(omega[iinfo][i], mean[iinfo]));
                        }
                    }
                }
            }
        }
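The loop above never materializes the covariance matrix: it relies on the identity (X'X / n) * w = (1/n) * sum_i x_i * (x_i . w), accumulating one rank-one update per row. A self-contained sketch of that identity on dense arrays (illustrative only; the transform itself works on VBuffers with optional weights and centering):

        // Computes (X'X / n) * w without forming the d-by-d matrix X'X.
        internal static float[] ProjectWithoutCovariance(float[][] rows, float[] w)
        {
            var y = new float[w.Length];
            foreach (var x in rows)
            {
                float dot = 0;
                for (int j = 0; j < w.Length; j++)
                    dot += x[j] * w[j];          // x . w
                for (int j = 0; j < w.Length; j++)
                    y[j] += x[j] * dot;          // rank-one update x * (x . w)
            }
            for (int j = 0; j < y.Length; j++)
                y[j] /= rows.Length;             // the 1/n factor
            return y;
        }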
Example #24
        /// <summary>
        /// Possible return values:
        ///
        /// Finite value: no infinite value in the sliding window and at least one non-NaN value
        /// NaN value: only NaN values in the sliding window, or both +Infinity and -Infinity present
        /// Infinite value: at least one infinite value in the sliding window (the sign is not relevant)
        /// </summary>
        internal static Single ComputeMovingAverageUniform(FixedSizeQueue <Single> others, Single input, int lag,
                                                           Single lastDropped, ref Single currentSum,
                                                           ref bool initUniformMovingAverage,
                                                           ref int nbNanValues)
        {
            if (initUniformMovingAverage)
            {
                initUniformMovingAverage = false;
                return(ComputeMovingAverageUniformInitialisation(others, input, lag,
                                                                 lastDropped, ref currentSum, ref nbNanValues));
            }
            else
            {
                if (Single.IsNaN(lastDropped))
                {
                    --nbNanValues;
                }
                else if (!FloatUtils.IsFinite(lastDropped))
                {
                    // An infinite value just left the window;
                    // we must recompute everything, since we don't know how many infinite values remain in the sliding window.
                    return(ComputeMovingAverageUniformInitialisation(others, input, lag,
                                                                     lastDropped, ref currentSum, ref nbNanValues));
                }
                else
                {
                    currentSum -= lastDropped;
                }

                // lastDropped is finite or NaN
                Contracts.Assert(FloatUtils.IsFinite(lastDropped) || Single.IsNaN(lastDropped));

                var newValue = lag == 0 ? input : others[others.Count - lag];
                if (!Single.IsNaN(newValue) && !FloatUtils.IsFinite(newValue))
                {
                    // An infinite value just entered the window;
                    // we must recompute everything, since we don't know how many infinite values are in the sliding window.
                    return(ComputeMovingAverageUniformInitialisation(others, input, lag,
                                                                     lastDropped, ref currentSum, ref nbNanValues));
                }

                // lastDropped is finite or NaN, and newValue is finite or NaN
                Contracts.Assert(FloatUtils.IsFinite(newValue) || Single.IsNaN(newValue));

                if (!Single.IsNaN(currentSum) && !FloatUtils.IsFinite(currentSum))
                {
                    if (Single.IsNaN(newValue))
                    {
                        ++nbNanValues;
                        return(currentSum);
                    }
                    else
                    {
                        return(FloatUtils.IsFinite(newValue) ? currentSum : (currentSum + newValue));
                    }
                }

                // lastDropped is finite or NaN, newValue is finite or NaN, currentSum is finite or NaN
                Contracts.Assert(FloatUtils.IsFinite(currentSum) || Single.IsNaN(currentSum));

                if (Single.IsNaN(newValue))
                {
                    ++nbNanValues;
                    int nb = (lag == 0 ? others.Count + 1 : others.Count - lag + 1) - nbNanValues;
                    return(nb == 0 ? Single.NaN : currentSum / nb);
                }
                else
                {
                    int nb = lag == 0 ? others.Count + 1 - nbNanValues : others.Count + 1 - nbNanValues - lag;
                    currentSum += newValue; // the value entering the window (equals input only when lag == 0)
                    return(nb == 0 ? Single.NaN : currentSum / nb);
                }
            }
        }
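The same return-value contract is easier to see on a toy version that rescans a plain window instead of maintaining a running sum (illustrative only; the production code above is incremental precisely to avoid this rescan):

        // Averages the finite values in a window: NaN when nothing finite remains,
        // an infinite result as soon as any infinity enters the sum.
        internal static float UniformAverage(System.Collections.Generic.IReadOnlyList<float> window)
        {
            float sum = 0;
            int count = 0;
            foreach (var v in window)
            {
                if (float.IsNaN(v))
                    continue;    // NaN values are skipped, not averaged
                sum += v;        // +Infinity and -Infinity propagate through the sum
                count++;
            }
            return count == 0 ? float.NaN : sum / count;
        }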
Example #25
 /// <summary>
 /// This should be overridden by derived classes. This implementation simply increments <see cref="NumIterExamples"/>.
 /// </summary>
 public virtual void ProcessDataInstance(IChannel ch, ref VBuffer <Float> feat, Float label, Float weight)
 {
     ch.Assert(FloatUtils.IsFinite(feat.Values, feat.Count));
     ++NumIterExamples;
 }
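A hypothetical derived-class override, sketched to show the intended pattern: call the base implementation to keep NumIterExamples in sync, then accumulate whatever per-instance statistics the trainer needs (the _weightedLabelSum field below is assumed for illustration, not part of the original class):

 public override void ProcessDataInstance(IChannel ch, ref VBuffer <Float> feat, Float label, Float weight)
 {
     base.ProcessDataInstance(ch, ref feat, label, weight);
     _weightedLabelSum += label * weight; // assumed field: running weighted label sum
 }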
Example #26
            public IPredictor Calibrate(IChannel ch, IDataView data, ICalibratorTrainer caliTrainer, int maxRows)
            {
                Host.CheckValue(ch, nameof(ch));
                ch.CheckValue(data, nameof(data));
                ch.CheckValue(caliTrainer, nameof(caliTrainer));

                if (caliTrainer.NeedsTraining)
                {
                    var bound = new Bound(this, new RoleMappedSchema(data.Schema));
                    using (var curs = data.GetRowCursor(col => true))
                    {
                        var scoreGetter = (ValueGetter <Single>)bound.CreateScoreGetter(curs, col => true, out Action disposer);

                        // We assume that we can use the label column of the first predictor, since if the labels are not identical
                        // then the whole model is garbage anyway.
                        var labelGetter = bound.GetLabelGetter(curs, 0, out Action disp);
                        disposer += disp;
                        var weightGetter = bound.GetWeightGetter(curs, 0, out disp);
                        disposer += disp;
                        try
                        {
                            int num = 0;
                            while (curs.MoveNext())
                            {
                                Single label = 0;
                                labelGetter(ref label);
                                if (!FloatUtils.IsFinite(label))
                                {
                                    continue;
                                }
                                Single score = 0;
                                scoreGetter(ref score);
                                if (!FloatUtils.IsFinite(score))
                                {
                                    continue;
                                }
                                Single weight = 0;
                                weightGetter(ref weight);
                                if (!FloatUtils.IsFinite(weight))
                                {
                                    continue;
                                }

                                caliTrainer.ProcessTrainingExample(score, label > 0, weight);

                                if (maxRows > 0 && ++num >= maxRows)
                                {
                                    break;
                                }
                            }
                        }
                        finally
                        {
                            disposer?.Invoke();
                        }
                    }
                }

                var calibrator = caliTrainer.FinishTraining(ch);

                return(CalibratorUtils.CreateCalibratedPredictor(Host, this, calibrator));
            }
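The loop above only feeds (score, label, weight) triples to the calibrator trainer and leaves the fitting to it. A simplified, self-contained sketch of that contract (the member names mirror the calls above; the Platt-style sigmoid fit is an illustrative stand-in, not ML.NET's implementation):

            internal sealed class ToySigmoidCalibratorTrainer
            {
                private readonly System.Collections.Generic.List<(float Score, bool Label, float Weight)> _data
                    = new System.Collections.Generic.List<(float, bool, float)>();

                public bool NeedsTraining => true;

                public void ProcessTrainingExample(float score, bool label, float weight)
                    => _data.Add((score, label, weight));

                // Fits P(y = 1 | s) = 1 / (1 + exp(a * s + b)) by gradient descent
                // on the weighted negative log-likelihood.
                public (double A, double B) FinishTraining()
                {
                    double a = 0, b = 0;
                    const double lr = 0.01;
                    for (int iter = 0; iter < 1000; iter++)
                    {
                        double ga = 0, gb = 0;
                        foreach (var (s, y, w) in _data)
                        {
                            double p = 1 / (1 + Math.Exp(a * s + b));
                            double err = (y ? 1.0 : 0.0) - p;
                            ga += w * err * s;  // d(NLL)/da
                            gb += w * err;      // d(NLL)/db
                        }
                        a -= lr * ga / _data.Count;
                        b -= lr * gb / _data.Count;
                    }
                    return (a, b);
                }
            }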
Example #27
            /// <summary>
            /// An implementation of the line search for the Wolfe conditions, from Nocedal &amp; Wright
            /// </summary>
            internal virtual bool LineSearch(IChannel ch, bool force)
            {
                Contracts.AssertValue(ch);
                Float dirDeriv = VectorUtils.DotProduct(ref _dir, ref _grad);

                if (dirDeriv == 0)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum."));
                }

                // If a non-descent direction is chosen, the line search will break anyway, so throw here.
                // The most likely reason for this is a bug in the objective function's gradient computation.
                ch.Check(dirDeriv < 0, "L-BFGS chose a non-descent direction.");

                Float c1 = (Float)1e-4 * dirDeriv;
                Float c2 = (Float)0.9 * dirDeriv;

                Float alpha = (Iter == 1 ? (1 / VectorUtils.Norm(_dir)) : 1);

                PointValueDeriv last = new PointValueDeriv(0, LastValue, dirDeriv);
                PointValueDeriv aLo  = new PointValueDeriv();
                PointValueDeriv aHi  = new PointValueDeriv();

                // initial bracketing phase
                while (true)
                {
                    VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
                    if (EnforceNonNegativity)
                    {
                        VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
                        {
                            if (newXval < 0.0)
                            {
                                newXval = 0;
                            }
                        });
                    }

                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;
                    if (Float.IsPositiveInfinity(Value))
                    {
                        alpha /= 2;
                        continue;
                    }

                    if (!FloatUtils.IsFinite(Value))
                    {
                        throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
                    }

                    dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);
                    PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

                    if ((curr.V > LastValue + c1 * alpha) || (last.A > 0 && curr.V >= last.V))
                    {
                        aLo = last;
                        aHi = curr;
                        break;
                    }
                    else if (Math.Abs(curr.D) <= -c2)
                    {
                        return(true);
                    }
                    else if (curr.D >= 0)
                    {
                        aLo = curr;
                        aHi = last;
                        break;
                    }

                    last = curr;
                    if (alpha == 0)
                    {
                        alpha = Float.Epsilon; // Robust to divisional underflow.
                    }
                    else
                    {
                        alpha *= 2;
                    }
                }

                Float minChange = (Float)0.01;
                int   maxSteps  = 10;

                // this loop is the "zoom" procedure described in Nocedal & Wright
                for (int step = 0; ; ++step)
                {
                    if (step == maxSteps && !force)
                    {
                        return(false);
                    }

                    PointValueDeriv left  = aLo.A < aHi.A ? aLo : aHi;
                    PointValueDeriv right = aLo.A < aHi.A ? aHi : aLo;
                    if (left.D > 0 && right.D < 0)
                    {
                        // interpolating cubic would have max in range, not min (can this happen?)
                        // set a to the one with smaller value
                        alpha = aLo.V < aHi.V ? aLo.A : aHi.A;
                    }
                    else
                    {
                        alpha = CubicInterp(aLo, aHi);
                        if (Float.IsNaN(alpha) || Float.IsInfinity(alpha))
                        {
                            alpha = (aLo.A + aHi.A) / 2;
                        }
                    }

                    // this is to ensure that the new point is within bounds
                    // and that the change is reasonably sized
                    Float ub = (minChange * left.A + (1 - minChange) * right.A);
                    if (alpha > ub)
                    {
                        alpha = ub;
                    }
                    Float lb = (minChange * right.A + (1 - minChange) * left.A);
                    if (alpha < lb)
                    {
                        alpha = lb;
                    }

                    VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
                    if (EnforceNonNegativity)
                    {
                        VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
                        {
                            if (newXval < 0.0)
                            {
                                newXval = 0;
                            }
                        });
                    }

                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;
                    if (!FloatUtils.IsFinite(Value))
                    {
                        throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
                    }
                    dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);

                    PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

                    if ((curr.V > LastValue + c1 * alpha) || (curr.V >= aLo.V))
                    {
                        if (aHi.A == curr.A)
                        {
                            if (force)
                            {
                                throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                            }
                            else
                            {
                                return(false);
                            }
                        }
                        aHi = curr;
                    }
                    else if (Math.Abs(curr.D) <= -c2)
                    {
                        return(true);
                    }
                    else
                    {
                        if (curr.D * (aHi.A - aLo.A) >= 0)
                        {
                            aHi = aLo;
                        }
                        if (aLo.A == curr.A)
                        {
                            if (force)
                            {
                                throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                            }
                            else
                            {
                                return(false);
                            }
                        }
                        aLo = curr;
                    }
                }
            }
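CubicInterp is referenced but not shown in this snippet. A standard zoom-phase cubic interpolation, following Nocedal &amp; Wright (Numerical Optimization, eq. 3.59), could be sketched as below, assuming PointValueDeriv exposes A (the step), V (the function value), and D (the directional derivative):

            // Minimizer of the cubic that interpolates the value and derivative at p0 and p1.
            private static Float CubicInterp(PointValueDeriv p0, PointValueDeriv p1)
            {
                double t1 = p0.D + p1.D - 3 * (p0.V - p1.V) / (p0.A - p1.A);
                double t2 = Math.Sign(p1.A - p0.A) * Math.Sqrt(t1 * t1 - p0.D * p1.D);
                double num = p1.D + t2 - t1;
                double denom = p1.D - p0.D + 2 * t2;
                return (Float)(p1.A - (p1.A - p0.A) * num / denom);
            }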