/// <summary>
/// Describes how the transformer handles one input-output column pair.
/// </summary>
/// <param name="input">Name of the input column.</param>
/// <param name="output">Name of the column resulting from the transformation of <paramref name="input"/>. Null means <paramref name="input"/> is replaced.</param>
/// <param name="colors">What colors to extract.</param>
/// <param name="interleave">Whether to interleave the pixel values (e.g. ARGB, ARGB, ...) rather than lay them out one color plane at a time.</param>
/// <param name="scale">Scale each color pixel value by this amount.</param>
/// <param name="offset">Offset each color pixel value by this amount.</param>
/// <param name="asFloat">Output array as float array. If false, output as byte array.</param>
public ColumnInfo(string input, string output = null, ColorBits colors = Defaults.Colors,
    bool interleave = Defaults.Interleave, float scale = Defaults.Scale,
    float offset = Defaults.Offset, bool asFloat = Defaults.Convert)
{
    Contracts.CheckNonWhiteSpace(input, nameof(input));

    Input = input;
    Output = output ?? input;
    Colors = colors;

    if ((Colors & ColorBits.Alpha) == ColorBits.Alpha)
        Planes++;
    if ((Colors & ColorBits.Red) == ColorBits.Red)
        Planes++;
    if ((Colors & ColorBits.Green) == ColorBits.Green)
        Planes++;
    if ((Colors & ColorBits.Blue) == ColorBits.Blue)
        Planes++;
    Contracts.CheckParam(Planes > 0, nameof(colors), "Need to use at least one color plane");

    Interleave = interleave;
    AsFloat = asFloat;
    if (!AsFloat)
    {
        Offset = Defaults.Offset;
        Scale = Defaults.Scale;
    }
    else
    {
        Offset = offset;
        Scale = scale;
    }
    Contracts.CheckParam(FloatUtils.IsFinite(Offset), nameof(offset));
    Contracts.CheckParam(FloatUtils.IsFiniteNonZero(Scale), nameof(scale));
}
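A minimal usage sketch of the constructor above; the column names and the 1/255 scaling are illustrative assumptions, not taken from the source:

// Extract the R, G and B planes as floats, scaled from [0, 255] bytes into [0, 1].
var colInfo = new ColumnInfo(
    input: "ImagePixels",          // hypothetical input column name
    output: "Features",            // hypothetical output column name
    colors: ColorBits.Red | ColorBits.Green | ColorBits.Blue,
    interleave: false,             // lay out one color plane at a time
    scale: 1f / 255f,              // must be finite and non-zero
    offset: 0f,                    // must be finite
    asFloat: true);                // scale/offset only take effect for float output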
public ColInfoEx(Column item, Arguments args)
{
    if (item.ContainsAlpha ?? args.ContainsAlpha)
    {
        Colors |= ColorBits.Alpha;
        Planes++;
    }
    if (item.ContainsRed ?? args.ContainsRed)
    {
        Colors |= ColorBits.Red;
        Planes++;
    }
    if (item.ContainsGreen ?? args.ContainsGreen)
    {
        Colors |= ColorBits.Green;
        Planes++;
    }
    if (item.ContainsBlue ?? args.ContainsBlue)
    {
        Colors |= ColorBits.Blue;
        Planes++;
    }
    Contracts.CheckUserArg(Planes > 0, nameof(item.ContainsRed), "Need to use at least one color plane");

    Interleave = item.InterleaveArgb ?? args.InterleaveArgb;

    Width = item.ImageWidth ?? args.ImageWidth;
    Height = item.ImageHeight ?? args.ImageHeight;
    Offset = item.Offset ?? args.Offset ?? 0;
    Scale = item.Scale ?? args.Scale ?? 1;
    Contracts.CheckUserArg(FloatUtils.IsFinite(Offset), nameof(item.Offset));
    Contracts.CheckUserArg(FloatUtils.IsFiniteNonZero(Scale), nameof(item.Scale));
}
public override bool Accept()
{
    if (!base.Accept())
        return false;
    if (_get != null)
    {
        _get(ref Features);
        if (!_keepBad && !FloatUtils.IsFinite(Features.Values, Features.Count))
        {
            _badCount++;
            return false;
        }
    }
    return true;
}
/// <summary>
/// Print the linear model as code.
/// </summary>
public static void SaveAsCode(TextWriter writer, ref VBuffer<Float> weights, Float bias,
    RoleMappedSchema schema, string codeVariable = "output")
{
    Contracts.CheckValue(writer, nameof(writer));
    Contracts.CheckValueOrNull(schema);

    var featureNames = default(VBuffer<ReadOnlyMemory<char>>);
    MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref featureNames);

    int numNonZeroWeights = 0;
    writer.Write(codeVariable);
    writer.Write(" = ");
    VBufferUtils.ForEachDefined(ref weights,
        (idx, value) =>
        {
            if (Math.Abs(value - 0) >= Epsilon)
            {
                if (numNonZeroWeights > 0)
                    writer.Write(" + ");

                writer.Write(FloatUtils.ToRoundTripString(value));
                writer.Write("*");
                if (featureNames.Count > 0)
                    writer.Write(FeatureNameAsCode(featureNames.GetItemOrDefault(idx).ToString(), idx));
                else
                    writer.Write("f_" + idx);

                numNonZeroWeights++;
            }
        });

    if (numNonZeroWeights > 0)
        writer.Write(" + ");
    writer.Write(FloatUtils.ToRoundTripString(bias));
    writer.WriteLine(";");
}
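For illustration, a dense weight vector (1.5, 0, -2) with bias 0.25 and no feature-name metadata would make the method above emit a line like the following (a worked example, not output captured from the source; the exact digits depend on ToRoundTripString's formatting):

output = 1.5*f_0 + -2*f_2 + 0.25;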
private float getEffectiveWeightFromBody(Rigidbody body3D, Rigidbody2D body2D)
{
    if (!body2D && !body3D)
        return 0f;

    Vector3 velocity = body2D ? (Vector3)body2D.velocity : body3D.velocity;
    float mass = body2D ? body2D.mass : body3D.mass;
    bool colliderIsNull = body2D ? !body2D.GetComponent<Collider2D>() : !body3D.GetComponent<Collider>();
    bool colliderIsTrigger = !colliderIsNull &&
        (body2D ? body2D.GetComponent<Collider2D>().isTrigger : body3D.GetComponent<Collider>().isTrigger);

    // Only count the body's mass if it is (nearly) at rest vertically and has a solid, non-trigger collider.
    if (FloatUtils.IsFirstFloatPreciselySmallerOrEqualToSecond(Mathf.Abs(velocity.y), 0.1f) &&
        !colliderIsNull && !colliderIsTrigger)
    {
        return mass;
    }
    return 0f;
}
private GaussianFourierSampler(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.AssertValue(env);
    _host = env.Register(LoadName);
    _host.AssertValue(ctx);

    // *** Binary format ***
    // int: sizeof(Float)
    // Float: gamma

    int cbFloat = ctx.Reader.ReadInt32();
    _host.CheckDecode(cbFloat == sizeof(float));

    _gamma = ctx.Reader.ReadFloat();
    _host.CheckDecode(FloatUtils.IsFinite(_gamma));
}
void FixedUpdate()
{
    moveDirection.Set(0, 0);
    if (eat.dead || eat.eating)
        return;

    if (!FloatUtils.CloseEnough(Input.GetAxis("Horizontal"), 0f))
        moveDirection.Set(Input.GetAxis("Horizontal") * speed, 0);
    if (!FloatUtils.CloseEnough(Input.GetAxis("Vertical"), 0f))
        moveDirection.Set(moveDirection.x, Input.GetAxis("Vertical") * speed);
    if (transform.position.y < minYPosition.position.y)
        moveDirection.Set(moveDirection.x, Mathf.Max(0, moveDirection.y));

    rb.MovePosition(rb.position + moveDirection * Time.fixedDeltaTime);

    if (moveDirection.x < 0)
        direction = Direction.W;
    else if (moveDirection.x > 0)
        direction = Direction.E;
    else if (FloatUtils.CloseEnough(moveDirection.x, 0, 0.01f))
        direction = Direction.S;

    if (direction != lastDirection)
    {
        lastDirection = direction;
        anim.SetInteger("Direction", (int)direction);
    }
}
// This is absolute error near zero and relative error away from zero.
private static double Diff(double d1, double d2)
{
    if (d1 == d2)
        return 0;
    if (FloatUtils.IsFinite(d1) && FloatUtils.IsFinite(d2))
        return Math.Abs(d1 - d2) / Math.Max(1, Math.Max(Math.Abs(d1), Math.Abs(d2)));
    if (double.IsNaN(d1) && double.IsNaN(d2))
        return 0;
    return double.PositiveInfinity;
}
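Two worked evaluations of Diff to make the hybrid metric concrete (values chosen for illustration):

// Near zero the denominator clamps at 1, so the result is the absolute error:
//   Diff(1e-9, 0)  == |1e-9 - 0|   / Math.Max(1, 1e-9) == 1e-9
// Away from zero the larger magnitude dominates, giving a relative error:
//   Diff(2e6, 1e6) == |2e6 - 1e6|  / Math.Max(1, 2e6)  == 0.5
// Two NaNs compare as equal (0); NaN vs. a finite value yields PositiveInfinity.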
private PcaPredictor(IHostEnvironment env, ModelLoadContext ctx)
    : base(env, RegistrationName, ctx)
{
    // *** Binary format ***
    // int: dimension (aka. number of features)
    // int: rank
    // bool: center
    // If (center)
    //     Float[]: mean vector
    // Float[][]: eigenvectors

    _dimension = ctx.Reader.ReadInt32();
    // Note: an int is always "finite", so a FloatUtils.IsFinite check here would be
    // vacuous; validate the decoded values directly instead.
    Host.CheckDecode(_dimension > 0);
    _rank = ctx.Reader.ReadInt32();
    Host.CheckDecode(0 < _rank && _rank <= _dimension);

    bool center = ctx.Reader.ReadBoolByte();
    if (center)
    {
        var meanArray = ctx.Reader.ReadFloatArray(_dimension);
        Host.CheckDecode(meanArray.All(FloatUtils.IsFinite));
        _mean = new VBuffer<Float>(_dimension, meanArray);
        _norm2Mean = VectorUtils.NormSquared(_mean);
    }
    else
    {
        _mean = VBufferUtils.CreateEmpty<Float>(_dimension);
        _norm2Mean = 0;
    }

    _eigenVectors = new VBuffer<Float>[_rank];
    _meanProjected = new Float[_rank];
    for (int i = 0; i < _rank; ++i)
    {
        var vi = ctx.Reader.ReadFloatArray(_dimension);
        Host.CheckDecode(vi.All(FloatUtils.IsFinite));
        _eigenVectors[i] = new VBuffer<Float>(_dimension, vi);
        _meanProjected[i] = VectorUtils.DotProduct(ref _eigenVectors[i], ref _mean);
    }

    WarnOnOldNormalizer(ctx, GetType(), Host);

    _inputType = new VectorType(NumberType.Float, _dimension);
}
private static double CalculateAvgLoss(IChannel ch, RoleMappedData data, bool norm, float[] linearWeights,
    AlignedArray latentWeightsAligned, int latentDimAligned, AlignedArray latentSum, int[] featureFieldBuffer,
    int[] featureIndexBuffer, float[] featureValueBuffer, VBuffer<float> buffer, ref long badExampleCount)
{
    var featureColumns = data.Schema.GetColumns(RoleMappedSchema.ColumnRole.Feature);
    Func<int, bool> pred = c => featureColumns.Select(ci => ci.Index).Contains(c)
        || c == data.Schema.Label.Index
        || (data.Schema.Weight != null && c == data.Schema.Weight.Index);
    var getters = new ValueGetter<VBuffer<float>>[featureColumns.Count];
    float label = 0;
    float weight = 1;
    double loss = 0;
    float modelResponse = 0;
    long exampleCount = 0;
    badExampleCount = 0;
    int count = 0;
    using (var cursor = data.Data.GetRowCursor(pred))
    {
        var labelGetter = cursor.GetGetter<float>(data.Schema.Label.Index);
        var weightGetter = data.Schema.Weight == null ? null : cursor.GetGetter<float>(data.Schema.Weight.Index);
        for (int f = 0; f < featureColumns.Count; f++)
            getters[f] = cursor.GetGetter<VBuffer<float>>(featureColumns[f].Index);
        while (cursor.MoveNext())
        {
            labelGetter(ref label);
            weightGetter?.Invoke(ref weight);
            // If either label or weight is NaN or infinite, NaN propagates into
            // annihilation, so a single finiteness check covers both values.
            float annihilation = label - label + weight - weight;
            if (!FloatUtils.IsFinite(annihilation))
            {
                badExampleCount++;
                continue;
            }
            if (!FieldAwareFactorizationMachineUtils.LoadOneExampleIntoBuffer(getters, buffer, norm, ref count,
                featureFieldBuffer, featureIndexBuffer, featureValueBuffer))
            {
                badExampleCount++;
                continue;
            }
            FieldAwareFactorizationMachineInterface.CalculateIntermediateVariables(featureColumns.Count,
                latentDimAligned, count, featureFieldBuffer, featureIndexBuffer, featureValueBuffer,
                linearWeights, latentWeightsAligned, latentSum, ref modelResponse);
            loss += weight * CalculateLoss(label, modelResponse);
            exampleCount++;
        }
    }
    return loss / exampleCount;
}
public IEnumerator ZoomOverTime(float damping, float targetZoom)
{
    var currentZoom = _camera.orthographicSize;
    var currentZoomVelocity = 0.0f;
    while (true)
    {
        if (FloatUtils.IsApproximately(currentZoom, targetZoom, _zoomThreshold))
        {
            _camera.orthographicSize = targetZoom;
            yield break;
        }
        currentZoom = Mathf.SmoothDamp(currentZoom, targetZoom, ref currentZoomVelocity, damping);
        _camera.orthographicSize = currentZoom;
        yield return null;
    }
}
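A minimal way to drive the coroutine above from the MonoBehaviour that owns it (the damping and zoom values are illustrative):

// Smoothly damp the orthographic camera toward size 8 over subsequent frames.
StartCoroutine(ZoomOverTime(damping: 0.25f, targetZoom: 8f));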
private ColumnInfo(string input, string output, ColorBits colors, bool interleave, bool convert, float scale, float offset)
{
    Contracts.CheckNonEmpty(input, nameof(input));
    Contracts.CheckNonEmpty(output, nameof(output));

    Input = input;
    Output = output;
    Colors = colors;

    if ((Colors & ColorBits.Alpha) == ColorBits.Alpha)
        Planes++;
    if ((Colors & ColorBits.Red) == ColorBits.Red)
        Planes++;
    if ((Colors & ColorBits.Green) == ColorBits.Green)
        Planes++;
    if ((Colors & ColorBits.Blue) == ColorBits.Blue)
        Planes++;
    Contracts.CheckParam(Planes > 0, nameof(colors), "Need to use at least one color plane");

    Interleave = interleave;
    Convert = convert;
    if (!Convert)
    {
        Offset = 0;
        Scale = 1;
    }
    else
    {
        Offset = offset;
        Scale = scale;
        Contracts.CheckParam(FloatUtils.IsFinite(Offset), nameof(offset));
        Contracts.CheckParam(FloatUtils.IsFiniteNonZero(Scale), nameof(scale));
    }
}
/// <summary>
/// Constructs a new linear predictor.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="name">Component name.</param>
/// <param name="weights">The weights for the linear predictor. Note that this
/// will take ownership of the <see cref="VBuffer{T}"/>.</param>
/// <param name="bias">The bias added to every output score.</param>
internal LinearPredictor(IHostEnvironment env, string name, ref VBuffer<Float> weights, Float bias)
    : base(env, name)
{
    Host.CheckParam(FloatUtils.IsFinite(weights.Values, weights.Count), nameof(weights),
        "Cannot initialize linear predictor with non-finite weights");
    Host.CheckParam(FloatUtils.IsFinite(bias), nameof(bias),
        "Cannot initialize linear predictor with non-finite bias");

    Weight = weights;
    Bias = bias;
    InputType = new VectorType(NumberType.Float, Weight.Length);

    if (Weight.IsDense)
        _weightsDense = Weight;
    else
        _weightsDenseLock = new object();
}
internal ColumnInfo(Column item, Arguments args)
{
    Contracts.CheckValue(item, nameof(item));
    Contracts.CheckValue(args, nameof(args));

    Input = item.Source ?? item.Name;
    Output = item.Name;

    if (item.UseAlpha ?? args.UseAlpha)
    {
        Colors |= ColorBits.Alpha;
        Planes++;
    }
    if (item.UseRed ?? args.UseRed)
    {
        Colors |= ColorBits.Red;
        Planes++;
    }
    if (item.UseGreen ?? args.UseGreen)
    {
        Colors |= ColorBits.Green;
        Planes++;
    }
    if (item.UseBlue ?? args.UseBlue)
    {
        Colors |= ColorBits.Blue;
        Planes++;
    }
    Contracts.CheckUserArg(Planes > 0, nameof(item.UseRed), "Need to use at least one color plane");

    Interleave = item.InterleaveArgb ?? args.InterleaveArgb;

    Convert = item.Convert ?? args.Convert;
    if (!Convert)
    {
        Offset = 0;
        Scale = 1;
    }
    else
    {
        Offset = item.Offset ?? args.Offset ?? 0;
        Scale = item.Scale ?? args.Scale ?? 1;
        Contracts.CheckUserArg(FloatUtils.IsFinite(Offset), nameof(item.Offset));
        Contracts.CheckUserArg(FloatUtils.IsFiniteNonZero(Scale), nameof(item.Scale));
    }
}
protected bool TryNormalize(VBuffer<Single>[] values)
{
    if (!Normalize)
        return true;

    for (int i = 0; i < values.Length; i++)
    {
        // Leave a zero vector as all zeros. Otherwise, make the L1 norm equal to 1.
        var sum = VectorUtils.L1Norm(in values[i]);
        if (!FloatUtils.IsFinite(sum))
            return false;
        if (sum > 0)
            VectorUtils.ScaleBy(ref values[i], 1 / sum);
    }
    return true;
}
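A self-contained sketch of the L1 normalization TryNormalize performs, using plain arrays instead of VBuffer/VectorUtils (illustrative only, assuming System is imported):

float[] v = { 1f, -3f };                  // L1 norm = |1| + |-3| = 4
float sum = 0f;
foreach (var x in v)
    sum += Math.Abs(x);
// Non-finite norm means bad input; a zero vector is left as all zeros.
if (!float.IsNaN(sum) && !float.IsInfinity(sum) && sum > 0)
{
    for (int i = 0; i < v.Length; i++)
        v[i] /= sum;                      // v becomes { 0.25f, -0.75f }
}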
/// <summary>
/// This should be overridden by derived classes. This implementation simply increments
/// _numIterExamples and dumps debug information to the console.
/// </summary>
protected virtual void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat, Float label, Float weight)
{
    Contracts.Assert(FloatUtils.IsFinite(feat.Values, feat.Count));

    ++NumIterExamples;
#if OLD_TRACING // REVIEW: How should this be ported?
    if (DebugLevel > 2)
    {
        Vector features = instance.Features;
        Host.StdOut.Write("Instance has label {0} and {1} features:", instance.Label, features.Length);
        for (int i = 0; i < features.Length; i++)
        {
            Host.StdOut.Write('\t');
            Host.StdOut.Write(features[i]);
        }
        Host.StdOut.WriteLine();
    }

    if (DebugLevel > 1)
    {
        if (_numIterExamples % 5000 == 0)
        {
            Host.StdOut.Write('.');
            if (_numIterExamples % 500000 == 0)
            {
                Host.StdOut.Write(" ");
                Host.StdOut.Write(_numIterExamples);
                if (_numIterExamples % 5000000 == 0)
                {
                    Host.StdOut.Write(" ");
                    Host.StdOut.Write(DateTime.UtcNow);
                }
                Host.StdOut.WriteLine();
            }
        }
    }
#endif
}
protected override float AccumulateOneGradient(ref VBuffer<float> feat, float label, float weight,
    ref VBuffer<float> x, ref VBuffer<float> grad, ref float[] scores)
{
    if (Utils.Size(scores) < _numClasses)
        scores = new float[_numClasses];

    float bias = 0;
    for (int c = 0, start = _numClasses; c < _numClasses; c++, start += NumFeatures)
    {
        x.GetItemOrDefault(c, ref bias);
        scores[c] = bias + VectorUtils.DotProductWithOffset(ref x, start, ref feat);
    }

    float logZ = MathUtils.SoftMax(scores, _numClasses);
    float datumLoss = logZ;

    int lab = (int)label;
    Contracts.Assert(0 <= lab && lab < _numClasses);
    for (int c = 0, start = _numClasses; c < _numClasses; c++, start += NumFeatures)
    {
        float probLabel = lab == c ? 1 : 0;
        datumLoss -= probLabel * scores[c];

        float modelProb = MathUtils.ExpSlow(scores[c] - logZ);
        float mult = weight * (modelProb - probLabel);
        VectorUtils.AddMultWithOffset(ref feat, mult, ref grad, start);
        // Due to the call to EnsureBiases, we know this region is dense.
        Contracts.Assert(grad.Count >= BiasCount && (grad.IsDense || grad.Indices[BiasCount - 1] == BiasCount - 1));
        grad.Values[c] += mult;
    }

    Contracts.Check(FloatUtils.IsFinite(datumLoss), "Data contain bad values.");
    return weight * datumLoss;
}
public override void ProcessRow()
{
    float label = 0;
    _labelGetter(ref label);
    _scoreGetter(ref Score);

    if (float.IsNaN(label))
    {
        NumUnlabeledInstances++;
        return;
    }

    if (IsNaN(ref Score))
    {
        NumBadScores++;
        return;
    }

    float weight = 1;
    if (_weightGetter != null)
    {
        _weightGetter(ref weight);
        if (!FloatUtils.IsFinite(weight))
        {
            NumBadWeights++;
            weight = 1;
        }
    }

    ApplyLossFunction(ref Score, label, ref Loss);
    UnweightedCounters.Update(ref Score, label, 1, ref Loss);
    if (WeightedCounters != null)
        WeightedCounters.Update(ref Score, label, weight, ref Loss);
}
public void Save(ModelSaveContext ctx)
{
    Contracts.AssertValue(ctx);

    // *** Binary format ***
    // int: Dimension
    // int: Rank
    // for i=0,..,Rank-1:
    //     float[]: the i'th eigenvector
    // int: the size of MeanProjected (0 if it is null)
    // float[]: MeanProjected

    Contracts.Assert(0 < Rank && Rank <= Dimension);
    ctx.Writer.Write(Dimension);
    ctx.Writer.Write(Rank);
    for (int i = 0; i < Rank; i++)
    {
        Contracts.Assert(FloatUtils.IsFinite(Eigenvectors[i]));
        ctx.Writer.WriteSinglesNoCount(Eigenvectors[i].AsSpan(0, Dimension));
    }
    Contracts.Assert(MeanProjected == null || (MeanProjected.Length == Rank && FloatUtils.IsFinite(MeanProjected)));
    ctx.Writer.WriteSingleArray(MeanProjected);
}
/// <summary>
/// Finds an approximate minimum of the function.
/// </summary>
/// <param name="function">Function to minimize</param>
/// <param name="initial">Initial point</param>
/// <param name="result">Approximate minimum</param>
public void Minimize(DifferentiableFunction function, ref VBuffer<Float> initial, ref VBuffer<Float> result)
{
    Contracts.Check(FloatUtils.IsFinite(initial.Values, initial.Count),
        "The initial vector contains NaNs or infinite values.");
    LineFunc lineFunc = new LineFunc(function, ref initial, UseCG);
    VBuffer<Float> prev = default(VBuffer<Float>);
    initial.CopyTo(ref prev);

    for (int n = 0; _maxSteps == 0 || n < _maxSteps; ++n)
    {
        Float step = LineSearch.Minimize(lineFunc.Eval, lineFunc.Value, lineFunc.Deriv);
        var newPoint = lineFunc.NewPoint;
        bool terminateNow = n > 0 && TerminateTester.ShouldTerminate(ref newPoint, ref prev);
        if (terminateNow || Terminate(ref newPoint))
            break;
        newPoint.CopyTo(ref prev);
        lineFunc.ChangeDir();
    }

    lineFunc.NewPoint.CopyTo(ref result);
}
public void Aggregate(double diff, int line, int col)
{
    if (diff == 0)
        return;

    if (!FloatUtils.IsFinite(diff))
    {
        InfCount++;
        return;
    }

    if (DiffMax < diff)
    {
        DiffMax = diff;
        LineMax = line;
        ColMax = col;
    }
    DiffTot += diff;
    DiffCount++;
}
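A hypothetical driver loop showing how Aggregate and the Diff function from the earlier example combine when comparing two rows of values (the DiffStats holder and all values are illustrative, not from the source):

var stats = new DiffStats();   // hypothetical type exposing the Aggregate method above
double[] expected = { 1.0, 2.0, double.NaN };
double[] actual   = { 1.0, 2.5, double.NaN };
for (int i = 0; i < expected.Length; i++)
    stats.Aggregate(Diff(expected[i], actual[i]), line: 1, col: i);
// stats.DiffMax == 0.2 (from |2.5 - 2| / 2.5), recorded at line 1, col 1;
// the matching NaN pair contributes 0 and is skipped.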
/// <summary>
/// Samples new hyperparameters for the trainer, and sets them.
/// Returns true on success (new hyperparameters were suggested and set); otherwise, false.
/// </summary>
private static bool SampleHyperparameters(MLContext context, SuggestedTrainer trainer,
    IEnumerable<SuggestedPipelineRunDetail> history, bool isMaximizingMetric, IChannel logger)
{
    try
    {
        var sps = ConvertToValueGenerators(trainer.SweepParams);
        var sweeper = new SmacSweeper(context, new SmacSweeper.Arguments { SweptParameters = sps });

        IEnumerable<SuggestedPipelineRunDetail> historyToUse = history
            .Where(r => r.RunSucceeded
                && r.Pipeline.Trainer.TrainerName == trainer.TrainerName
                && r.Pipeline.Trainer.HyperParamSet != null
                && r.Pipeline.Trainer.HyperParamSet.Any()
                && FloatUtils.IsFinite(r.Score));

        // Get a new set of hyperparameter values.
        var proposedParamSet = sweeper.ProposeSweeps(1, historyToUse.Select(h => h.ToRunResult(isMaximizingMetric))).FirstOrDefault();
        if (!proposedParamSet.Any())
            return false;

        // Associate the proposed parameter set with the trainer, so that smart hyperparameter
        // sweepers (like KDO) can map them back.
        trainer.SetHyperparamValues(proposedParamSet);
        return true;
    }
    catch (Exception ex)
    {
        logger.Error($"SampleHyperparameters failed with exception: {ex}");
        throw;
    }
}
protected override TModel TrainModelCore(TrainContext context)
{
    Host.CheckValue(context, nameof(context));
    var initPredictor = context.InitialPredictor;
    var initLinearPred = initPredictor as LinearPredictor ?? (initPredictor as CalibratedPredictorBase)?.SubPredictor as LinearPredictor;
    Host.CheckParam(initPredictor == null || initLinearPred != null, nameof(context), "Not a linear predictor.");
    var data = context.TrainingSet;

    data.CheckFeatureFloatVector(out int numFeatures);
    CheckLabel(data);

    using (var ch = Host.Start("Training"))
    {
        InitCore(ch, numFeatures, initLinearPred);
        // InitCore should set the number of features field.
        Contracts.Assert(NumFeatures > 0);

        TrainCore(ch, data);

        if (NumBad > 0)
        {
            ch.Warning(
                "Skipped {0} instances with missing features during training (over {1} iterations; {2} inst/iter)",
                NumBad, Args.NumIterations, NumBad / Args.NumIterations);
        }

        Contracts.Assert(WeightsScale == 1);
        Float maxNorm = Math.Max(VectorUtils.MaxNorm(ref Weights), Math.Abs(Bias));
        Contracts.Check(FloatUtils.IsFinite(maxNorm),
            "The weights/bias contain invalid values (NaN or Infinite). Potential causes: high learning rates, no normalization, high initial weights, etc.");
        ch.Done();
    }
    return CreatePredictor();
}
/// <summary>
/// Initialize predictor from a binary file.
/// </summary>
/// <param name="ctx">The load context</param>
/// <param name="env">The host environment</param>
private KMeansModelParameters(IHostEnvironment env, ModelLoadContext ctx)
    : base(env, LoaderSignature, ctx)
{
    // *** Binary format ***
    // int: k, number of clusters
    // int: dimensionality, length of the centroid vectors
    // for each cluster, then:
    //     int: count of this centroid vector (sparse iff count < dimensionality)
    //     int[count]: only present if sparse, in order indices
    //     Float[count]: centroid vector values

    _k = ctx.Reader.ReadInt32();
    Host.CheckDecode(_k > 0);
    _dimensionality = ctx.Reader.ReadInt32();
    Host.CheckDecode(_dimensionality > 0);

    _centroidL2s = new Float[_k];
    _centroids = new VBuffer<Float>[_k];
    for (int i = 0; i < _k; i++)
    {
        // Prior to allowing sparse vectors, count was not written and was implicitly
        // always equal to dimensionality, and no indices were written either.
        int count = ctx.Header.ModelVerWritten >= 0x00010002 ? ctx.Reader.ReadInt32() : _dimensionality;
        Host.CheckDecode(0 <= count && count <= _dimensionality);
        var indices = count < _dimensionality ? ctx.Reader.ReadIntArray(count) : null;
        var values = ctx.Reader.ReadFloatArray(count);
        Host.CheckDecode(FloatUtils.IsFinite(values));
        _centroids[i] = new VBuffer<Float>(_dimensionality, count, values, indices);
    }
    WarnOnOldNormalizer(ctx, GetType(), Host);

    InitPredictor();

    _inputType = new VectorType(NumberType.Float, _dimensionality);
    _outputType = new VectorType(NumberType.Float, _k);
}
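To make the documented per-cluster layout concrete, here is a minimal sketch that writes one centroid in the format the comment describes; it uses a plain System.IO.BinaryWriter rather than the model save context, and all names are illustrative:

static void WriteCentroid(System.IO.BinaryWriter writer, int dimensionality, int[] indices, float[] values)
{
    // A count smaller than dimensionality signals a sparse centroid.
    writer.Write(values.Length);
    if (values.Length < dimensionality)
    {
        foreach (var idx in indices)   // in-order indices, only present if sparse
            writer.Write(idx);
    }
    foreach (var v in values)          // centroid vector values
        writer.Write(v);
}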
public void GetFeatures(int iCol, int iSlot, Random rand, long key, Span<float> features)
{
    _host.Assert(features.Length == NumFeatures);

    // Get counts from the count table in the first _labelBinCount indices.
    var countsBuffer = features.Slice(0, _labelBinCount);
    var countTable = _countTables[iCol, iSlot];
    countTable.GetCounts(key, countsBuffer);

    // Check if it's garbage and replace with garbage counts if true.
    float sum = 0;
    foreach (var feat in countsBuffer)
        sum += feat;
    bool isGarbage = sum < countTable.GarbageThreshold;
    if (isGarbage)
    {
        int i = 0;
        foreach (var count in countTable.GarbageCounts)
            countsBuffer[i++] = count;
    }

    sum = AddLaplacianNoisePerLabel(iCol, rand, countsBuffer);

    // Add log odds in the next _logOddsCount indices.
    GenerateLogOdds(iCol, countTable, countsBuffer, features.Slice(_labelBinCount, _logOddsCount), sum);
    _host.Assert(FloatUtils.IsFinite(features));

    // Add the last feature: an indicator for isGarbage.
    features[NumFeatures - 1] = isGarbage ? 1 : 0;
}
protected sealed override TModel TrainModelCore(TrainContext context)
{
    Host.CheckValue(context, nameof(context));
    var initPredictor = context.InitialPredictor;
    var initLinearPred = initPredictor as LinearPredictor ?? (initPredictor as CalibratedPredictorBase)?.SubPredictor as LinearPredictor;
    Host.CheckParam(initPredictor == null || initLinearPred != null, nameof(context), "Not a linear predictor.");
    var data = context.TrainingSet;

    data.CheckFeatureFloatVector(out int numFeatures);
    CheckLabels(data);

    using (var ch = Host.Start("Training"))
    {
        var state = MakeState(ch, numFeatures, initLinearPred);
        TrainCore(ch, data, state);

        ch.Assert(state.WeightsScale == 1);
        Float maxNorm = Math.Max(VectorUtils.MaxNorm(ref state.Weights), Math.Abs(state.Bias));
        ch.Check(FloatUtils.IsFinite(maxNorm),
            "The weights/bias contain invalid values (NaN or Infinite). Potential causes: high learning rates, no normalization, high initial weights, etc.");
        return state.CreatePredictor();
    }
}
internal OlsLinearRegressionPredictor(IHostEnvironment env, ref VBuffer<Float> weights, Float bias,
    Double[] standardErrors, Double[] tValues, Double[] pValues, Double rSquared, Double rSquaredAdjusted)
    : base(env, RegistrationName, ref weights, bias)
{
    Contracts.AssertValueOrNull(standardErrors);
    Contracts.AssertValueOrNull(tValues);
    Contracts.AssertValueOrNull(pValues);
    // If r-squared is NaN then the other statistics must be null; however, if r-squared is not NaN,
    // then the statistics may be null if creation of statistics was suppressed.
    Contracts.Assert(!Double.IsNaN(rSquaredAdjusted) || standardErrors == null);
    // Nullity or not must be consistent between the statistics.
    Contracts.Assert((standardErrors == null) == (tValues == null) && (tValues == null) == (pValues == null));
    Contracts.Assert(0 <= rSquared & rSquared <= 1);
    Contracts.Assert(Double.IsNaN(rSquaredAdjusted) | (0 <= rSquaredAdjusted & rSquaredAdjusted <= 1));
    if (standardErrors != null)
    {
        // If not null, the input arrays must have one value for each parameter.
        Contracts.Assert(Utils.Size(standardErrors) == weights.Length + 1);
        Contracts.Assert(Utils.Size(tValues) == weights.Length + 1);
        Contracts.Assert(Utils.Size(pValues) == weights.Length + 1);
#if DEBUG
        for (int i = 0; i <= weights.Length; ++i)
        {
            Contracts.Assert(FloatUtils.IsFinite(standardErrors[i]));
            Contracts.Assert(FloatUtils.IsFinite(tValues[i]));
            Contracts.Assert(FloatUtils.IsFinite(pValues[i]));
        }
#endif
    }
    _standardErrors = standardErrors;
    _tValues = tValues;
    _pValues = pValues;
    _rSquared = rSquared;
    _rSquaredAdjusted = rSquaredAdjusted;
}
private static FloatSphere GetShadowSphere(
    Float4x4 ndcToWorldMat,
    Float4x4 worldToLightMat,
    float shadowDistance)
{
    // Frustum of the camera that will be covered by the shadow map, in NDC space.
    // Note: this covers the entire screen but only to a certain depth.
    FloatBox shadowNDC = new FloatBox(
        min: (-1f, -1f, 0f),
        max: (1f, 1f, DepthUtils.LinearToDepth(
            shadowDistance,
            Camera.NEAR_CLIP_DISTANCE,
            Camera.FAR_CLIP_DISTANCE)));

    // Gather the corner points of the frustum.
    Span<Float3> points = stackalloc Float3[8];
    shadowNDC.GetPoints(points);

    // Transform all the points to light space (ndc -> world -> lightspace).
    Float4x4 ndcToLightMat = worldToLightMat * ndcToWorldMat; // hoisted out of the loop
    Float3 center = Float3.Zero;
    for (int i = 0; i < points.Length; i++)
    {
        points[i] = ndcToLightMat.TransformPoint(points[i]);
        center = i == 0 ? points[i] : (center + points[i]);
    }
    center /= points.Length;

    // Take the longest diagonal of the frustum and base our sphere on that.
    float squareDiag1 = (points[0] - points[6]).SquareMagnitude;
    float squareDiag2 = (points[2] - points[4]).SquareMagnitude;
    float radius = FloatUtils.SquareRoot(FloatUtils.Max(squareDiag1, squareDiag2)) * .5f;
    return new FloatSphere(center, radius);
}
public IPredictor Calibrate(IChannel ch, IDataView data, ICalibratorTrainer caliTrainer, int maxRows)
{
    Host.CheckValue(ch, nameof(ch));
    ch.CheckValue(data, nameof(data));
    ch.CheckValue(caliTrainer, nameof(caliTrainer));

    if (caliTrainer.NeedsTraining)
    {
        var bound = new Bound(this, new RoleMappedSchema(data.Schema));
        using (var curs = data.GetRowCursor(col => true))
        {
            var scoreGetter = (ValueGetter<Single>)bound.CreateScoreGetter(curs, col => true, out Action disposer);

            // We assume that we can use the label column of the first predictor, since if the labels are not identical
            // then the whole model is garbage anyway.
            var labelGetter = bound.GetLabelGetter(curs, 0, out Action disp);
            disposer += disp;
            var weightGetter = bound.GetWeightGetter(curs, 0, out disp);
            disposer += disp;
            try
            {
                int num = 0;
                while (curs.MoveNext())
                {
                    Single label = 0;
                    labelGetter(ref label);
                    if (!FloatUtils.IsFinite(label))
                        continue;
                    Single score = 0;
                    scoreGetter(ref score);
                    if (!FloatUtils.IsFinite(score))
                        continue;
                    Single weight = 0;
                    weightGetter(ref weight);
                    if (!FloatUtils.IsFinite(weight))
                        continue;

                    caliTrainer.ProcessTrainingExample(score, label > 0, weight);

                    if (maxRows > 0 && ++num >= maxRows)
                        break;
                }
            }
            finally
            {
                disposer?.Invoke();
            }
        }
    }
    var calibrator = caliTrainer.FinishTraining(ch);
    return CalibratorUtils.CreateCalibratedPredictor(Host, this, calibrator);
}
private OlsLinearRegressionPredictor TrainCore(IChannel ch, FloatLabelCursor.Factory cursorFactory, int featureCount)
{
    Host.AssertValue(ch);
    ch.AssertValue(cursorFactory);

    int m = featureCount + 1;

    // Check for memory conditions first.
    if ((long)m * (m + 1) / 2 > int.MaxValue)
        throw ch.Except("Cannot hold covariance matrix in memory with {0} features", m - 1);

    // Track the number of examples.
    long n = 0;
    // Since we are accumulating over many values, we use Double even for the single precision build.
    var xty = new Double[m];
    // The layout of this algorithm is a packed row-major lower triangular matrix.
    var xtx = new Double[m * (m + 1) / 2];

    // Build X'X (lower triangular) and X'y incrementally (X'X += X'X_i; X'y += X'y_i):
    using (var cursor = cursorFactory.Create())
    {
        while (cursor.MoveNext())
        {
            var yi = cursor.Label;
            // Increment first element of X'y.
            xty[0] += yi;
            // Increment first element of lower triangular X'X.
            xtx[0] += 1;
            var values = cursor.Features.GetValues();

            if (cursor.Features.IsDense)
            {
                int ioff = 1;
                ch.Assert(values.Length + 1 == m);
                // Increment rest of first column of lower triangular X'X.
                for (int i = 1; i < m; i++)
                {
                    ch.Assert(ioff == i * (i + 1) / 2);
                    var val = values[i - 1];
                    // Add the implicit first bias term to X'X.
                    xtx[ioff++] += val;
                    // Add the remainder of X'X.
                    for (int j = 0; j < i; j++)
                        xtx[ioff++] += val * values[j];
                    // X'y.
                    xty[i] += val * yi;
                }
                ch.Assert(ioff == xtx.Length);
            }
            else
            {
                var fIndices = cursor.Features.GetIndices();
                for (int ii = 0; ii < values.Length; ++ii)
                {
                    int i = fIndices[ii] + 1;
                    int ioff = i * (i + 1) / 2;
                    var val = values[ii];
                    // Add the implicit first bias term to X'X.
                    xtx[ioff++] += val;
                    // Add the remainder of X'X.
                    for (int jj = 0; jj <= ii; jj++)
                        xtx[ioff + fIndices[jj]] += val * values[jj];
                    // X'y.
                    xty[i] += val * yi;
                }
            }
            n++;
        }
        ch.Check(n > 0, "No training examples in dataset.");
        if (cursor.BadFeaturesRowCount > 0)
            ch.Warning("Skipped {0} instances with missing features/label during training", cursor.SkippedRowCount);

        if (_l2Weight > 0)
        {
            // Skip the bias term for regularization, in the ridge regression case.
            // So start at [1,1] instead of [0,0].
            // REVIEW: There are two ways to view this. Firstly, it is more
            // user friendly to make this scaling factor behave similarly regardless
            // of data size, so that if you have the same parameters, you get the same
            // model if you feed in your data than if you duplicate your data 10 times.
            // This is what I have now. The alternate point of view is to view this
            // L2 regularization parameter as providing some sort of prior, in which
            // case duplication 10 times should in fact be treated differently! (That
            // is, we should not multiply by n below.) Both interpretations seem
            // correct, in their way.
            Double squared = _l2Weight * _l2Weight * n;
            int ioff = 0;
            for (int i = 1; i < m; ++i)
                xtx[ioff += i + 1] += squared;
            ch.Assert(ioff == xtx.Length - 1);
        }
    }

    if (!(_l2Weight > 0) && n < m)
        throw ch.Except("Ordinary least squares requires more examples than parameters. There are {0} parameters, but {1} examples. To enable training, use a positive L2 weight so this behaves as ridge regression.", m, n);

    Double yMean = n == 0 ? 0 : xty[0] / n;

    ch.Info("Trainer solving for {0} parameters across {1} examples", m, n);
    // Cholesky decomposition of X'X into LL'.
    try
    {
        Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, m, xtx);
    }
    catch (DllNotFoundException)
    {
        // REVIEW: Is there no better way?
        throw ch.ExceptNotSupp("The MKL library (libMklImports) or one of its dependencies is missing.");
    }
    // Solve for beta in (LL')beta = X'y:
    Mkl.Pptrs(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, m, 1, xtx, xty, 1);
    // Note that the solver overwrote xty so it contains the solution. To be more clear,
    // we effectively change its name (through reassignment) so we don't get confused that
    // this is somehow xty in the remaining calculation.
    var beta = xty;
    xty = null;
    // Check that the solution is valid.
    for (int i = 0; i < beta.Length; ++i)
        ch.Check(FloatUtils.IsFinite(beta[i]), "Non-finite values detected in OLS solution");

    var weights = VBufferUtils.CreateDense<float>(beta.Length - 1);
    for (int i = 1; i < beta.Length; ++i)
        weights.Values[i - 1] = (float)beta[i];
    var bias = (float)beta[0];
    if (!(_l2Weight > 0) && m == n)
    {
        // We would expect the solution to the problem to be exact in this case.
        ch.Info("Number of examples equals number of parameters, solution is exact but no statistics can be derived");
        return new OlsLinearRegressionPredictor(Host, in weights, bias, null, null, null, 1, float.NaN);
    }

    Double rss = 0; // residual sum of squares
    Double tss = 0; // total sum of squares
    using (var cursor = cursorFactory.Create())
    {
        var lrPredictor = new LinearRegressionPredictor(Host, in weights, bias);
        var lrMap = lrPredictor.GetMapper<VBuffer<float>, float>();
        float yh = default;
        while (cursor.MoveNext())
        {
            var features = cursor.Features;
            lrMap(in features, ref yh);
            var e = cursor.Label - yh;
            rss += e * e;
            var ydm = cursor.Label - yMean;
            tss += ydm * ydm;
        }
    }
    var rSquared = ProbClamp(1 - (rss / tss));
    // R^2 adjusted differs from the normal formula on account of the bias term, by Said's reckoning.
    double rSquaredAdjusted;
    if (n > m)
    {
        rSquaredAdjusted = ProbClamp(1 - (1 - rSquared) * (n - 1) / (n - m));
        ch.Info("Coefficient of determination R2 = {0:g}, or {1:g} (adjusted)", rSquared, rSquaredAdjusted);
    }
    else
        rSquaredAdjusted = Double.NaN;

    // The per-parameter significance is compute intensive and may not be required for all practitioners.
    // Also we can't estimate it, unless we can estimate the variance, which requires more examples than
    // parameters.
    if (!_perParameterSignificance || m >= n)
        return new OlsLinearRegressionPredictor(Host, in weights, bias, null, null, null, rSquared, rSquaredAdjusted);

    ch.Assert(!Double.IsNaN(rSquaredAdjusted));
    var standardErrors = new Double[m];
    var tValues = new Double[m];
    var pValues = new Double[m];
    // Invert X'X:
    Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, m, xtx);
    var s2 = rss / (n - m); // estimate of variance of y

    for (int i = 0; i < m; i++)
    {
        // Initialize with the diagonal of the inverse Hessian.
        standardErrors[i] = (Single)xtx[i * (i + 1) / 2 + i];
    }

    if (_l2Weight > 0)
    {
        // Iterate through all entries of the inverse Hessian to make the adjustment to variance.
        int ioffset = 1;
        float reg = _l2Weight * _l2Weight * n;
        for (int iRow = 1; iRow < m; iRow++)
        {
            for (int iCol = 0; iCol <= iRow; iCol++)
            {
                var entry = (Single)xtx[ioffset];
                var adjustment = -reg * entry * entry;
                standardErrors[iRow] -= adjustment;
                if (0 < iCol && iCol < iRow)
                    standardErrors[iCol] -= adjustment;
                ioffset++;
            }
        }
        Contracts.Assert(ioffset == xtx.Length);
    }

    for (int i = 0; i < m; i++)
    {
        // sqrt of the diagonal entries of s2 * inverse(X'X + reg * I) * X'X * inverse(X'X + reg * I).
        standardErrors[i] = Math.Sqrt(s2 * standardErrors[i]);
        ch.Check(FloatUtils.IsFinite(standardErrors[i]), "Non-finite standard error detected from OLS solution");
        tValues[i] = beta[i] / standardErrors[i];
        pValues[i] = (float)MathUtils.TStatisticToPValue(tValues[i], n - m);
        ch.Check(0 <= pValues[i] && pValues[i] <= 1, "p-Value calculated outside expected [0,1] range");
    }

    return new OlsLinearRegressionPredictor(Host, in weights, bias, standardErrors, tValues, pValues, rSquared, rSquaredAdjusted);
}
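For reference, the quantities the method above computes, written out in equation form (a restatement of the code, with $\lambda$ standing for _l2Weight; the identity matrix in the ridge term excludes the bias row, which the code skips):

$$(X^\top X + \lambda^2 n I)\,\beta = X^\top y, \qquad R^2 = 1 - \frac{\mathrm{RSS}}{\mathrm{TSS}}, \qquad \bar{R}^2 = 1 - (1 - R^2)\,\frac{n-1}{n-m}, \qquad t_i = \frac{\beta_i}{\mathrm{SE}(\beta_i)}$$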