protected TrainStateBase(IChannel ch, int numFeatures, LinearModelParameters predictor, OnlineLinearTrainer<TTransformer, TModel> parent)
{
    Contracts.CheckValue(ch, nameof(ch));
    ch.Check(numFeatures > 0, "Cannot train with zero features!");
    ch.AssertValueOrNull(predictor);
    ch.AssertValue(parent);
    ch.Assert(Iteration == 0);
    ch.Assert(Bias == 0);

    ParentHost = parent.Host;

    ch.Trace("{0} Initializing {1} on {2} features", DateTime.UtcNow, parent.Name, numFeatures);

    // We want a dense vector, to prevent memory creation during training
    // unless we have a lot of features.
    if (predictor != null)
    {
        ((IHaveFeatureWeights)predictor).GetFeatureWeights(ref Weights);
        VBufferUtils.Densify(ref Weights);
        Bias = predictor.Bias;
    }
    else if (!string.IsNullOrWhiteSpace(parent.OnlineLinearTrainerOptions.InitialWeights))
    {
        ch.Info("Initializing weights and bias to " + parent.OnlineLinearTrainerOptions.InitialWeights);
        string[] weightStr = parent.OnlineLinearTrainerOptions.InitialWeights.Split(',');
        if (weightStr.Length != numFeatures + 1)
        {
            throw ch.Except(
                "Could not initialize weights from 'initialWeights': expecting {0} values to initialize {1} weights and the intercept",
                numFeatures + 1, numFeatures);
        }

        var weightValues = new float[numFeatures];
        for (int i = 0; i < numFeatures; i++)
            weightValues[i] = float.Parse(weightStr[i], CultureInfo.InvariantCulture);
        Weights = new VBuffer<float>(numFeatures, weightValues);
        Bias = float.Parse(weightStr[numFeatures], CultureInfo.InvariantCulture);
    }
    else if (parent.OnlineLinearTrainerOptions.InitialWeightsDiameter > 0)
    {
        var weightValues = new float[numFeatures];
        for (int i = 0; i < numFeatures; i++)
            weightValues[i] = parent.OnlineLinearTrainerOptions.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
        Weights = new VBuffer<float>(numFeatures, weightValues);
        Bias = parent.OnlineLinearTrainerOptions.InitialWeightsDiameter * (parent.Host.Rand.NextSingle() - (float)0.5);
    }
    else if (numFeatures <= 1000)
        Weights = VBufferUtils.CreateDense<float>(numFeatures);
    else
        Weights = VBufferUtils.CreateEmpty<float>(numFeatures);

    WeightsScale = 1;
}
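For reference, the InitialWeights option consumed above is a comma-separated string holding numFeatures weight values followed by the bias/intercept. A minimal standalone sketch of that parsing (the literal value and variable names here are hypothetical; the trainer types are not used, and the fragment assumes using System and using System.Globalization):

// Hypothetical option string for 3 features: three weights followed by the intercept.
string initialWeights = "0.1,-0.2,0.3,0.05";
int numFeatures = 3;
string[] parts = initialWeights.Split(',');
if (parts.Length != numFeatures + 1)
    throw new FormatException($"Expected {numFeatures + 1} comma-separated values.");
var weights = new float[numFeatures];
for (int i = 0; i < numFeatures; i++)
    weights[i] = float.Parse(parts[i], CultureInfo.InvariantCulture);
float bias = float.Parse(parts[numFeatures], CultureInfo.InvariantCulture);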
public static FeatureNameCollection Create(RoleMappedSchema schema)
{
    // REVIEW: This shim should be deleted as soon as is convenient.
    Contracts.CheckValue(schema, nameof(schema));
    Contracts.CheckParam(schema.Feature != null, nameof(schema), "Cannot create feature name collection if we have no features");
    Contracts.CheckParam(schema.Feature.Type.ValueCount > 0, nameof(schema), "Cannot create feature name collection if our features are not of known size");

    VBuffer<ReadOnlyMemory<char>> slotNames = default;
    int len = schema.Feature.Type.ValueCount;
    if (schema.Schema.HasSlotNames(schema.Feature.Index, len))
        schema.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, schema.Feature.Index, ref slotNames);
    else
        slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(len);

    var slotNameValues = slotNames.GetValues();
    string[] names = new string[slotNameValues.Length];
    for (int i = 0; i < slotNameValues.Length; ++i)
        names[i] = !slotNameValues[i].IsEmpty ? slotNameValues[i].ToString() : null;

    if (slotNames.IsDense)
        return new Dense(names.Length, names);

    ReadOnlySpan<int> indices = slotNames.GetIndices();
    return new Sparse(slotNames.Length, slotNameValues.Length, indices.ToArray(), names);
}
/// <summary>
/// Maps input feature names to their input INI content based on the metadata of the
/// features column. If the <c>IniContent</c> slot-wise string metadata is present it is
/// used; otherwise default content is derived from the slot names.
/// </summary>
/// <seealso cref="MetadataUtils.Kinds.SlotNames"/>
public FeaturesToContentMap(RoleMappedSchema schema)
{
    Contracts.AssertValue(schema);

    var feat = schema.Feature;
    Contracts.AssertValue(feat);
    Contracts.Assert(feat.Type.ValueCount > 0);

    var sch = schema.Schema;
    if (sch.HasSlotNames(feat.Index, feat.Type.ValueCount))
        sch.GetMetadata(MetadataUtils.Kinds.SlotNames, feat.Index, ref _names);
    else
        _names = VBufferUtils.CreateEmpty<DvText>(feat.Type.ValueCount);

#if !CORECLR
    var type = sch.GetMetadataTypeOrNull(BingBinLoader.IniContentMetadataKind, feat.Index);
    if (type != null && type.IsVector && type.VectorSize == feat.Type.ValueCount && type.ItemType.IsText)
        sch.GetMetadata(BingBinLoader.IniContentMetadataKind, feat.Index, ref _content);
    else
        _content = VBufferUtils.CreateEmpty<DvText>(feat.Type.ValueCount);
#else
    _content = VBufferUtils.CreateEmpty<DvText>(feat.Type.ValueCount);
#endif
    Contracts.Assert(_names.Length == _content.Length);
}
protected virtual void InitCore(IChannel ch, int numFeatures, LinearPredictor predictor)
{
    Contracts.Check(numFeatures > 0, "Can't train with zero features!");
    Contracts.Check(NumFeatures == 0, "Can't re-use trainer!");
    Contracts.Assert(Iteration == 0);
    Contracts.Assert(Bias == 0);

    ch.Trace("{0} Initializing {1} on {2} features", DateTime.UtcNow, Name, numFeatures);

    NumFeatures = numFeatures;

    // We want a dense vector, to prevent memory creation during training
    // unless we have a lot of features.
    // REVIEW: make a setting
    if (predictor != null)
    {
        predictor.GetFeatureWeights(ref Weights);
        VBufferUtils.Densify(ref Weights);
        Bias = predictor.Bias;
    }
    else if (!string.IsNullOrWhiteSpace(Args.InitialWeights))
    {
        ch.Info("Initializing weights and bias to " + Args.InitialWeights);
        string[] weightStr = Args.InitialWeights.Split(',');
        if (weightStr.Length != NumFeatures + 1)
        {
            throw Contracts.Except(
                "Could not initialize weights from 'initialWeights': expecting {0} values to initialize {1} weights and the intercept",
                NumFeatures + 1, NumFeatures);
        }

        Weights = VBufferUtils.CreateDense<Float>(NumFeatures);
        for (int i = 0; i < NumFeatures; i++)
            Weights.Values[i] = Float.Parse(weightStr[i], CultureInfo.InvariantCulture);
        Bias = Float.Parse(weightStr[NumFeatures], CultureInfo.InvariantCulture);
    }
    else if (Args.InitWtsDiameter > 0)
    {
        Weights = VBufferUtils.CreateDense<Float>(NumFeatures);
        for (int i = 0; i < NumFeatures; i++)
            Weights.Values[i] = Args.InitWtsDiameter * (Host.Rand.NextSingle() - (Float)0.5);
        Bias = Args.InitWtsDiameter * (Host.Rand.NextSingle() - (Float)0.5);
    }
    else if (NumFeatures <= 1000)
        Weights = VBufferUtils.CreateDense<Float>(NumFeatures);
    else
        Weights = VBufferUtils.CreateEmpty<Float>(NumFeatures);

    WeightsScale = 1;
}
public override IPredictor CreatePredictor()
{
    Contracts.Assert(WeightArraySize == 1);
    Contracts.Assert(Utils.Size(Weights) == 1);
    Contracts.Assert(Utils.Size(Bias) == 1);
    Host.Check(Weights[0].Length > 0);

    VBuffer<Float> maybeSparseWeights = VBufferUtils.CreateEmpty<Float>(Weights[0].Length);
    VBufferUtils.CreateMaybeSparseCopy(ref Weights[0], ref maybeSparseWeights,
        Conversions.Instance.GetIsDefaultPredicate<Float>(NumberType.Float));
    return new LinearRegressionPredictor(Host, ref maybeSparseWeights, Bias[0]);
}
protected override void InitCore(IChannel ch, int numFeatures, LinearPredictor predictor)
{
    base.InitCore(ch, numFeatures, predictor);

    if (Args.NoBias)
        Bias = 0;

    if (predictor == null)
        VBufferUtils.Densify(ref Weights);

    _weightsUpdate = VBufferUtils.CreateEmpty<Float>(numFeatures);
}
public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema) { Contracts.AssertValue(env); _env = env; _env.AssertValue(schema); _env.AssertValue(parent); _env.Assert(schema.Feature.HasValue); _parent = parent; InputRoleMappedSchema = schema; var genericMapper = parent.GenericMapper.Bind(_env, schema); _genericRowMapper = genericMapper as ISchemaBoundRowMapper; var featureSize = FeatureColumn.Type.GetVectorSize(); if (parent.Stringify) { var builder = new DataViewSchema.Builder(); builder.AddColumn(DefaultColumnNames.FeatureContributions, TextDataViewType.Instance, null); _outputSchema = builder.ToSchema(); if (FeatureColumn.HasSlotNames(featureSize)) { FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref _slotNames); } else { _slotNames = VBufferUtils.CreateEmpty <ReadOnlyMemory <char> >(featureSize); } } else { var metadataBuilder = new DataViewSchema.Annotations.Builder(); if (InputSchema[FeatureColumn.Index].HasSlotNames(featureSize)) { metadataBuilder.AddSlotNames(featureSize, (ref VBuffer <ReadOnlyMemory <char> > value) => FeatureColumn.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref value)); } var schemaBuilder = new DataViewSchema.Builder(); var featureContributionType = new VectorType(NumberDataViewType.Single, FeatureColumn.Type as VectorType); schemaBuilder.AddColumn(DefaultColumnNames.FeatureContributions, featureContributionType, metadataBuilder.ToAnnotations()); _outputSchema = schemaBuilder.ToSchema(); } _outputGenericSchema = _genericRowMapper.OutputSchema; OutputSchema = new ZipBinding(new DataViewSchema[] { _outputGenericSchema, _outputSchema, }).OutputSchema; }
private PcaPredictor(IHostEnvironment env, ModelLoadContext ctx) : base(env, RegistrationName, ctx) { // *** Binary format *** // int: dimension (aka. number of features) // int: rank // bool: center // If (center) // Float[]: mean vector // Float[][]: eigenvectors _dimension = ctx.Reader.ReadInt32(); Host.CheckDecode(FloatUtils.IsFinite(_dimension)); _rank = ctx.Reader.ReadInt32(); Host.CheckDecode(FloatUtils.IsFinite(_rank)); bool center = ctx.Reader.ReadBoolByte(); if (center) { var meanArray = ctx.Reader.ReadFloatArray(_dimension); Host.CheckDecode(meanArray.All(FloatUtils.IsFinite)); _mean = new VBuffer <Float>(_dimension, meanArray); _norm2Mean = VectorUtils.NormSquared(_mean); } else { _mean = VBufferUtils.CreateEmpty <Float>(_dimension); _norm2Mean = 0; } _eigenVectors = new VBuffer <Float> [_rank]; _meanProjected = new Float[_rank]; for (int i = 0; i < _rank; ++i) { var vi = ctx.Reader.ReadFloatArray(_dimension); Host.CheckDecode(vi.All(FloatUtils.IsFinite)); _eigenVectors[i] = new VBuffer <Float>(_dimension, vi); _meanProjected[i] = VectorUtils.DotProduct(ref _eigenVectors[i], ref _mean); } WarnOnOldNormalizer(ctx, GetType(), Host); _inputType = new VectorType(NumberType.Float, _dimension); }
public Counters(int numClusters, bool calculateDbi, ColumnInfo features)
{
    _numClusters = numClusters;
    CalculateDbi = calculateDbi;

    _numInstancesOfClstr = new Double[_numClusters];
    _numInstancesOfClass = new List<Double>();
    _confusionMatrix = new List<Double[]>();

    if (CalculateDbi)
    {
        Contracts.AssertValue(features);
        _clusterCentroids = new VBuffer<Single>[_numClusters];
        for (int i = 0; i < _numClusters; i++)
            _clusterCentroids[i] = VBufferUtils.CreateEmpty<Single>(features.Type.VectorSize);
        _distancesToCentroids = new Double[_numClusters];
    }
}
public Aggregator(IHostEnvironment env, ColumnInfo features, int scoreVectorSize, bool calculateDbi, bool weighted, string stratName)
    : base(env, stratName)
{
    _calculateDbi = calculateDbi;
    _scoresArr = new float[scoreVectorSize];
    _indicesArr = new int[scoreVectorSize];
    UnweightedCounters = new Counters(scoreVectorSize, _calculateDbi, features);
    Weighted = weighted;
    WeightedCounters = Weighted ? new Counters(scoreVectorSize, _calculateDbi, features) : null;

    if (_calculateDbi)
    {
        Host.AssertValue(features);
        _clusterCentroids = new VBuffer<Single>[scoreVectorSize];
        for (int i = 0; i < scoreVectorSize; i++)
            _clusterCentroids[i] = VBufferUtils.CreateEmpty<Single>(features.Type.VectorSize);
    }
}
public TrainState(IChannel ch, int numFeatures, LinearPredictor predictor, LinearSvm parent)
    : base(ch, numFeatures, predictor, parent)
{
    _batchSize = parent.Args.BatchSize;
    _noBias = parent.Args.NoBias;
    _performProjection = parent.Args.PerformProjection;
    _lambda = parent.Args.Lambda;

    if (_noBias)
        Bias = 0;

    if (predictor == null)
        VBufferUtils.Densify(ref Weights);

    _weightsUpdate = VBufferUtils.CreateEmpty<Float>(numFeatures);
}
public static void Main(string[] argv)
{
    RunTest(QuadTest);
    RunTest(LogTest);

    VBuffer<Float> grad = VBufferUtils.CreateEmpty<Float>(2);
    int n = 0;
    bool print = false;
    DTerminate term =
        (ref VBuffer<Float> x) =>
        {
            QuadTest2D(ref x, ref grad);
            Float norm = VectorUtils.Norm(grad);
            if (++n % 1000 == 0 || print)
                Console.WriteLine("{0}\t{1}", n, norm);
            return norm < 1e-5;
        };

    SgdOptimizer sgdo = new SgdOptimizer(term, SgdOptimizer.RateScheduleType.Constant, false, 100, 1, (Float)0.99);
    VBuffer<Float> init;
    CreateWrapped(out init, 0, 0);
    VBuffer<Float> ans = default(VBuffer<Float>);
    sgdo.Minimize(StochasticQuadTest2D, ref init, ref ans);
    QuadTest2D(ref ans, ref grad);
    Console.WriteLine(VectorUtils.Norm(grad));
    Console.WriteLine();
    Console.WriteLine();

    n = 0;
    GDOptimizer gdo = new GDOptimizer(term, null, true);
    print = true;
    CreateWrapped(out init, 0, 0);
    gdo.Minimize(QuadTest2D, ref init, ref ans);
    QuadTest2D(ref ans, ref grad);
    Console.WriteLine(VectorUtils.Norm(grad));
}
public static FeatureNameCollection Create(RoleMappedSchema schema)
{
    // REVIEW: This shim should be deleted as soon as is convenient.
    Contracts.CheckValue(schema, nameof(schema));
    Contracts.CheckParam(schema.Feature != null, nameof(schema), "Cannot create feature name collection if we have no features");
    Contracts.CheckParam(schema.Feature.Type.ValueCount > 0, nameof(schema), "Cannot create feature name collection if our features are not of known size");

    VBuffer<DvText> slotNames = default(VBuffer<DvText>);
    int len = schema.Feature.Type.ValueCount;
    if (schema.Schema.HasSlotNames(schema.Feature.Index, len))
        schema.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, schema.Feature.Index, ref slotNames);
    else
        slotNames = VBufferUtils.CreateEmpty<DvText>(len);

    string[] names = new string[slotNames.Count];
    for (int i = 0; i < slotNames.Count; ++i)
        names[i] = slotNames.Values[i].HasChars ? slotNames.Values[i].ToString() : null;

    if (slotNames.IsDense)
        return new Dense(names.Length, names);

    int[] indices = slotNames.Indices;
    if (indices == null)
        indices = new int[0];
    else if (indices.Length != slotNames.Count)
        Array.Resize(ref indices, slotNames.Count);
    return new Sparse(slotNames.Length, slotNames.Count, indices, names);
}
protected virtual Optimizer InitializeOptimizer(IChannel ch, FloatLabelCursor.Factory cursorFactory,
    out VBuffer<float> init, out ITerminationCriterion terminationCriterion)
{
    // MeanRelativeImprovementCriterion:
    //   Stops optimization when the average objective improvement over the last
    //   n iterations, normalized by the function value, is small enough.
    terminationCriterion = new MeanRelativeImprovementCriterion(OptTol, 5, MaxIterations);

    Optimizer opt = (L1Weight > 0)
        ? new L1Optimizer(Host, BiasCount, L1Weight / NumGoodRows, MemorySize, DenseOptimizer, null, EnforceNonNegativity)
        : new Optimizer(Host, MemorySize, DenseOptimizer, null, EnforceNonNegativity);
    opt.Quiet = Quiet;

    if (_srcPredictor != null)
        init = InitializeWeightsFromPredictor(_srcPredictor);
    else if (InitWtsDiameter > 0)
    {
        float[] initWeights = new float[BiasCount + WeightCount];
        for (int j = 0; j < initWeights.Length; j++)
            initWeights[j] = InitWtsDiameter * (Host.Rand.NextSingle() - 0.5f);
        init = new VBuffer<float>(initWeights.Length, initWeights);
    }
    else if (SgdInitializationTolerance > 0)
        init = InitializeWeightsSgd(ch, cursorFactory);
    else
        init = VBufferUtils.CreateEmpty<float>(BiasCount + WeightCount);

    return opt;
}
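As a rough reading of the termination comment above: the optimizer stops once the improvement averaged over the last few iterations is small relative to the current objective value. A hedged, standalone sketch of that idea (an illustration of the comment only, not the actual MeanRelativeImprovementCriterion implementation; the helper name is hypothetical and the fragment assumes using System):

// Hypothetical helper: stop when the average per-iteration improvement over the last
// n iterations, divided by the magnitude of the current objective value, falls below tolerance.
static bool ShouldStop(double currentValue, double valueNIterationsAgo, int n, double tolerance)
{
    double meanImprovement = (valueNIterationsAgo - currentValue) / n;
    return meanImprovement / Math.Abs(currentValue) < tolerance;
}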
public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema) { Contracts.AssertValue(env); _env = env; _env.AssertValue(schema); _env.AssertValue(parent); _env.AssertValue(schema.Feature); _parent = parent; InputRoleMappedSchema = schema; var genericMapper = parent.GenericMapper.Bind(_env, schema); _genericRowMapper = genericMapper as ISchemaBoundRowMapper; if (parent.Stringify) { var builder = new SchemaBuilder(); builder.AddColumn(DefaultColumnNames.FeatureContributions, TextType.Instance, null); _outputSchema = builder.GetSchema(); if (InputSchema.HasSlotNames(InputRoleMappedSchema.Feature.Index, InputRoleMappedSchema.Feature.Type.VectorSize)) { InputSchema.GetMetadata(MetadataUtils.Kinds.SlotNames, InputRoleMappedSchema.Feature.Index, ref _slotNames); } else { _slotNames = VBufferUtils.CreateEmpty <ReadOnlyMemory <char> >(InputRoleMappedSchema.Feature.Type.VectorSize); } } else { _outputSchema = Schema.Create(new FeatureContributionSchema(_env, DefaultColumnNames.FeatureContributions, new VectorType(NumberType.R4, schema.Feature.Type as VectorType), InputSchema, InputRoleMappedSchema.Feature.Index)); } _outputGenericSchema = _genericRowMapper.OutputSchema; OutputSchema = new CompositeSchema(new Schema[] { _outputGenericSchema, _outputSchema, }).AsSchema; }
protected virtual void TrainCore(IChannel ch, RoleMappedData data) { Host.AssertValue(ch); ch.AssertValue(data); // Compute the number of threads to use. The ctor should have verified that this will // produce a positive value. int numThreads = !UseThreads ? 1 : (NumThreads ?? Environment.ProcessorCount); if (Host.ConcurrencyFactor > 0 && numThreads > Host.ConcurrencyFactor) { numThreads = Host.ConcurrencyFactor; ch.Warning("The number of threads specified in trainer arguments is larger than the concurrency factor " + "setting of the environment. Using {0} training threads instead.", numThreads); } ch.Assert(numThreads > 0); NumGoodRows = 0; WeightSum = 0; _features = null; _labels = null; _weights = null; if (numThreads > 1) { ch.Info("LBFGS multi-threading will attempt to load dataset into memory. In case of out-of-memory " + "issues, add 'numThreads=1' to the trainer arguments and 'cache=-' to the command line " + "arguments to turn off multi-threading."); _features = new VBuffer <float> [1000]; _labels = new float[1000]; if (data.Schema.Weight != null) { _weights = new float[1000]; } } var cursorFactory = new FloatLabelCursor.Factory(data, CursOpt.Features | CursOpt.Label | CursOpt.Weight); long numBad; // REVIEW: This pass seems overly expensive for the benefit when multi-threading is off.... using (var cursor = cursorFactory.Create()) using (var pch = Host.StartProgressChannel("LBFGS data prep")) { // REVIEW: maybe it makes sense for the factory to capture the good row count after // the first successful cursoring? Double totalCount = data.Data.GetRowCount(true) ?? Double.NaN; long exCount = 0; pch.SetHeader(new ProgressHeader(null, new[] { "examples" }), e => e.SetProgress(0, exCount, totalCount)); while (cursor.MoveNext()) { WeightSum += cursor.Weight; if (ShowTrainingStats) { ProcessPriorDistribution(cursor.Label, cursor.Weight); } PreTrainingProcessInstance(cursor.Label, ref cursor.Features, cursor.Weight); exCount++; if (_features != null) { ch.Assert(cursor.KeptRowCount <= int.MaxValue); int index = (int)cursor.KeptRowCount - 1; Utils.EnsureSize(ref _features, index + 1); Utils.EnsureSize(ref _labels, index + 1); if (_weights != null) { Utils.EnsureSize(ref _weights, index + 1); _weights[index] = cursor.Weight; } Utils.Swap(ref _features[index], ref cursor.Features); _labels[index] = cursor.Label; if (cursor.KeptRowCount >= int.MaxValue) { ch.Warning("Limiting data size for multi-threading"); break; } } } NumGoodRows = cursor.KeptRowCount; numBad = cursor.SkippedRowCount; } ch.Check(NumGoodRows > 0, NoTrainingInstancesMessage); if (numBad > 0) { ch.Warning("Skipped {0} instances with missing features/label/weight during training", numBad); } if (_features != null) { ch.Assert(numThreads > 1); // If there are so many threads that each only gets a small number (less than 10) of instances, trim // the number of threads so each gets a more reasonable number (100 or so). These numbers are pretty arbitrary, // but avoid the possibility of having no instances on some threads. if (numThreads > 1 && NumGoodRows / numThreads < 10) { int numNew = Math.Max(1, (int)NumGoodRows / 100); ch.Warning("Too few instances to use {0} threads, decreasing to {1} thread(s)", numThreads, numNew); numThreads = numNew; } ch.Assert(numThreads > 0); // Divide up the instances among the threads. 
_numChunks = numThreads; _ranges = new int[_numChunks + 1]; int cinstTot = (int)NumGoodRows; for (int ichk = 0, iinstMin = 0; ichk < numThreads; ichk++) { int cchkLeft = numThreads - ichk; // Number of chunks left to fill. ch.Assert(0 < cchkLeft && cchkLeft <= numThreads); int cinstThis = (cinstTot - iinstMin + cchkLeft - 1) / cchkLeft; // Size of this chunk. ch.Assert(0 < cinstThis && cinstThis <= cinstTot - iinstMin); iinstMin += cinstThis; _ranges[ichk + 1] = iinstMin; } _localLosses = new float[numThreads]; _localGradients = new VBuffer <float> [numThreads - 1]; int size = BiasCount + WeightCount; for (int i = 0; i < _localGradients.Length; i++) { _localGradients[i] = VBufferUtils.CreateEmpty <float>(size); } ch.Assert(_numChunks > 0 && _data == null); } else { // Streaming, single-threaded case. _data = data; _cursorFactory = cursorFactory; ch.Assert(_numChunks == 0 && _data != null); } VBuffer <float> initWeights; ITerminationCriterion terminationCriterion; Optimizer opt = InitializeOptimizer(ch, cursorFactory, out initWeights, out terminationCriterion); opt.Quiet = Quiet; float loss; try { opt.Minimize(DifferentiableFunction, ref initWeights, terminationCriterion, ref CurrentWeights, out loss); } catch (Optimizer.PrematureConvergenceException e) { if (!Quiet) { ch.Warning("Premature convergence occurred. The OptimizationTolerance may be set too small. {0}", e.Message); } CurrentWeights = e.State.X; loss = e.State.Value; } ch.Assert(CurrentWeights.Length == BiasCount + WeightCount); int numParams = BiasCount; if ((L1Weight > 0 && !Quiet) || ShowTrainingStats) { VBufferUtils.ForEachDefined(ref CurrentWeights, (index, value) => { if (index >= BiasCount && value != 0) { numParams++; } }); if (L1Weight > 0 && !Quiet) { ch.Info("L1 regularization selected {0} of {1} weights.", numParams, BiasCount + WeightCount); } } if (ShowTrainingStats) { ComputeTrainingStatistics(ch, cursorFactory, loss, numParams); } }
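The chunking loop near the top of the method splits the cached rows evenly across threads using ceiling division; for example, 10 rows over 3 threads produce ranges [0, 4, 7, 10], i.e. chunks of 4, 3 and 3. A standalone sketch of the same arithmetic (helper name is hypothetical):

// Illustration of the even split used to build _ranges above:
// each remaining chunk takes ceil(remainingRows / chunksLeft) rows.
static int[] BuildRanges(int totalRows, int numThreads)
{
    var ranges = new int[numThreads + 1];
    for (int ichk = 0, min = 0; ichk < numThreads; ichk++)
    {
        int chunksLeft = numThreads - ichk;
        int thisChunk = (totalRows - min + chunksLeft - 1) / chunksLeft;
        min += thisChunk;
        ranges[ichk + 1] = min;
    }
    return ranges; // BuildRanges(10, 3) => { 0, 4, 7, 10 }
}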
private PcaPredictor TrainCore(IChannel ch, RoleMappedData data, int dimension) { Host.AssertValue(ch); ch.AssertValue(data); if (_rank > dimension) { throw ch.Except("Rank ({0}) cannot be larger than the original dimension ({1})", _rank, dimension); } int oversampledRank = Math.Min(_rank + _oversampling, dimension); //exact: (size of the 2 big matrices + other minor allocations) / (2^30) Double memoryUsageEstimate = 2.0 * dimension * oversampledRank * sizeof(Float) / 1e9; if (memoryUsageEstimate > 2) { ch.Info("Estimate memory usage: {0:G2} GB. If running out of memory, reduce rank and oversampling factor.", memoryUsageEstimate); } var y = Zeros(oversampledRank, dimension); var mean = _center ? VBufferUtils.CreateDense <Float>(dimension) : VBufferUtils.CreateEmpty <Float>(dimension); var omega = GaussianMatrix(oversampledRank, dimension, _seed); var cursorFactory = new FeatureFloatVectorCursor.Factory(data, CursOpt.Features | CursOpt.Weight); long numBad; Project(Host, cursorFactory, ref mean, omega, y, out numBad); if (numBad > 0) { ch.Warning("Skipped {0} instances with missing features/weights during training", numBad); } //Orthonormalize Y in-place using stabilized Gram Schmidt algorithm. //Ref: https://en.wikipedia.org/wiki/Gram-Schmidt#Algorithm for (var i = 0; i < oversampledRank; ++i) { var v = y[i]; VectorUtils.ScaleBy(ref v, 1 / VectorUtils.Norm(y[i])); // Make the next vectors in the queue orthogonal to the orthonormalized vectors. for (var j = i + 1; j < oversampledRank; ++j) //subtract the projection of y[j] on v. { VectorUtils.AddMult(ref v, -VectorUtils.DotProduct(ref v, ref y[j]), ref y[j]); } } var q = y; // q in QR decomposition. var b = omega; // reuse the memory allocated by Omega. Project(Host, cursorFactory, ref mean, q, b, out numBad); //Compute B2 = B' * B var b2 = new Float[oversampledRank * oversampledRank]; for (var i = 0; i < oversampledRank; ++i) { for (var j = i; j < oversampledRank; ++j) { b2[i * oversampledRank + j] = b2[j * oversampledRank + i] = VectorUtils.DotProduct(ref b[i], ref b[j]); } } Float[] smallEigenvalues;// eigenvectors and eigenvalues of the small matrix B2. Float[] smallEigenvectors; EigenUtils.EigenDecomposition(b2, out smallEigenvalues, out smallEigenvectors); PostProcess(b, smallEigenvalues, smallEigenvectors, dimension, oversampledRank); return(new PcaPredictor(Host, _rank, b, ref mean)); }
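The orthonormalization step above is stabilized (modified) Gram-Schmidt. A self-contained sketch of the same idea on plain float arrays, rather than the VBuffer/VectorUtils calls used in the trainer (helper name is hypothetical; assumes using System):

// Modified Gram-Schmidt: normalize row i, then remove its component from every later row.
static void OrthonormalizeRows(float[][] y)
{
    for (int i = 0; i < y.Length; i++)
    {
        double normSq = 0;
        for (int k = 0; k < y[i].Length; k++)
            normSq += y[i][k] * y[i][k];
        float invNorm = (float)(1 / Math.Sqrt(normSq));
        for (int k = 0; k < y[i].Length; k++)
            y[i][k] *= invNorm;

        for (int j = i + 1; j < y.Length; j++)
        {
            // Subtract the projection of y[j] onto the (now unit-length) y[i].
            float dot = 0;
            for (int k = 0; k < y[i].Length; k++)
                dot += y[i][k] * y[j][k];
            for (int k = 0; k < y[i].Length; k++)
                y[j][k] -= dot * y[i][k];
        }
    }
}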
protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, Float loss, int numParams) { Contracts.AssertValue(ch); Contracts.AssertValue(cursorFactory); Contracts.Assert(NumGoodRows > 0); Contracts.Assert(WeightSum > 0); Contracts.Assert(BiasCount == 1); Contracts.Assert(loss >= 0); Contracts.Assert(numParams >= BiasCount); Contracts.Assert(CurrentWeights.IsDense); ch.Info("Model trained with {0} training examples.", NumGoodRows); // Compute deviance: start with loss function. Float deviance = (Float)(2 * loss * WeightSum); if (L2Weight > 0) { // Need to subtract L2 regularization loss. // The bias term is not regularized. var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight; deviance -= regLoss; } if (L1Weight > 0) { // Need to subtract L1 regularization loss. // The bias term is not regularized. Double regLoss = 0; VBufferUtils.ForEachDefined(ref CurrentWeights, (ind, value) => { if (ind >= BiasCount) { regLoss += Math.Abs(value); } }); deviance -= (Float)regLoss * L1Weight * 2; } ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0)); // Compute null deviance, i.e., the deviance of null hypothesis. // Cap the prior positive rate at 1e-15. Double priorPosRate = _posWeight / WeightSum; Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1); Float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ? 0f : (Float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true)); ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1); // Compute AIC. ch.Info("AIC: \t{0}", 2 * numParams + deviance); // Show the coefficients statistics table. var featureColIdx = cursorFactory.Data.Schema.Feature.Index; var schema = cursorFactory.Data.Data.Schema; var featureLength = CurrentWeights.Length - BiasCount; var namesSpans = VBufferUtils.CreateEmpty <DvText>(featureLength); if (schema.HasSlotNames(featureColIdx, featureLength)) { schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans); } Host.Assert(namesSpans.Length == featureLength); // Inverse mapping of non-zero weight slots. Dictionary <int, int> weightIndicesInvMap = null; // Indices of bias and non-zero weight slots. int[] weightIndices = null; // Whether all weights are non-zero. bool denseWeight = numParams == CurrentWeights.Length; // Extract non-zero indices of weight. if (!denseWeight) { weightIndices = new int[numParams]; weightIndicesInvMap = new Dictionary <int, int>(numParams); weightIndices[0] = 0; weightIndicesInvMap[0] = 0; int j = 1; for (int i = 1; i < CurrentWeights.Length; i++) { if (CurrentWeights.Values[i] != 0) { weightIndices[j] = i; weightIndicesInvMap[i] = j++; } } Contracts.Assert(j == numParams); } // Compute the standard error of coefficients. long hessianDimension = (long)numParams * (numParams + 1) / 2; if (hessianDimension > int.MaxValue) { ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " + "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" + "to reduce the number of parameters."); _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); return; } // Building the variance-covariance matrix for parameters. // The layout of this algorithm is a packed row-major lower triangular matrix. 
// E.g., layout of indices for 4-by-4: // 0 // 1 2 // 3 4 5 // 6 7 8 9 var hessian = new Double[hessianDimension]; // Initialize diagonal elements with L2 regularizers except for the first entry (index 0) // Since bias is not regularized. if (L2Weight > 0) { // i is the array index of the diagonal entry at iRow-th row and iRow-th column. // iRow is one-based. int i = 0; for (int iRow = 2; iRow <= numParams; iRow++) { i += iRow; hessian[i] = L2Weight; } Contracts.Assert(i == hessian.Length - 1); } // Initialize the remaining entries. var bias = CurrentWeights.Values[0]; using (var cursor = cursorFactory.Create()) { while (cursor.MoveNext()) { var label = cursor.Label; var weight = cursor.Weight; var score = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features); // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example. var variance = weight / (2 + 2 * Math.Cosh(score)); // Increment the first entry of hessian. hessian[0] += variance; var values = cursor.Features.Values; if (cursor.Features.IsDense) { int ioff = 1; // Increment remaining entries of hessian. for (int i = 1; i < numParams; i++) { ch.Assert(ioff == i * (i + 1) / 2); int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1; Contracts.Assert(0 <= wi && wi < cursor.Features.Length); var val = values[wi] * variance; // Add the implicit first bias term to X'X hessian[ioff++] += val; // Add the remainder of X'X for (int j = 0; j < i; j++) { int wj = weightIndices == null ? j : weightIndices[j + 1] - 1; Contracts.Assert(0 <= wj && wj < cursor.Features.Length); hessian[ioff++] += val * values[wj]; } } ch.Assert(ioff == hessian.Length); } else { var indices = cursor.Features.Indices; for (int ii = 0; ii < cursor.Features.Count; ++ii) { int i = indices[ii]; int wi = i + 1; if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi)) { continue; } Contracts.Assert(0 < wi && wi <= cursor.Features.Length); int ioff = wi * (wi + 1) / 2; var val = values[ii] * variance; // Add the implicit first bias term to X'X hessian[ioff] += val; // Add the remainder of X'X for (int jj = 0; jj <= ii; jj++) { int j = indices[jj]; int wj = j + 1; if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj)) { continue; } Contracts.Assert(0 < wj && wj <= cursor.Features.Length); hessian[ioff + wj] += val * values[jj]; } } } } } // Apply Cholesky Decomposition to find the inverse of the Hessian. Double[] invHessian = null; try { // First, find the Cholesky decomposition LL' of the Hessian. Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian); // Note that hessian is already modified at this point. It is no longer the original Hessian, // but instead represents the Cholesky decomposition L. // Also note that the following routine is supposed to consume the Cholesky decomposition L instead // of the original information matrix. Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian); // At this point, hessian should contain the inverse of the original Hessian matrix. // Swap hessian with invHessian to avoid confusion in the following context. Utils.Swap(ref hessian, ref invHessian); Contracts.Assert(hessian == null); } catch (DllNotFoundException) { throw ch.ExceptNotSupp("The MKL library (Microsoft.ML.MklImports.dll) or one of its dependencies is missing."); } Float[] stdErrorValues = new Float[numParams]; stdErrorValues[0] = (Float)Math.Sqrt(invHessian[0]); for (int i = 1; i < numParams; i++) { // Initialize with inverse Hessian. 
stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i]; } if (L2Weight > 0) { // Iterate through all entries of inverse Hessian to make adjustment to variance. // A discussion on ridge regularized LR coefficient covariance matrix can be found here: // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/ // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf int ioffset = 1; for (int iRow = 1; iRow < numParams; iRow++) { for (int iCol = 0; iCol <= iRow; iCol++) { var entry = (Single)invHessian[ioffset]; var adjustment = -L2Weight * entry * entry; stdErrorValues[iRow] -= adjustment; if (0 < iCol && iCol < iRow) { stdErrorValues[iCol] -= adjustment; } ioffset++; } } Contracts.Assert(ioffset == invHessian.Length); } for (int i = 1; i < numParams; i++) { stdErrorValues[i] = (Float)Math.Sqrt(stdErrorValues[i]); } VBuffer <Float> stdErrors = new VBuffer <Float>(CurrentWeights.Length, numParams, stdErrorValues, weightIndices); _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, ref stdErrors); }
protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams) { Contracts.AssertValue(ch); Contracts.AssertValue(cursorFactory); Contracts.Assert(NumGoodRows > 0); Contracts.Assert(WeightSum > 0); Contracts.Assert(BiasCount == 1); Contracts.Assert(loss >= 0); Contracts.Assert(numParams >= BiasCount); Contracts.Assert(CurrentWeights.IsDense); ch.Info("Model trained with {0} training examples.", NumGoodRows); // Compute deviance: start with loss function. float deviance = (float)(2 * loss * WeightSum); if (L2Weight > 0) { // Need to subtract L2 regularization loss. // The bias term is not regularized. var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight; deviance -= regLoss; } if (L1Weight > 0) { // Need to subtract L1 regularization loss. // The bias term is not regularized. Double regLoss = 0; VBufferUtils.ForEachDefined(ref CurrentWeights, (ind, value) => { if (ind >= BiasCount) { regLoss += Math.Abs(value); } }); deviance -= (float)regLoss * L1Weight * 2; } ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0)); // Compute null deviance, i.e., the deviance of null hypothesis. // Cap the prior positive rate at 1e-15. Double priorPosRate = _posWeight / WeightSum; Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1); float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ? 0f : (float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true)); ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1); // Compute AIC. ch.Info("AIC: \t{0}", 2 * numParams + deviance); // Show the coefficients statistics table. var featureColIdx = cursorFactory.Data.Schema.Feature.Index; var schema = cursorFactory.Data.Data.Schema; var featureLength = CurrentWeights.Length - BiasCount; var namesSpans = VBufferUtils.CreateEmpty <ReadOnlyMemory <char> >(featureLength); if (schema.HasSlotNames(featureColIdx, featureLength)) { schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans); } Host.Assert(namesSpans.Length == featureLength); // Inverse mapping of non-zero weight slots. Dictionary <int, int> weightIndicesInvMap = null; // Indices of bias and non-zero weight slots. int[] weightIndices = null; // Whether all weights are non-zero. bool denseWeight = numParams == CurrentWeights.Length; // Extract non-zero indices of weight. if (!denseWeight) { weightIndices = new int[numParams]; weightIndicesInvMap = new Dictionary <int, int>(numParams); weightIndices[0] = 0; weightIndicesInvMap[0] = 0; int j = 1; for (int i = 1; i < CurrentWeights.Length; i++) { if (CurrentWeights.Values[i] != 0) { weightIndices[j] = i; weightIndicesInvMap[i] = j++; } } Contracts.Assert(j == numParams); } // Compute the standard error of coefficients. long hessianDimension = (long)numParams * (numParams + 1) / 2; if (hessianDimension > int.MaxValue) { ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " + "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" + "to reduce the number of parameters."); _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); return; } // Building the variance-covariance matrix for parameters. // The layout of this algorithm is a packed row-major lower triangular matrix. 
// E.g., layout of indices for 4-by-4: // 0 // 1 2 // 3 4 5 // 6 7 8 9 var hessian = new Double[hessianDimension]; // Initialize diagonal elements with L2 regularizers except for the first entry (index 0) // Since bias is not regularized. if (L2Weight > 0) { // i is the array index of the diagonal entry at iRow-th row and iRow-th column. // iRow is one-based. int i = 0; for (int iRow = 2; iRow <= numParams; iRow++) { i += iRow; hessian[i] = L2Weight; } Contracts.Assert(i == hessian.Length - 1); } // Initialize the remaining entries. var bias = CurrentWeights.Values[0]; using (var cursor = cursorFactory.Create()) { while (cursor.MoveNext()) { var label = cursor.Label; var weight = cursor.Weight; var score = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features); // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example. var variance = weight / (2 + 2 * Math.Cosh(score)); // Increment the first entry of hessian. hessian[0] += variance; var values = cursor.Features.Values; if (cursor.Features.IsDense) { int ioff = 1; // Increment remaining entries of hessian. for (int i = 1; i < numParams; i++) { ch.Assert(ioff == i * (i + 1) / 2); int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1; Contracts.Assert(0 <= wi && wi < cursor.Features.Length); var val = values[wi] * variance; // Add the implicit first bias term to X'X hessian[ioff++] += val; // Add the remainder of X'X for (int j = 0; j < i; j++) { int wj = weightIndices == null ? j : weightIndices[j + 1] - 1; Contracts.Assert(0 <= wj && wj < cursor.Features.Length); hessian[ioff++] += val * values[wj]; } } ch.Assert(ioff == hessian.Length); } else { var indices = cursor.Features.Indices; for (int ii = 0; ii < cursor.Features.Count; ++ii) { int i = indices[ii]; int wi = i + 1; if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi)) { continue; } Contracts.Assert(0 < wi && wi <= cursor.Features.Length); int ioff = wi * (wi + 1) / 2; var val = values[ii] * variance; // Add the implicit first bias term to X'X hessian[ioff] += val; // Add the remainder of X'X for (int jj = 0; jj <= ii; jj++) { int j = indices[jj]; int wj = j + 1; if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj)) { continue; } Contracts.Assert(0 < wj && wj <= cursor.Features.Length); hessian[ioff + wj] += val * values[jj]; } } } } } _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance); }
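Both ComputeTrainingStatistics overloads index the Hessian as a packed row-major lower-triangular array, as sketched in the 4-by-4 layout comment above. The index arithmetic they rely on is simply:

// Element (i, j) with j <= i of a packed row-major lower-triangular matrix:
// row i starts at i*(i+1)/2, so the 4x4 layout is
//   0
//   1 2
//   3 4 5
//   6 7 8 9
static int PackedIndex(int i, int j) => i * (i + 1) / 2 + j;
// e.g. PackedIndex(3, 1) == 7, and the diagonal entry (i, i) sits at i*(i+1)/2 + i.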
// Combines source key names and slot names to produce final slot names. private void GetSlotNames(int iinfo, ref VBuffer <DvText> dst) { Host.Assert(0 <= iinfo && iinfo < Infos.Length); Host.Assert(_concat[iinfo]); Host.Assert(_types[iinfo].IsKnownSizeVector); // Size one should have been treated the same as Bag (by the caller). // Variable size should have thrown (by the caller). var typeSrc = Infos[iinfo].TypeSrc; Host.Assert(typeSrc.VectorSize > 1); // Get the source slot names, defaulting to empty text. var namesSlotSrc = default(VBuffer <DvText>); var typeSlotSrc = Source.Schema.GetMetadataTypeOrNull(MetadataUtils.Kinds.SlotNames, Infos[iinfo].Source); if (typeSlotSrc != null && typeSlotSrc.VectorSize == typeSrc.VectorSize && typeSlotSrc.ItemType.IsText) { Source.Schema.GetMetadata(MetadataUtils.Kinds.SlotNames, Infos[iinfo].Source, ref namesSlotSrc); Host.Check(namesSlotSrc.Length == typeSrc.VectorSize); } else { namesSlotSrc = VBufferUtils.CreateEmpty <DvText>(typeSrc.VectorSize); } int keyCount = typeSrc.ItemType.KeyCount; int slotLim = _types[iinfo].VectorSize; Host.Assert(slotLim == (long)typeSrc.VectorSize * keyCount); // Get the source key names, in an array (since we will use them multiple times). var namesKeySrc = default(VBuffer <DvText>); Source.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, Infos[iinfo].Source, ref namesKeySrc); Host.Check(namesKeySrc.Length == keyCount); var keys = new DvText[keyCount]; namesKeySrc.CopyTo(keys); var values = dst.Values; if (Utils.Size(values) < slotLim) { values = new DvText[slotLim]; } var sb = new StringBuilder(); int slot = 0; foreach (var kvpSlot in namesSlotSrc.Items(all: true)) { Contracts.Assert(slot == (long)kvpSlot.Key * keyCount); sb.Clear(); if (kvpSlot.Value.HasChars) { kvpSlot.Value.AddToStringBuilder(sb); } else { sb.Append('[').Append(kvpSlot.Key).Append(']'); } sb.Append('.'); int len = sb.Length; foreach (var key in keys) { sb.Length = len; key.AddToStringBuilder(sb); values[slot++] = new DvText(sb.ToString()); } } Host.Assert(slot == slotLim); dst = new VBuffer <DvText>(slotLim, values, dst.Indices); }
protected LinearPredictor(IHostEnvironment env, string name, ModelLoadContext ctx) : base(env, name, ctx) { // *** Binary format *** // Float: bias // int: number of features (weights) // int: number of indices // int[]: indices // int: number of weights // Float[]: weights // bool: has model stats // (Conditional) LinearModelStatistics: stats Bias = ctx.Reader.ReadFloat(); Host.CheckDecode(FloatUtils.IsFinite(Bias)); int len = ctx.Reader.ReadInt32(); Host.Assert(len > 0); int cind = ctx.Reader.ReadInt32(); Host.CheckDecode(0 <= cind & cind < len); var indices = ctx.Reader.ReadIntArray(cind); // Verify monotonicity of indices. int prev = -1; for (int i = 0; i < cind; i++) { Host.CheckDecode(prev < indices[i]); prev = indices[i]; } Host.CheckDecode(prev < len); int cwht = ctx.Reader.ReadInt32(); // Either there are as many weights as there are indices (in the // sparse case), or (in the dense case) there are no indices and the // number of weights is the length of the vector. Note that for the // trivial predictor it is quite legal to have 0 in both counts. Host.CheckDecode(cwht == cind || (cind == 0 && cwht == len)); var weights = ctx.Reader.ReadFloatArray(cwht); Host.CheckDecode(Utils.Size(weights) == 0 || weights.All(x => FloatUtils.IsFinite(x))); if (cwht == 0) { Weight = VBufferUtils.CreateEmpty <Float>(len); } else { Weight = new VBuffer <Float>(len, Utils.Size(weights), weights, indices); } InputType = new VectorType(NumberType.Float, Weight.Length); WarnOnOldNormalizer(ctx, GetType(), Host); if (Weight.IsDense) { _weightsDense = Weight; } else { _weightsDenseLock = new object(); } }
/// <summary>
/// Convenience function to construct a working vector of length <c>Dim</c>.
/// </summary>
/// <returns>A new working vector of logical length <c>Dim</c>, dense if <c>_keepDense</c> is set and empty otherwise.</returns>
protected VBuffer<Float> CreateWorkingVector()
{
    // Owing to the way the operations are structured, if the "x", "newX", and "dir" vectors
    // start out (or somehow naturally become) dense, they will remain dense.
    return _keepDense ? VBufferUtils.CreateDense<Float>(Dim) : VBufferUtils.CreateEmpty<Float>(Dim);
}
/// <summary> /// Initialize weights by running SGD up to specified tolerance. /// </summary> protected virtual VBuffer <float> InitializeWeightsSgd(IChannel ch, FloatLabelCursor.Factory cursorFactory) { if (!Quiet) { ch.Info("Running SGD initialization with tolerance {0}", SgdInitializationTolerance); } int numExamples = 0; var oldWeights = VBufferUtils.CreateEmpty <float>(BiasCount + WeightCount); DTerminate terminateSgd = (in VBuffer <float> x) => { if (++numExamples % 1000 != 0) { return(false); } VectorUtils.AddMult(in x, -1, ref oldWeights); float normDiff = VectorUtils.Norm(oldWeights); x.CopyTo(ref oldWeights); // #if OLD_TRACING // REVIEW: How should this be ported? if (!Quiet) { Console.Write("."); if (numExamples % 50000 == 0) { Console.WriteLine("\t{0}\t{1}", numExamples, normDiff); } } // #endif return(normDiff < SgdInitializationTolerance); }; VBuffer <float> result = default(VBuffer <float>); FloatLabelCursor cursor = null; try { float[] scratch = null; SgdOptimizer.DStochasticGradient lossSgd = (in VBuffer <float> x, ref VBuffer <float> grad) => { // Zero out the gradient by sparsifying. grad = new VBuffer <float>(grad.Length, 0, grad.Values, grad.Indices); EnsureBiases(ref grad); if (cursor == null || !cursor.MoveNext()) { if (cursor != null) { cursor.Dispose(); } cursor = cursorFactory.Create(); if (!cursor.MoveNext()) { return; } } AccumulateOneGradient(in cursor.Features, cursor.Label, cursor.Weight, in x, ref grad, ref scratch); }; VBuffer <float> sgdWeights; if (DenseOptimizer) { sgdWeights = VBufferUtils.CreateDense <float>(BiasCount + WeightCount); } else { sgdWeights = VBufferUtils.CreateEmpty <float>(BiasCount + WeightCount); } SgdOptimizer sgdo = new SgdOptimizer(terminateSgd); sgdo.Minimize(lossSgd, ref sgdWeights, ref result); // #if OLD_TRACING // REVIEW: How should this be ported? if (!Quiet) { Console.WriteLine(); } // #endif ch.Info("SGD initialization done in {0} rounds", numExamples); } finally { if (cursor != null) { cursor.Dispose(); } } return(result); }
GetImportanceMetricsMatrix( IHostEnvironment env, IPredictionTransformer <TModel> model, IDataView data, Func <TResult> resultInitializer, Func <IDataView, TMetric> evaluationFunc, Func <TMetric, TMetric, TMetric> deltaFunc, string features, int permutationCount, bool useFeatureWeightFilter = false, int?topExamples = null) { Contracts.CheckValue(env, nameof(env)); var host = env.Register(nameof(PermutationFeatureImportance <TModel, TMetric, TResult>)); host.CheckValue(model, nameof(model)); host.CheckValue(data, nameof(data)); host.CheckNonEmpty(features, nameof(features)); topExamples = topExamples ?? Utils.ArrayMaxSize; host.Check(topExamples > 0, "Provide how many examples to use (positive number) or set to null to use whole dataset."); VBuffer <ReadOnlyMemory <char> > slotNames = default; var metricsDelta = new List <TResult>(); using (var ch = host.Start("GetImportanceMetrics")) { ch.Trace("Scoring and evaluating baseline."); var baselineMetrics = evaluationFunc(model.Transform(data)); // Get slot names. var featuresColumn = data.Schema[features]; int numSlots = featuresColumn.Type.GetVectorSize(); data.Schema.TryGetColumnIndex(features, out int featuresColumnIndex); ch.Info("Number of slots: " + numSlots); if (data.Schema[featuresColumnIndex].HasSlotNames(numSlots)) { data.Schema[featuresColumnIndex].Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref slotNames); } if (slotNames.Length != numSlots) { slotNames = VBufferUtils.CreateEmpty <ReadOnlyMemory <char> >(numSlots); } VBuffer <float> weights = default; var workingFeatureIndices = Enumerable.Range(0, numSlots).ToList(); int zeroWeightsCount = 0; // By default set to the number of all features available. var evaluatedFeaturesCount = numSlots; if (useFeatureWeightFilter) { var predictorWithWeights = model.Model as IPredictorWithFeatureWeights <Single>; if (predictorWithWeights != null) { predictorWithWeights.GetFeatureWeights(ref weights); const int maxReportedZeroFeatures = 10; StringBuilder msgFilteredOutFeatures = new StringBuilder("The following features have zero weight and will not be evaluated: \n \t"); var prefix = ""; foreach (var k in weights.Items(all: true)) { if (k.Value == 0) { zeroWeightsCount++; // Print info about first few features we're not going to evaluate. if (zeroWeightsCount <= maxReportedZeroFeatures) { msgFilteredOutFeatures.Append(prefix); msgFilteredOutFeatures.Append(GetSlotName(slotNames, k.Key)); prefix = ", "; } } else { workingFeatureIndices.Add(k.Key); } } // Old FastTree models has less weights than slots. if (weights.Length < numSlots) { ch.Warning( "Predictor had fewer features than slots. All unknown features will get default 0 weight."); zeroWeightsCount += numSlots - weights.Length; var indexes = weights.GetIndices().ToArray(); var values = weights.GetValues().ToArray(); var count = values.Length; weights = new VBuffer <float>(numSlots, count, values, indexes); } evaluatedFeaturesCount = workingFeatureIndices.Count; ch.Info("Number of zero weights: {0} out of {1}.", zeroWeightsCount, weights.Length); // Print what features have 0 weight if (zeroWeightsCount > 0) { if (zeroWeightsCount > maxReportedZeroFeatures) { msgFilteredOutFeatures.Append(string.Format("... (printing out {0} features here).\n Use 'Index' column in the report for info on what features are not evaluated.", maxReportedZeroFeatures)); } ch.Info(msgFilteredOutFeatures.ToString()); } } } if (workingFeatureIndices.Count == 0 && zeroWeightsCount == 0) { // Use all features otherwise. 
workingFeatureIndices.AddRange(Enumerable.Range(0, numSlots)); } if (zeroWeightsCount == numSlots) { ch.Warning("All features have 0 weight thus can not do thorough evaluation"); return(metricsDelta.ToImmutableArray()); } // Note: this will not work on the huge dataset. var maxSize = topExamples; List <float> initialfeatureValuesList = new List <float>(); // Cursor through the data to cache slot 0 values for the upcoming permutation. var valuesRowCount = 0; // REVIEW: Seems like if the labels are NaN, so that all metrics are NaN, this command will be useless. // In which case probably erroring out is probably the most useful thing. using (var cursor = data.GetRowCursor(featuresColumn)) { var featuresGetter = cursor.GetGetter <VBuffer <float> >(featuresColumn); var featuresBuffer = default(VBuffer <float>); while (initialfeatureValuesList.Count < maxSize && cursor.MoveNext()) { featuresGetter(ref featuresBuffer); initialfeatureValuesList.Add(featuresBuffer.GetItemOrDefault(workingFeatureIndices[0])); } valuesRowCount = initialfeatureValuesList.Count; } if (valuesRowCount > 0) { ch.Info("Detected {0} examples for evaluation.", valuesRowCount); } else { ch.Warning("Detected no examples for evaluation."); return(metricsDelta.ToImmutableArray()); } float[] featureValuesBuffer = initialfeatureValuesList.ToArray(); float[] nextValues = new float[valuesRowCount]; // Now iterate through all the working slots, do permutation and calc the delta of metrics. int processedCnt = 0; int nextFeatureIndex = 0; var shuffleRand = RandomUtils.Create(host.Rand.Next()); using (var pch = host.StartProgressChannel("Calculating Permutation Feature Importance")) { pch.SetHeader(new ProgressHeader("processed slots"), e => e.SetProgress(0, processedCnt)); foreach (var workingIndx in workingFeatureIndices) { // Index for the feature we will permute next. Needed to build in advance a buffer for the permutation. if (processedCnt < workingFeatureIndices.Count - 1) { nextFeatureIndex = workingFeatureIndices[processedCnt + 1]; } // Used for pre-caching the next feature int nextValuesIndex = 0; SchemaDefinition input = SchemaDefinition.Create(typeof(FeaturesBuffer)); Contracts.Assert(input.Count == 1); input[0].ColumnName = features; SchemaDefinition output = SchemaDefinition.Create(typeof(FeaturesBuffer)); Contracts.Assert(output.Count == 1); output[0].ColumnName = features; output[0].ColumnType = featuresColumn.Type; // Perform multiple permutations for one feature to build a confidence interval var metricsDeltaForFeature = resultInitializer(); for (int permutationIteration = 0; permutationIteration < permutationCount; permutationIteration++) { Utils.Shuffle <float>(shuffleRand, featureValuesBuffer); Action <FeaturesBuffer, FeaturesBuffer, PermuterState> permuter = (src, dst, state) => { src.Features.CopyTo(ref dst.Features); VBufferUtils.ApplyAt(ref dst.Features, workingIndx, (int ii, ref float d) => d = featureValuesBuffer[state.SampleIndex++]); // Is it time to pre-cache the next feature? if (permutationIteration == permutationCount - 1 && processedCnt < workingFeatureIndices.Count - 1) { // Fill out the featureValueBuffer for the next feature while updating the current feature // This is the reason I need PermuterState in LambdaTransform.CreateMap. 
nextValues[nextValuesIndex] = src.Features.GetItemOrDefault(nextFeatureIndex); if (nextValuesIndex < valuesRowCount - 1) { nextValuesIndex++; } } }; IDataView viewPermuted = LambdaTransform.CreateMap( host, data, permuter, null, input, output); if (valuesRowCount == topExamples) { viewPermuted = SkipTakeFilter.Create(host, new SkipTakeFilter.TakeOptions() { Count = valuesRowCount }, viewPermuted); } var metrics = evaluationFunc(model.Transform(viewPermuted)); var delta = deltaFunc(metrics, baselineMetrics); metricsDeltaForFeature.Add(delta); } // Add the metrics delta to the list metricsDelta.Add(metricsDeltaForFeature); // Swap values for next iteration of permutation. if (processedCnt < workingFeatureIndices.Count - 1) { Array.Clear(featureValuesBuffer, 0, featureValuesBuffer.Length); nextValues.CopyTo(featureValuesBuffer, 0); Array.Clear(nextValues, 0, nextValues.Length); } processedCnt++; } pch.Checkpoint(processedCnt, processedCnt); } } return(metricsDelta.ToImmutableArray()); }
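At a high level, GetImportanceMetricsMatrix implements permutation feature importance: for each working feature slot, shuffle that slot's values across the cached rows, re-score the permuted data, and record the metric delta against the baseline. A minimal, library-free sketch of that loop (the delegate and helper names here are hypothetical, not the ML.NET API; assumes using System):

// Hypothetical simplified PFI: 'evaluate' scores a dataset and returns a single metric.
static double[] PermutationImportance(float[][] rows, Func<float[][], double> evaluate, Random rand)
{
    double baseline = evaluate(rows);
    int numFeatures = rows[0].Length;
    var importance = new double[numFeatures];
    for (int f = 0; f < numFeatures; f++)
    {
        // Copy the data and shuffle column f across rows (Fisher-Yates).
        var permuted = Array.ConvertAll(rows, r => (float[])r.Clone());
        for (int i = permuted.Length - 1; i > 0; i--)
        {
            int k = rand.Next(i + 1);
            float tmp = permuted[i][f];
            permuted[i][f] = permuted[k][f];
            permuted[k][f] = tmp;
        }
        // The importance of feature f is the change in the metric caused by the permutation.
        importance[f] = evaluate(permuted) - baseline;
    }
    return importance;
}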
/// <summary> /// Minimize the function represented by <paramref name="f"/>. /// </summary> /// <param name="f">Stochastic gradients of function to minimize</param> /// <param name="initial">Initial point</param> /// <param name="result">Approximate minimum of <paramref name="f"/></param> public void Minimize(DStochasticGradient f, ref VBuffer <Float> initial, ref VBuffer <Float> result) { Contracts.Check(FloatUtils.IsFinite(initial.Values, initial.Count), "The initial vector contains NaNs or infinite values."); int dim = initial.Length; VBuffer <Float> grad = VBufferUtils.CreateEmpty <Float>(dim); VBuffer <Float> step = VBufferUtils.CreateEmpty <Float>(dim); VBuffer <Float> x = default(VBuffer <Float>); initial.CopyTo(ref x); VBuffer <Float> prev = default(VBuffer <Float>); VBuffer <Float> avg = VBufferUtils.CreateEmpty <Float>(dim); for (int n = 0; _maxSteps == 0 || n < _maxSteps; ++n) { if (_momentum == 0) { step = new VBuffer <Float>(step.Length, 0, step.Values, step.Indices); } else { VectorUtils.ScaleBy(ref step, _momentum); } Float stepSize; switch (_rateSchedule) { case RateScheduleType.Constant: stepSize = 1 / _t0; break; case RateScheduleType.Sqrt: stepSize = 1 / (_t0 + MathUtils.Sqrt(n)); break; case RateScheduleType.Linear: stepSize = 1 / (_t0 + n); break; default: throw Contracts.Except(); } Float scale = (1 - _momentum) / _batchSize; for (int i = 0; i < _batchSize; ++i) { f(ref x, ref grad); VectorUtils.AddMult(ref grad, scale, ref step); } if (_averaging) { Utils.Swap(ref avg, ref prev); VectorUtils.ScaleBy(prev, ref avg, (Float)n / (n + 1)); VectorUtils.AddMult(ref step, -stepSize, ref x); VectorUtils.AddMult(ref x, (Float)1 / (n + 1), ref avg); if ((n > 0 && TerminateTester.ShouldTerminate(ref avg, ref prev)) || _terminate(ref avg)) { result = avg; return; } } else { Utils.Swap(ref x, ref prev); VectorUtils.AddMult(ref step, -stepSize, ref prev, ref x); if ((n > 0 && TerminateTester.ShouldTerminate(ref x, ref prev)) || _terminate(ref x)) { result = x; return; } } } result = _averaging ? avg : x; }
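The averaging branch above maintains a running mean of the iterates: after each update $x_{n+1} = x_n - \eta_n \cdot \mathrm{step}_n$, the ScaleBy/AddMult pair folds the new point into the average as

$$\bar{x}_{n+1} = \frac{n}{n+1}\,\bar{x}_n + \frac{1}{n+1}\,x_{n+1},$$

so when averaging is enabled the returned result is the mean of the iterates seen so far (Polyak-style averaged SGD) rather than the final point.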