예제 #1
0
        /// <summary>
        /// Trains a model using SAR, stopping the SAR host if cancellation is requested.
        /// </summary>
        /// <param name="settings">The training settings</param>
        /// <param name="usageEvents">The usage events to use for training</param>
        /// <param name="catalogItems">The catalog items to use for training. Required when <c>settings.EnableColdItemPlacement</c> is set.</param>
        /// <param name="uniqueUsersCount">The number of users in the user id index file. Must not be negative.</param>
        /// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file. Must not be negative.</param>
        /// <param name="cancellationToken">A cancellation token</param>
        /// <returns>The predictor model returned by the internal training routine</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="settings"/> or <paramref name="usageEvents"/> is null, or if
        /// <paramref name="catalogItems"/> is null while cold item placement is enabled.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown if <paramref name="uniqueUsersCount"/> or <paramref name="uniqueUsageItemsCount"/> is negative.
        /// </exception>
        /// <exception cref="OperationCanceledException">
        /// Thrown if cancellation was already requested before training starts.
        /// </exception>
        public IPredictorModel Train(ITrainingSettings settings,
                                     IList<SarUsageEvent> usageEvents,
                                     IList<SarCatalogItem> catalogItems,
                                     int uniqueUsersCount,
                                     int uniqueUsageItemsCount,
                                     CancellationToken cancellationToken)
        {
            if (settings == null)
            {
                throw new ArgumentNullException(nameof(settings));
            }

            if (usageEvents == null)
            {
                throw new ArgumentNullException(nameof(usageEvents));
            }

            // the catalog is only mandatory when cold item placement is requested
            if (settings.EnableColdItemPlacement && catalogItems == null)
            {
                throw new ArgumentNullException(nameof(catalogItems));
            }

            // only negative counts are rejected (zero passes), so the message says "non-negative"
            if (uniqueUsersCount < 0)
            {
                var exception = new ArgumentException($"{nameof(uniqueUsersCount)} must be a non-negative integer");
                _tracer.TraceWarning(exception.ToString());
                throw exception;
            }

            if (uniqueUsageItemsCount < 0)
            {
                var exception = new ArgumentException($"{nameof(uniqueUsageItemsCount)} must be a non-negative integer");
                _tracer.TraceWarning(exception.ToString());
                throw exception;
            }

            // bail out early if the caller has already cancelled
            cancellationToken.ThrowIfCancellationRequested();

            using (TlcEnvironment environment = new TlcEnvironment(verbose: true))
            {
                // reset the feature weights left over from any previous training run
                _detectedFeatureWeights = null;
                try
                {
                    environment.AddListener<ChannelMessage>(ChannelMessageListener);
                    IHost environmentHost = environment.Register("SarHost");

                    // bind the cancellation token to SAR cancellation
                    using (cancellationToken.Register(() => { environmentHost.StopExecution(); }))
                    {
                        _tracer.TraceInformation("Starting training model using SAR");
                        return TrainModel(environmentHost, settings, usageEvents, catalogItems, uniqueUsersCount,
                                          uniqueUsageItemsCount);
                    }
                }
                finally
                {
                    // always detach the listener, even when training throws
                    environment.RemoveListener<ChannelMessage>(ChannelMessageListener);
                }
            }
        }
예제 #2
0
 /// <summary>
 /// Trains a model using SAR without cancellation support.
 /// Forwards to the cancellable overload, passing <see cref="CancellationToken.None"/>.
 /// </summary>
 /// <param name="settings">The training settings</param>
 /// <param name="usageEvents">The usage events to use for training</param>
 /// <param name="catalogItems">The catalog items to use for training</param>
 /// <param name="uniqueUsersCount">The number of users in the user id index file.</param>
 /// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file</param>
 /// <returns>The predictor model returned by the cancellable overload</returns>
 public IPredictorModel Train(
     ITrainingSettings settings,
     IList<SarUsageEvent> usageEvents,
     IList<SarCatalogItem> catalogItems,
     int uniqueUsersCount,
     int uniqueUsageItemsCount)
     => Train(
         settings,
         usageEvents,
         catalogItems,
         uniqueUsersCount,
         uniqueUsageItemsCount,
         CancellationToken.None);
 /// <summary>
 /// Trains a model using SAR and reports catalog feature weights, without cancellation support.
 /// Forwards to the cancellable overload, passing <see cref="CancellationToken.None"/>.
 /// </summary>
 /// <param name="settings">The training settings</param>
 /// <param name="usageEvents">The usage events to use for training</param>
 /// <param name="catalogItems">The catalog items to use for training</param>
 /// <param name="featureNames">The names of the catalog items features, in the same order as the feature values in the catalog</param>
 /// <param name="uniqueUsersCount">The number of users in the user id index file.</param>
 /// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file</param>
 /// <param name="catalogFeatureWeights">The computed catalog items features weights (if relevant)</param>
 /// <returns>The predictor model returned by the cancellable overload</returns>
 public IPredictorModel Train(
     ITrainingSettings settings,
     IList<SarUsageEvent> usageEvents,
     IList<SarCatalogItem> catalogItems,
     string[] featureNames,
     int uniqueUsersCount,
     int uniqueUsageItemsCount,
     out IDictionary<string, double> catalogFeatureWeights)
     => Train(
         settings,
         usageEvents,
         catalogItems,
         featureNames,
         uniqueUsersCount,
         uniqueUsageItemsCount,
         out catalogFeatureWeights,
         CancellationToken.None);
예제 #4
0
        /// <summary>
        /// Builds the usage (and optionally catalog) data views, selects the similarity calculator
        /// and runs the SAR trainer.
        /// </summary>
        /// <param name="environment">The host used to create data views and run the trainer</param>
        /// <param name="settings">The training settings</param>
        /// <param name="usageItems">The usage events to train on</param>
        /// <param name="catalogItems">The catalog items; only read when cold item placement is enabled</param>
        /// <param name="uniqueUsersCount">The count used for the key type of the "user" column</param>
        /// <param name="uniqueUsageItemsCount">The count used for the key type of the "Item" columns</param>
        /// <returns>The predictor model produced by the SAR trainer</returns>
        /// <exception cref="ArgumentException">Thrown if the configured similarity function is not recognized</exception>
        private IPredictorModel TrainModel(
            IHost environment,
            ITrainingSettings settings,
            IList<SarUsageEvent> usageItems,
            IList<SarCatalogItem> catalogItems,
            int uniqueUsersCount,
            int uniqueUsageItemsCount)
        {
            // stays null unless cold item placement is enabled and the catalog carries features
            IDataView catalog = null;

            if (settings.EnableColdItemPlacement)
            {
                // the first catalog item determines the features vector size;
                // a missing item or a null feature vector both count as "no features"
                SarCatalogItem item = catalogItems.FirstOrDefault();
                int featuresVectorSize = item?.FeatureVector?.Length ?? 0;

                _tracer.TraceInformation($"Found catalog item features vector size of {featuresVectorSize}");

                // check if the catalog has items with any features. if not, there is no point to compute 'cold' items recommendations
                if (featuresVectorSize > 0)
                {
                    _tracer.TraceInformation($"Creating catalog item schema using the features vector size of {featuresVectorSize}");

                    var catalogItemSchema = SchemaDefinition.Create(typeof(SarCatalogItem));
                    catalogItemSchema["Item"].ColumnType = new KeyType(DataKind.U4, 1, uniqueUsageItemsCount);
                    catalogItemSchema["Features"].ColumnType = new VectorType(TextType.Instance, featuresVectorSize);
                    catalog = environment.CreateDataView(catalogItems, catalogItemSchema);
                }
            }

            _tracer.TraceInformation("Creating usage item schema");
            var usageItemSchema = SchemaDefinition.Create(typeof(SarUsageEvent));

            usageItemSchema["user"].ColumnType = new KeyType(DataKind.U4, 1, uniqueUsersCount);
            usageItemSchema["Item"].ColumnType = new KeyType(DataKind.U4, 1, uniqueUsageItemsCount);

            // create a usage data view
            IDataView usage = environment.CreateDataView(usageItems, usageItemSchema);

            // set the similarity function factory; every calculator takes the same support threshold
            ISimCalculatorFactory simCalculatorFactory;

            switch (settings.SimilarityFunction)
            {
                case SimilarityFunction.Jaccard:
                    simCalculatorFactory = new JaccardSimilarityCalculator.Arguments
                    {
                        threshold = settings.SupportThreshold
                    };
                    break;

                case SimilarityFunction.Lift:
                    simCalculatorFactory = new LiftSimilarityCalculator.Arguments
                    {
                        threshold = settings.SupportThreshold
                    };
                    break;

                case SimilarityFunction.Cooccurrence:
                    simCalculatorFactory = new CoOccurrenceSimilarityCalculator.Arguments
                    {
                        threshold = settings.SupportThreshold
                    };
                    break;

                default:
                    var exception = new ArgumentException($"Unknown similarity function '{settings.SimilarityFunction}'");
                    _tracer.TraceError(exception.ToString());
                    throw exception;
            }

            // prepare SAR trainer's input arguments
            // NOTE(review): MaxColdItems is hard-coded to 10 and not driven by the settings - confirm this is intended
            var trainInput = new SAR.Input
            {
                TrainingData = usage,
                CatalogData = catalog,
                Calculator = simCalculatorFactory,
                Backfill = settings.EnableBackfilling,
                ItemColumn = "Item",
                UserColumn = "user",
                ColdToCold = settings.EnableColdToColdRecommendations,
                MaxColdItems = 10,
                MultiValueFeatures = true
            };

            _tracer.TraceInformation("Training a SAR predictor using the usage and catalog data");
            SAR.Output trainOutput = SAR.Train(environment, trainInput);
            return trainOutput.PredictorModel;
        }
        /// <summary>
        /// Trains a model using SAR and reports the detected catalog feature weights,
        /// stopping the SAR host if cancellation is requested.
        /// </summary>
        /// <param name="settings">The training settings</param>
        /// <param name="usageEvents">The usage events to use for training</param>
        /// <param name="catalogItems">The catalog items to use for training. Required when <c>settings.EnableColdItemPlacement</c> is set.</param>
        /// <param name="featureNames">The names of the catalog items features, in the same order as the feature values in the catalog</param>
        /// <param name="uniqueUsersCount">The number of users in the user id index file. Must not be negative.</param>
        /// <param name="uniqueUsageItemsCount">The number of usage items in the item id index file. Must not be negative.</param>
        /// <param name="catalogFeatureWeights">
        /// The computed catalog items features weights keyed by feature name. Left empty when no weights
        /// were detected, when <paramref name="featureNames"/> is null, or when the number of names and weights differ.
        /// </param>
        /// <param name="cancellationToken">A cancellation token</param>
        /// <returns>The predictor model returned by the internal training routine</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown if <paramref name="settings"/> or <paramref name="usageEvents"/> is null, or if
        /// <paramref name="catalogItems"/> is null while cold item placement is enabled.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// Thrown if <paramref name="uniqueUsersCount"/> or <paramref name="uniqueUsageItemsCount"/> is negative.
        /// </exception>
        /// <exception cref="OperationCanceledException">
        /// Thrown if cancellation was already requested before training starts.
        /// </exception>
        public IPredictorModel Train(ITrainingSettings settings,
                                     IList<SarUsageEvent> usageEvents,
                                     IList<SarCatalogItem> catalogItems,
                                     string[] featureNames,
                                     int uniqueUsersCount,
                                     int uniqueUsageItemsCount,
                                     out IDictionary<string, double> catalogFeatureWeights,
                                     CancellationToken cancellationToken)
        {
            if (settings == null)
            {
                throw new ArgumentNullException(nameof(settings));
            }

            if (usageEvents == null)
            {
                throw new ArgumentNullException(nameof(usageEvents));
            }

            // the catalog is only mandatory when cold item placement is requested
            if (settings.EnableColdItemPlacement && catalogItems == null)
            {
                throw new ArgumentNullException(nameof(catalogItems));
            }

            // only negative counts are rejected (zero passes), so the message says "non-negative"
            if (uniqueUsersCount < 0)
            {
                var exception = new ArgumentException($"{nameof(uniqueUsersCount)} must be a non-negative integer");
                _tracer.TraceWarning(exception.ToString());
                throw exception;
            }

            if (uniqueUsageItemsCount < 0)
            {
                var exception = new ArgumentException($"{nameof(uniqueUsageItemsCount)} must be a non-negative integer");
                _tracer.TraceWarning(exception.ToString());
                throw exception;
            }

            // bail out early if the caller has already cancelled
            cancellationToken.ThrowIfCancellationRequested();

            using (TlcEnvironment environment = new TlcEnvironment(verbose: true))
            {
                // reset the weights left over from any previous run
                // (presumably repopulated by ChannelMessageListener during training - not visible here)
                _detectedFeatureWeights = null;
                try
                {
                    environment.AddListener<ChannelMessage>(ChannelMessageListener);
                    IHost environmentHost = environment.Register("SarHost");

                    // bind the cancellation token to SAR cancellation
                    using (cancellationToken.Register(() => { environmentHost.StopExecution(); }))
                    {
                        _tracer.TraceInformation("Starting training model using SAR");
                        IPredictorModel model = TrainModel(environmentHost, settings, usageEvents, catalogItems, uniqueUsersCount,
                                                           uniqueUsageItemsCount);

                        // pair each feature name with the weight at the same index;
                        // the result stays empty if weights and names cannot be matched up
                        catalogFeatureWeights = new Dictionary<string, double>();
                        if (_detectedFeatureWeights != null && featureNames != null)
                        {
                            if (_detectedFeatureWeights.Length == featureNames.Length)
                            {
                                for (int i = 0; i < featureNames.Length; i++)
                                {
                                    catalogFeatureWeights[featureNames[i]] = _detectedFeatureWeights[i];
                                }
                            }
                            else
                            {
                                _tracer.TraceWarning(
                                    $"Found a mismatch between number of feature names ({featureNames.Length}) and the number of feature weights ({_detectedFeatureWeights.Length})");
                            }
                        }

                        return model;
                    }
                }
                finally
                {
                    // always detach the listener, even when training throws
                    environment.RemoveListener<ChannelMessage>(ChannelMessageListener);
                }
            }
        }