// This method ignores the base-class properties WithReplacement and UniformUserSampling.
//
// Runs one training iteration: performs Feedback.Count sampling steps, each drawing a positive
// feedback and a matching negative feedback and updating the factors for that pair. The elapsed
// time in milliseconds is reported through Model.OnIterate.
public override void Iterate()
{
    int time = (int)Wrap.MeasureTime(delegate()
    {
        // Refresh period of the dynamic sampler: |I| * ln(|I|) steps, rounded up to a whole
        // number of steps. The previous code took the remainder of the integer step counter by
        // the raw (non-integral) double and compared it to 0, which is only true for step 0,
        // so the sampler was never refreshed periodically.
        int itemCount = AllItems.Count;
        int refreshPeriod = 1;
        if (itemCount > 1)
        {
            double rawPeriod = Math.Ceiling(itemCount * Math.Log(itemCount));
            // Clamp so the cast to int can neither overflow nor yield a zero divisor.
            refreshPeriod = (int)Math.Min(int.MaxValue, Math.Max(1.0, rawPeriod));
        }

        for (int i = 0; i < Feedback.Count; i++)
        {
            if (UnobservedNegSampler == UnobservedNegSampler.Dynamic && i % refreshPeriod == 0)
            {
                UpdateDynamicSampler();
            }

            var pos = SamplePosFeedback();
            var neg = SampleNegFeedback(pos);

            int user_id = UsersMap.ToInternalID(pos.User.Id);
            int item_id = ItemsMap.ToInternalID(pos.Item.Id);
            int other_item_id = ItemsMap.ToInternalID(neg.Item.Id);

            UpdateFactors(user_id, item_id, other_item_id, true, true, update_j);
        }

        // The adapted-weight positive sampler is re-estimated once per iteration.
        if (PosSampler == PosSampler.AdaptedWeight)
        {
            UpdatePosSampler();
        }
    }).TotalMilliseconds;

    Model.OnIterate(this, time);
}
// Scores a feedback instance as the sum of the item bias, the user-item interaction and the
// interactions of every known feature with the user and with the item. Terms that involve a
// user or an item whose internal id is above MaxUserID / MaxItemID are treated as zero.
public virtual float Predict(Feedback feedback)
{
    int internalUser = UsersMap.ToInternalID(feedback.User.Id);
    int internalItem = ItemsMap.ToInternalID(feedback.Item.Id);

    // With IgnoreFeaturesOnPrediction set, the feature list stays empty and only the bias and
    // user-item terms contribute.
    List<Tuple<int, float>> translated = IgnoreFeaturesOnPrediction
        ? new List<Tuple<int, float>>()
        : feedback.GetAllAttributes()
              .Select(a => FeatureBuilder.TranslateAttribute(a))
              .NormalizeSumToOne()
              .ToList();

    bool unseenUser = internalUser > MaxUserID;
    bool unseenItem = internalItem > MaxItemID;

    float userFeatureScore = 0;
    float itemFeatureScore = 0;

    foreach (var entry in translated)
    {
        // A feature index at or beyond NumTrainFeaturs first appeared in the test set,
        // so no factors have been learnt for it.
        if (entry.Item1 >= NumTrainFeaturs)
        {
            continue;
        }

        float value = entry.Item2;

        if (!unseenUser)
        {
            userFeatureScore += value * MatrixExtensions.RowScalarProduct(feature_factors, entry.Item1, user_factors, internalUser);
        }

        if (!unseenItem)
        {
            itemFeatureScore += value * MatrixExtensions.RowScalarProduct(feature_factors, entry.Item1, item_factors, internalItem);
        }
    }

    float bias = 0;
    if (!unseenItem)
    {
        bias = item_bias[internalItem];
    }

    float interaction = 0;
    if (!unseenUser && !unseenItem)
    {
        interaction = MatrixExtensions.RowScalarProduct(user_factors, internalUser, item_factors, internalItem);
    }

    return bias + interaction + userFeatureScore + itemFeatureScore;
}
// Draws a negative feedback for the given positive feedback, using the strategy selected by
// UnobservedNegSampler. Each strategy is a rejection loop that redraws until the candidate is
// acceptable. Returns null when the configured sampler matches none of the handled cases.
// NumUnobservedNeg is incremented on every call.
public virtual Feedback SampleUnobservedNegFeedback(Feedback posFeedback)
{
    Feedback sampled = null;

    switch (UnobservedNegSampler)
    {
        case UnobservedNegSampler.UniformFeedback:
            // Uniform draw over the training feedback; reject draws from the same user.
            while (true)
            {
                sampled = TrainFeedback[random.Next(TrainFeedback.Count)];
                if (!(sampled.User == posFeedback.User))
                {
                    break;
                }
            }
            break;

        case UnobservedNegSampler.DynamicLevel:
            // Pick a level with the positive-level sampler, then a uniform feedback inside
            // that level; reject draws from the same user.
            while (true)
            {
                int level = PosLevels[_posLevelSampler.Sample()];
                int index = random.Next(LevelPosFeedback[level].Count);
                sampled = LevelPosFeedback[level][index];
                if (!(sampled.User == posFeedback.User))
                {
                    break;
                }
            }
            break;

        case UnobservedNegSampler.UniformItem:
        {
            // Uniform draw over all items; reject items flagged for this user in the user matrix.
            string candidateId;
            while (true)
            {
                candidateId = AllItems[random.Next(AllItems.Count)];
                int internalItem = ItemsMap.ToInternalID(candidateId);
                int internalUser = UsersMap.ToInternalID(posFeedback.User.Id);
                bool observed = Feedback.UserMatrix[internalUser, internalItem] == true;
                if (!observed)
                {
                    break;
                }
            }

            sampled = new Feedback(posFeedback.User, Split.Container.Items[candidateId]);
            break;
        }

        case UnobservedNegSampler.Dynamic:
        {
            // Item proposed by the dynamic sampler; reject items flagged for this user
            // in the user matrix.
            string candidateId;
            while (true)
            {
                candidateId = SampleNegItemDynamic(posFeedback);
                int internalItem = ItemsMap.ToInternalID(candidateId);
                int internalUser = UsersMap.ToInternalID(posFeedback.User.Id);
                bool observed = Feedback.UserMatrix[internalUser, internalItem] == true;
                if (!observed)
                {
                    break;
                }
            }

            sampled = new Feedback(posFeedback.User, Split.Container.Items[candidateId]);
            break;
        }

        default:
            break;
    }

    NumUnobservedNeg++;
    return sampled;
}
// Rebuilds the per-factor statistics used by the dynamic sampler: for every latent factor,
// the list of all items ordered by descending factor value, and the standard deviation of
// that factor's values over all items.
protected virtual void UpdateDynamicSampler()
{
    for (int f = 0; f < NumFactors; f++)
    {
        // Both queries are materialised immediately, so capturing the loop variable is safe.
        var rankedItems =
            (from itemId in AllItems
             orderby item_factors[ItemsMap.ToInternalID(itemId), f] descending
             select itemId).ToList();
        _factorBasedRank[f] = rankedItems;

        var factorValues =
            from itemId in AllItems
            select item_factors[ItemsMap.ToInternalID(itemId), f];
        _itemFactorsStdev[f] = factorValues.Stdev();
    }
}
// Trains the wrapped FM recommender on the given split.
// For rating data the train set is converted into a Ratings collection and handed to the
// recommender. Attributes of train feedback (rating data only) and of test feedback (always)
// are translated up front so the translations are available later. The time spent inside the
// recommender's own Train() call is stored in PureTrainTime (milliseconds).
public override void Train(Split split)
{
    var fm = (FM)MmlRecommenderInstance;
    var builder = new FmFeatureBuilder();
    var weighted = MmlRecommenderInstance as WeightedBPRFM;

    if (DataType == WrapRec.IO.DataType.Ratings)
    {
        var ratings = new Ratings();

        foreach (var feedback in split.Train)
        {
            var rating = (Rating)feedback;
            int internalUser = UsersMap.ToInternalID(rating.User.Id);
            int internalItem = ItemsMap.ToInternalID(rating.Item.Id);
            ratings.Add(internalUser, internalItem, rating.Value);

            foreach (var attr in feedback.GetAllAttributes())
            {
                attr.Translation = builder.TranslateAttribute(attr);

                // Attribute group is hard-coded: user is 0, item is 1, everything else is 2.
                attr.Group = 2;

                if (weighted == null || weighted.FeatureGroups.ContainsKey(attr.Translation.Item1))
                {
                    continue;
                }

                weighted.FeatureGroups.Add(attr.Translation.Item1, 2);
            }
        }

        ((IRatingPredictor)MmlRecommenderInstance).Ratings = ratings;
    }

    // Test attributes get the same translation and group registration as train attributes.
    foreach (var feedback in split.Test)
    {
        foreach (var attr in feedback.GetAllAttributes())
        {
            attr.Translation = builder.TranslateAttribute(attr);

            // Attribute group is hard-coded: user is 0, item is 1, everything else is 2.
            attr.Group = 2;

            if (weighted == null || weighted.FeatureGroups.ContainsKey(attr.Translation.Item1))
            {
                continue;
            }

            weighted.FeatureGroups.Add(attr.Translation.Item1, 2);
        }
    }

    fm.Split = split;
    fm.Model = this;
    fm.UsersMap = UsersMap;
    fm.ItemsMap = ItemsMap;
    fm.FeatureBuilder = builder;

    Logger.Current.Trace("Training with MmlFmRecommender recommender...");

    var elapsed = Wrap.MeasureTime(() => { fm.Train(); });
    PureTrainTime = (int)elapsed.TotalMilliseconds;
}
// Makes sure that every item id and every user id of the split's container already has an
// internal id before evaluation starts. This prevents cross-thread writes to ItemsMap / UsersMap
// (the "key already exists in dictionary" error) when evaluation is performed in parallel for
// each user.
private void ExhaustInternalIds(Split split)
{
    foreach (var item in split.Container.Items.Values)
    {
        ItemsMap.ToInternalID(item.Id);
    }

    // Users must come from the container's user collection. The previous code iterated Items
    // here as well, which registered item ids in UsersMap and left real user ids unregistered.
    foreach (var user in split.Container.Users.Values)
    {
        UsersMap.ToInternalID(user.Id);
    }
}
// Trains the wrapped BPRFM recommender on the given split.
// The train set is converted into positive-only feedback for the recommender. Attributes of
// both train and test feedback are translated up front so the translations are available
// later, and, when the recommender is a WeightedBPRFM, each translated feature index is
// registered in its FeatureGroups. The time spent inside the recommender's own Train() call is
// stored in PureTrainTime (milliseconds).
public override void Train(Split split)
{
    var mmlInstance = (BPRFM)MmlRecommenderInstance;
    var featBuilder = new FmFeatureBuilder();
    var mmlFeedback = new PosOnlyFeedback<SparseBooleanMatrix>();
    var wBprFm = MmlRecommenderInstance as WeightedBPRFM;

    foreach (var feedback in split.Train)
    {
        mmlFeedback.Add(UsersMap.ToInternalID(feedback.User.Id), ItemsMap.ToInternalID(feedback.Item.Id));

        // The attributes are translated so that they can be used later for training.
        foreach (var attr in feedback.GetAllAttributes())
        {
            attr.Translation = featBuilder.TranslateAttribute(attr);

            // Hard-coded attribute group: user is 0, item is 1, others are 2.
            attr.Group = 2;

            if (wBprFm != null && !wBprFm.FeatureGroups.ContainsKey(attr.Translation.Item1))
            {
                wBprFm.FeatureGroups.Add(attr.Translation.Item1, 2);
            }
        }
    }

    foreach (var feedback in split.Test)
    {
        // Test attributes are translated the same way as train attributes.
        foreach (var attr in feedback.GetAllAttributes())
        {
            attr.Translation = featBuilder.TranslateAttribute(attr);

            // Hard-coded attribute group: user is 0, item is 1, others are 2.
            attr.Group = 2;

            // wBprFm is already known to be non-null inside this branch, so a plain member
            // access is used, consistent with the train loop above.
            if (wBprFm != null && !wBprFm.FeatureGroups.ContainsKey(attr.Translation.Item1))
            {
                wBprFm.FeatureGroups.Add(attr.Translation.Item1, 2);
            }
        }
    }

    mmlInstance.Feedback = mmlFeedback;
    mmlInstance.Split = split;
    mmlInstance.Model = this;
    mmlInstance.UsersMap = UsersMap;
    mmlInstance.ItemsMap = ItemsMap;
    mmlInstance.FeatureBuilder = featBuilder;

    Logger.Current.Trace("Training with MmlBprfmRecommender recommender...");

    PureTrainTime = (int)Wrap.MeasureTime(delegate() { mmlInstance.Train(); }).TotalMilliseconds;
}
// Trains the wrapped MyMediaLite recommender on the given split.
// The train set is first converted into the MyMediaLite format that matches DataType:
//   - Ratings:         a Ratings collection,
//   - TimeAwareRating: a TimedRatings collection, with each time derived from the feedback's
//                      "timestamp" attribute as a day offset from 1998-11-01,
//   - anything else:   positive-only feedback.
// The time spent inside the recommender's own Train() call is stored in PureTrainTime
// (milliseconds).
public override void Train(Split split)
{
    // Convert the train set to the MyMediaLite train set format.
    if (DataType == IO.DataType.Ratings)
    {
        var mmlFeedback = new Ratings();

        foreach (var feedback in split.Train)
        {
            var rating = (Rating)feedback;
            mmlFeedback.Add(UsersMap.ToInternalID(rating.User.Id), ItemsMap.ToInternalID(rating.Item.Id), rating.Value);
        }

        ((IRatingPredictor)MmlRecommenderInstance).Ratings = mmlFeedback;
    }
    else if (DataType == IO.DataType.TimeAwareRating)
    {
        var mmlFeedback = new TimedRatings();
        var firstRatingMl10M = new DateTime(1998, 11, 1);

        foreach (var feedback in split.Train)
        {
            var rating = (Rating)feedback;

            // The timestamp comes from a data file, so it is parsed with the invariant culture;
            // otherwise a machine locale with ',' as decimal separator would misread it.
            double dayOffset = double.Parse(
                feedback.Attributes["timestamp"].Value,
                System.Globalization.CultureInfo.InvariantCulture);
            var time = firstRatingMl10M.AddDays(dayOffset);

            mmlFeedback.Add(UsersMap.ToInternalID(rating.User.Id), ItemsMap.ToInternalID(rating.Item.Id), rating.Value, time);
        }

        ((ITimeAwareRatingPredictor)MmlRecommenderInstance).Ratings = mmlFeedback;
    }
    else
    {
        var mmlFeedback = new PosOnlyFeedback<SparseBooleanMatrix>();

        foreach (var feedback in split.Train)
        {
            mmlFeedback.Add(UsersMap.ToInternalID(feedback.User.Id), ItemsMap.ToInternalID(feedback.Item.Id));
        }

        ((ItemRecommender)MmlRecommenderInstance).Feedback = mmlFeedback;

        // Recommenders that need access to this model get a back-reference.
        var modelAware = MmlRecommenderInstance as IModelAwareRecommender;
        if (modelAware != null)
        {
            modelAware.Model = this;
        }
    }

    Logger.Current.Trace("Training with MyMediaLite recommender...");

    PureTrainTime = (int)Wrap.MeasureTime(delegate() { MmlRecommenderInstance.Train(); }).TotalMilliseconds;
}
// Re-estimates the sampling distribution over positive levels from the current factors.
// For each level the mean user-item scalar product of its feedback is computed; the means are
// normalised into level weights, the weights are scaled by the level sizes and normalised again
// into probabilities, and a new Categorical sampler is built from those probabilities.
// The level weights are logged with two decimals.
protected virtual void UpdatePosSampler()
{
    int levelCount = PosLevels.Count;

    // Step 1: mean predicted score per level.
    double[] meanScores = new double[levelCount];
    for (int level = 0; level < levelCount; level++)
    {
        var members = LevelPosFeedback[PosLevels[level]];

        foreach (Feedback member in members)
        {
            int internalUser = UsersMap.ToInternalID(member.User.Id);
            int internalItem = ItemsMap.ToInternalID(member.Item.Id);
            meanScores[level] += MatrixExtensions.RowScalarProduct(user_factors, internalUser, item_factors, internalItem);
        }

        meanScores[level] /= members.Count;
    }

    // Step 2: weights are the mean scores normalised by their total.
    double meanTotal = meanScores.Sum();
    double[] weightsPerLevel = new double[levelCount];
    for (int level = 0; level < levelCount; level++)
    {
        weightsPerLevel[level] = meanScores[level] / meanTotal;
    }

    // Step 3: normalisation constant of the size-scaled weights.
    double scaledTotal = 0;
    for (int level = 0; level < levelCount; level++)
    {
        scaledTotal += weightsPerLevel[level] * LevelPosFeedback[PosLevels[level]].Count;
    }

    // Step 4: per-level probabilities.
    double[] probabilities = new double[levelCount];
    for (int level = 0; level < levelCount; level++)
    {
        probabilities[level] = weightsPerLevel[level] * LevelPosFeedback[PosLevels[level]].Count / scaledTotal;
    }

    string formattedWeights = weightsPerLevel
        .Select(w => string.Format("{0:0.00}", w))
        .Aggregate((left, right) => left + " " + right);
    Logger.Current.Info(formattedWeights);

    _posLevelSampler = new Categorical(probabilities);
}
// Scores a feedback instance with group weights: each feature term is scaled by the weight of
// the feature's group, the user-item interaction by the user and item weights (weights[0] and
// weights[1]), and the accumulated user / item feature terms by the user / item weight
// respectively. Terms that involve a user or an item whose internal id is above
// MaxUserID / MaxItemID are treated as zero.
public override float Predict(Feedback feedback)
{
    int internalUser = UsersMap.ToInternalID(feedback.User.Id);
    int internalItem = ItemsMap.ToInternalID(feedback.Item.Id);

    // Translated lazily; enumerated once by the loop below.
    var translated = feedback.GetAllAttributes().Select(a => FeatureBuilder.TranslateAttribute(a));

    bool unseenUser = internalUser > MaxUserID;
    bool unseenItem = internalItem > MaxItemID;

    float userFeatureScore = 0;
    float itemFeatureScore = 0;

    foreach (var entry in translated)
    {
        // A feature index at or beyond NumTrainFeaturs first appeared in the test set,
        // so no factors have been learnt for it.
        if (entry.Item1 >= NumTrainFeaturs)
        {
            continue;
        }

        float value = entry.Item2;
        int group = FeatureGroups[entry.Item1];
        float groupWeight = weights[group];

        if (!unseenUser)
        {
            userFeatureScore += groupWeight * value * MatrixExtensions.RowScalarProduct(feature_factors, entry.Item1, user_factors, internalUser);
        }

        if (!unseenItem)
        {
            itemFeatureScore += groupWeight * value * MatrixExtensions.RowScalarProduct(feature_factors, entry.Item1, item_factors, internalItem);
        }
    }

    // Group indices: user is 0, item is 1.
    const int userGroup = 0;
    const int itemGroup = 1;
    float userWeight = weights[userGroup];
    float itemWeight = weights[itemGroup];

    float bias = 0;
    if (!unseenItem)
    {
        bias = item_bias[internalItem];
    }

    float interaction = 0;
    if (!unseenUser && !unseenItem)
    {
        interaction = userWeight * itemWeight * MatrixExtensions.RowScalarProduct(user_factors, internalUser, item_factors, internalItem);
    }

    return bias + interaction + userWeight * userFeatureScore + itemWeight * itemFeatureScore;
}
// Predicts the score for a user/item pair given by their external ids: both ids are mapped to
// internal ids and the prediction is delegated to the wrapped recommender.
public override float Predict(string userId, string itemId)
{
    int internalUser = UsersMap.ToInternalID(userId);
    int internalItem = ItemsMap.ToInternalID(itemId);

    return MmlRecommenderInstance.Predict(internalUser, internalItem);
}
// Evaluates the trained model on the split's test set.
// Internal ids are pre-registered first (outside the timed section). For rating data every
// test feedback gets a predicted score via Predict(feedback); for time-aware rating data the
// time-aware predictor is called with a time derived from the feedback's "timestamp" attribute
// as a day offset from 1998-11-01. Afterwards every evaluator in the context is run. The
// elapsed time of the timed section is stored in PureEvaluationTime (milliseconds).
public override void Evaluate(Split split, EvaluationContext context)
{
    ExhaustInternalIds(split);

    PureEvaluationTime = (int)Wrap.MeasureTime(delegate()
    {
        if (DataType == DataType.Ratings)
        {
            foreach (var feedback in split.Test)
            {
                context.PredictedScores.Add(feedback, Predict(feedback));
            }
        }
        else if (DataType == DataType.TimeAwareRating)
        {
            var predictor = (ITimeAwareRatingPredictor)MmlRecommenderInstance;
            var firstRatingMl10M = new DateTime(1998, 11, 1);

            foreach (var feedback in split.Test)
            {
                // The timestamp comes from a data file, so it is parsed with the invariant
                // culture; otherwise a machine locale with ',' as decimal separator would
                // misread it.
                double dayOffset = double.Parse(
                    feedback.Attributes["timestamp"].Value,
                    System.Globalization.CultureInfo.InvariantCulture);
                var time = firstRatingMl10M.AddDays(dayOffset);

                context.PredictedScores.Add(
                    feedback,
                    predictor.Predict(UsersMap.ToInternalID(feedback.User.Id), ItemsMap.ToInternalID(feedback.Item.Id), time));
            }
        }

        context.Evaluators.ForEach(e => e.Evaluate(context, this, split));
    }).TotalMilliseconds;
}