/// <summary>
/// Get a list of all users and their ratings on every article
/// </summary>
/// <param name="rater">Turns all actions of one user on one article into a single rating value.</param>
/// <returns>
/// A table with one row per distinct user ID (ascending) and one column per distinct
/// article ID (ascending). Cells are only written for user/article pairs that appear
/// in <c>db.UserActions</c>; all other cells keep whatever value
/// <c>UserArticleRatings</c> gives them on construction.
/// </returns>
/// <exception cref="InvalidOperationException">
/// A user action references a user or article that is not present in
/// <c>db.Users</c> / <c>db.Articles</c>.
/// </exception>
public UserArticleRatingsTable GetUserArticleRatingsTable(IRater rater)
{
    UserArticleRatingsTable table = new UserArticleRatingsTable();

    table.UserIndexToID = db.Users.OrderBy(x => x.UserID).Select(x => x.UserID).Distinct().ToList();
    table.ArticleIndexToID = db.Articles.OrderBy(x => x.ArticleID).Select(x => x.ArticleID).Distinct().ToList();

    foreach (int userId in table.UserIndexToID)
    {
        table.Users.Add(new UserArticleRatings(userId, table.ArticleIndexToID.Count));
    }

    // Build ID -> index lookups once, instead of a linear IndexOf scan per rated pair.
    // The ID lists are distinct, so every key is unique.
    var userIndexById = table.UserIndexToID
        .Select((id, index) => new { id, index })
        .ToDictionary(entry => entry.id, entry => entry.index);
    var articleIndexById = table.ArticleIndexToID
        .Select((id, index) => new { id, index })
        .ToDictionary(entry => entry.id, entry => entry.index);

    // Collapse all actions of one user on one article into a single rating.
    var userArticleRatingGroup = db.UserActions
        .GroupBy(x => new { x.UserID, x.ArticleID })
        .Select(g => new { g.Key.UserID, g.Key.ArticleID, Rating = rater.GetRating(g.ToList()) })
        .ToList();

    foreach (var userAction in userArticleRatingGroup)
    {
        int userIndex;
        int articleIndex;

        // Fail with a message that names the offending IDs rather than an
        // out-of-range error caused by indexing with -1.
        if (!userIndexById.TryGetValue(userAction.UserID, out userIndex) ||
            !articleIndexById.TryGetValue(userAction.ArticleID, out articleIndex))
        {
            throw new InvalidOperationException(
                "User action references user " + userAction.UserID + " and article " + userAction.ArticleID +
                ", but at least one of them is missing from the database's user or article list.");
        }

        table.Users[userIndex].ArticleRatings[articleIndex] = userAction.Rating;
    }

    return table;
}
/// <summary>
/// Sizes the model to the given ratings table and resets its parameters:
/// every user and article feature gets a random value from <c>Random.NextDouble()</c>,
/// and all user and article biases start at zero.
/// </summary>
/// <param name="ratings">The user-article ratings table the model is about to be fitted to.</param>
private void Initialize(UserArticleRatingsTable ratings)
{
    numUsers = ratings.Users.Count;

    // The first row's length is taken as the article count (rows are assumed to be
    // equally wide). An empty table simply has no articles instead of throwing.
    numArticles = numUsers > 0 ? ratings.Users[0].ArticleRatings.Length : 0;

    Random rand = new Random();

    userFeatures = new double[numUsers][];
    for (int userIndex = 0; userIndex < numUsers; userIndex++)
    {
        userFeatures[userIndex] = new double[numFeatures];

        for (int featureIndex = 0; featureIndex < numFeatures; featureIndex++)
        {
            userFeatures[userIndex][featureIndex] = rand.NextDouble();
        }
    }

    articleFeatures = new double[numArticles][];

    // Must iterate over the ARTICLE count: bounding this loop by numUsers either
    // overruns the array (more users than articles) or leaves trailing rows null
    // (more articles than users).
    for (int articleIndex = 0; articleIndex < numArticles; articleIndex++)
    {
        articleFeatures[articleIndex] = new double[numFeatures];

        for (int featureIndex = 0; featureIndex < numFeatures; featureIndex++)
        {
            articleFeatures[articleIndex][featureIndex] = rand.NextDouble();
        }
    }

    // New arrays are zero-filled, so all biases start at 0.
    userBiases = new double[numUsers];
    articleBiases = new double[numArticles];
}
/// <summary>
/// Scores a recommender by the root-mean-square error between its predicted ratings
/// and the ratings derived from the given database.
/// </summary>
/// <param name="classifier">The recommender whose predictions are evaluated.</param>
/// <param name="db">The database whose user actions supply the actual ratings.</param>
/// <param name="rater">Turns a user's actions on an article into the actual rating.</param>
/// <returns>
/// A <c>ScoreResults</c> built from the RMSE over all user/article pairs whose actual
/// rating is non-zero, or from 0.0 when there is no such pair.
/// </returns>
public static ScoreResults Score(this IRecommender classifier, UserBehaviorDatabase db, IRater rater)
{
    UserBehaviorTransformer transformer = new UserBehaviorTransformer(db);
    UserArticleRatingsTable actual = transformer.GetUserArticleRatingsTable(rater);

    var pairs = db.UserActions.GroupBy(a => new { a.UserID, a.ArticleID }).ToList();

    double squaredErrorSum = 0.0;
    int ratedPairs = 0;

    foreach (var pair in pairs)
    {
        var key = pair.Key;

        int row = actual.UserIndexToID.IndexOf(key.UserID);
        int column = actual.ArticleIndexToID.IndexOf(key.ArticleID);
        double expected = actual.Users[row].ArticleRatings[column];

        // A zero cell means "no rating", so it does not take part in the error.
        if (expected == 0)
        {
            continue;
        }

        double predicted = classifier.GetRating(key.UserID, key.ArticleID);
        squaredErrorSum += Math.Pow(predicted - expected, 2);
        ratedPairs++;
    }

    // With nothing rated the accumulated sum is still 0.0 and is reported as-is.
    double rmse = ratedPairs > 0 ? Math.Sqrt(squaredErrorSum / ratedPairs) : squaredErrorSum;

    return new ScoreResults(rmse);
}
/// <summary>
/// Trains the recommender on the given database: builds the user-article ratings
/// table, appends the per-article tag counts to it as additional features, and then
/// refreshes the transposed ratings.
/// </summary>
/// <param name="db">The user behavior database to learn from.</param>
public void Train(UserBehaviorDatabase db)
{
    UserBehaviorTransformer transformer = new UserBehaviorTransformer(db);

    ratings = transformer.GetUserArticleRatingsTable(rater);
    ratings.AppendArticleFeatures(transformer.GetArticleTagCounts());

    FillTransposedRatings();
}
/// <summary>
/// Evaluates a recommender against the given database by asking it for suggestions
/// for every user that has at least one action, and checking which suggested
/// articles that user actually interacted with.
/// </summary>
/// <param name="classifier">The recommender under test.</param>
/// <param name="db">The database whose user actions count as the ground truth.</param>
/// <param name="numSuggestions">How many suggestions to request per user.</param>
/// <returns>
/// A <c>TestResults</c> built from the number of users tested, the number of users
/// with at least one correct suggestion, the average recall and the average
/// precision. Both averages are 0 when no user has any action.
/// </returns>
public static TestResults Test(this IRecommender classifier, UserBehaviorDatabase db, int numSuggestions)
{
    // We're only using the ratings to check for existence of a rating, so we can use a simple rater for everything
    SimpleRater rater = new SimpleRater();
    UserBehaviorTransformer ubt = new UserBehaviorTransformer(db);
    UserArticleRatingsTable ratings = ubt.GetUserArticleRatingsTable(rater);

    int correctUsers = 0;
    double averagePrecision = 0.0;
    double averageRecall = 0.0;

    // Get a list of users in this database who interacted with an article
    List<int> distinctUsers = db.UserActions.Select(x => x.UserID).Distinct().ToList();

    // Count each user's distinct articles once up front; re-running a deferred
    // GroupBy over all actions for every user made this quadratic.
    var articleCountByUser = db.UserActions
        .GroupBy(x => x.UserID)
        .ToDictionary(g => g.Key, g => g.Select(x => x.ArticleID).Distinct().Count());

    // ID -> index lookups, built once instead of scanning the lists per suggestion.
    var userIndexById = ratings.UserIndexToID
        .Select((id, index) => new { id, index })
        .ToDictionary(entry => entry.id, entry => entry.index);
    var articleIndexById = ratings.ArticleIndexToID
        .Select((id, index) => new { id, index })
        .ToDictionary(entry => entry.id, entry => entry.index);

    // Now get suggestions for each of these users
    foreach (int user in distinctUsers)
    {
        List<Suggestion> suggestions = classifier.GetSuggestions(user, numSuggestions);

        int userCorrectArticles = 0;
        int userTotalArticles = articleCountByUser[user];

        int userIndex;
        bool userKnown = userIndexById.TryGetValue(user, out userIndex);

        foreach (Suggestion s in suggestions)
        {
            int articleIndex;

            // A suggestion for an article (or user) that is not in the ratings table
            // cannot be a hit; treat it as a miss instead of indexing with -1.
            if (!userKnown || !articleIndexById.TryGetValue(s.ArticleID, out articleIndex))
            {
                continue;
            }

            // If one of the top N suggestions is what the user ended up reading, then we're golden
            if (ratings.Users[userIndex].ArticleRatings[articleIndex] != 0)
            {
                userCorrectArticles++;
            }
        }

        if (userCorrectArticles > 0)
        {
            correctUsers++;
        }

        // Avoid 0/0 = NaN poisoning the average when no suggestions were requested.
        if (numSuggestions > 0)
        {
            averagePrecision += (double)userCorrectArticles / numSuggestions;
        }

        averageRecall += (double)userCorrectArticles / userTotalArticles;
    }

    // With no users there is nothing to average; keep both values at 0 rather than NaN.
    if (distinctUsers.Count > 0)
    {
        averagePrecision /= distinctUsers.Count;
        averageRecall /= distinctUsers.Count;
    }

    return new TestResults(distinctUsers.Count, correctUsers, averageRecall, averagePrecision);
}
/// <summary>
/// Trains the recommender on the given database: builds the user-article ratings
/// table, factorizes it, and records the resulting user and article counts.
/// </summary>
/// <param name="db">The user behavior database to learn from.</param>
public void Train(UserBehaviorDatabase db)
{
    UserBehaviorTransformer transformer = new UserBehaviorTransformer(db);
    ratings = transformer.GetUserArticleRatingsTable(rater);

    // Factorize the ratings table with the configured feature count and iteration count.
    svd = new SingularValueDecomposition(numFeatures, learningIterations).FactorizeMatrix(ratings);

    numUsers = ratings.UserIndexToID.Count;
    numArticles = ratings.ArticleIndexToID.Count;
}
/// <summary>
/// Trains the recommender on the given database by building the user-article
/// ratings table. When <c>latentUserFeatureCount</c> is positive, the table is also
/// factorized and the resulting user features are appended to it.
/// </summary>
/// <param name="db">The user behavior database to learn from.</param>
public void Train(UserBehaviorDatabase db)
{
    UserBehaviorTransformer transformer = new UserBehaviorTransformer(db);
    ratings = transformer.GetUserArticleRatingsTable(rater);

    // No latent features requested: the plain ratings table is all we need.
    if (latentUserFeatureCount <= 0)
    {
        return;
    }

    // NOTE(review): 100 is the second constructor argument, presumably the number of
    // learning iterations - confirm against SingularValueDecomposition.
    SingularValueDecomposition factorizer = new SingularValueDecomposition(latentUserFeatureCount, 100);
    SvdResult factors = factorizer.FactorizeMatrix(ratings);

    ratings.AppendUserFeatures(factors.UserFeatures);
}
/// <summary>
/// Trains the recommender on the given database: builds the user-article ratings
/// table, appends the per-article tag counts to it as additional features, and then
/// refreshes the transposed ratings.
/// </summary>
/// <param name="db">The user behavior database to learn from.</param>
public void Train(UserBehaviorDatabase db)
{
    UserBehaviorTransformer transformer = new UserBehaviorTransformer(db);
    ratings = transformer.GetUserArticleRatingsTable(rater);

    List<ArticleTagCounts> tagCounts = transformer.GetArticleTagCounts();

    // train article
    // NOTE(review): the result is only referenced by the disabled call below; the
    // call itself is kept in case Angualr2 has side effects - confirm and remove.
    List<ArticleAndTag> taggedArticles = transformer.Angualr2();

    ratings.AppendArticleFeatures(tagCounts);
    // ratings.suggestArticle(taggedArticles, 3);

    FillTransposedRatings();
}
/// <summary>
/// Trains the recommender on the given database: builds the user-article ratings
/// table and stores the result of <c>Angualr2()</c> on it as <c>art</c>.
/// </summary>
/// <param name="db">The user behavior database to learn from.</param>
public void Train(UserBehaviorDatabase db)
{
    UserBehaviorTransformer transformer = new UserBehaviorTransformer(db);

    ratings = transformer.GetUserArticleRatingsTable(rater);
    ratings.art = transformer.Angualr2();

    // NOTE(review): second call whose result is never read; kept in case Angualr2
    // has side effects - confirm and remove.
    List<ArticleAndTag> taggedArticles = transformer.Angualr2();

    // Latent user feature augmentation is currently disabled:
    //if (latentUserFeatureCount > 0)
    //{
    //    SingularValueDecomposition svd = new SingularValueDecomposition(latentUserFeatureCount, 100);
    //    SvdResult results = svd.FactorizeMatrix(ratings);
    //    ratings.AppendUserFeatures(results.UserFeatures);
    //}
}
/// <summary>
/// Get the average rating of non-zero values across the entire user-article matrix
/// </summary>
/// <param name="ratings">
/// The ratings table to average. Only the first <c>numUsers</c> rows and
/// <c>numArticles</c> columns are read, so those fields must already describe it.
/// </param>
/// <returns>
/// The mean of all non-zero cells, or 0 when there is no non-zero cell at all.
/// </returns>
private double GetAverageRating(UserArticleRatingsTable ratings)
{
    double sum = 0.0;
    int count = 0;

    for (int userIndex = 0; userIndex < numUsers; userIndex++)
    {
        for (int articleIndex = 0; articleIndex < numArticles; articleIndex++)
        {
            double rating = ratings.Users[userIndex].ArticleRatings[articleIndex];

            // If the given user rated the given item, add it to our average
            if (rating != 0)
            {
                sum += rating;
                count++;
            }
        }
    }

    // Without any rating 0.0 / 0 would be NaN, which then spreads into every
    // value derived from the global average; fall back to a neutral 0 instead.
    if (count == 0)
    {
        return 0.0;
    }

    return sum / count;
}
/// <summary>
/// Replaces the current ratings table with one read from a gzip-compressed text
/// file, then refreshes the transposed ratings.
/// </summary>
/// <remarks>
/// The decompressed content is read one value per line, in this order: the number
/// of user rows; the number of values per row; for each row the user ID followed by
/// that many values; a count followed by that many user IDs; a count followed by
/// that many article IDs.
/// </remarks>
/// <param name="file">Path of the file to read.</param>
public void Load(string file)
{
    ratings = new UserArticleRatingsTable();

    // Open read-only: FileStream(path, mode) alone asks for read/write access,
    // which fails for read-only files even though nothing is written here.
    using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read))
    using (GZipStream zip = new GZipStream(fs, CompressionMode.Decompress))
    using (StreamReader r = new StreamReader(zip))
    {
        long total = long.Parse(r.ReadLine());
        int features = int.Parse(r.ReadLine());

        // One row per user: its ID, then its values.
        for (long i = 0; i < total; i++)
        {
            int userId = int.Parse(r.ReadLine());
            UserArticleRatings uat = new UserArticleRatings(userId, features);

            for (int x = 0; x < features; x++)
            {
                uat.ArticleRatings[x] = double.Parse(r.ReadLine());
            }

            ratings.Users.Add(uat);
        }

        // Row index -> user ID.
        total = int.Parse(r.ReadLine());
        for (int i = 0; i < total; i++)
        {
            ratings.UserIndexToID.Add(int.Parse(r.ReadLine()));
        }

        // Column index -> article ID.
        total = int.Parse(r.ReadLine());
        for (int i = 0; i < total; i++)
        {
            ratings.ArticleIndexToID.Add(int.Parse(r.ReadLine()));
        }
    }

    FillTransposedRatings();
}
/// <summary>
/// Get a list of all users and their ratings on every article
/// </summary>
/// <param name="rater">Turns all actions of one user on one article into a single rating value.</param>
/// <returns>
/// A table with one row per distinct user ID (ascending) and one column per distinct
/// article ID (ascending). Cells are only written for user/article pairs that appear
/// in <c>db.UserActions</c>. The table also carries the article names (<c>Ang1</c>)
/// and one <c>ArticleAndTag</c> entry per article (<c>art</c>).
/// </returns>
/// <exception cref="InvalidOperationException">
/// A user action references a user or article that is not present in
/// <c>db.Users</c> / <c>db.Articles</c>.
/// </exception>
public UserArticleRatingsTable GetUserArticleRatingsTable(IRater rater)
{
    UserArticleRatingsTable table = new UserArticleRatingsTable();

    table.UserIndexToID = db.Users.OrderBy(x => x.UserID).Select(x => x.UserID).Distinct().ToList();
    table.ArticleIndexToID = db.Articles.OrderBy(x => x.ArticleID).Select(x => x.ArticleID).Distinct().ToList();

    // NOTE(review): names are taken in db.Articles order, while ArticleIndexToID is
    // sorted by ID - confirm nobody relies on the two lists sharing indices.
    table.Ang1 = db.Articles.Select(x => x.Name).ToList();

    foreach (int userId in table.UserIndexToID)
    {
        table.Users.Add(new UserArticleRatings(userId, table.ArticleIndexToID.Count));
    }

    foreach (Article article in db.Articles)
    {
        table.art.Add(new ArticleAndTag(article.ArticleID, article.Name, article.Tags));
    }

    // Build ID -> index lookups once, instead of a linear IndexOf scan per rated pair.
    // The ID lists are distinct, so every key is unique.
    var userIndexById = table.UserIndexToID
        .Select((id, index) => new { id, index })
        .ToDictionary(entry => entry.id, entry => entry.index);
    var articleIndexById = table.ArticleIndexToID
        .Select((id, index) => new { id, index })
        .ToDictionary(entry => entry.id, entry => entry.index);

    // Collapse all actions of one user on one article into a single rating.
    var userArticleRatingGroup = db.UserActions
        .GroupBy(x => new { x.UserID, x.ArticleID })
        .Select(g => new { g.Key.UserID, g.Key.ArticleID, Rating = rater.GetRating(g.ToList()) })
        .ToList();

    foreach (var userAction in userArticleRatingGroup)
    {
        int userIndex;
        int articleIndex;

        // Fail with a message that names the offending IDs rather than an
        // out-of-range error caused by indexing with -1.
        if (!userIndexById.TryGetValue(userAction.UserID, out userIndex) ||
            !articleIndexById.TryGetValue(userAction.ArticleID, out articleIndex))
        {
            throw new InvalidOperationException(
                "User action references user " + userAction.UserID + " and article " + userAction.ArticleID +
                ", but at least one of them is missing from the database's user or article list.");
        }

        table.Users[userIndex].ArticleRatings[articleIndex] = userAction.Rating;
    }

    return table;
}
/// <summary>
/// Fits a biased matrix factorization to the non-zero cells of the ratings table.
/// Each rating is predicted as global average + user bias + article bias + the dot
/// product of the user's and the article's feature vectors, and every parameter is
/// nudged after each rated cell for <c>learningIterations</c> passes.
/// </summary>
/// <param name="ratings">The user-article ratings table; zero cells are skipped.</param>
/// <returns>
/// An <c>SvdResult</c> holding the learned global average, user biases, article
/// biases, user features and article features.
/// </returns>
/// <exception cref="InvalidOperationException">
/// A prediction became NaN during training.
/// </exception>
public SvdResult FactorizeMatrix(UserArticleRatingsTable ratings)
{
    Initialize(ratings);

    double squaredError;
    int count;

    // Per-pass RMSE; only consumed by the optional debug dump at the end.
    List<double> rmseAll = new List<double>();

    averageGlobalRating = GetAverageRating(ratings);

    // Decay a local copy of the learning rate so the configured rate survives this
    // call and a second factorization starts from the same rate as the first.
    var currentLearningRate = learningRate;

    for (int i = 0; i < learningIterations; i++)
    {
        squaredError = 0.0;
        count = 0;

        for (int userIndex = 0; userIndex < numUsers; userIndex++)
        {
            for (int articleIndex = 0; articleIndex < numArticles; articleIndex++)
            {
                // A zero cell means "not rated" and contributes nothing.
                if (ratings.Users[userIndex].ArticleRatings[articleIndex] != 0)
                {
                    double predictedRating = averageGlobalRating + userBiases[userIndex] + articleBiases[articleIndex] + Matrix.GetDotProduct(userFeatures[userIndex], articleFeatures[articleIndex]);

                    double error = ratings.Users[userIndex].ArticleRatings[articleIndex] - predictedRating;

                    if (double.IsNaN(predictedRating))
                    {
                        throw new InvalidOperationException("Encountered a non-number while factorizing a matrix! Try decreasing the learning rate.");
                    }

                    squaredError += Math.Pow(error, 2);
                    count++;

                    averageGlobalRating += currentLearningRate * (error - regularizationTerm * averageGlobalRating);
                    userBiases[userIndex] += currentLearningRate * (error - regularizationTerm * userBiases[userIndex]);
                    articleBiases[articleIndex] += currentLearningRate * (error - regularizationTerm * articleBiases[articleIndex]);

                    for (int featureIndex = 0; featureIndex < numFeatures; featureIndex++)
                    {
                        // Statement order matters: the article update below reads the
                        // user feature value that was just updated on this line.
                        userFeatures[userIndex][featureIndex] += currentLearningRate * (error * articleFeatures[articleIndex][featureIndex] - regularizationTerm * userFeatures[userIndex][featureIndex]);
                        articleFeatures[articleIndex][featureIndex] += currentLearningRate * (error * userFeatures[userIndex][featureIndex] - regularizationTerm * articleFeatures[articleIndex][featureIndex]);
                    }
                }
            }
        }

        // No rated cell means no error to report; avoid 0/0 = NaN.
        squaredError = count > 0 ? Math.Sqrt(squaredError / count) : 0.0;
        rmseAll.Add(squaredError);

        currentLearningRate *= learningDescent;
    }

    //using (StreamWriter w = new StreamWriter("rmse.csv"))
    //{
    //    w.WriteLine("epoc,rmse");
    //    for (int i = 0; i < rmseAll.Count; i++)
    //    {
    //        w.WriteLine((i + 1) + "," + rmseAll[i]);
    //    }
    //}

    return new SvdResult(averageGlobalRating, userBiases, articleBiases, userFeatures, articleFeatures);
}
/// <summary>
/// Replaces the current model and ratings table with the ones read from a
/// gzip-compressed text file.
/// </summary>
/// <remarks>
/// The decompressed content is read one value per line, in this order: user count;
/// article count; feature count; global average rating; one bias per user; one bias
/// per article; the feature values of every user; the feature values of every
/// article; for each user its ID followed by one rating per article; the user IDs;
/// the article IDs.
/// NOTE(review): the article feature section is read as one row per ARTICLE - confirm
/// the matching save routine writes the same number of rows.
/// </remarks>
/// <param name="file">Path of the file to read.</param>
public void Load(string file)
{
    ratings = new UserArticleRatingsTable();

    // Open read-only: FileStream(path, mode) alone asks for read/write access,
    // which fails for read-only files even though nothing is written here.
    using (FileStream fs = new FileStream(file, FileMode.Open, FileAccess.Read))
    using (GZipStream zip = new GZipStream(fs, CompressionMode.Decompress))
    using (StreamReader r = new StreamReader(zip))
    {
        numUsers = int.Parse(r.ReadLine());
        numArticles = int.Parse(r.ReadLine());
        numFeatures = int.Parse(r.ReadLine());

        double averageGlobalRating = double.Parse(r.ReadLine());

        double[] userBiases = new double[numUsers];
        for (int userIndex = 0; userIndex < numUsers; userIndex++)
        {
            userBiases[userIndex] = double.Parse(r.ReadLine());
        }

        double[] articleBiases = new double[numArticles];
        for (int articleIndex = 0; articleIndex < numArticles; articleIndex++)
        {
            articleBiases[articleIndex] = double.Parse(r.ReadLine());
        }

        double[][] userFeatures = new double[numUsers][];
        for (int userIndex = 0; userIndex < numUsers; userIndex++)
        {
            userFeatures[userIndex] = new double[numFeatures];

            for (int featureIndex = 0; featureIndex < numFeatures; featureIndex++)
            {
                userFeatures[userIndex][featureIndex] = double.Parse(r.ReadLine());
            }
        }

        double[][] articleFeatures = new double[numArticles][];

        // Must iterate over the ARTICLE count: bounding this loop by numUsers either
        // overruns the array (more users than articles) or leaves trailing rows null
        // and shifts every later read (more articles than users).
        for (int articleIndex = 0; articleIndex < numArticles; articleIndex++)
        {
            articleFeatures[articleIndex] = new double[numFeatures];

            for (int featureIndex = 0; featureIndex < numFeatures; featureIndex++)
            {
                articleFeatures[articleIndex][featureIndex] = double.Parse(r.ReadLine());
            }
        }

        svd = new SvdResult(averageGlobalRating, userBiases, articleBiases, userFeatures, articleFeatures);

        // One ratings row per user: its ID, then one rating per article.
        for (int i = 0; i < numUsers; i++)
        {
            int userId = int.Parse(r.ReadLine());
            UserArticleRatings uat = new UserArticleRatings(userId, numArticles);

            for (int x = 0; x < numArticles; x++)
            {
                uat.ArticleRatings[x] = double.Parse(r.ReadLine());
            }

            ratings.Users.Add(uat);
        }

        // Row index -> user ID.
        for (int i = 0; i < numUsers; i++)
        {
            ratings.UserIndexToID.Add(int.Parse(r.ReadLine()));
        }

        // Column index -> article ID.
        for (int i = 0; i < numArticles; i++)
        {
            ratings.ArticleIndexToID.Add(int.Parse(r.ReadLine()));
        }
    }
}