/// <summary>
/// Builds a two-level table keyed by user id and then item id.
/// Only the first rating seen for a (user, item) pair is stored; later duplicates are ignored.
/// </summary>
/// <param name="ratings">Ratings to index.</param>
/// <param name="useImplicitFeedback">
/// When true, every stored value is 1.0; otherwise the rating's own score is stored.
/// </param>
/// <returns>The user id - item id - value table.</returns>
public static MyTable GetRatingTable(List<Rating> ratings, bool useImplicitFeedback = false)
{
    MyTable result = new MyTable();

    foreach (Rating rating in ratings)
    {
        if (result.ContainsKey(rating.UserId, rating.ItemId))
        {
            continue; // keep the first occurrence only
        }

        if (useImplicitFeedback)
        {
            result.Add(rating.UserId, rating.ItemId, 1.0);
        }
        else
        {
            result.Add(rating.UserId, rating.ItemId, rating.Score);
        }
    }

    return result;
}
/// <summary>
/// Accumulates item-item co-occurrence counts. For every list in
/// <paramref name="userItemsTable"/>, each ordered pair of ratings whose item ids differ
/// adds 1.0 to the cell (first item id, second item id).
/// </summary>
/// <param name="userItemsTable">Maps an int key to a list of Rating.</param>
/// <param name="multithread">
/// When true the keys are processed with Parallel.ForEach and every table update is
/// performed under a lock on the result table.
/// </param>
/// <returns>item id - item id - count table (counts stored as double).</returns>
public MyTable CalculateCooccurrences(Hashtable userItemsTable, bool multithread = false)
{
    MyTable counts = new MyTable();

    if (!multithread)
    {
        foreach (int userId in userItemsTable.Keys)
        {
            List<Rating> rated = (List<Rating>)userItemsTable[userId];
            foreach (Rating first in rated)
            {
                foreach (Rating second in rated)
                {
                    if (first.ItemId != second.ItemId)
                    {
                        if (!counts.ContainsKey(first.ItemId, second.ItemId))
                        {
                            counts.Add(first.ItemId, second.ItemId, 0.0);
                        }
                        double previous = (double)counts[first.ItemId, second.ItemId];
                        counts[first.ItemId, second.ItemId] = previous + 1.0;
                    }
                }
            }
        }

        return counts;
    }

    // Snapshot the keys into an array so they can be partitioned across workers.
    int[] userIds = new int[userItemsTable.Keys.Count];
    userItemsTable.Keys.CopyTo(userIds, 0);

    Parallel.ForEach(userIds, userId =>
    {
        List<Rating> rated = (List<Rating>)userItemsTable[userId];
        foreach (Rating first in rated)
        {
            foreach (Rating second in rated)
            {
                if (first.ItemId != second.ItemId)
                {
                    // The check-then-update on the shared table must be atomic.
                    lock (counts)
                    {
                        if (!counts.ContainsKey(first.ItemId, second.ItemId))
                        {
                            counts.Add(first.ItemId, second.ItemId, 0.0);
                        }
                        double previous = (double)counts[first.ItemId, second.ItemId];
                        counts[first.ItemId, second.ItemId] = previous + 1.0;
                    }
                }
            }
        }
    });

    return counts;
}
/// <summary>
/// Accumulates weighted user-user co-occurrences. For every list in
/// <paramref name="itemUsersTable"/>, each ordered pair of ratings whose user ids differ
/// adds 1 / ln(1 + list length) to the cell (first user id, second user id).
/// </summary>
/// <param name="itemUsersTable">Maps an int key to a list of Rating.</param>
/// <param name="multithread">
/// When true the keys are processed with Parallel.ForEach and every table update is
/// performed under a lock on the result table.
/// </param>
/// <returns>user id - user id - accumulated weight table.</returns>
protected MyTable CalculateCooccurrences(Hashtable itemUsersTable, bool multithread = false)
{
    MyTable weights = new MyTable();

    if (!multithread)
    {
        foreach (int itemId in itemUsersTable.Keys)
        {
            List<Rating> raters = (List<Rating>)itemUsersTable[itemId];
            // The contribution is the same for every pair drawn from this list.
            double contribution = 1.0 / Math.Log(1 + raters.Count);

            foreach (Rating first in raters)
            {
                foreach (Rating second in raters)
                {
                    if (first.UserId != second.UserId)
                    {
                        if (!weights.ContainsKey(first.UserId, second.UserId))
                        {
                            weights.Add(first.UserId, second.UserId, 0.0);
                        }
                        double previous = (double)weights[first.UserId, second.UserId];
                        weights[first.UserId, second.UserId] = previous + contribution;
                    }
                }
            }
        }

        return weights;
    }

    // Snapshot the keys into an array so they can be partitioned across workers.
    int[] itemIds = new int[itemUsersTable.Keys.Count];
    itemUsersTable.Keys.CopyTo(itemIds, 0);

    Parallel.ForEach(itemIds, itemId =>
    {
        List<Rating> raters = (List<Rating>)itemUsersTable[itemId];
        double contribution = 1.0 / Math.Log(1 + raters.Count);

        foreach (Rating first in raters)
        {
            foreach (Rating second in raters)
            {
                if (first.UserId != second.UserId)
                {
                    // The check-then-update on the shared table must be atomic.
                    lock (weights)
                    {
                        if (!weights.ContainsKey(first.UserId, second.UserId))
                        {
                            weights.Add(first.UserId, second.UserId, 0.0);
                        }
                        double previous = (double)weights[first.UserId, second.UserId];
                        weights[first.UserId, second.UserId] = previous + contribution;
                    }
                }
            }
        }
    });

    return weights;
}
/// <summary>
/// Reads ratings from a delimited text file such as BX-Book-Ratings.csv.
/// The first line is treated as a header and skipped. Lines that do not split into
/// exactly three fields are ignored, and only the first rating for a (user, isbn) pair is kept.
/// </summary>
/// <param name="file">Path of the ratings file.</param>
/// <param name="separator">Field separators; every character of the string is a separator.</param>
/// <returns>ratings table: string, userId; string, isbn; double score</returns>
/// <exception cref="ArgumentException">The file does not exist.</exception>
/// <exception cref="FormatException">A score field is not a valid number.</exception>
public static MyTable GetRatings(string file, string separator = ";")
{
    if (!new FileInfo(file).Exists)
    {
        throw new ArgumentException("File doesn't exist: " + file);
    }

    MyTable ratingsTable = new MyTable();

    // "using" guarantees the file handle is released even when a malformed
    // score makes Double.Parse throw half-way through the file.
    using (StreamReader reader = new StreamReader(file))
    {
        reader.ReadLine(); // skip the header line
        char[] separators = separator.ToArray();

        string rawLine;
        while ((rawLine = reader.ReadLine()) != null)
        {
            // Fields are wrapped in double quotes; strip them from the line ends
            // first and then from each individual field.
            string[] elements = rawLine.Trim('\"').Split(separators, StringSplitOptions.None);
            if (elements.Length != 3)
            {
                continue;
            }

            string id = elements[0].Trim('\"');
            string isbn = elements[1].Trim('\"');
            // NOTE(review): parsing uses the current culture, as before.
            double rate = Double.Parse(elements[2].Trim('\"'));

            if (!ratingsTable.ContainsKey(id, isbn))
            {
                ratingsTable.Add(id, isbn, rate);
            }
        }
    }

    return ratingsTable;
}
/// <summary>
/// Randomly partitions the entries of a two-level table into a train table and a test table.
/// Each (user, item) entry goes to the test table when a random draw in [0, 1) is below
/// <paramref name="testSize"/>, and to the train table otherwise.
/// </summary>
/// <param name="recordTable">Table keyed by int user id and then int item id.</param>
/// <param name="testSize">Probability that an entry is assigned to the test table.</param>
/// <returns>Tuple of (train table, test table).</returns>
/// <exception cref="ArgumentNullException"><paramref name="recordTable"/> is null.</exception>
public static Tuple<MyTable, MyTable> TrainTestSplit(MyTable recordTable, double testSize = 0.1)
{
    if (recordTable == null)
    {
        throw new ArgumentNullException();
    }

    var rng = Core.Random.GetInstance();
    MyTable trainTable = new MyTable();
    MyTable testTable = new MyTable();

    foreach (int userId in recordTable.Keys)
    {
        Hashtable perItem = (Hashtable)recordTable[userId];
        foreach (int itemId in perItem.Keys)
        {
            var payload = perItem[itemId];

            // One draw per entry decides which side it lands on.
            MyTable destination = rng.NextDouble() < testSize ? testTable : trainTable;
            destination.Add(userId, itemId, payload);
        }
    }

    return Tuple.Create(trainTable, testTable);
}
/// <summary>
/// Computes precision and recall of a recommendation list against a test list.
/// A hit is a test rating whose (user id, item id) pair also appears among the recommendations.
/// </summary>
/// <param name="recommended">Recommended ratings; the denominator for precision is its Count.</param>
/// <param name="test">Held-out ratings; the denominator for recall is its Count.</param>
/// <returns>precision, recall (each 0.0 when its denominator list is empty)</returns>
public static Tuple<double, double> PrecisionAndRecall(List<Rating> recommended, List<Rating> test)
{
    // Index the recommendations for constant-time pair lookup.
    MyTable lookup = new MyTable();
    foreach (Rating candidate in recommended)
    {
        if (lookup.ContainsKey(candidate.UserId, candidate.ItemId))
        {
            continue;
        }
        lookup.Add(candidate.UserId, candidate.ItemId, candidate.Score);
    }

    int hits = 0;
    foreach (Rating truth in test)
    {
        if (lookup.ContainsKey(truth.UserId, truth.ItemId))
        {
            hits += 1;
        }
    }

    double precision = recommended.Count > 0 ? hits * 1.0 / recommended.Count : 0.0;
    double recall = test.Count > 0 ? hits * 1.0 / test.Count : 0.0;

    return Tuple.Create(precision, recall);
}
// Design for Facebook
/// <summary>
/// Recommends, for every source node that appears in <paramref name="baseEdges"/>, the first
/// <paramref name="k"/> nodes of this.Nodes it is not already linked to, then prints k followed
/// by the output of Evaluation for those recommendations against <paramref name="testEdges"/>.
/// </summary>
/// <param name="baseEdges">Known links used to exclude already-linked targets.</param>
/// <param name="testEdges">Held-out links passed to Evaluation.</param>
/// <param name="userNumber">Not used by this method.</param>
/// <param name="k">Maximum number of recommendations per source node.</param>
public void TryListRecommendation(List<Link> baseEdges, List<Link> testEdges, int userNumber, int k)
{
    // Index the known links so "already linked" is a constant-time lookup.
    MyTable table = new MyTable();
    foreach (Link e in baseEdges)
    {
        if (!table.ContainsKey(e.From, e.To))
        {
            table.Add(e.From, e.To, null);
        }
    }

    List<Link> recommendations = new List<Link>();
    foreach (int uId in table.Keys)
    {
        int counter = 0;
        foreach (Node n in this.Nodes)
        {
            // Stop once exactly k items were emitted. The previous check
            // (counter > k, tested after the add) let k + 1 items through.
            if (counter >= k)
            {
                break;
            }

            if (!table.ContainsKey(uId, n.Id)) // u has no link to n yet
            {
                recommendations.Add(new Link(uId, n.Id, 1.0));
                counter++;
            }
        }
    }

    Console.Write("{0}, ", k);
    Evaluation(recommendations, testEdges);
}
/// <summary>
/// User-based recommendation. For each user present in <paramref name="W"/>, items held by
/// the users returned from GetSimilarUsers(W, userId, K) that the user does not hold are
/// scored by summing the corresponding link weights; the N highest-scored items per user
/// are returned.
/// </summary>
/// <param name="ratingTable">user id - item id - value table.</param>
/// <param name="W">User similarity table; users without a main key here are skipped.</param>
/// <param name="K">Passed through to GetSimilarUsers.</param>
/// <param name="N">Maximum number of items returned per user.</param>
/// <returns>Flat list of recommended ratings (user id, item id, accumulated weight).</returns>
protected List<Rating> GetRecommendations(MyTable ratingTable, MyTable W, int K = 80, int N = 10)
{
    MyTable scores = new MyTable();

    foreach (int userId in ratingTable.Keys)
    {
        Hashtable ownItems = (Hashtable)ratingTable[userId];
        if (W.ContainsMainKey(userId))
        {
            List<Link> neighbours = GetSimilarUsers(W, userId, K);
            foreach (Link neighbour in neighbours)
            {
                int neighbourId = neighbour.To;
                Hashtable neighbourItems = (Hashtable)ratingTable[neighbourId];

                foreach (int itemId in neighbourItems.Keys)
                {
                    if (ownItems.ContainsKey(itemId))
                    {
                        continue; // already held by the target user
                    }

                    if (!scores.ContainsKey(userId, itemId))
                    {
                        scores.Add(userId, itemId, neighbour.Weight);
                    }
                    else
                    {
                        double soFar = (double)scores[userId, itemId];
                        scores[userId, itemId] = soFar + neighbour.Weight;
                    }
                }
            }
        }
    }

    // Flatten the score table into per-user top-N lists.
    List<Rating> topItems = new List<Rating>();
    foreach (int uId in scores.Keys)
    {
        Hashtable perItem = (Hashtable)scores[uId];
        List<Rating> candidates = new List<Rating>();
        foreach (int iId in perItem.Keys)
        {
            double score = (double)perItem[iId];
            candidates.Add(new Rating(uId, iId, score));
        }

        List<Rating> ranked = candidates.OrderByDescending(c => c.Score).ToList();
        int keep = Math.Min(ranked.Count, N);
        topItems.AddRange(ranked.GetRange(0, keep));
    }

    return topItems;
}
/// <summary>
/// Tag-based scoring with two logarithmic dampening factors. For each user, each of the
/// user's tags, and each item under that tag which the user does not already hold, adds
/// wut / ln(1 + |tagUsersTable[tag]|) * wti / ln(1 + |itemTagsTable[item]|)
/// to the (user, item) score. Users are processed in parallel; updates to the result table
/// are serialized with a lock on it.
/// </summary>
/// <param name="ratingTable">user id - item id table; its main keys drive the iteration.</param>
/// <param name="userTagTable">user id - tag id - weight (double) table.</param>
/// <param name="tagUsersTable">Maps a tag id to a list of Link; only its Count is used.</param>
/// <param name="tagItemTable">tag id - item id - weight (double) table.</param>
/// <param name="itemTagsTable">Maps an item id to a list of Link; only its Count is used.</param>
/// <returns>user id - item id - accumulated score table.</returns>
protected MyTable GetRecommendationsByTFIDFPlusPlus(MyTable ratingTable, MyTable userTagTable, Hashtable tagUsersTable, MyTable tagItemTable, Hashtable itemTagsTable)
{
    MyTable scores = new MyTable();

    int[] userIds = new int[ratingTable.Keys.Count];
    ratingTable.Keys.CopyTo(userIds, 0);

    Parallel.ForEach(userIds, userId =>
    {
        Hashtable ownItems = (Hashtable)ratingTable[userId];
        if (!userTagTable.ContainsMainKey(userId))
        {
            return; // user has no tags: nothing to score
        }

        Hashtable ownTags = (Hashtable)userTagTable[userId];
        foreach (int tagId in ownTags.Keys)
        {
            if (tagItemTable.ContainsMainKey(tagId))
            {
                Hashtable taggedItems = (Hashtable)tagItemTable[tagId];
                foreach (int itemId in taggedItems.Keys)
                {
                    if (ownItems.ContainsKey(itemId))
                    {
                        continue; // user already holds this item
                    }

                    List<Link> tagEntries = (List<Link>)tagUsersTable[tagId];   // tagUsersTable entries for this tag
                    List<Link> itemEntries = (List<Link>)itemTagsTable[itemId]; // itemTagsTable entries for this item
                    double userTagWeight = (double)ownTags[tagId];
                    double tagItemWeight = (double)taggedItems[itemId];

                    double dampedUserTag = userTagWeight / Math.Log(1 + tagEntries.Count);
                    double contribution = dampedUserTag * tagItemWeight / Math.Log(1 + itemEntries.Count);

                    lock (scores)
                    {
                        if (!scores.ContainsKey(userId, itemId))
                        {
                            scores.Add(userId, itemId, contribution);
                        }
                        else
                        {
                            scores[userId, itemId] = (double)scores[userId, itemId] + contribution;
                        }
                    }
                }
            }
        }
    });

    return scores;
}
/// <summary>
/// Item-based recommendation. For each item a user holds, the links stored for that item in
/// <paramref name="similarItemsTable"/> contribute their weight to the score of the link's
/// target item, unless the user already holds that target. The N highest-scored items per
/// user are returned.
/// </summary>
/// <param name="ratingTable">user id - item id - value table.</param>
/// <param name="similarItemsTable">Maps an item id to a list of Link to similar items.</param>
/// <param name="K">Not used by this overload.</param>
/// <param name="N">Maximum number of items returned per user.</param>
/// <returns>Flat list of recommended ratings (user id, item id, accumulated weight).</returns>
public List<Rating> GetRecommendations(MyTable ratingTable, Hashtable similarItemsTable, int K = 80, int N = 10)
{
    MyTable scores = new MyTable();

    foreach (int userId in ratingTable.Keys)
    {
        Hashtable ownItems = (Hashtable)ratingTable[userId];
        foreach (int ownedItemId in ownItems.Keys)
        {
            if (similarItemsTable.ContainsKey(ownedItemId))
            {
                List<Link> neighbours = (List<Link>)similarItemsTable[ownedItemId];
                foreach (Link neighbour in neighbours)
                {
                    int candidateId = neighbour.To;
                    if (ownItems.ContainsKey(candidateId))
                    {
                        continue; // already held by the user
                    }

                    if (!scores.ContainsKey(userId, candidateId))
                    {
                        scores.Add(userId, candidateId, neighbour.Weight);
                    }
                    else
                    {
                        double soFar = (double)scores[userId, candidateId];
                        scores[userId, candidateId] = soFar + neighbour.Weight;
                    }
                }
            }
        }
    }

    // Flatten the score table into per-user top-N lists.
    List<Rating> topItems = new List<Rating>();
    foreach (int uId in scores.Keys)
    {
        Hashtable perItem = (Hashtable)scores[uId];
        List<Rating> candidates = new List<Rating>();
        foreach (int iId in perItem.Keys)
        {
            double score = (double)perItem[iId];
            candidates.Add(new Rating(uId, iId, score));
        }

        List<Rating> ranked = candidates.OrderByDescending(c => c.Score).ToList();
        int keep = Math.Min(ranked.Count, N);
        topItems.AddRange(ranked.GetRange(0, keep));
    }

    return topItems;
}
/// <summary>
/// Builds a two-level table of links keyed in reverse direction: first by the link's To id,
/// then by its From id, storing the link weight. The first link seen for a pair wins.
/// </summary>
/// <param name="links">Links to index.</param>
/// <returns>to id - from id - weight table.</returns>
public static MyTable GetReversedLinkTable(List<Link> links)
{
    MyTable reversed = new MyTable();

    foreach (Link link in links)
    {
        if (reversed.ContainsKey(link.To, link.From))
        {
            continue; // duplicate pair: keep the first weight
        }
        reversed.Add(link.To, link.From, link.Weight);
    }

    return reversed;
}
/// <summary>
/// Builds a two-level table of ratings keyed first by item id and then by user id, storing
/// the rating score. The first rating seen for a pair wins.
/// </summary>
/// <param name="ratings">Ratings to index.</param>
/// <returns>item id - user id - score table.</returns>
public static MyTable GetReversedRatingTable(List<Rating> ratings)
{
    MyTable reversed = new MyTable();

    foreach (Rating rating in ratings)
    {
        if (reversed.ContainsKey(rating.ItemId, rating.UserId))
        {
            continue; // duplicate pair: keep the first score
        }
        reversed.Add(rating.ItemId, rating.UserId, rating.Score);
    }

    return reversed;
}
/// <summary>
/// Tag-based scoring. For each user, each of the user's tags, and each item under that tag
/// which the user does not already hold, adds (user-tag weight * tag-item weight) to the
/// (user, item) score. Users are processed in parallel; updates to the result table are
/// serialized with a lock on it.
/// </summary>
/// <param name="ratingTable">user id - item id table; its main keys drive the iteration.</param>
/// <param name="userTagTable">user id - tag id - weight (double) table.</param>
/// <param name="tagItemTable">tag id - item id - weight (double) table.</param>
/// <returns>user id - item id - accumulated score table.</returns>
protected MyTable GetRecommendations(MyTable ratingTable, MyTable userTagTable, MyTable tagItemTable)
{
    MyTable scores = new MyTable();

    int[] userIds = new int[ratingTable.Keys.Count];
    ratingTable.Keys.CopyTo(userIds, 0);

    Parallel.ForEach(userIds, userId =>
    {
        Hashtable ownItems = (Hashtable)ratingTable[userId];
        if (!userTagTable.ContainsMainKey(userId))
        {
            return; // user has no tags: nothing to score
        }

        Hashtable ownTags = (Hashtable)userTagTable[userId];
        foreach (int tagId in ownTags.Keys)
        {
            if (tagItemTable.ContainsMainKey(tagId))
            {
                Hashtable taggedItems = (Hashtable)tagItemTable[tagId];
                foreach (int itemId in taggedItems.Keys)
                {
                    if (ownItems.ContainsKey(itemId))
                    {
                        continue; // user already holds this item
                    }

                    double userTagWeight = (double)ownTags[tagId];
                    double tagItemWeight = (double)taggedItems[itemId];
                    double contribution = userTagWeight * tagItemWeight;

                    lock (scores)
                    {
                        if (!scores.ContainsKey(userId, itemId))
                        {
                            scores.Add(userId, itemId, contribution);
                        }
                        else
                        {
                            scores[userId, itemId] = (double)scores[userId, itemId] + contribution;
                        }
                    }
                }
            }
        }
    });

    return scores;
}
// Design for movielens
/// <summary>
/// Ranks items by the values in PR and recommends to every user found in
/// <paramref name="baseRatings"/> the <paramref name="k"/> best-ranked items the user has not
/// rated, then prints "k, precision, recall" using Metrics.PrecisionAndRecall.
/// </summary>
/// <param name="baseRatings">Known ratings used to exclude already-rated items.</param>
/// <param name="testRatings">Held-out ratings used for evaluation.</param>
/// <param name="userNumber">Offset of the item section in PR: item i is read from PR[userNumber + i].</param>
/// <param name="itemNumber">Number of items; item ids run from 1 to itemNumber.</param>
/// <param name="k">Maximum number of recommendations per user.</param>
public void TryListRecommendation(List<Rating> baseRatings, List<Rating> testRatings, int userNumber, int itemNumber, int k)
{
    // Get item part, node id as item id
    List<Node> nodes = new List<Node>();
    for (int i = userNumber + 1; i < userNumber + itemNumber + 1; i++)
    {
        nodes.Add(new Node(i - userNumber, PR[i]));
    }

    // Sort descending ONCE. Without ToList() the deferred query would be
    // re-sorted every time the per-user loop below enumerates it.
    List<Node> rankedItems = nodes.OrderByDescending(n => n.Weight).ToList();

    // Index the known ratings so "already rated" is a constant-time lookup.
    MyTable table = new MyTable();
    foreach (Rating r in baseRatings)
    {
        if (!table.ContainsKey(r.UserId, r.ItemId))
        {
            table.Add(r.UserId, r.ItemId, r.Score);
        }
    }

    List<Rating> recommendations = new List<Rating>();
    foreach (int uId in table.Keys)
    {
        int counter = 0;
        foreach (Node n in rankedItems)
        {
            // Stop once exactly k items were emitted. The previous check
            // (counter > k, tested after the add) let k + 1 items through.
            if (counter >= k)
            {
                break;
            }

            if (!table.ContainsKey(uId, n.Id)) // u has not rated n
            {
                recommendations.Add(new Rating(uId, n.Id, 1.0));
                counter++;
            }
        }
    }

    // Evaluation
    var pr = Metrics.PrecisionAndRecall(recommendations, testRatings);
    Console.WriteLine("{0}, {1}, {2}", k, pr.Item1, pr.Item2);
}
/// <summary>
/// Converts item co-occurrence counts into similarities:
/// w(i, j) = cooccurrence(i, j) / sqrt(|itemUsersTable[i]| * |itemUsersTable[j]|).
/// </summary>
/// <param name="coourrencesTable">item id - item id - co-occurrence (double) table.</param>
/// <param name="itemUsersTable">Maps an item id to a list of Rating; only the list's Count is used.</param>
/// <returns>item id - item id - similarity table.</returns>
public MyTable CalculateSimilarities(MyTable coourrencesTable, Hashtable itemUsersTable)
{
    MyTable wuv = new MyTable();
    foreach (int iId in coourrencesTable.Keys)
    {
        Hashtable subTable = (Hashtable)coourrencesTable[iId];
        List<Rating> iRatings = (List<Rating>)itemUsersTable[iId];
        foreach (int jId in subTable.Keys)
        {
            double coourrences = (double)subTable[jId];
            List<Rating> jRatings = (List<Rating>)itemUsersTable[jId];

            // Multiply in double: the product of two int counts can exceed
            // Int32.MaxValue and wrap negative, which made Math.Sqrt return NaN.
            double norm = Math.Sqrt((double)iRatings.Count * jRatings.Count);
            wuv.Add(iId, jId, coourrences * 1.0 / norm);
        }
    }
    return wuv;
}
/// <summary>
/// Converts user co-occurrence values into similarities:
/// w(u, v) = cooccurrence(u, v) / sqrt(|userItemsTable[u]| * |userItemsTable[v]|).
/// </summary>
/// <param name="coourrencesTable">user id - user id - co-occurrence (double) table.</param>
/// <param name="userItemsTable">Maps a user id to a list of Rating; only the list's Count is used.</param>
/// <returns>user id - user id - similarity table.</returns>
protected MyTable CalculateSimilarities(MyTable coourrencesTable, Hashtable userItemsTable)
{
    MyTable wuv = new MyTable();
    foreach (int uId in coourrencesTable.Keys)
    {
        Hashtable subTable = (Hashtable)coourrencesTable[uId];
        List<Rating> uRatings = (List<Rating>)userItemsTable[uId];
        foreach (int vId in subTable.Keys)
        {
            double coourrences = (double)subTable[vId];
            List<Rating> vRatings = (List<Rating>)userItemsTable[vId];

            // Multiply in double: the product of two int counts can exceed
            // Int32.MaxValue and wrap negative, which made Math.Sqrt return NaN.
            double norm = Math.Sqrt((double)uRatings.Count * vRatings.Count);
            wuv.Add(uId, vId, coourrences / norm);
        }
    }
    return wuv;
}
/// <summary>
/// Converts user co-occurrence values into similarities by dividing each value by
/// UnionOccurrences of the two users' rating sub-tables.
/// </summary>
/// <param name="coourrencesTable">user id - user id - co-occurrence (double) table.</param>
/// <param name="ratingTable">user id - item id table; each user's sub-table is passed to UnionOccurrences.</param>
/// <returns>user id - user id - similarity table.</returns>
protected MyTable CalculateJaccardSimilarities(MyTable coourrencesTable, MyTable ratingTable)
{
    MyTable similarities = new MyTable();

    foreach (int uId in coourrencesTable.Keys)
    {
        Hashtable neighbours = (Hashtable)coourrencesTable[uId];
        Hashtable uRatings = (Hashtable)ratingTable[uId];

        foreach (int vId in neighbours.Keys)
        {
            double shared = (double)neighbours[vId];
            Hashtable vRatings = (Hashtable)ratingTable[vId];

            // UnionOccurrences is defined elsewhere; presumably the size of the
            // union of both users' items (TODO confirm).
            var similarity = shared / UnionOccurrences(uRatings, vRatings);
            similarities.Add(uId, vId, similarity);
        }
    }

    return similarities;
}
/// <summary>
/// Reads tagging records, one per line: userID bookmarkID tagID timestamp.
/// The first line is treated as a header and skipped; lines that do not split into
/// exactly four non-empty fields are ignored. The timestamp field is not used.
/// </summary>
/// <param name="file">user_taggedbookmarks-timestamps.dat</param>
/// <param name="separator">Field separators; every character of the string is a separator.</param>
/// <returns>
/// MyTable: user Id - item Id - list of Link, where each link is (item Id, tag Id).
/// </returns>
/// <exception cref="ArgumentException">The file does not exist.</exception>
/// <exception cref="FormatException">An id field is not a valid integer.</exception>
public static MyTable GetRecords(string file, string separator = "\t")
{
    if (!File.Exists(file))
    {
        throw new ArgumentException("File doesn't exist: " + file);
    }

    MyTable recordsTable = new MyTable();
    char[] separators = separator.ToCharArray(); // computed once instead of per line

    // "using" guarantees the file handle is released even when a malformed
    // id makes Int32.Parse throw half-way through the file.
    using (StreamReader reader = new StreamReader(file))
    {
        reader.ReadLine(); // skip the header line

        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] elements = line.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            if (elements.Length != 4)
            {
                continue;
            }

            int userId = Int32.Parse(elements[0]);
            int itemId = Int32.Parse(elements[1]); // bookmark id
            int tagId = Int32.Parse(elements[2]);

            Link itemTag = new Link(itemId, tagId);
            if (recordsTable.ContainsKey(userId, itemId))
            {
                // Append to the list already stored for this (user, item) pair.
                List<Link> links = (List<Link>)recordsTable[userId, itemId];
                links.Add(itemTag);
            }
            else
            {
                recordsTable.Add(userId, itemId, new List<Link>() { itemTag });
            }
        }
    }

    return recordsTable;
}
/// <summary>
/// Prints "precision, recall" for link recommendations against a test set.
/// A hit is a recommended link whose (From, To) pair also appears in <paramref name="test"/>.
/// Precision is hits / recommendations.Count and recall is hits / test.Count; each is
/// reported as 0 when its denominator list is empty.
/// </summary>
/// <param name="recommendations">Recommended links.</param>
/// <param name="test">Held-out links.</param>
public static void Evaluation(List<Link> recommendations, List<Link> test)
{
    // Index the test links for constant-time pair lookup.
    MyTable table = new MyTable();
    foreach (Link e in test)
    {
        if (!table.ContainsKey(e.From, e.To))
        {
            table.Add(e.From, e.To, null);
        }
    }

    int hit = 0;
    foreach (Link e in recommendations)
    {
        if (table.ContainsKey(e.From, e.To))
        {
            hit++;
        }
    }

    // Guard the denominators: 0.0 / 0 is NaN, which used to be printed
    // whenever either list was empty.
    double precision = recommendations.Count > 0 ? hit * 1.0 / recommendations.Count : 0.0;
    double recall = test.Count > 0 ? hit * 1.0 / test.Count : 0.0;

    Console.WriteLine("{0}, {1}", precision, recall);
}
/// <summary>
/// Derives three count relations from a record table:
/// 1. user - item - number of links stored for that (user, item) entry
/// 2. user - tag - number of links of that user carrying the tag
/// 3. item - tag - number of links of that item carrying the tag
/// </summary>
/// <param name="recordTable">user id - item id - list of Link table; a link's To is read as the tag id.</param>
/// <returns>Tuple of (user-item counts, user-tag counts, item-tag counts).</returns>
public static Tuple<List<Rating>, List<Link>, List<Link>> GetRelations(MyTable recordTable)
{
    List<Rating> userItemCount = new List<Rating>(); // user - item - #tag
    MyTable userTags = new MyTable();                // user - tag - running count
    MyTable itemTags = new MyTable();                // item - tag - running count

    foreach (int userId in recordTable.Keys)
    {
        Hashtable perItem = (Hashtable)recordTable[userId];
        foreach (int itemId in perItem.Keys)
        {
            List<Link> links = (List<Link>)perItem[itemId];
            userItemCount.Add(new Rating(userId, itemId, links.Count));

            foreach (Link link in links)
            {
                int tagId = link.To;

                // user - tag tally
                if (!userTags.ContainsKey(userId, tagId))
                {
                    userTags.Add(userId, tagId, 1);
                }
                else
                {
                    int seen = (int)userTags[userId, tagId];
                    userTags[userId, tagId] = seen + 1;
                }

                // item - tag tally
                if (!itemTags.ContainsKey(itemId, tagId))
                {
                    itemTags.Add(itemId, tagId, 1);
                }
                else
                {
                    int seen = (int)itemTags[itemId, tagId];
                    itemTags[itemId, tagId] = seen + 1;
                }
            }
        }
    }

    // Flatten the user - tag tallies into links.
    List<Link> userTagCount = new List<Link>();
    foreach (int userId in userTags.Keys)
    {
        Hashtable perTag = (Hashtable)userTags[userId];
        foreach (int tagId in perTag.Keys)
        {
            int total = (int)perTag[tagId];
            userTagCount.Add(new Link(userId, tagId, total));
        }
    }

    // Flatten the item - tag tallies into links.
    List<Link> itemTagCount = new List<Link>();
    foreach (int itemId in itemTags.Keys)
    {
        Hashtable perTag = (Hashtable)itemTags[itemId];
        foreach (int tagId in perTag.Keys)
        {
            int total = (int)perTag[tagId];
            itemTagCount.Add(new Link(itemId, tagId, total));
        }
    }

    return Tuple.Create(userItemCount, userTagCount, itemTagCount);
}