/// <summary>Convert the tag file into the text format written to <paramref name="outputFile"/></summary>
/// <remarks>
/// Every line of <c>TagFile</c> is split on tabs: column 0 is an identifier, column 1 a tag count and
/// column 2 a "##"-separated list of "name::value" pairs. Lines starting with "ERROR" and lines whose
/// tag count is not positive produce no output. The identifier and the tag names are translated
/// through one shared <c>Mapping</c>.
/// </remarks>
/// <param name="outputFile">path of the file to write</param>
public void CreateLdaInput(string outputFile)
{
    var map = new Mapping();
    var tagSeparator = new string[] { "##" };
    var pairSeparator = new string[] { "::" };

    // The projection stays lazy: it is evaluated while File.WriteAllLines enumerates it.
    var outputLines = File.ReadAllLines(TagFile)
        .Select(line =>
        {
            if (line.StartsWith("ERROR"))
                return "";

            var columns = line.Split('\t');
            if (Convert.ToInt32(columns[1]) <= 0)
                return "";

            var tags = columns[2].Split(tagSeparator, StringSplitOptions.RemoveEmptyEntries);

            // the line identifier is mapped first, then each tag name in file order
            string record = "" + map.ToInternalID(columns[0]);
            foreach (var tag in tags)
            {
                var pair = tag.Split(pairSeparator, StringSplitOptions.None);
                record += " " + map.ToInternalID(pair[0]) + ":" + pair[1];
            }

            return record;
        })
        .Where(record => !String.IsNullOrEmpty(record));

    File.WriteAllLines(outputFile, outputLines);
}
/// <summary>Load an entity mapping from a file</summary>
/// <remarks>
/// Empty lines are skipped. Every other line must consist of an internal ID and an external ID
/// separated by a tab, and the internal ID must equal the mapping's <c>NumberOfEntities</c> at the
/// moment the line is read.
/// </remarks>
/// <param name='filename'>the name of the file</param>
/// <returns>the mapping built from the file contents</returns>
/// <exception cref="FormatException">a line does not have exactly two columns, or its internal ID is not the expected one</exception>
public static IMapping LoadMapping(this string filename)
{
    var mapping = new Mapping();

    using (var reader = new StreamReader(filename))
    {
        for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            if (line.Length == 0)
                continue;

            string[] columns = line.Split('\t');
            if (columns.Length != 2)
                throw new FormatException("Expected exactly 2 columns: " + line);

            int internal_id = int.Parse(columns[0]);
            string external_id = columns[1];

            var expected_id = mapping.NumberOfEntities;
            if (internal_id != expected_id)
                throw new FormatException(string.Format("Expected ID {0}, not {1}, in line '{2}'", expected_id, internal_id, line));

            // keep both lookup directions in step
            mapping.internal_to_original.Add(external_id);
            mapping.original_to_internal[external_id] = internal_id;
        }
    }

    return mapping;
}
/// <summary>Build the feature vector line for this rating, including any cluster features that are set</summary>
/// <remarks>
/// The result is "Rating user:1 item:1" followed by one " id:1" entry for each of UserCluster,
/// ItemCluster, AuxUserCluster and AuxItemCluster that is neither null nor empty, in that order.
/// </remarks>
/// <param name="usersItemsMap">mapping used to translate user, item and cluster IDs</param>
/// <returns>the feature vector as a single line of text</returns>
public override string ToLibFmFeatureVector(Mapping usersItemsMap)
{
    // The cluster names go through the mapping before the user and item IDs; the call order is kept
    // because all of them share the same mapping object.
    string clusterFeatures = "";
    foreach (string cluster in new string[] { UserCluster, ItemCluster, AuxUserCluster, AuxItemCluster })
    {
        if (string.IsNullOrEmpty(cluster))
            continue;

        clusterFeatures += " " + usersItemsMap.ToInternalID(cluster).ToString() + ":1";
    }

    string baseFeatures = string.Format("{0} {1}:1 {2}:1", Rating, usersItemsMap.ToInternalID(User.Id), usersItemsMap.ToInternalID(Item.Id));

    return baseFeatures + clusterFeatures;
}
/// <summary>Get the internal ID of this entity, looking it up on first use</summary>
/// <param name="mapper">mapping consulted when no ID has been cached yet</param>
/// <returns>the cached internal ID</returns>
public int GetMappedId(Mapping mapper)
{
    // -1 marks "not looked up yet"
    if (_mappedId != -1)
        return _mappedId;

    _mappedId = mapper.ToInternalID(Id);
    return _mappedId;
}
/// <summary>Check the item correlation computed by Pearson on ratings read from disk</summary>
[Test()]
public void TestComputeCorrelations2()
{
    // load data from disk
    var users = new Mapping();
    var items = new Mapping();
    var ratings = RatingData.Read("../../../../data/ml-100k/u1.base", users, items);

    var pearson = new Pearson(ratings.AllUsers.Count, 200f);
    var correlation = pearson.ComputeCorrelation(ratings, EntityType.ITEM, 45, 311);

    Assert.AreEqual(-0.02788301f, correlation, 0.00001);
}
/// <summary>Read a timed rating dataset and print its statistics to the console</summary>
/// <param name="data_file">path of the dataset file</param>
/// <param name="user_mappings">user ID mapping handed to the reader</param>
/// <param name="item_mappings">item ID mapping handed to the reader</param>
/// <returns>the ratings read from <paramref name="data_file"/></returns>
private static ITimedRatings readDataMapped(string data_file, ref MyMediaLite.Data.Mapping user_mappings, ref MyMediaLite.Data.Mapping item_mappings)
{
    Console.WriteLine("Dataset: " + data_file);
    Console.WriteLine(DateTime.Now);

    var ratings = TimedRatingData.Read(data_file, user_mappings, item_mappings, TestRatingFileFormat.WITHOUT_RATINGS, true);

    Console.Write(ratings.Statistics());
    Console.WriteLine("Finished dataset read");

    return ratings;
}
/// <summary>Create a container with <paramref name="numDomains"/> domains named "ml0", "ml1", ...</summary>
/// <param name="numDomains">number of domains to register</param>
/// <param name="randomClusters">value stored in <c>RandomClusters</c></param>
public MovieLensCrossDomainContainer(int numDomains, bool randomClusters = false)
    : base()
{
    _mapper = new Mapping();
    NumDomains = numDomains;
    RandomClusters = randomClusters;
    _itemsCluster = new Dictionary<string, int>();

    int domainIndex = 0;
    while (domainIndex < numDomains)
    {
        string domainId = "ml" + domainIndex;
        Domains.Add(domainId, new Domain(domainId));
        domainIndex++;
    }
}
/// <summary>Assign each user and item to the column holding its largest value and write the assignments</summary>
/// <remarks>
/// Reads <c>Paths.AmazonBooksUsersCluster</c> + ".lf" (one row per user) and + ".rf" (one row per item),
/// each row being space-separated numbers, and writes "originalId,columnIndex" lines to the
/// ".nmf.u" and ".nmf.i" files. Row n is matched with the original ID of internal ID n.
/// </remarks>
public void CreateNmfClustersAmazon()
{
    var reader = new AmazonReader(Paths.AmazonBooksRatings);
    var dataset = new Dataset<ItemRating>(reader);

    var userMapping = new Mapping();
    var itemMapping = new Mapping();

    // ToList() forces the projection to run, so ToInternalID is called for every sample
    // before ToOriginalID is used below.
    var data = dataset.AllSamples
        .Select(ir => new
        {
            UserId = userMapping.ToInternalID(ir.User.Id),
            ItemId = itemMapping.ToInternalID(ir.Item.Id),
            Rating = Convert.ToDouble(ir.Rating)
        })
        .ToList();

    var blank = new string[] { " " };

    // index of the first occurrence of the largest value in a row
    Func<string, int> strongestColumn = row =>
    {
        var weights = row.Split(blank, StringSplitOptions.RemoveEmptyEntries)
            .Select(v => Convert.ToDouble(v))
            .ToList();
        return weights.IndexOf(weights.Max());
    };

    // users
    var userLines = File.ReadAllLines(Paths.AmazonBooksUsersCluster + ".lf")
        .Select((row, position) =>
        {
            int cluster = strongestColumn(row);
            return string.Format("{0},{1}", userMapping.ToOriginalID(position), cluster);
        });
    File.WriteAllLines(Paths.AmazonBooksUsersCluster + ".nmf.u", userLines);

    // items
    var itemLines = File.ReadAllLines(Paths.AmazonBooksUsersCluster + ".rf")
        .Select((row, position) =>
        {
            int cluster = strongestColumn(row);
            return string.Format("{0},{1}", itemMapping.ToOriginalID(position), cluster);
        });
    File.WriteAllLines(Paths.AmazonBooksUsersCluster + ".nmf.i", itemLines);
}
/// <summary>Train a UserKNN item recommender on a split of the dataset and evaluate it</summary>
/// <param name="data">path of the dataset file</param>
private static void startUserKNN(string data)
{
    var user_mapping = new MyMediaLite.Data.Mapping();
    var item_mapping = new MyMediaLite.Data.Mapping();

    // explicit interface types: these variables are passed by reference
    ITimedRatings all_data = readDataMapped(data, ref user_mapping, ref item_mapping);
    removeUserThreshold(ref all_data);

    Console.WriteLine("Start iteration Test UserKNN");

    ITimedRatings validation_data = new TimedRatings(); // 10%
    ITimedRatings test_data = new TimedRatings(); // 20%
    ITimedRatings training_data = new TimedRatings(); // 70%
    readAndSplitData(all_data, ref test_data, ref training_data, ref validation_data);

    // copy every training (user, item) pair into a positive-only feedback set
    IPosOnlyFeedback positive_feedback = new PosOnlyFeedback<SparseBooleanMatrix>();
    for (int k = 0; k < training_data.Users.Count; k++)
        positive_feedback.Add(training_data.Users[k], training_data.Items[k]);

    var recommender = new MyMediaLite.ItemRecommendation.UserKNN
    {
        K = 80,
        Q = 1,
        Weighted = false,
        Alpha = 0.5f,
        Correlation = MyMediaLite.Correlation.BinaryCorrelationType.Jaccard,
        Feedback = positive_feedback
    };

    DateTime training_start = DateTime.Now;
    recommender.Train();
    Console.Write("Total Training time needed:");
    TimeSpan training_time = DateTime.Now - training_start;
    Console.WriteLine(training_time.TotalMilliseconds);

    Console.WriteLine("Final results in this iteration:");

    // the returned results are not used here
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, validation_data, training_data, "VALIDATION ", false);
    MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, test_data, training_data, "TEST ", false);
}
/// <summary>Create a feature builder that starts with an empty mapping</summary>
public LibFmFeatureBuilder()
{
    this.Mapper = new Mapping();
}
/// <summary>Build the feature vector line "Rating user:1 item:1" for this rating</summary>
/// <param name="usersItemsMap">mapping used to translate the user and item IDs</param>
/// <returns>the feature vector as a single line of text</returns>
public new string ToLibFmFeatureVector(Mapping usersItemsMap)
{
    // read in the same order as the format arguments: rating, user, item
    var rating = Rating;
    var userFeature = usersItemsMap.ToInternalID(User.Id);
    var itemFeature = usersItemsMap.ToInternalID(Item.Id);

    return string.Format("{0} {1}:1 {2}:1", rating, userFeature, itemFeature);
}
/// <summary>Create a clusterer over a dataset, with fresh user and item mappings</summary>
/// <param name="dataset">the item ratings to work on</param>
public Clusterer(IDataset<ItemRating> dataset)
{
    this._dataset = dataset;
    this._userMapping = new Mapping();
    this._itemMapping = new Mapping();
}
/// <summary>Prepare the item recommender: load a saved model if one exists, otherwise train and save one</summary>
/// <param name="userMapping">user ID mapping used when reading the training data</param>
/// <param name="itemMapping">item ID mapping used when reading the training data</param>
/// <param name="args">command line arguments; args[2] is the directory holding the "model" file</param>
private static void TrainForItemPrediction(Mapping userMapping, Mapping itemMapping, String[] args)
{
    var training_data = ItemData.Read(trainingCompactFileForItems, userMapping, itemMapping);
    itemRecommender = new CustomBPRMF();

    string modelFile = Path.Combine(args[2], "model");

    if (!File.Exists(modelFile))
    {
        Console.WriteLine("Training model for Item Prediction, this may take a while...");
        itemRecommender.Feedback = training_data;
        itemRecommender.NumFactors = 50;
        itemRecommender.NumIter = 100;
        itemRecommender.Train();
        itemRecommender.SaveModel(modelFile);
    }
    else
    {
        Console.WriteLine("Skipping training, Loading saved model");
        // on this path the feedback is attached after the model has been loaded
        itemRecommender.LoadModel(modelFile);
        itemRecommender.Feedback = training_data;
    }
}
/// <summary>
/// Train a rating predictor on the compact training file, report its RMSE on the
/// testing file and write the predictions to the output directory
/// </summary>
/// <param name="args">command line arguments; args[1] is the directory of the testing file, args[2] the output directory</param>
private static void PredictRating(string[] args)
{
    Console.WriteLine("Predicting ratings for Users...");

    String outputFile = Path.Combine(args[2], outputFileForRatings);

    // the static path fields are overwritten with their combined values
    testingFile = Path.Combine(args[1], testingFile);
    testingFileModified = Path.Combine(args[2], testingFileModified);
    ModifyTestingFileForRating(testingFile, testingFileModified);

    var userMapping = new Mapping();
    var itemMapping = new Mapping();

    var trainingData = StaticRatingData.Read(trainingCompactFileForRatings, userMapping, itemMapping, RatingType.FLOAT, TestRatingFileFormat.WITH_RATINGS, false);
    var testUsers = trainingData.AllUsers; // users that will be taken into account in the evaluation
    var candidate_items = trainingData.AllItems; // items that will be taken into account in the evaluation
    var testData = StaticRatingData.Read(testingFileModified, userMapping, itemMapping, RatingType.FLOAT, TestRatingFileFormat.WITH_RATINGS, false);

    var recommender = new BiasedMatrixFactorization
    {
        MinRating = 1f,
        MaxRating = 5f,
        Ratings = trainingData,
        NumFactors = 30,
        NumIter = 100,
        RegI = 0.04F,
        RegU = 0.04F,
        FrequencyRegularization = true,
        BoldDriver = true,
        LearnRate = 0.07F
    };

    Stopwatch trainingTimer = Stopwatch.StartNew();
    recommender.Train();
    trainingTimer.Stop();
    Console.WriteLine("time passed for training rating prediction model: " + trainingTimer.ElapsedMilliseconds);

    // measure the accuracy on the test data set
    Stopwatch evaluationTimer = Stopwatch.StartNew();
    var results = recommender.Evaluate(testData);
    evaluationTimer.Stop();
    Console.WriteLine("time passed for rating prediction: " + evaluationTimer.ElapsedMilliseconds);
    Console.WriteLine("RMSE={0}", results["RMSE"]);

    recommender.WritePredictions(testData, outputFile, userMapping, itemMapping, "{0}-{1},{2}", "userID-itemID,rating");
}
/// <summary>Score every candidate item of one user, log the scores and update the evaluation counters</summary>
/// <remarks>
/// An item is predicted as purchased when its final score is below 0.4. Each item is recorded in
/// <c>rankedPurchase</c> under its score, and the true/false positive/negative counters are updated
/// from the prediction and the actual label. Failures are reported on the console and do not
/// propagate to the caller.
/// </remarks>
/// <param name="user">external ID of the user</param>
/// <param name="itemsPurchases">candidate items keyed by external item ID; element 0 of each value is "1" when the item was bought, element 1 is parsed as an integer</param>
/// <param name="userMapping">mapping used to translate the user ID</param>
/// <param name="itemMapping">mapping used to translate item IDs</param>
/// <param name="args">command line arguments; args[2] is the directory that receives ItemPrediction.log</param>
private static void PredictPurchase(string user, Dictionary<string, List<string>> itemsPurchases, Mapping userMapping, Mapping itemMapping, String[] args)
{
    string logFile = Path.Combine(args[2], "ItemPrediction.log");

    try
    {
        // the log is opened in append mode and closed exactly once, even on failure
        using (var writer = new StreamWriter(logFile, true))
        {
            rankedPurchase = new SortedDictionary<float, List<MeanAverPrecisionDetails>>();

            var internalUserId = userMapping.ToInternalID(user);

            // NOTE(review): the returned statistics are not used by the scoring below; the call is
            // kept in case it has effects that are not visible from this method - confirm and remove.
            GetAverageRatingAndNumberOfRatingsGivenByUser(internalUserId);

            Dictionary<string, int> userCategories = GetUserPurchaseCategories(internalUserId);

            foreach (var candidate in itemsPurchases)
            {
                string item = candidate.Key;
                List<string> purchaseDetails = candidate.Value;

                float result = itemRecommender.GetNearestItemSimilarity(internalUserId, itemMapping.ToInternalID(item));
                float averageRatingForItem = GetAverageRatingForItem(item);
                float resultFromItemCategories = CompareUserItemCategories(userCategories, GetItemCategories(item));

                if (averageRatingForItem > 4.0)
                {
                    // NOTE(review): this replaces the category score. The original was written as
                    // "= +(...)", which may have been meant as "+="; the assignment is kept - confirm intent.
                    resultFromItemCategories = (averageRatingForItem - 4.0F) / averageRatingForItem;
                }

                if (resultFromItemCategories > 0.45)
                {
                    result = result - resultFromItemCategories;
                }

                writer.WriteLine("User: {0}, Item: {1}, hasBought: {2}, result: {3}, rating for Item {4}", user, item, purchaseDetails[0], result, averageRatingForItem);

                bool predictedValueOfPurchase = result < 0.4;
                bool hasBought = purchaseDetails[0].Equals("1");

                var details = new MeanAverPrecisionDetails(user, item, predictedValueOfPurchase ? 1 : 0, hasBought ? 1 : 0, result, Int32.Parse(purchaseDetails[1]));

                // items with the same score share one list
                List<MeanAverPrecisionDetails> sameScore;
                if (!rankedPurchase.TryGetValue(result, out sameScore))
                {
                    sameScore = new List<MeanAverPrecisionDetails>();
                    rankedPurchase.Add(result, sameScore);
                }
                sameScore.Add(details);

                if (hasBought)
                {
                    totalActualPurchases++;
                    if (predictedValueOfPurchase)
                        truePositives++;   // correctly predicted a purchase
                    else
                        falseNegatives++;  // missed a purchase
                }
                else if (predictedValueOfPurchase)
                {
                    falsePositives++;      // predicted a purchase that did not happen
                }
                else
                {
                    trueNegatives++;       // correctly predicted no purchase
                }
            }

            CalculateAveragePrecision(args);
        }
    }
    catch (Exception e)
    {
        // Still best-effort (one failing user does not stop the run), but no longer silent.
        Console.WriteLine("exception predicting purchases for user {0} : {1}", user, e.Message);
    }
}
/// <summary>Run purchase prediction for every user in the testing file and print precision, recall, F1 and MAP</summary>
/// <remarks>
/// The first line of the file is read and discarded (presumably a header - confirm). Every further
/// line is split on ',' into "user-item", a bought flag and a rank; consecutive lines with the same
/// user are grouped and handed to <c>PredictPurchase</c>. Nothing is done if the file does not exist.
/// </remarks>
/// <param name="userMapping">user ID mapping passed on to <c>PredictPurchase</c></param>
/// <param name="itemMapping">item ID mapping passed on to <c>PredictPurchase</c></param>
/// <param name="args">command line arguments; args[1] is the directory of the testing file</param>
public static void RecommentItems(Mapping userMapping, Mapping itemMapping, String[] args)
{
    Console.WriteLine("Predicting Items for Users...");

    // the static path field is overwritten with its combined value
    testingFileForItems = Path.Combine(args[1], testingFileForItems);
    if (!File.Exists(testingFileForItems))
        return;

    try
    {
        using (var reader = new StreamReader(testingFileForItems))
        {
            int userCount = 0;

            reader.ReadLine(); // first line is discarded
            String line = reader.ReadLine();

            while (line != null)
            {
                string user = line.Split(',')[0].Split('-')[0];
                var itemsPurchase = new Dictionary<string, List<string>>();

                // collect the run of consecutive lines that belong to this user
                do
                {
                    String[] labels = line.Split(',');
                    String[] user_item = labels[0].Split('-');

                    // labels[1]: bought or not, 0 or 1; labels[2]: rank of this item
                    itemsPurchase.Add(user_item[1], new List<string> { labels[1], labels[2] });

                    line = reader.ReadLine();
                }
                while (line != null && user.Equals(line.Split(',')[0].Split('-')[0]));

                PredictPurchase(user, itemsPurchase, userMapping, itemMapping, args);
                userCount++;
            }

            double precision = (double) truePositives / (truePositives + falsePositives);
            double recall = (double) truePositives / (truePositives + falseNegatives);
            double f1Measure = 2 * ((precision * recall) / (precision + recall));
            double meanAveragePrecision = averagePrecisionSum / userCount;

            Console.WriteLine("precision : {0}, recall : {1}, f1 : {2}, MAP : {3} ", precision, recall, f1Measure, meanAveragePrecision);
        }
    }
    catch (IOException e)
    {
        Console.WriteLine("exception reading the training file : {0}", e.Message);
    }
}
/// <summary>Read movie data from the file with the given name</summary>
/// <param name="filename">the name of the file to be read from</param>
/// <param name="item_mapping">ID mapping for the movies</param>
public void Read(string filename, Mapping item_mapping)
{
    using (var file_reader = new StreamReader(filename))
    {
        Read(file_reader, item_mapping);
    }
}
/// <summary>Read movie data from a StreamReader, replacing any previously loaded movies</summary>
/// <remarks>
/// Every line must consist of exactly three "::"-separated columns: the movie ID, the IMDB key
/// and a third column that is not parsed.
/// </remarks>
/// <param name="reader">a StreamReader to be read from</param>
/// <param name="item_mapping">ID mapping for the movies</param>
/// <exception cref="FormatException">a line does not have exactly three columns</exception>
public void Read(StreamReader reader, Mapping item_mapping)
{
    movie_list = new List<Movie>();
    IMDB_KEY_To_ID = new Dictionary<string, int>();

    var column_separator = new string[] { "::" };

    while (!reader.EndOfStream)
    {
        string line = reader.ReadLine();

        string[] columns = line.Split(column_separator, StringSplitOptions.None);
        if (columns.Length != 3)
            throw new FormatException("Expected exactly 3 columns: " + line);

        int movie_id = item_mapping.ToInternalID(columns[0]);
        string movie_imdb_key = columns[1];

        // the year is fixed to 1900 and the IMDB key doubles as the title
        int movie_year = 1900;
        string movie_title = movie_imdb_key;

        movie_list.Add(new Movie(movie_id, movie_title, movie_year, movie_imdb_key));
        IMDB_KEY_To_ID[movie_imdb_key] = movie_id;
    }
}