/// <summary>
/// Converts the tag file into a sparse "document term:value" text file.
/// </summary>
/// <remarks>
/// Each input line is tab-separated: field 0 is the document's external ID, field 1 is an
/// integer tag count and field 2 is a list of tags separated by "##", where every tag has
/// the form "name::value". Lines starting with "ERROR" and lines whose tag count is not
/// positive produce no output. Document IDs and tag names are numbered by the same
/// <c>Mapping</c> instance, in the order they are first encountered.
/// </remarks>
/// <param name="outputFile">Path of the file to write; one line per retained document.</param>
public void CreateLdaInput(string outputFile)
{
    var idMap = new Mapping();
    var tagDelimiter = new string[] { "##" };
    var pairDelimiter = new string[] { "::" };

    // The pipeline is lazy, so every input line flows through all stages before the next
    // line is touched; internal IDs are therefore handed out in file order.
    var ldaLines = File.ReadAllLines(TagFile)
        .Where(row => !row.StartsWith("ERROR"))
        .Select(row => row.Split('\t'))
        .Where(fields => Convert.ToInt32(fields[1]) > 0)
        .Select(fields =>
        {
            var tagEntries = fields[2].Split(tagDelimiter, StringSplitOptions.RemoveEmptyEntries);

            // The document ID is requested before any of its tags.
            string head = idMap.ToInternalID(fields[0]).ToString();

            var terms = tagEntries.Select(entry =>
            {
                var pair = entry.Split(pairDelimiter, StringSplitOptions.None);
                return " " + idMap.ToInternalID(pair[0]) + ":" + pair[1];
            });

            return head + string.Concat(terms);
        });

    File.WriteAllLines(outputFile, ldaLines);
}
/// <summary>
/// Returns the internal ID for this object's <c>Id</c>, resolving it through
/// <paramref name="mapper"/> on first use and caching it in <c>_mappedId</c>.
/// </summary>
/// <param name="mapper">Mapping used to resolve the ID while nothing is cached yet.</param>
/// <returns>The cached or freshly resolved internal ID.</returns>
public int GetMappedId(Mapping mapper)
{
    // -1 is the "not resolved yet" sentinel; any other value is served from the cache.
    if (_mappedId != -1)
    {
        return _mappedId;
    }

    _mappedId = mapper.ToInternalID(Id);
    return _mappedId;
}
/// <summary>
/// Builds the feature-vector line for this rating: the rating value, one indicator for the
/// user, one for the item, and one further indicator for each cluster label that is set.
/// </summary>
/// <param name="usersItemsMap">Mapping that turns user, item and cluster keys into feature indices.</param>
/// <returns>A line of the form "rating user:1 item:1 [cluster:1 ...]".</returns>
public override string ToLibFmFeatureVector(Mapping usersItemsMap)
{
    string[] clusterKeys = { UserCluster, ItemCluster, AuxUserCluster, AuxItemCluster };

    // Cluster indices are requested from the mapping before the user and item indices,
    // even though they are emitted after them.
    string clusterTerms = "";
    foreach (string clusterKey in clusterKeys)
    {
        if (string.IsNullOrEmpty(clusterKey))
        {
            continue;
        }

        clusterTerms += " " + usersItemsMap.ToInternalID(clusterKey).ToString() + ":1";
    }

    string baseTerms = string.Format(
        "{0} {1}:1 {2}:1",
        Rating,
        usersItemsMap.ToInternalID(User.Id),
        usersItemsMap.ToInternalID(Item.Id));

    return baseTerms + clusterTerms;
}
/// <summary>
/// Assigns every Amazon-books user and item to a cluster by taking, for each row of the
/// corresponding factor file, the index of the largest value in that row.
/// </summary>
/// <remarks>
/// Reads the ratings to build the user and item ID mappings, then reads
/// <c>Paths.AmazonBooksUsersCluster + ".lf"</c> (users) and <c>+ ".rf"</c> (items) and writes
/// "originalId,clusterId" lines to <c>+ ".nmf.u"</c> and <c>+ ".nmf.i"</c> respectively.
/// NOTE(review): the item files also use the AmazonBooksUsersCluster prefix - presumably
/// intentional (one shared prefix for both factor files); confirm.
/// </remarks>
public void CreateNmfClustersAmazon()
{
    var reader = new AmazonReader(Paths.AmazonBooksRatings);
    var dataset = new Dataset<ItemRating>(reader);
    var userMapping = new Mapping();
    var itemMapping = new Mapping();

    // Only the side effect matters here: walking the samples in order registers every
    // user and item with its mapping, so that row numbers can be translated back below.
    foreach (var sample in dataset.AllSamples)
    {
        userMapping.ToInternalID(sample.User.Id);
        itemMapping.ToInternalID(sample.Item.Id);
    }

    WriteDominantClusters(
        Paths.AmazonBooksUsersCluster + ".lf",
        Paths.AmazonBooksUsersCluster + ".nmf.u",
        userMapping);

    WriteDominantClusters(
        Paths.AmazonBooksUsersCluster + ".rf",
        Paths.AmazonBooksUsersCluster + ".nmf.i",
        itemMapping);
}

/// <summary>
/// Writes one "originalId,clusterId" line per row of a space-separated factor file, where
/// clusterId is the position of the row's first maximal value.
/// </summary>
/// <param name="factorFile">File with one row of space-separated numbers per entity.</param>
/// <param name="outputFile">File that receives the cluster assignments.</param>
/// <param name="idMapping">Mapping used to translate the zero-based row number to an original ID.</param>
private static void WriteDominantClusters(string factorFile, string outputFile, Mapping idMapping)
{
    var rowSeparator = new string[] { " " };

    // The indexed Select overload supplies the row number, so no captured counter has to
    // be mutated inside a lazily evaluated lambda.
    // Assumes row k of the factor file belongs to internal ID k - TODO confirm against the
    // tool that produced the file.
    var assignments = File.ReadAllLines(factorFile).Select((row, rowIndex) =>
    {
        // NOTE(review): Convert.ToDouble parses with the current culture; confirm that it
        // matches how the factor file was written.
        var weights = row.Split(rowSeparator, StringSplitOptions.RemoveEmptyEntries)
                         .Select(v => Convert.ToDouble(v))
                         .ToList();
        int clusterId = weights.IndexOf(weights.Max());
        return string.Format("{0},{1}", idMapping.ToOriginalID(rowIndex), clusterId);
    });

    File.WriteAllLines(outputFile, assignments);
}
/// <summary>
/// Builds the feature-vector line for this rating: the rating value followed by one
/// indicator feature for the user and one for the item.
/// </summary>
/// <param name="usersItemsMap">Mapping that turns user and item IDs into feature indices.</param>
/// <returns>A line of the form "rating user:1 item:1".</returns>
public new string ToLibFmFeatureVector(Mapping usersItemsMap)
{
    var target = Rating;

    // The user index is requested from the mapping before the item index.
    var userFeature = usersItemsMap.ToInternalID(User.Id);
    var itemFeature = usersItemsMap.ToInternalID(Item.Id);

    return string.Format("{0} {1}:1 {2}:1", target, userFeature, itemFeature);
}
/// <summary>
/// Scores every candidate item for one user, appends each score to ItemPrediction.log,
/// records it in the shared <c>rankedPurchase</c> table, updates the confusion-matrix
/// counters and finally calls <c>CalculateAveragePrecision</c>.
/// </summary>
/// <remarks>
/// An item is predicted as purchased when its final score is below 0.4. Failures inside the
/// scoring run are reported on standard error and do not propagate to the caller.
/// </remarks>
/// <param name="user">External ID of the user being evaluated.</param>
/// <param name="itemsPurchases">
/// Candidate items keyed by external item ID. Element 0 of each list is "1" when the user
/// actually bought the item; element 1 is parsed as an integer and passed on to
/// <c>MeanAverPrecisionDetails</c>.
/// </param>
/// <param name="userMapping">Mapping from external to internal user IDs.</param>
/// <param name="itemMapping">Mapping from external to internal item IDs.</param>
/// <param name="args">
/// args[2] is the directory that receives ItemPrediction.log; the whole array is forwarded
/// to <c>CalculateAveragePrecision</c>.
/// </param>
private static void PredictPurchase(string user, Dictionary<string, List<string>> itemsPurchases, Mapping userMapping, Mapping itemMapping, String[] args)
{
    string logFile = Path.Combine(args[2], "ItemPrediction.log");

    try
    {
        // 'using' guarantees the log is flushed and closed on every exit path.
        using (StreamWriter writer = new StreamWriter(logFile, true))
        {
            rankedPurchase = new SortedDictionary<float, List<MeanAverPrecisionDetails>>();

            // Resolve the user once instead of once per call site.
            var userId = userMapping.ToInternalID(user);

            // The returned average/count pair is not used by the active heuristic; the
            // call is kept in case it has side effects.
            GetAverageRatingAndNumberOfRatingsGivenByUser(userId);

            Dictionary<string, int> userCategories = GetUserPurchaseCategories(userId);

            foreach (KeyValuePair<string, List<string>> candidate in itemsPurchases)
            {
                string item = candidate.Key;
                List<string> purchaseInfo = candidate.Value;

                float result = itemRecommender.GetNearestItemSimilarity(userId, itemMapping.ToInternalID(item));
                float averageRatingForItem = GetAverageRatingForItem(item);
                float resultFromItemCategories = CompareUserItemCategories(userCategories, GetItemCategories(item));

                if (averageRatingForItem > 4.0)
                {
                    // NOTE(review): the original read "= +(...)", i.e. a plain assignment
                    // that REPLACES the category score. If "+=" was intended this is a
                    // typo; behavior is kept as written until that is confirmed.
                    resultFromItemCategories = (averageRatingForItem - 4.0F) / averageRatingForItem;
                }

                if (resultFromItemCategories > 0.45)
                {
                    result = result - resultFromItemCategories;
                }

                writer.WriteLine("User: {0}, Item: {1}, hasBought: {2}, result: {3}, rating for Item {4}", user, item, purchaseInfo[0], result, averageRatingForItem);

                bool predictedValueOfPurchase = result < 0.4;
                bool actuallyBought = purchaseInfo[0].Equals("1");

                MeanAverPrecisionDetails details = new MeanAverPrecisionDetails(
                    user,
                    item,
                    predictedValueOfPurchase ? 1 : 0,
                    actuallyBought ? 1 : 0,
                    result,
                    Int32.Parse(purchaseInfo[1]));

                // Items with the same score share one bucket in the ranking table.
                List<MeanAverPrecisionDetails> sameScore;
                if (!rankedPurchase.TryGetValue(result, out sameScore))
                {
                    sameScore = new List<MeanAverPrecisionDetails>();
                    rankedPurchase.Add(result, sameScore);
                }
                sameScore.Add(details);

                if (actuallyBought)
                {
                    totalActualPurchases++;
                    if (predictedValueOfPurchase)
                    {
                        // Correctly identified a purchase.
                        truePositives++;
                    }
                    else
                    {
                        // Missed a real purchase.
                        falseNegatives++;
                    }
                }
                else if (predictedValueOfPurchase)
                {
                    // Predicted a purchase that did not happen.
                    falsePositives++;
                }
                else
                {
                    // Correctly predicted that nothing was bought.
                    trueNegatives++;
                }
            }

            // Runs while the log is still open, as in the original ordering.
            CalculateAveragePrecision(args);
        }
    }
    catch (Exception ex)
    {
        // Still best-effort (nothing is rethrown), but the failure is no longer invisible.
        Console.Error.WriteLine("PredictPurchase failed for user {0}: {1}", user, ex);
    }
}
/// <summary>Loads movie records from a text stream, replacing any previously loaded data.</summary>
/// <remarks>
/// Every line must consist of exactly three fields separated by "::". Field 0 is the
/// external movie ID and field 1 is the IMDB key; field 2 is not read. The IMDB key is also
/// stored as the movie's title and the year is fixed at 1900.
/// </remarks>
/// <param name="reader">a StreamReader to be read from</param>
/// <param name="item_mapping">ID mapping for the movies</param>
/// <exception cref="FormatException">A line does not split into exactly three fields.</exception>
public void Read(StreamReader reader, Mapping item_mapping)
{
    movie_list = new List<Movie>();
    IMDB_KEY_To_ID = new Dictionary<string, int>();

    string[] fieldDelimiter = { "::" };
    const int placeholderYear = 1900;

    while (!reader.EndOfStream)
    {
        string record = reader.ReadLine();
        string[] fields = record.Split(fieldDelimiter, StringSplitOptions.None);

        if (fields.Length != 3)
        {
            throw new FormatException("Expected exactly 3 columns: " + record);
        }

        int internalId = item_mapping.ToInternalID(fields[0]);
        string imdbKey = fields[1];

        // No separate title is available in this format, so the IMDB key doubles as one.
        movie_list.Add(new Movie(internalId, imdbKey, placeholderYear, imdbKey));
        IMDB_KEY_To_ID[imdbKey] = internalId;
    }
}