Пример #1
0
        /// <summary>Convert the tag file into a text file with one output line per tagged record.</summary>
        /// <remarks>
        /// Each line of <c>TagFile</c> is split on tabs: column 0 is the record's ID, column 1 an
        /// integer tag count and column 2 a "##"-separated list of "name::value" tags.
        /// The record ID and every tag name are translated with a fresh <c>Mapping</c>; the output line
        /// is the internal record ID followed by " internalTagId:value" for each tag.
        /// Blank lines, lines starting with "ERROR" and records with a tag count of zero or less
        /// produce no output.
        /// </remarks>
        /// <param name="outputFile">path of the file to write</param>
        public void CreateLdaInput(string outputFile)
        {
            var map = new Mapping();
            var tagSeparator = new string[] { "##" };
            var partSeparator = new string[] { "::" };

            var outputLines = File.ReadAllLines(TagFile).Select(l =>
            {
                // Blank lines carry no record; previously they crashed on tokens[1].
                if (l.Trim().Length == 0)
                    return "";

                // Ordinal comparison: "ERROR" is a machine marker, not linguistic text.
                if (l.StartsWith("ERROR", StringComparison.Ordinal))
                    return "";

                var tokens = l.Split('\t');
                int tagCount = Convert.ToInt32(tokens[1]);
                if (tagCount <= 0)
                    return "";

                var tags = tokens[2].Split(tagSeparator, StringSplitOptions.RemoveEmptyEntries);

                // The record ID is mapped before its tags, so internal IDs are handed out in file order.
                var output = new System.Text.StringBuilder();
                output.Append(map.ToInternalID(tokens[0]));

                foreach (var tag in tags)
                {
                    var parts = tag.Split(partSeparator, StringSplitOptions.None);
                    output.Append(' ').Append(map.ToInternalID(parts[0])).Append(':').Append(parts[1]);
                }

                return output.ToString();
            }).Where(o => !String.IsNullOrEmpty(o));

            // The query is lazy; it is enumerated exactly once, here.
            File.WriteAllLines(outputFile, outputLines);
        }
		/// <summary>Build an ID mapping from a tab-separated text file</summary>
		/// <remarks>
		/// Empty lines are skipped. Every other line must consist of exactly two tab-separated
		/// columns: the internal (integer) ID followed by the external ID. The internal ID of a
		/// line must equal the number of entities already held by the mapping when that line is read.
		/// </remarks>
		/// <param name='filename'>the name of the file</param>
		/// <returns>the mapping populated from the file</returns>
		/// <exception cref="FormatException">
		/// a line does not have exactly two columns, or its internal ID is not the expected one
		/// </exception>
		public static IMapping LoadMapping(this string filename)
		{
			var result = new Mapping();

			using ( var reader = new StreamReader(filename) )
			{
				for (string row = reader.ReadLine(); row != null; row = reader.ReadLine())
				{
					if (row.Length > 0)
					{
						string[] columns = row.Split('\t');
						if (columns.Length != 2)
							throw new FormatException("Expected exactly 2 columns: " + row);

						int internalId = int.Parse(columns[0]);
						string externalId = columns[1];

						// IDs must arrive in the order in which the mapping would assign them
						var expectedId = result.NumberOfEntities;
						if (internalId != expectedId)
							throw new FormatException(string.Format("Expected ID {0}, not {1}, in line '{2}'", expectedId, internalId, row));

						result.internal_to_original.Add(externalId);
						result.original_to_internal[externalId] = internalId;
					}
				}
			}

			return result;
		}
Пример #3
0
        /// <summary>
        /// Render this rating as a feature-vector line: the rating, then "id:1" entries for the
        /// user, the item and every non-empty cluster.
        /// </summary>
        /// <param name="usersItemsMap">mapping used to translate user, item and cluster IDs to internal IDs</param>
        /// <returns>the feature-vector line</returns>
        public override string ToLibFmFeatureVector(Mapping usersItemsMap)
        {
            // A missing (null or empty) cluster yields "", which is left out of the vector below.
            Func<string, string> clusterToFeature = cluster =>
                string.IsNullOrEmpty(cluster) ? "" : usersItemsMap.ToInternalID(cluster).ToString();

            // Clusters are mapped before the user and the item, in this fixed order.
            string[] clusterFeatures =
            {
                clusterToFeature(UserCluster),
                clusterToFeature(ItemCluster),
                clusterToFeature(AuxUserCluster),
                clusterToFeature(AuxItemCluster)
            };

            string vector = string.Format("{0} {1}:1 {2}:1",
                Rating,
                usersItemsMap.ToInternalID(User.Id),
                usersItemsMap.ToInternalID(Item.Id));

            foreach (string feature in clusterFeatures)
            {
                if (feature != "")
                    vector += " " + feature + ":1";
            }

            return vector;
        }
Пример #4
0
        /// <summary>Return the internal ID for <c>Id</c>, looking it up only while the cached value is -1.</summary>
        /// <param name="mapper">mapping used for the lookup when no ID has been cached yet</param>
        /// <returns>the cached internal ID</returns>
        public int GetMappedId(Mapping mapper)
        {
            // -1 marks "not resolved yet"
            if (_mappedId != -1)
            {
                return _mappedId;
            }

            _mappedId = mapper.ToInternalID(Id);
            return _mappedId;
        }
Пример #5
0
		/// <summary>
		/// Checks the Pearson correlation computed for entities 45 and 311 (item-wise)
		/// on the ml-100k u1.base ratings against a fixed reference value.
		/// </summary>
		[Test()] public void TestComputeCorrelations2()
		{
			// the rating file is read from disk, relative to the test's working directory
			var users = new Mapping();
			var items = new Mapping();
			var data = RatingData.Read("../../../../data/ml-100k/u1.base", users, items);

			var pearson = new Pearson(data.AllUsers.Count, 200f);
			var actual = pearson.ComputeCorrelation(data, EntityType.ITEM, 45, 311);

			Assert.AreEqual(-0.02788301f, actual, 0.00001);
		}
Пример #6
0
    /// <summary>
    /// Read a timed-rating data file, reporting the file name, the current time, the data
    /// statistics and a completion message on the console.
    /// </summary>
    /// <param name="data_file">name of the file to read</param>
    /// <param name="user_mappings">user ID mapping passed on to the reader</param>
    /// <param name="item_mappings">item ID mapping passed on to the reader</param>
    /// <returns>the ratings that were read</returns>
    private static ITimedRatings readDataMapped(string data_file, ref MyMediaLite.Data.Mapping user_mappings, ref MyMediaLite.Data.Mapping item_mappings)
    {
        Console.WriteLine("Dataset: {0}", data_file);
        Console.WriteLine(DateTime.Now);

        var ratings = TimedRatingData.Read(
            data_file,
            user_mappings,
            item_mappings,
            TestRatingFileFormat.WITHOUT_RATINGS,
            true);

        Console.Write(ratings.Statistics());
        Console.WriteLine("Finished dataset read");

        return ratings;
    }
        /// <summary>Create a container with <paramref name="numDomains"/> domains named "ml0", "ml1", ...</summary>
        /// <param name="numDomains">number of domains to register</param>
        /// <param name="randomClusters">value stored in <c>RandomClusters</c></param>
        public MovieLensCrossDomainContainer(int numDomains, bool randomClusters = false)
            : base()
        {
            _mapper = new Mapping();
            NumDomains = numDomains;
            RandomClusters = randomClusters;
            _itemsCluster = new Dictionary<string, int>();

            // one domain per index, keyed by its own ID
            int domainIndex = 0;
            while (domainIndex < numDomains)
            {
                string domainId = "ml" + domainIndex;
                Domains.Add(domainId, new Domain(domainId));
                domainIndex++;
            }
        }
Пример #8
0
        /// <summary>
        /// Turn the factor files next to <c>Paths.AmazonBooksUsersCluster</c> into cluster assignments.
        /// </summary>
        /// <remarks>
        /// Line k of the ".lf" file is assigned to the user with internal ID k, line k of the ".rf"
        /// file to the item with internal ID k. The cluster of a line is the index of its largest
        /// space-separated value. Results are written as "originalId,clusterIndex" lines to the
        /// ".nmf.u" (users) and ".nmf.i" (items) files.
        /// </remarks>
        public void CreateNmfClustersAmazon()
        {
            var reader = new AmazonReader(Paths.AmazonBooksRatings);
            var dataset = new Dataset<ItemRating>(reader);

            var userMapping = new Mapping();
            var itemMapping = new Mapping();

            // Walk all samples once so that both mappings know every user and item.
            // The rating conversion is not used afterwards; it is kept so that the same calls are made.
            foreach (var sample in dataset.AllSamples)
            {
                userMapping.ToInternalID(sample.User.Id);
                itemMapping.ToInternalID(sample.Item.Id);
                Convert.ToDouble(sample.Rating);
            }

            var blank = new string[] { " " };

            // index of the first occurrence of the largest value on a line
            Func<string, int> strongestFactor = row =>
            {
                var weights = row.Split(blank, StringSplitOptions.RemoveEmptyEntries)
                    .Select(w => Convert.ToDouble(w))
                    .ToList();
                return weights.IndexOf(weights.Max());
            };

            // users
            var userLines = File.ReadAllLines(Paths.AmazonBooksUsersCluster + ".lf")
                .Select((row, rowIndex) =>
                {
                    int cluster = strongestFactor(row);
                    return string.Format("{0},{1}", userMapping.ToOriginalID(rowIndex), cluster);
                });

            File.WriteAllLines(Paths.AmazonBooksUsersCluster + ".nmf.u", userLines);

            // items
            var itemLines = File.ReadAllLines(Paths.AmazonBooksUsersCluster + ".rf")
                .Select((row, rowIndex) =>
                {
                    int cluster = strongestFactor(row);
                    return string.Format("{0},{1}", itemMapping.ToOriginalID(rowIndex), cluster);
                });

            File.WriteAllLines(Paths.AmazonBooksUsersCluster + ".nmf.i", itemLines);
        }
Пример #9
0
    /// <summary>
    /// Run one UserKNN experiment: read the data, split it, train on the training part as
    /// positive-only feedback and evaluate on the validation and test parts.
    /// </summary>
    /// <param name="data">name of the data file to read</param>
    private static void startUserKNN(string data)
    {
        var user_mapping = new MyMediaLite.Data.Mapping();
        var item_mapping = new MyMediaLite.Data.Mapping();
        ITimedRatings all_data = readDataMapped(data, ref user_mapping, ref item_mapping);

        removeUserThreshold(ref all_data);
        Console.WriteLine("Start iteration Test UserKNN");

        // The split itself is done by readAndSplitData; the original author annotated the
        // parts as validation 10%, test 20%, training 70%.
        ITimedRatings validation_data = new TimedRatings();
        ITimedRatings test_data       = new TimedRatings();
        ITimedRatings training_data   = new TimedRatings();
        readAndSplitData(all_data, ref test_data, ref training_data, ref validation_data);

        // Copy every (user, item) pair of the training ratings into positive-only feedback.
        IPosOnlyFeedback training_data_pos = new PosOnlyFeedback <SparseBooleanMatrix> ();
        for (int pos = 0; pos < training_data.Users.Count; pos++)
        {
            training_data_pos.Add(training_data.Users [pos], training_data.Items [pos]);
        }

        var recommender = new MyMediaLite.ItemRecommendation.UserKNN
        {
            K           = 80,
            Q           = 1,
            Weighted    = false,
            Alpha       = 0.5f,
            Correlation = MyMediaLite.Correlation.BinaryCorrelationType.Jaccard,
            Feedback    = training_data_pos
        };

        DateTime trainingStart = DateTime.Now;
        recommender.Train();
        TimeSpan trainingTime = DateTime.Now - trainingStart;

        Console.Write("Total Training time needed:");
        Console.WriteLine(trainingTime.TotalMilliseconds);
        Console.WriteLine("Final results in this iteration:");

        // The returned results are not used here.
        MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, validation_data, training_data, "VALIDATION ", false);
        MyMediaLite.Eval.ItemsWeatherItemRecommender.EvaluateTime(recommender, test_data, training_data, "TEST ", false);
    }
Пример #10
0
 /// <summary>Create a feature builder whose <c>Mapper</c> is a newly constructed <c>Mapping</c>.</summary>
 public LibFmFeatureBuilder()
 {
     this.Mapper = new Mapping();
 }
Пример #11
0
 /// <summary>Render this rating as "rating userId:1 itemId:1" using internal IDs.</summary>
 /// <param name="usersItemsMap">mapping used to translate the user and item IDs</param>
 /// <returns>the feature-vector line</returns>
 public new string ToLibFmFeatureVector(Mapping usersItemsMap)
 {
     var rating = Rating;
     var userFeature = usersItemsMap.ToInternalID(User.Id);
     var itemFeature = usersItemsMap.ToInternalID(Item.Id);

     return string.Format("{0} {1}:1 {2}:1", rating, userFeature, itemFeature);
 }
Пример #12
0
 /// <summary>Create a clusterer over a dataset, with newly constructed user and item mappings.</summary>
 /// <param name="dataset">the item ratings to work on</param>
 public Clusterer(IDataset<ItemRating> dataset)
 {
     this._dataset = dataset;
     this._userMapping = new Mapping();
     this._itemMapping = new Mapping();
 }
Пример #13
0
        /// <summary>
        /// Prepare <c>itemRecommender</c>: load the model saved under <c>args[2]</c> if one exists,
        /// otherwise train a new one (50 factors, 100 iterations) and save it there.
        /// </summary>
        /// <param name="userMapping">user ID mapping passed to the data reader</param>
        /// <param name="itemMapping">item ID mapping passed to the data reader</param>
        /// <param name="args">command-line arguments; <c>args[2]</c> is the directory holding the "model" file</param>
        private static void TrainForItemPrediction(Mapping userMapping, Mapping itemMapping, String[] args)
        {
            var feedback = ItemData.Read(trainingCompactFileForItems, userMapping, itemMapping);
            itemRecommender = new CustomBPRMF();

            string modelPath = Path.Combine(args[2], "model");

            if (!File.Exists(modelPath))
            {
                Console.WriteLine("Training model for Item Prediction, this may take a while...");
                itemRecommender.Feedback = feedback;
                itemRecommender.NumFactors = 50;
                itemRecommender.NumIter = 100;
                itemRecommender.Train();
                itemRecommender.SaveModel(modelPath);
                return;
            }

            // A saved model exists: load it first, then attach the feedback data.
            Console.WriteLine("Skipping training, Loading saved model");
            itemRecommender.LoadModel(modelPath);
            itemRecommender.Feedback = feedback;
        }
Пример #14
0
        /// <summary>
        /// Train a biased matrix factorization model on the training ratings, evaluate it on the
        /// (modified) testing file, print timings and RMSE, and write the predictions to a file.
        /// </summary>
        /// <param name="args">
        /// command-line arguments; <c>args[1]</c> is combined with the testing file name and
        /// <c>args[2]</c> with the names of the modified testing file and the output file
        /// </param>
        private static void PredictRating(string[] args)
        {
            Console.WriteLine("Predicting ratings for Users...");

            String predictionsPath = Path.Combine(args[2], outputFileForRatings);
            // Note: these two assignments overwrite the static file-name fields with full paths.
            testingFile = Path.Combine(args[1], testingFile);
            testingFileModified = Path.Combine(args[2], testingFileModified);
            ModifyTestingFileForRating(testingFile, testingFileModified);

            float minRating = 1;
            float maxRating = 5;

            var userMapping = new Mapping();
            var itemMapping = new Mapping();
            var trainingData = StaticRatingData.Read(trainingCompactFileForRatings, userMapping, itemMapping, RatingType.FLOAT, TestRatingFileFormat.WITH_RATINGS, false);
            // Read but not used further in this method.
            var testUsers = trainingData.AllUsers;
            var candidate_items = trainingData.AllItems;
            var testData = StaticRatingData.Read(testingFileModified, userMapping, itemMapping, RatingType.FLOAT, TestRatingFileFormat.WITH_RATINGS, false);

            var recommender = new BiasedMatrixFactorization
            {
                MinRating = minRating,
                MaxRating = maxRating,
                Ratings = trainingData,

                NumFactors = 30,
                NumIter = 100,
                RegI = 0.04F,
                RegU = 0.04F,
                FrequencyRegularization = true,
                BoldDriver = true,
                LearnRate = 0.07F
            };

            Stopwatch trainingTimer = Stopwatch.StartNew();
            recommender.Train();
            trainingTimer.Stop();
            Console.WriteLine("time passed for training rating prediction model: " + trainingTimer.ElapsedMilliseconds);

            // measure the accuracy on the test data set
            Stopwatch evaluationTimer = Stopwatch.StartNew();
            var results = recommender.Evaluate(testData);
            evaluationTimer.Stop();
            Console.WriteLine("time passed for rating prediction: " + evaluationTimer.ElapsedMilliseconds);
            Console.WriteLine("RMSE={0}", results["RMSE"]);

            recommender.WritePredictions(testData, predictionsPath, userMapping, itemMapping, "{0}-{1},{2}", "userID-itemID,rating");
        }
Пример #15
0
        /// <summary>
        /// Score every candidate item of one user, decide for each whether a purchase is predicted,
        /// and record the outcome.
        /// </summary>
        /// <remarks>
        /// For each item the method appends a line to "ItemPrediction.log" under <c>args[2]</c>,
        /// adds an entry to the static <c>rankedPurchase</c> dictionary (keyed by score, reset on
        /// every call) and updates the static confusion-matrix counters. After all items have been
        /// processed it calls <c>CalculateAveragePrecision</c>.
        /// A failure inside the processing does not propagate: it is reported on standard error
        /// and the method returns.
        /// </remarks>
        /// <param name="user">original ID of the user</param>
        /// <param name="itemsPurchases">
        /// candidate items by original ID; element 0 of each value is "1" if the item was bought,
        /// element 1 is parsed as an integer and passed on to <c>MeanAverPrecisionDetails</c>
        /// </param>
        /// <param name="userMapping">mapping used to translate the user ID</param>
        /// <param name="itemMapping">mapping used to translate item IDs</param>
        /// <param name="args">command-line arguments; <c>args[2]</c> is the directory of the log file</param>
        private static void PredictPurchase(string user, Dictionary<string, List<string>> itemsPurchases, Mapping userMapping, Mapping itemMapping, String[] args)
        {
            string logFile = Path.Combine(args[2], "ItemPrediction.log");

            try
            {
                using (var writer = new StreamWriter(logFile, true))
                {
                    rankedPurchase = new SortedDictionary<float, List<MeanAverPrecisionDetails>>();

                    var internalUserId = userMapping.ToInternalID(user);

                    // The user's rating statistics are not part of the score at the moment; the call
                    // is retained so that the same helpers run as before.
                    GetAverageRatingAndNumberOfRatingsGivenByUser(internalUserId);

                    Dictionary<string, int> userCategories = GetUserPurchaseCategories(internalUserId);

                    foreach (KeyValuePair<string, List<string>> purchase in itemsPurchases)
                    {
                        string item = purchase.Key;
                        List<string> details = purchase.Value;

                        float result = itemRecommender.GetNearestItemSimilarity(internalUserId, itemMapping.ToInternalID(item));
                        float averageRatingForItem = GetAverageRatingForItem(item);
                        float resultFromItemCategories = CompareUserItemCategories(userCategories, GetItemCategories(item));

                        if (averageRatingForItem > 4.0)
                        {
                            // NOTE(review): the original statement was written "= +(...)", i.e. a plain
                            // assignment with a unary plus; "+=" may have been intended. The existing
                            // behaviour (overwrite) is kept - confirm before changing it.
                            resultFromItemCategories = (averageRatingForItem - 4.0F) / averageRatingForItem;
                        }

                        if (resultFromItemCategories > 0.45)
                        {
                            result = result - resultFromItemCategories;
                        }

                        writer.WriteLine("User: {0}, Item: {1}, hasBought: {2}, result: {3}, rating for Item {4}", user, item, details[0], result, averageRatingForItem);

                        // A low score is what triggers a predicted purchase.
                        bool predictedValueOfPurchase = result < 0.4;
                        bool hasBought = details[0].Equals("1");

                        var rankedEntry = new MeanAverPrecisionDetails(user, item, predictedValueOfPurchase ? 1 : 0, hasBought ? 1 : 0, result, Int32.Parse(details[1]));

                        // Items with the same score share one list; a single lookup finds or creates it.
                        List<MeanAverPrecisionDetails> sameScore;
                        if (!rankedPurchase.TryGetValue(result, out sameScore))
                        {
                            sameScore = new List<MeanAverPrecisionDetails>();
                            rankedPurchase.Add(result, sameScore);
                        }
                        sameScore.Add(rankedEntry);

                        if (hasBought)
                        {
                            totalActualPurchases++;
                            if (predictedValueOfPurchase)
                                truePositives++;    // purchase correctly predicted
                            else
                                falseNegatives++;   // purchase missed
                        }
                        else
                        {
                            if (predictedValueOfPurchase)
                                falsePositives++;   // purchase predicted, but the item was not bought
                            else
                                trueNegatives++;    // correctly predicted that the item was not bought
                        }
                    }

                    CalculateAveragePrecision(args);
                }
            }
            catch (Exception e)
            {
                // Best effort, as before, but no longer silent.
                Console.Error.WriteLine("PredictPurchase failed for user {0}: {1}", user, e);
            }
        }
Пример #16
0
        /// <summary>
        /// Read the item testing file, run <c>PredictPurchase</c> once per user and print
        /// precision, recall, F1 and mean average precision.
        /// </summary>
        /// <remarks>
        /// The first line of the file is skipped. Every following line is expected to look like
        /// "user-item,bought,rank"; consecutive lines with the same user are grouped into one
        /// call. Nothing happens when the file does not exist.
        /// </remarks>
        /// <param name="userMapping">mapping passed on to <c>PredictPurchase</c></param>
        /// <param name="itemMapping">mapping passed on to <c>PredictPurchase</c></param>
        /// <param name="args">command-line arguments; <c>args[1]</c> is the directory of the testing file</param>
        public static void RecommentItems(Mapping userMapping, Mapping itemMapping, String[] args)
        {
            Console.WriteLine("Predicting Items for Users...");
            // Note: this overwrites the static file-name field with the full path.
            testingFileForItems = Path.Combine(args[1], testingFileForItems);
            if (!File.Exists(testingFileForItems))
            {
                return;
            }

            try
            {
                using (var reader = new StreamReader(testingFileForItems))
                {
                    int userCount = 0;

                    reader.ReadLine(); // first line is skipped
                    String line = reader.ReadLine();

                    while (line != null)
                    {
                        // 'line' is the first record of the next user
                        string user = line.Split(',')[0].Split('-')[0];
                        var itemsPurchase = new Dictionary<string, List<string>>();

                        do
                        {
                            String[] labels = line.Split(',');
                            String[] user_item = labels[0].Split('-');

                            var itemPurchaseDetails = new List<string>();
                            itemPurchaseDetails.Add(labels[1]); // bought or not, 0 or 1
                            itemPurchaseDetails.Add(labels[2]); // rank of this item
                            itemsPurchase.Add(user_item[1], itemPurchaseDetails);

                            line = reader.ReadLine();
                        }
                        while (line != null && user.Equals(line.Split(',')[0].Split('-')[0]));

                        PredictPurchase(user, itemsPurchase, userMapping, itemMapping, args);
                        userCount++;
                    }

                    double precision = (double) truePositives / (truePositives + falsePositives);
                    double recall = (double)truePositives / (truePositives + falseNegatives);
                    double f1Measure = 2 * ((precision * recall) / (precision + recall));
                    double meanAveragePrecision = averagePrecisionSum / userCount;
                    Console.WriteLine("precision : {0}, recall : {1}, f1 : {2}, MAP : {3} ", precision, recall, f1Measure, meanAveragePrecision);
                }
            }
            catch (IOException e)
            {
                Console.WriteLine("exception reading the training file : {0}", e.Message);
            }
        }
Пример #17
0
 /// <summary>Read movie data from a file</summary>
 /// <remarks>Opens the file and hands it to the <c>StreamReader</c> overload of <c>Read</c>.</remarks>
 /// <param name="filename">the name of the file to be read from</param>
 /// <param name="item_mapping">ID mapping for the movies</param>
 public void Read(string filename, Mapping item_mapping)
 {
     using (var input = new StreamReader(filename))
     {
         Read(input, item_mapping);
     }
 }
Пример #18
0
        /// <summary>Read movie data from a StreamReader</summary>
        /// <remarks>
        /// Every line must consist of exactly three "::"-separated columns: the movie ID, the IMDB
        /// key and a third column that is currently ignored. The movie list and the IMDB-key
        /// lookup are rebuilt from scratch. Each movie gets its IMDB key as title and 1900 as year.
        /// </remarks>
        /// <param name="reader">a StreamReader to be read from</param>
        /// <param name="item_mapping">ID mapping for the movies</param>
        /// <exception cref="FormatException">a line does not have exactly three columns</exception>
        public void Read(StreamReader reader, Mapping item_mapping)
        {
            movie_list = new List<Movie>();
            IMDB_KEY_To_ID = new Dictionary<string, int>();

            var columnSeparator = new string[] { "::" };

            string row;
            while ((row = reader.ReadLine()) != null)
            {
                string[] columns = row.Split(columnSeparator, StringSplitOptions.None);
                if (columns.Length != 3)
                    throw new FormatException("Expected exactly 3 columns: " + row);

                int internalId = item_mapping.ToInternalID(columns[0]);
                string imdbKey = columns[1];

                // title and year are placeholders: the IMDB key and 1900
                movie_list.Add(new Movie(internalId, imdbKey, 1900, imdbKey));
                IMDB_KEY_To_ID[imdbKey] = internalId;
            }
        }