Esempio n. 1
0
        /// <summary>
        /// Converts the tab-separated file named by <c>TagFile</c> into a text file with one
        /// "id tag:value tag:value ..." line per tagged entry.
        /// </summary>
        /// <remarks>
        /// Input columns: 0 = entry ID, 1 = tag count, 2 = tags separated by "##", each tag
        /// being "name::value". Lines starting with "ERROR" and lines whose tag count is not
        /// positive produce no output. The entry ID and every tag name are translated with
        /// <c>ToInternalID</c> on one shared <c>Mapping</c>; the value after "::" is copied as is.
        /// </remarks>
        /// <param name="outputFile">Path of the file to write.</param>
        public void CreateLdaInput(string outputFile)
        {
            var map = new Mapping();

            var outputLines = File.ReadAllLines(TagFile).Select(l =>
            {
                // "ERROR" is a fixed marker, so compare ordinally rather than by culture.
                if (l.StartsWith("ERROR", StringComparison.Ordinal))
                    return "";

                var tokens = l.Split('\t');

                // The count is machine-written; parse it independently of the current culture.
                int tagCount = Convert.ToInt32(tokens[1], System.Globalization.CultureInfo.InvariantCulture);
                if (tagCount <= 0)
                    return "";

                var tags = tokens[2].Split(new string[] { "##" }, StringSplitOptions.RemoveEmptyEntries);

                // StringBuilder avoids re-copying the whole line for every tag appended.
                var output = new System.Text.StringBuilder();

                // The entry ID is mapped before its tags, keeping the original call order.
                output.Append(map.ToInternalID(tokens[0]));

                foreach (var tag in tags)
                {
                    var parts = tag.Split(new string[] { "::" }, StringSplitOptions.None);
                    output.Append(' ').Append(map.ToInternalID(parts[0])).Append(':').Append(parts[1]);
                }

                return output.ToString();
            }).Where(o => !String.IsNullOrEmpty(o));

            File.WriteAllLines(outputFile, outputLines);
        }
Esempio n. 2
0
        /// <summary>
        /// Returns the internal ID for <c>Id</c>, looking it up through
        /// <paramref name="mapper"/> only while no value has been cached yet.
        /// </summary>
        /// <param name="mapper">Mapping consulted when the cached value is still -1; ignored otherwise.</param>
        /// <returns>The value stored in <c>_mappedId</c>.</returns>
        public int GetMappedId(Mapping mapper)
        {
            // -1 marks "not resolved yet"; anything else is the cached answer.
            if (_mappedId != -1)
            {
                return _mappedId;
            }

            _mappedId = mapper.ToInternalID(Id);
            return _mappedId;
        }
        /// <summary>
        /// Builds the feature-vector line for this rating: the rating value, the mapped user
        /// and item IDs, and one extra feature for every cluster label that is set.
        /// Every feature is emitted as "index:1".
        /// </summary>
        /// <param name="usersItemsMap">
        /// Mapping used to translate the user ID, item ID and cluster labels into feature indices.
        /// </param>
        /// <returns>
        /// "rating user:1 item:1" followed by the user, item, auxiliary-user and
        /// auxiliary-item cluster features, in that order, for the labels that are non-empty.
        /// </returns>
        public override string ToLibFmFeatureVector(Mapping usersItemsMap)
        {
            // Cluster labels go through the mapping first, in this fixed order, and only
            // afterwards the user and item IDs: the same call sequence as before.
            string[] clusterLabels = { UserCluster, ItemCluster, AuxUserCluster, AuxItemCluster };

            string clusterFeatures = "";
            foreach (string label in clusterLabels)
            {
                if (string.IsNullOrEmpty(label))
                    continue;

                clusterFeatures += " " + usersItemsMap.ToInternalID(label).ToString() + ":1";
            }

            string ratingUserItem = string.Format("{0} {1}:1 {2}:1",
                Rating,
                usersItemsMap.ToInternalID(User.Id),
                usersItemsMap.ToInternalID(Item.Id));

            return ratingUserItem + clusterFeatures;
        }
        /// <summary>
        /// Turns the factor files stored next to <c>Paths.AmazonBooksUsersCluster</c> into hard
        /// cluster assignments for the Amazon books users and items.
        /// </summary>
        /// <remarks>
        /// Reads "&lt;base&gt;.lf" and writes "&lt;base&gt;.nmf.u" using the user mapping, then reads
        /// "&lt;base&gt;.rf" and writes "&lt;base&gt;.nmf.i" using the item mapping. Row k of a factor
        /// file is attributed to internal ID k, so the mappings are filled from the ratings
        /// in dataset order before any factor file is read.
        /// </remarks>
        public void CreateNmfClustersAmazon()
        {
            var reader = new AmazonReader(Paths.AmazonBooksRatings);
            var dataset = new Dataset<ItemRating>(reader);

            var userMapping = new Mapping();
            var itemMapping = new Mapping();

            // Walk all ratings once purely to register every user and item with its mapping;
            // the returned internal IDs are not needed here.
            foreach (var ir in dataset.AllSamples)
            {
                userMapping.ToInternalID(ir.User.Id);
                itemMapping.ToInternalID(ir.Item.Id);
            }

            // users
            WriteArgMaxClusters(
                Paths.AmazonBooksUsersCluster + ".lf",
                Paths.AmazonBooksUsersCluster + ".nmf.u",
                userMapping);

            // items
            WriteArgMaxClusters(
                Paths.AmazonBooksUsersCluster + ".rf",
                Paths.AmazonBooksUsersCluster + ".nmf.i",
                itemMapping);
        }

        /// <summary>
        /// Writes one "originalId,clusterIndex" line per row of a space-separated factor file,
        /// where clusterIndex is the position of the row's largest value.
        /// </summary>
        /// <param name="factorsFile">File with one row of space-separated numbers per entity.</param>
        /// <param name="outputFile">File receiving the "originalId,clusterIndex" lines.</param>
        /// <param name="mapping">Mapping used to turn the zero-based row number back into an original ID.</param>
        private static void WriteArgMaxClusters(string factorsFile, string outputFile, Mapping mapping)
        {
            var separators = new string[] { " " };

            // The indexed Select supplies the row number, so no counter has to be mutated
            // from inside a lazily evaluated lambda.
            var lines = File.ReadAllLines(factorsFile).Select((row, internalId) =>
            {
                // Parse with the invariant culture so "0.5" is read the same on every machine.
                var values = row.Split(separators, StringSplitOptions.RemoveEmptyEntries)
                    .Select(v => Convert.ToDouble(v, System.Globalization.CultureInfo.InvariantCulture))
                    .ToList();

                // IndexOf returns the first position holding the maximum.
                int clusterId = values.IndexOf(values.Max());

                return string.Format("{0},{1}", mapping.ToOriginalID(internalId), clusterId);
            });

            File.WriteAllLines(outputFile, lines);
        }
Esempio n. 5
0
 /// <summary>
 /// Builds the feature-vector line for this rating: the rating value followed by the
 /// mapped user ID and the mapped item ID, each written as "index:1".
 /// </summary>
 /// <param name="usersItemsMap">Mapping that translates the user and item IDs into feature indices.</param>
 /// <returns>A string of the form "rating user:1 item:1".</returns>
 public new string ToLibFmFeatureVector(Mapping usersItemsMap)
 {
     // The user is mapped before the item, as in the single-expression form.
     var userFeature = usersItemsMap.ToInternalID(User.Id);
     var itemFeature = usersItemsMap.ToInternalID(Item.Id);

     return string.Format("{0} {1}:1 {2}:1", Rating, userFeature, itemFeature);
 }
Esempio n. 6
0
        /// <summary>
        /// Scores every candidate item for <paramref name="user"/>, stores the outcome in
        /// <c>rankedPurchase</c> and the confusion-matrix counters, appends one line per item
        /// to "ItemPrediction.log" in the directory <c>args[2]</c>, and finally calls
        /// <c>CalculateAveragePrecision</c>.
        /// </summary>
        /// <param name="user">Original ID of the user being evaluated.</param>
        /// <param name="itemsPurchases">
        /// Candidate items keyed by original item ID. Element 0 of each list is "1" when the
        /// user really bought the item; element 1 is an integer forwarded to
        /// <c>MeanAverPrecisionDetails</c>.
        /// </param>
        /// <param name="userMapping">Mapping from original user IDs to internal IDs.</param>
        /// <param name="itemMapping">Mapping from original item IDs to internal IDs.</param>
        /// <param name="args">
        /// Program arguments; index 2 is the log directory and the whole array is passed on
        /// to <c>CalculateAveragePrecision</c>.
        /// </param>
        /// <remarks>
        /// The method stays best effort: a failure while scoring is not rethrown, but it is
        /// now reported on standard error instead of being dropped silently.
        /// </remarks>
        private static void PredictPurchase(string user, Dictionary<string, List<string>> itemsPurchases, Mapping userMapping, Mapping itemMapping, String[] args)
        {
            // Thresholds are kept as doubles so the float scores are compared exactly as before.
            const double highRatingThreshold = 4.0;
            const double categoryPenaltyThreshold = 0.45;
            const double purchaseThreshold = 0.4;

            string logFile = Path.Combine(args[2], "ItemPrediction.log");

            try
            {
                // 'using' closes the log exactly once, on success and on failure alike.
                using (StreamWriter writer = new StreamWriter(logFile, true))
                {
                    rankedPurchase = new SortedDictionary<float, List<MeanAverPrecisionDetails>>();

                    // The user's internal ID is the same for every item, so resolve it once.
                    var internalUserId = userMapping.ToInternalID(user);

                    // The returned average/count are not used by the current scoring; the call
                    // is retained in case it has side effects elsewhere in the class.
                    GetAverageRatingAndNumberOfRatingsGivenByUser(internalUserId);

                    Dictionary<string, int> userCategories = GetUserPurchaseCategories(internalUserId);

                    foreach (KeyValuePair<string, List<string>> entry in itemsPurchases)
                    {
                        string item = entry.Key;
                        List<string> purchaseInfo = entry.Value;

                        float result = itemRecommender.GetNearestItemSimilarity(internalUserId, itemMapping.ToInternalID(item));
                        float averageRatingForItem = GetAverageRatingForItem(item);
                        float categoryScore = CompareUserItemCategories(userCategories, GetItemCategories(item));

                        if (averageRatingForItem > highRatingThreshold)
                        {
                            // NOTE(review): this REPLACES the category score with the rating bonus.
                            // The original was written "= +(...)", which may have been meant as
                            // "+="; the assignment is kept so results do not change. Confirm intent.
                            categoryScore = (averageRatingForItem - 4.0F) / averageRatingForItem;
                        }

                        if (categoryScore > categoryPenaltyThreshold)
                        {
                            result = result - categoryScore;
                        }

                        writer.WriteLine("User: {0}, Item: {1}, hasBought: {2}, result: {3}, rating for Item {4}", user, item, purchaseInfo[0], result, averageRatingForItem);

                        // A score below the threshold is treated as a predicted purchase.
                        bool predictedPurchase = result < purchaseThreshold;
                        bool actuallyBought = purchaseInfo[0].Equals("1");

                        MeanAverPrecisionDetails details = new MeanAverPrecisionDetails(
                            user,
                            item,
                            predictedPurchase ? 1 : 0,
                            actuallyBought ? 1 : 0,
                            result,
                            Int32.Parse(purchaseInfo[1]));

                        // Items with an identical score share one bucket (single lookup).
                        List<MeanAverPrecisionDetails> bucket;
                        if (!rankedPurchase.TryGetValue(result, out bucket))
                        {
                            bucket = new List<MeanAverPrecisionDetails>();
                            rankedPurchase.Add(result, bucket);
                        }
                        bucket.Add(details);

                        if (actuallyBought)
                        {
                            totalActualPurchases++;
                            if (predictedPurchase)
                            {
                                // Correctly predicted a real purchase.
                                truePositives++;
                            }
                            else
                            {
                                // Missed a real purchase.
                                falseNegatives++;
                            }
                        }
                        else if (predictedPurchase)
                        {
                            // Predicted a purchase that did not happen.
                            falsePositives++;
                        }
                        else
                        {
                            // Correctly predicted that nothing was bought.
                            trueNegatives++;
                        }
                    }

                    CalculateAveragePrecision(args);
                }
            }
            catch (Exception ex)
            {
                // Still best effort (no rethrow), but leave a trace of what went wrong.
                Console.Error.WriteLine("PredictPurchase failed for user {0}: {1}", user, ex);
            }
        }
Esempio n. 7
0
        /// <summary>
        /// Reads movie data from a StreamReader, replacing the contents of
        /// <c>movie_list</c> and <c>IMDB_KEY_To_ID</c>.
        /// </summary>
        /// <remarks>
        /// Every line must consist of exactly three "::"-separated columns. Column 0 is the
        /// movie ID (translated through <paramref name="item_mapping"/>), column 1 is the IMDB
        /// key; column 2 is not used. The IMDB key doubles as the title and the year is
        /// always 1900.
        /// </remarks>
        /// <param name="reader">a StreamReader to be read from</param>
        /// <param name="item_mapping">ID mapping for the movies</param>
        /// <exception cref="FormatException">A line does not have exactly three columns.</exception>
        public void Read(StreamReader reader, Mapping item_mapping)
        {
            movie_list = new List<Movie>();
            IMDB_KEY_To_ID = new Dictionary<string, int>();

            string[] separators = { "::" };

            // ReadLine returns null once the stream is exhausted.
            for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
            {
                string[] tokens = line.Split(separators, StringSplitOptions.None);
                if (tokens.Length != 3)
                {
                    throw new FormatException("Expected exactly 3 columns: " + line);
                }

                int movie_id = item_mapping.ToInternalID(tokens[0]);
                string movie_imdb_key = tokens[1];

                // No real title or year is available in this format: the key stands in for
                // the title and 1900 for the year.
                movie_list.Add(new Movie(movie_id, movie_imdb_key, 1900, movie_imdb_key));
                IMDB_KEY_To_ID[movie_imdb_key] = movie_id;
            }
        }