예제 #1
0
        /// <summary>
        /// Loads the "Epinions RED" data, selects the users whose per-domain rating counts
        /// (ignoring domain "ep0") pass the filter below, and writes all ratings of those
        /// users to one CSV file per domain.
        /// </summary>
        /// <remarks>
        /// Files are written into the "Epinions RED" folder under Paths.EpinionRoot and are
        /// named Domains{numDomains}-{index}.csv, where the index starts at 1 and follows
        /// the order in which the ratings are grouped by domain id.
        /// </remarks>
        public void CreateSubsetEpinions()
        {
            int numDomains     = 4;
            var epinionsReader = new EpinionsCrossDomainReader(Paths.EpinionRoot + "Epinions RED");
            var container      = new EpinionsCrossDomainDataContainer(numDomains + 1);

            epinionsReader.LoadData(container);

            var output = container.Users.Values.Where(u =>
            {
                // Materialize the per-domain counts once; they are inspected twice below and
                // a deferred query would re-run the Where/GroupBy pipeline for each inspection.
                var counts = u.Ratings
                              .Where(r => r.Domain.Id != "ep0")
                              .GroupBy(r => r.Domain)
                              .Select(g => g.Count())
                              .ToList();

                // Keep users that rated in more than 3 non-"ep0" domains with 1..20 ratings in each.
                // NOTE(review): the literal 3 looks related to numDomains (4) - confirm before parameterizing.
                return counts.Count > 3 && counts.All(c => c >= 1 && c <= 20);
            })
                         // All ratings of the selected users are exported (no domain filter here,
                         // so ratings in "ep0" are included), grouped by domain id.
                         .SelectMany(u => u.Ratings)
                         .GroupBy(r => r.Domain.Id)
                         .Select(g => g.Select(r => r.ToString()))
                         .ToList();

            Console.WriteLine("Writing...");
            var header = new string[] { "UserId,ItemId,Rating" };

            int fileIndex = 1;

            foreach (var domain in output)
            {
                // Format of the file: Domains{Number of Domains}-{Domain Number}
                string path = string.Format("{0}Epinions RED\\Domains{1}-{2}.csv", Paths.EpinionRoot, numDomains, fileIndex);
                File.WriteAllLines(path, header.Concat(domain));
                fileIndex++;
            }
        }
예제 #2
0
        /// <summary>
        /// Loads the "Epinions RED" data into a container created with the argument 2,
        /// selects "ep0" as the target domain and prints the container's statistics.
        /// </summary>
        public void ExploreEpinionDataset()
        {
            string datasetPath = Paths.EpinionRoot + "Epinions RED";

            EpinionsCrossDomainReader reader = new EpinionsCrossDomainReader(datasetPath);
            EpinionsCrossDomainDataContainer dataContainer = new EpinionsCrossDomainDataContainer(2);

            // Fill the container from disk before choosing the target domain.
            reader.LoadData(dataContainer);
            dataContainer.SpecifyTargetDomain("ep0");

            dataContainer.PrintStatistics();
        }
        /// <summary>
        /// Loads ratings from each file in DomainFiles into the container, assigning the
        /// files to the domains "ep0", "ep1", ... in enumeration order.
        /// </summary>
        /// <param name="container">
        /// Container that receives the ratings; its Domains collection must already contain
        /// an entry for every "ep{index}" key looked up here.
        /// </param>
        private void DomainsLoad(EpinionsCrossDomainDataContainer container)
        {
            int domainIndex = 0;

            foreach (var file in DomainFiles)
            {
                // Ratings added below are attributed to this domain via CurrentDomain.
                container.CurrentDomain = container.Domains["ep" + domainIndex];
                domainIndex++;

                // The first line of each file is skipped (presumably a header row - confirm against the writer).
                foreach (string line in File.ReadAllLines(file).Skip(1))
                {
                    var parts = line.Split(',');

                    // The fields are comma-separated, so the rating can only use '.' as its
                    // decimal separator; parse it culture-independently so the result does
                    // not depend on the machine's regional settings.
                    float rating = float.Parse(parts[2], System.Globalization.CultureInfo.InvariantCulture);

                    container.AddRating(parts[0], parts[1], rating, false);
                }
            }
        }
        /// <summary>
        /// Loads products and reviews from "Products.csv" and "Reviews.csv" under FolderPath
        /// into the container.
        /// </summary>
        /// <param name="container">Container that receives the items and the ratings.</param>
        private void NormalRead(EpinionsCrossDomainDataContainer container)
        {
            string productsPath = FolderPath + "\\Products.csv";
            string reviewsPath  = FolderPath + "\\Reviews.csv";

            // Add products: column 0 is passed to AddItem, column 1 is stored as the item's "Category".
            // The first line is skipped (presumably a header row).
            foreach (string line in File.ReadAllLines(productsPath).Skip(1))
            {
                var parts = line.Split(',');
                var item  = container.AddItem(parts[0]);
                item.Properties["Category"] = parts[1];
            }

            // Add reviews: columns 1 and 4 are passed as the first two AddRating arguments,
            // column 2 is the numeric rating. The first line is skipped as above.
            foreach (string line in File.ReadAllLines(reviewsPath).Skip(1))
            {
                var parts = line.Split(',');

                // The fields are comma-separated, so the rating can only use '.' as its
                // decimal separator; parse it culture-independently so the result does
                // not depend on the machine's regional settings.
                float rating = float.Parse(parts[2], System.Globalization.CultureInfo.InvariantCulture);

                container.AddRating(parts[1], parts[4], rating, false);
            }
        }
예제 #5
0
        /// <summary>
        /// Runs a LibFM train/test evaluation for every domain left in the container after
        /// "ep0" is removed, once per auxiliary-rating setting (0 to 4), and prints the
        /// resulting RMSE table, per-domain statistics and run-time table to the console.
        /// </summary>
        /// <param name="numDomains">
        /// Value passed to the container constructor; also the number of columns in the
        /// printed tables.
        /// </param>
        public void TestEpinionAllDomains(int numDomains = 3)
        {
            var numAuxRatings = new List<int> { 0, 1, 2, 3, 4 };

            var epinionsReader = new EpinionsCrossDomainReader(Paths.EpinionRoot + "Epinions RED");
            var container      = new EpinionsCrossDomainDataContainer(numDomains);

            epinionsReader.LoadData(container);

            container.Domains.Remove("ep0");

            // NOTE(review): the result arrays have numDomains columns, but one column is filled
            // per domain remaining after "ep0" is removed. If fewer domains remain, the trailing
            // columns stay 0; if more remain, the indexing below throws. Confirm against the container.
            double[,] rmseMatrix   = new double[numAuxRatings.Count, numDomains];
            int[,] durationsMatrix = new int[numAuxRatings.Count, numDomains];
            int[] numUsers   = new int[numDomains];
            int[] numItems   = new int[numDomains];
            int[] numRatings = new int[numDomains];

            int domainIndex = 0;

            foreach (Domain d in container.Domains.Values)
            {
                var targetDomain = container.SpecifyTargetDomain(d.Id);
                Console.WriteLine("Target domain: {0}", d.ToString());

                var splitter = new CrossDomainSimpleSplitter(container, 0.25f);

                int numAuxIndex = 0;

                foreach (var num in numAuxRatings)
                {
                    // Stopwatch rather than DateTime.Now: elapsed time must not be affected by
                    // wall-clock adjustments. Timing covers recommender setup and the pipeline run.
                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

                    LibFmTrainTester recommender;

                    if (num == 0)
                    {
                        // No feature builder is supplied when zero auxiliary ratings are requested.
                        recommender = new LibFmTrainTester(experimentId: num.ToString());
                    }
                    else
                    {
                        var featureBuilder = new CrossDomainLibFmFeatureBuilder(targetDomain, num);
                        recommender        = new LibFmTrainTester(experimentId: num.ToString(), featureBuilder: featureBuilder);
                    }

                    var ctx = new EvalutationContext<ItemRating>(recommender, splitter);
                    var ep  = new EvaluationPipeline<ItemRating>(ctx);
                    ep.Evaluators.Add(new RMSE());
                    ep.Run();

                    stopwatch.Stop();

                    rmseMatrix[numAuxIndex, domainIndex]      = recommender.RMSE;
                    durationsMatrix[numAuxIndex, domainIndex] = (int)stopwatch.ElapsedMilliseconds;

                    numAuxIndex++;
                }

                numUsers[domainIndex]   = d.Ratings.Select(r => r.User.Id).Distinct().Count();
                numItems[domainIndex]   = d.Ratings.Select(r => r.Item.Id).Distinct().Count();
                numRatings[domainIndex] = d.Ratings.Count;

                domainIndex++;
            }

            // Write RMSEs
            Console.WriteLine("\nRMSEs:\n");
            WriteAuxRatingsTable("D", numAuxRatings, numDomains, (row, col) => rmseMatrix[row, col].ToString());

            // Write domain statistics (string.Join also copes with an empty row, unlike Aggregate).
            string users   = string.Join("\t", numUsers.Select(c => c.ToString()));
            string items   = string.Join("\t", numItems.Select(c => c.ToString()));
            string ratings = string.Join("\t", numRatings.Select(c => c.ToString()));

            Console.WriteLine();
            Console.WriteLine("Num Users\t" + users);
            Console.WriteLine("Num Items\t" + items);
            Console.WriteLine("Num Ratings\t" + ratings);

            // Write times (milliseconds)
            Console.WriteLine("\nTimes:\n");
            WriteAuxRatingsTable("T", numAuxRatings, numDomains, (row, col) => durationsMatrix[row, col].ToString());

            Console.WriteLine("\n");
        }

        // Prints a tab-separated table: a header of numbered columns ("{prefix}1", "{prefix}2", ...)
        // followed by one row per auxiliary-rating setting, each cell supplied by the callback.
        private static void WriteAuxRatingsTable(string columnPrefix, IList<int> rowLabels, int numColumns, Func<int, int, string> cell)
        {
            string header = string.Join("\t", Enumerable.Range(1, numColumns).Select(c => columnPrefix + c));
            Console.WriteLine("Num aux. ratings\t" + header);

            for (int row = 0; row < rowLabels.Count; row++)
            {
                Console.Write(rowLabels[row]);
                for (int col = 0; col < numColumns; col++)
                {
                    Console.Write("\t" + cell(row, col));
                }
                Console.WriteLine();
            }
        }