Esempio n. 1
0
        private static void AnalyseBrandDemand(SparkSession spark, CustomerSentimentContext context)
        {
            Console.WriteLine("Analysing popular brand demand");

            var brands = context.BrandSentiment
                         .OrderByDescending(b => b.ReviewCount)
                         .ThenBy(b => b.SentimentRank)
                         .Take(10)
                         .Select(b => $"'{b.Brand}'")
                         .ToArray();

            var brandList = string.Join(',', brands);

            var brandsDemand = spark.Sql(
                "SELECT EM.brand, FROM_UNIXTIME(ER.unix_time, 'MM') as month, COUNT(1) as demand " +
                "FROM ElectronicsReviews ER " +
                "JOIN ElectronicsMetadata EM ON EM.asin = ER.asin " +
                $"WHERE EM.brand IN ({brandList}) " +
                "GROUP BY EM.brand, FROM_UNIXTIME(ER.unix_time, 'MM') " +
                "ORDER BY EM.brand, FROM_UNIXTIME(ER.unix_time, 'MM')");

            brandsDemand.Cache();
            brandsDemand.CreateOrReplaceTempView("BrandsDemand");

            var items = Mapper.MapRows(
                brandsDemand.Collect(),
                r => new BrandDemand
            {
                Brand  = r.GetAs <string>(0),
                Month  = int.Parse(r.GetAs <string>(1)),
                Demand = r.GetAs <int>(2)
            },
                o => $"{o.Brand}-{o.Month}");

            context.BrandDemand.RemoveRange(context.BrandDemand);
            context.BrandDemand.AddRange(items);
            context.SaveChanges();

            foreach (var brand in brands)
            {
                Console.WriteLine($"Analysing consumer demand for {brand}");

                var brandDemand = spark.Sql(
                    "SELECT * " +
                    "FROM BrandsDemand " +
                    $"WHERE brand = {brand}");

                brandDemand.Show();
            }
        }
Esempio n. 2
0
        private static void AnalyseCategoryDemand(SparkSession spark, CustomerSentimentContext context)
        {
            Console.WriteLine("Analysing category consumer demand");

            var categoriesDemand = spark.Sql(
                "SELECT EM.main_cat, FROM_UNIXTIME(ER.unix_time, 'MM') as month, COUNT(1) as demand " +
                "FROM ElectronicsReviews ER " +
                "JOIN ElectronicsMetadata EM ON EM.asin = ER.asin " +
                "GROUP BY EM.main_cat, from_unixtime(ER.unix_time, 'MM') " +
                "ORDER BY EM.main_cat, FROM_UNIXTIME(ER.unix_time, 'MM')");

            categoriesDemand.Cache();
            categoriesDemand.CreateOrReplaceTempView("CategoryDemand");

            var items = Mapper.MapRows(
                categoriesDemand.Collect(),
                r => new CategoryDemand
            {
                Category = r.GetAs <string>(0),
                Month    = int.Parse(r.GetAs <string>(1)),
                Demand   = r.GetAs <int>(2)
            },
                o => $"{o.Category}-{o.Month}");

            context.CategoryDemand.RemoveRange(context.CategoryDemand);
            context.CategoryDemand.AddRange(items);
            context.SaveChangesAsync();

            var categories = spark.Sql("SELECT main_cat FROM CategoryDemand GROUP BY main_cat")
                             .Collect()
                             .Select(r => r.GetAs <string>(0))
                             .ToArray();

            foreach (var category in categories)
            {
                Console.WriteLine($"Analysing consumer demand for {category}");

                var categoryDemand = spark.Sql(
                    "SELECT * " +
                    "FROM CategoryDemand " +
                    $"WHERE main_cat = '{category}'");

                categoryDemand.Show();
            }
        }
Esempio n. 3
0
        static void Main()
        {
            using var context = new CustomerSentimentContext(_contextOptions);
            context.Database.EnsureCreated();

            var spark = SparkSession
                        .Builder()
                        .AppName(_appName)
                        .Config("spark.cores.max", "4")
                        .Config("spark.ui.port", "4040")
                        .GetOrCreate();

            var start = DateTime.Now;

            var dfMetadata = LoadMetadataFile(_metadataPath, spark);

            MetadataCleanup(dfMetadata);

            var dfReviews = LoadReviewPathFile(_reviewsPath, spark);

            ReviewsCleanup(dfReviews);

            ElectronicsReviewsSentimentAnalysis(spark);

            AnalyseCategorySentiment(spark, context);

            AnalyseBrandSentiment(spark, context);

            AnalyseBrandSentimentVsTime(spark, context);

            AnalyseProductSentiment(spark, context);

            AnalyseCategoryDemand(spark, context);

            AnalyseBrandDemand(spark, context);

            var end = DateTime.Now;

            Console.WriteLine($"Time Elapsed : {(end - start).TotalMinutes} Minutes");

            Console.ReadLine();
        }
Esempio n. 4
0
        private static void AnalyseBrandSentimentVsTime(SparkSession spark, CustomerSentimentContext context)
        {
            Console.WriteLine("Analysing brand consumer sentiment");

            var brands = context.BrandSentiment
                         .OrderByDescending(b => b.SentimentRank)
                         .ThenBy(b => b.ReviewCount)
                         .Take(10)
                         .Select(b => $"'{b.Brand}'")
                         .ToArray();

            var brandList = string.Join(',', brands);

            var brandSentimentVsTime = spark.Sql(
                "SELECT EM.brand, FROM_UNIXTIME(ERS.unix_time, 'YYYY') as year, SUM(ERS.sentiment) / COUNT(1) * 100 as sentiment_rank " +
                "FROM ElectronicsMetadata EM " +
                "JOIN ElectronicsReviewSentiment ERS ON ERS.asin = EM.asin " +
                $"WHERE EM.brand IN ({brandList}) " +
                "GROUP BY EM.brand, FROM_UNIXTIME(ERS.unix_time, 'YYYY') " +
                "ORDER BY FROM_UNIXTIME(ERS.unix_time, 'YYYY')");

            brandSentimentVsTime.Cache();
            brandSentimentVsTime.CreateOrReplaceTempView("BrandSentimentVsTime");
            brandSentimentVsTime.Show();

            var items = Mapper.MapRows(
                brandSentimentVsTime.Collect(),
                r => new BrandSentimentVsTime
            {
                Brand         = r.GetAs <string>(0),
                Year          = int.Parse(r.GetAs <string>(1)),
                SentimentRank = r.GetAs <double>(2)
            },
                o => $"{o.Brand}-{o.Year}");

            context.BrandSentimentVsTime.RemoveRange(context.BrandSentimentVsTime);
            context.BrandSentimentVsTime.AddRange(items);
            context.SaveChanges();
        }
Esempio n. 5
0
        private static void AnalyseCategorySentiment(SparkSession spark, CustomerSentimentContext context)
        {
            Console.WriteLine("Analyzing category sentiment");

            var itemCategorySentiment = spark.Sql(
                "SELECT EM.main_cat, SUM(ERS.sentiment) / COUNT(1) * 100 as sentiment_rank, COUNT(1) review_count " +
                "FROM ElectronicsMetadata EM " +
                "JOIN ElectronicsReviewSentiment ERS ON ERS.asin = EM.asin " +
                "GROUP BY EM.main_cat");

            itemCategorySentiment.Cache();
            itemCategorySentiment.CreateOrReplaceTempView("ItemCategorySentiment");

            Console.WriteLine("Analyzing categories with best consumer sentiment.");

            var categorySentiment = spark.Sql(
                "SELECT * " +
                "FROM ItemCategorySentiment " +
                "ORDER BY sentiment_rank DESC, review_count DESC");

            categorySentiment.Show();

            var items = Mapper.MapRows(
                categorySentiment.Collect(),
                r => new ItemCategorySentiment
            {
                Category      = r.GetAs <string>(0),
                SentimentRank = r.GetAs <double>(1),
                ReviewCount   = r.GetAs <int>(2)
            },
                o => o.Category);

            context.ItemCategorySentiment.RemoveRange(context.ItemCategorySentiment);
            context.ItemCategorySentiment.AddRange(items);

            context.SaveChanges();
        }
Esempio n. 6
0
        private static void AnalyseProductSentiment(SparkSession spark, CustomerSentimentContext context)
        {
            Console.WriteLine("Analyzing product consumer sentiment");

            var productSentiment = spark.Sql(
                "SELECT EM.title, EM.brand,  SUM(ERS.sentiment) / COUNT(1) * 100 as sentiment_rank, COUNT(1) review_count " +
                "FROM ElectronicsMetadata EM " +
                "JOIN ElectronicsReviewSentiment ERS ON ERS.asin = EM.asin " +
                "GROUP BY EM.title, EM.brand " +
                "HAVING COUNT(1) >= 100");

            productSentiment.Cache();
            productSentiment.CreateOrReplaceTempView("ProductSentiment");

            Console.WriteLine("Analyzing top 20 most popular products.");

            var mostPopularProducts = spark.Sql(
                "SELECT * " +
                "FROM ProductSentiment " +
                "ORDER BY review_count DESC " +
                "LIMIT 20");

            mostPopularProducts.Show();
            var rows = mostPopularProducts.Collect();

            Console.WriteLine("Analyzing top 20 products with best consumer sentiment.");

            var bestproductSentiment = spark.Sql(
                "SELECT * " +
                "FROM ProductSentiment " +
                "ORDER BY sentiment_rank DESC, review_count DESC " +
                "LIMIT 20");

            bestproductSentiment.Show();
            rows = rows.Union(bestproductSentiment.Collect());

            Console.WriteLine("Analyzing top 20 lest popular products.");

            var lestPopularProducts = spark.Sql(
                "SELECT * " +
                "FROM ProductSentiment " +
                "ORDER BY review_count ASC " +
                "LIMIT 20");

            lestPopularProducts.Show();
            rows = rows.Union(lestPopularProducts.Collect());

            Console.WriteLine("Analyzing top 20 products with worst consumer sentiment.");

            var worstproductSentiment = spark.Sql(
                "SELECT * " +
                "FROM ProductSentiment " +
                "ORDER BY sentiment_rank ASC, review_count ASC " +
                "LIMIT 20");

            worstproductSentiment.Show();
            rows = rows.Union(worstproductSentiment.Collect());

            var items = Mapper.MapRows(
                rows,
                r => new ProductSentiment
            {
                Name          = r.GetAs <string>(0),
                Brand         = r.GetAs <string>(1),
                SentimentRank = r.GetAs <double>(2),
                ReviewCount   = r.GetAs <int>(3)
            },
                o => $"{o.Name}-{o.Brand}");

            context.ProductSentiment.RemoveRange(context.ProductSentiment);
            context.ProductSentiment.AddRange(items);
            context.SaveChanges();
        }
Esempio n. 7
0
        private static void AnalyseBrandSentiment(SparkSession spark, CustomerSentimentContext context)
        {
            Console.WriteLine("Analyzing brand consumer sentiment");

            var brandSentiment = spark.Sql(
                "SELECT EM.brand, SUM(ERS.sentiment) / COUNT(1) * 100 as sentiment_rank, COUNT(1) review_count " +
                "FROM ElectronicsMetadata EM " +
                "JOIN ElectronicsReviewSentiment ERS ON ERS.asin = EM.asin " +
                "GROUP BY EM.brand " +
                "HAVING COUNT(1) >= 2000");

            brandSentiment.Cache();
            brandSentiment.CreateOrReplaceTempView("BrandSentiment");

            var items = Mapper.MapRows(
                brandSentiment.Collect(),
                r => new BrandSentiment
            {
                Brand         = r.GetAs <string>(0),
                SentimentRank = r.GetAs <double>(1),
                ReviewCount   = r.GetAs <int>(2)
            },
                o => o.Brand);

            context.BrandSentiment.RemoveRange(context.BrandSentiment);
            context.BrandSentiment.AddRange(items);
            context.SaveChanges();

            Console.WriteLine("Analyzing top 20 most popular brands.");

            var mostPopularBrands = spark.Sql(
                "SELECT * " +
                "FROM BrandSentiment " +
                "ORDER BY review_count DESC " +
                "LIMIT 20");

            mostPopularBrands.Show();

            Console.WriteLine("Analyzing top 20 brands with best consumer sentiment.");

            var bestBrandSentiment = spark.Sql(
                "SELECT * " +
                "FROM BrandSentiment " +
                "ORDER BY sentiment_rank DESC, review_count DESC " +
                "LIMIT 20");

            bestBrandSentiment.Show();

            Console.WriteLine("Analyzing top 20 lest popular brands.");

            var leastPopularBrands = spark.Sql(
                "SELECT * " +
                "FROM BrandSentiment " +
                "ORDER BY review_count ASC " +
                "LIMIT 20");

            leastPopularBrands.Show();

            Console.WriteLine("Analyzing top 20 brands with worst consumer sentiment.");

            var worstBrandSentiment = spark.Sql(
                "SELECT * " +
                "FROM BrandSentiment " +
                "ORDER BY sentiment_rank ASC, review_count ASC " +
                "LIMIT 20");

            worstBrandSentiment.Show();
        }