예제 #1
0
        /// <summary>
        /// Loads the SA4-SA1 statistical-area feature sets, reads the extracted activity records,
        /// keeps only records whose user appears in <c>userHomeCity.csv</c>, runs
        /// <c>ClassifyArea</c> over them and writes the results as CSV files:
        /// <c>recentLocations.csv</c> (record count per distinct location) and one
        /// <c>SentimentRecentWithRegion-{area}.csv</c> per statistical-area level.
        /// </summary>
        /// <remarks>
        /// All input and output paths are hard-coded, most of them relative to the working directory.
        /// CSV rows are formatted with the invariant culture so that numeric fields never
        /// contain a decimal comma, which would otherwise be indistinguishable from the
        /// field separator. The console summary still uses the current culture.
        /// </remarks>
        public static void AddRegionsToAreaSenti()
        {
            Console.WriteLine("Analysing AreaSenti \n");

            const string xmlTemplate = @"medians-{0}p02.xml";
            var          cfg         = new[] { StatArea.SA4, StatArea.SA3, StatArea.SA2, StatArea.SA1 };

            // Used for everything that is read from or written to a CSV file.
            var inv = System.Globalization.CultureInfo.InvariantCulture;

            // location feature sets, one per statistical-area level
            var saLoader    = new LoadStatisticalAreas();
            var featureSets = new Dictionary <StatArea, Features>();

            foreach (var area in cfg)
            {
                // File names are identifiers, so lower-case them culture-independently.
                var xmlFile  = Path.Combine(@"..\..", string.Format(xmlTemplate, area.ToString().ToLowerInvariant()));
                var features = saLoader.GetFeatures(xmlFile);
                featureSets.Add(area, features);
            }

            // summarise what was loaded
            foreach (var area in cfg)
            {
                Console.WriteLine(
                    $"{area}\tregions:{featureSets[area].Count,6:N0}\tploygons: {featureSets[area].Sum(x => x.Locations.Count),8:N0}");
            }

            var sad = new SADictionary(featureSets);

            var src = @"E:\uni\Cluster and Cloud Computing\extracted\newActivity";
            var jr  = new JsonRead <AreaSentiExtract>(new[] { src });

            jr.DoLoad();

            // user id -> second CSV column; only the keys are used for filtering below
            var requiredUsers = new Dictionary <long, string>();

            using (var ifs = new StreamReader(@"..\..\userHomeCity.csv"))
            {
                var ln = ifs.ReadLine(); // skip header
                while ((ln = ifs.ReadLine()) != null)
                {
                    // Tolerate blank lines (e.g. a trailing newline at end of file).
                    if (string.IsNullOrWhiteSpace(ln))
                    {
                        continue;
                    }

                    var arr = ln.Split(',');
                    requiredUsers.Add(long.Parse(arr[0], inv), arr[1]);
                }
            }

            var filtered = jr.Records
                           .Where(x => requiredUsers.ContainsKey(x.User)).ToList();

            // extract unique locations together with the number of records at each one
            var locs = filtered
                       .GroupBy(x => new { Y = x.Yloc, X = x.Xloc })
                       .ToDictionary(x => x.Key, x => x.Count());

            using (var ofs = new StreamWriter(@"..\..\recentLocations.csv"))
            {
                ofs.WriteLine("Yloc,Xloc,Count");
                foreach (var kvp in locs.OrderByDescending(x => x.Value))
                {
                    ofs.WriteLine(string.Format(inv, "{0},{1},{2}", kvp.Key.Y, kvp.Key.X, kvp.Value));
                }
            }


            var cls = new ClassifyArea(filtered, sad); //{SingleThreaded = true};

            cls.DoClassification();

            foreach (var sa in cfg)
            {
                // (region id, sentiment) pairs for every score that has a region at this level
                var clusteredBySa = cls.Scores
                                    .Where(x => x.Area.Regions.ContainsKey(sa))
                                    .Select(x => new KeyValuePair <long, double>(x.Area.Regions[sa].Id, x.Parameters.Sentiment))
                                    .ToLookup(x => x.Key);

                using (var regionCsv = new StreamWriter($@"..\..\SentimentRecentWithRegion-{sa}.csv"))
                {
                    regionCsv.WriteLine("RegionId,Name,Count,SumSentiment,CountExc,SumNeutralExc,Sentiment");

                    // collate regional averages
                    foreach (var rec in clusteredBySa)
                    {
                        var count = rec.Count();
                        var sm    = rec.Sum(x => x.Value) * 100;
                        var avg   = rec.Average(x => x.Value) * 100;

                        // Sentiments outside [-0.5, 0.5]; filtered once and reused for
                        // both the count and the sum.
                        var extremes = rec.Select(x => x.Value)
                                          .Where(v => v < -0.5 || 0.5 < v)
                                          .ToList();
                        var counte = extremes.Count;
                        var sme    = extremes.Sum() * 100;

                        regionCsv.WriteLine(string.Format(
                            inv,
                            "{0},\"{1}\",{2},{3:F2},{4},{5:F2},{6:F2}",
                            rec.Key, sad.SANames[sa][rec.Key], count, sm, counte, sme, avg));
                    }
                }
            }
        }
 /// <summary>
 /// Creates an instance whose <c>Sad</c> is a new <c>SADictionary</c> constructed
 /// from the <c>SASets</c> of <paramref name="src"/>'s dictionary, rather than
 /// reusing <paramref name="src"/>'s <c>Sad</c> instance.
 /// </summary>
 /// <param name="src">Instance whose <c>Sad.SASets</c> seeds the new dictionary.</param>
 private ClassifyArea(ClassifyArea src)
 {
     var sourceSets = src.Sad.SASets;

     this.Sad = new SADictionary(sourceSets);
 }