예제 #1
0
        /// <summary>
        /// Loads the hospital definitions and the tweet extract, then writes one CSV row to
        /// <c>..\..\hospitalTags.csv</c> for every tweet for which <c>TargetRegions.Find</c>
        /// reports a matching hospital.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            const string hospitalFile = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Data\hospital.json";
            const string tweetFile =
                @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\TwitterExplore\bin\twitter-extract-all.json";

            // The target regions are built from the description of every hospital feature.
            var hospitalSet  = EmergencyHospitals.Load(hospitalFile);
            var descriptions = hospitalSet.Features.Select(f => f.Description).ToList();
            var regions      = new TargetRegions(descriptions);

            var reader = new JsonRead<TagPosterDetails>(new[] { tweetFile });
            reader.DoLoad();
            Console.WriteLine("\n\n");

            using (var csv = new StreamWriter(@"..\..\hospitalTags.csv"))
            {
                csv.WriteLine("TimeStamp,HospitalName,Suburb,State,Tags,Tweet");

                foreach (var post in reader.Records)
                {
                    // Skip tweets that do not match any target region.
                    if (!regions.Find(post, out var hospital))
                    {
                        continue;
                    }

                    var site = hospital.Description;

                    csv.WriteLine(
                        $"{post.CreateTime:s},{site.HospitalName.Pack()},{site.Suburb.Pack()}," +
                        $"{site.State.Pack()},{post.Tags},{post.Text.Pack()}");
                }
            }
        }
        /// <summary>
        /// Loads the filtered tweet extract, collects the distinct user ids found in it and passes
        /// them (together with the record count and the <c>A:\twitter</c> location list) to a
        /// <c>FilterJsonRead</c>, whose <c>ExtractAndSave</c> is called with the output path.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            const string outputFile = @"..\..\twitter-all-geotagged-posters.json";
            const string extractFile =
                @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Extracts\FilteredExtract\data\twitter-extract-all.json";

            Console.WriteLine($"Start {DateTime.Now}");

            var posts = new JsonRead<TagPosterDetails>(new[] { extractFile });
            posts.DoLoad();

            // Distinct user ids across all loaded records.
            var posterIds = new HashSet<string>();
            foreach (var post in posts.Records)
            {
                posterIds.Add(post.UserIdStr);
            }

            Console.WriteLine($"Have {posterIds.Count} posters\n");

            var sourceLocations = new List<string> { @"A:\twitter" };
            var filter          = new FilterJsonRead(sourceLocations, posts.Records.Count, posterIds);

            filter.ExtractAndSave(outputFile);

            Console.WriteLine($"Done {DateTime.Now}");
        }
        /// <summary>
        /// Extracts the census features from the AURIN XML file, loads the tweet extract and, for
        /// every record that lies inside one of a feature's polygons, stamps the record with that
        /// feature's name and id and appends it as one JSON line to
        /// <c>..\..\..\data\twitter-all-areaTagged.json</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            //   const string aurinData = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Aurin\sample-sa4p02.xml";
            const string aurinData = @"a:\aurin\medians-sa2p02.xml";
            const string xlst      = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Aurin\extract.xslt";

            var cm = CensusMedians.Extract(aurinData, xlst);

            Console.WriteLine(cm.Features.Count);

            cm.TransformFeatures();

            const string activeUsers =
                @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Extracts\FilteredExtract\data\twitter-extract-all.json";
            const string outFile = @"..\..\..\data\twitter-all-areaTagged.json";


            var geoPosts = new JsonRead <TagPosterDetails>(activeUsers);

            geoPosts.DoLoad();


            var        cnt = 0;
            const byte nl  = (byte)'\n';
            var        ser = new DataContractJsonSerializer(typeof(TagPosterDetails));

            using (var fs = File.Open(outFile, FileMode.Create))
            {
                foreach (var rec in geoPosts.Records)
                {
                    // The coordinates are the same for every feature, so test them once per
                    // record: records without a position skip the whole feature scan.
                    if (!rec.Yloc.HasValue || !rec.Xloc.HasValue)
                    {
                        continue;
                    }

                    var yloc = rec.Yloc.Value;
                    var xloc = rec.Xloc.Value;

                    foreach (var feat in cm.Features)
                    {
                        // NOTE(review): InBox receives (Yloc, Xloc) while LatLong receives
                        // (Xloc, Yloc) - argument order kept as in the original; confirm against
                        // the signatures of both members.
                        if (!feat.BoundedBy.InBox(yloc, xloc))
                        {
                            continue;
                        }

                        var pt = new LatLong(xloc, yloc);

                        // in the broad region, double check if falls into a nominated polygon
                        foreach (var poly in feat.Locations)
                        {
                            if (!poly.PointInPolygon(pt))
                            {
                                continue;
                            }

                            rec.AreaName        = feat.Parameters.Name;
                            rec.StatisticalArea = feat.Parameters.Id;

                            // One serialized record per line.
                            ser.WriteObject(fs, rec);
                            fs.WriteByte(nl);

                            // Progress output every 1000 written records.
                            if (++cnt % 1000 == 0)
                            {
                                Console.WriteLine($"{rec.Location,-20} {rec.AreaName}");
                            }
                        }
                    }
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Loads the tweet extract, groups the records per user and per location, keeps the users
        /// that have records in more than one location and at least <c>freqFilter</c> records in
        /// one of them, and writes all records of those users to
        /// <c>..\..\frequentMultiCitiesTweeters-20.csv</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            const int    freqFilter = 20;
            const string tweets =
                @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Extracts\FilteredExtract\data\twitter-extract-all.json";

            var reader = new JsonRead<TagPosterDetails>(new[] { tweets });
            reader.DoLoad();
            Console.WriteLine("\n\n");

            // user id -> (location -> records of that user at that location)
            var byUserThenLocation = reader.Records
                .GroupBy(r => r.UserIdStr)
                .ToDictionary(
                    userGroup => userGroup.Key,
                    userGroup => userGroup
                        .GroupBy(r => r.Location)
                        .ToDictionary(locGroup => locGroup.Key, locGroup => locGroup.ToList()));

            // Users seen in more than one location with at least freqFilter records in any
            // single location; the value is the flattened list of all their records.
            var frequentTravellers = byUserThenLocation
                .Where(u => u.Value.Count > 1 && u.Value.Any(c => c.Value.Count >= freqFilter))
                .ToDictionary(u => u.Key, u => u.Value.SelectMany(c => c.Value).ToList());

            using (var csv = new StreamWriter($@"..\..\frequentMultiCitiesTweeters-{freqFilter}.csv"))
            {
                csv.WriteLine("UserId,UserName,Count,TimeStamp,Location,Yloc,Xloc,Tags,Tweet");

                // Users with the most records first; each user's rows in chronological order.
                foreach (var traveller in frequentTravellers.OrderByDescending(u => u.Value.Count))
                {
                    var posts    = traveller.Value;
                    var userName = posts.First().UserName;

                    foreach (var post in posts.OrderBy(p => p.CreateTime))
                    {
                        csv.WriteLine(
                            $"{traveller.Key},{userName.Pack()},{posts.Count},{post.CreateTime:s},{post.Location}," +
                            $"{post.Yloc},{post.Xloc},{post.Tags},{post.Text.Pack()}");
                    }
                }
            }
        }
        /// <summary>
        /// Scores the text of every loaded record with <c>SentimentIntensityAnalyzer</c>, prints
        /// summary statistics of the compound score to the console and writes the scores, highest
        /// first, to <c>..\..\BigScore.csv</c> (tab separated).
        /// </summary>
        /// <remarks>
        /// When no records were loaded the method reports that and returns without writing the
        /// file, because <c>Min</c>/<c>Max</c>/<c>Average</c> throw on an empty sequence.
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            var analyzer = new SentimentIntensityAnalyzer();

            Console.WriteLine($"Start {DateTime.Now}");

            const string bigSrc =
                @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Extracts\FilteredExtract\bin\data\twitter-all-geotagged-posters.json";

            var geoPosts = new JsonRead <TagPosterDetails>(new[] { bigSrc });

            geoPosts.DoLoad();

            // (compound score, original text) for every record
            var outcome = new List <KeyValuePair <double, string> >();

            foreach (var txt in geoPosts.Records.Select(x => x.Text))
            {
                var res = analyzer.PolarityScores(txt);
                outcome.Add(new KeyValuePair <double, string>(res.Compound, txt));
            }

            Console.WriteLine($"\nTotal Number: {outcome.Count:N0}\n");

            if (outcome.Count == 0)
            {
                // Enumerable.Min/Max/Average throw InvalidOperationException on an empty
                // sequence, so stop here with a readable message instead of crashing.
                Console.WriteLine("No records loaded; nothing to score.");
                return;
            }

            Console.WriteLine($"\nMin {outcome.Min(x => x.Key):P2}");
            Console.WriteLine($"Max {outcome.Max(x => x.Key):P2}");
            // Scores strictly between -0.02 and 0.02 are counted as zero.
            Console.WriteLine($"Number Zeros {outcome.Count(x => -0.02 < x.Key && x.Key < 0.02):N0}");
            Console.WriteLine($"Average rating {outcome.Average(x => x.Key):P2}\n");


            using (var of = new StreamWriter(@"..\..\BigScore.csv"))
            {
                foreach (var kvp in outcome.OrderByDescending(x => x.Key))
                {
                    of.WriteLine($"{kvp.Key:P}\t{kvp.Value.Pack()}");
                }
            }
        }
예제 #6
0
        /// <summary>
        /// Loads the records found under the <c>bin</c> location, collects their distinct user
        /// ids, loads a <c>FilterJsonRead</c> built from those ids and the <c>A:\twitter</c>
        /// location list, and writes every record it exposes as one JSON line to
        /// <c>..\..\twitter-geotagged-posters.json</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            Console.WriteLine($"Start {DateTime.Now}");

            var seedLocations = new[] { @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\TwitterExplore\bin\" };
            var seedPosts     = new JsonRead<TagPosterDetails>(seedLocations);

            seedPosts.DoLoad();

            // Distinct user ids across all loaded records.
            var posterIds = new HashSet<string>();
            foreach (var post in seedPosts.Records)
            {
                posterIds.Add(post.UserIdStr);
            }

            Console.WriteLine($"Have {posterIds.Count} posters\n");

            var sourceLocations = new List<string> { @"A:\twitter" };
            var filter          = new FilterJsonRead(sourceLocations, seedPosts.Records.Count, posterIds);

            filter.DoLoad();

            var serializer = new DataContractJsonSerializer(typeof(TagPosterDetails));

            using (var output = File.Open(@"..\..\twitter-geotagged-posters.json", FileMode.Create))
            {
                foreach (var match in filter.Records)
                {
                    // One serialized record per line.
                    serializer.WriteObject(output, match);
                    output.WriteByte((byte)'\n');
                }
            }

            Console.WriteLine($"Done {DateTime.Now}");
        }
예제 #7
0
        /// <summary>
        /// Loads the statistical-area feature sets (SA4 to SA1), loads the recent-activity
        /// records, keeps those whose user appears in <c>..\..\userHomeCity.csv</c>, writes a
        /// count per distinct coordinate pair to <c>..\..\recentLocations.csv</c>, classifies the
        /// kept records and writes per-region sentiment totals to one
        /// <c>SentimentRecentWithRegion-{level}.csv</c> file per statistical-area level.
        /// </summary>
        public static void AddRegionsToAreaSenti()
        {
            Console.WriteLine("Analysing AreaSenti \n");

            const string medianFilePattern = @"medians-{0}p02.xml";
            var          levels            = new[] { StatArea.SA4, StatArea.SA3, StatArea.SA2, StatArea.SA1 };

            // One feature set per statistical-area level.
            var loader          = new LoadStatisticalAreas();
            var featuresByLevel = new Dictionary<StatArea, Features>();

            foreach (var level in levels)
            {
                var fileName = string.Format(medianFilePattern, level.ToString().ToLower());
                var loaded   = loader.GetFeatures(Path.Combine(@"..\..", fileName));
                featuresByLevel.Add(level, loaded);
            }

            // Console summary: region and polygon counts per level.
            foreach (var level in levels)
            {
                var set          = featuresByLevel[level];
                var polygonCount = set.Sum(f => f.Locations.Count);

                Console.WriteLine($"{level}\tregions:{set.Count,6:N0}\tploygons: {polygonCount,8:N0}");
            }

            var areaDictionary = new SADictionary(featuresByLevel);

            var activityFolder = @"E:\uni\Cluster and Cloud Computing\extracted\newActivity";
            var reader         = new JsonRead<AreaSentiExtract>(new[] { activityFolder });

            reader.DoLoad();

            // user id -> second CSV column (every line after the header is parsed)
            var homeCityByUser = new Dictionary<long, string>();

            using (var users = new StreamReader(@"..\..\userHomeCity.csv"))
            {
                users.ReadLine(); // skip header

                for (var line = users.ReadLine(); line != null; line = users.ReadLine())
                {
                    var fields = line.Split(',');
                    homeCityByUser.Add(long.Parse(fields[0]), fields[1]);
                }
            }

            var wanted = reader.Records
                .Where(r => homeCityByUser.ContainsKey(r.User))
                .ToList();

            // Number of kept records per distinct (Yloc, Xloc) pair.
            var countByPosition = wanted
                .GroupBy(r => new { Y = r.Yloc, X = r.Xloc })
                .ToDictionary(g => g.Key, g => g.Count());

            using (var csv = new StreamWriter(@"..\..\recentLocations.csv"))
            {
                csv.WriteLine("Yloc,Xloc,Count");

                foreach (var position in countByPosition.OrderByDescending(p => p.Value))
                {
                    csv.WriteLine($"{position.Key.Y},{position.Key.X},{position.Value}");
                }
            }

            var classifier = new ClassifyArea(wanted, areaDictionary); //{SingleThreaded = true};

            classifier.DoClassification();

            foreach (var level in levels)
            {
                // (region id, sentiment) pairs of every score that has a region at this level,
                // looked up by region id.
                var sentimentByRegion = classifier.Scores
                    .Where(s => s.Area.Regions.ContainsKey(level))
                    .Select(s => new KeyValuePair<long, double>(s.Area.Regions[level].Id, s.Parameters.Sentiment))
                    .ToLookup(p => p.Key);

                using (var csv = new StreamWriter($@"..\..\SentimentRecentWithRegion-{level}.csv"))
                {
                    csv.WriteLine("RegionId,Name,Count,SumSentiment,CountExc,SumNeutralExc,Sentiment");

                    foreach (var region in sentimentByRegion)
                    {
                        var values = region.Select(p => p.Value).ToList();

                        // Totals over all observations, scaled by 100.
                        var observations = values.Count;
                        var total        = values.Sum() * 100;
                        var mean         = values.Average() * 100;

                        // Same totals restricted to values outside [-0.5, 0.5].
                        var strong      = values.Where(v => v < -0.5 || 0.5 < v).ToList();
                        var strongCount = strong.Count;
                        var strongTotal = strong.Sum() * 100;

                        csv.WriteLine(
                            $"{region.Key},\"{areaDictionary.SANames[level][region.Key]}\",{observations},{total:F2},{strongCount},{strongTotal:F2},{mean:F2}");
                    }
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Loads the statistical-area feature sets (SA4 to SA1) and the combined geo records,
        /// classifies the records whose compound score lies outside [-0.05, 0.05] and writes two
        /// CSV files per statistical-area level:
        /// <c>SentimentFilterWithRegion-{level}.csv</c> (only scores of users listed in
        /// <c>..\..\userHomeCity.csv</c> whose record location differs from the listed value) and
        /// <c>SentimentWithRegion-{level}.csv</c> (all scores).
        /// </summary>
        public static void AddRegions()
        {
            Console.WriteLine($"Analysing Geos \n");

            const string xmlTemplate = @"medians-{0}p02.xml";
            var          cfg         = new[] { StatArea.SA4, StatArea.SA3, StatArea.SA2, StatArea.SA1 };


            // location feature sets
            var saLoader    = new LoadStatisticalAreas();
            var featureSets = new Dictionary <StatArea, Features>();

            foreach (var area in cfg)
            {
                var xmlFile  = Path.Combine(@"..\..", string.Format(xmlTemplate, area.ToString().ToLower()));
                var features = saLoader.GetFeatures(xmlFile);
                featureSets.Add(area, features);
            }

            // summarise
            foreach (var area in cfg)
            {
                Console.WriteLine(
                    $"{area}\tregions:{featureSets[area].Count,6:N0}\tploygons: {featureSets[area].Sum(x => x.Locations.Count),8:N0}");
            }

            var sad = new SADictionary(featureSets);

            var src = @"A:\geoCombined\";
            var jr  = new JsonRead <GeoSentimentParameters>(new[] { src });

            jr.DoLoad();

            // user id -> second CSV column (compared against the record location below)
            var requiredUsers = new Dictionary <long, string>();

            using (var ifs = new StreamReader(@"..\..\userHomeCity.csv"))
            {
                var ln = ifs.ReadLine(); // skip header
                while ((ln = ifs.ReadLine()) != null)
                {
                    var arr = ln.Split(',');
                    requiredUsers.Add(long.Parse(arr[0]), arr[1]);
                }
            }

            // drop records whose compound score lies within [-0.05, 0.05]
            var filtered = jr.Records
                           .Where(x => x.Compound < -0.05 || 0.05 < x.Compound)
                           .ToList();


            var cls = new Classify(filtered, sad); //{SingleThreaded = true};

            cls.DoClassification();

            // Writes one row per region: observation count, sum and average of the compound
            // scores (both scaled by 100). Shared by the filtered and unfiltered outputs.
            void WriteRegionAverages(string path, StatArea level, ILookup <long, KeyValuePair <long, double> > byRegion)
            {
                using (var of = new StreamWriter(path))
                {
                    of.WriteLine("RegionId,Name,Observations,Sum,Sentiment");

                    // collate regional averages
                    foreach (var rec in byRegion)
                    {
                        var count = rec.Count();
                        var sum   = rec.Sum(x => x.Value) * 100;
                        var avg   = rec.Average(x => x.Value) * 100;

                        of.WriteLine($"{rec.Key},\"{sad.SANames[level][rec.Key]}\",{count},{sum:F2},{avg:F2}");
                    }
                }
            }

            foreach (var sa in cfg)
            {
                // TryGetValue avoids the ContainsKey + indexer double lookup per score.
                var clusteredBySa = cls.Scores
                                    .Where(x => requiredUsers.TryGetValue(x.Parameters.UserId, out var homeCity) &&
                                           x.Parameters.Location != homeCity)
                                    .Where(x => x.Area.Regions.ContainsKey(sa))
                                    .Select(x => new KeyValuePair <long, double>(x.Area.Regions[sa].Id, x.Parameters.Compound))
                                    .ToLookup(x => x.Key);

                WriteRegionAverages($@"..\..\SentimentFilterWithRegion-{sa}.csv", sa, clusteredBySa);
            }


            foreach (var sa in cfg)
            {
                var clusteredBySa = cls.Scores
                                    .Where(x => x.Area.Regions.ContainsKey(sa))
                                    .Select(x => new KeyValuePair <long, double>(x.Area.Regions[sa].Id, x.Parameters.Compound))
                                    .ToLookup(x => x.Key);

                WriteRegionAverages($@"..\..\SentimentWithRegion-{sa}.csv", sa, clusteredBySa);
            }
        }
예제 #9
0
        /// <summary>
        /// Loads the combined geo records, determines for every geo-enabled user with records in
        /// more than one location the location with the most records, and writes two CSV files of
        /// coordinates grouped by (most frequent location, other location):
        /// <c>..\..\geoToJourneys.csv</c> and <c>..\..\geoFromJourneys.csv</c>.
        /// </summary>
        public static void AnalyseGeoLocations()
        {
            Console.WriteLine($"Analysing Geos \n");

            var src = @"A:\geoCombined\";
            var jr  = new JsonRead <GeoSentimentParameters>(new[] { src });

            jr.DoLoad();


            // ---------------------------------------------------------
            // --  extract multi-city journeys  -  geo_located

            // user id -> (location -> records), restricted to records with GeoEnabled set
            var collateByGeoUserLocation = jr.Records
                                           .Where(x => x.GeoEnabled)
                                           .GroupBy(x => x.UserId)
                                           .ToDictionary(x => x.Key,
                                                         x => x.GroupBy(u => u.Location)
                                                         .ToDictionary(u => u.Key, u => u.ToList()));


            // determine most frequent tweet location for user
            // (only users with more than one location are included)
            var mostFreqLocationByGeoUser = collateByGeoUserLocation
                                            .Where(x => x.Value.Count > 1) // more than 1 city
                                            .ToDictionary(x => x.Key,
                                                          x => x.Value.OrderByDescending(p => p.Value.Count).First().Key);


            // For every user: each location other than the most frequent one becomes a
            // (SourceCity = most frequent, TargetCity = other location) entry carrying the
            // records at the OTHER location. Entries are grouped by the city pair and flattened
            // to (X2, Y2, LocalTime). The outer SelectMany is not materialised, so the
            // dictionary values are evaluated when enumerated below.
            var toGeoJourneys = mostFreqLocationByGeoUser
                                .SelectMany(x =>
                                            collateByGeoUserLocation[x.Key]
                                            .Where(u => x.Value != u.Key)
                                            .Select(u => new
            {
                User       = x.Key,
                SourceCity = x.Value,
                TargetCity = u.Key,
                Locations  = u.Value
            })).GroupBy(
                x => new
            {
                x.SourceCity,
                x.TargetCity
            }).ToDictionary(
                x => x.Key,
                x => x.SelectMany(j => j.Locations.Select(i => new
            {
                X2 = i.Xloc,
                Y2 = i.Yloc,
                i.LocalTime
            }).ToList()
                                  ));


            // Same city pairs as above, but each entry carries the records at the MOST FREQUENT
            // location (x.Value[mostFreqLocationByGeoUser[x.Key]]), so a user's source-city
            // records are repeated once per other location of that user. The leading Where keeps
            // only users that have an entry in mostFreqLocationByGeoUser.
            var fromGeoJourneys = collateByGeoUserLocation
                                  .Where(x => x.Value.Any(c =>
                                                          mostFreqLocationByGeoUser.ContainsKey(x.Key) && c.Key == mostFreqLocationByGeoUser[x.Key]))
                                  .SelectMany(x => x.Value
                                              .Where(u => u.Key != mostFreqLocationByGeoUser[x.Key])
                                              .Select(u => new
            {
                User       = x.Key,
                SourceCity = mostFreqLocationByGeoUser[x.Key],
                TargetCity = u.Key,
                Locations  = x.Value[mostFreqLocationByGeoUser[x.Key]]
            })).GroupBy(
                x => new
            {
                x.SourceCity,
                x.TargetCity
            })
                                  .ToDictionary(
                x => x.Key,
                x => x.SelectMany(j => j.Locations
                                  .Select(i => new
            {
                X1 = i.Xloc,
                Y1 = i.Yloc,
                i.LocalTime
            }).ToList()
                                  ));


            // One row per coordinate recorded at the target (other) location.
            using (var ofs = new StreamWriter($@"..\..\geoToJourneys.csv"))
            {
                ofs.WriteLine("SourceCity,TargetCity,X2,Y2,LocalTime");
                foreach (var kvp in toGeoJourneys)
                {
                    foreach (var loc in kvp.Value)
                    {
                        ofs.WriteLine(
                            $"{kvp.Key.SourceCity},{kvp.Key.TargetCity},{loc.X2},{loc.Y2},{loc.LocalTime:s}");
                    }
                }
            }

            // One row per coordinate recorded at the source (most frequent) location.
            using (var ofs = new StreamWriter($@"..\..\geoFromJourneys.csv"))
            {
                ofs.WriteLine("SourceCity,TargetCity,X1,Y1,LocalTime");
                foreach (var kvp in fromGeoJourneys)
                {
                    foreach (var loc in kvp.Value)
                    {
                        ofs.WriteLine(
                            $"{kvp.Key.SourceCity},{kvp.Key.TargetCity},{loc.X1},{loc.Y1},{loc.LocalTime:s}");
                    }
                }
            }
        }
예제 #10
0
        /// <summary>
        /// Reads the user list from <c>..\..\userHomeCity.csv</c>, loads the statistical-area
        /// feature sets (SA4 to SA1) from the <c>Loc</c> folder, classifies the loaded records
        /// that belong to a listed user and writes the observation count and average score per
        /// region to one <c>SentimentFilterWithRegion-{level}.csv</c> file per level.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            Console.WriteLine($"Start {DateTime.Now}");

            // user id (kept as text) -> second CSV column
            var requiredUsers = new Dictionary <string, string>();

            using (var ifs = new StreamReader(@"..\..\userHomeCity.csv"))
            {
                var ln = ifs.ReadLine(); // skip header
                while ((ln = ifs.ReadLine()) != null)
                {
                    var arr = ln.Split(',');
                    requiredUsers.Add(arr[0], arr[1]);
                }
            }

            // The template takes the lower-cased area name as its only argument; the folder is
            // supplied by Path.Combine, so it is not passed to string.Format as well.
            const string xmlTemplate = @"medians-{0}p02.xml";
            var          cfg         = new[] { StatArea.SA4, StatArea.SA3, StatArea.SA2, StatArea.SA1 };


            // location feature sets
            var saLoader    = new LoadStatisticalAreas();
            var featureSets = new Dictionary <StatArea, Features>();

            foreach (var area in cfg)
            {
                var xmlFile  = Path.Combine(Loc, string.Format(xmlTemplate, area.ToString().ToLower()));
                var features = saLoader.GetFeatures(xmlFile);
                featureSets.Add(area, features);
            }

            // summarise
            foreach (var area in cfg)
            {
                Console.WriteLine($"{area}\tregions:{featureSets[area].Count,6:N0}\tploygons: {featureSets[area].Sum(x=>x.Locations.Count),8:N0}");
            }

            var sad = new SADictionary(featureSets);


            var geoPosts = new JsonRead <TagPosterDetails>(new[] { @"A:\locatedTargets" });

            geoPosts.DoLoad();


            // keep only the records of listed users
            var filtered = geoPosts.Records.Where(x => requiredUsers.ContainsKey(x.UserIdStr)).ToList();


            var cls = new Classify(filtered, sad);  // {SingleThreaded = true};

            cls.DoClassification();



            foreach (var sa in cfg)
            {
                // (region id, score) pairs of every score that has a region at this level
                var clusteredBySa = cls.Scores
                                    .Where(x => x.Area.Regions.ContainsKey(sa))
                                    .Select(x => new KeyValuePair <long, double>(x.Area.Regions[sa].Id, x.Score))
                                    .ToLookup(x => x.Key);

                using (var of = new StreamWriter($@"..\..\SentimentFilterWithRegion-{sa}.csv"))
                {
                    of.WriteLine("RegionId,Name,Observations,Sentiment");

                    // collate regional averages
                    foreach (var rec in clusteredBySa)
                    {
                        var count = rec.Count();
                        var avg   = rec.Average(x => x.Value);

                        of.WriteLine($"{rec.Key},\"{sad.SANames[sa][rec.Key]}\",{count},{avg:F4}");
                    }
                }
            }
        }