private static void Main(string[] args)
{
    const string hospitals = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Data\hospital.json";
    const string tweets = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\TwitterExplore\bin\twitter-extract-all.json";

    // load hospital locations and build the target-region filter criteria
    var eh = EmergencyHospitals.Load(hospitals);
    var tgts = new TargetRegions(eh.Features.Select(x => x.Description).ToList());

    var jr = new JsonRead<TagPosterDetails>(new[] { tweets });
    jr.DoLoad();
    Console.WriteLine("\n\n");

    using (var ofs = new StreamWriter(@"..\..\hospitalTags.csv"))
    {
        ofs.WriteLine("TimeStamp,HospitalName,Suburb,State,Tags,Tweet");
        foreach (var tweet in jr.Records)
        {
            // write a CSV row for every tweet that falls within a hospital's target region
            if (tgts.Find(tweet, out var hos))
            {
                ofs.WriteLine(
                    $"{tweet.CreateTime:s},{hos.Description.HospitalName.Pack()},{hos.Description.Suburb.Pack()}," +
                    $"{hos.Description.State.Pack()},{tweet.Tags},{tweet.Text.Pack()}");
            }
        }
    }
}
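// The Pack() extension used throughout these extracts is not shown in this listing.
// A minimal sketch of the CSV-escaping helper it presumably resembles (an assumption,
// not the actual implementation): flatten line breaks so a tweet cannot span rows, and
// quote fields containing CSV metacharacters.
public static class CsvPackExtensions // hypothetical name
{
    public static string Pack(this string value)
    {
        if (string.IsNullOrEmpty(value)) return string.Empty;

        // collapse embedded line breaks so each record stays on one CSV line
        var flat = value.Replace("\r", " ").Replace("\n", " ");

        // quote the field (doubling embedded quotes) if it contains delimiters
        if (flat.Contains(",") || flat.Contains("\""))
        {
            flat = "\"" + flat.Replace("\"", "\"\"") + "\"";
        }
        return flat;
    }
}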
private static void Main(string[] args) { Console.WriteLine($"Start {DateTime.Now}"); const string activeUsers = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Extracts\FilteredExtract\data\twitter-extract-all.json"; const string tgtFile = @"..\..\twitter-all-geotagged-posters.json"; var tgtLocs = new List <string> { @"A:\twitter" }; var geoPosts = new JsonRead <TagPosterDetails>(new[] { activeUsers }); geoPosts.DoLoad(); // extract unique userIds var ids = new HashSet <string>(geoPosts.Records.Select(x => x.UserIdStr)); Console.WriteLine($"Have {ids.Count} posters\n"); var jr = new FilterJsonRead(tgtLocs, geoPosts.Records.Count, ids); jr.ExtractAndSave(tgtFile); Console.WriteLine($"Done {DateTime.Now}"); }
private static void Main(string[] args)
{
    // const string aurinData = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Aurin\sample-sa4p02.xml";
    const string aurinData = @"a:\aurin\medians-sa2p02.xml";
    const string xslt = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Aurin\extract.xslt";

    var cm = CensusMedians.Extract(aurinData, xslt);
    Console.WriteLine(cm.Features.Count);
    cm.TransformFeatures();

    const string activeUsers = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Extracts\FilteredExtract\data\twitter-extract-all.json";
    const string outFile = @"..\..\..\data\twitter-all-areaTagged.json";

    var geoPosts = new JsonRead<TagPosterDetails>(new[] { activeUsers });
    geoPosts.DoLoad();

    var cnt = 0;
    const byte nl = (byte)'\n';
    var ser = new DataContractJsonSerializer(typeof(TagPosterDetails));
    using (var fs = File.Open(outFile, FileMode.Create))
    {
        foreach (var rec in geoPosts.Records)
        {
            foreach (var feat in cm.Features)
            {
                // cheap bounding-box test first
                if (rec.Yloc.HasValue && rec.Xloc.HasValue && feat.BoundedBy.InBox(rec.Yloc.Value, rec.Xloc.Value))
                {
                    var pt = new LatLong(rec.Xloc.Value, rec.Yloc.Value);
                    // in the broad region; double-check whether it falls into a nominated polygon
                    foreach (var poly in feat.Locations)
                    {
                        if (poly.PointInPolygon(pt))
                        {
                            rec.AreaName = feat.Parameters.Name;
                            rec.StatisticalArea = feat.Parameters.Id;
                            ser.WriteObject(fs, rec);
                            fs.WriteByte(nl);
                            if (++cnt % 1000 == 0)
                            {
                                Console.WriteLine($"{rec.Location,-20} {rec.AreaName}");
                            }
                            break; // attribute the record once; avoid duplicate rows when polygons overlap
                        }
                    }
                }
            }
        }
    }
}
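// Neither InBox nor PointInPolygon is defined in this listing. A minimal sketch of the
// standard tests they presumably implement (hypothetical types and signatures, not the
// project's own): an axis-aligned bounding-box check, then the classic ray-casting
// parity test for polygon containment.
public struct GeoPoint // hypothetical stand-in for the project's LatLong
{
    public double X; // longitude
    public double Y; // latitude
    public GeoPoint(double x, double y) { X = x; Y = y; }
}

public static class GeoTests
{
    // true when (y, x) lies within [minY..maxY] x [minX..maxX]
    public static bool InBox(double y, double x, double minY, double maxY, double minX, double maxX)
        => minY <= y && y <= maxY && minX <= x && x <= maxX;

    // ray casting: cast a horizontal ray from the point and count edge crossings;
    // an odd count means the point is inside the polygon
    public static bool PointInPolygon(GeoPoint[] poly, GeoPoint pt)
    {
        var inside = false;
        for (int i = 0, j = poly.Length - 1; i < poly.Length; j = i++)
        {
            if ((poly[i].Y > pt.Y) != (poly[j].Y > pt.Y) &&
                pt.X < (poly[j].X - poly[i].X) * (pt.Y - poly[i].Y) / (poly[j].Y - poly[i].Y) + poly[i].X)
            {
                inside = !inside;
            }
        }
        return inside;
    }
}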
private static void Main(string[] args)
{
    const string tweets = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Extracts\FilteredExtract\data\twitter-extract-all.json";
    var jr = new JsonRead<TagPosterDetails>(new[] { tweets });
    jr.DoLoad();
    Console.WriteLine("\n\n");

    // group each user's tweets by the city they were posted from
    var collateByUserLocation = jr.Records
        .GroupBy(x => x.UserIdStr)
        .ToDictionary(x => x.Key,
            x => x.GroupBy(u => u.Location)
                  .ToDictionary(u => u.Key, u => u.ToList()));

    const int freqFilter = 20;

    // users who have posted from multiple cities
    var freqMultiCitiesPosters = collateByUserLocation
        .Where(x => x.Value.Count > 1)                             // more than 1 city
        .Where(x => x.Value.Any(c => c.Value.Count >= freqFilter)) // at least freqFilter posts in some single city
        .ToDictionary(x => x.Key, x => x.Value.SelectMany(t => t.Value).ToList());

    using (var ofs = new StreamWriter($@"..\..\frequentMultiCitiesTweeters-{freqFilter}.csv"))
    {
        ofs.WriteLine("UserId,UserName,Count,TimeStamp,Location,Yloc,Xloc,Tags,Tweet");
        foreach (var multi in freqMultiCitiesPosters.OrderByDescending(x => x.Value.Count))
        {
            var id = multi.Key;
            var name = multi.Value.First().UserName;
            var cnt = multi.Value.Count;
            foreach (var rec in multi.Value.OrderBy(x => x.CreateTime))
            {
                ofs.WriteLine(
                    $"{id},{name.Pack()},{cnt},{rec.CreateTime:s},{rec.Location}," +
                    $"{rec.Yloc},{rec.Xloc},{rec.Tags},{rec.Text.Pack()}");
            }
        }
    }
}
private static void Main(string[] args)
{
    var analyzer = new SentimentIntensityAnalyzer();
    Console.WriteLine($"Start {DateTime.Now}");
    // smaller extract kept for reference:
    // const string src = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Extracts\FilteredExtract\bin\twitter-geotagged-posters.json";
    const string bigSrc = @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\Extracts\FilteredExtract\bin\data\twitter-all-geotagged-posters.json";

    var geoPosts = new JsonRead<TagPosterDetails>(new[] { bigSrc });
    geoPosts.DoLoad();

    // score every tweet, keeping the (compound score, text) pair
    var outcome = new List<KeyValuePair<double, string>>();
    foreach (var txt in geoPosts.Records.Select(x => x.Text))
    {
        var res = analyzer.PolarityScores(txt);
        outcome.Add(new KeyValuePair<double, string>(res.Compound, txt));
    }

    Console.WriteLine($"\nTotal Number: {outcome.Count:N0}\n");
    Console.WriteLine($"\nMin {outcome.Min(x => x.Key):P2}");
    Console.WriteLine($"Max {outcome.Max(x => x.Key):P2}");
    Console.WriteLine($"Number Zeros {outcome.Count(x => -0.02 < x.Key && x.Key < 0.02):N0}");
    Console.WriteLine($"Average rating {outcome.Average(x => x.Key):P2}\n");

    using (var of = new StreamWriter(@"..\..\BigScore.csv"))
    {
        foreach (var kvp in outcome.OrderByDescending(x => x.Key))
        {
            of.WriteLine($"{kvp.Key:P}\t{kvp.Value.Pack()}");
        }
    }
}
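// The +/-0.02 band above is this extract's own choice for counting near-zero scores.
// VADER's conventional cut-offs for the compound score are +/-0.05, which is also the
// band AddRegions filters out below. A minimal sketch of that conventional bucketing
// (a helper of my own, not part of this project):
public static string ClassifySentiment(double compound)
{
    if (compound >= 0.05) return "positive";
    if (compound <= -0.05) return "negative";
    return "neutral"; // scores in (-0.05, 0.05) are conventionally treated as neutral
}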
private static void Main(string[] args) { Console.WriteLine($"Start {DateTime.Now}"); var geoPosts = new JsonRead <TagPosterDetails>( new[] { @"E:\uni\Cluster and Cloud Computing\assign2\TwitterExplore\TwitterExplore\bin\" }); geoPosts.DoLoad(); // extract unique userIds var ids = new HashSet <string>(geoPosts.Records.Select(x => x.UserIdStr)); Console.WriteLine($"Have {ids.Count} posters\n"); var tgtLocs = new List <string> { @"A:\twitter" }; var jr = new FilterJsonRead(tgtLocs, geoPosts.Records.Count, ids); jr.DoLoad(); const byte nl = (byte)'\n'; var ser = new DataContractJsonSerializer(typeof(TagPosterDetails)); using (var fs = File.Open(@"..\..\twitter-geotagged-posters.json", FileMode.Create)) { foreach (var rec in jr.Records) { ser.WriteObject(fs, rec); fs.WriteByte(nl); } } Console.WriteLine($"Done {DateTime.Now}"); }
public static void AddRegionsToAreaSenti()
{
    Console.WriteLine("Analysing AreaSenti\n");
    const string xmlTemplate = @"medians-{0}p02.xml";
    var cfg = new[] { StatArea.SA4, StatArea.SA3, StatArea.SA2, StatArea.SA1 };

    // location feature sets
    var saLoader = new LoadStatisticalAreas();
    var featureSets = new Dictionary<StatArea, Features>();
    foreach (var area in cfg)
    {
        var xmlFile = Path.Combine(@"..\..", string.Format(xmlTemplate, area.ToString().ToLower()));
        var features = saLoader.GetFeatures(xmlFile);
        featureSets.Add(area, features);
    }

    // summarise
    foreach (var area in cfg)
    {
        Console.WriteLine(
            $"{area}\tregions:{featureSets[area].Count,6:N0}\tpolygons: {featureSets[area].Sum(x => x.Locations.Count),8:N0}");
    }

    var sad = new SADictionary(featureSets);

    var src = @"E:\uni\Cluster and Cloud Computing\extracted\newActivity";
    var jr = new JsonRead<AreaSentiExtract>(new[] { src });
    jr.DoLoad();

    // users whose home city has been established
    var requiredUsers = new Dictionary<long, string>();
    using (var ifs = new StreamReader(@"..\..\userHomeCity.csv"))
    {
        var ln = ifs.ReadLine(); // skip header
        while ((ln = ifs.ReadLine()) != null)
        {
            var arr = ln.Split(',');
            requiredUsers.Add(long.Parse(arr[0]), arr[1]);
        }
    }

    var filtered = jr.Records
        .Where(x => requiredUsers.ContainsKey(x.User)).ToList();

    // extract unique locations
    var locs = filtered
        .GroupBy(x => new { Y = x.Yloc, X = x.Xloc })
        .ToDictionary(x => x.Key, x => x.Count());

    using (var ofs = new StreamWriter(@"..\..\recentLocations.csv"))
    {
        ofs.WriteLine("Yloc,Xloc,Count");
        foreach (var kvp in locs.OrderByDescending(x => x.Value))
        {
            ofs.WriteLine($"{kvp.Key.Y},{kvp.Key.X},{kvp.Value}");
        }
    }

    var cls = new ClassifyArea(filtered, sad); // {SingleThreaded = true};
    cls.DoClassification();

    foreach (var sa in cfg)
    {
        var clusteredBySa = cls.Scores
            .Where(x => x.Area.Regions.ContainsKey(sa))
            .Select(x => new KeyValuePair<long, double>(x.Area.Regions[sa].Id, x.Parameters.Sentiment))
            .ToLookup(x => x.Key);

        using (var of = new StreamWriter($@"..\..\SentimentRecentWithRegion-{sa}.csv"))
        {
            of.WriteLine("RegionId,Name,Count,SumSentiment,CountExc,SumNeutralExc,Sentiment");
            // collate regional averages
            foreach (var rec in clusteredBySa)
            {
                var count = rec.Count();
                var sm = rec.Sum(x => x.Value) * 100;
                var avg = rec.Average(x => x.Value) * 100;
                // the "Exc" columns exclude near-neutral scores in [-0.5, 0.5]
                var counte = rec.Count(x => x.Value < -0.5 || 0.5 < x.Value);
                var sme = rec.Where(x => x.Value < -0.5 || 0.5 < x.Value).Sum(x => x.Value) * 100;
                of.WriteLine(
                    $"{rec.Key},\"{sad.SANames[sa][rec.Key]}\",{count},{sm:F2},{counte},{sme:F2},{avg:F2}");
            }
        }
    }
}
public static void AddRegions()
{
    Console.WriteLine("Analysing Geos\n");
    const string xmlTemplate = @"medians-{0}p02.xml";
    var cfg = new[] { StatArea.SA4, StatArea.SA3, StatArea.SA2, StatArea.SA1 };

    // location feature sets
    var saLoader = new LoadStatisticalAreas();
    var featureSets = new Dictionary<StatArea, Features>();
    foreach (var area in cfg)
    {
        var xmlFile = Path.Combine(@"..\..", string.Format(xmlTemplate, area.ToString().ToLower()));
        var features = saLoader.GetFeatures(xmlFile);
        featureSets.Add(area, features);
    }

    // summarise
    foreach (var area in cfg)
    {
        Console.WriteLine(
            $"{area}\tregions:{featureSets[area].Count,6:N0}\tpolygons: {featureSets[area].Sum(x => x.Locations.Count),8:N0}");
    }

    var sad = new SADictionary(featureSets);

    var src = @"A:\geoCombined\";
    var jr = new JsonRead<GeoSentimentParameters>(new[] { src });
    jr.DoLoad();

    // users whose home city has been established
    var requiredUsers = new Dictionary<long, string>();
    using (var ifs = new StreamReader(@"..\..\userHomeCity.csv"))
    {
        var ln = ifs.ReadLine(); // skip header
        while ((ln = ifs.ReadLine()) != null)
        {
            var arr = ln.Split(',');
            requiredUsers.Add(long.Parse(arr[0]), arr[1]);
        }
    }

    // drop near-neutral scores; +/-0.05 is the conventional VADER cut-off
    var filtered = jr.Records
        .Where(x => x.Compound < -0.05 || 0.05 < x.Compound)
        .ToList();

    var cls = new Classify(filtered, sad); // {SingleThreaded = true};
    cls.DoClassification();

    // first pass: only tweets posted away from the user's home city
    foreach (var sa in cfg)
    {
        var clusteredBySa = cls.Scores
            .Where(x => requiredUsers.ContainsKey(x.Parameters.UserId)
                        && x.Parameters.Location != requiredUsers[x.Parameters.UserId])
            .Where(x => x.Area.Regions.ContainsKey(sa))
            .Select(x => new KeyValuePair<long, double>(x.Area.Regions[sa].Id, x.Parameters.Compound))
            .ToLookup(x => x.Key);

        using (var of = new StreamWriter($@"..\..\SentimentFilterWithRegion-{sa}.csv"))
        {
            of.WriteLine("RegionId,Name,Observations,Sum,Sentiment");
            // collate regional averages
            foreach (var rec in clusteredBySa)
            {
                var count = rec.Count();
                var sum = rec.Sum(x => x.Value) * 100;
                var avg = rec.Average(x => x.Value) * 100;
                of.WriteLine($"{rec.Key},\"{sad.SANames[sa][rec.Key]}\",{count},{sum:F2},{avg:F2}");
            }
        }
    }

    // second pass: all classified tweets, unfiltered by home city
    foreach (var sa in cfg)
    {
        var clusteredBySa = cls.Scores
            .Where(x => x.Area.Regions.ContainsKey(sa))
            .Select(x => new KeyValuePair<long, double>(x.Area.Regions[sa].Id, x.Parameters.Compound))
            .ToLookup(x => x.Key);

        using (var of = new StreamWriter($@"..\..\SentimentWithRegion-{sa}.csv"))
        {
            of.WriteLine("RegionId,Name,Observations,Sum,Sentiment");
            // collate regional averages
            foreach (var rec in clusteredBySa)
            {
                var count = rec.Count();
                var sum = rec.Sum(x => x.Value) * 100;
                var avg = rec.Average(x => x.Value) * 100;
                of.WriteLine($"{rec.Key},\"{sad.SANames[sa][rec.Key]}\",{count},{sum:F2},{avg:F2}");
            }
        }
    }
}
public static void AnalyseGeoLocations()
{
    Console.WriteLine("Analysing Geos\n");
    var src = @"A:\geoCombined\";
    var jr = new JsonRead<GeoSentimentParameters>(new[] { src });
    jr.DoLoad();

    // ---------------------------------------------------------
    // -- extract multi-city journeys - geo-located

    var collateByGeoUserLocation = jr.Records
        .Where(x => x.GeoEnabled)
        .GroupBy(x => x.UserId)
        .ToDictionary(x => x.Key,
            x => x.GroupBy(u => u.Location)
                  .ToDictionary(u => u.Key, u => u.ToList()));

    // determine the most frequent tweet location for each user (treated as their home city)
    var mostFreqLocationByGeoUser = collateByGeoUserLocation
        .Where(x => x.Value.Count > 1) // more than 1 city
        .ToDictionary(x => x.Key,
            x => x.Value.OrderByDescending(p => p.Value.Count).First().Key);

    // journeys to another city: the coordinates tweeted away from home
    var toGeoJourneys = mostFreqLocationByGeoUser
        .SelectMany(x => collateByGeoUserLocation[x.Key]
            .Where(u => x.Value != u.Key)
            .Select(u => new { User = x.Key, SourceCity = x.Value, TargetCity = u.Key, Locations = u.Value }))
        .GroupBy(x => new { x.SourceCity, x.TargetCity })
        .ToDictionary(
            x => x.Key,
            x => x.SelectMany(j => j.Locations
                .Select(i => new { X2 = i.Xloc, Y2 = i.Yloc, i.LocalTime }).ToList()));

    // journeys from home: the coordinates tweeted in the home city
    var fromGeoJourneys = collateByGeoUserLocation
        .Where(x => x.Value.Any(c => mostFreqLocationByGeoUser.ContainsKey(x.Key)
                                     && c.Key == mostFreqLocationByGeoUser[x.Key]))
        .SelectMany(x => x.Value
            .Where(u => u.Key != mostFreqLocationByGeoUser[x.Key])
            .Select(u => new
            {
                User = x.Key,
                SourceCity = mostFreqLocationByGeoUser[x.Key],
                TargetCity = u.Key,
                Locations = x.Value[mostFreqLocationByGeoUser[x.Key]]
            }))
        .GroupBy(x => new { x.SourceCity, x.TargetCity })
        .ToDictionary(
            x => x.Key,
            x => x.SelectMany(j => j.Locations
                .Select(i => new { X1 = i.Xloc, Y1 = i.Yloc, i.LocalTime }).ToList()));

    using (var ofs = new StreamWriter(@"..\..\geoToJourneys.csv"))
    {
        ofs.WriteLine("SourceCity,TargetCity,X2,Y2,LocalTime");
        foreach (var kvp in toGeoJourneys)
        {
            foreach (var loc in kvp.Value)
            {
                ofs.WriteLine(
                    $"{kvp.Key.SourceCity},{kvp.Key.TargetCity},{loc.X2},{loc.Y2},{loc.LocalTime:s}");
            }
        }
    }

    using (var ofs = new StreamWriter(@"..\..\geoFromJourneys.csv"))
    {
        ofs.WriteLine("SourceCity,TargetCity,X1,Y1,LocalTime");
        foreach (var kvp in fromGeoJourneys)
        {
            foreach (var loc in kvp.Value)
            {
                ofs.WriteLine(
                    $"{kvp.Key.SourceCity},{kvp.Key.TargetCity},{loc.X1},{loc.Y1},{loc.LocalTime:s}");
            }
        }
    }
}
private static void Main(string[] args) { Console.WriteLine($"Start {DateTime.Now}"); var requiredUsers = new Dictionary <string, string>(); using (var ifs = new StreamReader(@"..\..\userHomeCity.csv")) { var ln = ifs.ReadLine(); // skip header while ((ln = ifs.ReadLine()) != null) { var arr = ln.Split(','); requiredUsers.Add(arr[0], arr[1]); } } const string xmlTemplate = @"medians-{1}p02.xml"; var cfg = new[] { StatArea.SA4, StatArea.SA3, StatArea.SA2, StatArea.SA1 }; // location feature sets var saLoader = new LoadStatisticalAreas(); var featureSets = new Dictionary <StatArea, Features>(); foreach (var area in cfg) { var xmlFile = Path.Combine(Loc, string.Format(xmlTemplate, Loc, area.ToString().ToLower())); var features = saLoader.GetFeatures(xmlFile); featureSets.Add(area, features); } // summarise foreach (var area in cfg) { Console.WriteLine($"{area}\tregions:{featureSets[area].Count,6:N0}\tploygons: {featureSets[area].Sum(x=>x.Locations.Count),8:N0}"); } var sad = new SADictionary(featureSets); // var dataSrc = "twitter-extract-all.json"; var geoPosts = new JsonRead<TagPosterDetails>(new[]{Path.Combine(Loc, dataSrc)}); var geoPosts = new JsonRead <TagPosterDetails>(new[] { @"A:\locatedTargets" }); geoPosts.DoLoad(); var filtered = geoPosts.Records.Where(x => requiredUsers.ContainsKey(x.UserIdStr)).ToList(); var cls = new Classify(filtered, sad); // {SingleThreaded = true}; cls.DoClassification(); foreach (var sa in cfg) { var clusteredBySa = cls.Scores .Where(x => x.Area.Regions.ContainsKey(sa)) .Select(x => new KeyValuePair <long, double>(x.Area.Regions[sa].Id, x.Score)) .ToLookup(x => x.Key); using (var of = new StreamWriter($@"..\..\SentimentFilterWithRegion-{sa}.csv")) { of.WriteLine("RegionId,Name,Observations,Sentiment"); // collate regional averages foreach (var rec in clusteredBySa) { var count = rec.Count(); var avg = rec.Average(x => x.Value); of.WriteLine($"{rec.Key},\"{sad.SANames[sa][rec.Key]}\",{count},{avg:F4}"); } } } }