Ejemplo n.º 1
0
        /// <summary>
        /// Searches the candidate targets for the first one whose bounding box
        /// contains the coordinates carried by <paramref name="tag"/>.
        /// </summary>
        /// <param name="tag">Post whose <c>Xloc</c> and <c>Yloc</c> are tested against each candidate box.</param>
        /// <param name="tgt">Receives the matching target, or <c>null</c> when no match is found.</param>
        /// <returns><c>true</c> when a containing target was found; otherwise <c>false</c>.</returns>
        public bool Find(TagPosterDetails tag, out Target tgt)
        {
            tgt = null;

            // A box test needs both coordinates. Checking Yloc up front (rather than on
            // every iteration) avoids enumerating candidates that can never match.
            if (!tag.Xloc.HasValue || !tag.Yloc.HasValue)
            {
                return false;
            }

            var longitude = tag.Xloc.Value;
            var latitude  = tag.Yloc.Value;

            foreach (var candidate in Targets.From(longitude))
            {
                var box = candidate.Value.Box;

                if (box.InBox(latitude, longitude))
                {
                    tgt = candidate.Value;
                    return true;
                }

                // Stop scanning once a candidate's box starts beyond the point.
                // NOTE(review): this relies on Targets.From yielding candidates in an
                // order where no later box can contain the point - confirm.
                if (longitude < box.Xmin)
                {
                    return false;
                }
            }

            return false;
        }
        /// <summary>
        /// Scores a located post and appends the result to <c>Scores</c>.
        /// Posts missing either coordinate are ignored.
        /// </summary>
        /// <param name="cnt">Record counter supplied by the caller; not used by this method.</param>
        /// <param name="post">Post providing the coordinates, text and local timestamp.</param>
        /// <returns>This instance, so calls can be chained.</returns>
        public Classify Process(long cnt, TagPosterDetails post)
        {
            var hasLocation = post.Xloc.HasValue && post.Yloc.HasValue;

            if (hasLocation)
            {
                // NOTE(review): Xloc is passed first to a type named LatLong - confirm the
                // constructor's expected argument order.
                var point = new LatLong(post.Xloc.Value, post.Yloc.Value);

                // Which regions the point falls in, then the polarity of the text.
                var areas     = Sad.WhatRegions(point);
                var sentiment = _analyzer.PolarityScores(post.Text);
                var when      = post.LocalTime;

                // Only the compound value of the polarity result is kept.
                Scores.Add(new LocatedScore(sentiment.Compound, when, areas));
            }

            return this;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Writes the header from <c>TagPosterDetails.GetHeader()</c> followed by one line
        /// per record to the file at <paramref name="loc"/>.
        /// </summary>
        /// <param name="loc">Path of the file to create or overwrite.</param>
        /// <param name="geoTaggedOnly">
        /// When <c>true</c>, only records whose <c>GeoEnabled</c> flag is set are written;
        /// when <c>false</c> (the default), every record is written.
        /// </param>
        public void DumpToFile(string loc, bool geoTaggedOnly = false)
        {
            using (var writer = new StreamWriter(loc))
            {
                writer.WriteLine(TagPosterDetails.GetHeader());

                foreach (var record in Records)
                {
                    // With the filter on, skip anything that is not geo-enabled.
                    if (geoTaggedOnly && !record.GeoEnabled)
                    {
                        continue;
                    }

                    writer.WriteLine(record.ToString());
                }
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Deserializes one JSON row, converts it to a <c>TagPosterDetails</c> and adds it
        /// to <c>Records</c>.
        /// </summary>
        /// <param name="cnt">Counter supplied by the caller; stored as the record's <c>RecordId</c>.</param>
        /// <param name="line">JSON text of a single row.</param>
        /// <returns>This instance, so calls can be chained.</returns>
        /// <remarks>
        /// Any exception raised while reading or converting the row is reported on the
        /// console and the row is skipped; it does not propagate to the caller.
        /// </remarks>
        public JsonRead Process(long cnt, string line)
        {
            var bytes = _encoding.GetBytes(line);

            using (var sf = new MemoryStream(bytes))
            {
                try
                {
                    var row = (UniTwitterRow)_ser.ReadObject(sf);

                    // When only geo-tagged posts are wanted, drop rows without coordinates.
                    if (GeoTaggedOnly && row.Doc.Coordinates == null)
                    {
                        return this;
                    }

                    // The day and month abbreviations in the timestamp are English, so parse
                    // with the invariant culture; a null provider would use the machine's
                    // current culture and reject every row on non-English systems.
                    // The "+0000" offset is matched as literal text.
                    var tm = DateTime.ParseExact(row.Doc.CreatedAt, "ddd MMM dd HH:mm:ss +0000 yyyy",
                                                 CultureInfo.InvariantCulture, DateTimeStyles.None);

                    var post = new TagPosterDetails
                    {
                        Location   = row.Key[0],
                        PostId     = row.Id,
                        CreateTime = tm,
                        Text       = row.Doc.Text,
                        UserIdStr  = row.Doc.User.IdStr,
                        UserName   = row.Doc.User.Name,
                        RecordId   = cnt,
                        Source     = row.Doc.Source
                    };

                    if (row.Doc.User.TimeZone != null)
                    {
                        post.TimeZone = row.Doc.User.TimeZone;
                    }

                    if (row.Doc.Coordinates != null)
                    {
                        post.GeoEnabled = true;

                        // Element 0 feeds Xloc and element 1 feeds Yloc; either may be absent.
                        if (row.Doc.Coordinates.Coord[0].HasValue)
                        {
                            post.Xloc = row.Doc.Coordinates.Coord[0].Value;
                        }

                        if (row.Doc.Coordinates.Coord[1].HasValue)
                        {
                            post.Yloc = row.Doc.Coordinates.Coord[1].Value;
                        }
                    }

                    if (row.Doc.Entities.Hashtags != null)
                    {
                        post.HashTags = row.Doc.Entities.Hashtags.Select(x => x.Text).ToList();
                    }

                    Records.Add(post);
                }
                catch (Exception ex)
                {
                    // Best effort: report the bad row and carry on with the next one.
                    Console.WriteLine($"Problem {ex.Message}");
                }
            }

            return this;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Loads the Twitter JSON data for <paramref name="filter"/> and writes several
        /// extracts two directories above the working directory: hashtag counts, per-day
        /// activity, per-location counts, a CSV of all records grouped by user, and a
        /// line-delimited JSON dump of all records.
        /// </summary>
        /// <param name="filter">Suffix used to select the input folder and to name the output files.</param>
        /// <param name="size">Passed through to the <c>JsonRead</c> constructor.</param>
        private static void GetDetailsFromTwitterJson(string filter, int size)
        {
            // Numbers written to CSV must not depend on the machine's regional settings:
            // a decimal-comma culture would otherwise split a coordinate across two columns.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var tgtLocs = new List <string>
            {
                $@"E:\uni\Cluster and Cloud Computing\twitter-{filter}"
            };

            var jr = new JsonRead(tgtLocs, false, size);    // {SingleThreaded = true};

            jr.DoLoad();


            // extract tags (case-insensitive; invariant lower-casing keeps the grouping
            // independent of the current culture)
            var tags = jr.Records
                       .Where(x => x.HashTags != null)
                       .SelectMany(x => x.HashTags.Select(t => t.ToLowerInvariant()))
                       .GroupBy(x => x)
                       .ToDictionary(x => x.Key, x => x.Count());

            using (var ofs = new StreamWriter($@"..\..\activeTags-{filter}.csv"))
            {
                ofs.WriteLine("Tag,Count");
                foreach (var kvp in tags.OrderByDescending(x => x.Value))
                {
                    ofs.WriteLine($"\"{kvp.Key}\",{kvp.Value}");
                }
            }


            // extract timing: number of posts per calendar day
            var timing = jr.Records
                         .GroupBy(x => new { x.CreateTime.Year, x.CreateTime.DayOfYear })
                         .ToDictionary(x => x.Key, x => x.Count());

            using (var ofs = new StreamWriter($@"..\..\calendarActivity-{filter}.csv"))
            {
                ofs.WriteLine("Year,DayOfYear,Count");
                foreach (var kvp in timing.OrderByDescending(x => x.Value))
                {
                    ofs.WriteLine($"{kvp.Key.Year},{kvp.Key.DayOfYear},{kvp.Value}");
                }
            }

            // extract locations: number of posts per distinct coordinate pair
            var locs = jr.Records
                       .GroupBy(x => new { x.Yloc, x.Xloc })
                       .ToDictionary(x => x.Key, x => x.Count());

            using (var ofs = new StreamWriter($@"..\..\location-{filter}.csv"))
            {
                ofs.WriteLine("Yloc,Xloc,Count");
                foreach (var kvp in locs.OrderByDescending(x => x.Value))
                {
                    // Missing coordinates still render as empty fields.
                    ofs.WriteLine(string.Format(invariant, "{0},{1},{2}",
                                                kvp.Key.Yloc, kvp.Key.Xloc, kvp.Value));
                }
            }


            //  jr.DumpToFile($@"..\..\twitter-geoOnly-{filter}.csv", true);
            //  jr.DumpToFile($@"..\..\twitter-extract-{filter}.csv");


            // Group records by user; the most active users are written first.
            var collate = jr.Records.GroupBy(x => x.UserIdStr).ToDictionary(x => x.Key, x => x.ToList());

            using (var ofs = new StreamWriter($@"..\..\twitter-extract-{filter}.csv"))
            {
                ofs.WriteLine(TagPosterDetails.GetHeader());

                foreach (var kvp in collate.OrderByDescending(x => x.Value.Count).ThenBy(x => x.Key))
                {
                    foreach (var item in kvp.Value.OrderBy(x => x.CreateTime).ThenBy(x => x.Location))
                    {
                        // Stamp each record with its user's post total. This mutates the
                        // record, so the JSON dump below carries the count as well.
                        item.Count = kvp.Value.Count;
                        ofs.WriteLine(item.ToString());
                    }
                }
            }

            var nl = (byte)'\n';

            var ser = new DataContractJsonSerializer(typeof(TagPosterDetails));

            // One JSON object per line.
            using (var fs = File.Open($@"..\..\twitter-extract-{filter}.json", FileMode.Create))
            {
                foreach (var rec in jr.Records)
                {
                    ser.WriteObject(fs, rec);
                    fs.WriteByte(nl);
                }
            }


            Console.WriteLine($"Done {DateTime.Now}");
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Converts one JSON row to a <c>TagPosterDetails</c> and adds it to <c>Records</c>,
        /// but only when the row belongs to a user listed in <c>Ids</c>.
        /// </summary>
        /// <param name="cnt">Counter supplied by the caller; stored as the record's <c>RecordId</c>.</param>
        /// <param name="item">
        /// Pair whose first element is stored as the record's <c>File</c> and whose second
        /// element is the JSON text of the row.
        /// </param>
        /// <returns>This instance, so calls can be chained.</returns>
        /// <remarks>
        /// Any exception raised while reading or converting the row is reported on the
        /// console and the row is skipped; it does not propagate to the caller.
        /// </remarks>
        public FilterJsonRead Process(long cnt, Tuple <string, string> item)
        {
            // Pull the user id out of the raw text first, so rows from users we are not
            // interested in are rejected without deserializing them.
            var match = _re.Match(item.Item2);

            if (!match.Success)
            {
                return this;
            }

            var userId = match.Groups[1].Captures[0].Value;

            if (!Ids.Contains(userId))
            {
                return this;
            }

            var bytes = _encoding.GetBytes(item.Item2);

            using (var sf = new MemoryStream(bytes))
            {
                try
                {
                    var row = (UniTwitterRow)_ser.ReadObject(sf);

                    // The day and month abbreviations in the timestamp are English, so parse
                    // with the invariant culture; a null provider would use the machine's
                    // current culture and reject every row on non-English systems.
                    // The "+0000" offset is matched as literal text.
                    var tm = DateTime.ParseExact(row.Doc.CreatedAt,
                                                 "ddd MMM dd HH:mm:ss +0000 yyyy",
                                                 CultureInfo.InvariantCulture, DateTimeStyles.None);

                    var post = new TagPosterDetails
                    {
                        Location   = row.Key[0],
                        PostId     = row.Id,
                        CreateTime = tm,
                        Text       = row.Doc.Text,
                        UserIdStr  = row.Doc.User.IdStr,
                        UserName   = row.Doc.User.Name,
                        RecordId   = cnt,
                        Source     = row.Doc.Source,
                        File       = item.Item1
                    };

                    if (row.Doc.User.UtcOffset.HasValue)
                    {
                        post.UtcOffset = row.Doc.User.UtcOffset.Value;
                    }

                    // Guard against a missing url list the same way hashtags are guarded
                    // below, so such a row is kept instead of being dropped by the catch.
                    var urls = row.Doc.Entities.Urls;
                    if (urls != null && urls.Any())
                    {
                        post.ExpandedUrl = urls.First().ExpandedUrl;
                    }

                    if (row.Doc.User.TimeZone != null)
                    {
                        post.TimeZone = row.Doc.User.TimeZone;
                    }

                    if (row.Doc.Coordinates != null)
                    {
                        post.GeoEnabled = true;

                        // Element 0 feeds Xloc and element 1 feeds Yloc; either may be absent.
                        if (row.Doc.Coordinates.Coord[0].HasValue)
                        {
                            post.Xloc = row.Doc.Coordinates.Coord[0].Value;
                        }

                        if (row.Doc.Coordinates.Coord[1].HasValue)
                        {
                            post.Yloc = row.Doc.Coordinates.Coord[1].Value;
                        }
                    }

                    if (row.Doc.Entities.Hashtags != null)
                    {
                        post.HashTags = row.Doc.Entities.Hashtags.Select(x => x.Text).ToList();
                    }

                    Records.Add(post);
                }
                catch (Exception ex)
                {
                    // Best effort: report the bad row and carry on with the next one.
                    Console.WriteLine($"Problem {ex.Message}");
                }
            }

            return this;
        }