Ejemplo n.º 1
0
        /// <summary>
        /// Private constructor that scales <c>ExpectedSize</c> down by the number of
        /// processors minus one (never less than one) and then calls <c>Init()</c>.
        /// </summary>
        /// <param name="src">
        /// Another <c>JsonRead</c> instance.
        /// NOTE(review): this parameter is not read anywhere in this constructor body —
        /// confirm whether settings were meant to be copied from it.
        /// </param>
        private JsonRead(JsonRead src)
        {
            // Reduce allocation to the expected proportion needed by this thread.
            // ProcessorCount - 1 evaluates to 0 on a single-processor machine, so the
            // divisor is clamped to at least 1 to avoid dividing ExpectedSize by zero.
            var engineCnt = Math.Max(1, Environment.ProcessorCount - 1);

            ExpectedSize /= engineCnt;

            Init();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads records through <c>JsonRead</c> from the <c>twitter-{filter}</c> location
        /// and writes several summary files two directories above the working directory:
        /// <list type="bullet">
        /// <item><description><c>activeTags-{filter}.csv</c> — lower-cased hashtag and its occurrence count, most frequent first.</description></item>
        /// <item><description><c>calendarActivity-{filter}.csv</c> — record count per (year, day-of-year), busiest first.</description></item>
        /// <item><description><c>location-{filter}.csv</c> — record count per (Yloc, Xloc) pair, most frequent first.</description></item>
        /// <item><description><c>twitter-extract-{filter}.csv</c> — every record, grouped by user id, users with the most records first.</description></item>
        /// <item><description><c>twitter-extract-{filter}.json</c> — every record serialized as JSON, one object per line.</description></item>
        /// </list>
        /// </summary>
        /// <param name="filter">Suffix used to build both the input location and every output file name.</param>
        /// <param name="size">Passed through as the third argument of the <c>JsonRead</c> constructor.</param>
        private static void GetDetailsFromTwitterJson(string filter, int size)
        {
            var tgtLocs = new List<string>
            {
                $@"E:\uni\Cluster and Cloud Computing\twitter-{filter}"
            };

            var jr = new JsonRead(tgtLocs, false, size);    // {SingleThreaded = true};

            jr.DoLoad();


            // extract tags
            // ToLowerInvariant (rather than ToLower) keeps the grouping keys independent of
            // the current culture, so the same input yields the same tag counts on any machine.
            var tags = jr.Records
                       .Where(x => x.HashTags != null)
                       .SelectMany(x => x.HashTags.Select(t => t.ToLowerInvariant()))
                       .GroupBy(x => x)
                       .ToDictionary(x => x.Key, x => x.Count());

            using (var ofs = new StreamWriter($@"..\..\activeTags-{filter}.csv"))
            {
                ofs.WriteLine("Tag,Count");
                foreach (var kvp in tags.OrderByDescending(x => x.Value))
                {
                    ofs.WriteLine($"\"{kvp.Key}\",{kvp.Value}");
                }
            }


            // extract timing: number of records per calendar day
            var timing = jr.Records
                         .GroupBy(x => new { x.CreateTime.Year, x.CreateTime.DayOfYear })
                         .ToDictionary(x => x.Key, x => x.Count());

            using (var ofs = new StreamWriter($@"..\..\calendarActivity-{filter}.csv"))
            {
                ofs.WriteLine("Year,DayOfYear,Count");
                foreach (var kvp in timing.OrderByDescending(x => x.Value))
                {
                    ofs.WriteLine($"{kvp.Key.Year},{kvp.Key.DayOfYear},{kvp.Value}");
                }
            }

            // extract locations: number of records per (Yloc, Xloc) pair
            var locs = jr.Records
                       .GroupBy(x => new { x.Yloc, x.Xloc })
                       .ToDictionary(x => x.Key, x => x.Count());

            using (var ofs = new StreamWriter($@"..\..\location-{filter}.csv"))
            {
                ofs.WriteLine("Yloc,Xloc,Count");
                foreach (var kvp in locs.OrderByDescending(x => x.Value))
                {
                    ofs.WriteLine($"{kvp.Key.Yloc},{kvp.Key.Xloc},{kvp.Value}");
                }
            }


            //  jr.DumpToFile($@"..\..\twitter-geoOnly-{filter}.csv", true);
            //  jr.DumpToFile($@"..\..\twitter-extract-{filter}.csv");


            // collate every record under the id of the user that produced it
            var collate = jr.Records.GroupBy(x => x.UserIdStr).ToDictionary(x => x.Key, x => x.ToList());

            using (var ofs = new StreamWriter($@"..\..\twitter-extract-{filter}.csv"))
            {
                ofs.WriteLine(TagPosterDetails.GetHeader());

                // users with the most records first; ties broken by user id
                foreach (var kvp in collate.OrderByDescending(x => x.Value.Count).ThenBy(x => x.Key))
                {
                    foreach (var item in kvp.Value.OrderBy(x => x.CreateTime).ThenBy(x => x.Location))
                    {
                        // stamp the record with its user's total record count before writing;
                        // note this mutates the record objects held in the group lists
                        item.Count = kvp.Value.Count;
                        ofs.WriteLine(item.ToString());
                    }
                }
            }

            // newline byte written after each serialized record (one JSON object per line)
            var nl = (byte)'\n';

            var ser = new DataContractJsonSerializer(typeof(TagPosterDetails));

            using (var fs = File.Open($@"..\..\twitter-extract-{filter}.json", FileMode.Create))
            {
                foreach (var rec in jr.Records)
                {
                    ser.WriteObject(fs, rec);
                    fs.WriteByte(nl);
                }
            }


            Console.WriteLine($"Done {DateTime.Now}");
        }