Пример #1
0
        // Populate the feed registry from the azure table.
        //
        // Every feed url loaded for this.id is registered through AddFeed, except:
        //   - eventful / eventbrite / lastfm feeds whose "*_feeds_enabled" setting is the string "False"
        //   - anything that starts with http://upcoming.yahoo.com (always skipped)
        //
        // If evaluating the filters throws (for example a setting key is absent), the exception
        // is logged and the feed is still registered.
        public void LoadFeedsFromAzure(FeedLoadOption option)
        {
            var feeds    = Metadata.LoadFeedsFromAzureTableForId(this.id, option);
            var settings = GenUtils.GetSettingsFromAzureTable("settings");

            foreach (var feed_url in feeds.Keys)
            {
                var skip_feed = false;

                try
                {
                    // evaluated left to right with short-circuiting, same order as a chain of if/continue
                    skip_feed =
                        (settings["eventful_feeds_enabled"] == "False" && feed_url.StartsWith("http://eventful.com/")) ||
                        (settings["eventbrite_feeds_enabled"] == "False" && feed_url.Contains("ics_from_eventbrite")) ||
                        (settings["lastfm_feeds_enabled"] == "False" && feed_url.Contains("ics_from_lastfm_venue")) ||
                        feed_url.StartsWith("http://upcoming.yahoo.com");
                }
                catch (Exception e)
                {
                    // skip_feed stays false here, so the feed falls through to AddFeed below
                    GenUtils.PriorityLogMsg("exception", "CollectIcal", e.Message + e.StackTrace);
                }

                if (skip_feed)
                {
                    continue;
                }

                this.AddFeed(feed_url, feeds[feed_url]);
            }
        }
Пример #2
0
        // Fixture setup: builds the default test Calinfo and search arguments, loads settings
        // and the basic.ics blob, creates collectors for the berkeley and keene test hubs,
        // then runs UpdateYYYY over every ics / eventful / eventbrite example.
        public EventCollectorTest()
        {
            // default hub and its search geometry
            test_calinfo = new Calinfo(ElmcityUtils.Configurator.azure_compute_account);
            lookup_lat   = test_calinfo.lat;
            lookup_lon   = test_calinfo.lon;
            radius       = Configurator.default_radius;

            var upcoming_args_template = "location={0},{1}&radius={2}&min_date={3}";
            test_upcoming_args = string.Format(upcoming_args_template, lookup_lat, lookup_lon, radius, min_date);

            // settings and blob-backed fixtures
            settings = GenUtils.GetSettingsFromAzureTable();

            var basic_ics_blob = BlobStorage.MakeDefaultBlobStorage().GetBlob("admin", "basic.ics");
            basic_ics = basic_ics_blob.HttpResponse.DataAsString();
            bs        = BlobStorage.MakeDefaultBlobStorage();

            // per-hub calinfos and collectors
            calinfo_berkeley   = new Calinfo(berkeley_test_hub);
            calinfo_keene      = new Calinfo(keene_test_hub);
            collector_berkeley = new Collector(calinfo_berkeley, settings);
            collector_keene    = new Collector(calinfo_keene, settings);

            foreach (var ics_example in ics_examples)
            {
                UpdateYYYY(ics_example, "ics");
            }

            foreach (var eventful_example in eventful_examples)
            {
                UpdateYYYY(eventful_example, "xml");
            }

            // the upcoming_examples pass is currently commented out:
            //foreach (var example in upcoming_examples)
            //	UpdateYYYY(example, "xml");

            foreach (var eventbrite_example in eventbrite_examples)
            {
                UpdateYYYY(eventbrite_example, "xml");
            }
        }
Пример #3
0
        // Resolve a setting by name: the azure settings table is the source of truth, and the
        // azure role configuration is consulted only when the table has no (non-null) value.
        // Why?
        // 1. DRY: settings are not written down twice, once for the worker role and once for the web role.
        // 2. Testing: tests run outside the azure environment can use the same values as inside it.
        //
        // setting_name: key to look up.
        // reload:       when true, re-read the settings table before the lookup (it is also
        //               re-read whenever the cached dictionary is empty).
        // Returns the value, or null (after logging a warning) when neither source supplies one.
        private static string GetSettingValue(string setting_name, bool reload)
        {
            // GenUtils.LogMsg("info", "GetSettingValue", setting_name);

            if (settings.Count == 0 || reload)
            {
                settings = GenUtils.GetSettingsFromAzureTable();
            }

            string resolved;
            settings.TryGetValue(setting_name, out resolved);     // leaves resolved null when the key is absent

            if (resolved != null)
            {
                return resolved;
            }

            // not in the table: fall back to the role configuration if we are running in a role
            try
            {
                if (RoleEnvironment.IsAvailable)
                {
                    resolved = RoleEnvironment.GetConfigurationSettingValue(setting_name);
                }
            }
            catch (Exception e)
            {
                GenUtils.PriorityLogMsg("exception", "GetSettingValue", e.Message + e.StackTrace);
            }

            if (resolved == null)
            {
                GenUtils.PriorityLogMsg("warning", "GetSettingValue: " + setting_name, " is null");
            }

            return resolved;
        }
Пример #4
0
        // Collapse events that share the same TitleAndTime() key into one event per key.
        //
        // Steps:
        //   1. bucket the events by dtstart and run MatchSimilarTitles on each bucket
        //   2. for every event, record its tags and its url/source pair under its key
        //   3. give every event the sorted union of the tags recorded for its key (the original
        //      value is kept in original_categories) and the url/source map for its key
        //   4. keep one event per key via AddOrUpdateDictionary
        //
        // id:               hub id; used only as the blob container for tag_sources.obj
        // events:           events to deduplicate; the event objects are modified in place
        //                   (url, categories, original_categories, urls_and_sources)
        // save_tag_sources: when true and id is not null, the tag -> (source -> count) map is
        //                   serialized to the "tag_sources.obj" blob
        //
        // Returns the list of deduplicated events.
        public static List<ZonelessEvent> UniqueByTitleAndStart(string id, List<ZonelessEvent> events, bool save_tag_sources)
        {
            var tag_sources          = new Dictionary<string, Dictionary<string, int>>();
            var uniques              = new Dictionary<string, ZonelessEvent>();
            var merged_tags          = new Dictionary<string, List<string>>();
            var all_urls_and_sources = new Dictionary<string, Dictionary<string, string>>();

            var dt_dict = new Dictionary<DateTime, List<ZonelessEvent>>();   // fill up datetime buckets for matching

            foreach (ZonelessEvent evt in events)
            {
                dt_dict.AddOrAppendDictOfListT(evt.dtstart, evt);
            }

            var settings = GenUtils.GetSettingsFromAzureTable();

            foreach (var dt in dt_dict.Keys)                                 // match similar titles within buckets
            {
                MatchSimilarTitles(dt, dt_dict, settings);
            }

            var _events = new List<ZonelessEvent>();

            foreach (var bucket in dt_dict.Values)                           // flatten dt_dict back to list of evt
            {
                _events.AddRange(bucket);
            }

            foreach (var evt in _events)                                     // build keyed structures
            {
                var key = evt.TitleAndTime();

                evt.url = Utils.NormalizeEventfulUrl(evt.url);               // try url normalizations
                evt.url = Utils.NormalizeUpcomingUrl(evt.url);

                if (evt.categories != null)
                {
                    var tags = evt.categories.Split(',').ToList();
                    foreach (var tag in tags)
                    {
                        Dictionary<string, int> sources_for_tag;
                        if (tag_sources.TryGetValue(tag, out sources_for_tag))
                        {
                            sources_for_tag.IncrementOrAdd<string>(evt.source);
                        }
                        else
                        {
                            tag_sources[tag] = new Dictionary<string, int>()
                            {
                                { evt.source, 1 }
                            };
                        }
                    }
                    merged_tags.AddOrUpdateDictOfListStr(key, tags);         // update keyed tag list for this key
                }

                Dictionary<string, string> urls_for_key;
                if (all_urls_and_sources.TryGetValue(key, out urls_for_key)) // update keyed url/source list for this key
                {
                    urls_for_key[evt.url] = evt.source;
                }
                else
                {
                    all_urls_and_sources[key] = new Dictionary<string, string>()
                    {
                        { evt.url, evt.source }
                    };
                }
            }

            if (save_tag_sources && id != null)
            {
                var bs = BlobStorage.MakeDefaultBlobStorage();
                bs.SerializeObjectToAzureBlob(tag_sources, id, "tag_sources.obj");
            }

            foreach (var evt in _events)                                     // use keyed structures
            {
                var key = evt.TitleAndTime();

                List<string> tags_for_key;
                if (merged_tags.TryGetValue(key, out tags_for_key))
                {
                    evt.original_categories = evt.categories;                // remember original categories for reporting
                    var tags = tags_for_key.Unique().ToList();
                    tags.Sort(String.CompareOrdinal);
                    evt.categories = string.Join(",", tags);                 // assign each event its keyed tag union
                }

                evt.urls_and_sources = all_urls_and_sources[key];            // assign each event its keyed url/source pairs

                uniques.AddOrUpdateDictionary<string, ZonelessEvent>(key, evt);   // deduplicate
            }

            return uniques.Values.ToList();
        }
Пример #5
0
		// Main loop of the worker. Each pass of the while(true) loop:
		//   1. reloads settings from the azure table (an empty result is logged and the previous settings are kept)
		//   2. waits and restarts the pass if the "worker_is_paused" setting starts with "y" (case-insensitive)
		//   3. loads hub ids and region ids, then builds a Todo for the ids that are not regions
		//   4. processes icaltasks in parallel, nonicaltasks sequentially, finalizes both sets in parallel,
		//      then processes stale regions sequentially
		//   5. logs the elapsed time of each phase and calls Sleep()
		// Any exception that escapes the loop is logged by the outer catch, after which Run returns.
		public override void Run()
		{
			try
			{
				var message = "Worker: Run";
				GenUtils.PriorityLogMsg("status", message, null);

				while (true)
				{
					GenUtils.LogMsg("status", "worker waking", null);

					// refresh settings; keep the previously loaded ones if the table yields nothing
					var tmp_settings = GenUtils.GetSettingsFromAzureTable();
					if (tmp_settings.Count == 0)
						GenUtils.PriorityLogMsg("exception", "Run -> GetSettings: cannot!", null);
					else
						settings = tmp_settings;
					GenUtils.LogMsg("status", "worker updated " + settings.Count + " settings", null);

					// how long to wait between checks while paused; falls back to 5 if the setting is missing or not an integer
					int check_interval_minutes;
					try
					{
						check_interval_minutes = Convert.ToInt32(settings["scheduler_check_interval_minutes"]);
					}
					catch 
					{
						check_interval_minutes = 5;
					}

					var pause_setting = "worker_is_paused";
					if (settings.ContainsKey(pause_setting) && settings[pause_setting].ToLower().StartsWith("y"))
					{
						GenUtils.LogMsg("status", "worker is paused", null);
						Utils.WaitMinutes(check_interval_minutes);
						continue;
					}

					ids = Metadata.LoadHubIdsFromAzureTable();
					GenUtils.LogMsg("status", "worker loaded " + ids.Count + " ids", null);

					regions = Utils.GetRegionIds();
					GenUtils.LogMsg("status", "worker found " + regions.Count + " regions", null);

					//twitter_direct_messages = TwitterApi.GetNewTwitterDirectMessages(); // get new control messages // disabled for now, twitter didn't like this
					//GenUtils.LogMsg("status", "worker got " + twitter_direct_messages.Count + " messages", null);

					//ids = MaybeAdjustIdsForTesting(ids);

					todo = new Todo();

					// regions are handled separately at the end of the pass, so exclude them here
					var icals_and_nonicals = ids.Except(regions).ToList();

					try
					{
						BuildTodo(todo, icals_and_nonicals);
					}
					catch (Exception e)
					{
						GenUtils.PriorityLogMsg("exception", "BuildTodo", e.Message);
						// NOTE(review): this restarts the pass immediately, without the Sleep() at the
						// bottom of the loop — confirm that retrying without a delay is intended.
						continue;
					}

					//HandleTwitterMessages(todo, ids);

					//MaybeRemakeWebRoleData();

					// one stopwatch per phase, plus one for the whole pass
					var sw_total = new Stopwatch();
					var sw_ical = new Stopwatch();
					var sw_finalize = new Stopwatch();
					var sw_nonical = new Stopwatch();
					var sw_region = new Stopwatch();

					sw_total.Start();

					sw_ical.Start();

					// cap ical parallelism at the "max_concurrent_icaltasks" setting, defaulting to 3 if it cannot be read
					var options = new ParallelOptions();
					int max_ical_tasks;
					try
					{
						max_ical_tasks = Convert.ToInt32(settings["max_concurrent_icaltasks"]);
					}
					catch (Exception e)
					{
						GenUtils.PriorityLogMsg("exception", "Run: getting max_concurrent_icaltasks", e.Message);
						max_ical_tasks = 3;
					}
					options.MaxDegreeOfParallelism = max_ical_tasks;
					Parallel.ForEach(source: todo.icaltasks, parallelOptions: options, body: (id) =>
					//foreach (var id in todo.icaltasks)              // this can be parallelized because there are many separate/unique endpoints
					{
						try
						{
							// skip this id (after alerting) rather than start work when memory is low
							if ( MemoryIsLow() )
								{
								AlertLowMemory("icaltasks: " + id);
								return;
								}

							Scheduler.UpdateStartTaskForId(id, TaskType.icaltasks);
							ProcessIcal(id);
							StopTask(id, TaskType.icaltasks);
						}
						catch (Exception e)
						{
							GenUtils.PriorityLogMsg("exception", "Worker icaltasks", e.Message);
							// inside the lambda, return only ends the work for this id
							return;
						}
					});
					sw_ical.Stop();

					sw_nonical.Start();
					foreach (var id in todo.nonicaltasks)           // this won't be parallelized because of api rate throttling in nonical service endpoints
					{
						try
						{
							Scheduler.UpdateStartTaskForId(id, TaskType.nonicaltasks);  // the todo list has a general start time, now update it to actual start
							ProcessNonIcal(id);
							StopTask(id, TaskType.nonicaltasks);
						}
						catch (Exception e)
						{
							GenUtils.PriorityLogMsg("exception", "Worker nonicaltasks", e.Message);
							// NOTE(review): unlike the return inside the Parallel.ForEach lambdas, this return
							// is in a plain foreach and therefore exits Run() itself, ending the worker loop
							// after one failed id — confirm this is intended rather than a leftover
							// from a parallel version (where `continue` would be the equivalent).
							return;
						}
					}
					sw_nonical.Stop();

					sw_finalize.Start();
					var finalizers = todo.nonicaltasks.Union(todo.icaltasks);  // finalize ical and/or nonical updates
					Parallel.ForEach(source: finalizers, body: (id) =>
					{
						try
						{
							FinalizeHub(id);
						}
						catch (Exception e)
						{
							GenUtils.PriorityLogMsg("exception", "Worker finalize", e.Message);
							return;
						}
					}
					);
					sw_finalize.Stop();

					sw_region.Start();
					try
					{
						foreach (var id in regions )            // now update regions, this can also be parallelized as needed
						{
							if (RegionIsStale(id))
							{
								Scheduler.UpdateStartTaskForId(id, TaskType.regiontasks);
								ProcessRegion(id);
								StopTask(id, TaskType.regiontasks);
							}
						}
					}
					catch (Exception e)
					{
						GenUtils.PriorityLogMsg("exception", "Worker regiontasks", e.Message);
						// NOTE(review): this return also exits Run() (the try wraps the whole region loop),
						// so one failing region ends the worker loop — confirm this is intended.
						return;
					}

					sw_region.Stop();

					sw_total.Stop();

					GenUtils.LogMsg("status", String.Format("worker: ical {0}, finalize {1}, nonical {2}, region {3}, total {4}",
							sw_ical.Elapsed.ToString(),
							sw_finalize.Elapsed.ToString(),
							sw_nonical.Elapsed.ToString(),
							sw_region.Elapsed.ToString(),
							sw_total.Elapsed.ToString()
							),
						null
						);

					GenUtils.LogMsg("status", "worker sleeping", null);
					Sleep();

				}
			}
			catch (Exception e)
			{
				GenUtils.PriorityLogMsg("exception", "Worker.Run", e.Message + e.StackTrace);
			}
		}
Пример #6
0
        // Discover Facebook fan pages that mention a location and turn those that have events
        // into taggable sources.
        //
        // A Bing search restricted to www.facebook.com/pages is run for the quoted location.
        // Each result is handled in parallel: its Facebook id is derived from the url, duplicate
        // ids are skipped, its events are fetched, and the page is dropped when it has no events
        // or when FacebookPageMatchesLocation rejects it. Surviving pages are added to the list
        // returned by InitializeTaggables and passed to RememberTaggable.
        //
        // calinfo:  hub the sources are collected for
        // location: location string that is searched for and matched against each page
        //
        // Returns the taggable sources; entries added here are in no particular order because
        // the results are processed in parallel.
        //
        // Thread-safety: the seen-id set and the result list are shared by all parallel workers,
        // so every access to them is serialized through a local lock.
        public static List<TaggableSource> GetFacebookPages(Calinfo calinfo, string location)
        {
            var search_template      = String.Format("site:www.facebook.com/__TARGET__ \"{0}\"", location);
            var search_for_fan_pages = search_template.Replace("__TARGET__", "pages");
            var stats            = new Dictionary<string, object>();
            var fan_page_results = Search.BingSearch(search_for_fan_pages, 1000, stats);
            // searching with __TARGET__ = "groups" doesn't work, location string won't usually appear
            var group_results = new List<SearchResult>();                    // placeholder for now
            var bing_results  = fan_page_results.Concat(group_results).ToList();

            var taggable_sources = InitializeTaggables(calinfo, "facebook");

            var    seen_ids    = new HashSet<string>();
            var    shared_lock = new object();                               // guards seen_ids and taggable_sources
            string name_and_pk = "facebooksources";

            var settings = GenUtils.GetSettingsFromAzureTable();
            var options  = new ParallelOptions();

            Parallel.ForEach(source: bing_results, parallelOptions: options, body: (result) =>
            {
                try
                {
                    var url  = Regex.Replace(result.url, @"\?.+", "");       // remove query string if any
                    var name = Regex.Match(result.url, "facebook.com/(pages|groups)/([^/]+)").Groups[2].Value;
                    name     = name.Replace('-', ' ');

                    var fb_id = Utils.id_from_fb_fanpage_or_group(url);

                    // atomic "have we seen this id?" test-and-set; Add returns false for a duplicate
                    bool first_sighting;
                    lock (shared_lock)
                    {
                        first_sighting = seen_ids.Add(fb_id);
                    }
                    if (!first_sighting)
                    {
                        return;
                    }

                    string slat = null;
                    string slon = null;
                    var ical    = new DDay.iCal.iCalendar();
                    var facebook_access_token = settings["facebook_access_token"];   // todo: merge with code in collector
                    var j_obj  = Utils.GetFacebookEventsAsJsonObject(fb_id, facebook_access_token);
                    var events = Utils.iCalendarizeJsonObjectFromFacebook(j_obj, calinfo, ical, slat, slon, settings);

                    if (events.Count == 0)                                   // no calendar on this page
                    {
                        return;
                    }

                    string page;

                    if (FacebookPageMatchesLocation(url, location, settings, out page) == false)
                    {
                        return;
                    }

                    string origin_url = "";
                    if (!String.IsNullOrEmpty(page))
                    {
                        origin_url = GetFacebookPageOrGroupOriginUrl(page);
                    }

                    var ical_url = string.Format("http://{0}/ics_from_fb_page?fb_id={1}&elmcity_id={2}",
                                                 ElmcityUtils.Configurator.appdomain,
                                                 fb_id,
                                                 calinfo.id);

                    var has_future_events = FacebookPageHasFutureEvents(events, calinfo);

                    var taggable = new TaggableSource(name, calinfo.id, url + "?sk=events", ical_url, has_future_events, origin_url);

                    lock (shared_lock)
                    {
                        taggable_sources.Add(taggable);
                    }

                    RememberTaggable(name_and_pk, fb_id, taggable);
                }
                catch (Exception e)
                {
                    // a failure on one search result is logged and does not stop the others
                    GenUtils.PriorityLogMsg("exception", "GetFacebookPages", e.Message + e.StackTrace);
                    return;
                }
            });

            return taggable_sources;
        }
Пример #7
0
        // Refresh the web role's cached settings and themes.
        //
        // Reachable behavior:
        //   - settings: reloaded from the azure table and stored in ElmcityController.settings,
        //     unless the table comes back empty (logged, previous settings kept)
        //   - themes: reloaded via Utils.GetThemesDict() and stored in ElmcityController.themes
        //     when they differ from the cached ones
        // Each step has its own try/catch, so a failure in one is logged and does not stop the next.
        //
        // The unconditional return after the themes step makes everything below it unreachable:
        // the WebRoleData / renderer checks and the route re-registration never run, and
        // new_routes is never set to true.
        public static void _ReloadSettingsAndRoutes()
        {
            GenUtils.LogMsg("status", "webrole _ReloadRoutes", null);

            bool new_routes = false;

            try
            {
                var settings = GenUtils.GetSettingsFromAzureTable();
                if (settings.Keys.Count == 0)
                {
                    GenUtils.PriorityLogMsg("exception", "ReloadSettings: no settings!", null);
                }
                else
                {
                    ElmcityController.settings = settings;
                }
            }
            catch (Exception e0)
            {
                var msg = "_ReloadSettingsAndRoutes: settings";
                GenUtils.PriorityLogMsg("exception", msg, e0.Message);
            }


            try
            {
                var themes = Utils.GetThemesDict();
                if (ObjectUtils.DictOfDictStrEqualsDictOfDictStr(themes, ElmcityController.themes) == false)
                {
                    GenUtils.LogMsg("status", "_ReloadSettingsAndRoutes", "reloading themes");
                    // NOTE(review): the lock is taken on the same object the assignment replaces, so
                    // later callers would lock a different instance — confirm this gives the intended protection.
                    lock (ElmcityController.themes)
                    {
                        ElmcityController.themes = themes;
                    }
                }
            }
            catch (Exception e2)
            {
                var msg = "_ReloadSettingsAndRoutes: themes";
                GenUtils.PriorityLogMsg("exception", msg, e2.Message);
            }

            // everything after this return is unreachable
            // NOTE(review): presumably the wrd/route reload was switched off on purpose — confirm before removing or re-enabling.
            return;

            try
            {
                var new_wrd = WebRoleData.GetWrd();
                // NOTE(review): this tests wrd.IsConsistent(), not new_wrd.IsConsistent() — confirm which object was meant.
                if (new_wrd == null || wrd.IsConsistent() == false)
                {
                    GenUtils.PriorityLogMsg("warning", "null or inconsistent WebRoleData!", null);
                    return;
                }

                if (new_wrd.ready_ids.Count != ElmcityApp.wrd.ready_ids.Count) // did # of hubs change? either on initial load or subsequently
                {
                    new_routes = true;                                         // force rebuild of route map
                    GenUtils.LogMsg("status", "Reload: found a new hub", null);
                    WebRoleData.SaveTimestampedWrd(ElmcityApp.wrd);
                    lock (ElmcityApp.wrd)
                    {
                        ElmcityApp.wrd = new_wrd;                               // update WebRoleData (todo: rewarm caches affected)
                    }
                }

                foreach (var id in ElmcityApp.wrd.ready_ids)                                  // did any hub's renderer change?
                {
                    var cached_renderer  = ElmcityApp.wrd.renderers[id];
                    var current_renderer = Utils.AcquireRenderer(id);

                    if (cached_renderer.timestamp != current_renderer.timestamp)                     // timestamp changed
                    {
                        // a changed timestamp alone is not enough: the renderers must differ in some other key too
                        if (!Utils.RenderersAreEqual(cached_renderer, current_renderer, except_keys: new List <string>()
                        {
                            "timestamp"
                        }))
                        {
                            GenUtils.LogMsg("status", "Reload: new renderer for " + id, null);
                            lock (ElmcityApp.wrd)
                            {
                                ElmcityApp.wrd.renderers[id] = current_renderer;                                                  // update the renderer
                                if (ElmcityApp.home_controller != null)                                                           // skip this if we found a change on startup, controller not ready
                                {
                                    var cache = new AspNetCache(ElmcityApp.home_controller.HttpContext.Cache);
                                    var url   = Utils.MakeBaseZonelessUrl(id);
                                    cache.Remove(url);                                                                                   // flush cached objects for id
                                    var obj = HttpUtils.FetchUrl(new Uri(url));                                                          // rewarm cache
                                }
                            }
                        }
                    }
                }
            }

            catch (Exception e1)
            {
                GenUtils.PriorityLogMsg("exception", "_ReloadSettingsAndRoutes: cannot check/update wrd", e1.Message + e1.StackTrace);
            }


            if (new_routes)
            {
                // existing_routes refers to the same RouteTable.Routes collection that is cleared below
                var existing_routes = RouteTable.Routes;
                var route_count     = existing_routes.Count;
                try
                {
                    GenUtils.LogMsg("status", "_ReloadSettingsAndRoutes: registering " + route_count + " routes", null);

                    lock (RouteTable.Routes)
                    {
                        var route_count_old = RouteTable.Routes.Count;
                        GenUtils.PriorityLogMsg("info", RouteTable.Routes.Count + " routes before reload", null);
                        RouteTable.Routes.Clear();
                        ElmcityApp.RegisterRoutes(RouteTable.Routes, ElmcityApp.wrd);
                        GenUtils.PriorityLogMsg("info", RouteTable.Routes.Count + " routes registered", null);
                        var route_count_new = RouteTable.Routes.Count;
                        // fewer routes than before is only reported, not rolled back
                        if (route_count_new < route_count_old)
                        {
                            GenUtils.PriorityLogMsg("warning", "route count was " + route_count_old + ", is " + route_count_new, null);
                        }
                    }
                }
                catch (Exception e3)
                {
                    GenUtils.PriorityLogMsg("exception", "_ReloadSettingsAndRoutes: registering " + route_count + " routes", e3.Message + e3.StackTrace);
                    // on failure, attempt the registration once more against the same route collection
                    ElmcityApp.RegisterRoutes(existing_routes, ElmcityApp.wrd);
                }
            }
        }
Пример #8
0
 // Load the name/value settings held in the given azure table.
 // Thin wrapper that delegates to GenUtils.GetSettingsFromAzureTable.
 private static Dictionary<string, string> GetSettings(string table)
 {
     var table_settings = GenUtils.GetSettingsFromAzureTable(table);
     return table_settings;
 }