// Populate the feed registry from the azure table.
//
// Every url returned by Metadata.LoadFeedsFromAzureTableForId for this hub is passed to AddFeed,
// except:
//   - urls starting with http://upcoming.yahoo.com (always skipped)
//   - eventful / eventbrite / lastfm urls whose *_feeds_enabled setting is the literal "False"
//
// A setting key that is absent from the settings table leaves that feed family enabled. The
// lookup uses TryGetValue so that an absent key no longer raises KeyNotFoundException, which
// previously aborted the remaining checks (including the upcoming.yahoo.com exclusion) and
// logged an exception for every feed.
//
// option: passed through unchanged to Metadata.LoadFeedsFromAzureTableForId.
public void LoadFeedsFromAzure(FeedLoadOption option)
{
    var dict = Metadata.LoadFeedsFromAzureTableForId(this.id, option);
    var settings = GenUtils.GetSettingsFromAzureTable("settings");

    // true only when the key exists and holds the literal "False"
    Func<string, bool> is_disabled = (setting_name) =>
    {
        string setting_value;
        return settings.TryGetValue(setting_name, out setting_value) && setting_value == "False";
    };

    foreach (var url in dict.Keys)
    {
        bool skip = false;

        try
        {
            // url tests come first so a settings lookup only happens for urls it can affect
            skip =
                url.StartsWith("http://upcoming.yahoo.com", StringComparison.Ordinal)
                || (url.StartsWith("http://eventful.com/", StringComparison.Ordinal) && is_disabled("eventful_feeds_enabled"))
                || (url.Contains("ics_from_eventbrite") && is_disabled("eventbrite_feeds_enabled"))
                || (url.Contains("ics_from_lastfm_venue") && is_disabled("lastfm_feeds_enabled"));
        }
        catch (Exception e)
        {
            // best effort, as before: a failure while filtering is logged and the feed is still added
            GenUtils.PriorityLogMsg("exception", "CollectIcal", e.Message + e.StackTrace);
        }

        if (skip)
        {
            continue;
        }

        this.AddFeed(url, dict[url]);
    }
}
// Set up the fixtures shared by the event collector tests: a reference calinfo and the
// lat/lon/radius derived from it, the settings table, the basic.ics blob text, a blob storage
// handle, calinfos and collectors for the berkeley and keene test hubs, and refreshed copies
// of the ics / eventful / eventbrite example files.
public EventCollectorTest()
{
    // reference calinfo supplies the coordinates used in the location query string
    test_calinfo = new Calinfo(ElmcityUtils.Configurator.azure_compute_account);
    lookup_lat = test_calinfo.lat;
    lookup_lon = test_calinfo.lon;
    radius = Configurator.default_radius;

    const string upcoming_args_template = "location={0},{1}&radius={2}&min_date={3}";
    test_upcoming_args = string.Format(upcoming_args_template, lookup_lat, lookup_lon, radius, min_date);

    settings = GenUtils.GetSettingsFromAzureTable();

    // basic.ics is read through its own storage handle; bs is created separately afterwards
    var admin_storage = BlobStorage.MakeDefaultBlobStorage();
    var basic_ics_response = admin_storage.GetBlob("admin", "basic.ics").HttpResponse;
    basic_ics = basic_ics_response.DataAsString();
    bs = BlobStorage.MakeDefaultBlobStorage();

    calinfo_berkeley = new Calinfo(berkeley_test_hub);
    calinfo_keene = new Calinfo(keene_test_hub);
    collector_berkeley = new Collector(calinfo_berkeley, settings);
    collector_keene = new Collector(calinfo_keene, settings);

    foreach (var ics_example in ics_examples)
    {
        UpdateYYYY(ics_example, "ics");
    }

    foreach (var eventful_example in eventful_examples)
    {
        UpdateYYYY(eventful_example, "xml");
    }

    // upcoming_examples are not refreshed: that loop is disabled in this constructor

    foreach (var eventbrite_example in eventbrite_examples)
    {
        UpdateYYYY(eventbrite_example, "xml");
    }
}
// Resolve a setting by name: the source-of-truth azure table is consulted first, and the azure
// role configuration is used only when the table yields no non-null value.
// Why?
//   1. DRY: settings are not written down twice, once for the worker role and once for the web role
//   2. testing: tests run outside the azure environment can use the same defaults as code inside it
//
// setting_name: key to look up.
// reload:       when true, the cached settings are re-read from the azure table first
//               (they are also re-read whenever the cache is empty).
// Returns the value, or null (after logging a warning) when neither source supplies one.
private static string GetSettingValue(string setting_name, bool reload)
{
    if (settings.Count == 0 || reload)
    {
        settings = GenUtils.GetSettingsFromAzureTable();
    }

    string from_table;
    settings.TryGetValue(setting_name, out from_table); // from_table stays null when the key is absent
    if (from_table != null)
    {
        return from_table;
    }

    string from_role_config = null;
    try
    {
        if (RoleEnvironment.IsAvailable)
        {
            from_role_config = RoleEnvironment.GetConfigurationSettingValue(setting_name);
        }
    }
    catch (Exception e)
    {
        GenUtils.PriorityLogMsg("exception", "GetSettingValue", e.Message + e.StackTrace);
    }

    if (from_role_config == null)
    {
        GenUtils.PriorityLogMsg("warning", "GetSettingValue: " + setting_name, " is null");
    }

    return from_role_config;
}
// Reduce a list of events to one event per TitleAndTime() key.
//
// Steps, in order:
//   1. bucket events by dtstart and run MatchSimilarTitles on every bucket
//   2. flatten the buckets back into a single list
//   3. per event: normalize its url, count tag -> source occurrences, and accumulate the tags
//      and the url/source pairs seen for its key
//   4. optionally serialize the tag/source counts to the "tag_sources.obj" blob for this id
//   5. per event: remember its original categories, replace them with the sorted, de-duplicated
//      tags collected for its key, attach the key's url/source pairs, and store it in the
//      result dictionary under its key
//
// id:               blob container name used in step 4; step 4 is skipped when null.
// events:           the events to de-duplicate.
// save_tag_sources: when true (and id is not null) step 4 runs.
// Returns the values of the per-key result dictionary as a list.
public static List<ZonelessEvent> UniqueByTitleAndStart(string id, List<ZonelessEvent> events, bool save_tag_sources)
{
    var tag_sources = new Dictionary<string, Dictionary<string, int>>();
    var uniques = new Dictionary<string, ZonelessEvent>();
    var merged_tags = new Dictionary<string, List<string>>();
    var all_urls_and_sources = new Dictionary<string, Dictionary<string, string>>();
    var dt_dict = new Dictionary<DateTime, List<ZonelessEvent>>();

    // 1. datetime buckets, then similar-title matching inside each bucket
    foreach (ZonelessEvent incoming in events)
    {
        dt_dict.AddOrAppendDictOfListT(incoming.dtstart, incoming);
    }

    var settings = GenUtils.GetSettingsFromAzureTable();

    foreach (var bucket_start in dt_dict.Keys)
    {
        MatchSimilarTitles(bucket_start, dt_dict, settings);
    }

    // 2. back to a flat list, bucket by bucket
    var _events = dt_dict.Values.SelectMany(bucket => bucket).ToList();

    // 3. build the keyed structures
    foreach (var current in _events)
    {
        var key = current.TitleAndTime();

        // try url normalizations
        current.url = Utils.NormalizeEventfulUrl(current.url);
        current.url = Utils.NormalizeUpcomingUrl(current.url);

        if (current.categories != null)
        {
            var tags = current.categories.Split(',').ToList();

            foreach (var tag in tags)
            {
                Dictionary<string, int> source_counts;
                if (tag_sources.TryGetValue(tag, out source_counts))
                {
                    source_counts.IncrementOrAdd<string>(current.source);
                }
                else
                {
                    tag_sources[tag] = new Dictionary<string, int>() { { current.source, 1 } };
                }
            }

            merged_tags.AddOrUpdateDictOfListStr(key, tags); // update keyed tag list for this key
        }

        // update keyed url/source pairs for this key
        Dictionary<string, string> urls_for_key;
        if (all_urls_and_sources.TryGetValue(key, out urls_for_key))
        {
            urls_for_key[current.url] = current.source;
        }
        else
        {
            all_urls_and_sources[key] = new Dictionary<string, string>() { { current.url, current.source } };
        }
    }

    // 4. optional persistence of tag/source counts
    if (save_tag_sources && id != null)
    {
        var bs = BlobStorage.MakeDefaultBlobStorage();
        bs.SerializeObjectToAzureBlob(tag_sources, id, "tag_sources.obj");
    }

    // 5. use the keyed structures
    foreach (var current in _events)
    {
        var key = current.TitleAndTime();

        List<string> tags_for_key;
        if (merged_tags.TryGetValue(key, out tags_for_key))
        {
            current.original_categories = current.categories; // remember original categories for reporting
            var tags = tags_for_key.Unique().ToList();
            tags.Sort(String.CompareOrdinal);
            current.categories = string.Join(",", tags); // each event gets its key's tag union
        }

        current.urls_and_sources = all_urls_and_sources[key]; // each event gets its key's url/source pairs

        uniques.AddOrUpdateDictionary<string, ZonelessEvent>(key, current); // deduplicate
    }

    return uniques.Values.ToList();
}
// Worker main loop. Each pass:
//   1. reloads settings (keeping the previous ones if the table read comes back empty)
//   2. waits and restarts the pass if the "worker_is_paused" setting starts with "y"
//   3. loads hub ids and region ids and builds the todo list for the non-region hubs
//   4. processes ical tasks in parallel (bounded by "max_concurrent_icaltasks", default 3)
//   5. processes nonical tasks sequentially
//   6. finalizes every hub touched in 4 or 5, in parallel
//   7. processes stale regions
//   8. logs elapsed times and sleeps
//
// A failure while processing one nonical hub or one region is logged and the loop moves on to
// the next item. Previously those two catch blocks used `return`, which (unlike the `return`
// inside the Parallel.ForEach lambdas) left Run() altogether and ended the worker loop.
//
// Any exception that escapes the loop is logged by the outer catch, after which Run() returns.
public override void Run()
{
    try
    {
        var message = "Worker: Run";
        GenUtils.PriorityLogMsg("status", message, null);

        while (true)
        {
            GenUtils.LogMsg("status", "worker waking", null);

            var tmp_settings = GenUtils.GetSettingsFromAzureTable();
            if (tmp_settings.Count == 0)
            {
                GenUtils.PriorityLogMsg("exception", "Run -> GetSettings: cannot!", null);
            }
            else
            {
                settings = tmp_settings;
            }

            GenUtils.LogMsg("status", "worker updated " + settings.Count + " settings", null);

            int check_interval_minutes;
            try
            {
                check_interval_minutes = Convert.ToInt32(settings["scheduler_check_interval_minutes"]);
            }
            catch
            {
                // missing or unparsable setting: fall back to 5 minutes
                check_interval_minutes = 5;
            }

            var pause_setting = "worker_is_paused";
            if (settings.ContainsKey(pause_setting) && settings[pause_setting].ToLower().StartsWith("y"))
            {
                GenUtils.LogMsg("status", "worker is paused", null);
                Utils.WaitMinutes(check_interval_minutes);
                continue;
            }

            ids = Metadata.LoadHubIdsFromAzureTable();
            GenUtils.LogMsg("status", "worker loaded " + ids.Count + " ids", null);

            regions = Utils.GetRegionIds();
            GenUtils.LogMsg("status", "worker found " + regions.Count + " regions", null);

            //twitter_direct_messages = TwitterApi.GetNewTwitterDirectMessages(); // get new control messages // disabled for now, twitter didn't like this
            //GenUtils.LogMsg("status", "worker got " + twitter_direct_messages.Count + " messages", null);

            //ids = MaybeAdjustIdsForTesting(ids);

            todo = new Todo();

            var icals_and_nonicals = ids.Except(regions).ToList();

            try
            {
                BuildTodo(todo, icals_and_nonicals);
            }
            catch (Exception e)
            {
                // NOTE(review): this restarts the pass immediately, without any wait — confirm that is intended
                GenUtils.PriorityLogMsg("exception", "BuildTodo", e.Message);
                continue;
            }

            //HandleTwitterMessages(todo, ids);

            //MaybeRemakeWebRoleData();

            var sw_total = new Stopwatch();
            var sw_ical = new Stopwatch();
            var sw_finalize = new Stopwatch();
            var sw_nonical = new Stopwatch();
            var sw_region = new Stopwatch();

            sw_total.Start();

            sw_ical.Start();

            var options = new ParallelOptions();
            int max_ical_tasks;
            try
            {
                max_ical_tasks = Convert.ToInt32(settings["max_concurrent_icaltasks"]);
            }
            catch (Exception e)
            {
                GenUtils.PriorityLogMsg("exception", "Run: getting max_concurrent_icaltasks", e.Message);
                max_ical_tasks = 3;
            }
            options.MaxDegreeOfParallelism = max_ical_tasks;

            // this can be parallelized because there are many separate/unique endpoints
            Parallel.ForEach(source: todo.icaltasks, parallelOptions: options, body: (id) =>
            {
                try
                {
                    if (MemoryIsLow())
                    {
                        AlertLowMemory("icaltasks: " + id);
                        return; // ends this item only
                    }
                    Scheduler.UpdateStartTaskForId(id, TaskType.icaltasks);
                    ProcessIcal(id);
                    StopTask(id, TaskType.icaltasks);
                }
                catch (Exception e)
                {
                    GenUtils.PriorityLogMsg("exception", "Worker icaltasks", e.Message);
                    return; // ends this item only
                }
            });

            sw_ical.Stop();

            sw_nonical.Start();

            // this won't be parallelized because of api rate throttling in nonical service endpoints
            foreach (var id in todo.nonicaltasks)
            {
                try
                {
                    Scheduler.UpdateStartTaskForId(id, TaskType.nonicaltasks); // the todo list has a general start time, now update it to actual start
                    ProcessNonIcal(id);
                    StopTask(id, TaskType.nonicaltasks);
                }
                catch (Exception e)
                {
                    GenUtils.PriorityLogMsg("exception", "Worker nonicaltasks", e.Message);
                    continue; // move on to the next hub rather than leaving Run()
                }
            }

            sw_nonical.Stop();

            sw_finalize.Start();

            var finalizers = todo.nonicaltasks.Union(todo.icaltasks); // finalize ical and/or nonical updates

            Parallel.ForEach(source: finalizers, body: (id) =>
            {
                try
                {
                    FinalizeHub(id);
                }
                catch (Exception e)
                {
                    GenUtils.PriorityLogMsg("exception", "Worker finalize", e.Message);
                    return; // ends this item only
                }
            });

            sw_finalize.Stop();

            sw_region.Start();

            // now update regions, this can also be parallelized as needed
            foreach (var id in regions)
            {
                try
                {
                    if (RegionIsStale(id))
                    {
                        Scheduler.UpdateStartTaskForId(id, TaskType.regiontasks);
                        ProcessRegion(id);
                        StopTask(id, TaskType.regiontasks);
                    }
                }
                catch (Exception e)
                {
                    GenUtils.PriorityLogMsg("exception", "Worker regiontasks", e.Message);
                    continue; // one failing region no longer ends the worker or skips the others
                }
            }

            sw_region.Stop();

            sw_total.Stop();

            GenUtils.LogMsg("status",
                String.Format("worker: ical {0}, finalize {1}, nonical {2}, region {3}, total {4}",
                    sw_ical.Elapsed.ToString(),
                    sw_finalize.Elapsed.ToString(),
                    sw_nonical.Elapsed.ToString(),
                    sw_region.Elapsed.ToString(),
                    sw_total.Elapsed.ToString()
                    ),
                null
                );

            GenUtils.LogMsg("status", "worker sleeping", null);

            Sleep();
        }
    }
    catch (Exception e)
    {
        GenUtils.PriorityLogMsg("exception", "Worker.Run", e.Message + e.StackTrace);
    }
}
// Find facebook fan pages that mention a location and turn each one that has events into a
// TaggableSource.
//
// A bing search for site:www.facebook.com/pages "<location>" supplies candidate urls. Each
// candidate is processed in parallel: its facebook id is extracted, its events are fetched,
// and it is kept only if it has at least one event and FacebookPageMatchesLocation accepts it.
// Kept pages are appended to the list returned by InitializeTaggables and also passed to
// RememberTaggable.
//
// The candidates are handled by concurrent Parallel.ForEach bodies, so the shared collections
// (the set of already-seen facebook ids and the result list) are only touched under a lock;
// List<T> and HashSet<T> are not safe for concurrent writers.
//
// calinfo:  hub the sources are gathered for; its id goes into each ical url.
// location: text that must appear on the page; also used in the search query.
// Returns the taggable sources list. A failure on one candidate is logged and that candidate
// is dropped; it does not abort the others.
public static List<TaggableSource> GetFacebookPages(Calinfo calinfo, string location)
{
    var search_template = String.Format("site:www.facebook.com/__TARGET__ \"{0}\"", location);
    var search_for_fan_pages = search_template.Replace("__TARGET__", "pages");
    var stats = new Dictionary<string, object>();
    var fan_page_results = Search.BingSearch(search_for_fan_pages, 1000, stats);

    // searching for "groups" the same way doesn't work, the location string won't usually appear
    var group_results = new List<SearchResult>(); // placeholder for now
    var bing_results = fan_page_results.Concat(group_results).ToList();

    var taggable_sources = InitializeTaggables(calinfo, "facebook");

    var seen_ids = new HashSet<string>();
    var gate = new object(); // guards seen_ids and taggable_sources

    string name_and_pk = "facebooksources";

    var settings = GenUtils.GetSettingsFromAzureTable();
    var options = new ParallelOptions();

    Parallel.ForEach(source: bing_results, parallelOptions: options, body: (result) =>
    {
        try
        {
            var url = Regex.Replace(result.url, @"\?.+", ""); // remove query string if any
            var name = Regex.Match(result.url, "facebook.com/(pages|groups)/([^/]+)").Groups[2].Value;
            name = name.Replace('-', ' ');

            var fb_id = Utils.id_from_fb_fanpage_or_group(url);

            // atomic check-and-record: Add returns false when the id was already present
            bool first_sighting;
            lock (gate)
            {
                first_sighting = seen_ids.Add(fb_id);
            }
            if (!first_sighting)
            {
                return;
            }

            string slat = null;
            string slon = null;
            var ical = new DDay.iCal.iCalendar();
            var facebook_access_token = settings["facebook_access_token"]; // todo: merge with code in collector
            var j_obj = Utils.GetFacebookEventsAsJsonObject(fb_id, facebook_access_token);
            var events = Utils.iCalendarizeJsonObjectFromFacebook(j_obj, calinfo, ical, slat, slon, settings);

            if (events.Count == 0) // no calendar on this page
            {
                return;
            }

            string page;
            if (FacebookPageMatchesLocation(url, location, settings, out page) == false)
            {
                return;
            }

            string origin_url = "";
            if (!String.IsNullOrEmpty(page))
            {
                origin_url = GetFacebookPageOrGroupOriginUrl(page);
            }

            var ical_url = string.Format("http://{0}/ics_from_fb_page?fb_id={1}&elmcity_id={2}",
                ElmcityUtils.Configurator.appdomain,
                fb_id,
                calinfo.id);

            var has_future_events = FacebookPageHasFutureEvents(events, calinfo);

            var taggable = new TaggableSource(name, calinfo.id, url + "?sk=events", ical_url, has_future_events, origin_url);

            lock (gate)
            {
                taggable_sources.Add(taggable);
            }

            RememberTaggable(name_and_pk, fb_id, taggable);
        }
        catch (Exception e)
        {
            GenUtils.PriorityLogMsg("exception", "GetFacebookPages", e.Message + e.StackTrace);
            return;
        }
    });

    return taggable_sources;
}
// Refresh the web role's cached settings and themes.
//
// What actually runs:
//   1. settings are re-read from the azure table and, if non-empty, stored in ElmcityController.settings
//   2. themes are re-read and, if they differ from ElmcityController.themes, stored there
//   3. the method returns
//
// Everything after the unconditional `return;` (WebRoleData refresh, renderer comparison,
// cache rewarming, route re-registration) is unreachable as written.
// NOTE(review): the early return looks like a deliberate switch-off of the wrd/route reload,
// but nothing here says so — confirm before removing either the return or the dead code.
//
// Each stage has its own try/catch, so a failure in one stage is logged and does not stop the next.
public static void _ReloadSettingsAndRoutes()
{
    GenUtils.LogMsg("status", "webrole _ReloadRoutes", null);

    bool new_routes = false;

    // stage 1: settings — an empty result is logged and the previous settings are kept
    try
    {
        var settings = GenUtils.GetSettingsFromAzureTable();
        if (settings.Keys.Count == 0)
        {
            GenUtils.PriorityLogMsg("exception", "ReloadSettings: no settings!", null);
        }
        else
        {
            ElmcityController.settings = settings;
        }
    }
    catch (Exception e0)
    {
        var msg = "_ReloadSettingsAndRoutes: settings";
        GenUtils.PriorityLogMsg("exception", msg, e0.Message);
    }

    // stage 2: themes — replaced only when the freshly loaded dict differs from the cached one
    try
    {
        var themes = Utils.GetThemesDict();
        if (ObjectUtils.DictOfDictStrEqualsDictOfDictStr(themes, ElmcityController.themes) == false)
        {
            GenUtils.LogMsg("status", "_ReloadSettingsAndRoutes", "reloading themes");
            // NOTE(review): the lock is taken on the very reference that is reassigned inside it,
            // so the next caller locks a different object — verify this gives the exclusion intended
            lock (ElmcityController.themes)
            {
                ElmcityController.themes = themes;
            }
        }
    }
    catch (Exception e2)
    {
        var msg = "_ReloadSettingsAndRoutes: themes";
        GenUtils.PriorityLogMsg("exception", msg, e2.Message);
    }

    // unconditional exit: nothing below this line executes
    return;

    // stage 3 (unreachable): compare a fresh WebRoleData against the one held by ElmcityApp
    try
    {
        var new_wrd = WebRoleData.GetWrd();
        // NOTE(review): the consistency check is made on `wrd`, not on `new_wrd` — confirm which was meant
        if (new_wrd == null || wrd.IsConsistent() == false)
        {
            GenUtils.PriorityLogMsg("warning", "null or inconsistent WebRoleData!", null);
            return;
        }
        if (new_wrd.ready_ids.Count != ElmcityApp.wrd.ready_ids.Count) // did # of hubs change? either on initial load or subsequently
        {
            new_routes = true; // force rebuild of route map
            GenUtils.LogMsg("status", "Reload: found a new hub", null);
            WebRoleData.SaveTimestampedWrd(ElmcityApp.wrd);
            lock (ElmcityApp.wrd)
            {
                ElmcityApp.wrd = new_wrd; // update WebRoleData (todo: rewarm caches affected)
            }
        }

        foreach (var id in ElmcityApp.wrd.ready_ids) // did any hub's renderer change?
        {
            var cached_renderer = ElmcityApp.wrd.renderers[id];
            var current_renderer = Utils.AcquireRenderer(id);
            if (cached_renderer.timestamp != current_renderer.timestamp) // timestamp changed
            {
                // a differing timestamp alone is not enough: the renderers must also differ in some other key
                if (!Utils.RenderersAreEqual(cached_renderer, current_renderer, except_keys: new List <string>() { "timestamp" }))
                {
                    GenUtils.LogMsg("status", "Reload: new renderer for " + id, null);
                    lock (ElmcityApp.wrd)
                    {
                        ElmcityApp.wrd.renderers[id] = current_renderer; // update the renderer
                        if (ElmcityApp.home_controller != null) // skip this if we found a change on startup, controller not ready
                        {
                            var cache = new AspNetCache(ElmcityApp.home_controller.HttpContext.Cache);
                            var url = Utils.MakeBaseZonelessUrl(id);
                            cache.Remove(url); // flush cached objects for id
                            var obj = HttpUtils.FetchUrl(new Uri(url)); // rewarm cache
                        }
                    }
                }
            }
        }
    }
    catch (Exception e1)
    {
        GenUtils.PriorityLogMsg("exception", "_ReloadSettingsAndRoutes: cannot check/update wrd", e1.Message + e1.StackTrace);
    }

    // stage 4 (unreachable): rebuild the route table when the number of ready hubs changed
    if (new_routes)
    {
        var existing_routes = RouteTable.Routes;
        var route_count = existing_routes.Count;

        try
        {
            GenUtils.LogMsg("status", "_ReloadSettingsAndRoutes: registering " + route_count + " routes", null);

            lock (RouteTable.Routes)
            {
                var route_count_old = RouteTable.Routes.Count;
                GenUtils.PriorityLogMsg("info", RouteTable.Routes.Count + " routes before reload", null);
                RouteTable.Routes.Clear();
                ElmcityApp.RegisterRoutes(RouteTable.Routes, ElmcityApp.wrd);
                GenUtils.PriorityLogMsg("info", RouteTable.Routes.Count + " routes registered", null);
                var route_count_new = RouteTable.Routes.Count;
                // fewer routes than before the rebuild is reported as a warning
                if (route_count_new < route_count_old)
                {
                    GenUtils.PriorityLogMsg("warning", "route count was " + route_count_old + ", is " + route_count_new, null);
                }
            }
        }
        catch (Exception e3)
        {
            GenUtils.PriorityLogMsg("exception", "_ReloadSettingsAndRoutes: registering " + route_count + " routes", e3.Message + e3.StackTrace);
            // NOTE(review): existing_routes was assigned from RouteTable.Routes without copying, so this
            // registers into whatever that collection now holds — presumably not a restore; confirm
            ElmcityApp.RegisterRoutes(existing_routes, ElmcityApp.wrd);
        }
    }
}
// Read the settings stored in the named azure table.
// table: name of the table, forwarded as-is to GenUtils.GetSettingsFromAzureTable.
// Returns whatever dictionary that call produces.
private static Dictionary<string, string> GetSettings(string table)
{
    var table_settings = GenUtils.GetSettingsFromAzureTable(table);
    return table_settings;
}