/// <summary>
/// Sends the story text to the OpenCalais enrichment endpoint and converts the JSON
/// response into keywords, entities and one representative geotag.
/// </summary>
/// <param name="story">
/// Story whose <c>Text</c> is posted to the service and whose <c>TwitterGeotags</c>
/// take part in the location estimate.
/// </param>
/// <returns>
/// The collected tags. Failures are reported (to the console or through
/// <c>CrisisTracker.Common.Output.Print</c>) instead of being thrown, so the returned
/// collection may be empty or only partially filled.
/// </returns>
static StoryTagCollection OCGetTags(Story story)
{
    StoryTagCollection tagCollection = new StoryTagCollection();
    try
    {
        //Create the web request
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri("http://api.opencalais.com/tag/rs/enrich"));
        request.Method = "POST";
        request.ContentType = "text/raw";
        request.Accept = "Application/Json";
        request.Timeout = 15000; //milliseconds

        //NOTE(review): the license key is hard-coded in source; consider moving it to configuration.
        request.Headers.Add("x-calais-licenseID", "jb47yzz6jsqkhkbsqc3urfs7");

        //A null text is sent as an empty body
        byte[] byteData = Encoding.UTF8.GetBytes(story.Text ?? string.Empty);
        request.ContentLength = byteData.Length;

        //Write data
        using (Stream postStream = request.GetRequestStream())
        {
            postStream.Write(byteData, 0, byteData.Length);
        }

        //Get response; both the response and the reader are released before parsing starts
        string resultString;
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            resultString = reader.ReadToEnd();
        }

        Hashtable result = JSON.JsonDecode(resultString) as Hashtable;
        if (result == null)
        {
            //Routed through the catch block below so the failure is logged with a meaningful message
            throw new FormatException("OpenCalais response could not be decoded as a JSON object.");
        }

        //Extract all geotags that have lat-lon coordinates, plus keywords and entities
        List<Geotag> allGeotags = new List<Geotag>();
        foreach (object value in result.Values)
        {
            Hashtable item = value as Hashtable;
            if (item == null)
            {
                continue; //Not an object entry; nothing to extract
            }

            if (item.ContainsKey("resolutions")) //This is a geotag
            {
                ArrayList resolutionList = item["resolutions"] as ArrayList;
                Hashtable resolutions = (resolutionList != null && resolutionList.Count > 0)
                    ? resolutionList[0] as Hashtable
                    : null;
                if (resolutions == null)
                {
                    continue;
                }

                //The Hashtable indexer yields null for a missing key, so this also covers absent coordinates
                object latitude = resolutions["latitude"];
                object longitude = resolutions["longitude"];
                if (latitude == null || longitude == null)
                {
                    continue;
                }

                object country = resolutions["containedbycountry"];
                string parentCountry = country != null ? country.ToString() : "";

                allGeotags.Add(new Geotag()
                {
                    Name = item["name"].ToString(),
                    ParentCountry = parentCountry,
                    Type = item["_type"].ToString(),
                    //Convert handles both string and numeric values without a culture-sensitive ToString() round-trip
                    Latitude = Convert.ToDouble(latitude, CultureInfo.InvariantCulture),
                    Longitude = Convert.ToDouble(longitude, CultureInfo.InvariantCulture),
                    Count = ((ArrayList)item["instances"]).Count
                });
            }
            else if (item.ContainsKey("_type") && _OCInterestingEventTypes.Contains((string)item["_type"]))
            {
                foreach (DictionaryEntry field in item)
                {
                    //Only plain string values qualify; values containing "http" are skipped
                    string text = field.Value as string;
                    if (text == null || text.Contains("http"))
                    {
                        continue;
                    }

                    if (_OCKeywordFields.Contains(field.Key))
                    {
                        tagCollection.Keywords.Add(text);
                    }
                    if (_OCEntityFields.Contains(field.Key))
                    {
                        tagCollection.Entities.Add(text);
                    }
                }
            }

            //Topic entries contribute their category name as a keyword
            object typeGroup = item["_typeGroup"];
            if (typeGroup != null && typeGroup.ToString() == "topics")
            {
                string categoryName = item["categoryName"] as string;
                if (categoryName != null)
                {
                    tagCollection.Keywords.Add(categoryName);
                }
            }
        }

        //Pick geotags to use
        if (allGeotags.Count > 0 || story.TwitterGeotags.Count > 0)
        {
            //Calculate a weighted average location based on OC tags and Twitter geotags,
            //then take the nearest tag and use it as the story location.
            //NOTE(review): a plain arithmetic mean of longitudes misbehaves for points on both
            //sides of the 180-degree meridian - confirm whether that matters for this data.
            const double twitterWeight = 5;
            double lonSum = 0;
            double latSum = 0;
            double weightSum = 0;

            foreach (Geotag g in allGeotags.Distinct())
            {
                double weight;
                if (g.Type == "Country")
                {
                    weight = 2;
                }
                else if (g.Type == "ProvinceOrState")
                {
                    weight = 3;
                }
                else //"City" and any other type
                {
                    weight = 4;
                }

                lonSum += weight * g.Longitude;
                latSum += weight * g.Latitude;
                weightSum += weight;
            }

            foreach (Geotag g in story.TwitterGeotags.Distinct())
            {
                lonSum += twitterWeight * g.Longitude;
                latSum += twitterWeight * g.Latitude;
                weightSum += twitterWeight;
            }

            //weightSum is positive here because at least one tag contributed a positive weight
            Geotag meanLoc = new Geotag() { Latitude = latSum / weightSum, Longitude = lonSum / weightSum };
            Geotag nearestTag = allGeotags.Union(story.TwitterGeotags).OrderBy(n => Geotag.Distance(n, meanLoc)).First();
            Console.WriteLine("OC: " + allGeotags.Count + ", Twitter: " + story.TwitterGeotags.Count);
            tagCollection.Geotags.Add(nearestTag);
        }
    }
    catch (WebException e)
    {
        //Classify by status instead of by (localizable) message text.
        //Disposing the error response releases the underlying connection.
        using (HttpWebResponse errorResponse = e.Response as HttpWebResponse)
        {
            bool timedOut = e.Status == WebExceptionStatus.Timeout
                || (errorResponse != null
                    && (errorResponse.StatusCode == HttpStatusCode.RequestTimeout
                        || errorResponse.StatusCode == HttpStatusCode.GatewayTimeout));

            if (timedOut)
            {
                Console.WriteLine("Timeout");
            }
            else if (errorResponse != null && errorResponse.StatusCode == HttpStatusCode.InternalServerError)
            {
                Console.WriteLine("Internal Server Error");
            }
            else
            {
                CrisisTracker.Common.Output.Print(Name, e);
            }
        }
    }
    catch (Exception e)
    {
        CrisisTracker.Common.Output.Print(Name, e);
    }

    return tagCollection;
}
/// <summary>
/// Sends the story text to the OpenCalais enrichment endpoint and converts the JSON
/// response into keywords, entities and one representative geotag.
/// </summary>
/// <param name="story">
/// Story whose <c>Text</c> is posted to the service and whose <c>TwitterGeotags</c>
/// take part in the location estimate.
/// </param>
/// <returns>
/// The collected tags. Failures are reported (to the console or through
/// <c>CrisisTracker.Common.Output.Print</c>) instead of being thrown, so the returned
/// collection may be empty or only partially filled.
/// </returns>
static StoryTagCollection OCGetTags(Story story)
{
    StoryTagCollection tagCollection = new StoryTagCollection();
    try
    {
        //Create the web request
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri("http://api.opencalais.com/tag/rs/enrich"));
        request.Method = "POST";
        request.ContentType = "text/raw";
        request.Accept = "Application/Json";
        request.Timeout = 15000; //milliseconds

        //NOTE(review): the license key is hard-coded in source; consider moving it to configuration.
        request.Headers.Add("x-calais-licenseID", "jb47yzz6jsqkhkbsqc3urfs7");

        //A null text is sent as an empty body
        byte[] byteData = Encoding.UTF8.GetBytes(story.Text ?? string.Empty);
        request.ContentLength = byteData.Length;

        //Write data
        using (Stream postStream = request.GetRequestStream())
        {
            postStream.Write(byteData, 0, byteData.Length);
        }

        //Get response; both the response and the reader are released before parsing starts
        string resultString;
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            resultString = reader.ReadToEnd();
        }

        Hashtable result = JSON.JsonDecode(resultString) as Hashtable;
        if (result == null)
        {
            //Routed through the catch block below so the failure is logged with a meaningful message
            throw new FormatException("OpenCalais response could not be decoded as a JSON object.");
        }

        //Extract all geotags that have lat-lon coordinates, plus keywords and entities
        List<Geotag> allGeotags = new List<Geotag>();
        foreach (object value in result.Values)
        {
            Hashtable item = value as Hashtable;
            if (item == null)
            {
                continue; //Not an object entry; nothing to extract
            }

            if (item.ContainsKey("resolutions")) //This is a geotag
            {
                ArrayList resolutionList = item["resolutions"] as ArrayList;
                Hashtable resolutions = (resolutionList != null && resolutionList.Count > 0)
                    ? resolutionList[0] as Hashtable
                    : null;
                if (resolutions == null)
                {
                    continue;
                }

                //The Hashtable indexer yields null for a missing key, so this also covers absent coordinates
                object latitude = resolutions["latitude"];
                object longitude = resolutions["longitude"];
                if (latitude == null || longitude == null)
                {
                    continue;
                }

                object country = resolutions["containedbycountry"];
                string parentCountry = country != null ? country.ToString() : "";

                allGeotags.Add(new Geotag()
                {
                    Name = item["name"].ToString(),
                    ParentCountry = parentCountry,
                    Type = item["_type"].ToString(),
                    //Convert handles both string and numeric values without a culture-sensitive ToString() round-trip
                    Latitude = Convert.ToDouble(latitude, CultureInfo.InvariantCulture),
                    Longitude = Convert.ToDouble(longitude, CultureInfo.InvariantCulture),
                    Count = ((ArrayList)item["instances"]).Count
                });
            }
            else if (item.ContainsKey("_type") && _OCInterestingEventTypes.Contains((string)item["_type"]))
            {
                foreach (DictionaryEntry field in item)
                {
                    //Only plain string values qualify; values containing "http" are skipped
                    string text = field.Value as string;
                    if (text == null || text.Contains("http"))
                    {
                        continue;
                    }

                    if (_OCKeywordFields.Contains(field.Key))
                    {
                        tagCollection.Keywords.Add(text);
                    }
                    if (_OCEntityFields.Contains(field.Key))
                    {
                        tagCollection.Entities.Add(text);
                    }
                }
            }

            //Topic entries contribute their category name as a keyword
            object typeGroup = item["_typeGroup"];
            if (typeGroup != null && typeGroup.ToString() == "topics")
            {
                string categoryName = item["categoryName"] as string;
                if (categoryName != null)
                {
                    tagCollection.Keywords.Add(categoryName);
                }
            }
        }

        //Pick geotags to use
        if (allGeotags.Count > 0 || story.TwitterGeotags.Count > 0)
        {
            //Calculate a weighted average location based on OC tags and Twitter geotags,
            //then take the nearest tag and use it as the story location.
            //NOTE(review): a plain arithmetic mean of longitudes misbehaves for points on both
            //sides of the 180-degree meridian - confirm whether that matters for this data.
            const double twitterWeight = 5;
            double lonSum = 0;
            double latSum = 0;
            double weightSum = 0;

            foreach (Geotag g in allGeotags.Distinct())
            {
                double weight;
                if (g.Type == "Country")
                {
                    weight = 2;
                }
                else if (g.Type == "ProvinceOrState")
                {
                    weight = 3;
                }
                else //"City" and any other type
                {
                    weight = 4;
                }

                lonSum += weight * g.Longitude;
                latSum += weight * g.Latitude;
                weightSum += weight;
            }

            foreach (Geotag g in story.TwitterGeotags.Distinct())
            {
                lonSum += twitterWeight * g.Longitude;
                latSum += twitterWeight * g.Latitude;
                weightSum += twitterWeight;
            }

            //weightSum is positive here because at least one tag contributed a positive weight
            Geotag meanLoc = new Geotag() { Latitude = latSum / weightSum, Longitude = lonSum / weightSum };
            Geotag nearestTag = allGeotags.Union(story.TwitterGeotags).OrderBy(n => Geotag.Distance(n, meanLoc)).First();
            Console.WriteLine("OC: " + allGeotags.Count + ", Twitter: " + story.TwitterGeotags.Count);
            tagCollection.Geotags.Add(nearestTag);
        }
    }
    catch (WebException e)
    {
        //Classify by status instead of by (localizable) message text.
        //Disposing the error response releases the underlying connection.
        using (HttpWebResponse errorResponse = e.Response as HttpWebResponse)
        {
            bool timedOut = e.Status == WebExceptionStatus.Timeout
                || (errorResponse != null
                    && (errorResponse.StatusCode == HttpStatusCode.RequestTimeout
                        || errorResponse.StatusCode == HttpStatusCode.GatewayTimeout));

            if (timedOut)
            {
                Console.WriteLine("Timeout");
            }
            else if (errorResponse != null && errorResponse.StatusCode == HttpStatusCode.InternalServerError)
            {
                Console.WriteLine("Internal Server Error");
            }
            else
            {
                CrisisTracker.Common.Output.Print(Name, e);
            }
        }
    }
    catch (Exception e)
    {
        CrisisTracker.Common.Output.Print(Name, e);
    }

    return tagCollection;
}