예제 #1
0
        /// <summary>
        /// Posts the story text to the OpenCalais enrichment endpoint and converts the JSON response
        /// into keywords, entities and at most one geotag for the story.
        /// </summary>
        /// <param name="story">
        /// Story whose <c>Text</c> is sent as the request body and whose <c>TwitterGeotags</c>
        /// take part in choosing the story location.
        /// </param>
        /// <returns>
        /// The collected tags. Exceptions raised while requesting or parsing are reported and not rethrown,
        /// so the collection contains whatever had been gathered before the failure (possibly nothing).
        /// </returns>
        static StoryTagCollection OCGetTags(Story story)
        {
            StoryTagCollection tagCollection = new StoryTagCollection();

            try
            {
                Uri address = new Uri("http://api.opencalais.com/tag/rs/enrich");

                // Direct casts: an unexpected request/response type fails with an InvalidCastException
                // at the cast instead of a NullReferenceException further down.
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(address);
                request.Method = "POST";
                request.ContentType = "text/raw";
                request.Accept = "Application/Json";
                request.Timeout = 15000;

                // NOTE(review): the license key is hard-coded in source; consider loading it from configuration.
                request.Headers.Add("x-calais-licenseID", "jb47yzz6jsqkhkbsqc3urfs7");

                // A null story text is sent as an empty body.
                byte[] byteData = Encoding.UTF8.GetBytes(story.Text ?? string.Empty);
                request.ContentLength = byteData.Length;

                using (Stream postStream = request.GetRequestStream())
                {
                    postStream.Write(byteData, 0, byteData.Length);
                }

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                {
                    string resultString = reader.ReadToEnd();

                    Hashtable result = JSON.JsonDecode(resultString) as Hashtable;
                    if (result == null)
                    {
                        // Caught and reported by the general handler below.
                        throw new InvalidDataException("OpenCalais response could not be decoded as a JSON object.");
                    }

                    //Extract all geotags that have lat-lon coordinates
                    List<Geotag> allGeotags = new List<Geotag>();
                    foreach (object entry in result.Values)
                    {
                        Hashtable item = entry as Hashtable;
                        if (item == null)
                        {
                            continue; // Only object-valued entries carry tag data
                        }

                        if (item.ContainsKey("resolutions")) //This is a geotag
                        {
                            ArrayList resolutionList = item["resolutions"] as ArrayList;
                            if (resolutionList == null || resolutionList.Count == 0)
                            {
                                continue;
                            }

                            // Only the first resolution is used.
                            Hashtable resolutions = resolutionList[0] as Hashtable;
                            if (resolutions == null
                                || !resolutions.ContainsKey("latitude")
                                || !resolutions.ContainsKey("longitude"))
                            {
                                continue;
                            }

                            string parentCountry = "";
                            if (resolutions.ContainsKey("containedbycountry"))
                            {
                                parentCountry = resolutions["containedbycountry"].ToString();
                            }

                            allGeotags.Add(new Geotag()
                            {
                                Name = item["name"].ToString(),
                                ParentCountry = parentCountry,
                                Type = item["_type"].ToString(),
                                Latitude = double.Parse(resolutions["latitude"].ToString(), CultureInfo.InvariantCulture),
                                Longitude = double.Parse(resolutions["longitude"].ToString(), CultureInfo.InvariantCulture),
                                Count = ((ArrayList)item["instances"]).Count
                            });
                        }
                        else if (item.ContainsKey("_type") && _OCInterestingEventTypes.Contains((string)item["_type"]))
                        {
                            foreach (DictionaryEntry field in item)
                            {
                                // Non-string values and values containing "http" are never used as tags.
                                string text = field.Value as string;
                                if (text == null || text.Contains("http"))
                                {
                                    continue;
                                }

                                if (_OCKeywordFields.Contains(field.Key))
                                {
                                    tagCollection.Keywords.Add(text);
                                }
                                if (_OCEntityFields.Contains(field.Key))
                                {
                                    tagCollection.Entities.Add(text);
                                }
                            }
                        }

                        object typeGroup = item["_typeGroup"];
                        if (typeGroup != null && typeGroup.ToString() == "topics")
                        {
                            string category = item["categoryName"] as string;
                            if (category != null)
                            {
                                tagCollection.Keywords.Add(category);
                            }
                        }
                    }

                    //Pick geotags to use
                    if (allGeotags.Count > 0 || story.TwitterGeotags.Count > 0)
                    {
                        //Calculate a weighted average location based on OC tags and Twitter geotags, then take the nearest tag and use it as the story location
                        double lonSum = 0;
                        double latSum = 0;
                        double weightSum = 0;

                        foreach (Geotag g in allGeotags.Distinct())
                        {
                            double weight;
                            if (g.Type == "Country")
                            {
                                weight = 2;
                            }
                            else if (g.Type == "ProvinceOrState")
                            {
                                weight = 3;
                            }
                            else // every other type, e.g. "City"
                            {
                                weight = 4;
                            }

                            lonSum += weight * g.Longitude;
                            latSum += weight * g.Latitude;
                            weightSum += weight;
                        }

                        foreach (Geotag g in story.TwitterGeotags.Distinct())
                        {
                            double weight = 5;
                            lonSum += weight * g.Longitude;
                            latSum += weight * g.Latitude;
                            weightSum += weight;
                        }

                        // weightSum is positive here: at least one of the two tag lists is non-empty.
                        Geotag meanLoc = new Geotag() { Latitude = latSum / weightSum, Longitude = lonSum / weightSum };
                        Geotag nearestTag = allGeotags.Union(story.TwitterGeotags).OrderBy(n => Geotag.Distance(n, meanLoc)).First();

                        Console.WriteLine("OC: " + allGeotags.Count + ", Twitter: " + story.TwitterGeotags.Count);

                        tagCollection.Geotags.Add(nearestTag);
                    }
                }
            }
            catch (WebException e)
            {
                // Classify by status and HTTP status code; message text varies with the runtime language,
                // so it is kept only as a fallback.
                HttpWebResponse errorResponse = e.Response as HttpWebResponse;
                bool timedOut = e.Status == WebExceptionStatus.Timeout
                    || e.Message.Contains("Timeout")
                    || e.Message.Contains("timed out");
                bool serverError = (errorResponse != null && errorResponse.StatusCode == HttpStatusCode.InternalServerError)
                    || e.Message.Contains("Internal Server Error");

                if (timedOut)
                {
                    Console.WriteLine("Timeout");
                }
                else if (serverError)
                {
                    Console.WriteLine("Internal Server Error");
                }
                else
                {
                    CrisisTracker.Common.Output.Print(Name, e);
                }
            }
            catch (Exception e)
            {
                CrisisTracker.Common.Output.Print(Name, e);
            }

            return tagCollection;
        }
예제 #2
0
        /// <summary>
        /// Posts the story text to the OpenCalais enrichment endpoint and converts the JSON response
        /// into keywords, entities and at most one geotag for the story.
        /// </summary>
        /// <param name="story">
        /// Story whose <c>Text</c> is sent as the request body and whose <c>TwitterGeotags</c>
        /// take part in choosing the story location.
        /// </param>
        /// <returns>
        /// The collected tags. Exceptions raised while requesting or parsing are reported and not rethrown,
        /// so the collection contains whatever had been gathered before the failure (possibly nothing).
        /// </returns>
        static StoryTagCollection OCGetTags(Story story)
        {
            StoryTagCollection tagCollection = new StoryTagCollection();

            try
            {
                Uri address = new Uri("http://api.opencalais.com/tag/rs/enrich");

                // Direct casts: an unexpected request/response type fails with an InvalidCastException
                // at the cast instead of a NullReferenceException further down.
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(address);

                //Set type to POST
                request.Method      = "POST";
                request.ContentType = "text/raw";
                request.Accept      = "Application/Json";
                request.Timeout     = 15000;

                //Set headers
                // NOTE(review): the license key is hard-coded in source; consider loading it from configuration.
                request.Headers.Add("x-calais-licenseID", "jb47yzz6jsqkhkbsqc3urfs7");

                //Encode the text as a byte array; a null story text is sent as an empty body
                byte[] byteData = Encoding.UTF8.GetBytes(story.Text ?? string.Empty);

                //Set the content length in the request headers
                request.ContentLength = byteData.Length;

                //Write data
                using (Stream postStream = request.GetRequestStream())
                {
                    postStream.Write(byteData, 0, byteData.Length);
                }

                //Get response; both the response and its reader are disposed when the block exits
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                {
                    string resultString = reader.ReadToEnd();

                    Hashtable result = JSON.JsonDecode(resultString) as Hashtable;
                    if (result == null)
                    {
                        // Caught and reported by the general handler below.
                        throw new InvalidDataException("OpenCalais response could not be decoded as a JSON object.");
                    }

                    //Extract all geotags that have lat-lon coordinates
                    List <Geotag> allGeotags = new List <Geotag>();
                    foreach (object entry in result.Values)
                    {
                        Hashtable item = entry as Hashtable;
                        if (item == null)
                        {
                            continue; // Only object-valued entries carry tag data
                        }

                        if (item.ContainsKey("resolutions")) //This is a geotag
                        {
                            ArrayList resolutionList = item["resolutions"] as ArrayList;
                            if (resolutionList == null || resolutionList.Count == 0)
                            {
                                continue;
                            }

                            // Only the first resolution is used.
                            Hashtable resolutions = resolutionList[0] as Hashtable;
                            if (resolutions == null
                                || !resolutions.ContainsKey("latitude")
                                || !resolutions.ContainsKey("longitude"))
                            {
                                continue;
                            }

                            string parentCountry = "";
                            if (resolutions.ContainsKey("containedbycountry"))
                            {
                                parentCountry = resolutions["containedbycountry"].ToString();
                            }

                            allGeotags.Add(new Geotag()
                            {
                                Name          = item["name"].ToString(),
                                ParentCountry = parentCountry,
                                Type          = item["_type"].ToString(),
                                Latitude      = double.Parse(resolutions["latitude"].ToString(), CultureInfo.InvariantCulture),
                                Longitude     = double.Parse(resolutions["longitude"].ToString(), CultureInfo.InvariantCulture),
                                Count         = ((ArrayList)item["instances"]).Count
                            });
                        }
                        else if (item.ContainsKey("_type") && _OCInterestingEventTypes.Contains((string)item["_type"]))
                        {
                            foreach (DictionaryEntry field in item)
                            {
                                // Non-string values and values containing "http" are never used as tags.
                                string text = field.Value as string;
                                if (text == null || text.Contains("http"))
                                {
                                    continue;
                                }

                                if (_OCKeywordFields.Contains(field.Key))
                                {
                                    tagCollection.Keywords.Add(text);
                                }
                                if (_OCEntityFields.Contains(field.Key))
                                {
                                    tagCollection.Entities.Add(text);
                                }
                            }
                        }

                        object typeGroup = item["_typeGroup"];
                        if (typeGroup != null && typeGroup.ToString() == "topics")
                        {
                            string category = item["categoryName"] as string;
                            if (category != null)
                            {
                                tagCollection.Keywords.Add(category);
                            }
                        }
                    }

                    //Pick geotags to use
                    if (allGeotags.Count > 0 || story.TwitterGeotags.Count > 0)
                    {
                        //Calculate a weighted average location based on OC tags and Twitter geotags, then take the nearest tag and use it as the story location

                        double lonSum    = 0;
                        double latSum    = 0;
                        double weightSum = 0;

                        foreach (Geotag g in allGeotags.Distinct())
                        {
                            double weight;
                            if (g.Type == "Country")
                            {
                                weight = 2;
                            }
                            else if (g.Type == "ProvinceOrState")
                            {
                                weight = 3;
                            }
                            else // every other type, e.g. "City"
                            {
                                weight = 4;
                            }
                            lonSum    += weight * g.Longitude;
                            latSum    += weight * g.Latitude;
                            weightSum += weight;
                        }

                        foreach (Geotag g in story.TwitterGeotags.Distinct())
                        {
                            double weight = 5;
                            lonSum    += weight * g.Longitude;
                            latSum    += weight * g.Latitude;
                            weightSum += weight;
                        }

                        // weightSum is positive here: at least one of the two tag lists is non-empty.
                        Geotag meanLoc = new Geotag()
                        {
                            Latitude = latSum / weightSum, Longitude = lonSum / weightSum
                        };
                        Geotag nearestTag = allGeotags.Union(story.TwitterGeotags).OrderBy(n => Geotag.Distance(n, meanLoc)).First();

                        Console.WriteLine("OC: " + allGeotags.Count + ", Twitter: " + story.TwitterGeotags.Count);

                        tagCollection.Geotags.Add(nearestTag);
                    }
                }
            }
            catch (WebException e)
            {
                // Classify by status and HTTP status code; message text varies with the runtime language,
                // so it is kept only as a fallback.
                HttpWebResponse errorResponse = e.Response as HttpWebResponse;
                bool timedOut = e.Status == WebExceptionStatus.Timeout
                                || e.Message.Contains("Timeout")
                                || e.Message.Contains("timed out");
                bool serverError = (errorResponse != null && errorResponse.StatusCode == HttpStatusCode.InternalServerError)
                                   || e.Message.Contains("Internal Server Error");

                if (timedOut)
                {
                    Console.WriteLine("Timeout");
                }
                else if (serverError)
                {
                    Console.WriteLine("Internal Server Error");
                }
                else
                {
                    CrisisTracker.Common.Output.Print(Name, e);
                }
            }
            catch (Exception e)
            {
                CrisisTracker.Common.Output.Print(Name, e);
            }

            return tagCollection;
        }