コード例 #1
0
        private static void ExtractNERTags(CoreDocument coredoc, Lucene.Net.Documents.Document document)
        {
            //I have no clue as to why NER-tagged messages are stored like that. I guess there is some deep idea behind copying the same info over and over again (or, most likely, this is because some documents have more than one sentence. even tho its stil really stupid)
            if (coredoc != null)
            {
                List nerList = coredoc.entityMentions();
                if (nerList.size() > 0)
                {
                    for (int j = 0; j < nerList.size(); j++)
                    {
                        CoreEntityMention em = (CoreEntityMention)nerList.get(j);
                        //Does this need to be a switch case?
                        if (em.entityType() == "DATE")
                        {
                            var datekey = document.GetField("id").GetInt32Value().Value;
                            if (!DateList.ContainsKey(datekey))
                            {
                                DateList.Add(datekey, em.text());
                            }
                            else
                            {
                                DateList.TryUpdate(datekey, DateList[datekey] + ", " + em.text());
                            }
                        }
                        if (em.entityType() == "TIME")
                        {
                            var timekey = document.GetField("id").GetInt32Value().Value;
                            if (!TimeList.ContainsKey(timekey))
                            {
                                TimeList.Add(timekey, em.text());
                            }
                            else
                            {
                                TimeList.TryUpdate(timekey, TimeList[timekey] + ", " + em.text());
                            }
                        }

                        if (em.entityType() == "LOCATION")
                        {
                            var lockey = document.GetField("id").GetInt32Value().Value;
                            if (!LocList.ContainsKey(lockey))
                            {
                                LocList.Add(lockey, em.text());
                            }
                            else
                            {
                                LocList.TryUpdate(lockey, LocList[lockey] + ", " + em.text());
                            }
                        }
                        if (em.entityType() == "ORGANIZATION")
                        {
                            var orgkey = document.GetField("id").GetInt32Value().Value;
                            if (!OrgList.ContainsKey(orgkey))
                            {
                                OrgList.Add(orgkey, em.text());
                            }
                            else
                            {
                                OrgList.TryUpdate(orgkey, OrgList[orgkey] + ", " + em.text());
                            }
                        }

                        if (em.entityType() == "URL")
                        {
                            var urlkey = document.GetField("id").GetInt32Value().Value;
                            if (!URLList.ContainsKey(urlkey))
                            {
                                URLList.Add(urlkey, em.text());
                            }
                            else
                            {
                                URLList.TryUpdate(urlkey, OrgList[urlkey] + ", " + em.text());
                            }
                        }
                    }
                }
            }
        }