Exemplo n.º 1
0
        /// <summary>
        /// Tokenizes <paramref name="text"/>, matches the normalized tokens against every
        /// non-empty gazetteer in <c>mGazetteers</c>, and emits one annotation per path
        /// produced by <c>GetPaths</c> for every match.
        /// </summary>
        /// <param name="text">Text to scan; it is assigned to the shared <c>mTokenizer</c>.</param>
        /// <param name="offset">
        /// Value added to both span boundaries of every annotation
        /// (presumably the position of <paramref name="text"/> inside a larger document; confirm with callers).
        /// </param>
        /// <returns>
        /// The annotations found, ordered by gazetteer, then by match start index, then by path.
        /// Each annotation carries a "gazUri" feature and, when its path has at least two
        /// elements, "objUri" and "objLabel" features taken from the second path element.
        /// </returns>
        public ArrayList <Annotation> ExtractEntities(string text, int offset)
        {
            mLogger.Debug("ExtractEntities", "Extracting entities ...");
            mTokenizer.Text = text;
            ArrayList <Annotation> annotations = new ArrayList <Annotation>();
            ArrayList <string>     normalized  = new ArrayList <string>();
            ArrayList <int>        pos         = new ArrayList <int>();
            ArrayList <int>        rawLengths  = new ArrayList <int>();

            for (SimpleTokenizer.Enumerator e = mTokenizer.GetEnumerator(); e.MoveNext();)
            {
                string rawToken = e.Current;
                normalized.Add(Normalize(rawToken));
                pos.Add(e.CurrentTokenIdx);
                // Keep the length of the token as it appears in the text: spans are built from
                // positions in the original text, so the normalized length must not be used
                // (it would be wrong whenever Normalize changes the token length).
                rawLengths.Add(rawToken.Length);
            }
            string[] tokens = normalized.ToArray();

            foreach (Gazetteer gazetteer in mGazetteers.Values)
            {
                if (gazetteer.mTerms.Count == 0)
                {
                    continue;
                }

                // The paths and their labels depend only on the gazetteer, not on where it matched,
                // so they are resolved lazily on the first match and reused for the later ones.
                // NOTE(review): this assumes GetPaths/GetLabel are deterministic for a given gazetteer - confirm.
                ArrayList <ArrayList <Entity> > allPaths = null;
                ArrayList <string>              pathStrs = null;

                for (int startIdx = 0; startIdx < tokens.Length; startIdx++)
                {
                    int len;
                    if (!gazetteer.Match(tokens, startIdx, out len))
                    {
                        continue;
                    }

                    if (allPaths == null)
                    {
                        allPaths = new ArrayList <ArrayList <Entity> >();
                        GetPaths(0, new ArrayList <Entity>(new Entity[] { gazetteer.mUri }), allPaths);
                        pathStrs = new ArrayList <string>();
                        foreach (ArrayList <Entity> path in allPaths)
                        {
                            // Labels are joined from the last path element down to index 2;
                            // elements 0 and 1 are not part of the path string.
                            string pathStr = "";
                            for (int i = path.Count - 1; i > 1; i--)
                            {
                                pathStr += GetLabel(path[i], mRdfStore) + "/";
                            }
                            pathStrs.Add(pathStr.TrimEnd('/'));
                        }
                    }

                    // Inclusive span, from the first character of the first matched token
                    // to the last character of the last matched token.
                    int lastIdx   = startIdx + len - 1;
                    int spanStart = pos[startIdx] + offset;
                    int spanEnd   = pos[lastIdx] + rawLengths[lastIdx] - 1 + offset;

                    for (int p = 0; p < allPaths.Count; p++)
                    {
                        ArrayList <Entity> path = allPaths[p];
                        Annotation annotation = new Annotation(spanStart, spanEnd, pathStrs[p]);
                        annotation.Features.SetFeatureValue("gazUri", gazetteer.mUri);
                        if (path.Count >= 2)
                        {
                            annotation.Features.SetFeatureValue("objUri", path[1].Uri);
                            annotation.Features.SetFeatureValue("objLabel", GetLabel(path[1], mRdfStore));
                        }
                        annotations.Add(annotation);
                    }
                }
            }
            return(annotations);
        }