Exemple #1
0
 /// <summary>
 /// Builds a token tree from the keys of <c>characterMap</c>: each key is split on single
 /// spaces and its tokens are inserted as a path of child nodes below a synthetic root.
 /// </summary>
 /// <returns>
 /// The root node (token <c>"$ROOT"</c>, level -1). The node reached by the last token of a
 /// key has <c>fullName</c> set to that key and <c>personList</c> set to the key's map value.
 /// </returns>
 protected internal virtual Sieve.TokenNode CreateNameMatcher()
 {
     Sieve.TokenNode rootNode = new Sieve.TokenNode(this, "$ROOT", -1);
     foreach (string key in characterMap.Keys)
     {
         string[]        tokens   = key.Split(" ");
         Sieve.TokenNode currNode = rootNode;
         for (int i = 0; i < tokens.Length; i++)
         {
             string tok = tokens[i];

             // One lookup instead of Keys.Contains followed by the indexer.
             Sieve.TokenNode nextNode;
             if (!currNode.childNodes.TryGetValue(tok, out nextNode))
             {
                 // The node's level is the token's position inside the key (0-based).
                 nextNode = new Sieve.TokenNode(this, tok, i);
                 currNode.childNodes[tok] = nextNode;
             }
             currNode = nextNode;
         }

         // After the walk currNode is the node of the key's last token; mark it as a
         // complete name. The guard keeps the root untouched if no token was produced.
         if (tokens.Length > 0)
         {
             currNode.personList = characterMap[key];
             currNode.fullName   = key;
         }
     }
     return rootNode;
 }
Exemple #2
0
 /// <summary>
 /// Stores the document and lookup collections on the instance, then builds the name
 /// matcher and keeps its root in <c>rootNameNode</c>.
 /// </summary>
 /// <param name="doc">Annotated document this sieve works on.</param>
 /// <param name="characterMap">Map from a name string to its list of persons.</param>
 /// <param name="pronounCorefMap">Stored as-is on the instance.</param>
 /// <param name="animacySet">Stored as-is on the instance.</param>
 public Sieve(Annotation doc, IDictionary <string, IList <Person> > characterMap, IDictionary <int, string> pronounCorefMap, ICollection <string> animacySet)
 {
     this.animacySet      = animacySet;
     this.pronounCorefMap = pronounCorefMap;
     this.characterMap    = characterMap;
     this.doc             = doc;

     // Keep this last: CreateNameMatcher() reads characterMap, so every field must be
     // assigned before it runs.
     rootNameNode = CreateNameMatcher();
 }
Exemple #3
0
        /// <summary>
        /// Walks the document tokens in <paramref name="textRun"/> through the token tree rooted
        /// at <c>rootNameNode</c> and collects every full name reached along the way.
        /// </summary>
        /// <remarks>
        /// Note: this doesn't necessarily find all possible candidates, but is kind of a greedy version.
        /// E.g. "Elizabeth and Jane" will return only "Elizabeth and Jane", but not "Elizabeth", and "Jane" as well.
        /// The token that breaks off a match attempt is not itself retried from the root.
        /// </remarks>
        /// <param name="textRun">
        /// Inclusive (first, second) token index range to scan. Indices at or beyond the end of
        /// the token list are not visited.
        /// </param>
        /// <returns>
        /// A pair of parallel lists: the matched full names, and for each name its inclusive
        /// (begin, end) token indices.
        /// </returns>
        public virtual Pair <List <string>, List <Pair <int, int> > > ScanForNamesNew(Pair <int, int> textRun)
        {
            List <string>           potentialNames = new List <string>();
            List <Pair <int, int> > nameIndices    = new List <Pair <int, int> >();
            IList <CoreLabel>       tokens         = doc.Get(typeof(CoreAnnotations.TokensAnnotation));

            Sieve.TokenNode pointer = rootNameNode;

            // Declared outside the loop so the end-of-run case below can use the position where
            // scanning really stopped (which may be tokens.Count rather than textRun.second + 1).
            int index = textRun.first;
            for (; index <= textRun.second && index < tokens.Count; index++)
            {
                string tokenText = tokens[index].Word();

                Sieve.TokenNode child;
                if (pointer.childNodes.TryGetValue(tokenText, out child))
                {
                    // Current token extends the path being followed.
                    pointer = child;
                }
                else if (!pointer.token.Equals("$ROOT"))
                {
                    // Path broken: if it ended on a complete name, record it. The name's last
                    // token is at index - 1 and it spans pointer.level + 1 tokens.
                    if (pointer.fullName != null)
                    {
                        potentialNames.Add(pointer.fullName);
                        nameIndices.Add(new Pair <int, int>(index - 1 - pointer.level, index - 1));
                    }
                    pointer = rootNameNode;
                }
            }

            // Catch the end case: a path still open when the scan stopped. Here index is one
            // past the last token consumed.
            if (!pointer.token.Equals("$ROOT") && pointer.fullName != null)
            {
                potentialNames.Add(pointer.fullName);
                nameIndices.Add(new Pair <int, int>(index - 1 - pointer.level, index - 1));
            }
            return new Pair <List <string>, List <Pair <int, int> > >(potentialNames, nameIndices);
        }