/// <summary>
/// Builds the token trie used for name matching from the keys of <c>characterMap</c>.
/// </summary>
/// <remarks>
/// Each key is split on single spaces; every resulting token becomes one edge of the trie,
/// hanging off a synthetic <c>"$ROOT"</c> node created with position -1. The node reached by the
/// last token of a key receives that key as <c>fullName</c> and the mapped value as <c>personList</c>.
/// Nodes that are only prefixes of longer keys keep whatever <c>fullName</c>/<c>personList</c> the
/// <c>TokenNode</c> constructor gave them (presumably null -- TODO confirm against TokenNode).
/// </remarks>
/// <returns>The root node of the newly built trie.</returns>
protected internal virtual Sieve.TokenNode CreateNameMatcher()
{
    Sieve.TokenNode trieRoot = new Sieve.TokenNode(this, "$ROOT", -1);

    foreach (string name in characterMap.Keys)
    {
        string[] parts = name.Split(" ");
        int lastPosition = parts.Length - 1;
        Sieve.TokenNode cursor = trieRoot;

        for (int position = 0; position <= lastPosition; position++)
        {
            string part = parts[position];

            // Create the child on first sight, then descend into it either way.
            if (!cursor.childNodes.Keys.Contains(part))
            {
                cursor.childNodes[part] = new Sieve.TokenNode(this, part, position);
            }
            cursor = cursor.childNodes[part];

            // The node for the final token marks a complete name.
            if (position == lastPosition)
            {
                cursor.personList = characterMap[name];
                cursor.fullName = name;
            }
        }
    }

    return trieRoot;
}
/// <summary>
/// Creates a sieve over <paramref name="doc"/> and immediately builds the name-matching trie
/// from <paramref name="characterMap"/>.
/// </summary>
/// <param name="doc">Annotated document this sieve operates on.</param>
/// <param name="characterMap">Maps a space-separated character name to its list of persons.</param>
/// <param name="pronounCorefMap">Stored as-is; not read by the constructor.</param>
/// <param name="animacySet">Stored as-is; not read by the constructor.</param>
public Sieve(
    Annotation doc,
    IDictionary<string, IList<Person>> characterMap,
    IDictionary<int, string> pronounCorefMap,
    ICollection<string> animacySet)
{
    // Store every collaborator before building the trie.
    this.characterMap = characterMap;
    this.pronounCorefMap = pronounCorefMap;
    this.animacySet = animacySet;
    this.doc = doc;

    // CreateNameMatcher reads characterMap, so it must run after the assignments above.
    // NOTE(review): CreateNameMatcher is virtual, so an override executes here before the
    // derived class's own constructor body has run.
    this.rootNameNode = CreateNameMatcher();
}
/// <summary>
/// Scans the document tokens inside <paramref name="textRun"/> for names stored in the name trie.
/// </summary>
/// <remarks>
/// Matching is greedy and does not find every possible candidate: a longer name wins over the
/// shorter names it contains. E.g. "Elizabeth and Jane" is returned only as "Elizabeth and Jane",
/// not additionally as "Elizabeth" and "Jane". A partial match that ends on a node without a
/// full name is discarded without backtracking to shorter prefixes.
/// </remarks>
/// <param name="textRun">
/// Inclusive (first, second) token index range to scan. The scan also stops at the end of the
/// document's token list if <c>second</c> lies beyond it.
/// </param>
/// <returns>
/// A pair of parallel lists: the matched full names, and for each name the inclusive
/// (start, end) token indices where it was found.
/// </returns>
public virtual Pair<List<string>, List<Pair<int, int>>> ScanForNamesNew(Pair<int, int> textRun)
{
    List<string> potentialNames = new List<string>();
    List<Pair<int, int>> nameIndices = new List<Pair<int, int>>();
    IList<CoreLabel> tokens = doc.Get(typeof(CoreAnnotations.TokensAnnotation));
    Sieve.TokenNode pointer = rootNameNode;

    // Declared outside the loop so the end-of-run case below knows where scanning really
    // stopped (either textRun.second + 1 or tokens.Count, whichever came first).
    int index = textRun.first;
    for (; index <= textRun.second && index < tokens.Count; index++)
    {
        string tokenText = tokens[index].Word();

        if (pointer.childNodes.Keys.Contains(tokenText))
        {
            // Token extends the current (possibly empty) match.
            pointer = pointer.childNodes[tokenText];
            continue;
        }

        if (pointer.token.Equals("$ROOT"))
        {
            // Not inside a match and this token starts none.
            continue;
        }

        // The running match was broken by this token. It ended on the previous token;
        // pointer.level is the zero-based position of that last token within the name.
        if (pointer.fullName != null)
        {
            potentialNames.Add(pointer.fullName);
            nameIndices.Add(new Pair<int, int>(index - 1 - pointer.level, index - 1));
        }
        pointer = rootNameNode;

        // The breaking token may itself begin a new name; test it against the root so that
        // a name directly following another name (or a failed prefix) is not skipped.
        if (pointer.childNodes.Keys.Contains(tokenText))
        {
            pointer = pointer.childNodes[tokenText];
        }
    }

    // Catch a name that runs up to the last scanned token. 'index' is one past that token.
    if (!pointer.token.Equals("$ROOT") && pointer.fullName != null)
    {
        potentialNames.Add(pointer.fullName);
        nameIndices.Add(new Pair<int, int>(index - 1 - pointer.level, index - 1));
    }

    return new Pair<List<string>, List<Pair<int, int>>>(potentialNames, nameIndices);
}