/// <summary>
/// Determine whether the given clitic pronoun is an indirect object
/// pronoun or a reflexive pronoun.
/// </summary>
/// <remarks>
/// This method is only defined when the pronoun is one of
/// me, te, se, nos, os
/// i.e., those in which the meaning is actually ambiguous.
/// <para>
/// Resolution order: a lone "se" is treated as reflexive; otherwise the
/// verb stem is looked up in <c>alwaysReflexiveVerbs</c>, then in
/// <c>neverReflexiveVerbs</c>, and finally the (stem, clause yield) pair
/// is looked up in <c>bruteForceDecisions</c>. If none of these match,
/// the failure is logged and <c>Unknown</c> is returned.
/// </para>
/// </remarks>
/// <param name="strippedVerb">
/// Stripped verb as returned by
/// <see cref="Edu.Stanford.Nlp.International.Spanish.SpanishVerbStripper.SeparatePronouns(string)"/>.
/// </param>
/// <param name="pronounIdx">
/// The index of the pronoun within
/// <c>strippedVerb.GetPronouns()</c>
/// which should be disambiguated.
/// </param>
/// <param name="clauseYield">
/// A string representing the yield of the
/// clause which contains the given verb.
/// </param>
/// <returns>
/// <c>Reflexive</c> or <c>Object</c> when one of the lookups decides the
/// case, otherwise <c>Unknown</c>.
/// </returns>
/// <exception cref="System.ArgumentNullException">
/// If <paramref name="strippedVerb"/> is null.
/// </exception>
/// <exception cref="System.ArgumentException">
/// If the given pronoun is
/// not ambiguous, or its disambiguation is not supported.
/// </exception>
public static AnCoraPronounDisambiguator.PersonalPronounType DisambiguatePersonalPronoun(SpanishVerbStripper.StrippedVerb strippedVerb, int pronounIdx, string clauseYield)
{
    if (strippedVerb == null)
    {
        throw new ArgumentNullException("strippedVerb");
    }

    IList<string> pronouns = strippedVerb.GetPronouns();

    // Lowercase with the invariant culture: the result is used as a lookup
    // key, so it must not vary with the current thread culture.
    string pronoun = pronouns[pronounIdx].ToLowerInvariant();
    if (!ambiguousPersonalPronouns.Contains(pronoun))
    {
        throw new ArgumentException("We don't support disambiguating pronoun '" + pronoun + "'");
    }

    // A verb whose only clitic is "se" is classified as reflexive outright.
    if (pronouns.Count == 1 && Sharpen.Runtime.EqualsIgnoreCase(pronoun, "se"))
    {
        return AnCoraPronounDisambiguator.PersonalPronounType.Reflexive;
    }

    // Per-verb lists decide the case regardless of the surrounding clause.
    string verb = strippedVerb.GetStem();
    if (alwaysReflexiveVerbs.Contains(verb))
    {
        return AnCoraPronounDisambiguator.PersonalPronounType.Reflexive;
    }
    if (neverReflexiveVerbs.Contains(verb))
    {
        return AnCoraPronounDisambiguator.PersonalPronounType.Object;
    }

    // Fall back to decisions recorded for this exact (stem, clause) pair.
    Pair<string, string> bruteForceKey = new Pair<string, string>(verb, clauseYield);
    if (bruteForceDecisions.Contains(bruteForceKey))
    {
        return bruteForceDecisions[bruteForceKey];
    }

    // Log this instance where a clitic pronoun could not be disambiguated.
    log.Info("Failed to disambiguate: " + verb + "\nContaining clause:\t" + clauseYield + "\n");
    return AnCoraPronounDisambiguator.PersonalPronounType.Unknown;
}
/// <summary>
/// Splits a lexer-marked verb that carries attached clitic pronouns into
/// a stem label plus one label per pronoun, for example:
/// Escribamosela => Escribamo + se + la => escribamos + se + la
/// Sentaos => senta + os => sentad + os
/// Damelo => da + me + lo
/// </summary>
/// <remarks>
/// The parent annotation is removed from <paramref name="cl"/> first. The
/// pronoun labels are appended to <c>compoundBuffer</c>; only the stem
/// label is returned.
/// </remarks>
/// <param name="cl">The label of the verb token to process.</param>
/// <returns>
/// <paramref name="cl"/> itself when the verb stripper returns null for
/// its word; otherwise a copy of it carrying the stem, with its original
/// text set to the original stem.
/// </returns>
private CoreLabel ProcessVerb(CoreLabel cl)
{
    cl.Remove(typeof(CoreAnnotations.ParentAnnotation));

    SpanishVerbStripper.StrippedVerb parts = verbStripper.SeparatePronouns(cl.Word());
    if (parts == null)
    {
        // Nothing to split off: hand the label back as it is.
        return cl;
    }

    // Offset at which the original stem ends and the first pronoun begins.
    int stemEnd = cl.BeginPosition() + parts.GetOriginalStem().Length;

    // Walk the pronouns left to right; each one begins where the previous
    // one stopped, so a single running offset is enough.
    int cursor = stemEnd;
    foreach (string clitic in parts.GetPronouns())
    {
        compoundBuffer.Add(CopyCoreLabel(cl, clitic, cursor));
        cursor += clitic.Length;
    }

    // The stem label keeps the token's begin position; stemEnd is passed as
    // the fourth argument (presumably the end position — confirm against
    // CopyCoreLabel).
    CoreLabel stemLabel = CopyCoreLabel(cl, parts.GetStem(), cl.BeginPosition(), stemEnd);
    stemLabel.SetOriginalText(parts.GetOriginalStem());
    return stemLabel;
}