/// <summary>
/// Verifies that <c>SpanishUnknownWordSignatures.HasConditionalSuffix</c> accepts
/// the "-ría" family of endings and rejects the look-alike "-ía" endings that
/// lack the preceding "r".
/// </summary>
public virtual void TestHasConditionalSuffix()
 {
     // Forms that must be recognized as carrying a conditional suffix.
     string[] conditionalForms = { "debería", "deberías", "deberíamos", "deberíais", "deberían" };
     // Same endings without the "r": these must NOT be recognized.
     string[] nonConditionalForms = { "debía", "debías", "debíamos", "debíais", "debían" };

     foreach (string form in conditionalForms)
     {
         // The message names the word so a failure inside the loop is still diagnosable.
         NUnit.Framework.Assert.IsTrue(SpanishUnknownWordSignatures.HasConditionalSuffix(form), "Expected a conditional suffix in: " + form);
     }
     foreach (string form in nonConditionalForms)
     {
         NUnit.Framework.Assert.IsFalse(SpanishUnknownWordSignatures.HasConditionalSuffix(form), "Expected no conditional suffix in: " + form);
     }
 }
 /// <summary>
 /// Verifies that <c>SpanishUnknownWordSignatures.HasImperfectErIrSuffix</c> accepts
 /// "vivía" and rejects "viviría".
 /// </summary>
 public virtual void TestHasImperfectErIrSuffix()
 {
     // Positive case: should be reported as having the suffix.
     bool acceptedForm = SpanishUnknownWordSignatures.HasImperfectErIrSuffix("vivía");
     NUnit.Framework.Assert.IsTrue(acceptedForm);

     // Negative case: should not be reported as having the suffix.
     bool rejectedForm = SpanishUnknownWordSignatures.HasImperfectErIrSuffix("viviría");
     NUnit.Framework.Assert.IsFalse(rejectedForm);
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the unknown-word signature (equivalence class label) for a word.
        /// </summary>
        /// <remarks>
        /// Every signature starts with "UNK". Only <c>unknownLevel == 1</c> is
        /// implemented; any other level logs an error and returns the bare "UNK".
        /// At level 1 the label is extended as follows:
        /// <list type="bullet">
        /// <item>words accepted by <c>StringUtils.IsNumeric</c> get "#" and nothing else;</item>
        /// <item>words accepted by <c>StringUtils.IsPunct</c> get "!" and nothing else;</item>
        /// <item>otherwise the conditional, imperfect, infinitive and adverb suffix
        /// markers returned by <c>SpanishUnknownWordSignatures</c> are appended;</item>
        /// <item>if none of those added anything, one of "-vb1p", "-ger" or "-s" is tried;</item>
        /// <item>if still nothing was added and <c>unknownSuffixSize</c> is positive, "-" plus
        /// the last <c>unknownSuffixSize</c> characters of the word (or the whole word
        /// when it is shorter) is appended;</item>
        /// <item>finally "-C" is appended when the first character is upper or title case
        /// and <c>IsUpperCase(word)</c> is false, otherwise "-c".</item>
        /// </list>
        /// TODO Can add various signatures, setting the signature via Options.
        /// </remarks>
        /// <param name="word">The word to make a signature for</param>
        /// <param name="loc">
        /// Its position in the sentence. Not read by this implementation.
        /// </param>
        /// <returns>A String that is its signature (equivalence class)</returns>
        public override string GetSignature(string word, int loc)
        {
            const string baseLabel = "UNK";
            StringBuilder sb = new StringBuilder(baseLabel);

            switch (unknownLevel)
            {
            case 1:
            {
                if (StringUtils.IsNumeric(word))
                {
                    sb.Append('#');
                    break;
                }
                if (StringUtils.IsPunct(word))
                {
                    sb.Append('!');
                    break;
                }
                // Mutually exclusive patterns
                sb.Append(SpanishUnknownWordSignatures.ConditionalSuffix(word));
                sb.Append(SpanishUnknownWordSignatures.ImperfectSuffix(word));
                sb.Append(SpanishUnknownWordSignatures.InfinitiveSuffix(word));
                sb.Append(SpanishUnknownWordSignatures.AdverbSuffix(word));
                // Broad coverage patterns -- only apply if we haven't yet matched at all.
                // sb starts as baseLabel and is only ever appended to, so an unchanged
                // length means nothing has matched (no need to materialize the string).
                if (sb.Length == baseLabel.Length)
                {
                    if (SpanishUnknownWordSignatures.HasVerbFirstPersonPluralSuffix(word))
                    {
                        sb.Append("-vb1p");
                    }
                    else if (SpanishUnknownWordSignatures.HasGerundSuffix(word))
                    {
                        sb.Append("-ger");
                    }
                    else if (word.EndsWith("s", System.StringComparison.Ordinal))
                    {
                        // Ordinal: this is a plain character test, not a linguistic comparison.
                        sb.Append("-s");
                    }
                }
                // Backoff to suffix if we haven't matched anything else
                if (unknownSuffixSize > 0 && sb.Length == baseLabel.Length)
                {
                    int suffixLength = System.Math.Min(word.Length, unknownSuffixSize);
                    sb.Append('-').Append(Sharpen.Runtime.Substring(word, word.Length - suffixLength));
                }
                // Capitalization marker. The length guard keeps an empty word from
                // throwing on word[0]; an empty word is treated as not capitalized.
                bool startsCapitalized = word.Length > 0
                    && (char.IsUpper(word[0])
                        || char.GetUnicodeCategory(word[0]) == System.Globalization.UnicodeCategory.TitlecaseLetter);
                if (startsCapitalized && !IsUpperCase(word))
                {
                    sb.Append("-C");
                }
                else
                {
                    sb.Append("-c");
                }
                break;
            }

            default:
            {
                // .NET composite format items ({0}, {1}, ...) instead of Java printf
                // specifiers, which string.Format would emit verbatim.
                log.Error(string.Format(
                    System.Globalization.CultureInfo.InvariantCulture,
                    "{0}: Invalid unknown word signature! ({1}){2}",
                    this.GetType().FullName,
                    unknownLevel,
                    System.Environment.NewLine));
                break;
            }
            }
            return sb.ToString();
        }