예제 #1
0
 /// <summary>
 /// Verifies the tag returned by <c>FrenchUnknownWordSignatures.IsAllCaps</c>
 /// for a fixed table of inputs: "-allcap" is expected for "YO" and for the
 /// accented upper-case string, and the empty string for every other entry.
 /// </summary>
 public virtual void TestIsAllCaps()
 {
     // Each row is { expected tag, input word }; rows are checked in order.
     string[,] expectations =
     {
         { "-allcap",    "YO" },
         { string.Empty, "\\\\" },
         { string.Empty, "0D" },
         { string.Empty, "×" },
         { "-allcap",    "ÀÅÆÏÜÝÞ" },
     };

     int rowCount = expectations.GetLength(0);
     for (int row = 0; row < rowCount; row++)
     {
         string expectedTag = expectations[row, 0];
         string actualTag   = FrenchUnknownWordSignatures.IsAllCaps(expectations[row, 1]);
         NUnit.Framework.Assert.AreEqual(expectedTag, actualTag);
     }
 }
예제 #2
0
 /// <summary>
 /// Verifies the tag returned by <c>FrenchUnknownWordSignatures.HasPunc</c>
 /// for a fixed table of inputs: "-hpunc" is expected for the entries that
 /// contain a punctuation or symbol character, and the empty string for the
 /// purely alphanumeric entries.
 /// </summary>
 public virtual void TestHasPunc()
 {
     // Each row is { expected tag, input word }; rows are checked in order.
     string[,] expectations =
     {
         { "-hpunc",     "Yes!" },
         { "-hpunc",     "[" },
         { "-hpunc",     "40%" },
         { string.Empty, "B" },
         { "-hpunc",     "BQ_BD" },
         { string.Empty, "BQBD" },
         { string.Empty, "0" },
         { "-hpunc",     "\\" },
         { "-hpunc",     "]aeiou" },
         { "-hpunc",     "]" },
         { "-hpunc",     "÷" },
         { string.Empty, "ø" },
     };

     int rowCount = expectations.GetLength(0);
     for (int row = 0; row < rowCount; row++)
     {
         string expectedTag = expectations[row, 0];
         string actualTag   = FrenchUnknownWordSignatures.HasPunc(expectations[row, 1]);
         NUnit.Framework.Assert.AreEqual(expectedTag, actualTag);
     }
 }
        /// <summary>
        /// Builds the unknown-word signature (equivalence class) for a word.
        /// </summary>
        /// <remarks>
        /// The signature always starts with "UNK". For <c>unknownLevel == 1</c> the
        /// following pieces are appended, in this order: the first non-empty of the
        /// noun / adjective / verb / adverb suffix tags, the possible-plural tag, a
        /// digit tag, the punctuation tag, the all-caps tag and, for words that are
        /// not sentence-initial and not all-caps, the capitalization tag. If none of
        /// these added anything and <c>unknownSuffixSize</c> is positive, the last
        /// <c>unknownSuffixSize</c> characters of the word are appended after a '-'.
        /// Any other <c>unknownLevel</c> writes a diagnostic to standard error and
        /// returns the bare "UNK" label.
        /// TODO Can add various signatures, setting the signature via Options.
        /// </remarks>
        /// <param name="word">The word to make a signature for</param>
        /// <param name="loc">
        /// Its position in the sentence (mainly so sentence-initial
        /// capitalized words can be treated differently)
        /// </param>
        /// <returns>A String that is its signature (equivalence class)</returns>
        public override string GetSignature(string word, int loc)
        {
            const string  baseLabel = "UNK";
            StringBuilder sb        = new StringBuilder(baseLabel);

            switch (unknownLevel)
            {
            case 1:
            {
                //Marie's initial attempt
                // At most one part-of-speech suffix tag is used; the candidates are
                // tried in priority order and the first non-empty one wins.
                string suffixTag = FrenchUnknownWordSignatures.NounSuffix(word);
                if (string.IsNullOrEmpty(suffixTag))
                {
                    suffixTag = FrenchUnknownWordSignatures.AdjSuffix(word);
                }
                if (string.IsNullOrEmpty(suffixTag))
                {
                    suffixTag = FrenchUnknownWordSignatures.VerbSuffix(word);
                }
                if (string.IsNullOrEmpty(suffixTag))
                {
                    suffixTag = FrenchUnknownWordSignatures.AdvSuffix(word);
                }
                sb.Append(suffixTag);

                sb.Append(FrenchUnknownWordSignatures.PossiblePlural(word));

                // When the word contains a digit, prefer the "is a number" tag over
                // the weaker "has a digit" tag.
                string hasDigit = FrenchUnknownWordSignatures.HasDigit(word);
                if (!string.IsNullOrEmpty(hasDigit))
                {
                    string isDigit = FrenchUnknownWordSignatures.IsDigit(word);
                    sb.Append(string.IsNullOrEmpty(isDigit) ? hasDigit : isDigit);
                }

                sb.Append(FrenchUnknownWordSignatures.HasPunc(word));

                // Computed once: the result is both appended and used to decide
                // whether the capitalization tag is still needed.
                string allCaps = FrenchUnknownWordSignatures.IsAllCaps(word);
                sb.Append(allCaps);
                if (loc > 0 && string.IsNullOrEmpty(allCaps))
                {
                    sb.Append(FrenchUnknownWordSignatures.IsCapitalized(word));
                }

                //Backoff to suffix if we haven't matched anything else.
                // The builder is only ever appended to, so an unchanged length means
                // it still holds exactly the base label.
                if (unknownSuffixSize > 0 && sb.Length == baseLabel.Length)
                {
                    int suffixLength = System.Math.Min(word.Length, unknownSuffixSize);
                    sb.Append('-').Append(Sharpen.Runtime.Substring(word, word.Length - suffixLength));
                }
                break;
            }

            default:
            {
                // Report the unsupported level on stderr and fall through to return
                // the bare base label.
                System.Console.Error.WriteLine("{0}: Invalid unknown word signature! ({1})", this.GetType().FullName, unknownLevel);
                break;
            }
            }
            return sb.ToString();
        }