/// <summary>
/// Checks <c>FrenchUnknownWordSignatures.IsAllCaps</c> against a fixed table of
/// inputs: it must yield <c>-allcap</c> for "YO" and for a run of accented capitals,
/// and the empty string for backslashes, a digit/letter mix and the multiplication sign.
/// </summary>
public virtual void TestIsAllCaps()
{
    // Each row is { expected signature fragment, input word }; rows are checked in order.
    string[][] expectations =
    {
        new string[] { "-allcap", "YO" },
        new string[] { string.Empty, "\\\\" },
        new string[] { string.Empty, "0D" },
        new string[] { string.Empty, "×" },
        new string[] { "-allcap", "ÀÅÆÏÜÝÞ" }
    };

    foreach (string[] row in expectations)
    {
        string expected = row[0];
        string input = row[1];
        NUnit.Framework.Assert.AreEqual(expected, FrenchUnknownWordSignatures.IsAllCaps(input));
    }
}
/// <summary>
/// Checks <c>FrenchUnknownWordSignatures.HasPunc</c> against a fixed table of
/// inputs: words containing characters such as <c>!</c>, <c>[</c>, <c>%</c>, <c>_</c>,
/// a backslash, <c>]</c> or <c>÷</c> must yield <c>-hpunc</c>, while purely
/// alphabetic or numeric words (including <c>ø</c>) must yield the empty string.
/// </summary>
public virtual void TestHasPunc()
{
    // Each row is { expected signature fragment, input word }; rows are checked in order.
    string[][] expectations =
    {
        new string[] { "-hpunc", "Yes!" },
        new string[] { "-hpunc", "[" },
        new string[] { "-hpunc", "40%" },
        new string[] { string.Empty, "B" },
        new string[] { "-hpunc", "BQ_BD" },
        new string[] { string.Empty, "BQBD" },
        new string[] { string.Empty, "0" },
        new string[] { "-hpunc", "\\" },
        new string[] { "-hpunc", "]aeiou" },
        new string[] { "-hpunc", "]" },
        new string[] { "-hpunc", "÷" },
        new string[] { string.Empty, "ø" }
    };

    foreach (string[] row in expectations)
    {
        string expected = row[0];
        string input = row[1];
        NUnit.Framework.Assert.AreEqual(expected, FrenchUnknownWordSignatures.HasPunc(input));
    }
}
/// <summary>
/// Builds the unknown-word signature (equivalence class) for a French word.
/// </summary>
/// <remarks>
/// The signature always starts with <c>UNK</c>. When <c>unknownLevel</c> is 1 the
/// following fragments are appended, in this order:
/// <list type="number">
/// <item><description>the first non-empty of the noun, adjective, verb and adverb suffix fragments;</description></item>
/// <item><description>the possible-plural fragment;</description></item>
/// <item><description>a digit fragment: the <c>IsDigit</c> fragment when it is non-empty, otherwise the <c>HasDigit</c> fragment (nothing if <c>HasDigit</c> is empty);</description></item>
/// <item><description>the has-punctuation fragment;</description></item>
/// <item><description>the all-caps fragment;</description></item>
/// <item><description>the capitalized fragment, only when <paramref name="loc"/> is greater than 0 and the all-caps fragment is empty;</description></item>
/// <item><description>if nothing at all was appended and <c>unknownSuffixSize</c> is positive, a dash followed by the last <c>unknownSuffixSize</c> characters of the word (or the whole word if it is shorter).</description></item>
/// </list>
/// For any other <c>unknownLevel</c> a diagnostic is written to standard error and the
/// bare <c>UNK</c> label is returned.
/// TODO: further signature schemes could be added and selected via Options.
/// </remarks>
/// <param name="word">The word to make a signature for.</param>
/// <param name="loc">
/// Its position in the sentence (mainly so sentence-initial
/// capitalized words can be treated differently).
/// </param>
/// <returns>A string that is the word's signature (equivalence class).</returns>
public override string GetSignature(string word, int loc)
{
    const string BaseLabel = "UNK";
    StringBuilder sb = new StringBuilder(BaseLabel);

    switch (unknownLevel)
    {
        case 1:
        {
            // Marie's initial attempt.
            // Part-of-speech suffix cascade: stop at the first helper that contributes
            // anything. The builder only ever grows, so "still equal to BaseLabel" is
            // the same as "length unchanged" and needs no string allocation.
            sb.Append(FrenchUnknownWordSignatures.NounSuffix(word));
            if (sb.Length == BaseLabel.Length)
            {
                sb.Append(FrenchUnknownWordSignatures.AdjSuffix(word));
            }
            if (sb.Length == BaseLabel.Length)
            {
                sb.Append(FrenchUnknownWordSignatures.VerbSuffix(word));
            }
            if (sb.Length == BaseLabel.Length)
            {
                sb.Append(FrenchUnknownWordSignatures.AdvSuffix(word));
            }

            sb.Append(FrenchUnknownWordSignatures.PossiblePlural(word));

            // Digit fragment: prefer the more specific IsDigit fragment when present.
            string hasDigit = FrenchUnknownWordSignatures.HasDigit(word);
            string isDigit = FrenchUnknownWordSignatures.IsDigit(word);
            if (!hasDigit.Equals(string.Empty))
            {
                sb.Append(isDigit.Equals(string.Empty) ? hasDigit : isDigit);
            }

            // Only the HasPunc fragment is used; a variant that preferred an IsPunc
            // fragment when non-empty was previously left disabled here.
            sb.Append(FrenchUnknownWordSignatures.HasPunc(word));

            // Computed once and reused for the capitalization decision below
            // (assumes IsAllCaps is a pure function of the word).
            string allCaps = FrenchUnknownWordSignatures.IsAllCaps(word);
            sb.Append(allCaps);

            // Capitalization is only informative away from the sentence start and
            // when the word is not already flagged as all caps.
            if (loc > 0 && allCaps.Equals(string.Empty))
            {
                sb.Append(FrenchUnknownWordSignatures.IsCapitalized(word));
            }

            // Back off to a raw character suffix if we haven't matched anything else.
            if (unknownSuffixSize > 0 && sb.Length == BaseLabel.Length)
            {
                int suffixLength = word.Length < unknownSuffixSize ? word.Length : unknownSuffixSize;
                sb.Append('-').Append(word.Substring(word.Length - suffixLength));
            }
            break;
        }

        default:
        {
            // .NET composite formatting ({0}, {1}) replaces the Java printf specifiers;
            // WriteLine supplies the trailing newline that "%n" stood for.
            System.Console.Error.WriteLine("{0}: Invalid unknown word signature! ({1})", this.GetType().FullName, unknownLevel);
            break;
        }
    }

    return sb.ToString();
}