/// <summary>
/// Verifies that '#', which is listed only in the "punctuation" character set of the
/// writing system, is categorized as punctuation and not as a word-forming character.
/// </summary>
public void SymbolPunctuationOnly()
{
    CoreWritingSystemDefinition writingSystem = Cache.ServiceLocator.WritingSystemManager.Create("th");
    writingSystem.CharacterSets.Clear();

    // Word-forming letters for this writing system.
    var mainSet = new CharacterSetDefinition("main")
    {
        Characters = { "a", "b", "c", "d", "e" }
    };
    writingSystem.CharacterSets.Add(mainSet);

    // Punctuation, including the '#' symbol under test.
    var punctuationSet = new CharacterSetDefinition("punctuation")
    {
        Characters = { "'", "-", "#" }
    };
    writingSystem.CharacterSets.Add(punctuationSet);

    ValidCharacters loadedChars = ValidCharacters.Load(writingSystem);
    var charCategorizer = new FwCharacterCategorizer(loadedChars);

    Assert.IsTrue(charCategorizer.IsPunctuation('#'));
    Assert.IsFalse(charCategorizer.IsWordFormingCharacter('#'));
}
/// <summary>
/// Verifies that <c>WordAndPuncts</c> returns an empty list when it is given an
/// empty string.
/// </summary>
public void WordAndPuncs_EmptyString()
{
    CoreWritingSystemDefinition ws = Cache.ServiceLocator.WritingSystemManager.Create("th");
    ws.CharacterSets.Clear();
    ws.CharacterSets.Add(new CharacterSetDefinition("main") { Characters = { "a", "b", "c" } });
    ws.CharacterSets.Add(new CharacterSetDefinition("punctuation") { Characters = { "-", " " } });
    ValidCharacters validChars = ValidCharacters.Load(ws);
    var categorizer = new FwCharacterCategorizer(validChars);

    List<WordAndPunct> wordsAndPunc = categorizer.WordAndPuncts("");

    // We expect no words to be returned for an empty string.
    // (The comment is on its own line so that it cannot comment out the assertion.)
    Assert.AreEqual(0, wordsAndPunc.Count);
}
/// <summary>
/// Verifies that <c>WordAndPuncts</c> splits "abc.de" into the two words "abc" and "de".
/// The '.' separator is not listed in the "punctuation" character set defined here.
/// </summary>
public void WordAndPuncs_NoOverridePunc()
{
    CoreWritingSystemDefinition ws = Cache.ServiceLocator.WritingSystemManager.Create("th");
    ws.CharacterSets.Clear();
    ws.CharacterSets.Add(new CharacterSetDefinition("main") { Characters = { "a", "b", "c", "d", "e" } });
    ws.CharacterSets.Add(new CharacterSetDefinition("punctuation") { Characters = { "'", "-", "#" } });
    ValidCharacters validChars = ValidCharacters.Load(ws);
    var categorizer = new FwCharacterCategorizer(validChars);

    // NOTE(review): '.' is presumably treated as punctuation by a default (non-overridden)
    // categorization, hence the test name - confirm against FwCharacterCategorizer.
    List<WordAndPunct> wordsAndPunc = categorizer.WordAndPuncts("abc.de");

    // We expect two words to be returned.
    // (The comment is on its own line so that it cannot comment out the assertions.)
    Assert.AreEqual(2, wordsAndPunc.Count);
    Assert.AreEqual("abc", wordsAndPunc[0].Word);
    Assert.AreEqual("de", wordsAndPunc[1].Word);
}