/// <summary>
/// Registers <c>LuMiiAbbreviationToken</c> on a tokenizer created with
/// <c>compile: false</c>, compiles it, and checks that "Ļ.cien. kungs!"
/// yields three tokens, the first of which has the text "Ļ.cien.".
/// </summary>
public void ļcien()
{
    // Arrange: start uncompiled so the extra token type can be registered first.
    var sut = new LatvianTokenizer(compile: false);
    sut.Add<LuMiiAbbreviationToken>();
    sut.Compile();

    // Act
    Token[] result = sut.Tokenize("Ļ.cien. kungs!").ToArray();

    // Assert
    Assert.AreEqual(3, result.Length);
    Assert.AreEqual("Ļ.cien.", result[0].Text);
}
/// <summary>
/// Registers <c>LuMiiCollocationToken</c> on a tokenizer created with
/// <c>compile: false</c>, compiles it, and checks that
/// "Es skrienu kā arī lecu." yields five tokens, with "kā arī" emitted as
/// the single token at index 2.
/// </summary>
public void kā_arī()
{
    // Arrange: start uncompiled so the extra token type can be registered first.
    var sut = new LatvianTokenizer(compile: false);
    sut.Add<LuMiiCollocationToken>();
    sut.Compile();

    // Act
    Token[] result = sut.Tokenize("Es skrienu kā arī lecu.").ToArray();

    // Assert
    Assert.AreEqual(5, result.Length);
    Assert.AreEqual("kā arī", result[2].Text);
}
/// <summary>
/// Registers both <c>LuMiiAbbreviationToken</c> and
/// <c>LuMiiCollocationToken</c> (in that order) on a tokenizer created with
/// <c>compile: false</c>, compiles it, and checks that
/// "kaut JEL 2 milj. mans k-gs" yields five tokens with "kaut JEL" at
/// index 0, "milj." at index 2 and "k-gs" at index 4.
/// </summary>
public void kaut_jel_milj_kgs()
{
    // Arrange: registration order is kept exactly as in the original test.
    var sut = new LatvianTokenizer(compile: false);
    sut.Add<LuMiiAbbreviationToken>();
    sut.Add<LuMiiCollocationToken>();
    sut.Compile();

    // Act
    Token[] result = sut.Tokenize("kaut JEL 2 milj. mans k-gs").ToArray();

    // Assert
    Assert.AreEqual(5, result.Length);
    Assert.AreEqual("kaut JEL", result[0].Text);
    Assert.AreEqual("milj.", result[2].Text);
    Assert.AreEqual("k-gs", result[4].Text);
}
/// <summary>
/// Removes <c>InitialsToken</c> from a tokenizer created with
/// <c>compile: false</c> and checks that "A.Bērziņš" is then split into the
/// separate tokens "A", "." and "Bērziņš".
/// </summary>
public void InitialsRemoved()
{
    LatvianTokenizer tokenizer = new LatvianTokenizer(compile: false);
    tokenizer.Remove<InitialsToken>();
    // The line comment below must end at a line break: when the whole method
    // sat on one physical line, it commented out the Tokenize call, every
    // assertion and the closing brace.
    tokenizer.Compile(); // optional

    Token[] tokens = tokenizer.Tokenize("A.Bērziņš").ToArray();

    Assert.AreEqual("A", tokens[0].Text);
    Assert.AreEqual(".", tokens[1].Text);
    Assert.AreEqual("Bērziņš", tokens[2].Text);
}