Esempio n. 1
0
        // Verifies that, with LuMiiAbbreviationToken registered, the leading
        // "Ļ.cien." of the input is returned as one single token.
        public void ļcien()
        {
            // Arrange: build the tokenizer uncompiled, register the extra token type, then compile.
            const string input = "Ļ.cien. kungs!";
            var sut = new LatvianTokenizer(compile: false);
            sut.Add<LuMiiAbbreviationToken>();
            sut.Compile();

            // Act
            Token[] result = sut.Tokenize(input).ToArray();

            // Assert: three tokens overall, the first one spanning "Ļ.cien." including both dots.
            Assert.AreEqual(3, result.Length);
            Assert.AreEqual("Ļ.cien.", result[0].Text);
        }
Esempio n. 2
0
        // Verifies that, with LuMiiCollocationToken registered, the two-word
        // sequence "kā arī" is returned as one single token.
        public void kā_arī()
        {
            // Arrange: build the tokenizer uncompiled, register the extra token type, then compile.
            const string input = "Es skrienu kā arī lecu.";
            var sut = new LatvianTokenizer(compile: false);
            sut.Add<LuMiiCollocationToken>();
            sut.Compile();

            // Act
            Token[] result = sut.Tokenize(input).ToArray();

            // Assert: five tokens overall, with "kā arī" (space included) at index 2.
            Assert.AreEqual(5, result.Length);
            Assert.AreEqual("kā arī", result[2].Text);
        }
Esempio n. 3
0
        // Verifies tokenization when both LuMiiAbbreviationToken and
        // LuMiiCollocationToken are registered on the same tokenizer.
        public void kaut_jel_milj_kgs()
        {
            // Arrange: build the tokenizer uncompiled, register both token types
            // (abbreviation first, collocation second), then compile.
            const string input = "kaut JEL 2 milj. mans k-gs";
            var sut = new LatvianTokenizer(compile: false);
            sut.Add<LuMiiAbbreviationToken>();
            sut.Add<LuMiiCollocationToken>();
            sut.Compile();

            // Act
            Token[] result = sut.Tokenize(input).ToArray();

            // Assert: five tokens overall; the multi-word, dotted and hyphenated
            // pieces must each come back intact at their expected positions.
            Assert.AreEqual(5, result.Length);
            Assert.AreEqual("kaut JEL", result[0].Text);
            Assert.AreEqual("milj.", result[2].Text);
            Assert.AreEqual("k-gs", result[4].Text);
        }
Esempio n. 4
0
        // Verifies that after InitialsToken is removed from the tokenizer,
        // "A.Bērziņš" is split into three separate tokens: "A", "." and "Bērziņš".
        public void InitialsRemoved()
        {
            // Arrange: build the tokenizer uncompiled so the token set can be changed first.
            LatvianTokenizer tokenizer = new LatvianTokenizer(compile: false);
            tokenizer.Remove<InitialsToken>();
            tokenizer.Compile(); // optional

            // Act
            Token[] tokens = tokenizer.Tokenize("A.Bērziņš").ToArray();

            // Assert: check the count first so a wrong split is reported as an
            // assertion failure rather than an IndexOutOfRangeException, and so
            // unexpected extra tokens do not go unnoticed.
            Assert.AreEqual(3, tokens.Length);
            Assert.AreEqual("A", tokens[0].Text);
            Assert.AreEqual(".", tokens[1].Text);
            Assert.AreEqual("Bērziņš", tokens[2].Text);
        }