Beispiel #1
0
        /// <summary>
        /// Quick-start example: splits a short Latvian text into sentences with
        /// <c>LatvianTokenizer.TokenizeSentences</c>, copies the tokens of every
        /// sentence into a list, and checks the number of sentences and the
        /// number of tokens in each of them.
        /// </summary>
        public void QuickStart_TokenizeSentences()
        {
            string text = "Sveika, pasaule! Man iet labi. Šodienas datums ir 2014-01-01";

            // The tokens of each sentence are gathered into a plain List<Token>,
            // so the outer collection is typed to hold exactly those lists.
            List<List<Token>> sentences = new List<List<Token>>();

            LatvianTokenizer tokenizer = new LatvianTokenizer();

            foreach (Sentence sentence in tokenizer.TokenizeSentences(text))
            {
                List<Token> sentenceTokens = new List<Token>();

                // Enumerating a sentence yields its tokens one by one.
                foreach (Token token in sentence)
                {
                    sentenceTokens.Add(token);
                }

                sentences.Add(sentenceTokens);
            }

            // Expected: three sentences, each made up of four tokens.
            Assert.AreEqual(3, sentences.Count);
            Assert.AreEqual(4, sentences[0].Count);
            Assert.AreEqual(4, sentences[1].Count);
            Assert.AreEqual(4, sentences[2].Count);
        }