/// <summary>
/// Runs <c>JustTpWordsNumbersPunctuation</c> on the digit-only string "1231231"
/// and expects the returned array to contain exactly one element.
/// </summary>
public void WordsNumbersPunctuationWithoutDash()
{
    TokenParserUtils tokenizer = new TokenParserUtils();

    string[] tokens = tokenizer.JustTpWordsNumbersPunctuation("1231231");

    Assert.AreEqual(1, tokens.Length);
}
/// <summary>
/// Feeds the text of every entry in <c>Words.Dictionary</c> through
/// <c>JustTpWordsNumbersPunctuation</c> and fails if any entry produces no
/// tokens, or produces a single token that differs from the entry's text.
/// </summary>
public void ParseEachWordInDictionary2()
{
    TokenParserUtils tokenizer = new TokenParserUtils();
    bool anyFailure = false;

    foreach (Word entry in Words.Dictionary.Values)
    {
        string[] tokens = tokenizer.JustTpWordsNumbersPunctuation(entry.Text);

        bool producedNothing = tokens.Length == 0;
        bool singleTokenMismatch = tokens.Length == 1 && tokens[0] != entry.Text;

        // Failures are recorded instead of asserted on the spot, so every
        // offending word is printed before the test finally fails.
        // NOTE(review): a result with more than one token is not flagged
        // by either check - confirm whether that is intended.
        if (producedNothing || singleTokenMismatch)
        {
            anyFailure = true;
            Console.WriteLine(entry.Text);
        }
    }

    if (anyFailure)
    {
        Assert.Fail("Something failed to parse");
    }
}
/// <summary>
/// Runs <c>JustTpWordsNumbersPunctuation</c> on "li ala" and expects two
/// elements back: "li" followed by "ala".
/// </summary>
public void ParseLiAla()
{
    string[] expected = { "li", "ala" };
    TokenParserUtils tokenizer = new TokenParserUtils();

    string[] actual = tokenizer.JustTpWordsNumbersPunctuation("li ala");

    // Check the count first so the indexed comparisons below stay in range.
    Assert.AreEqual(expected.Length, actual.Length);
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i]);
    }
}
/// <summary>
/// Tokenizes "lon-ma-pi-ike-ale", passes the tokens through
/// <c>RemergeCompounds</c>, prints each resulting element, and expects a
/// single element equal to the original input.
/// </summary>
public void CompoundWordParsePrepPhraseAsCompound()
{
    const string input = "lon-ma-pi-ike-ale";
    TokenParserUtils tokenizer = new TokenParserUtils();

    string[] tokens = tokenizer.JustTpWordsNumbersPunctuation(input);
    string[] merged = tokenizer.RemergeCompounds(tokens);

    // Print the result before asserting so it is visible when the test fails.
    for (int i = 0; i < merged.Length; i++)
    {
        Console.WriteLine(merged[i]);
    }

    Assert.AreEqual(1, merged.Length);
    Assert.AreEqual("lon-ma-pi-ike-ale", merged[0]);
}
/// <summary>
/// Tokenizes "tomo-tawa-kon", passes the tokens through
/// <c>RemergeCompounds</c>, prints each resulting element, and expects a
/// single element equal to the original input.
/// </summary>
public void CompoundWordParseSimple()
{
    const string input = "tomo-tawa-kon";
    TokenParserUtils tokenizer = new TokenParserUtils();

    string[] tokens = tokenizer.JustTpWordsNumbersPunctuation(input);
    string[] merged = tokenizer.RemergeCompounds(tokens);

    // Print the result before asserting so it is visible when the test fails.
    for (int i = 0; i < merged.Length; i++)
    {
        Console.WriteLine(merged[i]);
    }

    Assert.AreEqual(1, merged.Length);
    Assert.AreEqual("tomo-tawa-kon", merged[0]);
}
/// <summary>
/// Runs <c>JustTpWordsNumbersPunctuation</c> on "123-1231", prints each
/// returned element, and expects a single element equal to the input.
/// </summary>
public void WordsNumbersPunctuationWithDash()
{
    TokenParserUtils tokenizer = new TokenParserUtils();
    string input = "123-1231";

    string[] tokens = tokenizer.JustTpWordsNumbersPunctuation(input);

    // Print the result before asserting so it is visible when the test fails.
    for (int i = 0; i < tokens.Length; i++)
    {
        Console.WriteLine(tokens[i]);
    }

    Assert.AreEqual(1, tokens.Length);
    Assert.AreEqual(input, tokens[0]);
}
/// <summary>
/// Tokenizes <c>CorpusTexts.JanSin</c> and tries to construct a <c>Word</c>
/// from every token, printing each token whose construction throws.
/// The method contains no assertions: exceptions are caught and reported only.
/// </summary>
public void SpellCheck()
{
    string corpus = CorpusTexts.JanSin;
    TokenParserUtils tokenizer = new TokenParserUtils();
    string[] tokens = tokenizer.JustTpWordsNumbersPunctuation(corpus);

    for (int i = 0; i < tokens.Length; i++)
    {
        string token = tokens[i];
        try
        {
            // Only the constructor's success matters; the instance is discarded.
            new Word(token);
        }
        catch (Exception)
        {
            Console.WriteLine("Uh-oh: " + token);
        }
    }
}