Ejemplo n.º 1
0
        /// <summary>
        /// Checks that <c>JustTpWordsNumbersPunctuation</c> yields exactly one token
        /// for the dash-free digit string "1231231".
        /// </summary>
        public void WordsNumbersPunctuationWithoutDash()
        {
            const string digitsOnly = "1231231";
            TokenParserUtils parser = new TokenParserUtils();

            string[] tokens = parser.JustTpWordsNumbersPunctuation(digitsOnly);

            Assert.AreEqual(1, tokens.Length);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Expects <c>ValidWords</c> to throw for the input "jan MaliyA".
        /// Any exception counts as a pass; if parsing succeeds, the parsed words are
        /// printed and the test fails.
        /// </summary>
        public void DoubleBadProperModifer()
        {
            const string input = "jan MaliyA";

            // Log the normalized form of the input before attempting to parse it.
            Normalizer normalizer = new Normalizer(Dialect.LooseyGoosey);
            Console.WriteLine(normalizer.NormalizeText(input));

            TokenParserUtils parser = new TokenParserUtils();
            Word[] parsed;
            try
            {
                parsed = parser.ValidWords(input);
            }
            catch (Exception)
            {
                // Rejection is the expected outcome, so the test passes here.
                return;
            }

            // Parsing unexpectedly succeeded: dump what came back, then fail.
            for (int i = 0; i < parsed.Length; i++)
            {
                Console.WriteLine(parsed[i]);
            }

            Assert.Fail();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs every dictionary word through <c>JustTpWordsNumbersPunctuation</c> and
        /// prints each word that either produced no tokens or produced a single token
        /// different from the word's text. Fails once, after the whole dictionary has
        /// been scanned, if any word was printed.
        /// </summary>
        public void ParseEachWordInDictionary2()
        {
            TokenParserUtils parser     = new TokenParserUtils();
            bool             anyFailure = false;

            foreach (Word entry in Words.Dictionary.Values)
            {
                string[] tokens = parser.JustTpWordsNumbersPunctuation(entry.Text);

                bool producedNothing = tokens.Length == 0;
                bool producedOther   = tokens.Length == 1 && tokens[0] != entry.Text;

                // Note: a word that splits into more than one token is not flagged here.
                if (!producedNothing && !producedOther)
                {
                    continue;
                }

                anyFailure = true;
                Console.WriteLine(entry.Text);
            }

            // Report once at the end so the log lists every offending word.
            if (anyFailure)
            {
                Assert.Fail("Something failed to parse");
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Checks that <c>JustTpWords</c> splits "jan lili lon" into three tokens.
        /// </summary>
        public void ThreWords()
        {
            const string phrase = "jan lili lon";
            TokenParserUtils parser = new TokenParserUtils();

            string[] tokens = parser.JustTpWords(phrase);

            Assert.AreEqual(3, tokens.Length);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks that <c>JustTpWords</c> returns a single token for the input "jan".
        /// </summary>
        public void OneWord()
        {
            const string single = "jan";
            TokenParserUtils parser = new TokenParserUtils();

            string[] tokens = parser.JustTpWords(single);

            Assert.AreEqual(1, tokens.Length);
        }
        /// <summary>
        /// Runs every corpus file through sentence splitting, normalization and
        /// tokenization, passes each trimmed, non-empty token to the <c>Word</c>
        /// constructor, and counts how often each token is rejected (constructor throws).
        /// Tokens rejected more than ten times are printed with their count.
        /// </summary>
        /// <remarks>
        /// This method makes no assertions; it only reports to the console.
        /// Tokens the constructor accepted are remembered and not re-checked; the
        /// original "good" list was consulted but never filled, so that check was dead.
        /// Rejected tokens are deliberately re-tried so every occurrence is counted.
        /// </remarks>
        public void SpellCheck_ForStressTest_UsingOnlyWordConstructor()
        {
            CorpusFileReader         reader         = new CorpusFileReader();
            Dictionary <string, int> rejectedCounts = new Dictionary <string, int>();
            HashSet <string>         accepted       = new HashSet <string>();
            TokenParserUtils         tpu            = new TokenParserUtils();
            Dialect                  dialect        = Dialect.LooseyGoosey;

            SentenceSplitter ss = new SentenceSplitter(dialect);

            Normalizer norm = new Normalizer(dialect);

            // Don't strip double quotes or we can't ID some marked foreign text.
            char[] trimChars = { ':', '.', '\'', '«', '»', '!', '?', '-', '[', ']' };

            foreach (string s in reader.NextFile())
            {
                string[] rawSentences = ss.ParseIntoNonNormalizedSentences(s);
                foreach (string sentence in rawSentences)
                {
                    string normalized = norm.NormalizeText(sentence);
                    foreach (string token in tpu.JustTokens(normalized))
                    {
                        string word = token.Trim(trimChars);
                        if (string.IsNullOrEmpty(word) || accepted.Contains(word))
                        {
                            continue;
                        }

                        try
                        {
                            // Only the constructor's validation matters; the instance is discarded.
                            new Word(word);
                            accepted.Add(word);
                        }
                        catch (Exception)
                        {
                            // TryGetValue leaves the count at 0 for a first-time offender.
                            int timesSeen;
                            rejectedCounts.TryGetValue(word, out timesSeen);
                            rejectedCounts[word] = timesSeen + 1;
                        }
                    }
                }
            }

            foreach (KeyValuePair <string, int> pair in rejectedCounts)
            {
                // Report only tokens rejected more than ten times.
                if (pair.Value > 10)
                {
                    Console.WriteLine("Uh-oh: " + pair.Key + " " + pair.Value);
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Checks that <c>JustTpWordsNumbersPunctuation</c> splits "li ala" into the
        /// two tokens "li" and "ala", in that order.
        /// </summary>
        public void ParseLiAla()
        {
            TokenParserUtils parser = new TokenParserUtils();

            string[] tokens = parser.JustTpWordsNumbersPunctuation("li ala");

            Assert.AreEqual(2, tokens.Length);

            string first  = tokens[0];
            string second = tokens[1];
            Assert.AreEqual("li", first);
            Assert.AreEqual("ala", second);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Checks that <c>JustTpWords</c> returns a single token for the
        /// double-quoted text "Go*Dawgs!" (quotes included in the input).
        /// </summary>
        public void GoDawgs()
        {
            // The input keeps its surrounding double quotes: "Go*Dawgs!"
            const string quoted = "\"Go*Dawgs!\"";
            TokenParserUtils parser = new TokenParserUtils();

            string[] tokens = parser.JustTpWords(quoted);

            Assert.AreEqual(1, tokens.Length);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Checks, for every word in <c>Words.Dictionary</c>, that <c>JustTpWords</c>
        /// returns exactly one token and that the token equals the word's text.
        /// Stops at the first word that fails either assertion.
        /// </summary>
        public void ParseEachWordInDictionary()
        {
            TokenParserUtils parser = new TokenParserUtils();

            foreach (Word entry in Words.Dictionary.Values)
            {
                string[] tokens = parser.JustTpWords(entry.Text);

                Assert.AreEqual(1, tokens.Length);
                Assert.AreEqual(entry.Text, tokens[0]);
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Checks that <c>JustTpWords</c> returns a single token for "123-1231".
        /// The tokens are printed before the assertion to aid diagnosis.
        /// </summary>
        public void WordsNumbersWithDash()
        {
            TokenParserUtils parser = new TokenParserUtils();

            string[] tokens = parser.JustTpWords("123-1231");

            // Dump the tokens first so a failing run shows how the input was split.
            for (int i = 0; i < tokens.Length; i++)
            {
                Console.WriteLine(tokens[i]);
            }

            Assert.AreEqual(1, tokens.Length);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Tokenizes "lon-ma-pi-ike-ale" with <c>JustTpWordsNumbersPunctuation</c>,
        /// passes the result through <c>RemergeCompounds</c>, and checks that a single
        /// token equal to the original hyphenated text comes back.
        /// </summary>
        public void CompoundWordParsePrepPhraseAsCompound()
        {
            TokenParserUtils parser = new TokenParserUtils();

            string[] pieces = parser.JustTpWordsNumbersPunctuation("lon-ma-pi-ike-ale");
            string[] merged = parser.RemergeCompounds(pieces);

            // Print the merged tokens so a failing run shows what was produced.
            for (int i = 0; i < merged.Length; i++)
            {
                Console.WriteLine(merged[i]);
            }

            Assert.AreEqual(1, merged.Length);
            Assert.AreEqual("lon-ma-pi-ike-ale", merged[0]);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Tokenizes "tomo-tawa-kon" with <c>JustTpWordsNumbersPunctuation</c>,
        /// passes the result through <c>RemergeCompounds</c>, and checks that a single
        /// token equal to the original hyphenated text comes back.
        /// </summary>
        public void CompoundWordParseSimple()
        {
            TokenParserUtils parser = new TokenParserUtils();

            string[] pieces = parser.JustTpWordsNumbersPunctuation("tomo-tawa-kon");
            string[] merged = parser.RemergeCompounds(pieces);

            // Print the merged tokens so a failing run shows what was produced.
            for (int i = 0; i < merged.Length; i++)
            {
                Console.WriteLine(merged[i]);
            }

            Assert.AreEqual(1, merged.Length);
            Assert.AreEqual("tomo-tawa-kon", merged[0]);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Checks that <c>JustTpWordsNumbersPunctuation</c> keeps "123-1231" as one
        /// token whose text equals the input. Tokens are printed before asserting.
        /// </summary>
        public void WordsNumbersPunctuationWithDash()
        {
            string dashed = "123-1231";
            TokenParserUtils parser = new TokenParserUtils();

            string[] tokens = parser.JustTpWordsNumbersPunctuation(dashed);

            // Dump the tokens first so a failing run shows how the input was split.
            for (int i = 0; i < tokens.Length; i++)
            {
                Console.WriteLine(tokens[i]);
            }

            Assert.AreEqual(1, tokens.Length);
            Assert.AreEqual(dashed, tokens[0]);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Prints the normalized form of "jan Kunpapa", then prints each word returned
        /// by <c>ValidWords</c> for it. There are no explicit assertions; an exception
        /// from any call would propagate out of the method.
        /// </summary>
        public void ShouldBeGoodKunpapa()
        {
            const string input = "jan Kunpapa";

            // Log the normalized form of the input before parsing it.
            Normalizer normalizer = new Normalizer(Dialect.LooseyGoosey);
            Console.WriteLine(normalizer.NormalizeText(input));

            TokenParserUtils parser = new TokenParserUtils();
            Word[] parsed = parser.ValidWords(input);

            for (int i = 0; i < parsed.Length; i++)
            {
                Console.WriteLine(parsed[i]);
            }
        }
        /// <summary>
        /// Tokenizes <c>CorpusTexts.JanSin</c> with <c>JustTpWordsNumbersPunctuation</c>
        /// and passes every token to the <c>Word</c> constructor, printing an "Uh-oh"
        /// line for each token the constructor rejects by throwing. Makes no assertions.
        /// </summary>
        public void SpellCheck()
        {
            string           corpus = CorpusTexts.JanSin;
            TokenParserUtils parser = new TokenParserUtils();

            string[] tokens = parser.JustTpWordsNumbersPunctuation(corpus);
            for (int i = 0; i < tokens.Length; i++)
            {
                string token = tokens[i];
                try
                {
                    // Only the constructor's validation matters; the instance is discarded.
                    new Word(token);
                }
                catch (Exception)
                {
                    Console.WriteLine("Uh-oh: " + token);
                }
            }
        }