Ejemplo n.º 1
0
 public void testIsInitialCapitalLetter()
 {
     // The expectations below depend only on the casing of the first character,
     // including a non-ASCII upper-case letter.
     var pattern = StringPattern.Recognize("Test");
     Assert.True(pattern.InitialCapitalLetter);

     pattern = StringPattern.Recognize("tEST");
     Assert.False(pattern.InitialCapitalLetter);

     pattern = StringPattern.Recognize("TesT");
     Assert.True(pattern.InitialCapitalLetter);

     pattern = StringPattern.Recognize("Üäöæß");
     Assert.True(pattern.InitialCapitalLetter);
 }
Ejemplo n.º 2
0
 public void testIsAllCapitalLetter()
 {
     // Tokens made up exclusively of upper-case letters (ASCII and umlauts).
     foreach (var token in new[] { "TEST", "ÄÄÄÜÜÜÖÖÖÖ" })
     {
         Assert.True(StringPattern.Recognize(token).AllCapitalLetter);
     }

     // A single lower-case letter anywhere in the token must clear the flag.
     foreach (var token in new[] { "ÄÄÄÜÜÜÖÖä", "ÄÄÄÜÜdÜÖÖ" })
     {
         Assert.False(StringPattern.Recognize(token).AllCapitalLetter);
     }
 }
Ejemplo n.º 3
0
 public void testIsAllLetters()
 {
     // Every sample consists solely of letters, in mixed casing and with non-ASCII characters.
     var letterOnlyTokens = new[] { "test", "TEST", "TesT", "grün", "üäöæß" };

     foreach (var token in letterOnlyTokens)
     {
         Assert.True(StringPattern.Recognize(token).AllLetter);
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Counts the word/tag pairs found in a stream of samples and stores, for every word,
        /// the tags whose count reaches the cutoff in the given tag dictionary.
        /// </summary>
        /// <param name="samples">The sample stream; it is read until <c>Read()</c> returns <c>null</c>.</param>
        /// <param name="dictionary">The dictionary that is queried for existing tags and receives the resulting entries.</param>
        /// <param name="caseSensitive">If <c>false</c>, each word is lower-cased with the invariant culture before it is counted.</param>
        /// <param name="cutoff">The minimum count a tag must reach for a word to be written to the dictionary.</param>
        public static void PopulatePOSDictionary(IObjectStream <POSSample> samples, IMutableTagDictionary dictionary, bool caseSensitive, int cutoff)
        {
            var       newEntries = new Dictionary <string, Dictionary <string, int> >();
            POSSample sample;

            while ((sample = samples.Read()) != null)
            {
                // Read the parallel arrays once per sample instead of once per access.
                var words = sample.Sentence;
                var tags  = sample.Tags;

                for (int i = 0; i < words.Length; i++)
                {
                    // Tokens that contain a digit are not collected.
                    if (StringPattern.Recognize(words[i]).ContainsDigit)
                    {
                        continue;
                    }

                    string word = caseSensitive ? words[i] : words[i].ToLowerInvariant();

                    // Single lookup: fetch the per-word counter table or create it.
                    Dictionary <string, int> tagCounts;
                    if (!newEntries.TryGetValue(word, out tagCounts))
                    {
                        tagCounts = new Dictionary <string, int>();
                        newEntries.Add(word, tagCounts);
                    }

                    var dicTags = dictionary.GetTags(word);
                    if (dicTags != null)
                    {
                        foreach (var tag in dicTags)
                        {
                            // Tags already present in the dictionary start at the cutoff,
                            // so they always pass the threshold check below.
                            if (!tagCounts.ContainsKey(tag))
                            {
                                tagCounts.Add(tag, cutoff);
                            }
                        }
                    }

                    int occurrences;
                    if (tagCounts.TryGetValue(tags[i], out occurrences))
                    {
                        tagCounts[tags[i]] = occurrences + 1;
                    }
                    else
                    {
                        tagCounts.Add(tags[i], 1);
                    }
                }
            }

            foreach (var wordEntry in newEntries)
            {
                var tagsForWord = new List <string>();
                foreach (var entry in wordEntry.Value)
                {
                    if (entry.Value >= cutoff)
                    {
                        tagsForWord.Add(entry.Key);
                    }
                }

                // Words without any tag at or above the cutoff are left out of the dictionary.
                if (tagsForWord.Count > 0)
                {
                    dictionary.Put(wordEntry.Key, tagsForWord.ToArray());
                }
            }
        }
Ejemplo n.º 5
0
 public void testIsAllLowerCaseLetter()
 {
     // Purely lower-case tokens, including umlauts and sharp s.
     foreach (var token in new[] { "test", "öäü", "öäüßßß" })
     {
         Assert.True(StringPattern.Recognize(token).AllLowerCaseLetter);
     }

     // Any upper-case letter, at any position, must clear the flag.
     foreach (var token in new[] { "Test", "TEST", "testT", "tesÖt" })
     {
         Assert.False(StringPattern.Recognize(token).AllLowerCaseLetter);
     }
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds one <c>bow=</c> feature per accepted word of the input.
        /// </summary>
        /// <param name="text">The words to turn into features.</param>
        /// <param name="extraInformation">Additional information; not read by this implementation.</param>
        /// <returns>
        /// The features in input order. When <c>useOnlyAllLetterTokens</c> is set, only words
        /// recognized as consisting entirely of letters contribute a feature.
        /// </returns>
        public List <string> ExtractFeatures(string[] text, Dictionary <string, object> extraInformation)
        {
            var features = new List <string>(text.Length);

            for (int i = 0; i < text.Length; i++)
            {
                string token = text[i];

                // Short-circuit keeps the pattern check from running when the filter is disabled.
                if (!useOnlyAllLetterTokens || StringPattern.Recognize(token).AllLetter)
                {
                    features.Add("bow=" + token);
                }
            }

            return features;
        }
Ejemplo n.º 7
0
 public void testIsAllDigit()
 {
     // A token of digits only sets the flag.
     var digitsOnly = StringPattern.Recognize("123456");
     Assert.True(digitsOnly.AllDigit);

     // A comma or a trailing letter must clear it.
     foreach (var token in new[] { "123,56", "12356f" })
     {
         Assert.False(StringPattern.Recognize(token).AllDigit);
     }
 }
Ejemplo n.º 8
0
 public void testDigits()
 {
     // Digits is expected to report how many digit characters the token holds.
     var allDigits = StringPattern.Recognize("123456").Digits;
     Assert.AreEqual(6, allDigits);

     var halfDigits = StringPattern.Recognize("123fff").Digits;
     Assert.AreEqual(3, halfDigits);

     var noDigits = StringPattern.Recognize("test").Digits;
     Assert.AreEqual(0, noDigits);
 }
Ejemplo n.º 9
0
 public void testContainsSlash()
 {
     // Slash at the end of a word and between digits.
     foreach (var token in new[] { "test/", "23/5" })
     {
         Assert.True(StringPattern.Recognize(token).ContainsSlash);
     }

     // Other punctuation must not be mistaken for a slash.
     var withoutSlash = StringPattern.Recognize("test.1-,");
     Assert.False(withoutSlash.ContainsSlash);
 }
Ejemplo n.º 10
0
 public void testContainsPeriod()
 {
     // Period at the end of a word and between digits.
     foreach (var token in new[] { "test.", "23.5" })
     {
         Assert.True(StringPattern.Recognize(token).ContainsPeriod);
     }

     // Other punctuation must not be mistaken for a period.
     var withoutPeriod = StringPattern.Recognize("test,/-1");
     Assert.False(withoutPeriod.ContainsPeriod);
 }
Ejemplo n.º 11
0
 public void testContainsLetters()
 {
     // Letters mixed with hyphens or digits still count.
     foreach (var token in new[] { "test--", "23h5ßm" })
     {
         Assert.True(StringPattern.Recognize(token).ContainsLetters);
     }

     // Punctuation and digits alone contain no letters.
     var withoutLetters = StringPattern.Recognize("---.1/,");
     Assert.False(withoutLetters.ContainsLetters);
 }
Ejemplo n.º 12
0
 public void testContainsHyphen()
 {
     // Hyphens after a word and between digits.
     foreach (var token in new[] { "test--", "23-5" })
     {
         Assert.True(StringPattern.Recognize(token).ContainsHyphen);
     }

     // Other punctuation must not be mistaken for a hyphen.
     var withoutHyphen = StringPattern.Recognize("test.1/,");
     Assert.False(withoutHyphen.ContainsHyphen);
 }
Ejemplo n.º 13
0
 public void testContainsDigit()
 {
     // A digit after a word and digits around a comma.
     foreach (var token in new[] { "test1", "23,5" })
     {
         Assert.True(StringPattern.Recognize(token).ContainsDigit);
     }

     // Letters and punctuation alone contain no digit.
     var withoutDigit = StringPattern.Recognize("test./-,");
     Assert.False(withoutDigit.ContainsDigit);
 }
Ejemplo n.º 14
0
 public void testContainsComma()
 {
     // Comma at the end of a word and between digits.
     foreach (var token in new[] { "test,", "23,5" })
     {
         Assert.True(StringPattern.Recognize(token).ContainsComma);
     }

     // Other punctuation must not be mistaken for a comma.
     var withoutComma = StringPattern.Recognize("test./-1");
     Assert.False(withoutComma.ContainsComma);
 }