예제 #1
0
        public void Overlap_In_Lex_Tags()
        {
            // Two lexicon entries share the value "Landry" but carry different tags.
            var lexicon = new List<LexiconEntry>
            {
                new LexiconEntry { Value = "Landry", Tags = new List<string> { "F" } },
                new LexiconEntry { Value = "Landry", Tags = new List<string> { "L" } }
            };

            var tagger = new Tagger(lexicon);
            var tagged = tagger.Tag("Landry");

            // Expect a single token that carries both tags.
            Assert.AreEqual(1, tagged.Count);
            Assert.AreEqual(2, tagged[0].Tags.Count);
        }
예제 #2
0
        public void BasicFirstName_DualClasses()
        {
            Tagger tagger = GetBasicTagger(GetBasicNameLex());
            var tagged = tagger.Tag("Landry");

            // Expect one token with two tags: "L" at index 0 and "F" at index 1.
            Assert.AreEqual(1, tagged.Count);

            var landryTags = tagged[0].Tags;
            Assert.AreEqual(2, landryTags.Count);
            Assert.AreEqual("F", landryTags[1]);
            Assert.AreEqual("L", landryTags[0]);
        }
예제 #3
0
        public void BasicMatch_With_lowcase()
        {
            Tagger tagger = GetBasicTagger(GetBasicNameLex());
            var tagged = tagger.Tag("fred Flintstone");

            List<Pattern> generated = PatternMaker.MakePatterns(tagged);

            // Expect "?L" to be the only pattern produced for the lowercase "fred" input.
            bool hasExpected = generated.Exists(candidate => candidate.PatternText.Equals("?L"));
            Assert.IsTrue(hasExpected);
            Assert.AreEqual(1, generated.Count);
        }
예제 #4
0
        public void BasicFirstNameLastName()
        {
            Tagger tagger = GetBasicTagger(GetBasicNameLex());
            var tagged = tagger.Tag("Fred Flintstone");

            Assert.AreEqual(2, tagged.Count);

            // First token: exactly one tag, "F".
            var firstTags = tagged[0].Tags;
            Assert.AreEqual(1, firstTags.Count);
            Assert.AreEqual("F", firstTags[0]);

            // Second token: exactly one tag, "L".
            var secondTags = tagged[1].Tags;
            Assert.AreEqual(1, secondTags.Count);
            Assert.AreEqual("L", secondTags[0]);
        }
예제 #5
0
        public void BasicMatchWithSpecialCharsWithSpace()
        {
            Tagger tagger = GetBasicTagger(GetBasicNameLex());

            // Add ";" to the tagger's separator options before tagging.
            tagger.Options.Separators.Add(";");

            var tagged = tagger.Tag("Fred ; Flintstone");
            List<Pattern> generated = PatternMaker.MakePatterns(tagged);

            // Expect "FL" to be the only pattern produced.
            bool hasExpected = generated.Exists(candidate => candidate.PatternText.Equals("FL"));
            Assert.IsTrue(hasExpected);
            Assert.AreEqual(1, generated.Count);
        }
예제 #6
0
        public void BasicPatternGenerate()
        {
            Tagger tagger = GetBasicTagger(GetBasicNameLex());
            var tagged = tagger.Tag("Fred Flintstone Wilma Flintstone Barney Rubble Landry");

            List<Pattern> generated = PatternMaker.MakePatterns(tagged);

            // Both patterns must be present; they differ only in the tag of the final token.
            foreach (var expected in new[] { "FLFLFLF", "FLFLFLL" })
            {
                Assert.IsTrue(generated.Exists(candidate => candidate.PatternText.Equals(expected)));
            }

            Assert.AreEqual(2, generated.Count);
        }
예제 #7
0
        public void BasicFirstName_X2_LastName()
        {
            Tagger tagger = GetBasicTagger(GetBasicNameLex());
            var tagged = tagger.Tag("Fred and Wilma Flintstone");

            Assert.AreEqual(4, tagged.Count);

            // Token 0 ("Fred"): exactly one tag, "F".
            var fredTags = tagged[0].Tags;
            Assert.AreEqual(1, fredTags.Count);
            Assert.AreEqual("F", fredTags[0]);

            // Token 2 ("Wilma"): exactly one tag, "F". Token 1 ("and") is not checked.
            var wilmaTags = tagged[2].Tags;
            Assert.AreEqual(1, wilmaTags.Count);
            Assert.AreEqual("F", wilmaTags[0]);

            // Token 3 ("Flintstone"): exactly one tag, "L".
            var surnameTags = tagged[3].Tags;
            Assert.AreEqual(1, surnameTags.Count);
            Assert.AreEqual("L", surnameTags[0]);
        }
예제 #8
0
        public void BasicMatch_With_lowcase_proper()
        {
            // Build the tagger with MatchesAreCaseSensitive switched off.
            var lexicon = GetBasicNameLex();
            var options = new StringMixOptions { MatchesAreCaseSensitive = false };
            Tagger tagger = GetBasicTagger(lexicon, options);

            var tagged = tagger.Tag("fred Flintstone");
            List<Pattern> generated = PatternMaker.MakePatterns(tagged);

            // With these options the lowercase "fred" is expected to yield "FL" as the only pattern.
            bool hasExpected = generated.Exists(candidate => candidate.PatternText.Equals("FL"));
            Assert.IsTrue(hasExpected);
            Assert.AreEqual(1, generated.Count);
        }
예제 #9
0
        /// <summary>
        /// Tags every sentence of the specified document: each token receives its tag and
        /// the matching probability, and each sentence receives the mean of those probabilities.
        /// </summary>
        /// <param name="factory">The factory used in this analysis.</param>
        /// <param name="document">The document to be analyzed.</param>
        /// <exception cref="AnalyzerException">
        /// Thrown when the document's sentences are <c>null</c>, or when the tokens of a
        /// sentence are <c>null</c>.
        /// </exception>
        protected override void Evaluate(ITextFactory factory, IDocument document)
        {
            if (document.Sentences == null)
            {
                throw new AnalyzerException(this, "The document does not have the sentences detected.");
            }

            foreach (var current in document.Sentences)
            {
                if (current.Tokens == null)
                {
                    throw new AnalyzerException(this, "The document have a sentence without the tokenization.");
                }

                // The additional context is optional; it stays null when no provider is set.
                object[] additionalContext = null;
                if (AddContextProvider != null)
                {
                    additionalContext = AddContextProvider(document, current);
                }

                string[] tagged;
                double[] probabilities;

                // Tag and read the probabilities under the same lock so both belong to the same call.
                lock (Tagger)
                {
                    tagged        = Tagger.Tag(current.Tokens.ToTokenArray(), additionalContext);
                    probabilities = Tagger.Probabilities;
                }

                // Mean of the per-token probabilities; the plain sum is kept when the array is empty.
                var meanProbability = probabilities.Sum(p => p);
                if (probabilities.Length > 0)
                {
                    meanProbability /= probabilities.Length;
                }

                for (var index = 0; index < tagged.Length; index++)
                {
                    current.Tokens[index].POSTag            = tagged[index];
                    current.Tokens[index].POSTagProbability = probabilities[index];
                }

                // TODO: Add the ability to pre/post process each sentence during the analysis.

                current.TagProbability = meanProbability;
            }
        }
예제 #10
0
        public void ItsMyFirstTime()
        {
            // Lexicon: "F" marks a first name, "L" marks a last name.
            var lexicon = new List<LexiconEntry>
            {
                new LexiconEntry { Value = "Fred",       Tags = new List<string> { "F" } },
                new LexiconEntry { Value = "Wilma",      Tags = new List<string> { "F" } },
                new LexiconEntry { Value = "Flintstone", Tags = new List<string> { "L" } }
            };

            // Tag the input with a tagger built on that lexicon.
            var nameTagger = new Tagger(lexicon);
            List<TaggedToken> tagged = nameTagger.Tag("Fred and Wilma Flintstone");

            // Derive the patterns from the tagged tokens.
            List<Pattern> generated = PatternMaker.MakePatterns(tagged);

            // "and" is not in the lexicon, so the expected pattern has "?" in its place.
            bool hasExpected = generated.Exists(candidate => candidate.PatternText == "F?FL");
            Assert.IsTrue(hasExpected);
        }
예제 #11
0
 /// <summary>
 /// Extension method that runs the supplied tagger over this string and returns the
 /// resulting tagged tokens.
 /// </summary>
 /// <param name="str">
 /// The string to tag.
 /// </param>
 /// <param name="tagger">
 /// The <see cref="Tagger"/> whose <c>Tag</c> method is applied to <paramref name="str"/>.
 /// Passing the tagger in lets the caller supply a specific implementation when the
 /// general-purpose one provided by the library does not meet its requirements.
 /// </param>
 /// <returns>
 /// The list of tagged tokens returned by the tagger.
 /// </returns>
 public static List<TaggedToken> Tokenize(this String str, Tagger tagger)
 {
     return tagger.Tag(str);
 }
 /// <summary>
 /// Creates a coverage element for the given XML node.
 /// </summary>
 /// <param name="relativePath">The path stored in <c>RelativePath</c>.</param>
 /// <param name="node">The node passed to <c>CodeRange.Locate</c> and to the tagger.</param>
 /// <param name="tagger">The tagger whose <c>Tag</c> result is stored in <c>Qualifiers</c>.</param>
 public CoverageElement(string relativePath, XElement node, Tagger tagger)
 {
     this.RelativePath = relativePath;
     this.Position = CodeRange.Locate(node);
     this.Qualifiers = tagger.Tag(node);
 }