// Two lexicon entries share the value "Landry" but carry different tags ("F" and "L").
// Tagging "Landry" is expected to give exactly one token that has two tags.
public void Overlap_In_Lex_Tags()
{
    var lexicon = new List<LexiconEntry>
    {
        new LexiconEntry { Value = "Landry", Tags = new List<string> { "F" } },
        new LexiconEntry { Value = "Landry", Tags = new List<string> { "L" } }
    };
    Tagger tagger = new Tagger(lexicon);

    var tagged = tagger.Tag("Landry");

    Assert.AreEqual(1, tagged.Count);
    Assert.AreEqual(2, tagged[0].Tags.Count);
}
// Tagging "Landry" with the basic name lexicon is expected to give one token
// with two tags: "L" at index 0 and "F" at index 1.
public void BasicFirstName_DualClasses()
{
    Tagger tagger = GetBasicTagger(GetBasicNameLex());

    var tagged = tagger.Tag("Landry");

    Assert.AreEqual(1, tagged.Count);

    var landryTags = tagged[0].Tags;
    Assert.AreEqual(2, landryTags.Count);
    Assert.AreEqual("F", landryTags[1]);
    Assert.AreEqual("L", landryTags[0]);
}
// With no options passed to the tagger, the lower-case "fred" in "fred Flintstone"
// is expected to show up as "?" so that the only generated pattern is "?L".
public void BasicMatch_With_lowcase()
{
    Tagger tagger = GetBasicTagger(GetBasicNameLex());
    var tagged = tagger.Tag("fred Flintstone");

    List<Pattern> generated = PatternMaker.MakePatterns(tagged);

    bool hasExpectedPattern = generated.Exists(candidate => candidate.PatternText.Equals("?L"));
    Assert.IsTrue(hasExpectedPattern);
    Assert.AreEqual(1, generated.Count);
}
// "Fred Flintstone" is expected to give two tokens: the first tagged only "F",
// the second tagged only "L".
public void BasicFirstNameLastName()
{
    Tagger tagger = GetBasicTagger(GetBasicNameLex());

    var tagged = tagger.Tag("Fred Flintstone");

    Assert.AreEqual(2, tagged.Count);

    var firstToken = tagged[0];
    Assert.AreEqual(1, firstToken.Tags.Count);
    Assert.AreEqual("F", firstToken.Tags[0]);

    var secondToken = tagged[1];
    Assert.AreEqual(1, secondToken.Tags.Count);
    Assert.AreEqual("L", secondToken.Tags[0]);
}
// After ";" is added to the tagger's separators, "Fred ; Flintstone" is expected
// to produce exactly one pattern, "FL".
public void BasicMatchWithSpecialCharsWithSpace()
{
    Tagger tagger = GetBasicTagger(GetBasicNameLex());
    tagger.Options.Separators.Add(";");

    var tagged = tagger.Tag("Fred ; Flintstone");
    List<Pattern> generated = PatternMaker.MakePatterns(tagged);

    bool hasExpectedPattern = generated.Exists(candidate => candidate.PatternText.Equals("FL"));
    Assert.IsTrue(hasExpectedPattern);
    Assert.AreEqual(1, generated.Count);
}
// The input ends with "Landry"; the test expects two patterns that differ only in
// the final position ("...F" and "...L") and no others.
public void BasicPatternGenerate()
{
    Tagger tagger = GetBasicTagger(GetBasicNameLex());
    var tagged = tagger.Tag("Fred Flintstone Wilma Flintstone Barney Rubble Landry");

    List<Pattern> generated = PatternMaker.MakePatterns(tagged);

    bool hasFirstNameEnding = generated.Exists(candidate => candidate.PatternText.Equals("FLFLFLF"));
    Assert.IsTrue(hasFirstNameEnding);

    bool hasLastNameEnding = generated.Exists(candidate => candidate.PatternText.Equals("FLFLFLL"));
    Assert.IsTrue(hasLastNameEnding);

    Assert.AreEqual(2, generated.Count);
}
// "Fred and Wilma Flintstone" is expected to give four tokens. The tokens at
// indexes 0 and 2 must each be tagged only "F", and the one at index 3 only "L".
// The token at index 1 ("and") is not checked here.
public void BasicFirstName_X2_LastName()
{
    Tagger tagger = GetBasicTagger(GetBasicNameLex());

    var tagged = tagger.Tag("Fred and Wilma Flintstone");

    Assert.AreEqual(4, tagged.Count);

    var fredTags = tagged[0].Tags;
    Assert.AreEqual(1, fredTags.Count);
    Assert.AreEqual("F", fredTags[0]);

    var wilmaTags = tagged[2].Tags;
    Assert.AreEqual(1, wilmaTags.Count);
    Assert.AreEqual("F", wilmaTags[0]);

    var flintstoneTags = tagged[3].Tags;
    Assert.AreEqual(1, flintstoneTags.Count);
    Assert.AreEqual("L", flintstoneTags[0]);
}
// With MatchesAreCaseSensitive switched off, the lower-case "fred" is expected to
// be tagged like "Fred", so the only generated pattern is "FL".
public void BasicMatch_With_lowcase_proper()
{
    var caseInsensitive = new StringMixOptions { MatchesAreCaseSensitive = false };
    Tagger tagger = GetBasicTagger(GetBasicNameLex(), caseInsensitive);

    var tagged = tagger.Tag("fred Flintstone");
    List<Pattern> generated = PatternMaker.MakePatterns(tagged);

    bool hasExpectedPattern = generated.Exists(candidate => candidate.PatternText.Equals("FL"));
    Assert.IsTrue(hasExpectedPattern);
    Assert.AreEqual(1, generated.Count);
}
/// <summary>
/// Runs the tagger over every sentence of the document, storing the returned tag and
/// probability on each token and the mean token probability on the sentence.
/// </summary>
/// <param name="factory">The factory used in this analysis.</param>
/// <param name="document">The document to be analyzed.</param>
/// <exception cref="AnalyzerException">
/// Thrown when the document has no sentences collection, or when a sentence has no tokens collection.
/// </exception>
protected override void Evaluate(ITextFactory factory, IDocument document)
{
    if (document.Sentences == null)
    {
        throw new AnalyzerException(this, "The document does not have the sentences detected.");
    }

    foreach (var sentence in document.Sentences)
    {
        if (sentence.Tokens == null)
        {
            throw new AnalyzerException(this, "The document have a sentence without the tokenization.");
        }

        // Optional extra context; stays null when no provider is configured.
        object[] additionalContext = null;
        if (AddContextProvider != null)
        {
            additionalContext = AddContextProvider(document, sentence);
        }

        string[] tags;
        double[] probs;

        // Tag() and Probabilities are read under one lock — presumably so the
        // probabilities belong to this Tag() call; confirm against the tagger.
        lock (Tagger)
        {
            tags = Tagger.Tag(sentence.Tokens.ToTokenArray(), additionalContext);
            probs = Tagger.Probabilities;
        }

        // Mean of the per-token probabilities; the sum is left untouched for an empty array.
        var total = probs.Sum();
        var sentenceProbability = probs.Length > 0 ? total / probs.Length : total;

        for (var index = 0; index < tags.Length; index++)
        {
            sentence.Tokens[index].POSTag = tags[index];
            sentence.Tokens[index].POSTagProbability = probs[index];
        }

        // TODO: Add the ability to pre/post process each sentence during the analysis.

        sentence.TagProbability = sentenceProbability;
    }
}
// Walkthrough: build a small lexicon, tag a sentence, generate patterns from the
// tokens, and check that the expected pattern is among them.
public void ItsMyFirstTime()
{
    // Lexicon: "F" stands for FirstName, "L" for LastName.
    var lexicon = new List<LexiconEntry>
    {
        new LexiconEntry { Value = "Fred", Tags = new List<string> { "F" } },
        new LexiconEntry { Value = "Wilma", Tags = new List<string> { "F" } },
        new LexiconEntry { Value = "Flintstone", Tags = new List<string> { "L" } }
    };

    // Tag the input with a tagger built on that lexicon.
    Tagger tagger = new Tagger(lexicon);
    List<TaggedToken> tokens = tagger.Tag("Fred and Wilma Flintstone");

    // Derive the patterns from the tagged tokens.
    List<Pattern> patterns = PatternMaker.MakePatterns(tokens);

    // "and" has no lexicon entry; the expected pattern has "?" in that position.
    bool hasExpectedPattern = patterns.Exists(candidate => candidate.PatternText == "F?FL");
    Assert.IsTrue(hasExpectedPattern);
}
/// <summary>
/// Tags this string with the supplied tagger and returns the resulting tagged tokens.
/// </summary>
/// <param name="str">
/// The string to be tagged.
/// </param>
/// <param name="tagger">
/// The tagger to apply. See the Tagger class, a library class that uses a lexicon and
/// options to produce a list of tagged tokens. Passing it in is useful when a specific
/// tagger implementation is needed: the library provides a general purpose one, but
/// consuming applications can extend that object to meet specific requirements.
/// </param>
/// <returns>
/// The list of tagged tokens returned by <c>tagger.Tag(str)</c>.
/// </returns>
public static List<TaggedToken> Tokenize(this String str, Tagger tagger)
{
    List<TaggedToken> taggedTokens = tagger.Tag(str);
    return taggedTokens;
}
/// <summary>
/// Initializes a coverage element from an XML node.
/// </summary>
/// <param name="relativePath">Value stored in <c>RelativePath</c>.</param>
/// <param name="node">
/// XML element passed to <c>CodeRange.Locate</c> to obtain <c>Position</c> and to
/// <paramref name="tagger"/> to obtain <c>Qualifiers</c>.
/// </param>
/// <param name="tagger">Tagger whose <c>Tag</c> result for <paramref name="node"/> is stored in <c>Qualifiers</c>.</param>
public CoverageElement(string relativePath, XElement node, Tagger tagger)
{
    this.RelativePath = relativePath;
    this.Position = CodeRange.Locate(node);
    this.Qualifiers = tagger.Tag(node);
}