/// <summary>
/// Runs a fixed sample sentence through word tokenization, part-of-speech
/// tagging and named-entity chunking, then prints the source sentence
/// followed by the chunker result.
/// </summary>
static void TestNameEntityRecognizer()
{
    string sentence =
        "WASHINGTON -- In the wake of a string of abuses by New York police officers in the 1990s, Loretta E. Lynch, " +
        "the top federal prosecutor in Brooklyn, spoke forcefully about the pain of a broken trust that African-Americans " +
        "felt and said the responsibility for repairing generations of miscommunication and mistrust fell to law enforcement.";

    // Stage 1: split the sentence into word tokens.
    var wordTokens = Nltk.Tokenize.WordTokenize(sentence);

    // Stage 2: tag the .NET view of the tokens with parts of speech.
    var taggedTokens = Nltk.PosTag(wordTokens.AsNet);

    // Stage 3: named-entity chunking.
    // NOTE: This operation requires NumPy library for IronPython
    var namedEntities = Nltk.NeChunk(taggedTokens);

    string header = $"NER output for text: '{sentence}'";
    BuiltIns.Print(header);
    BuiltIns.Print(namedEntities);
}
/// <summary>
/// Tokenizes <c>text</c> (declared outside this method; not visible here),
/// records every token in a conditional frequency distribution whose
/// condition is the token's character length, and prints the frequency of
/// each recorded word under each condition.
/// </summary>
static void TestCondFreqDist()
{
    var tokenized = Nltk.Tokenize.WordTokenize(text);
    var distribution = new Nltk.Probability.ConditionalFreqDist();

    // Count each token under the condition "length of the token".
    foreach (string token in tokenized.AsNet)
    {
        distribution[token.Length][token] += 1;
    }

    // Walk the underlying Python object to enumerate the recorded conditions.
    foreach (var wordLength in distribution.PyObject)
    {
        // Look up the per-condition distribution once and reuse it below.
        var perLength = distribution[wordLength];

        foreach (var entry in perLength)
        {
            var frequency = perLength.freq(entry);
            BuiltIns.Print($"Cond. frequency of {entry} {frequency} [condition is word length ={wordLength}]");
        }
    }
}