/// <summary>
/// Runs the chunker over a tokenized, tagged sentence, creating the chunker on first use.
/// </summary>
/// <param name="tokens">Sentence tokens, forwarded unchanged to the chunker.</param>
/// <param name="tags">Tags forwarded unchanged to the chunker alongside <paramref name="tokens"/>.</param>
/// <returns>The array returned by the chunker's <c>Chunk</c> call.</returns>
public string[] ChunkSentence(string[] tokens, string[] tags)
{
    var chunker = mChunker;

    // The chunker is built only once, from "EnglishChunk.nbin" appended to mModelPath,
    // and cached in mChunker for later calls.
    if (chunker == null)
    {
        chunker = new EnglishTreebankChunker(mModelPath + "EnglishChunk.nbin");
        mChunker = chunker;
    }

    return chunker.Chunk(tokens, tags);
}
/// <summary>
/// Console demo: tokenizes, tags and chunks a fixed set of sample sentences,
/// detokenizes the words of each chunk, dechunks the resulting strings, and prints
/// the input, the chunks and the dechunked output for every sample.
/// </summary>
private void TestDechunk()
{
    // detokenize
    var samples = new string[]
    {
        "- Harry's your sister. - Look, what exactly am I supposed to be doing here?",
        "\"Piss off!\"",
        "- Sorry Mrs. Hudson, I'll skip the tea. Off out. - Both of you?",
        "I love playing half-life; that's just who I am!",
        "That's why I... have just begun to write a book.",
        "And they lived happily ever after...",
        "It's gonna be $1.5 sir."
    };

    // NOTE(review): in the flattened source this declaration directly follows a bare "//".
    // It is kept as live code because `tokenizer` is used in the loop below — confirm it was
    // not intentionally commented out in favour of a class-level tokenizer.
    var tokenizer = new EnglishMaximumEntropyTokenizer(currentDirectory + "../Resources/Models/EnglishTok.nbin");
    var chunker = new EnglishTreebankChunker(currentDirectory + "../Resources/Models/EnglishChunk.nbin");
    var dechunker = new RegexDictionaryDechunker();
    var detokenizer = new DictionaryDetokenizer();

    // The POS tagger takes both the model file and the tag dictionary path.
    var posTagger = new EnglishMaximumEntropyPosTagger(
        currentDirectory + "../Resources/Models/EnglishPOS.nbin",
        currentDirectory + "../Resources/Models/Parser/tagdict");

    foreach (var sample in samples)
    {
        string[] tokens = tokenizer.Tokenize(sample);
        string[] tags = posTagger.Tag(tokens);
        var chunks = chunker.GetChunks(tokens, tags);

        // First project each chunk to its words, then detokenize those words into one string per chunk.
        var chunkTexts = chunks
            .Select(chunk => chunk.TaggedWords.Select(taggedWord => taggedWord.Word).ToArray())
            .Select(words => detokenizer.Detokenize(words))
            .ToArray();

        var output = dechunker.Dechunk(chunkTexts);

        Console.WriteLine("input: " + sample);
        // Joins the chunk objects themselves (their string form), not the detokenized chunk strings.
        Console.WriteLine("chunks: " + string.Join(" | ", chunks));
        Console.WriteLine("ouput: " + output);
        Console.WriteLine("--");
    }
}
/// <summary>
/// Creates the sentence detector, tokenizer, POS tagger, chunker and parser from the
/// model files under <c>ModelDir</c> and stores them in the corresponding fields.
/// </summary>
private void initComponents()
{
    // Each path is resolved immediately before its component is built, so ModelDir is
    // read in the same order as the components are constructed.
    string sentenceModelPath = Path.Combine(ModelDir, "EnglishSD.nbin");
    sentenceDetector = new EnglishMaximumEntropySentenceDetector(sentenceModelPath);

    string tokenizerModelPath = Path.Combine(ModelDir, "EnglishTok.nbin");
    tokenizer = new EnglishMaximumEntropyTokenizer(tokenizerModelPath);

    string posModelPath = Path.Combine(ModelDir, "EnglishPOS.nbin");
    posTagger = new EnglishMaximumEntropyPosTagger(posModelPath);

    string chunkerModelPath = Path.Combine(ModelDir, "EnglishChunk.nbin");
    chunker = new EnglishTreebankChunker(chunkerModelPath);

    // The parser receives the model directory itself (passed through FileUtils.WithSeparator)
    // rather than a single model file.
    // NOTE(review): the meaning of the two boolean flags is not visible here — confirm against the parser's constructor.
    parser = new EnglishTreebankParser(FileUtils.WithSeparator(ModelDir), true, false);
}