public string[] ChunkSentence(string[] tokens, string[] tags)
 {
     if (mChunker == null)
     {
         mChunker = new EnglishTreebankChunker(mModelPath + "EnglishChunk.nbin");
     }
     return mChunker.Chunk(tokens, tags);;
 }
예제 #2
0
        private void TestDechunk()
        {
            // detokenize
            var inputs = new string[]
            {
                "- Harry's your sister. - Look, what exactly am I supposed to be doing here?",
                "\"Piss off!\"",
                "- Sorry Mrs. Hudson, I'll skip the tea. Off out. - Both of you?",
                "I love playing half-life; that's just who I am!",
                "That's why I... have just begun to write a book.",
                "And they lived happily ever after...",
                "It's gonna be $1.5 sir."
            };

            //
            var tokenizer = new EnglishMaximumEntropyTokenizer(currentDirectory + "../Resources/Models/EnglishTok.nbin");
            var chunker = new EnglishTreebankChunker(currentDirectory + "../Resources/Models/EnglishChunk.nbin");
            var dechunker = new RegexDictionaryDechunker();
            var detokienizer = new DictionaryDetokenizer();
            var englishPosPath = currentDirectory + "../Resources/Models/EnglishPOS.nbin";
            var tagDictPath = currentDirectory + "../Resources/Models/Parser/tagdict";
            var posTagger = new EnglishMaximumEntropyPosTagger(englishPosPath, tagDictPath);

            foreach (var input in inputs)
            {
                string[] tokens = tokenizer.Tokenize(input);
                string[] tags = posTagger.Tag(tokens);

                var chunks = chunker.GetChunks(tokens, tags);
                var chunksStrings = chunks
                    .Select(ch => detokienizer.Detokenize(ch.TaggedWords.Select(tw => tw.Word).ToArray()))
                    .ToArray();
                var output = dechunker.Dechunk(chunksStrings);
                Console.WriteLine("input: " + input);
                Console.WriteLine("chunks: " + string.Join(" | ", chunks));
                Console.WriteLine("ouput: " + output);
                Console.WriteLine("--");
            }
        }
예제 #3
0
 private void initComponents()
 {
     sentenceDetector = new EnglishMaximumEntropySentenceDetector(Path.Combine(ModelDir, "EnglishSD.nbin"));
     tokenizer = new EnglishMaximumEntropyTokenizer(Path.Combine(ModelDir, "EnglishTok.nbin"));
     posTagger = new EnglishMaximumEntropyPosTagger(Path.Combine(ModelDir, "EnglishPOS.nbin"));
     chunker = new EnglishTreebankChunker(Path.Combine(ModelDir, "EnglishChunk.nbin"));
     parser = new EnglishTreebankParser(FileUtils.WithSeparator(ModelDir), true, false);
 }