/// <summary>
/// Creates a parser by loading the build, check, tag, chunk and head-rule resources from the
/// "parser" subdirectory of <paramref name="dataDirectory"/>, and the tokenizer model
/// ("EnglishTok.nbin") from <paramref name="dataDirectory"/> itself.
/// </summary>
/// <param name="dataDirectory">Directory holding the model files; a trailing directory separator is optional.</param>
/// <param name="useTagDictionary">When true, the POS tagger is additionally given the "tagdict" file.</param>
/// <param name="useCaseSensitiveTagDictionary">Passed to the POS tagger along with the tag dictionary; not used when <paramref name="useTagDictionary"/> is false.</param>
/// <param name="beamSize">Forwarded unchanged to the MaximumEntropyParser constructor.</param>
/// <param name="advancePercentage">Forwarded unchanged to the MaximumEntropyParser constructor.</param>
public EnglishTreebankParser(string dataDirectory, bool useTagDictionary, bool useCaseSensitiveTagDictionary, int beamSize, double advancePercentage)
        {
            // String concatenation treated a null directory as empty; keep that instead of letting Path.Combine throw.
            string rootDirectory = dataDirectory ?? string.Empty;

            // Path.Combine inserts the platform's separator only when it is missing, so the paths no longer
            // depend on a trailing separator in dataDirectory or on the Windows-only "\\" literal.
            string parserDirectory = System.IO.Path.Combine(rootDirectory, "parser");

            SharpEntropy.IO.BinaryGisModelReader buildModelReader = new SharpEntropy.IO.BinaryGisModelReader(System.IO.Path.Combine(parserDirectory, "build.nbin"));
            SharpEntropy.GisModel buildModel = new SharpEntropy.GisModel(buildModelReader);

            SharpEntropy.IO.BinaryGisModelReader checkModelReader = new SharpEntropy.IO.BinaryGisModelReader(System.IO.Path.Combine(parserDirectory, "check.nbin"));
            SharpEntropy.IMaximumEntropyModel checkModel = new SharpEntropy.GisModel(checkModelReader);

            string tagModelPath = System.IO.Path.Combine(parserDirectory, "tag.nbin");
            EnglishTreebankPosTagger posTagger;

            if (useTagDictionary)
            {
                posTagger = new EnglishTreebankPosTagger(tagModelPath, System.IO.Path.Combine(parserDirectory, "tagdict"), useCaseSensitiveTagDictionary);
            }
            else
            {
                posTagger = new EnglishTreebankPosTagger(tagModelPath);
            }

            EnglishTreebankParserChunker chunker = new EnglishTreebankParserChunker(System.IO.Path.Combine(parserDirectory, "chunk.nbin"));
            EnglishHeadRules headRules = new EnglishHeadRules(System.IO.Path.Combine(parserDirectory, "head_rules"));

            mParser = new MaximumEntropyParser(buildModel, checkModel, posTagger, chunker, headRules, beamSize, advancePercentage);

            // The tokenizer model sits directly in the data directory, not under "parser".
            mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(System.IO.Path.Combine(rootDirectory, "EnglishTok.nbin"));
        }
        /// <summary>
        /// Builds the parser from model files on disk: "build.nbin", "check.nbin", "tag.nbin",
        /// optionally "tagdict", "chunk.nbin" and "head_rules" are read from the "parser"
        /// subdirectory of <paramref name="dataDirectory"/>; "EnglishTok.nbin" is read from
        /// <paramref name="dataDirectory"/> itself.
        /// </summary>
        /// <param name="dataDirectory">Root directory of the model files, with or without a trailing separator.</param>
        /// <param name="useTagDictionary">Selects the POS tagger constructor that also takes the tag dictionary file.</param>
        /// <param name="useCaseSensitiveTagDictionary">Handed to the POS tagger only when <paramref name="useTagDictionary"/> is true.</param>
        /// <param name="beamSize">Passed through to the MaximumEntropyParser constructor.</param>
        /// <param name="advancePercentage">Passed through to the MaximumEntropyParser constructor.</param>
        public EnglishTreebankParser(string dataDirectory, bool useTagDictionary, bool useCaseSensitiveTagDictionary, int beamSize, double advancePercentage)
        {
            // A null directory used to behave like an empty string under concatenation; preserve that.
            string baseDirectory = dataDirectory ?? string.Empty;

            // Build every path with Path.Combine so the separator matches the current platform and a
            // missing trailing separator on dataDirectory no longer corrupts the file names.
            string parserFolder   = System.IO.Path.Combine(baseDirectory, "parser");
            string buildModelPath = System.IO.Path.Combine(parserFolder, "build.nbin");
            string checkModelPath = System.IO.Path.Combine(parserFolder, "check.nbin");
            string tagModelPath   = System.IO.Path.Combine(parserFolder, "tag.nbin");
            string tagDictPath    = System.IO.Path.Combine(parserFolder, "tagdict");
            string chunkModelPath = System.IO.Path.Combine(parserFolder, "chunk.nbin");
            string headRulesPath  = System.IO.Path.Combine(parserFolder, "head_rules");
            string tokenizerPath  = System.IO.Path.Combine(baseDirectory, "EnglishTok.nbin");

            SharpEntropy.IO.BinaryGisModelReader buildModelReader = new SharpEntropy.IO.BinaryGisModelReader(buildModelPath);
            SharpEntropy.GisModel buildModel = new SharpEntropy.GisModel(buildModelReader);

            SharpEntropy.IO.BinaryGisModelReader checkModelReader = new SharpEntropy.IO.BinaryGisModelReader(checkModelPath);
            SharpEntropy.IMaximumEntropyModel    checkModel       = new SharpEntropy.GisModel(checkModelReader);

            EnglishTreebankPosTagger posTagger;

            if (useTagDictionary)
            {
                posTagger = new EnglishTreebankPosTagger(tagModelPath, tagDictPath, useCaseSensitiveTagDictionary);
            }
            else
            {
                posTagger = new EnglishTreebankPosTagger(tagModelPath);
            }

            EnglishTreebankParserChunker chunker   = new EnglishTreebankParserChunker(chunkModelPath);
            EnglishHeadRules             headRules = new EnglishHeadRules(headRulesPath);

            mParser = new MaximumEntropyParser(buildModel, checkModel, posTagger, chunker, headRules, beamSize, advancePercentage);

            mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(tokenizerPath);
        }
Esempio n. 3
0
 /// <summary>
 /// Tokenizes a sentence with the cached tokenizer, creating it from
 /// mModelPath + "EnglishTok.nbin" the first time it is needed.
 /// </summary>
 /// <param name="sentence">The text handed to the tokenizer.</param>
 /// <returns>The array produced by the tokenizer's Tokenize call.</returns>
 private string[] TokenizeSentence(string sentence)
 {
     // Fast path: an earlier call already built the tokenizer.
     if (mTokenizer != null)
     {
         return mTokenizer.Tokenize(sentence);
     }

     mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(mModelPath + "EnglishTok.nbin");
     return mTokenizer.Tokenize(sentence);
 }
Esempio n. 4
0
        /// <summary>
        /// Sanitizes the input, tokenizes it with a tokenizer loaded from
        /// "Resources/EnglishTok.nbin", and returns the tokens that are not in m_stopWords.
        /// </summary>
        /// <param name="input">Raw text; it is passed through Sanitize before tokenizing.</param>
        /// <returns>A materialized list of the tokens that survived the stop-word filter.</returns>
        public IEnumerable <string> Tokenize(string input)
        {
            var cleanedText = Sanitize(input);

            // NOTE(review): a new tokenizer is constructed on every call.
            var englishTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer("Resources/EnglishTok.nbin");
            var rawTokens = englishTokenizer.Tokenize(cleanedText);

            // Keep only the tokens that are not listed as stop words.
            var keptTokens = (from token in rawTokens
                              where !m_stopWords.Contains(token)
                              select token).ToList();

            return keptTokens;
        }
        /// <summary>
        /// Runs the tokenizer over a sentence. The tokenizer is created on first use
        /// from the file mModelPath + "EnglishTok.nbin" and kept in mTokenizer afterwards.
        /// </summary>
        /// <param name="sentence">The text to tokenize.</param>
        /// <returns>The result of the tokenizer's Tokenize call.</returns>
        private string[] TokenizeSentence(string sentence)
        {
            bool tokenizerMissing = (mTokenizer == null);

            if (tokenizerMissing)
            {
                string tokenizerModelFile = mModelPath + "EnglishTok.nbin";
                mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(tokenizerModelFile);
            }

            string[] tokens = mTokenizer.Tokenize(sentence);
            return tokens;
        }
Esempio n. 6
0
        /// <summary>
        /// Splits a sentence into tokens, creating the tokenizer from
        /// modelPath + "EnglishTok.nbin" on the first call and reusing it afterwards.
        /// </summary>
        /// <param name="sentence">The sentence to tokenize.</param>
        /// <returns>The array returned by the tokenizer's Tokenize call.</returns>
        public string[] TokenizeSentence(string sentence)
        {
            var activeTokenizer = tokenizer;

            if (activeTokenizer == null)
            {
                // First use: load the model and remember the instance in the field.
                activeTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(modelPath + "EnglishTok.nbin");
                tokenizer = activeTokenizer;
            }

            return activeTokenizer.Tokenize(sentence);
        }