/// <summary>
/// Builds the parser and its tokenizer from resource files addressed relative to
/// <paramref name="dataDirectory"/>. The build, check, tag, chunk and head-rule
/// resources are addressed under "parser\"; the tokenizer model is addressed as
/// "EnglishTok.nbin" directly after the prefix.
/// </summary>
/// <param name="dataDirectory">
/// Path prefix concatenated verbatim in front of every resource name. No separator
/// is inserted, so the value is expected to already end with a directory separator.
/// </param>
/// <param name="useTagDictionary">
/// When true, the POS tagger is constructed with the "parser\tagdict" path in
/// addition to the tag model; when false, only the tag model path is supplied.
/// </param>
/// <param name="useCaseSensitiveTagDictionary">
/// Forwarded to the POS tagger constructor; only used when
/// <paramref name="useTagDictionary"/> is true.
/// </param>
/// <param name="beamSize">Forwarded unchanged to the MaximumEntropyParser constructor.</param>
/// <param name="advancePercentage">Forwarded unchanged to the MaximumEntropyParser constructor.</param>
public EnglishTreebankParser(string dataDirectory, bool useTagDictionary, bool useCaseSensitiveTagDictionary, int beamSize, double advancePercentage)
{
    // Build and check models, each read through its own binary GIS reader.
    SharpEntropy.GisModel constituentBuildModel = new SharpEntropy.GisModel(
        new SharpEntropy.IO.BinaryGisModelReader(dataDirectory + "parser\\build.nbin"));
    SharpEntropy.IMaximumEntropyModel constituentCheckModel = new SharpEntropy.GisModel(
        new SharpEntropy.IO.BinaryGisModelReader(dataDirectory + "parser\\check.nbin"));

    // The tag dictionary is optional; pick the matching tagger constructor.
    EnglishTreebankPosTagger tagger = useTagDictionary
        ? new EnglishTreebankPosTagger(dataDirectory + "parser\\tag.nbin", dataDirectory + "parser\\tagdict", useCaseSensitiveTagDictionary)
        : new EnglishTreebankPosTagger(dataDirectory + "parser\\tag.nbin");

    EnglishTreebankParserChunker parserChunker = new EnglishTreebankParserChunker(dataDirectory + "parser\\chunk.nbin");
    EnglishHeadRules rules = new EnglishHeadRules(dataDirectory + "parser\\head_rules");

    mParser = new MaximumEntropyParser(
        constituentBuildModel,
        constituentCheckModel,
        tagger,
        parserChunker,
        rules,
        beamSize,
        advancePercentage);

    // Note: the tokenizer model sits directly after the prefix, not under "parser\".
    mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(dataDirectory + "EnglishTok.nbin");
}
/// <summary>
/// Splits <paramref name="sentence"/> into tokens using the shared tokenizer,
/// creating that tokenizer on first use.
/// </summary>
/// <param name="sentence">Text handed to the tokenizer as-is.</param>
/// <returns>The array produced by the tokenizer's Tokenize call.</returns>
private string[] TokenizeSentence(string sentence)
{
    bool tokenizerMissing = (mTokenizer == null);
    if (tokenizerMissing)
    {
        // First call: construct the tokenizer from the model file addressed by mModelPath.
        mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(mModelPath + "EnglishTok.nbin");
    }

    string[] tokens = mTokenizer.Tokenize(sentence);
    return tokens;
}
/// <summary>
/// Sanitizes <paramref name="input"/>, tokenizes the result, and returns the tokens
/// that are not contained in m_stopWords, in their original order.
/// </summary>
/// <param name="input">Raw text; passed through Sanitize before tokenizing.</param>
/// <returns>A materialized list of the tokens that survived the stop-word filter.</returns>
public IEnumerable<string> Tokenize(string input)
{
    string cleaned = Sanitize(input);

    // A tokenizer is constructed from "Resources/EnglishTok.nbin" on every call.
    OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer englishTokenizer =
        new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer("Resources/EnglishTok.nbin");

    List<string> kept = new List<string>();
    foreach (string candidate in englishTokenizer.Tokenize(cleaned))
    {
        if (m_stopWords.Contains(candidate))
        {
            // Stop word: leave it out of the result.
            continue;
        }

        kept.Add(candidate);
    }

    return kept;
}
/// <summary>
/// Tokenizes a sentence with the cached tokenizer; the tokenizer is built the
/// first time this method runs with no tokenizer assigned.
/// </summary>
/// <param name="sentence">Text passed unchanged to the tokenizer.</param>
/// <returns>Whatever string array the tokenizer's Tokenize call yields.</returns>
private string[] TokenizeSentence(string sentence)
{
    // Fast path: a tokenizer already exists, so use it directly.
    if (mTokenizer != null)
    {
        return mTokenizer.Tokenize(sentence);
    }

    // Otherwise create it from the model file addressed by mModelPath, then tokenize.
    mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(mModelPath + "EnglishTok.nbin");
    return mTokenizer.Tokenize(sentence);
}
/// <summary>
/// Breaks a sentence into its individual tokens. The tokenizer is created on
/// demand from "EnglishTok.nbin" (prefixed with modelPath) and then kept in the
/// tokenizer field for later calls.
/// </summary>
/// <param name="sentence">The text to tokenize; passed to the tokenizer unchanged.</param>
/// <returns>The string array returned by the tokenizer's Tokenize call.</returns>
public string[] TokenizeSentence(string sentence)
{
    // Reuse the existing tokenizer when one has already been assigned.
    if (tokenizer != null)
    {
        return tokenizer.Tokenize(sentence);
    }

    tokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(modelPath + "EnglishTok.nbin");
    return tokenizer.Tokenize(sentence);
}