/// <summary>
/// Trains a GIS model from the supplied training events and writes it to disk
/// in the binary GIS model format.
/// </summary>
/// <param name="eventReader">Source of the training events.</param>
/// <param name="outputFilename">Path of the file the trained model is persisted to.</param>
public static void Train(SharpEntropy.ITrainingEventReader eventReader, string outputFilename)
{
    SharpEntropy.GisTrainer gisTrainer = new SharpEntropy.GisTrainer(0.1);

    // Index the events first, then hand the indexed data to the trainer.
    // NOTE(review): 100 and 5 are presumably the iteration count and the
    // feature cutoff - confirm against the SharpEntropy API.
    SharpEntropy.TwoPassDataIndexer indexedEvents = new SharpEntropy.TwoPassDataIndexer(eventReader, 5);
    gisTrainer.TrainModel(100, indexedEvents);

    SharpEntropy.GisModel trainedModel = new SharpEntropy.GisModel(gisTrainer);

    SharpEntropy.IO.BinaryGisModelWriter modelWriter = new SharpEntropy.IO.BinaryGisModelWriter();
    modelWriter.Persist(trainedModel, outputFilename);
}
/// <summary>
/// Creates a parser by loading its models from files located under
/// <paramref name="dataDirectory"/>.
/// </summary>
/// <remarks>
/// The build, check, tag, chunk and head-rule files are read from the
/// <c>parser</c> subdirectory; the tokenizer model (<c>EnglishTok.nbin</c>)
/// is read from <paramref name="dataDirectory"/> itself. Paths are assembled
/// with <see cref="System.IO.Path.Combine(string, string)"/>, so the directory
/// may be given with or without a trailing separator.
/// </remarks>
/// <param name="dataDirectory">Root directory that holds the model files.</param>
/// <param name="useTagDictionary">
/// When true, the POS tagger is also given the <c>tagdict</c> file.
/// </param>
/// <param name="useCaseSensitiveTagDictionary">
/// Passed to the POS tagger together with the tag dictionary; ignored when
/// <paramref name="useTagDictionary"/> is false.
/// </param>
/// <param name="beamSize">Forwarded unchanged to the <c>MaximumEntropyParser</c>.</param>
/// <param name="advancePercentage">Forwarded unchanged to the <c>MaximumEntropyParser</c>.</param>
public EnglishTreebankParser(string dataDirectory, bool useTagDictionary, bool useCaseSensitiveTagDictionary, int beamSize, double advancePercentage)
{
    // A null directory is treated as empty (i.e. relative paths), which is
    // what plain string concatenation would have produced.
    string baseDirectory = dataDirectory ?? string.Empty;
    string parserDirectory = System.IO.Path.Combine(baseDirectory, "parser");

    SharpEntropy.IO.BinaryGisModelReader buildModelReader = new SharpEntropy.IO.BinaryGisModelReader(System.IO.Path.Combine(parserDirectory, "build.nbin"));
    SharpEntropy.GisModel buildModel = new SharpEntropy.GisModel(buildModelReader);

    SharpEntropy.IO.BinaryGisModelReader checkModelReader = new SharpEntropy.IO.BinaryGisModelReader(System.IO.Path.Combine(parserDirectory, "check.nbin"));
    SharpEntropy.IMaximumEntropyModel checkModel = new SharpEntropy.GisModel(checkModelReader);

    string tagModelPath = System.IO.Path.Combine(parserDirectory, "tag.nbin");
    EnglishTreebankPosTagger posTagger;
    if (useTagDictionary)
    {
        posTagger = new EnglishTreebankPosTagger(tagModelPath, System.IO.Path.Combine(parserDirectory, "tagdict"), useCaseSensitiveTagDictionary);
    }
    else
    {
        posTagger = new EnglishTreebankPosTagger(tagModelPath);
    }

    EnglishTreebankParserChunker chunker = new EnglishTreebankParserChunker(System.IO.Path.Combine(parserDirectory, "chunk.nbin"));
    EnglishHeadRules headRules = new EnglishHeadRules(System.IO.Path.Combine(parserDirectory, "head_rules"));

    mParser = new MaximumEntropyParser(buildModel, checkModel, posTagger, chunker, headRules, beamSize, advancePercentage);

    // The tokenizer model lives directly in the data directory, not under "parser".
    mTokenizer = new OpenNLP.Tools.Tokenize.EnglishMaximumEntropyTokenizer(System.IO.Path.Combine(baseDirectory, "EnglishTok.nbin"));
}
/// <summary>
/// Ensures a name finder exists in <c>mFinders</c> for every requested model,
/// loading <c>mModelPath + name + ".nbin"</c> for each name not yet registered.
/// </summary>
/// <param name="models">Names of the models that must be available.</param>
private void CreateModels(IEnumerable <string> models)
{
    foreach (string name in models)
    {
        // Already loaded - nothing to do for this model.
        if (mFinders.ContainsKey(name))
        {
            continue;
        }

        string path = mModelPath + name + ".nbin";
        SharpEntropy.IO.BinaryGisModelReader reader = new SharpEntropy.IO.BinaryGisModelReader(path);
        SharpEntropy.IMaximumEntropyModel loadedModel = new SharpEntropy.GisModel(reader);

        mFinders.Add(name, new MaximumEntropyNameFinder(loadedModel));
    }
}
/// <summary>
/// Ensures a name finder exists in <c>mFinders</c> for every requested model,
/// loading <c>mModelPath + name + ".nbin"</c> for each name not yet registered.
/// </summary>
/// <param name="models">Names of the models that must be available.</param>
private void CreateModels(string[] models)
{
    foreach (string name in models)
    {
        // Skip models whose finder has already been created.
        if (mFinders.ContainsKey(name))
        {
            continue;
        }

        string path = mModelPath + name + ".nbin";
        SharpEntropy.IO.BinaryGisModelReader reader = new SharpEntropy.IO.BinaryGisModelReader(path);
        SharpEntropy.IMaximumEntropyModel loadedModel = new SharpEntropy.GisModel(reader);

        MaximumEntropyNameFinder nameFinder = new MaximumEntropyNameFinder(loadedModel);
        mFinders.Add(name, nameFinder);
    }
}
// Constructors ---------------------

/// <summary>
/// Creates a parser by loading its models from files located under
/// <paramref name="dataDirectory"/>.
/// </summary>
/// <remarks>
/// The build, check, tag, chunk and head-rule files are read from the
/// <c>parser</c> subdirectory; the tokenizer model (<c>EnglishTok.nbin</c>)
/// is read from <paramref name="dataDirectory"/> itself. Every path is built
/// with <see cref="Path.Combine(string, string)"/>, so the directory may be
/// given with or without a trailing separator.
/// </remarks>
/// <param name="dataDirectory">Root directory that holds the model files.</param>
/// <param name="useTagDictionary">
/// When true, the POS tagger is also given the <c>tagdict</c> file.
/// </param>
/// <param name="useCaseSensitiveTagDictionary">
/// Passed to the POS tagger together with the tag dictionary; ignored when
/// <paramref name="useTagDictionary"/> is false.
/// </param>
/// <param name="beamSize">Forwarded unchanged to the <c>MaximumEntropyParser</c>.</param>
/// <param name="advancePercentage">Forwarded unchanged to the <c>MaximumEntropyParser</c>.</param>
public EnglishTreebankParser(string dataDirectory, bool useTagDictionary, bool useCaseSensitiveTagDictionary, int beamSize, double advancePercentage)
{
    // A null directory is treated as empty (i.e. relative paths), which is
    // what plain string concatenation would have produced.
    string baseDirectory = dataDirectory ?? string.Empty;
    string parserDirectory = Path.Combine(baseDirectory, "parser");

    var buildModelReader = new SharpEntropy.IO.BinaryGisModelReader(Path.Combine(parserDirectory, "build.nbin"));
    var buildModel = new SharpEntropy.GisModel(buildModelReader);

    var checkModelReader = new SharpEntropy.IO.BinaryGisModelReader(Path.Combine(parserDirectory, "check.nbin"));
    SharpEntropy.IMaximumEntropyModel checkModel = new SharpEntropy.GisModel(checkModelReader);

    string tagModelPath = Path.Combine(parserDirectory, "tag.nbin");
    EnglishTreebankPosTagger posTagger = useTagDictionary
        ? new EnglishTreebankPosTagger(tagModelPath, Path.Combine(parserDirectory, "tagdict"), useCaseSensitiveTagDictionary)
        : new EnglishTreebankPosTagger(tagModelPath);

    var chunker = new EnglishTreebankParserChunker(Path.Combine(parserDirectory, "chunk.nbin"));
    var headRules = new EnglishHeadRules(Path.Combine(parserDirectory, "head_rules"));

    _parser = new MaximumEntropyParser(buildModel, checkModel, posTagger, chunker, headRules, beamSize, advancePercentage);

    // The tokenizer model lives directly in the data directory, not under "parser".
    _tokenizer = new Tokenize.EnglishMaximumEntropyTokenizer(Path.Combine(baseDirectory, "EnglishTok.nbin"));
}
/// <summary>
/// Loads a name finder for each requested model that is not already present
/// in <c>mFinders</c>. Model files are read from
/// <c>mModelPath + name + ".nbin"</c>.
/// </summary>
/// <param name="models">Names of the models that must be available.</param>
private void CreateModels(IEnumerable<string> models)
{
    foreach (string requested in models)
    {
        bool alreadyLoaded = mFinders.ContainsKey(requested);
        if (alreadyLoaded)
        {
            continue;
        }

        string modelFile = mModelPath + requested + ".nbin";
        SharpEntropy.IO.BinaryGisModelReader modelReader = new SharpEntropy.IO.BinaryGisModelReader(modelFile);
        SharpEntropy.IMaximumEntropyModel entropyModel = new SharpEntropy.GisModel(modelReader);

        var nameFinder = new MaximumEntropyNameFinder(entropyModel);
        mFinders.Add(requested, nameFinder);
    }
}
/// <summary>
/// Trains a GIS model from in-memory training text and stores it in the
/// static <c>model</c> field, then caches the index of the "Positive"
/// outcome in <c>positiveIdx</c>.
/// </summary>
/// <param name="learnFileContent">
/// Training data as text; it is encoded as UTF-8 and fed to the trainer
/// through a line-based plain-text data reader.
/// </param>
private static void Learn(String learnFileContent)
{
    UTF8Encoding enc = new UTF8Encoding();
    byte[] data = enc.GetBytes(learnFileContent);

    // Disposing the StreamReader also disposes the MemoryStream it wraps.
    // The whole training sequence stays inside the using block so the reader
    // remains open for as long as anything built on it is in use.
    using (StreamReader trainingStreamReader = new StreamReader(new MemoryStream(data)))
    {
        SharpEntropy.ITrainingEventReader eventReader =
            new SharpEntropy.BasicEventReader(new SharpEntropy.PlainTextByLineDataReader(trainingStreamReader));

        SharpEntropy.GisTrainer trainer = new SharpEntropy.GisTrainer();
        trainer.TrainModel(eventReader);

        model = new SharpEntropy.GisModel(trainer);
        positiveIdx = model.GetOutcomeIndex("Positive");
    }
}