// [Explicit-Dispose]
public void Dispose()
{
    // Release the native WordNet engine if it was ever created.
    _wn?.Dispose();

    // Drop references to the managed NLP pipeline components.
    _tokenizer = null;
    _sentenceDetector = null;
    _posTagger = null;
    _chunker = null;

    // Dispose CLI/C++ Dll
    ap = null;

    // Dispose all KB plugins (loop is a no-op when PlugInsNumber == 0).
    for (int index = 0; index < PlugInsNumber; index++)
    {
        KBDrivers[index] = null;
        KBDriversQueryPointers[index] = null;
    }
}
/// <summary>
/// Extracts the embedded EnglishSD.nbin sentence-detection model to disk
/// and builds the maximum-entropy sentence detector from it.
/// </summary>
public TextSentenceSplitter()
{
    string modelFilePath = Path.Combine(GetBinariesPath(), "Data", "EnglishSD.nbin");
    SaveResourceToFile(modelFilePath, "kyciti.Data.EnglishSD.nbin");
    _maximumEntropySentenceDetector = new EnglishMaximumEntropySentenceDetector(modelFilePath);
}
/// <summary>
/// Wires up the WordNet engine and the OpenNLP components (tokenizer,
/// sentence detector, POS tagger).
/// </summary>
/// <param name="nlpModelsPath">Directory containing the *.nbin model files and the Parser\tagdict file.</param>
/// <param name="wordNetPath">Root directory of the WordNet database.</param>
public WordNetBoostrap(string nlpModelsPath, string wordNetPath)
{
    this._wordNetPath = wordNetPath;
    _wn = new WordNetEngine(_wordNetPath, true);
    _tokenizer = new EnglishRuleBasedTokenizer(false);
    // Path.Combine is separator-safe: the original mixed raw concatenation
    // ("...Path + \"EnglishSD.nbin\"", needing a trailing '\') with a
    // leading-backslash literal (@"\Parser\tagdict", needing none), so no
    // single form of nlpModelsPath produced correct paths for both.
    _sentenceDetector = new EnglishMaximumEntropySentenceDetector(
        Path.Combine(nlpModelsPath, "EnglishSD.nbin"));
    _posTagger = new EnglishMaximumEntropyPosTagger(
        Path.Combine(nlpModelsPath, "EnglishPOS.nbin"),
        Path.Combine(nlpModelsPath, "Parser", "tagdict"));
}
/// <summary>
/// Splits a paragraph into sentences, lazily creating the
/// maximum-entropy detector on first use.
/// </summary>
/// <param name="paragraph">Raw input text.</param>
/// <returns>The detected sentences, in order.</returns>
public string[] SplitSentences(string paragraph)
{
    // Build the detector on demand so the model file is only loaded when needed.
    mSentenceDetector = mSentenceDetector
        ?? new EnglishMaximumEntropySentenceDetector(mModelPath + @"\EnglishSD.nbin");
    return mSentenceDetector.SentenceDetect(paragraph);
}
// NLP methods -------------------------------------------

/// <summary>
/// Splits a paragraph into sentences, lazily creating the
/// maximum-entropy detector on first use.
/// </summary>
/// <param name="paragraph">Raw input text.</param>
/// <returns>The detected sentences, in order.</returns>
private string[] SplitSentences(string paragraph)
{
    // Build the detector on demand so the model file is only loaded when needed.
    _sentenceDetector = _sentenceDetector
        ?? new EnglishMaximumEntropySentenceDetector(_modelPath + "EnglishSD.nbin");
    return _sentenceDetector.SentenceDetect(paragraph);
}
/// <summary>
/// Releases the WordNet engine and drops references to the NLP components.
/// </summary>
public void Dispose()
{
    _tokenizer = null;
    _sentenceDetector = null;
    _posTagger = null;
    SynsetArray = null;
    IsStopWord = null;
    // Guard against a partially-constructed instance: _wn is null if the
    // constructor threw before assigning it, and the original unconditional
    // call would raise NullReferenceException from Dispose. The sibling
    // Dispose implementation in this codebase already performs this check.
    _wn?.Dispose();
}
// Default Constructor
public SemCluster(string DataFolder)
{
    try
    {
        Console.WriteLine("\tSemCluster Text Analytics Tool");
        Console.WriteLine("\t------------------------------");
        Console.WriteLine("\t-Wikipedia local server couldn't be found!");
        Console.WriteLine("\t-Seeds SemAve is in manual mode!");
        Console.WriteLine();
        Console.WriteLine();
        Console.WriteLine("-> Resources loading ...");
        Console.WriteLine();

        #region Loading External Resources
        _wn = new WordNetEngine(DataFolder + "WordNet", InMemoryWordNet);
        _tokenizer = new EnglishRuleBasedTokenizer(TokenizeHyphen);
        _sentenceDetector = new EnglishMaximumEntropySentenceDetector(DataFolder + "EnglishSD.nbin");
        _posTagger = new EnglishMaximumEntropyPosTagger(DataFolder + "EnglishPOS.nbin", DataFolder + "\\Build\\tagdict");
        _chunker = new EnglishTreebankChunker(DataFolder + "EnglishChunk.nbin");
        #endregion

        PlugInsManager(DataFolder);

        Console.WriteLine("\tResources loaded successfully");
        Console.WriteLine("\t" + PlugInsNumber + " KB plug-ins found in the repository");
        Console.WriteLine("\tPress any key to continue ...");
        Console.ReadKey();
        Console.WriteLine();

        RootVirtualNode = _wn.GetSynSet("Noun:1740");
        ap = new AffinityPropagationClustering();

        SynSetRelationTypes = new WordNetApi.Core.WordNetEngine.SynSetRelation[2];
        SynSetRelationTypes[0] = WordNetApi.Core.WordNetEngine.SynSetRelation.Hypernym;
        SynSetRelationTypes[1] = WordNetApi.Core.WordNetEngine.SynSetRelation.InstanceHypernym;
    }
    catch
    {
        // Release anything already constructed, then rethrow the ORIGINAL
        // exception. The old code threw `new Exception(ex.Message)`, which
        // discarded both the concrete exception type and the stack trace.
        Dispose();
        throw;
    }
}
/// <summary>
/// Interactively grid-searches sentence-detector training parameters
/// (iterations x cuts) over the .train files of a user-chosen Input
/// subdirectory, scores each model by a Dice-style sentence overlap,
/// and persists the best model to the Output folder.
/// </summary>
private static void OptimizeSentenceDetectionTraining()
{
    // all directories in Input folder
    var inputFolderPath = CurrentDirectory + "Input/";
    var allDirectories = Directory.GetDirectories(inputFolderPath);
    Console.WriteLine("Pick the model to train:");
    for (var i = 0; i < allDirectories.Length; i++)
    {
        Console.WriteLine("{0} - {1}", i, Path.GetFileName(allDirectories[i]));
    }

    // read directory chosen by user
    int directoryIndexPicked = LoopUntilValidUserInput(input => int.Parse(input),
        i => i < allDirectories.Length,
        string.Format("Please enter a number in [0..{0}]", allDirectories.Length - 1));

    // read user parameters
    Console.WriteLine("Enter the iteration values to test, separated by a comma (ex: 10,100,200)");
    var iterations = LoopUntilValidUserInput(
        input => input.Split(',').Select(s => int.Parse(s.Trim())).ToList(),
        li => li != null && li.Any(),
        "At least one iteration value is required");
    Console.WriteLine("Enter the cut values to test, separated by a comma (ex: 1,2,5)");
    var cuts = LoopUntilValidUserInput(
        input => input.Split(',').Select(s => int.Parse(s.Trim())).ToList(),
        li => li != null && li.Any(),
        "At least one cut value is required");

    // train model file
    var directory = allDirectories[directoryIndexPicked];
    var allTrainFiles = Directory.GetFiles(directory, "*.train");
    Console.WriteLine("Training model with files {0}",
        string.Join(", ", allTrainFiles.Select(f => Path.GetFileNameWithoutExtension(f))));

    // load training data (wsj files are excluded from the accuracy corpus)
    var allSentences = new List<string>();
    foreach (var file in allTrainFiles.Where(f => !f.Contains("wsj")))
    {
        allSentences.AddRange(File.ReadAllLines(file));
    }
    var testData = string.Join(" ", allSentences);

    var bestIterationValue = iterations.First();
    // BUG FIX: the best cut must start from the cut candidates, not the
    // iteration candidates (the original read `iterations.First()` here).
    var bestCutValue = cuts.First();
    var bestAccuracy = 0.0d;
    var endOfSentenceScanner = new CharactersSpecificEndOfSentenceScanner('.', '?', '!', '"', '-', '…');
    foreach (var iteration in iterations)
    {
        foreach (var cut in cuts)
        {
            var model = MaximumEntropySentenceDetector.TrainModel(allTrainFiles, iteration, cut, endOfSentenceScanner);
            // compute accuracy
            var sentenceDetector = new MaximumEntropySentenceDetector(model, endOfSentenceScanner);
            var results = sentenceDetector.SentenceDetect(testData);

            // not perfect for comparing files but it gives a very good approximation
            //var commonValues = allSentences.Intersect(results).Count();
            var nbOfCommonSentences = 0;
            foreach (var result in results)
            {
                if (allSentences.Contains(result))
                {
                    nbOfCommonSentences++;
                }
                else
                {
                    //Console.WriteLine(result);
                }
            }
            // Dice coefficient: 2*|common| / (|expected| + |detected|)
            var accuracyScore = (float)2 * nbOfCommonSentences / (allSentences.Count + results.Count());
            Console.WriteLine("Accuracy for iteration={0} and cut={1}: {2}", iteration, cut, accuracyScore);
            if (accuracyScore > bestAccuracy)
            {
                bestAccuracy = accuracyScore;
                bestIterationValue = iteration;
                bestCutValue = cut;
            }
        }
    }

    // Persist model
    var outputFilePath = CurrentDirectory + "Output/" + Path.GetFileName(directory) + ".nbin";
    Console.WriteLine("Persisting model for iteration={0} and cut={1} to file '{2}'...",
        bestIterationValue, bestCutValue, outputFilePath);
    var bestModel = MaximumEntropySentenceDetector.TrainModel(allTrainFiles, bestIterationValue, bestCutValue, endOfSentenceScanner);
    new BinaryGisModelWriter().Persist(bestModel, outputFilePath);
    Console.WriteLine("Output file written.");
}