/// <summary>
/// Loads all the necessary resources for the library: the sentence-detector,
/// tokenizer and POS-tagger models, plus the word lists used for scoring.
/// Sets <c>isLibraryLoaded</c> to true on success, false on any failure.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown (wrapping the original error) when any resource fails to load.</exception>
public void LoadLibrary()
{
    try
    {
        // Materialize the embedded model resources on disk so the OpenNLP loaders can read them.
        HelperMethods.CreateResourceInFileSystem("EnglishSD.nbin");
        HelperMethods.CreateResourceInFileSystem("EnglishTok.nbin");
        HelperMethods.CreateResourceInFileSystem("EnglishPOS.nbin");

        sentenceDetector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");
        tokenizer = new EnglishMaximumEntropyTokenizer("EnglishTok.nbin");
        posTagger = new EnglishMaximumEntropyPosTagger("EnglishPOS.nbin");

        positive_words = HelperMethods.Import_PositiveWords();    // data of positive words
        negative_words = HelperMethods.Import_NegativeWords();    // data of negative words
        emotion_words = HelperMethods.Import_EmotionWords();      // data of emotion words with 5 expression values
        inclusion_values = HelperMethods.Import_InclusiveWords(); // data of inclusion with rate
        exclusion_values = HelperMethods.Import_ExclusionWords(); // data of exclusion with rate

        isLibraryLoaded = true;
    }
    catch (Exception e)
    {
        isLibraryLoaded = false;
        // InvalidOperationException is more specific than the bare Exception the
        // original threw; callers catching Exception still catch it.
        throw new InvalidOperationException("Error in library load state", e);
    }
}
/// <summary>
/// Spell-corrects a sentence token by token. Tokens accepted by
/// Program.Applywords are passed through Correct(); others (punctuation etc.)
/// are kept as-is. Word tokens are re-joined with a trailing space, other
/// tokens with no separator.
/// </summary>
/// <param name="sent">The sentence to correct.</param>
/// <returns>The corrected sentence.</returns>
public string CorrectSentence(string sent)
{
    // NOTE: the original also built an EnglishMaximumEntropySentenceDetector here
    // that was never used — removed, since loading the model is pure waste.
    var tokenizer = new EnglishMaximumEntropyTokenizer("EnglishTok.nbin");

    List<string> correct = new List<string>();
    foreach (var token in tokenizer.Tokenize(sent))
    {
        correct.Add(Program.Applywords(token) ? Correct(token) : token);
    }

    // Word tokens get a trailing space, punctuation tokens do not. The original
    // achieved this with a side-effecting `str += " "` inside Select; a plain
    // conditional expression does the same without mutating the lambda variable.
    string res = string.Join("", correct.Select(str => Program.Applywords(str) ? str + " " : str));
    return res;
}
/// <summary>
/// Wires up all OpenNLP components from the model files under <c>ModelDir</c>.
/// </summary>
private void initComponents()
{
    // Small helper so every model path is built the same way.
    Func<string, string> model = file => Path.Combine(ModelDir, file);

    sentenceDetector = new EnglishMaximumEntropySentenceDetector(model("EnglishSD.nbin"));
    tokenizer = new EnglishMaximumEntropyTokenizer(model("EnglishTok.nbin"));
    posTagger = new EnglishMaximumEntropyPosTagger(model("EnglishPOS.nbin"));
    chunker = new EnglishTreebankChunker(model("EnglishChunk.nbin"));
    // The parser takes the model directory itself (with a trailing separator).
    parser = new EnglishTreebankParser(FileUtils.WithSeparator(ModelDir), true, false);
}
// (Removed: large blocks of commented-out genModel/name-finder training code.
//  It lives in version control if ever needed again.)

/// <summary>
/// Console entry point: reads a paragraph per line, splits it into sentences,
/// runs each through the QueryHandler and prints every hit.
/// </summary>
static void Main(string[] args)
{
    // Strips HTML markup from a single line before it reaches the query handler.
    Func<string, string> func = new Func<string, string>((x) =>
    {
        var temp = x;
        new HTMLMessager().removeFromLine(ref temp);
        return temp;
    });

    KeyWordFinder kwf = new KeyWordFinderUserBot();
    DataAccess dA = new DataAccessDB();

    // Model path and sentence detector are loop-invariant: load the model once
    // instead of re-reading it on every console input (as the original did).
    var modelPath = "..\\..\\..\\Resources\\Models";
    var emesd = new EnglishMaximumEntropySentenceDetector(modelPath + "\\EnglishSD.nbin");

    while (true)
    {
        var paragraph = Console.ReadLine();
        var sentences = emesd.SentenceDetect(paragraph);
        var qHandler = new QueryHandler(dA, kwf, func);
        foreach (Tuple<string, string> response in qHandler.handleQuery(sentences))
        {
            Console.WriteLine("Hits for query: " + "\"" + response.Item1 + "\"");
            Console.WriteLine(response.Item2);
            Console.WriteLine();
        }
    }
}
//---------------------------------------------------OpenNLP Methods
/// <summary>
/// Splits a paragraph into sentences using the EnglishSD model shipped next to
/// the executable (Models\EnglishSD.nbin).
/// </summary>
/// <param name="paragraph">Raw text to split.</param>
/// <returns>The detected sentences.</returns>
private string[] MySentenceDetector(string paragraph)
{
    // Resolve the model relative to the running executable's directory.
    var modelPath = Path.GetDirectoryName(Process.GetCurrentProcess().MainModule.FileName) + @"\Models\EnglishSD.nbin";
    var sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);
    // (Removed a redundant alias variable and a commented-out duplicate line.)
    return sentenceDetector.SentenceDetect(paragraph);
}
/// <summary>
/// Demo entry point: splits a sample paragraph into sentences and picks the
/// longest one.
/// </summary>
private static void Main(string[] args)
{
    var inputText = "C#[note 2] (pronounced as see sharp) is a multi-paradigm programming language encompassing strong typing, imperative, declarative, functional, generic, object-oriented (class-based), and component-oriented programming disciplines. It was developed by Microsoft within its .NET initiative and later approved as a standard by Ecma (ECMA-334) and ISO (ISO/IEC 23270:2006). C# is one of the programming languages designed for the Common Language Infrastructure. C# is intended to be a simple, modern, general-purpose, object-oriented programming language.[7] Its development team is led by Anders Hejlsberg. The most recent version is C# 6.0, which was released on July 20, 2015.[8]";

    // Split the sample text into sentences with the EnglishSD model.
    var detector = new EnglishMaximumEntropySentenceDetector(currentDirectory + "../Resources/Models/EnglishSD.nbin");
    string[] detected = detector.SentenceDetect(inputText);

    // NOTE(review): the longest sentence is computed but never printed or used —
    // presumably leftover from an example; confirm intent.
    string longest = detected.OrderByDescending(candidate => candidate.Length).First();
}
/// <summary>
/// Test fixture setup: loads all OpenNLP components from the model files
/// under Resources\Models (relative to the test directory).
/// </summary>
public void Setup()
{
    var modelDir = Path.Combine(TestContext.CurrentContext.TestDirectory, @"..\..\..\Resources\Models\");

    sentenceDetector = new EnglishMaximumEntropySentenceDetector(Path.Combine(modelDir, "EnglishSD.nbin"));
    tokenizer = new EnglishMaximumEntropyTokenizer(Path.Combine(modelDir, "EnglishTok.nbin"));
    chunker = new EnglishTreebankChunker(Path.Combine(modelDir, @"EnglishChunk.nbin"));
    // The POS tagger additionally needs the parser's tag dictionary.
    postTagger = new EnglishMaximumEntropyPosTagger(
        Path.Combine(modelDir, @"EnglishPOS.nbin"),
        Path.Combine(modelDir, @"Parser\tagdict"));
}
/// <summary>
/// Initializes the processor: loads the tokenizer, sentence-detector and
/// POS-tagger models located under <paramref name="rootPath"/> and flips the
/// initialized flag.
/// </summary>
/// <param name="rootPath">Directory containing the .nbin model files.</param>
private static void Initialize(string rootPath)
{
    // Paths inlined into the constructor calls; each model sits next to the others.
    _tokenizer = new EnglishMaximumEntropyTokenizer(Path.Combine(rootPath, "EnglishTok.nbin"));
    _sentenceDetector = new EnglishMaximumEntropySentenceDetector(Path.Combine(rootPath, "EnglishSD.nbin"));
    _posTagger = new EnglishMaximumEntropyPosTagger(Path.Combine(rootPath, "EnglishPOS.nbin"));
    _isInitialized = true;
}
/// <summary>
/// Console REPL: reads a paragraph per line and prints the detected sentences.
/// </summary>
public void run()
{
    // The detector is loop-invariant: load the model once, not on every iteration
    // (the original re-read the model file for each console input).
    var modelPath = "C:\\Users\\Matthew\\Desktop\\train\\model";
    var sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);

    while (true)
    {
        var paragraph = Console.ReadLine();
        var sentences = sentenceDetector.SentenceDetect(paragraph);
        // The original passed the string[] straight to WriteLine, which prints
        // "System.String[]" rather than the sentences themselves.
        Console.WriteLine(string.Join(Environment.NewLine, sentences));
    }
}
/// <summary>
/// Splits the input text into sentences, strips periods and collapses runs of
/// whitespace in each one, accumulates them in <c>Sentences</c> and joins them
/// into <c>OutputText</c>.
/// </summary>
public override void ProcessText()
{
    var sentenceDetector = new EnglishMaximumEntropySentenceDetector(_modelPath);
    var detectedSentences = sentenceDetector.SentenceDetect(_inputText).ToList();
    foreach (string sentence in detectedSentences)
    {
        // The original also called String.Replace(@"\s+", " "), which treats the
        // pattern as a literal string and therefore did nothing; the Regex.Replace
        // below is the real whitespace normalization, so the no-op was removed.
        string sentenceClean = sentence.Replace(".", "").Trim();
        sentenceClean = Regex.Replace(sentenceClean, @"\s+", " ");
        Sentences.Add(sentenceClean.Trim());
    }
    // NOTE(review): joining with "" concatenates sentences without separators —
    // confirm that is intended.
    OutputText = string.Join("", Sentences);
}
/// <summary>
/// Builds the detector: loads the sentence-detector model, the rule-based
/// tokenizer, the stop-word lookup, the stemmer, the emotion dictionary and
/// the ML kernel.
/// </summary>
public Detector(string DictionaryLocation, string lann, string w2v, Prepare DataSrc)
{
    // download the OpenNLp dotNet implementation from Github https://github.com/AlexPoint/OpenNlp
    // NOTE(review): the ".../" path prefixes look like placeholders — confirm they resolve at runtime.
    var modelPath = ".../OpenNlp-master/OpenNlp-master/Resources/Models/EnglishSD.nbin";
    sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);
    tokenizer = new EnglishRuleBasedTokenizer(false); // the false is for the split on hyphen

    // Stop-word lookup: word -> word; the dictionary is used purely for membership tests.
    var stopwordLines = File.ReadAllLines(@".../Dataset/stopwords.txt");
    Stopdict = new HashSet<string>(stopwordLines).ToDictionary(word => word, word => word);

    StemDown = new EnglishStemmer();
    //StemDown = new DutchStemmer();

    ReadEmotionDictionary(DictionaryLocation, lann);
    MLkernel = new MachineL(w2v, lann, CorpusTokenize(DataSrc.TFCorpus()));
}
/// <summary>
/// Splits <paramref name="paragraph"/> into sentences, extracts the words from
/// them, and returns the result serialized as indented JSON.
/// </summary>
/// <param name="paragraph">Text to analyze.</param>
/// <returns>JSON representation of the extracted words.</returns>
public string GetUniqueWords(string paragraph)
{
    var detector = new EnglishMaximumEntropySentenceDetector("Models/EnglishSD.nbin");

    // Paragraph -> sentences -> words.
    var sentences = detector.SentenceDetect(paragraph);
    OutputWords words = GetWordsFromSentences(sentences);

    // Serialize our Output object to Json.
    var serializer = new JsonHelper<OutputWords>();
    return serializer.IndentedJsonSerializer(words);
}
/// <summary>
/// Reads the text file at <paramref name="path"/>, splits it into sentences
/// and returns all tokens in document order.
/// </summary>
/// <param name="path">Path of the text file to tokenize.</param>
/// <returns>Every token of every detected sentence.</returns>
public static List<string> WorkFile(string path)
{
    var tokenizer = new EnglishMaximumEntropyTokenizer("EnglishTok.nbin");
    var sentDetector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");

    // The original wrapped a StreamReader in `using` and then also called
    // sr.Close() by hand; File.ReadAllText handles open/read/dispose for us.
    var text = File.ReadAllText(path);

    List<string> tokens = new List<string>();
    foreach (var sentence in sentDetector.SentenceDetect(text))
    {
        tokens.AddRange(tokenizer.Tokenize(sentence));
    }
    return tokens;
}
/// <summary>
/// Streams tokenized sentences out of the downloaded wiki-dump .txt files.
/// For each non-empty line, sentences are detected and tokenized, and each
/// token array is handed to <paramref name="tokensProcessor"/> along with the
/// running sentence counter.
/// </summary>
/// <param name="tokensProcessor">Callback receiving (tokens, sentenceCounter); its bool return is currently ignored.</param>
/// <param name="nbOfSentencesToParse">The whole extraction returns once the counter exceeds this quota.</param>
/// <param name="nbOfSentencesToSkip">Sentences skipped (still counted, not tokenized) before processing starts — lets a run be resumed.</param>
public static void ExtractTokensFromTxtFiles(Func <string[], int, bool> tokensProcessor, int nbOfSentencesToParse, int nbOfSentencesToSkip = 0)
{
    // Only the directories holding the "enwiki-latest-pages-meta-current" dump chunks.
    var relevantDirectories = Directory.GetDirectories(PathToDownloadDirectory)
        .Where(dir => Regex.IsMatch(dir, "enwiki-latest-pages-meta-current"));

    var sentenceDetector = new EnglishMaximumEntropySentenceDetector(PathToProject + "Data/EnglishSD.nbin");
    var tokenizer = new EnglishRuleBasedTokenizer(false); // false presumably disables splitting on hyphen — TODO confirm

    var sentenceCounter = 0;
    foreach (var directory in relevantDirectories.OrderBy(d => d)) // ordering is important here to be able to relaunch the parsing from anywhere
    {
        var txtFiles = Directory.GetFiles(directory);
        foreach (var txtFile in txtFiles.OrderBy(f => f)) // ordering is important here to be able to relaunch the parsing from anywhere
        {
            var sentences = File.ReadAllLines(txtFile)
                .Where(l => !string.IsNullOrEmpty(l))
                .SelectMany(l => sentenceDetector.SentenceDetect(l))
                .ToList();
            foreach (var sentence in sentences)
            {
                // Increase counter
                sentenceCounter++;
                // Hard stop: quota reached -> the entire extraction ends here.
                if (sentenceCounter > nbOfSentencesToParse)
                {
                    return;
                }
                // Skip-range sentences advance the counter but are never tokenized.
                if (sentenceCounter <= nbOfSentencesToSkip)
                {
                    continue;
                }
                var tokens = tokenizer.Tokenize(sentence)
                    .Select(string.Intern) // intern strings to avoid huge consumption of memory
                    .ToArray();
                // Return value of the processor is not inspected.
                var success = tokensProcessor(tokens, sentenceCounter);
            }
        }
        Console.WriteLine("Done parsing sentences in directory: '{0}'", directory);
    }
}
/// <summary>
/// A sentence splitter splits a paragraph in sentences. Technically, the sentence detector will compute the likelihood that a specific character ('.', '?' or '!' in the case of English) marks the end of a sentence.
/// </summary>
/// <param name="txt">Text to split.</param>
/// <returns>The detected sentences.</returns>
#region Sentence Splitter
public static string[] SplitTextIntoSentences1(this string txt)
{
    // SentenceDetect does not split on newlines internally, so split on '\n' first.
    string[] sen = txt.Split(new string[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);

    // English model (EnglishSD.nbin); requires `using OpenNLP.Tools.SentenceDetect;`.
    var modelPath = Path + "EnglishSD.nbin";
    var sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);

    // We do not know the sentence count up front, so accumulate in a list.
    List<string> sentencesTemp = new List<string>();
    foreach (string text in sen)
    {
        // AddRange accepts any IEnumerable<string>; the original's extra
        // .ToList<string>() allocation was redundant and has been removed.
        sentencesTemp.AddRange(sentenceDetector.SentenceDetect(text));
    }
    return sentencesTemp.ToArray();
}
/// <summary>
/// "Hey" command: loads (or generates) Fred's per-guild sentence file, trains
/// an order-3 Markov chain on its sentences, and replies with one generated
/// sentence. Replies nothing when the chain produces only whitespace.
/// </summary>
/// <param name="message">Trigger text; defaults to "hey" (not read in the body).</param>
public async Task Hey([Remainder] string message = "hey")
{
    var messages = new List <string>();
    // If sentences for fred do not exist for the server, generate them
    if (!File.Exists($@"{Context.Guild.Id}\{FredID}.txt"))
    {
        messages = await GenerateSentenceFile(FredID);
    }
    else
    {
        messages = File.ReadAllLines($@"{Context.Guild.Id}\{FredID}.txt").ToList();
    }

    // Order-3 Markov chain over word tokens.
    var chain = new MarkovChain <string>(3);
    var tokenizer = new EnglishRuleBasedTokenizer(false);
    var sentenceDetector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");

    // Feed every sentence of every stored message into the chain,
    // each weighted by its token count.
    messages.ForEach(msg =>
    {
        var sentences = sentenceDetector.SentenceDetect(msg);
        foreach (var sentence in sentences)
        {
            var tokens = tokenizer.Tokenize(sentence);
            chain.Add(tokens, tokens.Length);
        }
    });

    var rand = new Random();
    var reply = string.Join(" ", chain.Chain(rand));
    // Empty corpus -> no usable chain -> stay silent.
    if (string.IsNullOrWhiteSpace(reply))
    {
        return;
    }
    await Context.Channel.SendMessageAsync(reply);
}
/// <summary>
/// Generates <paramref name="count"/> Markov-chain sentences imitating
/// <paramref name="user"/>, trained on that user's stored message file
/// ("{id}_{language}.txt"). Returns silently if no corpus file exists.
/// </summary>
/// <param name="user">Guild user whose corpus file is used.</param>
/// <param name="count">Number of sentences to generate.</param>
/// <param name="language">Language suffix of the corpus file name.</param>
/// <param name="depth">Markov chain order.</param>
public async Task Generate(IGuildUser user, int count = 1, string language = "eng", int depth = 2)
{
    if (!File.Exists($"{user.Id}_{language}.txt"))
    {
        return;
    }
    var messages = File.ReadAllLines($"{user.Id}_{language}.txt").ToList();

    var chain = new MarkovChain <string>(depth);
    var tokenizer = new EnglishRuleBasedTokenizer(false);
    var sentenceDetector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");

    // Train: every sentence of every stored message, weighted by token count.
    messages.ForEach(msg =>
    {
        var sentences = sentenceDetector.SentenceDetect(msg);
        foreach (var sentence in sentences)
        {
            var tokens = tokenizer.Tokenize(sentence);
            chain.Add(tokens, tokens.Length);
        }
    });

    var rand = new Random();
    var messageString = "";
    for (int i = 0; i < count; i++)
    {
        var c = chain.Chain(rand);
        messageString += ">> " + string.Join(" ", c);
        // Newline between generated sentences, but not after the last one.
        if (count > 1 && i < count - 1)
        {
            messageString += "\n";
        }
    }
    await Context.Channel.SendMessageAsync(messageString);
}
/// <summary>
/// English sentence detector: resolves the model path for the sentence
/// detector of <c>Language.English</c> and loads the maximum-entropy model.
/// </summary>
public EnglishSentenceDetector() : base(Language.English)
{
    // Language is set by the base constructor call above.
    modelPath = Utility.GetModelPath(OpenNLPModel.SentenceDetector, Language);
    sd = new EnglishMaximumEntropySentenceDetector(modelPath);
}
/// <summary>
/// Splits <paramref name="pData"/> into sentences and returns them keyed by
/// their 1-based position in the text.
/// </summary>
/// <param name="pData">Text to split.</param>
/// <param name="pDetector">Detector used for the sentence split.</param>
/// <returns>Dictionary mapping 1-based index to sentence.</returns>
public static Dictionary <int, string> CreateSentenceDictionary(this string pData, EnglishMaximumEntropySentenceDetector pDetector)
{
    // Use Select's index overload instead of mutating a captured counter.
    var sentences = pDetector.SentenceDetect(pData);
    return sentences
        .Select((sentence, i) => new { sentence, i })
        .ToDictionary(x => x.i + 1, x => x.sentence);
}
/// <summary>
/// Creates the tokenizer wrapper: loads the sentence-detector and word
/// tokenizer models from the class-level path fields.
/// </summary>
public Tokenizer()
{
    // modelPath / modelTokenPath are declared elsewhere in the class.
    _sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);
    _tokenizer = new EnglishMaximumEntropyTokenizer(modelTokenPath);
}
/// <summary>
/// Sets up keyphrase extraction: the rule-based word tokenizer plus the
/// maximum-entropy sentence detector loaded from Resources/Models.
/// </summary>
public ExtractKeyPhrases()
{
    _modelPath = AppDomain.CurrentDomain.BaseDirectory + "../../Resources/Models/";
    _tokenizer = new EnglishRuleBasedTokenizer(false);
    // _modelPath already ends with '/'; the original appended "/EnglishSD.nbin",
    // producing a double separator in the path.
    _sentence_tokenizer = new EnglishMaximumEntropySentenceDetector(_modelPath + "EnglishSD.nbin");
}