Example #1
        /// <summary>
        /// Loads all the necessary resources
        /// for the library
        /// </summary>
        public void LoadLibrary()
        {
            try
            {
                HelperMethods.CreateResourceInFileSystem("EnglishSD.nbin");
                HelperMethods.CreateResourceInFileSystem("EnglishTok.nbin");
                HelperMethods.CreateResourceInFileSystem("EnglishPOS.nbin");

                sentenceDetector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");
                tokenizer        = new EnglishMaximumEntropyTokenizer("EnglishTok.nbin");
                posTagger        = new EnglishMaximumEntropyPosTagger("EnglishPOS.nbin");

                positive_words   = HelperMethods.Import_PositiveWords();  //data of positive words
                negative_words   = HelperMethods.Import_NegativeWords();  //data of negative words
                emotion_words    = HelperMethods.Import_EmotionWords();   //data of emotion words with 5 expression values
                inclusion_values = HelperMethods.Import_InclusiveWords(); //data of inclusion words with rate
                exclusion_values = HelperMethods.Import_ExclusionWords(); //data of exclusion words with rate

                isLibraryLoaded = true;
            }
            catch (Exception e)
            {
                isLibraryLoaded = false;
                throw new Exception("Error in library load state", e);
            }
        }
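A minimal usage sketch for the loader above; the snippet does not show the containing type, so the class name below is a placeholder:

            // "SentimentLibrary" is a hypothetical name for the class that declares LoadLibrary().
            var library = new SentimentLibrary();
            try
            {
                library.LoadLibrary(); // writes the .nbin models to disk and loads the word lists
            }
            catch (Exception ex)
            {
                // LoadLibrary() wraps any failure and rethrows; isLibraryLoaded stays false in that case.
                Console.WriteLine(ex.Message);
            }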
Example #2
        public string CorrectSentence(string sent)
        {
            var modelPath = "EnglishTok.nbin";
            var tokenizer = new EnglishMaximumEntropyTokenizer(modelPath);

            var sentDetector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");


            List <string> tokens = new List <string>();

            tokens.AddRange(tokenizer.Tokenize(sent));
            List <string> correct = new List <string>();

            foreach (var token in tokens)
            {
                if (Program.Applywords(token))
                {
                    correct.Add(Correct(token));
                }
                else
                {
                    correct.Add(token);
                }
            }

            string res = string.Join("", correct.Select(str => Program.Applywords(str) ? str + " " : str));

            return(res);
        }
 private void initComponents()
 {
     sentenceDetector = new EnglishMaximumEntropySentenceDetector(Path.Combine(ModelDir, "EnglishSD.nbin"));
     tokenizer        = new EnglishMaximumEntropyTokenizer(Path.Combine(ModelDir, "EnglishTok.nbin"));
     posTagger        = new EnglishMaximumEntropyPosTagger(Path.Combine(ModelDir, "EnglishPOS.nbin"));
     chunker          = new EnglishTreebankChunker(Path.Combine(ModelDir, "EnglishChunk.nbin"));
     parser           = new EnglishTreebankParser(FileUtils.WithSeparator(ModelDir), true, false);
 }
        /*public static void genModel(string filePath, string modelDest)
         * {
         *  HtmlDocument htmlDoc = new HtmlDocument();
         *  htmlDoc.Load(filePath);
         *  Parser.ParsedCHM parsed = new Parser.ParsedCHM(htmlDoc);
         *  List<List<Parser.Element>> blocks = parsed.blocks;
         *
         *  using (System.IO.StreamWriter file =
         *          new System.IO.StreamWriter(modelDest))
         *  {
         *      foreach (List<Parser.Element> elements in blocks)
         *      {
         *          foreach (Parser.Element e in elements)
         *          {
         *              if(e.name != "img")
         *              {
         *                  file.WriteLine("<START:" + parsed.title + "> " + e.data+" <END>");
         *              }
         *          }
         *      }
         *  }
         * }*/

        static void Main(string[] args)
        {
            /*  var trainingFile = "C:\\Users\\Matthew\\Desktop\\train\\genModel";
             * //genModel("C:\\Users\\Matthew\\Desktop\\TestCHM\\DeleteUser.html", trainingFile);
             *
             * // The number of iterations; no general rule for finding the best value, just try several!
             * var iterations = 500;
             * // The cut; no general rule for finding the best value, just try several!
             * var cut = 200;
             * // The characters which can mark an end of sentence
             *
             * var nameFind = MaximumEntropyNameFinder.TrainModel(trainingFile, iterations, cut);
             * var modelPath = "C:\\Users\\Matthew\\Desktop\\train\\";
             * //new PlainTextGisModelWriter.Persist(nameFind, modelPath+"genModel.nbin");
             * new BinaryGisModelWriter().Persist(nameFind, modelPath + "genModel.nbin");
             *
             * //TTTTTTRYYY THIS NEXT
             * //var gisModel = new SharpEntropy.GisModel(new PlainTextGisModelReader(modelPath + "genModel.nbin"));
             *
             * //MaximumEntropyNameFinder menf = new MaximumEntropyNameFinder(gisModel);
             * var nameFinder = new EnglishNameFinder(modelPath); //+ "genModel.nbin");
             * Console.WriteLine("after");
             * var models = new[] { "genModel"};
             *
             *
             *
             *
             *
             * while (true)
             * {
             *    var paragraph = Console.ReadLine();
             *    Console.WriteLine(nameFinder.GetNames(models, paragraph));
             * }*/
            Func <string, string> func = new Func <string, string>((x) =>
            {
                var temp = x;
                new HTMLMessager().removeFromLine(ref temp);
                return(temp);
            });
            KeyWordFinder kwf = new KeyWordFinderUserBot();
            DataAccess    dA  = new DataAccessDB();

            while (true)
            {
                var paragraph = Console.ReadLine();
                var modelPath = "..\\..\\..\\Resources\\Models";
                var emesd     = new EnglishMaximumEntropySentenceDetector(modelPath + "\\EnglishSD.nbin");
                var sentences = emesd.SentenceDetect(paragraph);
                var qHandler  = new QueryHandler(dA, kwf, func);
                foreach (Tuple <string, string> response in qHandler.handleQuery(sentences))
                {
                    Console.WriteLine("Hits for query: " + "\"" + response.Item1 + "\"");
                    Console.WriteLine(response.Item2);
                    Console.WriteLine();
                }
            }
        }
Example #5
        //---------------------------------------------------OpenLP Methods

        private string[] MySentenceDetector(string paragraph)
        {
            var modelPath = Path.GetDirectoryName(Process.GetCurrentProcess().MainModule.FileName) + @"\Models\EnglishSD.nbin";
            EnglishMaximumEntropySentenceDetector EMESD = new EnglishMaximumEntropySentenceDetector(modelPath);
            var sentenceDetector = EMESD;
            //var sentenceDetector = EnglishMaximumEntropySentenceDetector(modelPath);
            var sentences = sentenceDetector.SentenceDetect(paragraph);

            return(sentences);
        }
        private static void Main(string[] args)
        {
            var inputText =
                "C#[note 2] (pronounced as see sharp) is a multi-paradigm programming language encompassing strong typing, imperative, declarative, functional, generic, object-oriented (class-based), and component-oriented programming disciplines. It was developed by Microsoft within its .NET initiative and later approved as a standard by Ecma (ECMA-334) and ISO (ISO/IEC 23270:2006). C# is one of the programming languages designed for the Common Language Infrastructure. C# is intended to be a simple, modern, general-purpose, object-oriented programming language.[7] Its development team is led by Anders Hejlsberg. The most recent version is C# 6.0, which was released on July 20, 2015.[8]";
            var sentenceDetector =
                new EnglishMaximumEntropySentenceDetector(currentDirectory + "../Resources/Models/EnglishSD.nbin");

            string[] sentences = sentenceDetector.SentenceDetect(inputText);
            string   longest   = sentences.OrderByDescending(s => s.Length).First();
        }
Example #7
        public void Setup()
        {
            var path = Path.Combine(TestContext.CurrentContext.TestDirectory, @"..\..\..\Resources\Models\");

            sentenceDetector = new EnglishMaximumEntropySentenceDetector(Path.Combine(path, "EnglishSD.nbin"));
            postTagger       = new EnglishMaximumEntropyPosTagger(
                Path.Combine(path, @"EnglishPOS.nbin"),
                Path.Combine(path, @"Parser\tagdict"));
            tokenizer = new EnglishMaximumEntropyTokenizer(Path.Combine(path, "EnglishTok.nbin"));
            chunker   = new EnglishTreebankChunker(Path.Combine(path, @"EnglishChunk.nbin"));
        }
        /// <summary>
        /// Initializes the processor
        /// </summary>
        /// <param name="rootPath">The root path</param>
        private static void Initialize(string rootPath)
        {
            string tkModelPath  = Path.Combine(rootPath, "EnglishTok.nbin");
            string sdModelPath  = Path.Combine(rootPath, "EnglishSD.nbin");
            string posModelPath = Path.Combine(rootPath, "EnglishPOS.nbin");

            _tokenizer        = new EnglishMaximumEntropyTokenizer(tkModelPath);
            _sentenceDetector = new EnglishMaximumEntropySentenceDetector(sdModelPath);
            _posTagger        = new EnglishMaximumEntropyPosTagger(posModelPath);
            _isInitialized    = true;
        }
 public void run()
 {
     while (true)
     {
         var paragraph        = Console.ReadLine();
         var modelPath        = "C:\\Users\\Matthew\\Desktop\\train\\model";
         var sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);
         var sentences        = sentenceDetector.SentenceDetect(paragraph);
         Console.WriteLine(string.Join(Environment.NewLine, sentences)); // print each detected sentence on its own line
     }
 }
Example #10
        public override void ProcessText()
        {
            var sentenceDetector  = new EnglishMaximumEntropySentenceDetector(_modelPath);
            var detectedSentences = sentenceDetector.SentenceDetect(_inputText).ToList();

            foreach (string sentence in detectedSentences)
            {
                string sentenceClean = sentence.Replace(".", "").Trim();
                // String.Replace is a literal replacement, so collapsing whitespace has to go through Regex.Replace
                sentenceClean = Regex.Replace(sentenceClean, @"\s+", " ");
                Sentences.Add(sentenceClean.Trim());
            }
            OutputText = string.Join("", Sentences);
        }
Example #11
    public Detector(string DictionaryLocation, string lann, string w2v, Prepare DataSrc)
    {
        // Download the OpenNlp .NET implementation from GitHub: https://github.com/AlexPoint/OpenNlp
        var modelPath = ".../OpenNlp-master/OpenNlp-master/Resources/Models/EnglishSD.nbin";

        sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);
        tokenizer        = new EnglishRuleBasedTokenizer(false); //false: do not split tokens on hyphens
        string[] Stopwords = File.ReadAllLines(@".../Dataset/stopwords.txt");
        var      hash      = new HashSet <string>(Stopwords);

        Stopdict = hash.ToArray().ToDictionary(key => key, value => value);
        StemDown = new EnglishStemmer();
        //StemDown = new DutchStemmer();
        ReadEmotionDictionary(DictionaryLocation, lann);
        MLkernel = new MachineL(w2v, lann, CorpusTokenize(DataSrc.TFCorpus()));
    }
Example #12
        public string GetUniqueWords(string paragraph)
        {
            string modelPath = "Models/EnglishSD.nbin";

            EnglishMaximumEntropySentenceDetector sentencedetector = new EnglishMaximumEntropySentenceDetector(modelPath);

            //Split the paragraph into sentences
            string[] sentences = sentencedetector.SentenceDetect(paragraph);

            //Get words from every sentence.
            OutputWords words = GetWordsFromSentences(sentences);

            JsonHelper <OutputWords> jsonOutputHelper = new JsonHelper <OutputWords>();

            //Serialize our Output object to Json.
            string jsonWords = jsonOutputHelper.IndentedJsonSerializer(words);

            return(jsonWords);
        }
Example #13
        public static List <string> WorkFile(string path)
        {
            //model = new Model[n];
            using (StreamReader sr = new StreamReader(path))
            {
                var modelPath = "EnglishTok.nbin";
                var tokenizer = new EnglishMaximumEntropyTokenizer(modelPath);

                var sentDetector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");
                var text         = sr.ReadToEnd();
                sr.Close();
                var           sentencies = sentDetector.SentenceDetect(text);
                List <string> tokens     = new List <string>();
                foreach (var item in sentencies)
                {
                    tokens.AddRange(tokenizer.Tokenize(item));
                }
                return(tokens);
            }
        }
Example #14
        public static void ExtractTokensFromTxtFiles(Func <string[], int, bool> tokensProcessor, int nbOfSentencesToParse,
                                                     int nbOfSentencesToSkip = 0)
        {
            var relevantDirectories = Directory.GetDirectories(PathToDownloadDirectory)
                                      .Where(dir => Regex.IsMatch(dir, "enwiki-latest-pages-meta-current"));
            var sentenceDetector = new EnglishMaximumEntropySentenceDetector(PathToProject + "Data/EnglishSD.nbin");
            var tokenizer        = new EnglishRuleBasedTokenizer(false);

            var sentenceCounter = 0;

            foreach (var directory in relevantDirectories.OrderBy(d => d)) // ordering is important here to be able to relaunch the parsing from anywhere
            {
                var txtFiles = Directory.GetFiles(directory);
                foreach (var txtFile in txtFiles.OrderBy(f => f)) // ordering is important here to be able to relaunch the parsing from anywhere
                {
                    var sentences = File.ReadAllLines(txtFile)
                                    .Where(l => !string.IsNullOrEmpty(l))
                                    .SelectMany(l => sentenceDetector.SentenceDetect(l))
                                    .ToList();
                    foreach (var sentence in sentences)
                    {
                        // Increase counter
                        sentenceCounter++;
                        if (sentenceCounter > nbOfSentencesToParse)
                        {
                            return;
                        }
                        if (sentenceCounter <= nbOfSentencesToSkip)
                        {
                            continue;
                        }

                        var tokens = tokenizer.Tokenize(sentence)
                                     .Select(string.Intern) // intern strings to avoid huge consumption of memory
                                     .ToArray();
                        var success = tokensProcessor(tokens, sentenceCounter);
                    }
                }
                Console.WriteLine("Done parsing sentences in directory: '{0}'", directory);
            }
        }
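A short call-site sketch for the method above, assuming the containing static class is in scope; the sentence limit is an arbitrary example value:

            // Print the first 1,000 tokenized sentences; returning true signals success for each sentence.
            ExtractTokensFromTxtFiles((tokens, sentenceIndex) =>
            {
                Console.WriteLine("{0}: {1}", sentenceIndex, string.Join(" ", tokens));
                return true;
            }, nbOfSentencesToParse: 1000);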
Example #15
        /// <summary>
        /// A sentence splitter splits a paragraph in sentences. Technically, the sentence detector will compute the likelihood that a specific character ('.', '?' or '!' in the case of English) marks the end of a sentence.
        /// </summary>
        /// <param name="txt">Text to split</param>
        /// <returns>an array of sentences</returns>
        #region Sentence Splitter
        public static string[] SplitTextIntoSentences1(this string txt)
        {
            //We need to split on \n first because SentenceDetect does not do that internally
            string[] sen = txt.Split(new string[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);

            //A sentence splitter splits a paragraph in sentences. Technically, the sentence detector will compute the likelihood that a specific character ('.', '?' or '!' in the case of English) marks the end of a sentence.
            //Models for English. I had to download them
            //Don't forget to add using OpenNLP.Tools.SentenceDetect;
            var modelPath        = Path + "EnglishSD.nbin";
            var sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);

            List <string> sentencesTemp = new List <string>(); //We have to use a list because we don't know in advance how many sentences there will be (which rules out allocating an array up front)

            foreach (string text in sen)
            {
                string[] sentences = sentenceDetector.SentenceDetect(text); //returns the sentences detected in this chunk of text
                sentencesTemp.AddRange(sentences.ToList <string>());        //copy this chunk's sentences into the list
            }

            return(sentencesTemp.ToArray()); //Convert to an array
        }
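A short usage sketch for the extension method above (it assumes the Path field already points at the directory holding EnglishSD.nbin):

            string paragraph = "The first sentence. A second one?\nA third sentence on its own line!";
            string[] parts = paragraph.SplitTextIntoSentences1();
            foreach (string s in parts)
            {
                Console.WriteLine(s);
            }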
Example #16
        public async Task Hey([Remainder] string message = "hey")
        {
            var messages = new List <string>();

            // If sentences for fred do not exist for the server, generate them
            if (!File.Exists($@"{Context.Guild.Id}\{FredID}.txt"))
            {
                messages = await GenerateSentenceFile(FredID);
            }
            else
            {
                messages = File.ReadAllLines($@"{Context.Guild.Id}\{FredID}.txt").ToList();
            }

            var chain            = new MarkovChain <string>(3);
            var tokenizer        = new EnglishRuleBasedTokenizer(false);
            var sentenceDetector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");

            messages.ForEach(msg =>
            {
                var sentences = sentenceDetector.SentenceDetect(msg);
                foreach (var sentence in sentences)
                {
                    var tokens = tokenizer.Tokenize(sentence);
                    chain.Add(tokens, tokens.Length);
                }
            });

            var rand = new Random();

            var reply = string.Join(" ", chain.Chain(rand));

            if (string.IsNullOrWhiteSpace(reply))
            {
                return;
            }

            await Context.Channel.SendMessageAsync(reply);
        }
Example #17
        public async Task Generate(IGuildUser user, int count = 1, string language = "eng", int depth = 2)
        {
            if (!File.Exists($"{user.Id}_{language}.txt"))
            {
                return;
            }

            var messages = File.ReadAllLines($"{user.Id}_{language}.txt").ToList();

            var chain            = new MarkovChain <string>(depth);
            var tokenizer        = new EnglishRuleBasedTokenizer(false);
            var sentenceDetector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");

            messages.ForEach(msg =>
            {
                var sentences = sentenceDetector.SentenceDetect(msg);
                foreach (var sentence in sentences)
                {
                    var tokens = tokenizer.Tokenize(sentence);
                    chain.Add(tokens, tokens.Length);
                }
            });
            var rand          = new Random();
            var messageString = "";

            for (int i = 0; i < count; i++)
            {
                var c = chain.Chain(rand);
                messageString += ">> " + string.Join(" ", c);
                if (count > 1 && i < count - 1)
                {
                    messageString += "\n";
                }
            }
            await Context.Channel.SendMessageAsync(messageString);
        }
Example #18
 public EnglishSentenceDetector() : base(Language.English)
 {
     modelPath = Utility.GetModelPath(OpenNLPModel.SentenceDetector, Language);
     sd        = new EnglishMaximumEntropySentenceDetector(modelPath);
 }
Example #19
        public static Dictionary <int, string> CreateSentenceDictionary(this string pData, EnglishMaximumEntropySentenceDetector pDetector)
        {
            int index = 1;

            return(pDetector.SentenceDetect(pData).ToDictionary(p => index++));
        }
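A short usage sketch for the extension method above; the model path is an assumption:

            var detector = new EnglishMaximumEntropySentenceDetector("EnglishSD.nbin");
            Dictionary <int, string> numbered = "One. Two. Three.".CreateSentenceDictionary(detector);
            foreach (var pair in numbered)
            {
                Console.WriteLine("{0}: {1}", pair.Key, pair.Value); // keys start at 1
            }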
 public Tokenizer()
 {
     _sentenceDetector = new EnglishMaximumEntropySentenceDetector(modelPath);
     _tokenizer        = new EnglishMaximumEntropyTokenizer(modelTokenPath);
 }
Example #21
 public ExtractKeyPhrases()
 {
     _modelPath          = AppDomain.CurrentDomain.BaseDirectory + "../../Resources/Models/";
     _tokenizer          = new EnglishRuleBasedTokenizer(false);
     _sentence_tokenizer = new EnglishMaximumEntropySentenceDetector(_modelPath + "EnglishSD.nbin"); // _modelPath already ends with a separator
 }