public void TestTrigramLanguageModelCreationFromText()
        {
            // Trains a trigram language model from the sample sentences file, then checks
            // next-token prediction and the relative probability of two candidate sequences.
            var ngramSize     = 3;
            var languageModel = new NGramLanguageModel(ngramSize);

            // Dispose the reader as soon as the corpus has been consumed so the
            // underlying file handle is released even if an exception is thrown.
            using (var stream = Tests.OpenFile("/opennlp/tools/languagemodel/sentences.txt", Encoding.UTF8))
            {
                string line;

                while ((line = stream.ReadLine()) != null)
                {
                    var list             = new List <string>(line.Split(new[] { ' ' }, StringSplitOptions.None));
                    var generatedStrings = NGramGenerator.Generate(list, ngramSize, " ");
                    foreach (var generatedString in generatedStrings)
                    {
                        // Each generated n-gram is a space-joined string; split it back into tokens.
                        var tokens = generatedString.Split(new[] { ' ' }, StringSplitOptions.None);
                        if (tokens.Length > 0)
                        {
                            languageModel.Add(new StringList(tokens), 1, ngramSize);
                        }
                    }
                }
            }

            var predicted = languageModel.PredictNextTokens(new StringList("neural", "network", "language"));

            Assert.That(predicted, Is.EqualTo(new StringList("models")));

            var p1 = languageModel.CalculateProbability(new StringList("neural", "network", "language", "models"));
            var p2 = languageModel.CalculateProbability(new StringList("neural", "network", "language", "model"));

            // The sequence ending in the predicted token must be the more probable one.
            Assert.That(p1, Is.GreaterThan(p2));
        }
Exemplo n.º 2
0
    // TODO finish this
    // Rebuilds the bigram and Levenshtein dictionaries from the "Sample" text resource
    // and writes them as text files under the project's Resources folder.
    // Throws FileNotFoundException when the corpus resource cannot be loaded.
    private void GenerateNewDictionaries()
    {
        const string corpusResourceName = "Sample";

        string directoryPath   = Application.dataPath + "/Resources/WordPrediction";
        string autoCorrectPath = Application.dataPath + "/Resources/AutoCorrect";

        if (!Directory.Exists(directoryPath))
        {
            Directory.CreateDirectory(directoryPath);
        }

        // The dictionary files below are written into AutoCorrect; File.WriteAllText
        // does not create missing directories, so make sure this one exists as well.
        if (!Directory.Exists(autoCorrectPath))
        {
            Directory.CreateDirectory(autoCorrectPath);
        }

        Debug.Log("Building a new dictionaries. This can take a while depending on the corpus size.");

        TextAsset corpusFile = Resources.Load(corpusResourceName) as TextAsset;

        if (corpusFile == null)
        {
            // corpusFile is null here, so report the resource name that was requested instead.
            throw new FileNotFoundException($"No text file found at:\nResources/{corpusResourceName}");
        }

        string         rawCorpus      = corpusFile.ToString();
        NGramGenerator nGramGenerator = PunchKeyboardSettings.Load().NGramGenerator;

        string rawNGramDictionary = nGramGenerator.GenerateBiGrams(rawCorpus);

        File.WriteAllText(autoCorrectPath + "/biGramDict.txt", rawNGramDictionary);
        AssetDatabase.Refresh();

        string rawLevenshteinDictionary = nGramGenerator.GenerateLevenshteinDictionary(rawCorpus);

        File.WriteAllText(autoCorrectPath + "/levenshteinDict.txt", rawLevenshteinDictionary);
        AssetDatabase.Refresh();

        nGramGenerator.GenerateLevenshteinCorpus();
        Debug.Log("Dictionaries were successfully generated.");
    }
Exemplo n.º 3
0
        /// <summary>
        /// Generates level columns together with their simplified token sequence.
        /// When no simplified gram is configured, columns are generated directly from
        /// <paramref name="compiled"/> and the simplified tokens are derived from them.
        /// Otherwise a simplified sequence is generated first from
        /// <paramref name="simpleCompiled"/> and then used to restrict column generation.
        /// </summary>
        /// <param name="compiled">Compiled gram used to generate the full columns.</param>
        /// <param name="simpleCompiled">Compiled gram used to generate the simplified sequence.</param>
        /// <returns>A tuple of (columns, simplified tokens).</returns>
        private Tuple <List <string>, List <string> > GetColumnsSemiGuaranteed(ICompiledGram compiled, ICompiledGram simpleCompiled)
        {
            if (simplifiedGram == null)
            {
                // Unrestricted path: generate the columns, then simplify them afterwards.
                List <string> generated       = NGramGenerator.Generate(compiled, startInput, size, includeStart: false);
                List <string> generatedTokens = LevelParser.BreakColumnsIntoSimplifiedTokens(generated, game);

                return new Tuple <List <string>, List <string> >(generated, generatedTokens);
            }

            // Restricted path: build the simplified sequence first.
            var simplifiedStart = LevelParser.BreakColumnsIntoSimplifiedTokens(startInput, game);

            List <string> simplifiedSequence = NGramGenerator.GenerateBestAttempt(
                simpleCompiled,
                simplifiedStart,
                size,
                maxAttempts);

            // Copy the field into a local so the lambda captures the value read here.
            Games localGame = game;

            List <string> restrictedColumns = NGramGenerator.GenerateRestricted(
                compiled,
                startInput,
                simplifiedSequence,
                inColumn => LevelParser.ClassifyColumn(inColumn, localGame),
                includeStart: false);

            return new Tuple <List <string>, List <string> >(restrictedColumns, simplifiedSequence);
        }
Exemplo n.º 4
0
    /// <summary>
    /// Falls back to the <c>InputField</c> component on this object when no input field
    /// has been assigned, then stores the n-gram generator taken from the loaded
    /// keyboard settings.
    /// </summary>
    private void Start()
    {
        bool inputFieldMissing = inputField == null;

        if (inputFieldMissing)
        {
            inputField = GetComponent <InputField>();
        }

        nGramGenerator = PunchKeyboardSettings.Load().NGramGenerator;
    }
Exemplo n.º 5
0
        /// <summary>
        /// Reads the Billboard corpus twice: the first pass collects the set of n-grams
        /// (n = 3, built over the characters of each lyric) seen in usable rows; the second
        /// pass appends, for each usable row, a 0/1 vector marking which of those n-grams
        /// occur in the row, followed by a space and the row's label (column 6).
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Read file
            string path      = @"./data/";
            string inputData = "corpusBillboard.csv";
            string exitData  = "corpusBillboardCaracterizado.csv";

            HashSet <string> types = new HashSet <string>();

            leerStopWords();

            // Build the vocabulary
            using (StreamReader sr = new StreamReader(path + inputData))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string[] lineArray = line.Split(',');

                    // Discard the header row, blank lyrics and blank labels
                    if (!EsFilaBillboardValida(lineArray))
                    {
                        continue;
                    }

                    string letra = lineArray[4];

                    letra = quitarStopWords(letra);                                        // remove stop words
                    letra = letra.ToLower();                                               // lowercase

                    var     ngrams      = NGramGenerator.generate(letra.ToArray(), 3, ""); // build trigrams
                    dynamic ngramsArray = ngrams.toArray();

                    foreach (var token in ngramsArray)
                    {
                        types.Add(token);
                    }
                }
            }

            // Build one feature row per usable record. The writer is opened once (append mode,
            // as before) instead of being reopened for every row.
            using (StreamReader sr = new StreamReader(path + inputData))
            using (TextWriter tw = new StreamWriter(path + exitData, true))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string[] lineArray = line.Split(',');

                    // Same filter as the vocabulary pass so both passes see the same rows
                    if (!EsFilaBillboardValida(lineArray))
                    {
                        continue;
                    }

                    string letra = lineArray[4];
                    letra = quitarStopWords(letra);                                        // remove stop words
                    letra = letra.ToLower();                                               // lowercase

                    var     ngrams      = NGramGenerator.generate(letra.ToArray(), 3, ""); // build trigrams
                    dynamic ngramsArray = ngrams.toArray();

                    // A freshly allocated int[] is already zero-filled.
                    int[] caract = new int[types.Count];

                    int count = 0;
                    foreach (var item in types)
                    {
                        for (int j = 0; j < ngramsArray.Length; j++)
                        {
                            if (item == ngramsArray[j])
                            {
                                caract[count] = 1;
                                break; // one match is enough to set the flag
                            }
                        }
                        count++;
                    }

                    for (int i = 0; i < caract.Length; i++)
                    {
                        tw.Write(caract[i]);
                    }

                    tw.Write(" " + lineArray[6]);
                    tw.WriteLine();
                }
            }
        }

        /// <summary>
        /// Tells whether a split CSV row should be processed: it must have at least seven
        /// fields, must not be the header row, and must have a non-blank, non-"NA" lyric
        /// (column 4) and a non-blank label (column 6).
        /// </summary>
        private static bool EsFilaBillboardValida(string[] lineArray)
        {
            // All conditions must hold; combining the inequalities with "or" would accept every row.
            return lineArray.Length > 6
                   && lineArray[0] != "rank"
                   && lineArray[4] != ""
                   && lineArray[4] != "NA"
                   && lineArray[6] != "";
        }
Exemplo n.º 6
0
        /// <summary>
        /// Reads the tweets corpus twice: the first pass collects the set of n-grams
        /// (n = 3, built over the characters of each tweet) seen in usable rows; the second
        /// pass appends, for each usable row, a 0/1 vector marking which of those n-grams
        /// occur in the tweet, followed by a space and the value of column 1.
        /// Rows with fewer than three fields, with column 1 equal to "3", or with a blank
        /// column 1 or column 2 are skipped.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Read file
            string path      = @"./data/";
            string inputData = "corpusTweets.csv";
            string exitData  = "corpusTweetsCaracterizado.csv";

            HashSet <string> types = new HashSet <string>();

            leerStopWords();

            // Build the vocabulary
            using (StreamReader sr = new StreamReader(path + inputData))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string[] lineArray = line.Split(',');

                    // Skip short rows, type-3 tweets and rows with blank columns 1 or 2
                    if (lineArray.Length < 3 || lineArray[1] == "3" || lineArray[1] == "" || lineArray[2] == "")
                    {
                        continue;
                    }

                    string tweet = lineArray[0];
                    // Strings are immutable: the result of Replace must be assigned back
                    tweet = tweet.Replace("|", ",");                                       // restore commas
                    tweet = quitarStopWords(tweet);                                        // remove stop words
                    tweet = tweet.ToLower();                                               // lowercase

                    var     ngrams      = NGramGenerator.generate(tweet.ToArray(), 3, ""); // build trigrams
                    dynamic ngramsArray = ngrams.toArray();

                    foreach (var token in ngramsArray)
                    {
                        types.Add(token);
                    }
                }
            }

            // Build one feature row per usable tweet. The writer is opened once (append mode,
            // as before) instead of being reopened for every row.
            using (StreamReader sr = new StreamReader(path + inputData))
            using (TextWriter tw = new StreamWriter(path + exitData, true))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    string[] lineArray = line.Split(',');

                    // Same filter as the vocabulary pass so both passes see the same rows
                    if (lineArray.Length < 3 || lineArray[1] == "3" || lineArray[1] == "" || lineArray[2] == "")
                    {
                        continue;
                    }

                    string tweet = lineArray[0];
                    tweet = tweet.Replace("|", ",");                                       // restore commas
                    tweet = quitarStopWords(tweet);                                        // remove stop words
                    tweet = tweet.ToLower();                                               // lowercase

                    var     ngrams      = NGramGenerator.generate(tweet.ToArray(), 3, ""); // build trigrams
                    dynamic ngramsArray = ngrams.toArray();

                    // A freshly allocated int[] is already zero-filled.
                    int[] tweetCaract = new int[types.Count];

                    int count = 0;
                    foreach (var item in types)
                    {
                        for (int j = 0; j < ngramsArray.Length; j++)
                        {
                            if (item == ngramsArray[j])
                            {
                                tweetCaract[count] = 1;
                                break; // one match is enough to set the flag
                            }
                        }
                        count++;
                    }

                    // Write the feature row followed by the label.
                    // NOTE(review): a numeric code derived from column 2 (amlo/anaya/meade/bronco/debate)
                    // used to be computed here but was never written; the label emitted is column 1.
                    // Confirm which label is intended.
                    for (int i = 0; i < tweetCaract.Length; i++)
                    {
                        tw.Write(tweetCaract[i]);
                    }
                    tw.Write(" " + lineArray[1]);
                    tw.WriteLine();
                }
            }
        }
Exemplo n.º 7
0
    /// <summary>
    /// Generates the level columns into the blackboard and, when generation produced a
    /// result, builds the level from them with an extra ending column appended.
    /// With the simplified n-gram option enabled, a simplified column sequence is generated
    /// first and then used to restrict generation from the full grammar; otherwise the
    /// columns are generated from the full grammar directly.
    /// </summary>
    /// <returns><c>true</c> when the generated columns are not null; otherwise <c>false</c>.</returns>
    private bool GenerateLevel()
    {
        if (blackBoard.ConfigUI.Config.UsingSimplifiedNGram)
        {
            ICompiledGram simpleCompiled = simpleGrammar.Compile();
            int           chosenLevel    = levelColumns.RandomIndex();

            // Seed both sequences with the opening columns of the same randomly chosen level.
            List <string> simpleSeed = simplifiedLevelColumns[chosenLevel].GetRange(
                0,
                blackBoard.ConfigUI.Config.N + 7);

            blackBoard.LevelColumns = levelColumns[chosenLevel].GetRange(
                0,
                blackBoard.ConfigUI.Config.N + 7);

            blackBoard.SimpleLevelColumns = NGramGenerator.Generate(
                simpleCompiled,
                simpleSeed,
                blackBoard.ConfigUI.Config.LevelSize);

            ICompiledGram fullCompiled = grammar.Compile();
            blackBoard.LevelColumns = NGramGenerator.GenerateRestricted(
                fullCompiled,
                blackBoard.LevelColumns,
                blackBoard.SimpleLevelColumns,
                inColumn => LevelParser.ClassifyColumn(inColumn, blackBoard.ConfigUI.Config.Game));
        }
        else
        {
            ICompiledGram fullCompiled = grammar.Compile();
            var           seedColumns  = levelColumns.RandomValue().GetRange(0, blackBoard.ConfigUI.Config.N + 7);

            blackBoard.LevelColumns = NGramGenerator.Generate(
                fullCompiled,
                seedColumns,
                blackBoard.ConfigUI.Config.LevelSize);
        }

        // A null result means generation failed; nothing is built in that case.
        if (blackBoard.LevelColumns == null)
        {
            return false;
        }

        List <List <char> > tiles = new List <List <char> >();
        foreach (string generatedColumn in blackBoard.LevelColumns)
        {
            tiles.Add(new List <char>(generatedColumn));
        }

        // Append an ending column, as tall as the first column, filled with the finish tile.
        char finishChar   = Tile.playerOneFinish.ToChar();
        int  columnHeight = tiles[0].Count;
        tiles.Add(new List <char>(new string(finishChar, columnHeight)));

        blackBoard.LevelInfo = LevelLoader.Build(tiles, blackBoard.Tilemap, blackBoard.CameraFollow);

        return true;
    }