Пример #1
0
        /// <summary>
        /// Registers the sentence-opening words of <paramref name="message"/> in
        /// <paramref name="starterGrams"/>.
        /// </summary>
        /// <remarks>
        /// The message is passed through <c>NormalizeInput</c> and split on single spaces.
        /// The first word is always registered. After that, a word is registered only when it
        /// directly follows a run of words flagged by <c>IsEndOfSentence</c> and is not itself flagged.
        /// </remarks>
        /// <param name="message">Raw text to scan.</param>
        /// <param name="starterGrams">Collection that receives the starter words via <c>AddStarterGram</c>.</param>
        public void StarterGramsFromMessage(string message, NGram starterGrams)
        {
            message = NormalizeInput(message);
            if (string.IsNullOrEmpty(message))
            {
                // Nothing to scan; also avoids dereferencing a null normalized message.
                return;
            }

            string[] seperators = { " " };
            string[] words      = message.Split(seperators, StringSplitOptions.RemoveEmptyEntries);
            if (words.Length == 0)
            {
                return;
            }

            bool prevWordIsEndOfSentence = false;
            for (int i = 0; i < words.Length; i++)
            {
                string word = words[i];

                if (i == 0)
                {
                    // The very first word opens the message, hence a sentence.
                    AddStarterGram(word, starterGrams);
                }

                if (IsEndOfSentence(word))
                {
                    prevWordIsEndOfSentence = true;
                }
                else if (prevWordIsEndOfSentence)
                {
                    AddStarterGram(word, starterGrams);
                    // Reset the flag: only the word right after a sentence end is a starter.
                    // Without this, every remaining word of the message would be registered.
                    prevWordIsEndOfSentence = false;
                }
            }
        }
Пример #2
0
 /// <summary>
 /// Adds <paramref name="word"/> to <paramref name="starterGrams"/>, or raises its frequency
 /// when it is already present.
 /// </summary>
 /// <remarks>
 /// The weight is the collection's average (<c>Sum() / Count</c>) multiplied by 0.2 for a known
 /// word or 1.5 for a new word, truncated to an integer and never lower than 1.
 /// </remarks>
 /// <param name="word">Sentence-opening word to record.</param>
 /// <param name="starterGrams">Collection to update and re-sort.</param>
 private void AddStarterGram(string word, NGram starterGrams)
 {
     Sequence existing;
     bool     known  = starterGrams.dictionary.TryGetValue(word, out existing);
     double   factor = known ? 0.2 : 1.5;

     var total = starterGrams.Sum();
     int count = starterGrams.dictionary.Count;

     // An empty collection has no average; fall back to the minimum weight instead of
     // dividing by zero.
     int ponderation = count > 0 ? (int)((total / count) * factor) : 1;
     if (ponderation < 1)
     {
         ponderation = 1;
     }

     if (known)
     {
         existing.Frequency += ponderation;
         starterGrams.Sort(false);
     }
     else
     {
         Sequence newKey = new Sequence();
         newKey.Frequency = ponderation;
         starterGrams.dictionary.Add(word, newKey);
         starterGrams.Sort(true);
     }
 }
        /// <summary>
        /// Copies up to the first 100 entries of <paramref name="nGram"/>'s ordered sequence into
        /// <c>results</c>, scoring each by its frequency divided by the collection's sum.
        /// </summary>
        /// <remarks>
        /// Keys already present in <c>results</c> are left untouched but still count towards the
        /// 100-entry limit.
        /// </remarks>
        /// <param name="nGram">Unigram collection; it is sorted before being read.</param>
        private void AddSuggestionsFromUniGram(NGram nGram)
        {
            const int maxEntries = 100;
            int visited = 0;

            nGram.Sort();

            // The sum does not change while iterating, so evaluate it once instead of per entry.
            var total = nGram.Sum();

            foreach (KeyValuePair <string, Sequence> entry in nGram.orderedSequence)
            {
                if (!results.ContainsKey(entry.Key))
                {
                    // Base probability estimate.
                    // NOTE(review): if Frequency and Sum() are both integral this division
                    // truncates (usually to 0) — confirm at least one operand is floating point.
                    results.Add(entry.Key, entry.Value.Frequency / total);
                }

                visited += 1;
                if (visited >= maxEntries)
                {
                    break;
                }
            }
        }
Пример #4
0
        //string end
        /// <summary>
        /// Slides a window of <paramref name="gramsize"/> words over <paramref name="message"/> and
        /// records every window in <paramref name="gramCollection"/>.
        /// </summary>
        /// <remarks>
        /// For <paramref name="gramsize"/> greater than 1 the first <c>gramsize - 1</c> words
        /// (joined by a space) form the key and the last word is stored in that key's
        /// sub-dictionary. For a size of 1 each word is stored directly in the collection.
        /// </remarks>
        /// <param name="gramsize">Number of words per gram; must be at least 1.</param>
        /// <param name="message">Raw text; it is passed through <c>NormalizeInput</c> and split on spaces.</param>
        /// <param name="gramCollection">Collection to update.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="gramsize"/> is less than 1.</exception>
        public void GramsFromMessage(int gramsize, string message, NGram gramCollection)
        {
            if (gramsize < 1)
            {
                // A non-positive size would make the loop run past the end of the word array.
                throw new ArgumentOutOfRangeException("gramsize", gramsize, "gramsize must be at least 1.");
            }

            message = NormalizeInput(message);
            string[] seperators    = { " " };
            string[] words         = message.Split(seperators, StringSplitOptions.RemoveEmptyEntries);
            int      contextLength = gramsize - 1;

            for (int i = 0; i <= words.Length - gramsize; i++)
            {
                if (contextLength > 0)
                {
                    string key = String.Join(" ", words, i, contextLength);
                    RecordNGramOccurrence(gramCollection, key, words[i + contextLength]);
                }
                else
                {
                    RecordUnigramOccurrence(gramCollection, words[i]);
                }
            }
        }

        // Records that `word` followed the context `key`, creating the key or the follower as needed.
        private void RecordNGramOccurrence(NGram gramCollection, string key, string word)
        {
            Sequence bucket;
            if (!gramCollection.dictionary.TryGetValue(key, out bucket))
            {
                // First time this context is seen: create it with a single follower of frequency 5.
                Sequence firstFollower = new Sequence();
                firstFollower.Frequency = 5;

                bucket            = new Sequence();
                bucket.dictionary = new Dictionary <string, Sequence>();
                bucket.dictionary.Add(word, firstFollower);
                gramCollection.dictionary.Add(key, bucket);
                gramCollection.Sort(true);
                return;
            }

            var total = bucket.Sum();
            int count = bucket.dictionary.Count;

            Sequence follower;
            if (bucket.dictionary.TryGetValue(word, out follower))
            {
                // Known follower: add 20% of the bucket's average, at least 1.
                follower.Frequency += Math.Max(1, (int)((total / count) * 0.2));
                // NOTE(review): Sort is invoked on the follower itself, not on its bucket —
                // kept as in the original; confirm this is the intended target.
                follower.Sort(false);
            }
            else
            {
                // New follower: start at 150% of the bucket's average, at least 1 (same floor
                // as every other insertion path); an empty bucket has no average to divide by.
                Sequence newFollower = new Sequence();
                newFollower.Frequency = count > 0 ? Math.Max(1, (int)((total / count) * 1.5)) : 1;
                bucket.dictionary.Add(word, newFollower);
                // NOTE(review): same Sort target question as above.
                newFollower.Sort(true);
            }
        }

        // Records one occurrence of a single word directly in the collection.
        private void RecordUnigramOccurrence(NGram gramCollection, string word)
        {
            var total = gramCollection.Sum();
            int count = gramCollection.dictionary.Count;

            Sequence existing;
            if (gramCollection.dictionary.TryGetValue(word, out existing))
            {
                // Known word: add 20% of the collection's average, at least 1.
                existing.Frequency += Math.Max(1, (int)((total / count) * 0.2));
                gramCollection.Sort();
            }
            else
            {
                // New word: start at 150% of the average, at least 1; an empty collection
                // has no average to divide by.
                Sequence newSequence = new Sequence();
                newSequence.Frequency = count > 0 ? Math.Max(1, (int)((total / count) * 1.5)) : 1;
                gramCollection.dictionary.Add(word, newSequence);
                gramCollection.Sort(true);
            }
        }
Пример #5
0
        /// <summary>
        /// Loads consecutive n-gram files named <c>folder + PREFIX + n + POSTFIX</c> until one is
        /// missing, and stores each loaded collection on this instance.
        /// </summary>
        /// <remarks>
        /// The folder comes from command-line argument 1 (personal) or 2 (public); it stays empty
        /// when the argument is absent. Numbering starts at 0 only for personal data with a folder
        /// argument, otherwise at 1. File 0 is stored as <c>nGramDebutPhrase</c>; the others are
        /// appended to <c>nGramsPerso</c> or <c>nGramsPublic</c>.
        /// Each line is expected to look like <c>sequence,frequency</c>; lines without a comma are skipped.
        /// </remarks>
        /// <param name="isPerso">True to load the personal set, false for the public set.</param>
        /// <param name="PREFIX">File-name part placed before the number.</param>
        /// <param name="POSTFIX">File-name part placed after the number.</param>
        private void LoadFromFile(bool isPerso, string PREFIX, string POSTFIX)
        {
            string[] args   = Environment.GetCommandLineArgs();
            string   folder = "";
            var      n      = 1;

            if (isPerso)
            {
                if (args != null && args.Length > 1 && args[1] != null)
                {
                    folder = args[1];
                    // Look for files numbered from 0 upwards.
                    n = 0;
                }
            }
            else if (args != null && args.Length > 2 && args[2] != null)
            {
                folder = args[2];
                // Files are numbered from 1 upwards; n already holds 1.
            }

            while (true)
            {
                var nGram = new NGram();
                var path  = Environment.ExpandEnvironmentVariables(folder + PREFIX + n + POSTFIX);

                System.IO.StreamReader file;
                try
                {
                    file = new System.IO.StreamReader(path);
                }
                catch (FileNotFoundException)
                {
                    // Files are loaded in order for as long as they exist; the first gap ends the load.
                    break;
                }

                // Dispose the reader even when a line fails to parse.
                using (file)
                {
                    string line;
                    while ((line = file.ReadLine()) != null)
                    {
                        var index = line.LastIndexOf(',');
                        if (index < 0)
                        {
                            // No separator (e.g. a blank line): nothing to parse.
                            continue;
                        }

                        var rawSequence = line.Substring(0, index);
                        var frequency   = line.Substring(index + 1);

                        var entry = new Sequence();
                        entry.Frequency = parseFrequency(frequency);

                        if (n > 1)
                        {
                            // Key = every word but the last; the last word goes into the key's sub-dictionary.
                            var words = rawSequence.Split(' ');
                            var last  = words[words.Length - 1];
                            var key   = String.Join(" ", words, 0, words.Length - 1);

                            Sequence bucket;
                            if (!nGram.dictionary.TryGetValue(key, out bucket))
                            {
                                // Create the sub-dictionary for a key seen for the first time.
                                bucket            = new Sequence();
                                bucket.dictionary = new Dictionary <string, Sequence>();
                                nGram.dictionary.Add(key, bucket);
                            }

                            // The first occurrence wins; duplicates are ignored.
                            if (!bucket.dictionary.ContainsKey(last))
                            {
                                bucket.dictionary.Add(last, entry);
                            }
                        }
                        else if (!nGram.dictionary.ContainsKey(rawSequence))
                        {
                            // Unigram and sentence-start unigram files: the whole sequence is the key.
                            nGram.dictionary.Add(rawSequence, entry);
                        }
                    }
                }

                nGram.Sort();
                // Return value intentionally unused, as in the original.
                // NOTE(review): presumably Sum() caches the total — confirm.
                nGram.Sum();

                if (n == 0)
                {
                    this.nGramDebutPhrase = nGram;
                }
                else if (isPerso)
                {
                    this.nGramsPerso.Add(nGram);
                }
                else
                {
                    this.nGramsPublic.Add(nGram);
                }

                n += 1;
            }
        }