/// <summary>
/// Updates the glossary of <paramref name="system"/> with the words found in <paramref name="s"/>.
/// The text is split on punctuation and spaces, every token is lower-cased and lemmatized
/// (Russian), and tokens equal to an entry of <c>stopWords</c> are dropped. For each distinct
/// remaining lemma the matching glossary entry gets its <c>WordValue</c> incremented by one;
/// if no entry exists, a new one is created with <c>WordValue = 1</c>.
/// </summary>
/// <param name="s">Text to process. Null or empty text leaves the glossary untouched.</param>
/// <param name="system">System collection whose glossary entries are read and updated.</param>
private void AdditionToGlossary(string s, SystemCollection system)
        {
            if (string.IsNullOrEmpty(s))
            {
                return;
            }

            ILemmatizer lmtz       = new LemmatizerPrebuiltFull(LanguagePrebuilt.Russian);
            char[]      separators = { '.', '?', '!', '(', ')', ',', ':', ';', ' ' };

            // Build the filtered lemma list directly instead of removing items from a list
            // while iterating over it.
            List <String> lemmas = new List <string>();
            foreach (string rawWord in s.Split(separators, StringSplitOptions.RemoveEmptyEntries))
            {
                string lemma = lmtz.Lemmatize(rawWord.ToLower());
                if (!stopWords.Contains(lemma))
                {
                    lemmas.Add(lemma);
                }
            }

            // The glossary of this system is loaded once; previously the same query was issued
            // for every word. Words processed below are distinct, so entries added during the
            // loop can never match a later word and the snapshot stays valid.
            var gos = (from p in model.GlossarySystems where p.SystemCollectionId == system.Id select p).ToList();
            Dictionary <string, GlossarySystem> existingByWord = new Dictionary <string, GlossarySystem>();
            foreach (var stored in gos)
            {
                // First entry wins, mirroring the original first-match linear search.
                if (stored.WordGlossary != null && !existingByWord.ContainsKey(stored.WordGlossary))
                {
                    existingByWord.Add(stored.WordGlossary, stored);
                }
            }

            bool changed = false;
            foreach (string word in lemmas.Distinct <string>())
            {
                GlossarySystem match;
                if (word != null && existingByWord.TryGetValue(word, out match))
                {
                    // Find is kept from the original code so that the instance being modified
                    // is the one the context resolves for this key.
                    GlossarySystem tracked = model.GlossarySystems.Find(match.Id);
                    tracked.WordValue++;
                }
                else
                {
                    GlossarySystem created = new GlossarySystem();
                    created.WordGlossary       = word;
                    created.SystemCollectionId = system.Id;
                    created.WordValue          = 1;
                    created.SystemCollection   = system;
                    model.GlossarySystems.Add(created);
                }
                changed = true;
            }

            // Persist all additions and increments in a single round trip.
            if (changed)
            {
                model.SaveChanges();
            }
        }
Example #2
0
        /// <summary>
        /// Builds a TF-IDF style weight vector for <paramref name="s"/> over the glossary of the
        /// current <c>systemCollection</c>. The text is split on punctuation and spaces, each token
        /// is lower-cased and lemmatized (Russian), and tokens equal to an entry of
        /// <c>stopWords</c> are dropped. For every glossary entry, in query order, the weight is
        /// <c>(occurrences / tokenCount) * ln(1 + size / WordValue)</c>, where <c>size</c> is the
        /// number of <c>TypeDemands</c> rows belonging to the system collection.
        /// </summary>
        /// <param name="s">Text to vectorize. Null or empty text yields an all-zero vector.</param>
        /// <returns>One weight per glossary entry of the current system collection.</returns>
        private List <Double> token(string s)
        {
            ILemmatizer   lmtz       = new LemmatizerPrebuiltFull(LanguagePrebuilt.Russian);
            char[]        separators = { '.', '?', '!', '(', ')', ',', ':', ';', ' ' };
            List <String> lemmas     = new List <string>();

            if (!string.IsNullOrEmpty(s))
            {
                foreach (string rawWord in s.Split(separators, StringSplitOptions.RemoveEmptyEntries))
                {
                    string lemma = lmtz.Lemmatize(rawWord.ToLower());
                    if (!stopWords.Contains(lemma))
                    {
                        lemmas.Add(lemma);
                    }
                }
            }

            int size = (from p in model.TypeDemands
                        where p.GroupDemand.SystemCollectionId == systemCollection.Id
                        select p).Count();

            var gos = (from p in model.GlossarySystems
                       where p.SystemCollectionId == systemCollection.Id
                       select p).ToList();

            List <Double> tfidf = new List <double>(gos.Count);

            foreach (var entry in gos)
            {
                double weight = 0;

                // An empty token list used to produce 0/0 = NaN for every entry, and a
                // non-positive WordValue has no meaningful inverse frequency; both give 0.
                if (lemmas.Count > 0 && entry.WordValue > 0)
                {
                    int    occurrences = lemmas.Count(w => w == entry.WordGlossary);
                    double tf          = (double)occurrences / lemmas.Count;

                    // Force floating-point division: with integer operands size / WordValue
                    // would be truncated before the logarithm is taken.
                    double idf = Math.Log(1 + (double)size / entry.WordValue);
                    weight = tf * idf;
                }

                tfidf.Add(weight);
            }
            return(tfidf);
        }
 /// <summary>
 /// Extracts the words of <paramref name="iskalni_kljuc"/> with a regular expression,
 /// lower-cases and lemmatizes each one (Slovene), and concatenates the lemmas as
 /// single-quoted items, each followed by a comma (the trailing comma is kept).
 /// </summary>
 /// <param name="iskalni_kljuc">Search key text to lemmatize.</param>
 /// <returns>The quoted, comma-terminated lemmas; an empty string when no word matches.</returns>
 public string LemmatizeTwo(string iskalni_kljuc)
 {
     Regex wordPattern = new Regex(@"([^\W_\d]([^\W_\d]|[-'\d](?=[^\W_\d|]))*[^\W_\d])", RegexOptions.IgnorePatternWhitespace);
     ILemmatizer lemmatizer = new LemmatizerPrebuiltFull(LemmaSharp.LanguagePrebuilt.Slovene);

     MatchCollection hits = wordPattern.Matches(iskalni_kljuc);
     string quotedLemmas = "";

     for (int idx = 0; idx < hits.Count; idx++)
     {
         // Group 1 spans the whole matched word.
         string lowered = hits[idx].Groups[1].Value.ToLower();
         quotedLemmas += "'" + lemmatizer.Lemmatize(lowered) + "',";
     }

     return quotedLemmas;
 }
 /// <summary>
 /// Projects every element of <paramref name="words"/> through <c>Lemmatizer.Lemmatize</c>.
 /// The result is a deferred LINQ sequence, so lemmatization happens as it is enumerated.
 /// </summary>
 /// <param name="words">Words to lemmatize.</param>
 /// <returns>The lemmatized words, in the same order as the input.</returns>
 static IEnumerable <string> LemmatizeWordsArray(IEnumerable <string> words)
 {
     return from token in words
            select Lemmatizer.Lemmatize(token);
 }
        /// <summary>
        /// Lemmatizes the words of <paramref name="vsebina"/> (Slovene), stores one row per
        /// distinct lemma with its occurrence count in table <c>tf</c> under the document name
        /// <paramref name="teme"/>, and then rebuilds table <c>df</c> from the per-lemma row
        /// counts of <c>tf</c>. The scratch fields <c>leme_string</c> and <c>besede</c> are
        /// reset as a side effect.
        /// </summary>
        /// <param name="teme">Document identifier written to the <c>Dokument</c> column.</param>
        /// <param name="vsebina">Document text to lemmatize.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="teme"/> or <paramref name="vsebina"/> is null.
        /// </exception>
        public void Lematiziraj(string teme, string vsebina)
        {
            if (teme == null)
            {
                throw new ArgumentNullException("teme");
            }
            if (vsebina == null)
            {
                throw new ArgumentNullException("vsebina");
            }

            ILemmatizer lmtz = new LemmatizerPrebuiltFull(LemmaSharp.LanguagePrebuilt.Slovene);
            Regex pattern = new Regex(@"([^\W_\d]([^\W_\d]|[-'\d](?=[^\W_\d|]))*[^\W_\d])", RegexOptions.IgnorePatternWhitespace);

            List<string> lemmas = new List<string>();

            // Backward compatibility: the original appended to the leme_string field, so any
            // comma-separated lemmas already buffered there were counted as well.
            if (!string.IsNullOrEmpty(leme_string))
            {
                lemmas.AddRange(leme_string.Split(','));
            }
            foreach (Match m in pattern.Matches(vsebina))
            {
                lemmas.Add(lmtz.Lemmatize(m.Groups[1].Value.ToLower()));
            }

            leme_string = "";
            besede.Clear();

            // Count occurrences in one pass, remembering first-seen order. Empty lemmas are
            // skipped explicitly; the original relied on dropping the last distinct entry,
            // which discarded a real lemma whenever an empty one appeared earlier.
            List<string> distinctLemmas = new List<string>();
            Dictionary<string, int> occurrences = new Dictionary<string, int>();
            foreach (string lemma in lemmas)
            {
                if (string.IsNullOrEmpty(lemma))
                {
                    continue;
                }

                int seen;
                if (occurrences.TryGetValue(lemma, out seen))
                {
                    occurrences[lemma] = seen + 1;
                }
                else
                {
                    occurrences[lemma] = 1;
                    distinctLemmas.Add(lemma);
                }
            }

            // NOTE(review): credentials are hard-coded in source; consider moving the
            // connection string to configuration.
            using (MySqlConnection connection = new MySqlConnection("server=localhost;user id=keko;password=keko;database=feri;"))
            {
                connection.Open();
                InsertTermFrequencies(connection, teme, distinctLemmas, occurrences);
                RebuildDocumentFrequencies(connection);
            }
        }

        /// <summary>
        /// Inserts one <c>tf</c> row per lemma. A failed insert is traced and skipped so the
        /// remaining lemmas are still stored (the original silently ignored such failures).
        /// </summary>
        private static void InsertTermFrequencies(MySqlConnection connection, string document, List<string> lemmas, Dictionary<string, int> occurrences)
        {
            string sql = "INSERT INTO tf(Lema,St_pojavitev,Dokument)VALUES (@lema,@st_pojavitev,@dokument)";
            foreach (string lemma in lemmas)
            {
                using (MySqlCommand cmd = new MySqlCommand(sql, connection))
                {
                    cmd.Parameters.AddWithValue("@lema", lemma);
                    cmd.Parameters.AddWithValue("@st_pojavitev", occurrences[lemma]);
                    cmd.Parameters.AddWithValue("@dokument", document);
                    try
                    {
                        cmd.ExecuteNonQuery();
                    }
                    catch (MySqlException ex)
                    {
                        System.Diagnostics.Trace.TraceWarning(
                            "Insert into tf failed for lemma '{0}' in document '{1}': {2}",
                            lemma, document, ex.Message);
                    }
                }
            }
        }

        /// <summary>
        /// Reads the number of <c>tf</c> rows per lemma, empties <c>df</c> and refills it
        /// with those counts, in descending count order.
        /// </summary>
        private static void RebuildDocumentFrequencies(MySqlConnection connection)
        {
            List<int> counts = new List<int>();
            List<string> lemmas = new List<string>();

            // The reader is disposed before further commands run on the same connection.
            using (MySqlCommand select = new MySqlCommand("SELECT COUNT(*) as C, Lema FROM tf GROUP BY Lema ORDER BY C Desc", connection))
            using (MySqlDataReader reader = select.ExecuteReader())
            {
                while (reader.Read())
                {
                    counts.Add(reader.GetInt32(0));
                    lemmas.Add(reader.GetString(1));
                }
            }

            using (MySqlCommand truncate = new MySqlCommand("TRUNCATE TABLE df", connection))
            {
                truncate.ExecuteNonQuery();
            }

            string insertSql = "INSERT INTO df(Lema,DF)VALUES (@lema,@DF)";
            for (int i = 0; i < lemmas.Count; i++)
            {
                using (MySqlCommand insert = new MySqlCommand(insertSql, connection))
                {
                    insert.Parameters.AddWithValue("@lema", lemmas[i]);
                    insert.Parameters.AddWithValue("@DF", counts[i]);
                    insert.ExecuteNonQuery();
                }
            }
        }