Esempio n. 1
0
        /// <summary>
        /// Sums the dictionary weights of every n-gram of length <paramref name="lengthGram"/>
        /// that occurs in <paramref name="domain"/>.
        /// </summary>
        /// <param name="domain">Text to split into overlapping n-grams.</param>
        /// <param name="lengthGram">Length of each n-gram; also selects which dictionary file is used.</param>
        /// <returns>
        /// The sum of the weights of all n-grams found in the dictionary; n-grams that are not in the
        /// dictionary contribute nothing. Returns 0 when the domain is shorter than the gram length.
        /// </returns>
        /// <remarks>
        /// The dictionary for a given gram length is read once from a CSV file in the current directory
        /// (rows of the form <c>text;weight</c>) and then cached in <c>dictionarys</c>.
        /// Weights are parsed with <see cref="Convert.ToDouble(string)"/>, i.e. using the current culture.
        /// </remarks>
        public double CalculateFrequencyNgram(string domain, int lengthGram)
        {
            // Fetch the gram dictionary, loading it from disk on first use.
            List <Gram> grams;

            if (!dictionarys.TryGetValue(lengthGram, out grams))
            {
                grams = new List <Gram>();

                string path = Path.Combine(Directory.GetCurrentDirectory(), $"Слоаврь Ngram - {lengthGram}.csv");

                foreach (string line in File.ReadAllLines(path))
                {
                    // Tolerate blank rows (e.g. a trailing empty line) instead of crashing on row[1].
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    string[] row = line.Split(';');

                    Gram gram = new Gram();
                    gram.text   = row[0];
                    gram.weight = Convert.ToDouble(row[1]);

                    grams.Add(gram);
                }

                dictionarys.Add(lengthGram, grams);
            }

            // Accumulate the weights of the grams present in the domain.
            double frequency = 0;

            // A string of length L holds L - n + 1 grams of length n, so the last valid
            // start index is L - n (inclusive). Using "<" here would skip the final gram.
            int lastStart = domain.Length - lengthGram;

            for (int k = 0; k <= lastStart; k++)
            {
                string text = domain.Substring(k, lengthGram);
                Gram   gram = grams.Find(g => g.text == text);

                if (gram != null)
                {
                    frequency += gram.weight;
                }
            }

            return frequency;
        }
Esempio n. 2
0
        /// <summary>
        /// Builds an n-gram dictionary from labelled domains and saves it as a CSV file
        /// in the current directory.
        /// </summary>
        /// <param name="domains">Domain names; everything from the last '.' onwards is ignored.</param>
        /// <param name="labelsClass">
        /// Class label for each entry of <paramref name="domains"/>; "0" counts as legitimate,
        /// any other value as non-legitimate.
        /// </param>
        /// <param name="lengthGram">Length of the n-grams to extract; also part of the output file name.</param>
        /// <param name="status">Label that receives the "processed/total" progress text.</param>
        /// <param name="progress">Progress bar updated after each domain (via <c>Invoke</c>).</param>
        /// <remarks>
        /// Each gram gets a weight in [-1, 1]: -1 when it was seen only in legitimate domains,
        /// +1 when it was seen only in non-legitimate ones. The file contains one
        /// <c>text;weight</c> row per gram, with the weight formatted using the current culture.
        /// </remarks>
        public void CreateDictionary(string[] domains, string[] labelsClass, int lengthGram, Label status, ProgressBar progress)
        {
            // "grams" keeps first-seen order for the output file; "gramsByText" gives O(1) lookup
            // instead of a linear List.Find for every extracted gram.
            List <Gram> grams = new List <Gram>();
            Dictionary <string, Gram> gramsByText = new Dictionary <string, Gram>();

            int countDomains = domains.Length;

            // Extract the grams and count their occurrences per class.
            for (int i = 0; i < countDomains; i++)
            {
                // Strip the last label (e.g. the TLD). A name without any '.' is used as is;
                // previously Substring(0, -1) threw for such input.
                int    lastDot = domains[i].LastIndexOf('.');
                string domain  = lastDot >= 0 ? domains[i].Substring(0, lastDot) : domains[i];

                // Last valid start index is inclusive: a string of length L has L - n + 1 grams.
                int lastStart = domain.Length - lengthGram;

                for (int k = 0; k <= lastStart; k++)
                {
                    string text = domain.Substring(k, lengthGram);
                    Gram   gram;

                    if (!gramsByText.TryGetValue(text, out gram))
                    {
                        gram      = new Gram();
                        gram.text = text;

                        gramsByText.Add(text, gram);
                        grams.Add(gram);
                    }

                    if (labelsClass[i] == "0")
                    {
                        gram.legitimate++;
                    }
                    else
                    {
                        gram.noLegitimate++;
                    }
                }

                // Copy the counter so the lambda does not capture the mutable loop variable.
                int processed = i;

                progress.Invoke(new Action(() =>
                {
                    progress.Value = processed;
                    status.Text    = $"{processed}/{countDomains}";
                }));
            }

            // Compute the gram weights.
            foreach (Gram gram in grams)
            {
                int legitimate   = gram.legitimate;
                int noLegitimate = gram.noLegitimate;

                // Maps the non-legitimate share from [0, 1] onto [-1, 1]. Every stored gram was
                // counted at least once, so the denominator is never zero, and the formula yields
                // exactly -1 (only legitimate) and +1 (only non-legitimate) at the extremes.
                gram.weight = (noLegitimate / (double)(legitimate + noLegitimate)) * 2 - 1;
            }

            // Save the dictionary.
            System.Text.StringBuilder dictionary = new System.Text.StringBuilder();

            foreach (Gram gram in grams)
            {
                dictionary.Append($"{gram.text};{gram.weight}\r\n");
            }

            string path = Path.Combine(Directory.GetCurrentDirectory(), $"Слоаврь Ngram - {lengthGram}.csv");

            File.WriteAllText(path, dictionary.ToString());
        }