示例#1
0
        /// <summary>
        /// Picks a random n-gram that follows <paramref name="previous"/>.
        /// Candidates are the stored n-grams for which <c>isNextFor(previous, 1)</c> holds; those
        /// that also satisfy <c>isNextFor(previous, 2)</c> form the preferred (trigram-level) subset.
        /// </summary>
        /// <param name="previous">Current n-gram whose successor is requested.</param>
        /// <param name="biIfMoreThanOneTri">Chance of falling back to the broader bigram-level pool when more than one trigram-level match exists.</param>
        /// <param name="biIfOnlyOneTri">Chance of falling back to the broader bigram-level pool when exactly one trigram-level match exists.</param>
        /// <returns>A randomly chosen successor, or <c>null</c> when no stored n-gram follows <paramref name="previous"/>.</returns>
        private NGram GetNext(NGram previous, double biIfMoreThanOneTri = 0.2, double biIfOnlyOneTri = 0.85)
        {
            var looseMatches = Ngrams.Where(candidate => candidate.isNextFor(previous, 1)).ToArray();

            // Nothing follows the current n-gram at all.
            if (looseMatches.Length == 0)
            {
                return null;
            }

            var strictMatches = looseMatches.Where(candidate => candidate.isNextFor(previous, 2)).ToArray();

            switch (strictMatches.Length)
            {
                case 0:
                    // No strict match: only the loose pool is available.
                    break;

                case 1:
                    if (Rnd.NextDouble() > biIfOnlyOneTri)
                    {
                        return strictMatches[0];
                    }
                    break;

                default:
                    if (Rnd.NextDouble() > biIfMoreThanOneTri)
                    {
                        return strictMatches[Rnd.Next(strictMatches.Length)];
                    }
                    break;
            }

            // Fallback: the loose pool is a superset of the strict one, so a strict match
            // can still be drawn here.
            return looseMatches[Rnd.Next(looseMatches.Length)];
        }
        /// <summary>
        /// Runs the two-stage n-gram pipeline: first <c>GetGrammasWithFrequency</c> over
        /// <paramref name="text"/> starting from an empty table, then
        /// <c>GetGrammasWithoutFrequency</c> on that table together with <paramref name="result"/>.
        /// </summary>
        /// <param name="text">Sentences, each given as a list of words.</param>
        /// <param name="result">Dictionary handed to <c>GetGrammasWithoutFrequency</c> as-is.</param>
        /// <param name="gramma">Kind of n-gram to collect.</param>
        /// <returns>Whatever <c>GetGrammasWithoutFrequency</c> returns for the collected table.</returns>
        private static Dictionary<string, string> GetNgrams(List<List<string>> text,
                                                            Dictionary<string, string> result,
                                                            Ngrams gramma)
        {
            var counted = GetGrammasWithFrequency(
                new Dictionary<string, Dictionary<string, int>>(),
                text,
                gramma);

            return GetGrammasWithoutFrequency(counted, result);
        }
        /// <summary>
        /// Continues generating text after the n-gram <paramref name="start"/>. The current word of
        /// <paramref name="start"/> opens the text (capitalised); then a random follow-up n-gram is
        /// drawn repeatedly and its word appended, prefixed according to its divider, until an
        /// n-gram without a current word is reached or no follow-up exists.
        /// </summary>
        /// <param name="start">Starting n-gram.</param>
        /// <returns>The generated text.</returns>
        protected override string ContinueGeneratingText(NGram start)
        {
            var sb      = new StringBuilder();
            var curword = start;

            sb.Append(curword.Current.StartWithUpper());
            while (curword.Current != null)
            {
                var previous = curword;
                curword = Ngrams.Where(w => w.isNextFor(previous, 1)).OrderBy(w => Rnd.Next()).FirstOrDefault();

                // Dead end: nothing in the model follows the previous n-gram. FirstOrDefault
                // yields null here, so stop and return what has been generated so far instead
                // of dereferencing null.
                if (curword == null)
                {
                    return(sb.ToString());
                }

                if (curword.Current != null)
                {
                    switch (curword.Divider)
                    {
                    case '-':
                        sb.Append(" - " + curword.Current);
                        break;

                    case ',':
                        sb.Append(", " + curword.Current);
                        break;

                    // A full stop and a line break both start a new sentence.
                    case '.':
                    case '\n':
                        sb.Append(". " + curword.Current.StartWithUpper());
                        break;

                    case ';':
                        sb.Append("; " + curword.Current);
                        break;

                    case ' ':
                        sb.Append(' ' + curword.Current);
                        break;

                    default:
                        sb.Append(curword.Divider + curword.Current);
                        break;
                    }
                }
            }

            // The terminating n-gram has no word; emit its divider unless it is a plain space.
            if (curword.Divider != ' ')
            {
                sb.Append(curword.Divider);
            }

            return(sb.ToString());
        }
示例#4
0
        /// <summary>
        /// Console entry point: asks for the n-gram size, extracts n-grams from every text returned
        /// by <c>FileManager.GetFilesContent</c> in parallel, saves them, then saves the result of
        /// <c>TfIdf.SortNGrams</c>. Any exception from the processing stage is printed to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.Write("Enter ngram size wanted : ");
            string value = Console.ReadLine();

            Console.Write(Environment.NewLine);
            int n = 0;

            while (!int.TryParse(value, out n))
            {
                // ReadLine returns null once the input stream is closed; retrying would
                // loop forever, so give up instead.
                if (value == null)
                {
                    return;
                }

                Console.Write("Error bad integer value try again : ");
                value = Console.ReadLine();
                Console.Write(Environment.NewLine);
            }

            try
            {
                // Get texts content
                IEnumerable <string> texts = FileManager.GetFilesContent(Defaults.dataFilesPath);

                // Get ngrams for all texts
                List <string> nGramsResult = new List <string>();
                Ngrams        ng           = new Ngrams();
                var           options      = new ParallelOptions
                {
                    MaxDegreeOfParallelism = 20
                };

                // List<T> is not safe for concurrent writers: serialise the appends so that
                // parallel workers cannot corrupt the list or lose entries.
                object resultGate = new object();

                Parallel.ForEach(texts, options, text =>
                {
                    var temp = ng.GetNgramsWords(text, n);
                    lock (resultGate)
                    {
                        nGramsResult.AddRange(temp);
                    }
                }
                                 );
                // Save Ngrams to a file
                FileManager.SaveFile(Defaults.resultNGramFilePath, nGramsResult);

                // Sort ngrams (and delete duplicate in the same time) with tf idf
                var tfIdfResult = TfIdf.SortNGrams(nGramsResult);

                // Save sorted tf idf to a file
                FileManager.SaveFile(Defaults.resultTfIdfFilePath, tfIdfResult);
            }
            catch (Exception e)
            {
                Console.WriteLine(e.ToString());
            }
            Console.ReadKey();
        }
        /// <summary> Adds an n-gram built from one or two words to the collection. </summary>
        /// <param name="divider">Divider character stored with the n-gram.</param>
        /// <param name="words">Either two words, or a single word (stored with a null first word).</param>
        /// <exception cref="ArgumentException">The number of words is neither one nor two.</exception>
        protected override void Add(char divider, params string[] words)
        {
            switch (words.Length)
            {
                case 2:
                    Ngrams.Add(new BiGram(words[0], words[1], divider));
                    return;

                case 1:
                    // A lone word has no predecessor.
                    Ngrams.Add(new BiGram(null, words[0], divider));
                    return;

                default:
                    throw new ArgumentException();
            }
        }
 /// <summary>
 /// Registers a pattern that ends in this node by appending it to <c>Ngrams</c>.
 /// </summary>
 /// <param name="ngram">Pattern to register.</param>
 public void AddNgram(ngram_t<TValue> ngram) => Ngrams.Add(ngram);
 /// <summary> Saves the stored n-grams, cast to <c>BiGram</c>, to a file. </summary>
 /// <param name="filename">Path of the target file.</param>
 public override void SaveToFile(string filename)
 {
     // Every stored n-gram is cast to BiGram; an element of another type makes Cast throw.
     BiGram[] snapshot = Ngrams.Cast<BiGram>().ToArray();

     _SaveToFile<BiGram[]>(filename, snapshot);
 }
        /// <summary>
        /// Walks every sentence of <paramref name="text"/> and passes each (prefix, next word) pair
        /// to <c>GetFrequency</c>, threading the <paramref name="grammas"/> table through every call.
        /// For bigrams the prefix is one word; for trigrams it is two words joined by a space.
        /// </summary>
        /// <param name="grammas">Table passed to, and replaced by the result of, each <c>GetFrequency</c> call.</param>
        /// <param name="text">Sentences, each given as a list of words.</param>
        /// <param name="gramma">Kind of n-gram to collect.</param>
        /// <returns>The table returned by the last <c>GetFrequency</c> call, or <paramref name="grammas"/> unchanged when no pair was visited.</returns>
        /// <exception cref="ArgumentException"><paramref name="gramma"/> is neither bigram nor trigram and at least one position is visited.</exception>
        private static Dictionary<string, Dictionary<string, int>> GetGrammasWithFrequency(Dictionary<string, Dictionary<string, int>> grammas,
                                                                                           List<List<string>> text,
                                                                                           Ngrams gramma)
        {
            // NOTE(review): the loop bound uses the enum's numeric value, so it is only tight if
            // that value equals the look-ahead distance - confirm against the enum declaration.
            int span = (int)gramma;

            foreach (var sentence in text)
            {
                int limit = sentence.Count - span;

                for (int start = 0; start < limit; start++)
                {
                    string prefix;
                    string follower;

                    if (gramma == Ngrams.Bigramm)
                    {
                        prefix   = sentence[start];
                        follower = sentence[start + 1];
                    }
                    else if (gramma == Ngrams.Trigramm)
                    {
                        prefix   = string.Join(" ", sentence[start], sentence[start + 1]);
                        follower = sentence[start + 2];
                    }
                    else
                    {
                        throw new ArgumentException();
                    }

                    grammas = GetFrequency(grammas, prefix, follower);
                }
            }

            return grammas;
        }
示例#9
0
 /// <summary>
 /// Registers a pattern that ends in this node by appending it to <c>Ngrams</c>.
 /// </summary>
 /// <param name="ngram">Pattern to register.</param>
 public void AddNgram(ngram_t ngram) => Ngrams.Add(ngram);
示例#10
0
 /// <summary>
 /// Registers a pattern that ends in this node by appending it to <c>Ngrams</c>.
 /// </summary>
 /// <param name="ngram">Pattern to register, given as an array of strings.</param>
 public void AddNgram(string[] ngram) => Ngrams.Add(ngram);