/// <summary>
/// Picks a random n-gram that follows the given one.
/// </summary>
/// <param name="previous">Current n-gram; the result is chosen among the n-grams that follow it.</param>
/// <param name="biIfMoreThanOneTri">Chance of falling back to a bigram-level match instead of a trigram-level match when more than one trigram-level match was found.</param>
/// <param name="biIfOnlyOneTri">Chance of falling back to a bigram-level match instead of a trigram-level match when exactly one trigram-level match was found.</param>
/// <returns>The chosen n-gram, or <c>null</c> when nothing in <c>Ngrams</c> satisfies <c>isNextFor(previous, 1)</c>.</returns>
private NGram GetNext(NGram previous, double biIfMoreThanOneTri = 0.2, double biIfOnlyOneTri = 0.85)
{
    // Candidates matching at depth 1 (the "bigram" pool).
    var bigramMatches = Ngrams.Where(candidate => candidate.isNextFor(previous, 1)).ToArray();
    if (bigramMatches.Length == 0)
    {
        return null;
    }

    // Subset of the pool that also matches at depth 2 (the "trigram" pool).
    var trigramMatches = bigramMatches.Where(candidate => candidate.isNextFor(previous, 2)).ToArray();

    // The fallback chance depends on how many trigram-level matches exist.
    double bigramChance = trigramMatches.Length == 1 ? biIfOnlyOneTri : biIfMoreThanOneTri;

    // The random draw only happens when at least one trigram-level match exists.
    if (trigramMatches.Length > 0 && Rnd.NextDouble() > bigramChance)
    {
        // A single match is returned directly, without drawing an index.
        return trigramMatches.Length == 1
            ? trigramMatches[0]
            : trigramMatches[Rnd.Next(trigramMatches.Length)];
    }

    return bigramMatches[Rnd.Next(bigramMatches.Length)];
}
/// <summary>
/// Builds a frequency table for <paramref name="text"/> via <c>GetGrammasWithFrequency</c>
/// and hands it, together with <paramref name="result"/>, to <c>GetGrammasWithoutFrequency</c>.
/// </summary>
/// <param name="text">Sentences, each given as a list of words.</param>
/// <param name="result">Dictionary forwarded to <c>GetGrammasWithoutFrequency</c>.</param>
/// <param name="gramma">Kind of n-gram to collect.</param>
/// <returns>Whatever <c>GetGrammasWithoutFrequency</c> returns for the collected table.</returns>
private static Dictionary<string, string> GetNgrams(List<List<string>> text, Dictionary<string, string> result, Ngrams gramma)
{
    // The helper receives a fresh, empty table to fill.
    Dictionary<string, Dictionary<string, int>> frequencyTable =
        GetGrammasWithFrequency(new Dictionary<string, Dictionary<string, int>>(), text, gramma);

    return GetGrammasWithoutFrequency(frequencyTable, result);
}
/// <summary>
/// Continues generating text after the n-gram <paramref name="start"/>.
/// </summary>
/// <param name="start">Starting n-gram; its <c>Current</c> word (passed through <c>StartWithUpper()</c>) opens the text.</param>
/// <returns>
/// The generated text. Generation stops when the chosen successor has a <c>null</c>
/// <c>Current</c> word, or when no successor exists at all.
/// </returns>
protected override string ContinueGeneratingText(NGram start)
{
    var sb = new StringBuilder();
    var curword = start;
    sb.Append(curword.Current.StartWithUpper());

    while (curword != null && curword.Current != null)
    {
        // Separate local so the lambda does not capture the variable being reassigned.
        var previous = curword;

        // Random successor; FirstOrDefault yields null when nothing follows `previous`.
        curword = Ngrams.Where(w => w.isNextFor(previous, 1)).OrderBy(w => Rnd.Next()).FirstOrDefault();

        // No successor (null) or a terminating n-gram (null word): stop generating.
        if (curword == null || curword.Current == null)
        {
            break;
        }

        switch (curword.Divider)
        {
            case '-':
                sb.Append(" - " + curword.Current);
                break;

            case ',':
                sb.Append(", " + curword.Current);
                break;

            case ';':
                sb.Append("; " + curword.Current);
                break;

            case ' ':
                sb.Append(' ' + curword.Current);
                break;

            // A full stop and a line break both start a new sentence.
            case '.':
            case '\n':
                sb.Append(". " + curword.Current.StartWithUpper());
                break;

            default:
                sb.Append(curword.Divider + curword.Current);
                break;
        }
    }

    // Close the text with the terminating n-gram's divider, if there is one.
    // When no successor was found there is no n-gram to read a divider from.
    if (curword != null && curword.Divider != ' ')
    {
        sb.Append(curword.Divider);
    }

    return sb.ToString();
}
/// <summary>
/// Console entry point: asks for the n-gram size, extracts n-grams from every text
/// returned by <c>FileManager.GetFilesContent</c> in parallel, saves them, then
/// saves the TF-IDF-sorted result.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.Write("Enter ngram size wanted : ");
    string value = Console.ReadLine();
    Console.Write(Environment.NewLine);

    int n = 0;
    while (!int.TryParse(value, out n))
    {
        // ReadLine returns null once standard input is exhausted; without this
        // check the loop would spin forever re-reading null.
        if (value == null)
        {
            return;
        }

        Console.Write("Error bad integer value try again : ");
        value = Console.ReadLine();
        Console.Write(Environment.NewLine);
    }

    try
    {
        // Get texts content
        IEnumerable<string> texts = FileManager.GetFilesContent(Defaults.dataFilesPath);

        // Get ngrams for all texts
        List<string> nGramsResult = new List<string>();
        object nGramsResultLock = new object();
        Ngrams ng = new Ngrams();
        var options = new ParallelOptions { MaxDegreeOfParallelism = 20 };

        Parallel.ForEach(texts, options, text =>
        {
            // Materialise outside the lock so the extraction itself stays parallel.
            var temp = new List<string>(ng.GetNgramsWords(text, n));

            // List<T> is not safe for concurrent writes; serialise the append.
            lock (nGramsResultLock)
            {
                nGramsResult.AddRange(temp);
            }
        });

        // Save Ngrams to a file
        FileManager.SaveFile(Defaults.resultNGramFilePath, nGramsResult);

        // Sort ngrams (and delete duplicate in the same time) with tf idf
        var tfIdfResult = TfIdf.SortNGrams(nGramsResult);

        // Save sorted tf idf to a file
        FileManager.SaveFile(Defaults.resultTfIdfFilePath, tfIdfResult);
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }

    Console.ReadKey();
}
/// <summary>
/// Adds an n-gram built from the given words to <c>Ngrams</c>.
/// </summary>
/// <param name="divider">Divider character.</param>
/// <param name="words">
/// One or two words. A single word produces a <c>BiGram</c> whose first argument is <c>null</c>;
/// two words are passed to the <c>BiGram</c> constructor in order.
/// </param>
/// <exception cref="ArgumentException">Thrown when <paramref name="words"/> holds neither one nor two items.</exception>
protected override void Add(char divider, params string[] words)
{
    switch (words.Length)
    {
        case 1:
            Ngrams.Add(new BiGram(null, words[0], divider));
            break;

        case 2:
            Ngrams.Add(new BiGram(words[0], words[1], divider));
            break;

        default:
            throw new ArgumentException();
    }
}
/// <summary>
/// Registers a pattern that ends in this node.
/// </summary>
/// <param name="ngram">Pattern to append to <c>Ngrams</c>.</param>
public void AddNgram(ngram_t<TValue> ngram) => Ngrams.Add(ngram);
/// <summary>
/// Saves the n-grams to a file.
/// </summary>
/// <param name="filename">Path to the file.</param>
public override void SaveToFile(string filename)
{
    // Every stored n-gram is cast to BiGram; the resulting array is what gets written.
    BiGram[] bigrams = Ngrams.Cast<BiGram>().ToArray();
    _SaveToFile<BiGram[]>(filename, bigrams);
}
/// <summary>
/// Walks every sentence of <paramref name="text"/> and feeds each (prefix, next word)
/// pair to <c>GetFrequency</c>, threading the table through the calls.
/// </summary>
/// <param name="grammas">Table passed to, and replaced by the result of, each <c>GetFrequency</c> call.</param>
/// <param name="text">Sentences, each given as a list of words.</param>
/// <param name="gramma">
/// N-gram kind. For <c>Bigramm</c> the prefix is one word; for <c>Trigramm</c> it is two
/// consecutive words joined by a single space.
/// </param>
/// <returns>The table returned by the last <c>GetFrequency</c> call, or <paramref name="grammas"/> itself if no pair was produced.</returns>
/// <exception cref="ArgumentException">Thrown for any other n-gram kind, once a sentence is long enough to yield a pair.</exception>
private static Dictionary<string, Dictionary<string, int>> GetGrammasWithFrequency(Dictionary<string, Dictionary<string, int>> grammas, List<List<string>> text, Ngrams gramma)
{
    foreach (var sentence in text)
    {
        // The bound subtracts the enum's numeric value.
        // NOTE(review): presumably that value equals the look-ahead distance — confirm against the enum.
        int startLimit = sentence.Count - (int)gramma;

        for (int start = 0; start < startLimit; start++)
        {
            string prefix;
            string follower;

            if (gramma == Ngrams.Bigramm)
            {
                prefix = sentence[start];
                follower = sentence[start + 1];
            }
            else if (gramma == Ngrams.Trigramm)
            {
                prefix = sentence[start] + " " + sentence[start + 1];
                follower = sentence[start + 2];
            }
            else
            {
                throw new ArgumentException();
            }

            grammas = GetFrequency(grammas, prefix, follower);
        }
    }

    return grammas;
}
/// <summary>
/// Registers a pattern that ends in this node.
/// </summary>
/// <param name="ngram">Pattern to append to <c>Ngrams</c>.</param>
public void AddNgram(ngram_t ngram) => Ngrams.Add(ngram);
/// <summary>
/// Registers a pattern that ends in this node.
/// </summary>
/// <param name="ngram">Pattern, given as an array of strings, to append to <c>Ngrams</c>.</param>
public void AddNgram(string[] ngram) => Ngrams.Add(ngram);