/// <summary>
/// Walks the book / chapter / verse elements under the root of <c>BibleXml</c>
/// and updates the word statistics: <c>TotalVerseCount</c> is incremented per
/// verse, <c>TotalWordCount</c> per word, and each upper-cased word is tallied
/// in <c>WordFrequencies</c> and added to <c>UniqueWords</c>.
/// </summary>
/// <remarks>
/// Nodes that are not elements (whitespace, comments, ...) are skipped at every
/// level. The <c>n</c> attribute of the elements is not needed for counting and
/// is therefore not read.
/// </remarks>
private void LoadWordFrequencies()
{
    var root = BibleXml.DocumentElement;
    if (root == null)
    {
        // Empty document: nothing to count.
        return;
    }

    foreach (XmlNode book in root.ChildNodes)
    {
        if (book.NodeType != XmlNodeType.Element)
        {
            continue;
        }

        foreach (XmlNode chapter in book.ChildNodes)
        {
            if (chapter.NodeType != XmlNodeType.Element)
            {
                continue;
            }

            foreach (XmlNode verse in chapter.ChildNodes)
            {
                if (verse.NodeType != XmlNodeType.Element)
                {
                    continue;
                }

                TotalVerseCount++;

                var normalized = NormalizeVerseText(verse.InnerText);
                foreach (var word in normalized.Split(' ', StringSplitOptions.RemoveEmptyEntries))
                {
                    TotalWordCount++;

                    // Single lookup: a missing key leaves the count at its default of 0.
                    WordFrequencies.TryGetValue(word, out var timesSeen);
                    WordFrequencies[word] = timesSeen + 1;

                    UniqueWords.Add(word);
                }
            }
        }
    }
}

/// <summary>
/// Upper-cases <paramref name="rawText"/> and reduces it to letters, single
/// spaces as word separators, and hyphens that join two letters.
/// </summary>
/// <param name="rawText">Raw text content of one verse.</param>
/// <returns>
/// The normalized text. A hyphen is kept only when it sits directly between two
/// letters, so no word in the result starts or ends with a hyphen.
/// </returns>
private static string NormalizeVerseText(string rawText)
{
    string text = rawText.ToUpperInvariant();
    var kept = new List<char>(text.Length);

    for (int i = 0; i < text.Length; i++)
    {
        char c = text[i];

        if (char.IsLetter(c))
        {
            kept.Add(c);
        }
        else if (c == '-')
        {
            // allow hyphenated words; any other dash separates words
            // (e.g. "A--B" must not be counted as "AB").
            bool joinsLetters = i > 0
                && i < text.Length - 1
                && char.IsLetter(text[i - 1])
                && char.IsLetter(text[i + 1]);
            kept.Add(joinsLetters ? '-' : ' ');
        }
        else if (char.IsWhiteSpace(c))
        {
            // Tabs and line breaks separate words just like a space does.
            kept.Add(' ');
        }

        // Every other character (digits, punctuation) is dropped.
    }

    return string.Concat(kept);
}
/// <summary>
/// Scratch test entry point: creates a <c>UniqueWords</c> instance and a small
/// sample word list containing duplicates. Neither value is used afterwards.
/// </summary>
public static void Test()
{
    var wordCounter = new UniqueWords();
    var sampleWords = new string[]
    {
        "apple",
        "bat",
        "cat",
        "cat",
        "tin",
        "jungle",
        "bat",
    };
}
/// <summary>
/// Console entry point. Loads the stored words, then either lists them
/// (answer 1) or downloads a page from a user-supplied URL, counts its words
/// and stores those that occur at least the requested number of times
/// (answer 2). Afterwards it waits for a key press, starts the executing
/// assembly again and exits the current process.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    ApplicationContext context = new ApplicationContext();
    context.UniqueWords.Load();

    Console.Write("Что сделать? (1-вывести из базы, 2-ввести URL): ");

    // TryParse instead of Parse: a non-numeric or missing answer must not crash the program.
    if (!int.TryParse(Console.ReadLine(), out int answer))
    {
        Console.WriteLine("Некорректное число");
    }
    else if (answer == 1)
    {
        Console.WriteLine("\n\nСписок слов:\n");
        foreach (var word in context.UniqueWords.Local.ToList())
        {
            Console.WriteLine($"{word.Word} - {word.Count}");
        }
    }
    else if (answer == 2)
    {
        ParseHtml parse = new ParseHtml();

        Console.Write("Введите URL: ");
        var url = Console.ReadLine();
        Console.WriteLine();

        Console.Write("Введите кол-во вхождений слова: ");
        var countText = Console.ReadLine();

        if (!int.TryParse(countText, out int count))
        {
            // Report the bad input directly instead of throwing and catching an exception.
            Console.WriteLine("Некорректное число");
        }
        else
        {
            try
            {
                // Example URL: "https://www.simbirsoft.com/"
                var streamHtml = parse.GetHtmlStream(url);
                var parsedHtml = parse.ParseHtmlOfStream(streamHtml);

                UniqueWords uniqueWords = new UniqueWords();
                var clearArray = uniqueWords.ClearArrayWithWord(parsedHtml);
                var dictionaryUniqueWord = uniqueWords.CountUniqueWord(clearArray);

                // Next free id is based on the highest tracked id rather than on
                // whichever entity happens to be last in the local view.
                var nextId = context.UniqueWords.Local
                    .Select(x => x.IdWord)
                    .DefaultIfEmpty()
                    .Max() + 1;

                foreach (var keyValue in dictionaryUniqueWord)
                {
                    if (keyValue.Value < count)
                    {
                        continue;
                    }

                    Console.WriteLine($"{keyValue.Key} - {keyValue.Value}");

                    var item = context.UniqueWords.Local
                        .FirstOrDefault(x => x.Word == keyValue.Key && x.Url == url);
                    if (item == null)
                    {
                        context.UniqueWords.Add(new UniqueWord
                        {
                            IdWord = nextId++,
                            Url = url,
                            Word = keyValue.Key,
                            Count = keyValue.Value
                        });
                    }
                    else
                    {
                        item.Count += keyValue.Value;
                    }
                }

                context.SaveChanges();
                Console.WriteLine("\nНажмите на любую кнопку");
            }
            catch (Exception ex)
            {
                // Top-level best effort: show the failure and fall through to the restart prompt.
                Console.WriteLine(ex.Message);
            }
        }
    }

    Console.WriteLine("\nНажмите любую кнопку...");
    Console.ReadKey();

    // Start the executing assembly again, then end this process.
    Process.Start(Assembly.GetExecutingAssembly().Location);
    Environment.Exit(0);
}