/// <summary>
        /// Walks the book / chapter / verse levels of <c>BibleXml</c> and tallies the words of
        /// every verse: increments <c>TotalVerseCount</c> per verse and <c>TotalWordCount</c> per
        /// word, bumps the word's entry in <c>WordFrequencies</c> and adds it to <c>UniqueWords</c>.
        /// </summary>
        /// <remarks>
        /// Verse text is upper-cased (invariant culture). Only letters, spaces and hyphens that sit
        /// directly between two letters are kept; every other character is removed without being
        /// replaced by a separator, so "a,b" is counted as the single word "AB".
        /// </remarks>
        private void LoadWordFrequencies()
        {
            foreach (XmlNode book in BibleXml.DocumentElement.ChildNodes)
            {
                foreach (XmlNode chapter in book.ChildNodes)
                {
                    foreach (XmlNode verse in chapter.ChildNodes)
                    {
                        // Comments, processing instructions etc. are not verses.
                        if (verse.NodeType != XmlNodeType.Element)
                        {
                            continue;
                        }

                        TotalVerseCount++;

                        string text    = verse.InnerText.ToUpperInvariant();
                        var    cleaned = new System.Text.StringBuilder(text.Length);

                        for (int i = 0; i < text.Length; i++)
                        {
                            char c = text[i];
                            bool keep;

                            if (c == '-')
                            {
                                // Allow hyphenated words only: the hyphen must have a letter on both
                                // sides. A hyphen at either end of the verse can never join two
                                // letters, so it is dropped here rather than trimmed off later.
                                keep = i > 0
                                       && i < text.Length - 1
                                       && char.IsLetter(text[i - 1])
                                       && char.IsLetter(text[i + 1]);
                            }
                            else
                            {
                                keep = c == ' ' || char.IsLetter(c);
                            }

                            if (keep)
                            {
                                cleaned.Append(c);
                            }
                        }

                        // Every surviving hyphen is enclosed by letters, so the split pieces need no
                        // further trimming and are never empty.
                        var words = cleaned.ToString().Split(' ', StringSplitOptions.RemoveEmptyEntries);

                        foreach (var word in words)
                        {
                            TotalWordCount++;

                            // Single lookup: a missing key leaves 'occurrences' at its default of 0.
                            WordFrequencies.TryGetValue(word, out var occurrences);
                            WordFrequencies[word] = occurrences + 1;

                            UniqueWords.Add(word);
                        }
                    }
                }
            }
        }
示例#2
0
	/// <summary>
	/// Scratch test stub: creates a <c>UniqueWords</c> instance and a sample word array
	/// containing duplicates. Neither value is used further in this method.
	/// </summary>
	public static void Test()
	{
		var wordCounter = new UniqueWords();
		var sampleWords = new string[]
		{
			"apple", "bat", "cat", "cat", "tin", "jungle", "bat"
		};
	}
        /// <summary>
        /// Console entry point. Loads the stored words, asks the user whether to list them (1)
        /// or to count the words of a web page and store them (2), then waits for a key press
        /// and restarts the application.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // The context is scoped explicitly because Environment.Exit below terminates the
            // process without running pending cleanup, so it must be disposed before that call.
            // NOTE(review): assumes ApplicationContext is an IDisposable EF context — confirm.
            using (ApplicationContext context = new ApplicationContext())
            {
                context.UniqueWords.Load();

                Console.Write("Что сделать? (1-вывести из базы, 2-ввести URL): ");

                // TryParse instead of Parse: a non-numeric (or missing) answer must not crash the app.
                if (!int.TryParse(Console.ReadLine(), out int answer))
                {
                    Console.WriteLine("Некорректное число");
                }
                else if (answer == 1)
                {
                    PrintStoredWords(context);
                }
                else if (answer == 2)
                {
                    ImportWordsFromUrl(context);
                }
            }

            Console.WriteLine("\nНажмите любую кнопку...");
            Console.ReadKey();

            // Restart the application so the menu is shown again.
            Process.Start(Assembly.GetExecutingAssembly().Location);
            Environment.Exit(0);
        }

        /// <summary>
        /// Prints every word currently tracked by the context as "word - count".
        /// </summary>
        /// <param name="context">Context whose <c>UniqueWords</c> set has already been loaded.</param>
        private static void PrintStoredWords(ApplicationContext context)
        {
            Console.WriteLine("\n\nСписок слов:\n");

            foreach (var word in context.UniqueWords.Local.ToList())
            {
                Console.WriteLine($"{word.Word} - {word.Count}");
            }
        }

        /// <summary>
        /// Asks for a URL and a minimum occurrence count, counts the words of the page, prints
        /// every word that reaches the minimum and stores it: a new row is added for an unseen
        /// (word, URL) pair, otherwise the new count is added to the existing row's count.
        /// Any exception raised while fetching, parsing or saving is reported by its message.
        /// </summary>
        /// <param name="context">Context whose <c>UniqueWords</c> set has already been loaded.</param>
        private static void ImportWordsFromUrl(ApplicationContext context)
        {
            ParseHtml parse = new ParseHtml();

            Console.Write("Введите URL: ");
            var url = Console.ReadLine();

            Console.WriteLine();

            Console.Write("Введите кол-во вхождений слова: ");
            var countText = Console.ReadLine();

            // Report invalid input directly instead of throwing an exception only to catch it.
            if (!int.TryParse(countText, out int count))
            {
                Console.WriteLine("Некорректное число");
                return;
            }

            try
            {
                // Example URL: https://www.simbirsoft.com/
                var streamHtml = parse.GetHtmlStream(url);
                var parsedHtml = parse.ParseHtmlOfStream(streamHtml);

                UniqueWords uniqueWords = new UniqueWords();
                var clearArray           = uniqueWords.ClearArrayWithWord(parsedHtml);
                var dictionaryUniqueWord = uniqueWords.CountUniqueWord(clearArray);

                // Next free id, taken from the highest tracked id rather than from whichever
                // entity happens to be last in tracking order; computed once, not per insert.
                var storedWords = context.UniqueWords.Local;
                var nextId      = storedWords.Any() ? storedWords.Max(x => x.IdWord) + 1 : 1;

                foreach (var keyValue in dictionaryUniqueWord)
                {
                    if (keyValue.Value < count)
                    {
                        continue;
                    }

                    Console.WriteLine($"{keyValue.Key} - {keyValue.Value}");

                    var item = storedWords.FirstOrDefault(x => x.Word == keyValue.Key && x.Url == url);
                    if (item == null)
                    {
                        context.UniqueWords.Add(new UniqueWord {
                            IdWord = nextId++, Url = url, Word = keyValue.Key, Count = keyValue.Value
                        });
                    }
                    else
                    {
                        item.Count += keyValue.Value;
                    }
                }

                context.SaveChanges();
                Console.WriteLine("\nНажмите на любую кнопку");
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }