Пример #1
0
        /// <summary>
        /// Narrows the page content down to candidate item sections and hands them to
        /// <see cref="ApplyItemSections"/> so the handler can be configured from them.
        /// </summary>
        /// <param name="contentInLowerCase">Page content lines; by name, expected to be lower-cased already.</param>
        /// <param name="handler">Handler that receives the detected item-section settings.</param>
        private void ConfigureItemSections(IEnumerable <string> contentInLowerCase, ListPageHandler handler)
        {
            // Sample dish keywords used to recognise item entries on the list page.
            string[] dishKeywords = { "kurczak", "gulasz", "makaron", "filet" };

            // No exclusions are configured at the moment.
            string[] excludedKeywords = { };

            // Only content that also carries an anchor tag can point at an item page.
            string[] anchorMarkers = { "<a href" };

            // NOTE(review): FilterForExcept/FilterFor are defined elsewhere; presumably they keep entries
            // containing any of the given words (and none of the excluded ones) - confirm.
            var candidates = FilterFor(
                FilterForExcept(contentInLowerCase, dishKeywords, excludedKeywords),
                anchorMarkers);

            ApplyItemSections(candidates, dishKeywords, handler);
        }
Пример #2
0
        /// <summary>
        /// Creates a list-page handler for the listing that starts at <paramref name="firstListPage"/>.
        /// </summary>
        /// <remarks>
        /// Stores the page URI and its downloaded content in the instance fields, then configures the
        /// handler's item sections and next-page strategy from them.
        /// </remarks>
        /// <param name="firstListPage">URI of the first page of the list.</param>
        /// <returns>The configured handler.</returns>
        public IListPageHandler CreateForList(Uri firstListPage)
        {
            _currentPage = firstListPage;
            _pageContent = GetContent(firstListPage);

            var handler = new ListPageHandler();

            // Lower-case with the invariant culture: the result is matched against fixed ASCII keywords
            // and HTML markup, so it must not depend on the machine's current culture
            // (e.g. under tr-TR "I" would become dotless "ı" and "FILET" would never match "filet").
            ConfigureItemSections(_pageContent.Select(line => line.ToLowerInvariant()), handler);
            ConfigureNextPageSwitch(handler);

            return handler;
        }
Пример #3
0
        /// <summary>
        /// Chooses the next-page strategy for <paramref name="handler"/>.
        /// </summary>
        /// <remarks>
        /// Asks an increment-based strategy for the page following the current one and tries to download it.
        /// If the download succeeds that strategy is kept; if it fails with a <see cref="WebException"/>,
        /// the handler falls back to <c>SearchContentForUriStrategy</c>.
        /// </remarks>
        /// <param name="handler">Handler whose <c>NextPageStrategy</c> is assigned.</param>
        private void ConfigureNextPageSwitch(ListPageHandler handler)
        {
            var incrementStrategy = new InceremntPageStrategy(_currentPage);
            var probePage         = incrementStrategy.NextPage();

            try
            {
                // The downloaded content itself is discarded; only whether the request succeeds matters.
                GetContent(probePage);
                handler.NextPageStrategy = incrementStrategy;
            }
            catch (WebException)
            {
                // The guessed page could not be fetched, so use the content-searching strategy instead.
                handler.NextPageStrategy = new SearchContentForUriStrategy();
            }
        }
Пример #4
0
        /// <summary>
        /// Inspects the first anchor tag of every candidate section and, when the anchor text contains one of
        /// the tested dish names, records the markup surrounding the anchor and a URI extractor on
        /// <paramref name="handler"/>.
        /// </summary>
        /// <remarks>
        /// Sections whose anchor markup is incomplete (no <c>&lt;a href=</c>, no closing <c>&gt;</c>, no
        /// following <c>&lt;</c>, or no <c>&lt;/a&gt;</c>) are skipped instead of throwing.
        /// Each match overwrites the previous one, so the last matching section wins.
        /// </remarks>
        /// <param name="sections">Candidate sections of page content.</param>
        /// <param name="testedDishNames">Dish names searched for inside the anchor text.</param>
        /// <param name="handler">Handler whose <c>ItemSection</c> and <c>ItemUriExtractor</c> are assigned.</param>
        private void ApplyItemSections(IEnumerable <string> sections, string[] testedDishNames, ListPageHandler handler)
        {
            const string anchorOpen  = "<a href=";
            const string anchorClose = "</a>";

            foreach (var section in sections)
            {
                // Markup is matched ordinally; culture-sensitive search is both slower and can mis-match.
                int begin = section.IndexOf(anchorOpen, System.StringComparison.Ordinal);
                if (begin < 0)
                {
                    continue;
                }

                // End of the opening tag: first '>' after the "<a href=" prefix.
                int betweenBegin = section.IndexOf('>', begin + anchorOpen.Length);
                if (betweenBegin < 0)
                {
                    continue;
                }

                // The anchor text runs up to the next '<' (which may open a nested tag).
                int betweenEnd = section.IndexOf('<', betweenBegin);
                if (betweenEnd < 0)
                {
                    continue;
                }

                int closeIndex = section.IndexOf(anchorClose, betweenEnd, System.StringComparison.Ordinal);
                if (closeIndex < 0)
                {
                    continue;
                }

                int end      = closeIndex + anchorClose.Length;
                var dishName = section.Substring(betweenBegin + 1, betweenEnd - betweenBegin - 1);

                if (!testedDishNames.Any(w => dishName.Contains(w)))
                {
                    continue;
                }

                // Everything before the anchor and everything after its closing tag delimit the item.
                string sectionBegin = section.Substring(0, begin);
                string sectionEnd   = section.Substring(end);

                handler.ItemSection = new KeyValuePair <string, string>(sectionBegin, sectionEnd);

                // The extractor is derived from the opening tag without its trailing '>'.
                handler.ItemUriExtractor = GetItemExtractStrategyFor(section.Substring(begin, betweenBegin - begin));

                // Detection is still imprecise and misses cases, so no single match is trusted:
                // every matching href is accepted for now. Once detection improves, return after the first match.
            }
        }