/// <summary>
/// Narrows the lower-cased page content down to candidate item lines and
/// hands them to <c>ApplyItemSections</c> to configure the handler.
/// </summary>
/// <param name="contentInLowerCase">Page content, already lower-cased by the caller.</param>
/// <param name="handler">Handler that receives the item-section configuration.</param>
private void ConfigureItemSections(IEnumerable<string> contentInLowerCase, ListPageHandler handler)
{
    // Dish names used as probes for spotting item lines on the list page.
    var knownDishes = new[] { "kurczak", "gulasz", "makaron", "filet" };

    // No exclusions are configured at the moment.
    var excludedWords = new string[0];

    // NOTE(review): presumably keeps entries containing a known dish name and
    // none of the excluded words - confirm against FilterForExcept.
    var dishCandidates = FilterForExcept(contentInLowerCase, knownDishes, excludedWords);

    // Further restrict the candidates using the anchor marker.
    var anchorCandidates = FilterFor(dishCandidates, new[] { "<a href" });

    ApplyItemSections(anchorCandidates, knownDishes, handler);
}
/// <summary>
/// Builds a list-page handler for the list that starts at <paramref name="firstListPage"/>.
/// Stores the page and its content on this instance, then configures the
/// handler's item sections and its next-page strategy.
/// </summary>
/// <param name="firstListPage">Address of the first page of the list.</param>
/// <returns>The configured handler.</returns>
/// <exception cref="ArgumentNullException"><paramref name="firstListPage"/> is null.</exception>
public IListPageHandler CreateForList(Uri firstListPage)
{
    // Fail before any instance state is overwritten.
    if (firstListPage == null)
    {
        throw new ArgumentNullException("firstListPage");
    }

    _currentPage = firstListPage;
    _pageContent = GetContent(firstListPage);

    ListPageHandler handler = new ListPageHandler();

    // Lower-case with the invariant culture: the content is matched against
    // fixed lower-case literals, and culture-sensitive casing (e.g. the
    // Turkish dotless i) would make "FILET" fail to match "filet".
    ConfigureItemSections(_pageContent.Select(line => line.ToLowerInvariant()), handler);
    ConfigureNextPageSwitch(handler);

    return handler;
}
/// <summary>
/// Chooses the handler's next-page strategy. The increment strategy is tried
/// first by fetching the page it proposes; if that fetch raises a
/// <see cref="WebException"/>, the content-search strategy is used instead.
/// </summary>
/// <param name="handler">Handler whose <c>NextPageStrategy</c> is assigned.</param>
private void ConfigureNextPageSwitch(ListPageHandler handler)
{
    var incrementStrategy = new InceremntPageStrategy(_currentPage);
    var probedPage = incrementStrategy.NextPage();

    try
    {
        // The fetched content is discarded; the call only probes whether the
        // proposed page can be retrieved.
        GetContent(probedPage);
        handler.NextPageStrategy = incrementStrategy;
    }
    catch (WebException)
    {
        handler.NextPageStrategy = new SearchContentForUriStrategy();
    }
}
/// <summary>
/// Scans candidate lines for an anchor whose text contains one of the tested
/// dish names and, for each such line, configures the handler with the text
/// surrounding the anchor and a URI extractor built from the anchor's opening tag.
/// Lines whose anchor markup cannot be located completely are skipped.
/// </summary>
/// <param name="sections">Candidate lines expected to contain an anchor.</param>
/// <param name="testedDishNames">Dish names looked for inside the anchor text.</param>
/// <param name="handler">Handler whose <c>ItemSection</c> and <c>ItemUriExtractor</c> are assigned.</param>
private void ApplyItemSections(IEnumerable<string> sections, string[] testedDishNames, ListPageHandler handler)
{
    const string anchorOpen = "<a href=";
    const string anchorClose = "</a>";

    foreach (var section in sections)
    {
        // Start of the opening tag. Callers pre-filter on "<a href" only, so
        // a variant such as "<a href =" can reach this point without a match.
        int begin = section.IndexOf(anchorOpen, StringComparison.Ordinal);
        if (begin < 0)
        {
            continue;
        }

        // '>' that closes the opening tag.
        int betweenBegin = section.IndexOf('>', begin + anchorOpen.Length);
        if (betweenBegin < 0)
        {
            continue;
        }

        // First '<' after the opening tag marks the end of the anchor text.
        int betweenEnd = section.IndexOf('<', betweenBegin);
        if (betweenEnd < 0)
        {
            continue;
        }

        int closeAt = section.IndexOf(anchorClose, betweenEnd, StringComparison.Ordinal);
        if (closeAt < 0)
        {
            continue;
        }

        // Index just past the closing tag.
        int end = closeAt + anchorClose.Length;

        string dishName = section.Substring(betweenBegin + 1, betweenEnd - betweenBegin - 1);
        if (!testedDishNames.Any(w => dishName.Contains(w)))
        {
            continue;
        }

        // Everything before and after the anchor delimits an item on the page.
        string sectionBegin = section.Substring(0, begin);
        string sectionEnd = section.Substring(end);
        handler.ItemSection = new KeyValuePair<string, string>(sectionBegin, sectionEnd);

        // The extractor is derived from the opening tag text, without its closing '>'.
        handler.ItemUriExtractor = GetItemExtractStrategyFor(section.Substring(begin, betweenBegin - begin));

        // The matching is still unreliable and misses cases, so a single hit is
        // not trusted: every matching anchor overwrites the handler settings and
        // the last one wins. Once matching improves, return after the first match.
    }
}