/// <summary>
/// Points the crawling at <paramref name="optionUrl"/>, downloads the resulting page and,
/// if the download is accepted by <c>IsSuccess</c>, runs every action in
/// <c>crawling.ActionList</c> against the encoded words extracted from that page.
/// </summary>
/// <remarks>
/// The method name keeps its original spelling because callers reference it by that name.
/// </remarks>
/// <param name="optionUrl">Value assigned to <c>crawling.Url.Option</c> and forwarded to each action.</param>
/// <param name="crawling">Crawling definition; its <c>Url.Option</c> is overwritten by this call.</param>
/// <param name="indexs">Passed through unchanged to <c>ExcuteAction</c>.</param>
/// <returns>
/// <c>false</c> when <c>IsSuccess</c> rejects the downloaded document (no action is run);
/// <c>true</c> once all actions have been executed.
/// </returns>
public static bool StasrtCrawling(string optionUrl, Crawling crawling, List<string> indexs)
{
    crawling.Url.Option = optionUrl;

    HtmlDocument page = DownloadHtml(crawling.Url.ToString());
    if (IsSuccess(page))
    {
        List<List<string>> encodedRows = EncodingWordHelper.GetEncodedWords(
            page,
            crawling.WordList,
            crawling.OtherCrawlingList,
            crawling.BaseXPath);

        foreach (BaseAction currentAction in crawling.ActionList)
        {
            // The combined list is rebuilt for every action, so each call to
            // ExcuteAction receives its own fresh List<Word> instance.
            var linkedWords = crawling.OtherCrawlingList
                .SelectMany(other => other.CrawlingPointer.WordList);
            List<Word> allWords = crawling.WordList.Concat(linkedWords).ToList();

            // Find yields null when no word is flagged as the index; that value
            // is handed to ExcuteAction as-is.
            Word indexWord = allWords.Find(candidate => candidate.IsIndex);

            ExcuteAction(
                currentAction,
                allWords,
                indexWord,
                crawling,
                optionUrl,
                encodedRows,
                indexs);
        }

        return true;
    }

    return false;
}
/// <summary>
/// Downloads the page for <paramref name="urlOption"/> and fills the crawling's word list
/// from the first entry that <c>GetResults</c> returns for <c>crawling.BaseXPath</c>.
/// </summary>
/// <param name="crawling">Crawling definition; may be <c>null</c>. Its <c>Url.Option</c> is overwritten.</param>
/// <param name="urlOption">Value assigned to <c>crawling.Url.Option</c> before downloading.</param>
/// <returns>
/// A new empty list when <paramref name="crawling"/> is <c>null</c> or when
/// <c>GetResults</c> yields <c>null</c> or no entries; otherwise <c>crawling.WordList</c>
/// itself (not a copy) after the original and encoded words have been set on it.
/// </returns>
public static List<Word> CrawlingOne(Crawling crawling, string urlOption)
{
    if (crawling == null)
    {
        return new List<Word>();
    }

    crawling.Url.Option = urlOption;

    var page = InfiniteDownloadHtml(crawling.Url.ToString());
    var matches = GetResults(page, crawling.BaseXPath);

    bool nothingFound = matches == null || matches.Count < 1;
    if (nothingFound)
    {
        return new List<Word>();
    }

    // Only the first match is used; any further matches are ignored.
    var targetWords = crawling.WordList;
    EncodingWordHelper.SetOriginalWords(targetWords, matches[0]);
    EncodingWordHelper.SetEncodedWords(targetWords);

    return targetWords;
}