Esempio n. 1
0
        /// <summary>
        /// Points the crawling definition at <paramref name="optionUrl"/>, downloads the
        /// resulting page and, if the download is reported as successful, runs every
        /// action in <c>crawling.ActionList</c> against the encoded words of that page.
        /// </summary>
        /// <param name="optionUrl">Value assigned to <c>crawling.Url.Option</c> before downloading; also forwarded to each action.</param>
        /// <param name="crawling">Crawling definition supplying the URL, word lists, base XPath and actions. Its <c>Url.Option</c> is overwritten.</param>
        /// <param name="indexs">Forwarded unchanged to every action.</param>
        /// <returns>
        /// <c>false</c> when <c>IsSuccess</c> rejects the downloaded document (no action is run);
        /// <c>true</c> once all actions have been executed.
        /// </returns>
        public static bool StasrtCrawling(string optionUrl, Crawling crawling, List<string> indexs)
        {
            crawling.Url.Option = optionUrl;

            string pageUrl = crawling.Url.ToString();
            HtmlDocument document = DownloadHtml(pageUrl);
            bool downloaded = IsSuccess(document);

            if (!downloaded)
            {
                return false;
            }

            List<List<string>> encodedWords = EncodingWordHelper.GetEncodedWords(
                document,
                crawling.WordList,
                crawling.OtherCrawlingList,
                crawling.BaseXPath);

            foreach (BaseAction currentAction in crawling.ActionList)
            {
                // The combined word list is rebuilt for every action on purpose:
                // each action receives its own fresh list instance.
                IEnumerable<Word> linkedWords = crawling.OtherCrawlingList
                    .SelectMany(other => other.CrawlingPointer.WordList);
                List<Word> allWords = crawling.WordList.Concat(linkedWords).ToList();

                // First word flagged as the index (default value when none is flagged).
                Word indexWord = allWords.Find(word => word.IsIndex);

                ExcuteAction(currentAction, allWords, indexWord, crawling, optionUrl, encodedWords, indexs);
            }

            return true;
        }
Esempio n. 2
0
        /// <summary>
        /// Crawls a single page for the given crawling definition and fills its word list
        /// from the first node matched by <c>crawling.BaseXPath</c>.
        /// </summary>
        /// <param name="crawling">Crawling definition to use; may be <c>null</c>. Its <c>Url.Option</c> is overwritten.</param>
        /// <param name="urlOption">Value assigned to <c>crawling.Url.Option</c> before downloading.</param>
        /// <returns>
        /// <c>crawling.WordList</c> (the same instance, after the original and encoded words were set)
        /// when at least one node matched; otherwise a new empty list. A new empty list is also
        /// returned when <paramref name="crawling"/> is <c>null</c>.
        /// </returns>
        public static List<Word> CrawlingOne(Crawling crawling, string urlOption)
        {
            if (crawling == null)
            {
                return new List<Word>();
            }

            crawling.Url.Option = urlOption;

            string pageUrl = crawling.Url.ToString();
            // NOTE(review): presumably retries until the download succeeds, judging by the name - confirm.
            var document = InfiniteDownloadHtml(pageUrl);
            var matches = GetResults(document, crawling.BaseXPath);

            if (matches != null && matches.Count >= 1)
            {
                // Only the first matched node is used to populate the words.
                var firstMatch = matches[0];
                EncodingWordHelper.SetOriginalWords(crawling.WordList, firstMatch);
                EncodingWordHelper.SetEncodedWords(crawling.WordList);

                return crawling.WordList;
            }

            return new List<Word>();
        }