Example #1
0
        /// <summary>
        /// Works out how many listing pages exist for one category/letter pair by
        /// repeatedly loading the page with the highest number shown in the pager
        /// until that number stops growing.
        /// </summary>
        /// <param name="category">Category being probed; its Id is substituted into the page URL.</param>
        /// <param name="alpha">Letter substituted into the page URL.</param>
        /// <returns>
        /// 1 when a loaded page has no pager element; otherwise the highest page number
        /// reached, plus one when the pager's final link is a "paginate-more" link.
        /// </returns>
        /// <remarks>
        /// Nothing is caught here, so any exception raised by <c>download.AsDocument</c>
        /// propagates to the caller.
        /// </remarks>
        private int GetPageCount(Category category, char alpha) {
            int page = 1;
            while (true) {
                string url = String.Format(PAGE_URL_TEMPLATE, category.Id, alpha, page);
                HtmlDocument document = download.AsDocument(url);

                HtmlNode pager = document.DocumentNode.SelectSingleNode("//ul[@class='list paginate']");

                if (pager == null) {
                    return 1;
                }

                // Materialise once: the links are scanned for numbers and the final link is inspected below.
                HtmlNode[] links = pager.Descendants("a").ToArray();

                // Find the last link whose text is purely numeric.
                int lastPage = 0;
                bool hasNumberedLink = false;
                foreach (HtmlNode link in links) {
                    string text = link.InnerHtml.Trim();
                    int number;
                    // TryParse (instead of Convert.ToInt32) so an absurdly long digit run is skipped rather than thrown on.
                    if (Regex.IsMatch(text, @"^\d+$") && Int32.TryParse(text, out number)) {
                        lastPage = number;
                        hasNumberedLink = true;
                    }
                }

                // A pager without any numbered link gives no further information;
                // previously this case threw InvalidOperationException from Last().
                if (!hasNumberedLink) {
                    return page;
                }

                // Only ever move forward. Jumping back to a smaller page number could
                // make the probe bounce between two pages forever.
                if (lastPage > page) {
                    page = lastPage;
                    continue;
                }

                // If the pager still ends with a "next page" link, there is one more page.
                if (links[links.Length - 1].GetAttributeValue("class", String.Empty) == "paginate-more") {
                    page++;
                }
                return page;
            }
        }
Example #2
0
        /// <summary>
        /// Processes every listing page of one category/letter pair: determines the
        /// page count, then runs <c>FromPage</c> for pages 1..pageCount in parallel.
        /// </summary>
        /// <param name="category">Category to process; its Id and Name appear in log output.</param>
        /// <param name="alpha">Letter whose pages are processed.</param>
        /// <param name="attempts">
        /// Number of attempts already made to obtain the page count. Callers normally
        /// leave this at 0; it is incremented on each internal retry.
        /// </param>
        /// <remarks>
        /// A <c>WebException</c> while obtaining the page count is logged as a warning and
        /// retried until <c>attempts</c> reaches <c>settings.RetryAttemptCount</c>, at which
        /// point it is logged as an error and the letter is abandoned.
        /// </remarks>
        private void FromAlpha(Category category, char alpha, int attempts = 0) {
            int pageCount;

            try {
                pageCount = GetPageCount(category, alpha);
            }
            catch (WebException ex) {
                string logMessage = String.Format(
                    "Failed to get page count for category {0}-{1}, alpha {2}",
                    category.Id, category.Name, alpha
                );

                if (attempts >= settings.RetryAttemptCount) {
                    logger.ErrorException(logMessage, ex);
                }
                else {
                    logger.WarnException(logMessage, ex);
                    // Retry with the attempt counter advanced; without this the
                    // attempts parameter and the warn/error distinction had no effect.
                    FromAlpha(category, alpha, attempts + 1);
                }
                return;
            }

            logger.Debug(
                "There are {0} pages in category {1}-{2}, alpha {3}",
                pageCount, category.Id, category.Name, alpha
            );

            // Kept outside the try block: only the page-count probe is retried here,
            // so a retry never re-runs pages that were already processed.
            Enumerable.Range(1, pageCount).AsParallel()
                .WithDegreeOfParallelism(settings.ParallelDegree)
                .ForAll(i => FromPage(category, alpha, i));
        }
Example #3
0
        /// <summary>
        /// Downloads one listing page, extracts an id from every link inside the
        /// element with id "selectedcontent", and adds those ids to <c>output</c>.
        /// </summary>
        /// <param name="category">Category the page belongs to; its Id and Name appear in log output.</param>
        /// <param name="alpha">
        /// Letter of the page. <c>Char.MinValue</c> selects the category URL template
        /// instead of the letter/page template.
        /// </param>
        /// <param name="pageIndex">Page number substituted into the letter/page URL.</param>
        /// <param name="attempts">
        /// Number of attempts already made for this page. Callers normally leave this
        /// at 0; it is incremented on each internal retry.
        /// </param>
        /// <remarks>
        /// A <c>WebException</c> is logged as a warning and retried until <c>attempts</c>
        /// reaches <c>settings.RetryAttemptCount</c>, then logged as an error.
        /// </remarks>
        private void FromPage(Category category, char alpha, int pageIndex, int attempts = 0) {
            Stopwatch watch = Stopwatch.StartNew();

            bool isSpecialPage = alpha == Char.MinValue;
            string url = isSpecialPage ?
                String.Format(CATEGORY_URL_TEMPLATE, category.Id) :
                String.Format(PAGE_URL_TEMPLATE, category.Id, alpha, pageIndex);
            try {
                HtmlDocument document = download.AsDocument(url);

                // GetElementbyId returns null when the element is absent; treat that as
                // "no links" instead of failing with a NullReferenceException.
                HtmlNode container = document.GetElementbyId("selectedcontent");

                // Materialise before taking the lock so the sequence is walked only once
                // and the lock is not held while the document is being traversed.
                HtmlNode[] nodes = container == null ?
                    new HtmlNode[0] :
                    container.Descendants("a").ToArray();

                lock (output) {
                    foreach (HtmlNode node in nodes) {
                        string name = node.InnerHtml.Trim();
                        string href = node.GetAttributeValue("href", String.Empty);
                        int id = Utility.FindIdFromUrl(href);
                        output.Add(id);
                        logger.Trace("Found app {0}-{1}", id, name);
                    }
                }
                watch.Stop();
                logger.Debug("Found {0} apps in {1} using {2}ms", nodes.Length, url, watch.ElapsedMilliseconds);
            }
            catch (WebException ex) {
                // The message must match the URL chosen above: the special-page wording
                // belongs to the Char.MinValue case (the two branches were swapped before).
                string logMessage = isSpecialPage ?
                    String.Format(
                        "Fail to extract apps from special page for category {0}-{1}",
                        category.Id, category.Name
                    ) :
                    String.Format(
                        "Failed to extract apps for category {0}-{1}, alpha {2}, page {3}",
                        category.Id, category.Name, alpha, pageIndex
                    );

                if (attempts >= settings.RetryAttemptCount) {
                    logger.ErrorException(logMessage, ex);
                }
                else {
                    logger.WarnException(logMessage, ex);
                    FromPage(category, alpha, pageIndex, attempts + 1);
                }
            }
        }
Example #4
0
 /// <summary>
 /// Runs <c>FromAlpha</c> for every entry of <c>CATALOG_ALPHAS</c> against the given
 /// category, in parallel, with the degree of parallelism taken from
 /// <c>settings.ParallelDegree</c>.
 /// </summary>
 /// <param name="category">Category passed through to each <c>FromAlpha</c> call.</param>
 private void FromCategory(Category category) {
     ParallelQuery<char> letters = CATALOG_ALPHAS
         .AsParallel()
         .WithDegreeOfParallelism(settings.ParallelDegree);

     letters.ForAll(letter => {
         FromAlpha(category, letter);
     });
 }