Пример #1
0
        /// <summary>
        /// Loads books from every file found in the "Books" directory.
        /// </summary>
        /// <remarks>
        /// Each file is opened as an <c>HSSFWorkbook</c> and only its first sheet is read.
        /// For every non-null row, cells 0-6 are mapped, in order, to Title, Score,
        /// ScoreNumber, Link, PublishHouse, PublishDate and Desc.
        /// No exception is caught here: a failure while reading a workbook or parsing a
        /// cell propagates to the caller. The file stream is released in that case too.
        /// </remarks>
        /// <returns>
        /// A dictionary keyed by <c>Link</c>. Entries are inserted with <c>TryAdd</c>, so
        /// when the same link appears more than once the first occurrence is kept.
        /// </returns>
        private static ConcurrentDictionary <string, DoubanBook> LoadBooks()
        {
            var dic = new ConcurrentDictionary <string, DoubanBook>();

            var files = Directory.GetFiles("Books");

            foreach (var file in files)
            {
                // The using block guarantees the file handle is released even when the
                // workbook cannot be opened or a cell value fails to parse.
                using (var stream1 = File.OpenRead(file))
                {
                    var work1  = new HSSFWorkbook(stream1);
                    var sheet1 = work1.GetSheetAt(0);

                    for (int x = 0; x <= sheet1.LastRowNum; ++x)
                    {
                        var row1 = sheet1.GetRow(x);
                        if (row1 == null)
                        {
                            // No row object at this index; nothing to read.
                            continue;
                        }

                        var book1 = new DoubanBook
                        {
                            Title        = row1.GetCell(0).ToString(),
                            Score        = float.Parse(row1.GetCell(1).ToString()),
                            ScoreNumber  = int.Parse(row1.GetCell(2).ToString()),
                            Link         = row1.GetCell(3).ToString(),
                            PublishHouse = row1.GetCell(4).ToString(),
                            PublishDate  = DateTime.Parse(row1.GetCell(5).ToString()),
                            Desc         = row1.GetCell(6).ToString(),
                        };

                        // A duplicate link is silently ignored (TryAdd returns false).
                        dic.TryAdd(book1.Link, book1);
                    }
                }
            }

            return dic;
        }
Пример #2
0
        /// <summary>
        /// Processes one tag: walks its pages from the value returned by
        /// <c>GetTagPageSize</c> down to 0, stores every parsed book whose score is not
        /// below <c>MIN_SCORE</c> in <c>_books</c>, attaches the tag name to the stored
        /// book, and finally registers the tag as completed.
        /// </summary>
        /// <param name="tag">Tag whose pages are fetched and processed.</param>
        /// <param name="token">
        /// Checked before each page is fetched; once cancellation is requested no further
        /// page is processed. The tag is still registered as completed afterwards.
        /// </param>
        private void ProcessSingleTag(Tag tag, CancellationToken token)
        {
            _logger.Info($"process tag.{tag.TagName}");

            int startPage;
            try
            {
                startPage = this.GetTagPageSize(tag);
            }
            catch (Exception e)
            {
                // Without a page count there is nothing to iterate over; give up on this tag.
                _logger.Error(e, "获取页面大小失败:{0}", tag.TagName);
                return;
            }

            // Last book returned by ProcessSingleBook, including books later rejected by score.
            DoubanBook lastParsed = null;

            // NOTE(review): the original carried a "prevent the IP from being banned" remark
            // at the top of this loop, but no delay is performed in this method.
            for (int page = startPage; !token.IsCancellationRequested && page >= 0; page--)
            {
                var pageItems = this.GetItems(tag, page);
                if (pageItems == null)
                {
                    continue;
                }

                foreach (var entry in pageItems)
                {
                    DoubanBook parsed = null;
                    try
                    {
                        parsed     = this.ProcessSingleBook(entry);
                        lastParsed = parsed;

                        // A cut-off based on tag.CompletedTime used to live here; it is disabled.
                        if (parsed.Score < MIN_SCORE)
                        {
                            _logger.Info($"ignore book.because score:{parsed.Score}.title:{parsed.Title}");
                            continue;
                        }
                    }
                    catch (Exception e)
                    {
                        // One unparsable book must not abort the rest of the page.
                        _logger.Warn(e, "parse book error");
                        continue;
                    }

                    if (this._books.TryAdd(parsed.Link, parsed))
                    {
                        _bookCount++;
                        _logger.Info($"add book:{_bookCount}.{parsed.Title}");
                    }
                    else
                    {
                        _logger.Info($"repeat.skip.{parsed.Title}");
                    }

                    // Tag the instance actually held by the dictionary, which may be an
                    // earlier book with the same link rather than the one just parsed.
                    var stored = _books[parsed.Link];
                    if (!stored.Tags.Contains(tag.TagName))
                    {
                        stored.Tags.Add(tag.TagName);
                    }
                }
            }

            // Record completion using the publish date of the last parsed book, or
            // DateTime.MinValue when no book was parsed.
            var completedAt = lastParsed != null ? lastParsed.PublishDate : DateTime.MinValue;
            _tagManager.AddCompleteTag(tag.TagName, completedAt);
        }