/// <summary>
/// Loads previously saved books from every file found in the relative "Books" directory.
/// Each file is opened as an <c>HSSFWorkbook</c> and only its first sheet is read.
/// </summary>
/// <remarks>
/// Column layout read from each row: 0 = Title, 1 = Score, 2 = ScoreNumber, 3 = Link,
/// 4 = PublishHouse, 5 = PublishDate, 6 = Desc.
/// Numeric and date cells are parsed with the culture-default <c>Parse</c> overloads.
/// A malformed row is not skipped; the parse exception propagates to the caller.
/// NOTE(review): every row from index 0 is parsed, so a header row would fail parsing - confirm the files have none.
/// </remarks>
/// <returns>
/// A dictionary keyed by <c>Link</c>. When the same link occurs more than once, the first
/// occurrence wins because <c>TryAdd</c> does not overwrite.
/// </returns>
private static ConcurrentDictionary<string, DoubanBook> LoadBooks()
{
    var books = new ConcurrentDictionary<string, DoubanBook>();

    foreach (var path in Directory.GetFiles("Books"))
    {
        // The using statement releases the file handle even when the workbook
        // cannot be opened or a row fails to parse; a trailing Close() call
        // would be skipped by the exception.
        using (var stream = File.OpenRead(path))
        {
            var workbook = new HSSFWorkbook(stream);
            var sheet = workbook.GetSheetAt(0);

            for (int rowIndex = 0; rowIndex <= sheet.LastRowNum; ++rowIndex)
            {
                var row = sheet.GetRow(rowIndex);
                if (row == null)
                {
                    // No row object at this index; nothing to read.
                    continue;
                }

                var book = new DoubanBook
                {
                    Title = row.GetCell(0).ToString(),
                    Score = float.Parse(row.GetCell(1).ToString()),
                    ScoreNumber = int.Parse(row.GetCell(2).ToString()),
                    Link = row.GetCell(3).ToString(),
                    PublishHouse = row.GetCell(4).ToString(),
                    PublishDate = DateTime.Parse(row.GetCell(5).ToString()),
                    Desc = row.GetCell(6).ToString(),
                };

                books.TryAdd(book.Link, book);
            }
        }
    }

    return books;
}
/// <summary>
/// Processes one tag: walks its pages from the index returned by <c>GetTagPageSize</c>
/// down to 0, parses every item into a book, and registers each book whose score is at
/// least <c>MIN_SCORE</c> in <c>_books</c> with this tag's name attached.
/// </summary>
/// <param name="tag">The tag whose pages are fetched.</param>
/// <param name="token">Checked before each page; when cancellation is requested no further pages are fetched.</param>
/// <remarks>
/// When the loop ends, the tag is recorded through <c>_tagManager.AddCompleteTag</c> with the
/// publish date of the last successfully parsed book, or <c>DateTime.MinValue</c> if none was parsed.
/// If the page count cannot be obtained, the error is logged and the tag is not recorded.
/// NOTE: an earlier early-exit on <c>tag.CompletedTime</c> and related-tag processing are disabled.
/// </remarks>
private void ProcessSingleTag(Tag tag, CancellationToken token)
{
    _logger.Info($"process tag.{tag.TagName}");

    int pageIndex;
    try
    {
        pageIndex = this.GetTagPageSize(tag);
    }
    catch (Exception e)
    {
        _logger.Error(e, "获取页面大小失败:{0}", tag.TagName);
        return;
    }

    // Most recent book that parsed without error, regardless of its score.
    DoubanBook latestParsed = null;

    // Original author's note on this loop: guard against the IP being banned.
    for (; !token.IsCancellationRequested && pageIndex >= 0; pageIndex--)
    {
        var pageItems = this.GetItems(tag, pageIndex);
        if (pageItems == null)
        {
            continue;
        }

        foreach (var pageItem in pageItems)
        {
            DoubanBook parsed;
            try
            {
                parsed = this.ProcessSingleBook(pageItem);
                latestParsed = parsed;

                if (parsed.Score < MIN_SCORE)
                {
                    _logger.Info($"ignore book.because score:{parsed.Score}.title:{parsed.Title}");
                    continue;
                }
            }
            catch (Exception e)
            {
                _logger.Warn(e, "parse book error");
                continue;
            }

            if (this._books.TryAdd(parsed.Link, parsed))
            {
                _bookCount++;
                _logger.Info($"add book:{_bookCount}.{parsed.Title}");
            }
            else
            {
                _logger.Info($"repeat.skip.{parsed.Title}");
            }

            // Tag the stored entry, which may be an earlier instance when the link was already present.
            var stored = _books[parsed.Link];
            if (!stored.Tags.Contains(tag.TagName))
            {
                stored.Tags.Add(tag.TagName);
            }
        }
    }

    var completedAt = latestParsed != null ? latestParsed.PublishDate : DateTime.MinValue;
    _tagManager.AddCompleteTag(tag.TagName, completedAt);
}