/// <summary>
/// Parses a book page from the response HTML and wraps the resulting
/// <c>Book</c> in a <c>DataContainer</c> tagged with the response GUID.
/// </summary>
/// <param name="dr">Response whose <c>Text</c> holds the page HTML.</param>
/// <returns>A container holding <c>dr.GUID</c> and the parsed book.</returns>
/// <exception cref="ParseErrorException">
/// Thrown when the page has no parsable publish date or contains no links,
/// i.e. it cannot be turned into a book.
/// </exception>
public override DataContainer ParseHandler(DataResponse dr)
{
    var html = dr.Text;

    // Book title.
    var name = parseTitle.Match(html).Groups[1].Value;

    var baseInfoHtml = parseBaseInfoBody.Match(html).Value;

    // Publish date; a page without a publish date is not a book page.
    #region Date handling
    var dateMatch = parsePublishDate.Match(baseInfoHtml).Groups;
    var dateString = string.IsNullOrWhiteSpace(dateMatch[1].Value)
        ? dateMatch[2].Value
        : dateMatch[1].Value;
    if (string.IsNullOrWhiteSpace(dateString))
    {
        throw new ParseErrorException("This is not a Book Page");
    }

    // TryParse instead of Parse + catch: an unparsable date is an expected
    // outcome here, so it should not be driven through exception handling.
    DateTime publishDate;
    if (!DateTime.TryParse(dateString, out publishDate))
    {
        throw new ParseErrorException("This is not a Book Page");
    }
    #endregion

    // Links. Materialized once because the list is read twice below
    // (first element as the primary link, the rest as extra links).
    var hrefs = parseHref.Matches(baseInfoHtml)
        .ForEach((Match m) => new Hyperlink(m.Groups[1].Value, m.Groups[2].Value))
        .ToList();
    if (hrefs.Count == 0)
    {
        // Without at least one link the Book cannot be constructed; report it
        // as a parse error instead of letting an index exception escape.
        throw new ParseErrorException("This is not a Book Page: no links found");
    }

    // Introduction.
    var introduction = parseIntroduction.Match(baseInfoHtml).Groups[1].Value;

    var metaHtml = parseMetaBody.Match(html).Value;

    // Categories.
    var categorys = parseCategory.Matches(metaHtml).ForEach((Match m) => m.Groups[1].Value);

    // Tags.
    var tags = parseTag.Matches(metaHtml).ForEach((Match m) => m.Groups[1].Value);

    // Build the Book object; the first link is passed to the constructor,
    // the remaining links are appended to Hrefs.
    var book = new Book(name, publishDate, hrefs[0].Href)
    {
        Introduction = introduction
    };
    book.Hrefs.AddRange(hrefs.Skip(1));
    book.Categorys.AddRange(categorys);
    book.Tags.AddRange(tags);

    Logger.Info($"Parse book: {book.Name}");
    return new DataContainer(dr.GUID, book);
}
/// <summary>
/// Parse-queue handler: looks up the parser registered for the response's
/// GUID, runs it, and posts the parsed container to the store queue.
/// If the parser throws <c>ParseErrorException</c>, the response text is
/// written to a timestamped .html file under <c>WorkPath.ErrorPagePath</c>
/// and nothing is posted.
/// </summary>
/// <param name="dresp">The response to parse.</param>
/// <param name="storeQueue">Queue that receives successfully parsed containers.</param>
public void ParseHandler(DataResponse dresp, ActionBlock<DataContainer> storeQueue)
{
    // TODO [to be improved] exception handling
    var parser = _dcpList[dresp.GUID];

    DataContainer parsed;
    try
    {
        parsed = parser.ParseHandler(dresp);
    }
    catch (ParseErrorException)
    {
        // TODO [to be improved] save failed pages in a unified way
        var errorDirectory = WorkPath.ErrorPagePath;
        var fileName = $"{DateTime.Now.ToString("yyyy-MM-dd_HH.mm.ss.fffffff")}.html";
        var errorPagePath = Path.Combine(errorDirectory, fileName);
        File.WriteAllText(errorPagePath, dresp.Text);
        return;
    }

    storeQueue.Post(parsed);
}
/// <summary>
/// Parses a response into a <c>DataContainer</c>. Concrete parsers supply the implementation.
/// </summary>
/// <param name="dr">The response to parse.</param>
/// <returns>The container produced from <paramref name="dr"/>.</returns>
/// <remarks>
/// NOTE(review): implementations presumably signal an unparsable page by
/// throwing <c>ParseErrorException</c> — confirm against the overrides and the caller.
/// </remarks>
public abstract DataContainer ParseHandler(DataResponse dr);
/// <summary>
/// Builds a <c>DemoModel</c> from the response's GUID and text plus the
/// <c>ID</c> exposed by the response as an <c>XMusicResponse</c>.
/// </summary>
/// <param name="dr">The response to convert; it is cast directly to <c>XMusicResponse</c>.</param>
/// <returns>A new <c>DemoModel</c> created from the response.</returns>
public override DataContainer ParseHandler(DataResponse dr)
{
    // Direct cast: a response of any other type fails here before any property is read.
    var musicResponse = (XMusicResponse)dr;

    var guid = dr.GUID;
    var text = dr.Text;
    var id = musicResponse.ID;

    return new DemoModel(guid, text, id);
}