Esempio n. 1
0
        /// <summary>
        /// Parses the HTML of a book page into a <c>Book</c> and wraps it in a <see cref="DataContainer"/>.
        /// </summary>
        /// <param name="dr">Response whose <c>Text</c> is the page HTML and whose <c>GUID</c> is copied into the result.</param>
        /// <returns>A container built from the response GUID and the parsed book.</returns>
        /// <exception cref="ParseErrorException">
        /// Thrown when the page has no parsable publish date or contains no hyperlink,
        /// i.e. it cannot be turned into a book.
        /// </exception>
        public override DataContainer ParseHandler(DataResponse dr)
        {
            var html = dr.Text;
            // Book title
            var name = parseTitle.Match(html).Groups[1].Value;

            var baseInfoHtml = parseBaseInfoBody.Match(html).Value;

            #region Publish date handling
            // The date is taken from capture group 1, falling back to group 2 when group 1 is blank.
            var dateGroups = parsePublishDate.Match(baseInfoHtml).Groups;
            var dateString = string.IsNullOrWhiteSpace(dateGroups[1].Value) ? dateGroups[2].Value : dateGroups[1].Value;

            // A missing or unparsable publish date means this is not a book page.
            // TryParse replaces the former Parse + catch-all so exceptions are not used for control flow.
            DateTime publishDate;
            if (string.IsNullOrWhiteSpace(dateString) || !DateTime.TryParse(dateString, out publishDate))
            {
                throw new ParseErrorException("This is not a Book Page");
            }
            #endregion

            // Hyperlinks: materialized once because the sequence is read twice below
            // (first element for the constructor, the remainder for Hrefs).
            var hrefs = parseHref.Matches(baseInfoHtml)
                .ForEach((Match m) => new Hyperlink(m.Groups[1].Value, m.Groups[2].Value))
                .ToList();
            if (hrefs.Count == 0)
            {
                // Without this guard the first-element access would raise an
                // ArgumentOutOfRangeException instead of the parser's own error type.
                throw new ParseErrorException("Book page contains no hyperlink");
            }

            // Introduction
            var introduction = parseIntroduction.Match(baseInfoHtml).Groups[1].Value;

            var metaHtml = parseMetaBody.Match(html).Value;
            // Categories
            var categorys = parseCategory.Matches(metaHtml).ForEach((Match m) => m.Groups[1].Value);
            // Tags
            var tags = parseTag.Matches(metaHtml).ForEach((Match m) => m.Groups[1].Value);

            // Build the Book object: the first hyperlink's Href goes to the constructor,
            // the remaining hyperlinks are appended to Hrefs.
            var book = new Book(name, publishDate, hrefs[0].Href) { Introduction = introduction };
            book.Hrefs.AddRange(hrefs.Skip(1));
            book.Categorys.AddRange(categorys);
            book.Tags.AddRange(tags);

            Logger.Info($"Parse book: {book.Name}");
            return new DataContainer(dr.GUID, book);
        }
Esempio n. 2
0
 /// <summary>
 /// Handler for the parse queue: parses one response and forwards the result to the store queue.
 /// </summary>
 /// <param name="dresp">Response to parse; its <c>GUID</c> selects the parser and its <c>Text</c> is saved on failure.</param>
 /// <param name="storeQueue">Block that receives the parsed <see cref="DataContainer"/>.</param>
 public void ParseHandler(DataResponse dresp, ActionBlock<DataContainer> storeQueue)
 {
     // TODO [to be improved] exception handling
     var parser = _dcpList[dresp.GUID];

     DataContainer container;
     try
     {
         container = parser.ParseHandler(dresp);
     }
     catch (ParseErrorException)
     {
         // TODO [to be improved] save error pages in a unified way
         // The page that failed to parse is written to the error-page directory
         // under a timestamp-based file name; nothing is posted for it.
         var errorDirectory = WorkPath.ErrorPagePath;
         var timestamp = DateTime.Now.ToString("yyyy-MM-dd_HH.mm.ss.fffffff");
         var errorFile = Path.Combine(errorDirectory, timestamp + ".html");
         File.WriteAllText(errorFile, dresp.Text);
         return;
     }

     storeQueue.Post(container);
 }
 /// <summary>
 /// Parses a response into a <see cref="DataContainer"/>; implemented by derived classes.
 /// </summary>
 /// <param name="dr">The response to parse.</param>
 /// <returns>The container produced from <paramref name="dr"/>.</returns>
 /// <remarks>
 /// NOTE(review): the queue-level ParseHandler overload catches <c>ParseErrorException</c>
 /// around this call, so implementations presumably throw it for unparsable pages - confirm.
 /// </remarks>
 public abstract DataContainer ParseHandler(DataResponse dr);
Esempio n. 4
0
 /// <summary>
 /// Parses a response into a <see cref="DataContainer"/>; implemented by derived classes.
 /// </summary>
 /// <param name="dr">The response to parse.</param>
 /// <returns>The container produced from <paramref name="dr"/>.</returns>
 public abstract DataContainer ParseHandler(DataResponse dr);
Esempio n. 5
0
 /// <summary>
 /// Builds a <c>DemoModel</c> from the response's GUID, text, and the ID exposed by <c>XMusicResponse</c>.
 /// </summary>
 /// <param name="dr">The response to convert; it is cast to <c>XMusicResponse</c>.</param>
 /// <returns>The newly created <c>DemoModel</c>.</returns>
 public override DataContainer ParseHandler(DataResponse dr)
 {
     // The cast is performed before any property is read, as in the original ordering.
     var musicResponse = (XMusicResponse)dr;

     var guid = dr.GUID;
     var text = dr.Text;
     var id = musicResponse.ID;

     return new DemoModel(guid, text, id);
 }