Exemplos de código com HasAttributeFilter em C# (CSharp)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: RegexComm.cs Projeto: io24m/Spider-ZhiHu

        /// <summary>
        /// Parses the HTML in <paramref name="s"/> and collects the <c>data-actualsrc</c> attribute of every
        /// <c>img</c> tag found below nodes matched on class "RichText CopyrightRichText-richText", which are
        /// themselves searched for below nodes matched on class "RichContent RichContent--unescapable".
        /// </summary>
        /// <param name="s">HTML markup to parse; it is handed to the parser with the charset "utf-8".</param>
        /// <returns>The non-empty attribute values with duplicates removed.</returns>
        public static List <string> GetAnswerDivRichContent(this string s)
        {
            Parser   htmlParser = Parser.CreateParser(s, "utf-8");
            HtmlPage page       = new HtmlPage(htmlParser);

            htmlParser.VisitAllNodesWith(page);

            var containerFilter = new HasAttributeFilter
            {
                AttributeName  = "class",
                AttributeValue = "RichContent RichContent--unescapable"
            };
            var richTextFilter = new HasAttributeFilter
            {
                AttributeName  = "class",
                AttributeValue = "RichText CopyrightRichText-richText"
            };

            // Narrow the search step by step: containers -> rich-text nodes -> img tags.
            var images = page.Body
                         .ExtractAllNodesThatMatch(containerFilter, true)
                         .ExtractAllNodesThatMatch(richTextFilter, true)
                         .ExtractAllNodesThatMatch(new TagNameFilter("img"), true);

            var urls = new List <string>();
            var seen = new HashSet <string>();

            for (int index = 0; index < images.Count; index++)
            {
                var    image     = (Winista.Text.HtmlParser.Tags.ImageTag)images[index];
                string actualSrc = image.GetAttribute("data-actualsrc");

                // seen.Add returns false for a value that was already recorded, so only the
                // first occurrence of each URL ends up in the result.
                if (!string.IsNullOrEmpty(actualSrc) && seen.Add(actualSrc))
                {
                    urls.Add(actualSrc);
                }
            }

            return(urls);
        }

Exemplo n.º 2

0

Exibir arquivo

        /// <summary>
        /// Builds the text file "革命逸事.txt" from the chapter index stored in the local file "catalogue.htm":
        /// every chapter link found in the index is downloaded from http://www.mlxiaoshuo.com and the text of
        /// its paragraph tags is written to the output file, one numbered chapter after another.
        /// </summary>
        /// <remarks>
        /// Every reader, response and the writer is wrapped in <c>using</c>, so they are released even when a
        /// download or parse step throws. An index that yields no chapter link simply produces an empty
        /// output file instead of failing on a null reader.
        /// </remarks>
        static void GetStoryOfRevolution()
        {
            string catalogueHtml;

            using (StreamReader reader = new StreamReader("catalogue.htm"))
            {
                catalogueHtml = reader.ReadToEnd();
            }

            // Chapter links: class "fontStyle2 colorStyleLink", combined with a parent filter on
            // class "row zhangjieUl" (the parent filter is created with its second argument set to true).
            HasAttributeFilter linkFilterByParent     = new HasAttributeFilter("class", "row zhangjieUl");
            HasAttributeFilter linkFilterByClass      = new HasAttributeFilter("class", "fontStyle2 colorStyleLink");
            AndFilter          linkFilter             = new AndFilter(new HasParentFilter(linkFilterByParent, true), linkFilterByClass);
            NodeList           linkNodeList           = new Parser(new Lexer(catalogueHtml)).Parse(linkFilter);
            List <string>      chapterHtmlContentList = new List <string>(linkNodeList.Size());

            // Step 1: download the HTML of every chapter page.
            for (int i = 0; i < linkNodeList.Size(); i++)
            {
                ATag           linkNode       = (ATag)linkNodeList[i];
                HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp("http://www.mlxiaoshuo.com" + linkNode.Link);

                // Dispose the response and the reader per request; previously only the last reader was closed.
                using (WebResponse response = httpWebRequest.GetResponse())
                using (StreamReader chapterReader = new StreamReader(new BufferedStream(response.GetResponseStream(), 4 * 200 * 1024)))
                {
                    chapterHtmlContentList.Add(chapterReader.ReadToEnd());
                }
                Console.WriteLine("第" + (i + 1) + "个页面获取完毕！");
            }

            // Step 2: extract the paragraphs of every chapter and write them to the output file.
            HasAttributeFilter praghFilter = new HasAttributeFilter("class", "textP fontStyle2 colorStyleText");

            using (StreamWriter writer = new StreamWriter("革命逸事.txt"))
            {
                for (int i = 0; i < chapterHtmlContentList.Count; i++)
                {
                    writer.WriteLine("第" + (i + 1) + "章");
                    NodeList praghNodeList = new Parser(new Lexer(chapterHtmlContentList[i])).Parse(praghFilter);

                    // Exactly one text container is expected per chapter page; anything else is reported.
                    if (praghNodeList.Size() != 1)
                    {
                        Console.WriteLine("第" + (i + 1) + "页中，判断段落的标准出错！");
                        continue;
                    }

                    NodeList children = praghNodeList[0].Children;
                    for (int j = 0; children != null && j < children.Size(); j++)
                    {
                        // Only nodes whose runtime type is exactly ParagraphTag are written out.
                        if (children[j].GetType().Equals(typeof(ParagraphTag)))
                        {
                            ParagraphTag praghTag = (ParagraphTag)children[j];
                            writer.WriteLine("    " + praghTag.StringText);
                        }
                    }
                    writer.WriteLine();
                }
            }
        }

Exemplo n.º 3

0

Exibir arquivo

        /// <summary>
        /// Requests the Sogou WeChat search pages for <paramref name="key"/> and adds one table row per
        /// result node (class "wx-rb wx-rb3") found on them.
        /// </summary>
        /// <param name="key">Search keyword; it is URL-encoded into the query string.</param>
        /// <param name="count">Drives the paging loop: zero-based page p is requested while p * 10 &lt; count.</param>
        /// <param name="time">Compared with the timestamp taken at the start of the call (see the note in the loop).</param>
        /// <returns>
        /// <c>null</c> when <paramref name="key"/> is null or empty; otherwise the table obtained from
        /// <c>GetStruct(key)</c> with the columns Title, Link, Author, Cdate, Day and Source filled per row.
        /// </returns>
        public DataTable GetWXBySogou(string key, int count, DateTime time)
        {
            if (string.IsNullOrEmpty(key))
            {
                return(null);
            }

            const string urlTemplate = "http://weixin.sogou.com/weixin?type=2&query={0}&fr=sgsearch&ie=utf8&_ast=1433216256&_asf=null&w=01059900&cid=null&page={1}";

            DataTable table     = GetStruct(key);
            DateTime  startedAt = DateTime.Now;
            int       pageIndex = 0;

            while (pageIndex * 10 < count)
            {
                // The page parameter in the URL is one-based.
                string   pageUrl  = string.Format(urlTemplate, HttpUtility.UrlEncode(key), pageIndex + 1);
                string   pageHtml = ieHelp.GetHtmlFromSite(pageUrl);
                HtmlPage page     = htmlHelp.GetPage(pageHtml);

                Winista.Text.HtmlParser.Util.NodeList hits = page.Body.ExtractAllNodesThatMatch(new HasAttributeFilter("class", "wx-rb wx-rb3"), true);
                if (hits.Count <= 0)
                {
                    break;
                }

                // Turn every result node into a table row.
                for (int n = 0; n < hits.Count; n++)
                {
                    Winista.Text.HtmlParser.Util.NodeList children = hits[n].Children;
                    DataRow row = table.NewRow();

                    // Title and link: the <a> tag combined with a parent filter on <h4>.
                    NodeFilter titleFilter = new AndFilter(new HasParentFilter(new TagNameFilter("h4")), new TagNameFilter("a"));
                    ATag       titleLink   = (ATag)children.ExtractAllNodesThatMatch(titleFilter, true)[0];
                    row["Title"] = titleLink.StringText;
                    row["Link"]  = titleLink.Link;

                    // Author: the title attribute of the <a id="weixin_account"> tag.
                    NodeFilter authorFilter = new AndFilter(new HasAttributeFilter("id", "weixin_account"), new TagNameFilter("a"));
                    ATag       authorLink   = (ATag)children.ExtractAllNodesThatMatch(authorFilter, true)[0];
                    row["Author"] = authorLink.GetAttribute("title");

                    // Date: the "t" attribute of the node with class "s-p", converted by GetDateTime.
                    NodeFilter dateFilter = new HasAttributeFilter("class", "s-p");
                    Div        dateDiv    = (Div)children.ExtractAllNodesThatMatch(dateFilter, true)[0];
                    string     unixtime   = dateDiv.GetAttribute("t");
                    row["Cdate"]  = GetDateTime(unixtime);
                    row["Day"]    = GetDateTime(unixtime).Day;
                    row["Source"] = "微信";

                    table.Rows.Add(row);
                }

                // NOTE(review): startedAt is assigned once before the loop and never updated, so this
                // test gives the same answer on every page - confirm whether the date of the last
                // article was meant to be compared here.
                if (startedAt < time)
                {
                    break;
                }

                pageIndex++;
            }

            return(table);
        }

Exemplo n.º 4

0

Exibir arquivo

        /// <summary>
        /// Extracts the part of <paramref name="html"/> described by <paramref name="model"/> and returns it as HTML.
        /// </summary>
        /// <remarks>
        /// Steps: an element type of "title" (any casing) short-circuits to <c>GetTitle</c>; otherwise the tags
        /// of type <c>model.EType</c> are narrowed by <c>model.ID</c> and <c>model.CSS</c> when those are set,
        /// script tags are removed unless <c>model.AllowScript</c> is true, images are downloaded to local
        /// storage, and the resulting HTML is optionally cut down to the span between <c>model.Start</c> and
        /// <c>model.End</c>.
        /// Original author's notes: consider switching entirely to filters later; the OR and AND filters can
        /// only combine two conditions at a time.
        /// </remarks>
        /// <param name="html">HTML markup to search.</param>
        /// <param name="model">Selection criteria (element type, id, CSS class, script policy, start/end markers).</param>
        /// <returns>The HTML of the selected nodes, or the result of <c>GetTitle</c> for the "title" element type.</returns>
        public string GetByFilter(string html, FilterModel model)
        {
            // Ordinal, case-insensitive comparison: the outcome no longer depends on the current culture.
            if (string.Equals(model.EType, "title", System.StringComparison.OrdinalIgnoreCase))
            {
                return(GetTitle(html));
            }
            NodeList nodes = GetTagList(html, model.EType);

            if (!string.IsNullOrEmpty(model.ID))
            {
                nodes = nodes.ExtractAllNodesThatMatch(new HasAttributeFilter("id", model.ID));
            }
            if (!string.IsNullOrEmpty(model.CSS))
            {
                nodes = nodes.ExtractAllNodesThatMatch(new HasAttributeFilter("class", model.CSS));
            }
            if (!model.AllowScript)
            {
                // The previous code called ExtractAllNodesThatMatch here and threw the result away, which
                // left the scripts in place. Keeping only the nodes that are NOT script tags removes them.
                nodes.KeepAllNodesThatMatch(new NotFilter(new TagNameFilter("script")), true);
            }

            // Store the images locally and point the tags at the local copies.
            NodeList imgs = nodes.ExtractAllNodesThatMatch(new TagNameFilter("img"), true);
            for (int i = 0; i < imgs.Count; i++)
            {
                ImageTag img = imgs[i] as ImageTag;
                if (img == null || string.IsNullOrEmpty(img.ImageURL))
                {
                    continue; // not an image tag, or nothing to download
                }
                string savepath = function.VToP(vdir + Path.GetFileName(img.ImageURL));
                if (File.Exists(savepath))
                {
                    continue; // avoid downloading the same image again
                }
                img.ImageURL = httpHelp.DownFile(baseurl, img.ImageURL, savepath);
            }

            string result = nodes.AsHtml();
            if (!string.IsNullOrWhiteSpace(model.Start) && !string.IsNullOrWhiteSpace(model.End))
            {
                result = regHelper.GetValueBySE(result, model.Start, model.End);
            }
            return(result);
        }

Exemplo n.º 5

0

Exibir arquivo

Arquivo: FansAndFollowCrawler.cs Projeto: CaseyYang/WebProjects

        /// <summary>
        /// Creates the HTML node filters and stores them in the members fanFilter, portraitFilter,
        /// fanNameFilter, fanConnectFilter, fanInfoFilter and followMethodFilter.
        /// </summary>
        private static void MakeFilters()
        {
            // Fan list container: node-type "userListBox" AND class "cnfList".
            var listByClass      = new HasAttributeFilter("class", "cnfList");
            var listByNodeType   = new HasAttributeFilter("node-type", "userListBox");
            var fanListContainer = new AndFilter(listByNodeType, listByClass);

            // A single fan entry: class "clearfix S_line1" combined with a parent filter on the container.
            fanFilter = new AndFilter(
                new HasParentFilter(fanListContainer, false),
                new HasAttributeFilter("class", "clearfix S_line1"));

            // Portrait: class "face mbspace" combined with a parent filter on class "left".
            var portraitParent = new HasAttributeFilter("class", "left");
            portraitFilter = new AndFilter(
                new HasParentFilter(portraitParent, false),
                new HasAttributeFilter("class", "face mbspace"));

            // The remaining four filters share one parent criterion (class "con_left") and differ
            // only in the class of the node itself.
            var detailsParent = new HasAttributeFilter("class", "con_left");
            fanNameFilter = new AndFilter(
                new HasParentFilter(detailsParent, false),
                new HasAttributeFilter("class", "name"));
            fanConnectFilter = new AndFilter(
                new HasParentFilter(detailsParent, false),
                new HasAttributeFilter("class", "connect"));
            fanInfoFilter = new AndFilter(
                new HasParentFilter(detailsParent, false),
                new HasAttributeFilter("class", "info"));
            followMethodFilter = new AndFilter(
                new HasParentFilter(detailsParent, false),
                new HasAttributeFilter("class", "from W_textb"));
        }

Exemplo n.º 6

0

Exibir arquivo

Arquivo: DianPinCrawler.cs Projeto: CaseyYang/WebProjects

        /// <summary>
        /// Creates the HTML node filters and stores them in the members poiListFilter, poiFilter,
        /// tasteFilter, environmentFilter, serviceFilter, averageFilter, commentFilter, nameFilter,
        /// addressFilter and tagsFilter.
        /// </summary>
        private static void MakeFilters()
        {
            // POI list: DefinitionList nodes combined with a parent filter on id "searchList".
            var definitionLists = new NodeClassFilter(typeof(DefinitionList));
            var searchListById  = new HasAttributeFilter("id", "searchList");
            poiListFilter = new AndFilter(new HasParentFilter(searchListById, false), definitionLists);

            // A single POI entry is a DefinitionListBullet node.
            poiFilter = new NodeClassFilter(typeof(DefinitionListBullet));

            // Score-related nodes, each selected by its class attribute.
            tasteFilter       = new HasAttributeFilter("class", "score1");
            environmentFilter = new HasAttributeFilter("class", "score2");
            serviceFilter     = new HasAttributeFilter("class", "score3");
            averageFilter     = new HasAttributeFilter("class", "average");

            // Comment node: class "B" AND module "list-readreview".
            commentFilter = new AndFilter(
                new HasAttributeFilter("class", "B"),
                new HasAttributeFilter("module", "list-readreview"));

            // Name node: class "BL" combined with a parent filter on class "shopname".
            var shopNameParent = new HasAttributeFilter("class", "shopname");
            nameFilter = new AndFilter(
                new HasParentFilter(shopNameParent, false),
                new HasAttributeFilter("class", "BL"));

            addressFilter = new HasAttributeFilter("class", "address");
            tagsFilter    = new HasAttributeFilter("class", "tags");
        }

Exemplo n.º 7

0

Exibir arquivo

Arquivo: Fetch.cs Projeto: zq535228/renzhex3

        /// <summary>
        /// Downloads <paramref name="url"/> and returns the CONTENT attribute of the first node whose
        /// <c>name</c> attribute is "keywords".
        /// </summary>
        /// <remarks>
        /// The downloaded HTML is lower-cased before parsing, so the returned text is lower-case as well.
        /// This is a best-effort lookup: a missing tag, a missing attribute, or any download/parse failure
        /// yields an empty string rather than an exception.
        /// </remarks>
        /// <param name="url">Address of the page to inspect.</param>
        /// <returns>The keywords text, or an empty string when it cannot be determined.</returns>
        public static string GetKeywords(string url)
        {
            try {
                CookieCollection cookies = new CookieCollection();
                // Lower-case once; the second ToLower() on the already lower-cased text was redundant.
                string     html      = new xkHttp().httpGET(url, ref cookies).ToLower();
                NodeFilter filter    = new HasAttributeFilter("name", "keywords");
                NodeList   htmlNodes = new Parser(new Lexer(html)).Parse(filter);

                // Pages without a keywords tag are the ordinary case, so handle it without an exception.
                if (htmlNodes.Size() == 0)
                {
                    return("");
                }
                MetaTag meta = htmlNodes[0] as MetaTag;
                if (meta == null || meta.Attributes == null || meta.Attributes.Count == 0)
                {
                    return("");
                }
                object content = meta.Attributes["CONTENT"];
                return(content == null ? "" : content.ToString());
            } catch (System.Exception) {
                // Deliberate best-effort: network and parser failures are reported as "no keywords".
                return("");
            }
        }

Exemplo n.º 8

0

Exibir arquivo

Arquivo: Program.cs Projeto: CaseyYang/WebProjects

        // Analyses the entry of HtmlContents at the given index and extracts name/price records from it.
        // Every node with class "lrg" is treated as a name; its price is looked up in a nearby subtree.
        // A record is added to TradeList when exactly one parsable price is found; in every other case
        // (unexpected node type, missing neighbour, zero or several prices, unparsable price text)
        // badRecordCount is incremented and processing continues with the next name.
        private static void GetInfoFromHtml(int index)
        {
            // Parse the HTML with the Winista.HtmlParser library.
            Lexer  lexer  = new Lexer(HtmlContents[index]);
            Parser parser = new Parser(lexer);
            // Attribute filters: the two arguments are the attribute name and the attribute value.
            HasAttributeFilter nameFilter  = new HasAttributeFilter("class", "lrg");
            HasAttributeFilter priceFilter = new HasAttributeFilter("class", "bld lrg red");
            // All nodes that satisfy the name filter.
            NodeList nameList = parser.Parse(nameFilter);

            for (int j = 0; j < nameList.Size(); j++)
            {
                Span     nameSpan  = nameList[j] as Span;
                NodeList priceList = FindPriceNodes(nameList[j], priceFilter);
                double   price;

                if (nameSpan == null ||
                    priceList == null ||
                    priceList.Size() != 1 ||
                    !TryParsePriceText(priceList[0] as Span, out price))
                {
                    badRecordCount++;
                    continue;
                }

                // HttpUtility.HtmlDecode turns HTML entities into plain text so Chinese text displays correctly.
                string name = HttpUtility.HtmlDecode(nameSpan.StringText);
                TradeList.Add(new Commodity(name, price, "RMB"));
            }
            Console.WriteLine("第" + (index + 1) + "个页面处理完成！");
        }

        // Walks from a name node to the subtree expected to hold its price (parent -> parent -> next
        // sibling -> next sibling -> children) and returns the nodes in it that match priceFilter,
        // searching recursively. Returns null when any hop of that walk is missing.
        // Note from the original author: for Winista.HtmlParser an "empty text node" is a node too, and
        // something like <del>text</del> shows up in Children as three nodes - which is why sibling
        // hops are taken two at a time.
        private static NodeList FindPriceNodes(INode nameNode, HasAttributeFilter priceFilter)
        {
            INode current = nameNode;
            if (current != null) { current = current.Parent; }
            if (current != null) { current = current.Parent; }
            if (current != null) { current = current.NextSibling; }
            if (current != null) { current = current.NextSibling; }
            if (current == null || current.Children == null)
            {
                return(null);
            }
            return(current.Children.ExtractAllNodesThatMatch(priceFilter, true));
        }

        // Parses the price held by priceSpan. The first two characters of the text are skipped as a
        // prefix and the rest is parsed with the invariant culture, so the result does not depend on
        // the regional settings of the machine. Returns false when there is no span or no parsable number.
        private static bool TryParsePriceText(Span priceSpan, out double price)
        {
            price = 0;
            string priceStr = priceSpan == null ? null : priceSpan.StringText;
            if (priceStr == null || priceStr.Length <= 2)
            {
                return(false);
            }
            return(double.TryParse(
                       priceStr.Substring(2),
                       System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                       System.Globalization.CultureInfo.InvariantCulture,
                       out price));
        }

Exemplo n.º 9

0

Exibir arquivo

Arquivo: ParseFilterSettings.cs Projeto: ibrahim-elsakka/TypeRight

 /// <summary>
 /// Initializes the parse filter settings with their defaults: the class and enum filters look for the
 /// full type names of <c>ScriptObjectAttribute</c> and <c>ScriptEnumAttribute</c>, and the controller
 /// filter accepts the two MVC controller base type names listed below.
 /// </summary>
 public ParseFilterSettings()
 {
     string scriptObjectAttributeName = typeof(ScriptObjectAttribute).FullName;
     string scriptEnumAttributeName   = typeof(ScriptEnumAttribute).FullName;

     ClassFilter = new HasAttributeFilter(scriptObjectAttributeName);
     EnumFilter  = new HasAttributeFilter(scriptEnumAttributeName);

     ControllerFilter = new IsOfAnyTypeFilter(
         "System.Web.Mvc.Controller",
         "Microsoft.AspNetCore.Mvc.Controller");
 }

Exemplo n.º 10

0

Exibir arquivo

Arquivo: MobileCrawler.cs Projeto: CaseyYang/WebProjects

        /// <summary>
        /// Extracts Weibo posts ("feeds") from the HTML of a mobile-version Weibo page held in htmlContent.
        /// On the first call for a user (user.NickName is empty) the user's nick name, remark name and
        /// self-introduction are read as well.
        /// </summary>
        /// <param name="index">Page number of the page being processed; stored in each Feed's Page.</param>
        /// <param name="feedList">List that receives one Feed per post found on the page.</param>
        public void GetInfoFromHtml(int index, List <Feed> feedList)
        {
            Lexer  lexer  = new Lexer(htmlContent);
            Parser parser = new Parser(lexer);
            // Mobile page, personal home page: selects the div that holds the user's name and information.
            HasAttributeFilter userFilter = new HasAttributeFilter("class", "u");
            // Mobile page: the div of every post carries the attribute class="c".
            HasAttributeFilter feedFilter = new HasAttributeFilter("class", "c");
            // Mobile page: the first child div of every reposted post contains a span with class="cmt".
            HasAttributeFilter refeedFilter = new HasAttributeFilter("class", "cmt");
            // Mobile page: the content of every post is stored in a span with class="ctt".
            HasAttributeFilter feedContentFilter = new HasAttributeFilter("class", "ctt");
            // Mobile page: the send time and send method of every post are stored in a span with class="ct".
            HasAttributeFilter feedTimeFilter = new HasAttributeFilter("class", "ct");
            // Selects the div that holds the repost reason. Note: the inner HasChildFilter only matches the
            // span containing the text "转发理由:", so a second HasChildFilter is wrapped around it to get
            // the div that contains that span.
            HasChildFilter reFeedReasonFilter = new HasChildFilter(new HasChildFilter(new StringFilter("转发理由:")));

            // An empty user.NickName means this user's home page is crawled for the first time,
            // so the user information has to be read.
            if (user.NickName.Equals(""))
            {
                #region 爬取个人主页的微博，首先获得用户信息
                NodeList userNodeList = parser.Parse(userFilter);
                if (userNodeList.Size() == 1)
                {
                    NodeList userDetailNodeList = userNodeList[0].Children.ExtractAllNodesThatMatch(feedContentFilter, true);// feedContentFilter is merely borrowed here because the wanted nodes happen to match it
                    if (userDetailNodeList.Size() >= 2)
                    {
                        // Read the Weibo user name.
                        if (userDetailNodeList[0].Children[0].GetType().Equals(typeof(TextNode)))
                        {
                            string nickName = ((TextNode)userDetailNodeList[0].Children[0]).ToPlainTextString();
                            // Try to split off the remark name, which is written in parentheses after the nick name.
                            if (nickName.Contains("("))
                            {
                                int start = nickName.IndexOf('(');
                                int end   = nickName.IndexOf(')');
                                if (end > start)
                                {
                                    string remarkName = nickName.Substring(start + 1, end - start - 1);
                                    user.RemarkName = remarkName;
                                }
                                user.NickName = nickName.Substring(0, start);
                            }
                            else
                            {
                                user.NickName = nickName;
                            }
                        }
                        else
                        {
                            Console.WriteLine("获取微博用户名出错！");
                        }
                        // Read the self-introduction.
                        user.SelfIntroduction = ((Span)userDetailNodeList[1]).StringText;
                    }
                    else
                    {
                        Console.WriteLine("获取包含微博用户名和自我描述的div出错！");
                    }
                }
                else
                {
                    Console.WriteLine("获取包含微博用户信息的div出错！");
                }
                // Note: Reset must be called before the parser is used again.
                parser.Reset();
                #endregion
            }
            NodeList feedNodeList = parser.Parse(feedFilter);
            int      count        = 0;
            for (int i = 0; i < feedNodeList.Size(); i++)
            {
                // Holds the data of this post.
                Feed feed = new Feed();
                feed.Page   = index;
                feed.Number = i + 1;
                // Running count of visited nodes; only used in the error messages below.
                count++;

                // Take the div of the i-th post;
                // cast the node to TagNode so that its attribute values can be read.
                TagNode feedNode = (TagNode)feedNodeList[i];
                // Note: when an attribute value is looked up, the attribute name used as key must be upper-case, e.g. "ID".
                if (feedNode.Attributes.Contains("ID"))// no ID attribute means this node is not a post
                {
                    // Analysis of the mobile page shows:
                    // one child div of a post's div normally holds the post content;
                    // the second child div holds the picture, the send time and so on;
                    // a reposted post has a third child div holding the repost reason, repost source, time etc.

                    // First child div.
                    TagNode feedFirstDiv = (TagNode)feedNode.Children[0];
                    // Look for the markers of a reposted post.
                    NodeList reFeedList = feedFirstDiv.Children.ExtractAllNodesThatMatch(refeedFilter, true);
                    if (reFeedList.Size() > 0)                                                                                              // in practice class="cmt" is used by more than just reposted posts
                    {
                        if (HttpUtility.HtmlDecode(((TextNode)reFeedList[0].Children[0]).ToPlainTextString()).Substring(0, 2).Equals("转发")) // extra check to make sure this really is the source of a reposted post
                        {
                            feed.ReFeedOrNot    = true;
                            feed.OriginalAuthor = HttpUtility.HtmlDecode(((ATag)reFeedList[0].Children[1]).StringText);
                            // Find the child div that holds the repost reason.
                            NodeList reFeedReasonList = feedNode.Children.ExtractAllNodesThatMatch(reFeedReasonFilter, true);
                            if (reFeedReasonList.Size() == 1)
                            {
                                TagNode reFeedReasonDiv = (TagNode)reFeedReasonList[0];
                                // In the div holding the repost reason the first child is always a span with the label text.
                                // The children from the second one onwards make up the repost reason; they may be text or links (@someone).
                                // The reason ends when a text node ends with "//", or when a link's text is "赞[X]" (its link is "http://weibo.cn/attitude/...").
                                for (int j = 1; j < reFeedReasonDiv.Children.Size(); j++)
                                {
                                    Type t = reFeedReasonDiv.Children[j].GetType();
                                    if (t.Equals(typeof(TextNode)))
                                    {
                                        string str = HttpUtility.HtmlDecode(((TextNode)reFeedReasonDiv.Children[j]).ToPlainTextString());
                                        if (str.Length >= 2 && str.Substring(str.Length - 2, 2).Equals("//"))
                                        {
                                            feed.ReFeedReason += str.Substring(0, str.Length - 2);
                                            feed.ReFeedFrom    = HttpUtility.HtmlDecode(((ATag)reFeedReasonDiv.Children[j + 1]).StringText);
                                            if (feed.ReFeedFrom.Substring(0, 1).Equals("@"))// strip the @ in front of the previous reposter's name
                                            {
                                                feed.ReFeedFrom = feed.ReFeedFrom.Substring(1);
                                            }
                                            break;
                                        }
                                        else
                                        {
                                            feed.ReFeedReason += str;
                                        }
                                        continue;
                                    }
                                    if (t.Equals(typeof(ATag)))
                                    {
                                        string str = HttpUtility.HtmlDecode(((ATag)reFeedReasonDiv.Children[j]).StringText);
                                        if (str.Substring(0, 1).Equals("赞"))
                                        {
                                            // Reaching the "like" link without a "//" marker: the post was reposted from the original author.
                                            feed.ReFeedFrom = feed.OriginalAuthor;
                                            break;
                                        }
                                        else
                                        {
                                            feed.ReFeedReason += str;
                                        }
                                        continue;
                                    }
                                }
                            }
                            else
                            {
                                Console.WriteLine("好像找到不止一个转发理由？！");
                            }
                        }
                        else
                        {
                            Console.WriteLine("糟糕！第" + count + "条微博中，找不到转发微博的来源！");
                        }
                    }
                    // Find the marker that holds the post body.
                    NodeList feedContentList = feedFirstDiv.Children.ExtractAllNodesThatMatch(feedContentFilter, true);
                    switch (feedContentList.Size())
                    {
                    case 1:
                        // The post body is contained in a single span.
                        Span feedContentListNode = (Span)feedContentList[0];
                        // The body is an arbitrary mix of text and links (e.g. @someone), so every child of the span
                        // is handled according to its type (text node or link node); other node types are skipped.
                        for (int j = 0; j < feedContentListNode.Children.Size(); j++)
                        {
                            Type t = feedContentListNode.Children[j].GetType();
                            if (t.Equals(typeof(TextNode)))
                            {
                                feed.Content += HttpUtility.HtmlDecode(((TextNode)feedContentListNode.Children[j]).ToPlainTextString());
                                continue;
                            }
                            if (t.Equals(typeof(ATag)))
                            {
                                feed.Content += HttpUtility.HtmlDecode(((ATag)feedContentListNode.Children[j]).StringText);
                                continue;
                            }
                        }
                        break;

                    default:
                        Console.WriteLine("糟糕！第" + count + "条微博中，取得微博正文的判断标准出错了！");
                        break;
                    }

                    // Within the whole post, find the marker that holds the send time.
                    NodeList feedTimeList = feedNode.Children.ExtractAllNodesThatMatch(feedTimeFilter, true);
                    switch (feedTimeList.Size())
                    {
                    case 1:
                        string time = HttpUtility.HtmlDecode(((TextNode)((Span)feedTimeList[0]).Children[0]).ToHtml());
                        feed.Time = Program.GetTime(time);
                        // A second child, when present, is the link naming the sending device/client.
                        if (feedTimeList[0].Children.Size() > 1)
                        {
                            feed.Device = HttpUtility.HtmlDecode(((ATag)((Span)feedTimeList[0]).Children[1]).StringText);
                        }
                        // Walking backwards from the time marker leads to the "like", "repost" and "comment" links.
                        // The walk takes nine steps to previous siblings; the links are read at steps 4, 6 and 8.
                        INode node = feedTimeList[0];
                        for (int j = 0; j < 9; j++)
                        {
                            node = node.PreviousSibling;
                            switch (j)
                            {
                            case 4:
                                // Comments: the count is parsed from the link text after dropping the first three characters and the last one.
                                string strCommentCount = ((ATag)node).StringText;
                                feed.CommentCount = Int32.Parse(strCommentCount.Substring(3, strCommentCount.Length - 4));
                                break;

                            case 6:
                                // Reposts: same text layout as the comment link.
                                string strReFeedCount = ((ATag)node).StringText;
                                feed.ReFeedCount = Int32.Parse(strReFeedCount.Substring(3, strReFeedCount.Length - 4));
                                break;

                            case 8:
                                // Likes: the first two characters and the last one are dropped.
                                string strLikeCount = ((ATag)node).StringText;
                                feed.LikeCount = Int32.Parse(strLikeCount.Substring(2, strLikeCount.Length - 3));
                                break;

                            default:
                                break;
                            }
                        }
                        break;

                    default:
                        Console.WriteLine("糟糕！第" + count + "条微博中，取得微博时间的判断标准出错了！");
                        break;
                    }
                    feedList.Add(feed);
                }
            }
        }

Exemplo n.º 11

0

Exibir arquivo

 /// <summary>
 /// Initializes the parse filter settings with their defaults: the class and enum filters look for the
 /// full type names of <c>ScriptObjectAttribute</c> and <c>ScriptEnumAttribute</c>, and the controller
 /// filter accepts the type name given by <c>MvcConstants.ControllerBaseFullName_AspNetCore</c>.
 /// </summary>
 public ParseFilterSettings()
 {
     string scriptObjectAttributeName = typeof(ScriptObjectAttribute).FullName;
     string scriptEnumAttributeName   = typeof(ScriptEnumAttribute).FullName;

     ClassFilter = new HasAttributeFilter(scriptObjectAttributeName);
     EnumFilter  = new HasAttributeFilter(scriptEnumAttributeName);

     ControllerFilter = new IsOfAnyTypeFilter(MvcConstants.ControllerBaseFullName_AspNetCore);
 }

Exemplos de HasAttributeFilter em C# (CSharp)