コード例 #1
0
        static void GetStoryOfRevolution()
        {
            StreamReader       reader                 = new StreamReader("catalogue.htm");
            Lexer              lexer                  = new Lexer(reader.ReadToEnd());
            Parser             parser                 = new Parser(lexer);
            HasAttributeFilter linkFilterByParent     = new HasAttributeFilter("class", "row zhangjieUl");
            HasAttributeFilter linkFilterByClass      = new HasAttributeFilter("class", "fontStyle2 colorStyleLink");
            AndFilter          linkFilter             = new AndFilter(new HasParentFilter(linkFilterByParent, true), linkFilterByClass);
            NodeList           linkNodeList           = parser.Parse(linkFilter);
            List <string>      linkUrlList            = new List <string>(linkNodeList.Size());
            List <string>      chapterHtmlContentList = new List <string>(linkNodeList.Size());
            HttpWebRequest     httpWebRequest;
            StreamReader       chapterReader = null;

            for (int i = 0; i < linkNodeList.Size(); i++)
            {
                ATag linkNode = (ATag)linkNodeList[i];
                linkUrlList.Add(linkNode.Link);
                httpWebRequest = HttpWebRequest.CreateHttp("http://www.mlxiaoshuo.com" + linkUrlList[linkUrlList.Count - 1]);
                chapterReader  = new StreamReader(new BufferedStream(httpWebRequest.GetResponse().GetResponseStream(), 4 * 200 * 1024));
                string chapterHtmlContent = chapterReader.ReadToEnd();
                chapterHtmlContentList.Add(chapterHtmlContent);
                Console.WriteLine("第" + (i + 1) + "个页面获取完毕!");
            }
            chapterReader.Close();
            HasAttributeFilter praghFilter = new HasAttributeFilter("class", "textP fontStyle2 colorStyleText");
            StreamWriter       writer      = new StreamWriter("革命逸事.txt");

            for (int i = 0; i < chapterHtmlContentList.Count; i++)
            {
                writer.WriteLine("第" + (i + 1) + "章");
                lexer  = new Lexer(chapterHtmlContentList[i]);
                parser = new Parser(lexer);
                NodeList praghNodeList = parser.Parse(praghFilter);
                if (praghNodeList.Size() == 1)
                {
                    for (int j = 0; j < praghNodeList[0].Children.Size(); j++)
                    {
                        if (praghNodeList[0].Children[j].GetType().Equals(typeof(ParagraphTag)))
                        {
                            ParagraphTag praghTag = (ParagraphTag)praghNodeList[0].Children[j];
                            writer.WriteLine("    " + praghTag.StringText);
                        }
                    }
                    writer.WriteLine();
                }
                else
                {
                    Console.WriteLine("第" + (i + 1) + "页中,判断段落的标准出错!");
                }
            }
            writer.Close();
        }
コード例 #2
0
 public Element Message(string text) => ParagraphTag.WithId("message").WithText(text);