コード例 #1
0
        /// <summary>
        /// Requests the BBS login URL with <c>id=guest</c>, collects the cookies from
        /// that response, then crawls the PPPerson board page and prints every entity
        /// the pipeline reports to the console.
        /// </summary>
        /// <remarks>
        /// Performs live HTTPS requests against bbs.sjtu.edu.cn.
        /// </remarks>
        public void CrawlerTest()
        {
            CookieContainer cookieContainer = new CookieContainer();
            HttpWebRequest  request         = (HttpWebRequest)WebRequest.Create(@"https://bbs.sjtu.edu.cn/bbslogin?id=guest");

            request.ProtocolVersion = HttpVersion.Version10;
            request.ContentType     = "application/x-www-form-urlencoded";
            request.UserAgent       = @"Mozilla/5.0";

            // HttpWebResponse.Cookies is only populated when the request carries a
            // CookieContainer; without this assignment the collection below is empty
            // and the crawler would run without the login cookies.
            request.CookieContainer = cookieContainer;

            // Dispose the response so the underlying connection is released.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // The container has normally received these already; adding them
                // again only refreshes the existing entries.
                cookieContainer.Add(response.Cookies);
            }

            string url   = @"https://bbs.sjtu.edu.cn/bbsdoc?board=PPPerson";

            // Every table row after the first is an item; its first cell becomes the "ID" column.
            var    model = new XMLEntityModel(@".//tr[position() > 1]");

            model.AddXMLColumn("ID", @"./td[1]");

            WebCrawlerSourceNode crawler = new WebCrawlerSourceNode(new string[] { url }, model, cookieContainer, @"bbs.sjtu.edu.cn");

            PipelineTask.Create(crawler)
            .AddMonitor(
                (entity) =>
            {
                Console.WriteLine(entity);
            })
            .Start();
        }
コード例 #2
0
        /// <summary>
        /// Runs a JSON-file-to-text-file pipeline and asserts that the produced file
        /// matches the sample JSON output.
        /// </summary>
        public void JsonBasic()
        {
            // Items are selected with ".//Results"; the Name and Desc children become columns.
            var entityModel = new XMLEntityModel(@".//Results");
            entityModel.AddXMLColumn("Name", "./Name");
            entityModel.AddXMLColumn("Desc", "./Desc");

            PipelineTask.FromJsonFile("Course", entityModel).ToTextFile(Output).Start();

            var outputMatchesSample = TestHelper.CompareTwoFile(SampleJsonOutput, Output);
            Assert.IsTrue(outputMatchesSample);
        }
コード例 #3
0
        /// <summary>
        /// Runs an XML-file-to-text-file pipeline and asserts that the produced file
        /// matches the sample XML output.
        /// </summary>
        public void XmlBasic()
        {
            // Items are selected with ".//Entity"; the col1 and col2 children become columns.
            var entityModel = new XMLEntityModel(@".//Entity");
            entityModel.AddXMLColumn("col1", "./col1");
            entityModel.AddXMLColumn("col2", "./col2");

            PipelineTask.FromXmlFile(XmlSource, entityModel).ToTextFile(Output).Start();

            var outputMatchesSample = TestHelper.CompareTwoFile(SampleXMLOutput, Output);
            Assert.IsTrue(outputMatchesSample);
        }
コード例 #4
0
        /// <summary>
        /// Parses <paramref name="content"/> as HTML and lazily yields one entity for
        /// each node matched by the model's item XPath.
        /// </summary>
        /// <param name="content">Markup text loaded into an <c>HtmlDocument</c>.</param>
        /// <param name="Model">
        /// Supplies the item XPath (<c>ItemXPath</c>) and the column-name to XPath
        /// mapping (<c>ColumnXPath</c>).
        /// </param>
        /// <returns>
        /// The entities for which at least one column was resolved; entities that
        /// report <c>IsEmpty()</c> are skipped. Nothing is yielded when no node
        /// matches the item XPath.
        /// </returns>
        /// <remarks>
        /// This is an iterator: no parsing happens until the result is enumerated.
        /// </remarks>
        public static IEnumerable <Entity> GetEntitiesFromContent(string content, XMLEntityModel Model)
        {
            HtmlDocument document = new HtmlDocument();

            document.LoadHtml(content);

            var nodes = document.DocumentNode.SelectNodes(Model.ItemXPath);

            // SelectNodes yields null rather than an empty collection when nothing matches.
            if (nodes == null)
            {
                yield break;
            }

            foreach (var node in nodes)
            {
                var entity = new Entity();

                foreach (string column in Model.ColumnXPath.Keys)
                {
                    string xpath = Model.ColumnXPath[column];

                    // HtmlAgilityPack does not support selecting an attribute through
                    // XPath, so the helper receives the XPath by ref and reports the
                    // attribute name separately (null when the column is not an attribute).
                    // NOTE(review): presumably it also strips the attribute step from
                    // xpath - confirm against GetAttributeNameFromXPath.
                    string attributeName;
                    GetAttributeNameFromXPath(ref xpath, out attributeName);

                    var columnNode = node.SelectSingleNode(xpath);
                    if (columnNode == null)
                    {
                        continue;
                    }

                    if (attributeName == null)
                    {
                        entity.SetValue(column, columnNode.InnerText);
                        continue;
                    }

                    // The attribute indexer returns null when the element lacks the
                    // attribute; treat that like a missing node instead of crashing
                    // with a NullReferenceException.
                    var attribute = columnNode.Attributes[attributeName];
                    if (attribute != null)
                    {
                        entity.SetValue(column, attribute.Value);
                    }
                }

                if (!entity.IsEmpty())
                {
                    yield return(entity);
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Downloads the Wikipedia "China_provinces" page, extracts the GB and
        /// Province columns from the sortable wiki table, writes them to the output
        /// text file and asserts that the file matches the expected province file.
        /// </summary>
        /// <remarks>
        /// Performs a live HTTP request, so the result depends on the current page content.
        /// </remarks>
        public void HtmlBasic()
        {
            // Rows without any attribute inside the "wikitable sortable" table are the items.
            XMLEntityModel model = new XMLEntityModel(@"//table[@class='wikitable sortable']/tr[not(@*)]");

            model.AddXMLColumn("GB", "./td[1]");
            model.AddXMLColumn("Province", "./td[3]");

            PipelineTask.FromWeb("http://en.wikipedia.org/wiki/China_provinces")
            .ParseHtml(model)
            .AddMonitor((entity) =>
            {
                // Print the entity itself; an argument-less WriteLine only emitted blank lines.
                Console.WriteLine(entity);
            })
            .ToTextFile(Output, model)
            .Start();

            // The comparison result must be asserted, otherwise the test can never fail.
            Assert.IsTrue(TestHelper.CompareTwoFile(Province, Output));
        }
コード例 #6
0
 /// <summary>
 /// Creates a node attached to <paramref name="parent"/> and records the model
 /// and target column it was given.
 /// </summary>
 /// <param name="parent">Node passed to the base constructor.</param>
 /// <param name="targetColumn">Value stored in <c>TargetColumn</c>.</param>
 /// <param name="model">Value stored in <c>Model</c>.</param>
 public ParseHtmlProcessNode(DataNode parent, string targetColumn, XMLEntityModel model) : base(parent)
 {
     this.Model        = model;
     this.TargetColumn = targetColumn;
 }