示例#1
0
        /// <summary>
        /// Requests <paramref name="url"/> through <c>helper.GetHtml</c> and returns the response
        /// after passing it through <c>JavaScriptAnalyzer.Json2XML</c>.
        /// </summary>
        /// <remarks>
        /// Each match of the <c>extract</c> pattern inside <paramref name="url"/> is handled as a
        /// placeholder: capture group 1 is looked up in the dictionary returned by
        /// <c>XPathAnalyzer.ParseUrl(URL)</c>, and when the key exists the whole match text is
        /// replaced by the associated value. Placeholders without a matching key are left as-is.
        /// </remarks>
        /// <param name="url">Address to request; may contain placeholders matched by <c>extract</c>.</param>
        /// <param name="code">Receives the status code reported by <c>helper.GetHtml</c>.</param>
        /// <param name="post">Optional value forwarded unchanged to <c>helper.GetHtml</c>.</param>
        /// <returns>The content produced by <c>JavaScriptAnalyzer.Json2XML</c>.</returns>
        public string GetHtml(string url, out HttpStatusCode code,
                              string post = null)
        {
            var placeholders = extract.Matches(url);

            // The parameter dictionary is only needed when the url contains a placeholder.
            Dictionary<string, string> parameters = null;
            if (placeholders.Count > 0)
            {
                parameters = XPathAnalyzer.ParseUrl(URL);
            }

            if (parameters != null)
            {
                foreach (Match placeholder in placeholders)
                {
                    string replacement;
                    if (parameters.TryGetValue(placeholder.Groups[1].Value, out replacement))
                    {
                        url = url.Replace(placeholder.Groups[0].Value, replacement);
                    }
                }
            }

            WebHeaderCollection responseHeaders;
            var rawContent = helper.GetHtml(Http, out responseHeaders, out code, url, post);

            // The "is JSON" flag is required by the signature but not used here.
            bool wasJson;
            return JavaScriptAnalyzer.Json2XML(rawContent, out wasJson, IsSuperMode);
        }
示例#2
0
        /// <summary>
        /// Opens the crawler named by <c>CrawlerSelector</c> for design, creating it on user
        /// confirmation when it does not exist, and loads it with the
        /// <c>JavaScriptAnalyzer.Json2XML</c> conversion of <c>lastData</c>.
        /// </summary>
        /// <remarks>
        /// The method returns without side effects when the crawler name is blank, when
        /// <c>lastData</c> is not reported as real JSON, or when the user declines to create a
        /// missing crawler.
        /// </remarks>
        private void Design()
        {
            // Stop on a blank name; otherwise the code below would offer to create
            // a crawler whose name is empty.
            if (!(!string.IsNullOrWhiteSpace(CrawlerSelector)).SafeCheck("采集器名称不能为空"))
            {
                return;
            }

            bool isRealJson;
            var newhtml = JavaScriptAnalyzer.Json2XML(lastData, out isRealJson, true);

            if (!isRealJson.SafeCheck("只有标准json格式才能启用采集器设计"))
            {
                return;
            }

            var selector = GetCrawler(CrawlerSelector);
            if (selector == null)
            {
                var answer = MessageBox.Show($"是否要创建名为{CrawlerSelector}的网页采集器?", "提示信息",
                                             MessageBoxButton.OKCancel);
                if (answer != MessageBoxResult.OK)
                {
                    return;
                }

                // Register the new crawler with the process manager before using it.
                var crawler = new SmartCrawler();
                crawler.Name = CrawlerSelector;
                processManager.CurrentProcessCollections.Add(crawler);
                selector = crawler;
            }

            // The main frame may not implement IDockableManager; skip activation in that
            // case instead of dereferencing a null cast result.
            var dockManager = MainDescription.MainFrm as IDockableManager;
            if (dockManager != null)
            {
                dockManager.ActiveThisContent(CrawlerSelector);
            }

            selector.URLHTML = newhtml;
            selector.HtmlDoc.LoadHtml(newhtml);
            selector.enableRefresh = false;
        }
示例#3
0
        /// <summary>
        /// Reads the value stored under <c>Column</c> in <paramref name="datas"/> and writes the
        /// parsed result back into the same document.
        /// </summary>
        /// <remarks>
        /// <para>
        /// When <c>crawlerEnabled</c> is set, the text is converted with
        /// <c>JavaScriptAnalyzer.Json2XML</c>; if it is reported as real JSON, the first document
        /// returned by <c>selector.CrawlHtmlData</c> (if any) is copied into <paramref name="datas"/>.
        /// </para>
        /// <para>
        /// Otherwise the text is deserialized with <c>serialier</c>. In the
        /// <c>ScriptWorkMode.单文档</c> mode the result is converted to a document and copied into
        /// <paramref name="datas"/>; in any other mode it is stored through <c>SetValue</c>.
        /// A deserialization failure stores the exception message through <c>SetValue</c>.
        /// </para>
        /// <para>
        /// The first text reported as real JSON is remembered in <c>lastData</c>.
        /// </para>
        /// </remarks>
        /// <param name="datas">Document that supplies the input value and receives the results.</param>
        /// <returns>Always <c>null</c>; results are written into <paramref name="datas"/>.</returns>
        public override object TransformData(IFreeDocument datas)
        {
            var item = datas[Column];
            var itemstr = item == null ? null : item.ToString();

            if (string.IsNullOrWhiteSpace(itemstr))
            {
                return null;
            }

            // The conversion is needed both for remembering the first JSON sample and for
            // crawler mode, so run it once and share the result.
            if (lastData == null || crawlerEnabled)
            {
                bool isRealJson;
                var html = JavaScriptAnalyzer.Json2XML(itemstr, out isRealJson, true);

                if (lastData == null && isRealJson)
                {
                    lastData = itemstr;
                }

                if (crawlerEnabled)
                {
                    if (isRealJson)
                    {
                        HtmlDocument htmldoc;
                        var doc = selector.CrawlHtmlData(html, out htmldoc).FirstOrDefault();

                        // The crawler may yield no documents; there is nothing to copy then.
                        if (doc != null)
                        {
                            doc.DictCopyTo(datas);
                        }
                    }
                    return null;
                }
            }

            dynamic d;
            try
            {
                d = serialier.DeserializeObject(itemstr);
            }
            catch (Exception ex)
            {
                // Best effort: surface the parse error in the output column instead of failing.
                SetValue(datas, ex.Message);
                return null;
            }

            if (ScriptWorkMode == ScriptWorkMode.单文档)
            {
                var newdoc = ScriptHelper.ToDocument(d) as FreeDocument;

                // The cast yields null when the conversion result is not a FreeDocument.
                if (newdoc != null)
                {
                    newdoc.DictCopyTo(datas);
                }
            }
            else
            {
                SetValue(datas, d);
            }

            return null;
        }
示例#4
0
        /// <summary>
        /// Expands each input document into zero or more output documents built from the value
        /// stored under <c>Column</c>.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Documents whose column value is null or an empty string are skipped. Every other
        /// value is stored in <c>lastData</c> before processing.
        /// </para>
        /// <para>
        /// When <c>crawlerEnabled</c> is set, the text is converted with
        /// <c>JavaScriptAnalyzer.Json2XML</c> and, if reported as real JSON, each document returned
        /// by <c>selector.CrawlHtmlData</c> is merged with the input via <c>MergeQuery</c>.
        /// Otherwise the text is deserialized with <c>serialier</c> and each
        /// <c>FreeDocument</c> produced by <c>ScriptHelper.ToDocuments</c> is merged the same way.
        /// Inputs that fail to deserialize are skipped.
        /// </para>
        /// </remarks>
        /// <param name="datas">Input documents to expand.</param>
        /// <param name="analyzer">Not used by this implementation.</param>
        /// <returns>A lazily evaluated sequence of merged documents.</returns>
        public override IEnumerable <IFreeDocument> TransformManyData(IEnumerable <IFreeDocument> datas, AnalyzeItem analyzer)
        {
            foreach (var data in datas)
            {
                // Guard against a missing column value before calling ToString().
                var raw = data[Column];
                var itemstr = raw == null ? null : raw.ToString();
                if (string.IsNullOrEmpty(itemstr))
                {
                    continue;
                }

                lastData = itemstr;

                if (crawlerEnabled)
                {
                    bool isrealjson;
                    var html = JavaScriptAnalyzer.Json2XML(itemstr, out isrealjson, true);
                    if (isrealjson)
                    {
                        HtmlDocument htmldoc;
                        var crawled = selector.CrawlHtmlData(html, out htmldoc);
                        foreach (var crawledDoc in crawled)
                        {
                            yield return crawledDoc.MergeQuery(data, NewColumn);
                        }
                    }
                    continue;
                }

                dynamic d;
                try
                {
                    d = serialier.DeserializeObject(itemstr);
                }
                catch (Exception)
                {
                    // Best effort: an input that cannot be deserialized produces no output.
                    continue;
                }

                foreach (var converted in ScriptHelper.ToDocuments(d))
                {
                    var document = converted as FreeDocument;

                    // Skip results that are not FreeDocument instances rather than
                    // dereferencing a null cast result.
                    if (document == null)
                    {
                        continue;
                    }

                    yield return document.MergeQuery(data, NewColumn);
                }
            }
        }