Esempio n. 1
0
        /// <summary>
        /// Registers the "url" and "proc" compilers, each with a LiteDb provider followed by a
        /// file provider rooted at "funcs/js", then runs an address template through the "url" compiler.
        /// </summary>
        public void TestMethod1()
        {
            // Providers are constructed and registered in the same order for both compilers.
            var urlProviders = new List <ICodeProvider>();
            urlProviders.Add(new LiteDbCodeProvider(Node.Feed.Db.FuncType.URLFUNCTION));
            urlProviders.Add(new FileCodeProvider("funcs/js", "fun"));
            CodeCompilerManager.Create("url", urlProviders);

            var procProviders = new List <ICodeProvider>();
            procProviders.Add(new LiteDbCodeProvider(Node.Feed.Db.FuncType.SELECTORPROCESSOR));
            procProviders.Add(new FileCodeProvider("funcs/js", "pro"));
            CodeCompilerManager.Create("proc", procProviders);

            // Address template handed to the "url" compiler.
            var template = "http://www.onezh.com/hall/show_{# page(146,707) #}.html";
            var expanded = CodeCompilerManager.GetResult("url", template);

            // The result is not inspected; the test only checks that the calls above do not throw.
            Assert.True(true);
        }
Esempio n. 2
0
        /// <summary>
        /// Runs the "proc" code compiler for the function named by the selector against the
        /// content of the previous result and wraps the outcome in a new process result.
        /// </summary>
        /// <param name="selector">function selector; its Name is passed to the compiler</param>
        /// <param name="result">pre process result; its Content is the compiler input</param>
        /// <returns>
        /// new process result whose single match is the first compiled value as text, or the
        /// unchanged input content when the compiler yields nothing usable
        /// </returns>
        public override ProcessResult ProcessNeed(FunctionSelector selector, ProcessResult result)
        {
            var pr = new ProcessResult();

            var r = CodeCompilerManager.GetResult("proc", selector.Name, result.Content);

            // A null result or a null first element previously caused a NullReferenceException;
            // both are now handled like an empty result.
            object first = null;
            if (r != null && r.Length > 0)
            {
                first = r.First();
            }

            if (first != null)
            {
                pr.Matches.Add(first.ToString());
            }
            else
            {
                // Nothing usable came back: pass the original content through untouched.
                pr.Matches.Add(result.Content);
            }

            return(pr);
        }
Esempio n. 3
0
        /// <summary>
        /// Job entry point: reads "baseUrl" and "request" from the job data map, expands the
        /// request address through the "url" compiler and queues one crawl action per resulting address.
        /// </summary>
        /// <param name="context">
        /// job execution context; its job data map must contain a "baseUrl" entry and a
        /// "request" entry holding a FeedRequest with a non-null Request
        /// </param>
        /// <returns>
        /// a task for the job run; the body contains no await, so all actions have been
        /// handed to the queue pool by the time the method returns
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// the job data map lacks "baseUrl", or "request" is missing, not a FeedRequest, or has no Request
        /// </exception>
        public async Task Execute(IJobExecutionContext context)
        {
            var dataMap = context.JobDetail.JobDataMap;

            // Fail with a descriptive error instead of a NullReferenceException when the job is misconfigured.
            var baseUrlValue = dataMap.Get("baseUrl");
            if (baseUrlValue == null)
            {
                throw new InvalidOperationException("feed job " + context.JobDetail.Key + " has no \"baseUrl\" entry in its job data map");
            }

            baseUrl = baseUrlValue.ToString();

            var feedRequest = dataMap.Get("request") as FeedRequest;
            if (feedRequest == null || feedRequest.Request == null)
            {
                throw new InvalidOperationException("feed job " + context.JobDetail.Key + " has no usable \"request\" entry in its job data map");
            }

            Logger.GetLogger(baseUrl).Info(" feed job " + context.JobDetail.Key + " add to feed crawl queue");

            var addrs = CodeCompilerManager.GetResult("url", feedRequest.Request.Uri.ToString());

            foreach (var addr in addrs)
            {
                // NOTE(review): every queued action mutates the same feedRequest instance. If the
                // queue pool runs actions concurrently, one action can overwrite Request before
                // another action's DoTask reads it — confirm, and give each action its own copy.
                queuePool.QueueAction(() =>
                {
                    Logger.GetLogger(baseUrl).Info(" feed job " + addr.ToString() + " starting");

                    // Direct cast: an unexpected clone type fails here rather than as a null dereference below.
                    feedRequest.Request     = (Request)feedRequest.Request.Clone();
                    feedRequest.Request.Uri = new Uri(addr.ToString());

                    var response = DoTask(feedRequest);
                    Save(feedRequest, response);
                });
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Starts the document server when RuiJiConfiguration.DocServer is set, then starts either
        /// the standalone server or every configured cluster node, and finally registers the
        /// "url" and "proc" code compilers for the chosen mode.
        /// </summary>
        public static void StartServers()
        {
            if (!string.IsNullOrEmpty(RuiJiConfiguration.DocServer))
            {
                var docServer = new DocumentServer(RuiJiConfiguration.DocServer);
                docServer.Start();

                servers.Add(docServer);
            }

            if (!RuiJiConfiguration.Standalone)
            {
                // Cluster mode: nothing is started without a zk server address.
                var zkAddress = RuiJiConfiguration.ZkServer;
                if (string.IsNullOrEmpty(zkAddress))
                {
                    Logger.GetLogger("").Info("zkServer not defined");
                    return;
                }

                StartZKServer();

                // A node that fails to start is logged and does not stop the remaining nodes.
                RuiJiConfiguration.Nodes.ForEach(node =>
                {
                    try
                    {
                        Start(node.BaseUrl, node.Type, zkAddress, node.Proxy);
                    }
                    catch (Exception startError)
                    {
                        Logger.GetLogger("").Info(startError.Message);
                    }
                });

                // Compilers in cluster mode read their code through remote providers.
                var remoteUrlProviders = new List <ICodeProvider>();
                remoteUrlProviders.Add(new RemoteCodeProvider(ZkNode().BaseUrl, FuncType.URLFUNCTION.ToString()));
                CodeCompilerManager.Create("url", remoteUrlProviders);

                var remoteProcProviders = new List <ICodeProvider>();
                remoteProcProviders.Add(new RemoteCodeProvider(ZkNode().BaseUrl, FuncType.SELECTORPROCESSOR.ToString()));
                CodeCompilerManager.Create("proc", remoteProcProviders);

                return;
            }

            // Standalone mode: a single server on the configured address.
            var standaloneUrl = RuiJiConfiguration.RuiJiServer;
            if (string.IsNullOrEmpty(standaloneUrl))
            {
                Logger.GetLogger("").Info("RuiJiServer not exsit in AppSettings");
                return;
            }

            try
            {
                Start(standaloneUrl, "s");
            }
            catch (Exception startError)
            {
                // A failed start is logged; the compilers below are still registered.
                Logger.GetLogger("").Fatal(startError.Message);
            }

            var localUrlProviders = new List <ICodeProvider>();
            localUrlProviders.Add(new LiteDbCodeProvider(Node.Feed.Db.FuncType.URLFUNCTION));
            CodeCompilerManager.Create("url", localUrlProviders);

            var localProcProviders = new List <ICodeProvider>();
            localProcProviders.Add(new LiteDbCodeProvider(Node.Feed.Db.FuncType.SELECTORPROCESSOR));
            CodeCompilerManager.Create("proc", localProcProviders);
        }
Esempio n. 5
0
        /// <summary>
        /// Test run of a feed: expands the feed address through the "url" compiler, downloads each
        /// resulting address, extracts it with the feed's RuiJi expression and optionally
        /// downloads the files listed in the extracted content.
        /// </summary>
        /// <param name="feed">feed definition from the request body; its Address is overwritten with each expanded address</param>
        /// <param name="down">when true, every absolute url found in the extracted content (one per line) is downloaded and stored</param>
        /// <param name="debug">when false, CrawlTaskFunc.ClearContent is applied to each extract result</param>
        /// <returns>
        /// the list of extract results; the response data of the first download whose status is
        /// not OK; or the caught exception when anything throws
        /// </returns>
        public object TestFeed([FromBody] FeedModel feed, bool down, bool debug = false)
        {
            try
            {
                var addrs   = CodeCompilerManager.GetResult("url", feed.Address);
                var results = new List <ExtractResult>();

                foreach (var addr in addrs)
                {
                    feed.Address = addr.ToString();
                    var job      = new FeedJob();
                    var response = job.DoTask(feed);
                    if (response.StatusCode != System.Net.HttpStatusCode.OK)
                    {
                        // A failed download aborts the whole run and returns the raw response data.
                        return(response.Data);
                    }
                    if (string.IsNullOrEmpty(feed.RuiJiExpression))
                    {
                        // No expression to apply: record an empty result for this address.
                        results.Add(new ExtractResult());
                        continue;
                    }

                    var block = RuiJiBlockParser.ParserBlock(feed.RuiJiExpression);

                    var result = RuiJiExtractor.Extract(response.Data.ToString(), block);

                    if (!debug)
                    {
                        CrawlTaskFunc.ClearContent(result);
                    }

                    if (down)
                    {
                        var s = new FileStorage(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "wwwroot", "download"));

                        var lines = result.Content.ToString().Replace("\r\n", "\n").Split('\n');
                        foreach (var line in lines)
                        {
                            // Trim once, before validating: surrounding whitespace makes
                            // Uri.IsWellFormedUriString reject an otherwise valid url, so such
                            // lines were skipped while the stored url was trimmed anyway.
                            var fileUrl = line.Trim();
                            if (fileUrl.Length == 0 || !Uri.IsWellFormedUriString(fileUrl, UriKind.Absolute))
                            {
                                continue;
                            }

                            var res = Crawler.Request(fileUrl);
                            var c   = new DownloadContentModel();
                            c.Url   = fileUrl;
                            c.IsRaw = res.IsRaw;
                            c.Data  = res.Data;

                            s.Insert(c);
                        }
                    }

                    results.Add(result);
                }

                return(results);
            }
            catch (Exception ex)
            {
                // NOTE(review): the exception object itself is returned to the caller — confirm
                // that exposing its details is intended for this endpoint.
                return(ex);
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Runs a function's code against its sample through the compiler matching the function type.
        /// </summary>
        /// <param name="func">function definition from the request body; Type, Sample and Code are read</param>
        /// <returns>whatever CodeCompilerManager.Test returns for the sample and code</returns>
        public object FuncTest([FromBody] FuncModel func)
        {
            // Every type other than URLFUNCTION is routed to the "proc" compiler.
            string compilerName;
            if (func.Type == FuncType.URLFUNCTION)
            {
                compilerName = "url";
            }
            else
            {
                compilerName = "proc";
            }

            return CodeCompilerManager.Test(compilerName, func.Sample, func.Code);
        }
Esempio n. 7
0
        /// <summary>
        /// Runs a crawl task: loads the feed record, expands its address through the "url"
        /// compiler, downloads and extracts every resulting address, then fetches a result for
        /// each url extracted from the feed snapshot.
        /// </summary>
        /// <param name="t">task payload; must be a CrawlTaskModel</param>
        /// <param name="task">parallel task; its Progress is used for status messages when it is an IProgress of string</param>
        /// <returns>
        /// list holding each feed extract result followed by a ContentModel per extracted url
        /// that produced a result; ClearContent is applied to every entry when the model's
        /// IncludeContent is false
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="t"/> is not a CrawlTaskModel</exception>
        /// <exception cref="InvalidOperationException">no feed record was returned for the model's FeedId</exception>
        public object Run(object t, ParallelTask task)
        {
            var model = t as CrawlTaskModel;
            if (model == null)
            {
                // Previously surfaced as a NullReferenceException on the first model access.
                throw new ArgumentException("t must be a CrawlTaskModel", "t");
            }

            var results  = new List <object>();
            var reporter = task.Progress as IProgress <string>;

            // Progress reporting is a side channel: skip it instead of failing when no reporter is attached.
            Action <string> report = message =>
            {
                if (reporter != null)
                {
                    reporter.Report(message);
                }
            };

            report("正在读取Feed记录");
            var feed = FeedLiteDb.GetFeed(model.FeedId);
            if (feed == null)
            {
                throw new InvalidOperationException("no feed returned for id " + model.FeedId);
            }

            report("正在下载 Feed");

            var addrs = CodeCompilerManager.GetResult("url", feed.Address);

            foreach (var addr in addrs)
            {
                feed.Address = addr.ToString();

                var job      = new FeedJob();
                var response = job.DoTask(feed);
                report("Feed 下载完成");

                var block = RuiJiBlockParser.ParserBlock(feed.RuiJiExpression);

                var feedResult = RuiJiExtractor.Extract(response.Data.ToString(), block);
                results.Add(feedResult);

                var snap = new FeedSnapshot
                {
                    Url             = feed.Address,
                    Content         = response.Data.ToString(),
                    Type            = feed.Type,
                    RuiJiExpression = feed.RuiJiExpression
                };

                report("正在提取Feed地址");
                var j    = new FeedExtractJob();
                var urls = j.ExtractAddress(snap);
                report("Feed地址提取完成");

                if (!string.IsNullOrEmpty(snap.RuiJiExpression))
                {
                    foreach (var url in urls)
                    {
                        report("正在提取地址 " + url);
                        var result = Cooperater.GetResult(url);

                        if (result != null)
                        {
                            var cm = new ContentModel();
                            cm.Id    = model.FeedId;
                            cm.Url   = url;
                            cm.Metas = result.Metas;
                            cm.CDate = DateTime.Now;

                            results.Add(cm);
                        }
                    }
                }

                report("计算完成");
            }

            // Clear once after all addresses are processed. Doing it inside the loop re-cleared
            // every earlier entry on each iteration (assumes ClearContent is idempotent).
            if (!model.IncludeContent)
            {
                results.ForEach((m) =>
                {
                    ClearContent(m);
                });
            }

            return(results);
        }