Example #1
0
 /// <summary>
 /// Builds a new <c>Job</c> instance from the given values. The job is only constructed here;
 /// nothing is persisted by this method.
 /// </summary>
 /// <param name="workerId">Id of the worker the job is assigned to.</param>
 /// <param name="name">Job name; cropped via <c>StrCrop(497)</c> before being stored.</param>
 /// <param name="command">Command the job should execute.</param>
 /// <param name="request">Accepted for signature compatibility; not used by this method.</param>
 /// <param name="parentJobId">Id of the job that spawned this one.</param>
 /// <param name="jobDefId">Id of the job definition the job belongs to.</param>
 /// <returns>The newly populated job.</returns>
 public Job CreateJob(int workerId, string name, string command, string request, int parentJobId, int jobDefId)
 {
     return new Job
     {
         Command = command,
         Name = name.StrCrop(497),
         WorkerId = workerId,
         ParentJobId = parentJobId,
         JobDefinitionId = jobDefId
     };
 }
Example #2
0
        /// <summary>
        /// Runs a crawler job by dispatching on <c>job.Command</c>.
        /// </summary>
        /// <param name="job">
        /// Job to run. <c>Command</c>, <c>Name</c>, <c>Id</c>, <c>WorkerId</c> and <c>JobDefinitionId</c> are read;
        /// <c>ResLength</c> is set by the "DownloadContent" command.
        /// </param>
        /// <param name="jobData">Not used by this implementation.</param>
        /// <returns>
        /// For "FindLinks" the raw text returned by <c>findLinks</c>; for "DownloadContent" the JSON form of the
        /// cleaned text; for any other command the literal "Command not implemented".
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// "FindLinks" has a link to schedule but no enabled worker exists.
        /// </exception>
        public override string ExecuteJob(Job job, JobData jobData)
        {
            var jobDef = db.Read<JobDefinition>(job.JobDefinitionId);
            var jobDefName = jobDef.Name;

            switch (job.Command)
            {
                case "FindLinks":
                    return executeFindLinks(job, jobDef, jobDefName);
                case "DownloadContent":
                    return executeDownloadContent(job, jobDefName);
            }

            return "Command not implemented";
        }

        /// <summary>
        /// "FindLinks": collects the links for <c>job.Name</c>, stores the raw result in the output directory and
        /// queues a "DownloadContent" job for every link that still needs to be downloaded.
        /// </summary>
        private string executeFindLinks(Job job, JobDefinition jobDef, string jobDefName)
        {
            string res = findLinks(job.Name);

            string path = createOutputDirectory(jobDefName);
            bool hasResult = !String.IsNullOrWhiteSpace(res);
            if (hasResult)
                File.WriteAllText(path + "\\found.links", res, Encoding.UTF8);

            // A null or blank result means "no links"; splitting it would either throw or yield a bogus empty link.
            string[] links = hasResult ? res.SplitWithTrim("\n") : new string[0];
            var foundLinkCount = links.Length;

            // Fetch the filter map once instead of once for the check and once for the lookup.
            var allFilters = FormMain.GetUrlFilters();
            var filters = allFilters.ContainsKey(job.JobDefinitionId) ? allFilters[job.JobDefinitionId] : null;
            if (filters != null)
                links = links.Where(l => !isTheLinkToBeSkipped(l, jobDef, filters)).ToArray();

            // Drop links that were already downloaded successfully. Skipped entirely when nothing is left,
            // which avoids a pointless "IN ('')" query.
            if (links.Length > 0)
            {
                // The IN list is built as text; single quotes are doubled so a URL cannot break out of its literal.
                string whereIn = "'" + links.Select(l => l.Replace("'", "''")).StringJoin("','") + "'";
                var linksAlreadySaved = db.GetList<string>("select Name from Job where Name in (" + whereIn + ") AND Status='Done' AND ResLength>0");
                if (linksAlreadySaved != null && linksAlreadySaved.Count > 0)
                    links = links.Except(linksAlreadySaved).ToArray();
            }

            List<int> workerIds = db.GetList<int>("select Id from Worker where Disabled=0 order by Id");
            bool hasWorkers = workerIds != null && workerIds.Count > 0;
            int counter = 0;
            List<Job> list = new List<Job>();
            int newLinks = 0, tryAgain = 0;
            foreach (string url in links)
            {
                // Number of jobs already recorded for this URL; a link is given up on after 5 attempts.
                var downloadCount = db.GetInt("select count(*) from Job where Name={0}", url);
                if (downloadCount > 0 && downloadCount < 5) tryAgain++;
                if (downloadCount == 0) newLinks++;
                if (downloadCount < 5)
                {
                    // Without this guard the modulo below fails with an opaque DivideByZeroException.
                    if (!hasWorkers)
                        throw new InvalidOperationException("No enabled worker is available to download the found links.");

                    // Round-robin the download jobs over the enabled workers.
                    list.Add(this.CreateJob(workerIds[counter++ % workerIds.Count], url, "DownloadContent", url, job.Id, job.JobDefinitionId));
                }
            }

            this.Log(string.Format("{0} {1} {2} total links, {3} new, {4} try again",
                DateTime.Now.ToString("yyyy.MM.dd HH.mm   "),
                jobDefName.PadRight(30),
                foundLinkCount.ToString().PadLeft(5),
                newLinks.ToString().PadLeft(5),
                tryAgain.ToString().PadLeft(5)));

            AddJobs(db, list);

            return res;
        }

        /// <summary>
        /// "DownloadContent": downloads <c>job.Name</c>, saves the raw content as "&lt;job id&gt;.html", extracts the
        /// clean text and saves it as "&lt;job id&gt;.json". On failure an error document is written to the .json file
        /// and the original exception is rethrown.
        /// </summary>
        private string executeDownloadContent(Job job, string jobDefName)
        {
            Stopwatch sw = new Stopwatch();
            var time = new Dictionary<string, long>();

            string path = createOutputDirectory(jobDefName);
            string content = "";

            try
            {
                sw.Start();
                content = downloadContent(job.Name);
                File.WriteAllText(path + "\\" + job.Id + ".html", content, Encoding.UTF8);
                time.Add("Download", sw.ElapsedMilliseconds);
            }
            catch (Exception ex)
            {
                sw.Stop();
                reportContentFailure(job, path, "Error occured while downloading the content!", "DOWNLOAD", ex);
                // Plain rethrow keeps the original stack trace ("throw ex;" would reset it).
                throw;
            }

            try
            {
                var cleanText = getCleanText(job.Name, content);
                var clean = cleanText.ToJSON();

                job.ResLength = cleanText.Content.Length;

                File.WriteAllText(path + "\\" + job.Id + ".json", clean, Encoding.UTF8);
                // The stopwatch keeps running across both phases, so the clean time is the remainder.
                time.Add("Clear", sw.ElapsedMilliseconds - time["Download"]);
                sw.Stop();

                // Only jobs slower than 10 seconds get a detailed timing line.
                if (sw.ElapsedMilliseconds > 10000)
                    this.Log(string.Format("{0} {1} {2} (Download {3} | Clear {4})",
                        DateTime.Now.ToString("yyyy.MM.dd HH.mm "),
                        "Worker " + job.WorkerId.ToString().PadLeft(2),
                        job.Name.Replace("http://", "").Replace("www.", "").Replace(":", "").StrCrop(50).PadRight(53),
                        time["Download"].ToString().PadLeft(6),
                        time["Clear"].ToString().PadLeft(6)));

                this.Log("stats done");
                if (job.ResLength > 0) this.Log("stats contentFound");
                return clean;
            }
            catch (Exception ex)
            {
                sw.Stop();
                reportContentFailure(job, path, "Error occured while cleaning the text!", "CLEAN", ex);
                // Plain rethrow keeps the original stack trace ("throw ex;" would reset it).
                throw;
            }
        }

        /// <summary>
        /// Builds the per-day, per-job-definition output directory under <c>baseDir</c>, creates it and returns its path.
        /// </summary>
        private string createOutputDirectory(string jobDefName)
        {
            string path = baseDir + "\\crawler\\" + DateTime.Now.ToString("yyyyMMdd") + "\\" + jobDefName;
            Directory.CreateDirectory(path);
            return path;
        }

        /// <summary>
        /// Writes an error document to "&lt;job id&gt;.json" and logs the failure of the given stage
        /// ("DOWNLOAD" or "CLEAN") followed by the "stats failed" marker.
        /// </summary>
        private void reportContentFailure(Job job, string path, string title, string stage, Exception ex)
        {
            File.WriteAllText(path + "\\" + job.Id + ".json", new CleanText { Title = title, Content = ex.ToStringBetter() }.ToJSON(), Encoding.UTF8);
            this.Log(string.Format("{0} {1} {2} (ERROR " + stage + " {3})",
                DateTime.Now.ToString("yyyy.MM.dd HH.mm "),
                "Worker " + job.WorkerId.ToString().PadLeft(2),
                job.Name.Replace("http://", "").Replace("www.", "").Replace(":", "").StrCrop(50).PadRight(53),
                ex.ToStringBetter().Replace(":", "")));
            this.Log("stats failed");
        }
Example #3
0
 /// <summary>
 /// Executes the given job. Declared abstract, so every concrete subclass supplies its own implementation.
 /// </summary>
 /// <param name="job">The job to execute.</param>
 /// <param name="jobData">
 /// Data record associated with the job.
 /// NOTE(review): the visible caller passes null when <c>UseJobData</c> is false - confirm implementations tolerate that.
 /// </param>
 /// <returns>
 /// A result string.
 /// NOTE(review): the visible caller stores it in <c>jobData.Response</c> when job data is in use - confirm.
 /// </returns>
 public abstract string ExecuteJob(Job job, JobData jobData);
Example #4
0
        /// <summary>
        /// Runs one job through <c>workerProcess</c> and persists the outcome: the job ends up as
        /// <c>Done</c> (with its processing time) or <c>Failed</c>, and, when the worker process uses
        /// job data, the response or error text is stored on the job's data record.
        /// </summary>
        /// <param name="job">The job to execute.</param>
        private void executeCommand(Job job)
        {
            const string missingDataMessage = "Job related data not found!";

            // Best-effort update of the worker's "last execution" info; any failure here is
            // deliberately ignored so that it cannot prevent the job itself from running.
            try
            {
                db.FillEntity(worker);
                worker.LastExecution = DateTime.Now;
                worker.LastExecutionInfo = string.Format("{0}: {1}", job.Command, job.Name);
                db.Save(worker);
            }
            catch
            {
            }

            JobData jobData = null;
            if (workerProcess.UseJobData)
            {
                var stored = (JobData)db.Read(workerProcess.GetQueueDataType(), "JobId = {0}", job.Id);
                jobData = stored ?? new JobData();

                // A record without a positive Id means nothing was stored for this job:
                // save an explanatory record, fail the job and report the error.
                if (jobData.Id <= 0)
                {
                    jobData = new JobData { JobId = job.Id, Response = missingDataMessage };
                    db.Save(jobData);

                    job.Status = JobStatuses.Failed;
                    db.Save(job);

                    backgroundWorker.ReportProgress(100, string.Format("error:{0}:{1}", missingDataMessage, worker.Name));
                    return;
                }
            }

            var timer = new Stopwatch();
            try
            {
                timer.Start();
                var result = workerProcess.ExecuteJob(job, jobData);
                if (workerProcess.UseJobData)
                    jobData.Response = result;
                timer.Stop();
            }
            catch (Exception error)
            {
                if (workerProcess.UseJobData)
                {
                    // Store the message plus, when present, the inner exception's message on a second line.
                    string message = error.Message;
                    string innerDetail = "";
                    if (error.InnerException != null)
                        innerDetail = "- " + error.InnerException.Message;

                    jobData.Response = message + "\n" + innerDetail;
                    db.Save(jobData);
                }

                job.Status = JobStatuses.Failed;
                db.Save(job);
                return;
            }

            if (workerProcess.UseJobData)
                db.Save(jobData);

            job.Status = JobStatuses.Done;
            job.ProcessTime = (int)timer.ElapsedMilliseconds;
            db.Save(job);
        }