Exemple #1
0
        /// <summary>
        /// Handles a category page that may offer further refinement links.
        /// </summary>
        /// <remarks>
        /// The task is failed when the HTML cannot be turned into a root node or when the raw
        /// HTML contains the text "td.refinementContainer". When no refinement links are found
        /// the page is handed to <c>DoPages</c>; otherwise one MORE_CATEGORY task is queued per
        /// refinement link that carries an href.
        /// </remarks>
        /// <param name="task">The task whose content is being processed.</param>
        /// <param name="html">Raw HTML of the fetched page.</param>
        /// <param name="result">Receives the newly queued tasks or the failure state.</param>
        private void DoMoreCategory(Task task, string html, ContentProcessResult result)
        {
            HtmlNode document = GetRoot(html);
            bool unusable = document == null || html.Contains("td.refinementContainer");

            if (unusable)
            {
                TaskFail(task, result);
                return;
            }

            HtmlNodeCollection refinementLabels =
                document.SelectNodes(".//div[@id='refinements']//ul[@data-typeid='n']/li/a/span[@class='refinementLink']");

            if (refinementLabels == null)
            {
                // No further refinements: treat this page as a listing and reuse the parsed root.
                DoPages(task, html, result, document);
                return;
            }

            foreach (HtmlNode label in refinementLabels)
            {
                // The XPath selects the span inside the anchor, so the anchor is the span's parent.
                HtmlNode anchor = label.ParentNode;
                if (anchor == null)
                {
                    continue;
                }

                string href = HAH.SafeGetAttributeStringValue(anchor, "href");
                if (href == null)
                {
                    continue;
                }

                result.NewTasks.Add(MakeTask(href, AmazonTaskType.MORE_CATEGORY, null));
            }
        }
Exemple #2
0
        /// <summary>
        /// Routes the task data to the handler that matches its <c>TaobaoTaskType</c>.
        /// </summary>
        /// <param name="td">Task data; <c>td.Task.Type</c> selects the handler.</param>
        /// <returns>
        /// The <c>ContentProcessResult</c> that was handed to the handler through a
        /// <c>TaskProcess</c> wrapper.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The task type is not COMBINED_LIST, PROVIDER_LIST or PROVIDER_RATE.
        /// </exception>
        protected override ContentProcessResult Process(TaskData td)
        {
            ContentProcessResult outcome = new ContentProcessResult();
            TaskProcess work = new TaskProcess { TaskData = td, CpResult = outcome };
            TaobaoTaskType kind = (TaobaoTaskType)td.Task.Type;

            if (kind == TaobaoTaskType.COMBINED_LIST)
            {
                HandleCombinedList(work);
            }
            else if (kind == TaobaoTaskType.PROVIDER_LIST)
            {
                HandleProviderList(work);
            }
            else if (kind == TaobaoTaskType.PROVIDER_RATE)
            {
                HandleProviderRate(work);
            }
            else
            {
                throw new ArgumentOutOfRangeException();
            }

            return outcome;
        }
Exemple #3
0
        /// <summary>
        /// Decodes the fetched bytes and dispatches them to the handler for the task's
        /// <c>AmazonTaskType</c>.
        /// </summary>
        /// <remarks>
        /// IMAGE tasks are returned as successful without any processing. For every other type
        /// the bytes are decoded to a string and validated; a failed validation yields an
        /// unsuccessful result. CATEGORY tasks and unlisted types pass validation but run no handler.
        /// </remarks>
        /// <param name="td">Task data supplying the task and its downloaded bytes.</param>
        /// <returns>The processing result; <c>Success</c> starts as true and handlers may clear it.</returns>
        protected override ContentProcessResult Process(TaskData td)
        {
            Task current = td.Task;
            byte[] payload = td.Bytes;
            AmazonTaskType kind = (AmazonTaskType)current.Type;

            // Optimistic default: the handlers below flip this when they fail.
            ContentProcessResult outcome = new ContentProcessResult { Success = true };

            if (kind == AmazonTaskType.IMAGE)
            {
                // Image content is not processed here.
                return outcome;
            }

            string html = GetString(payload, null);
            if (!Validate(html))
            {
                outcome.Success = false;
                return outcome;
            }

            switch (kind)
            {
                case AmazonTaskType.INDEX:
                    DoIndex(current, html, outcome);
                    break;

                case AmazonTaskType.MORE_CATEGORY:
                    DoMoreCategory(current, html, outcome);
                    break;

                case AmazonTaskType.PAGES:
                    DoPages(current, html, outcome, null);
                    break;

                case AmazonTaskType.PAGE:
                    DoPage(current, html, outcome);
                    break;

                case AmazonTaskType.CATEGORY:
                default:
                    // CATEGORY has no active handler; other types are ignored as well.
                    break;
            }

            return outcome;
        }
Exemple #4
0
        /// <summary>
        /// Processes a listing page: queues a PAGE task for every result link and a PAGES task
        /// for the next listing page, if there is one.
        /// </summary>
        /// <remarks>
        /// The task is failed when the HTML cannot be parsed or when the breadcrumb node is
        /// missing. The breadcrumb text is normalised and attached to each PAGE task as context.
        /// </remarks>
        /// <param name="task">The task whose content is being processed.</param>
        /// <param name="html">Raw HTML of the page; only parsed when <paramref name="root"/> is null.</param>
        /// <param name="result">Receives the newly queued tasks or the failure state.</param>
        /// <param name="root">Already-parsed root node, or null to parse <paramref name="html"/> here.</param>
        private void DoPages(Task task, string html, ContentProcessResult result, HtmlNode root)
        {
            if (root == null)
            {
                root = GetRoot(html);
                if (root == null)
                {
                    TaskFail(task, result);
                    return;
                }
            }

            HtmlNodeCollection shops       = root.SelectNodes(".//div[@id='rightResultsATF']//a[@class='title']");
            HtmlNode           contextNode = root.SelectSingleNode("id('breadCrumb')");

            if (contextNode == null)
            {
                TaskFail(task, result);
                return;
            }

            // Build the context string from the breadcrumb text.
            // ReplaceSpace is defined elsewhere; presumably it strips whitespace (TODO confirm).
            string context = HttpUtility.HtmlDecode(contextNode.InnerText);

            context = ReplaceSpace.Replace(context, "");
            // Literal slashes and backslashes become commas first, so that the backslashes
            // introduced on the last line only ever stand for the breadcrumb separator.
            context = context.Replace('\\', ',');
            context = context.Replace('/', ',');
            context = context.Replace('›', '\\');

            if (shops != null)
            {
                foreach (HtmlNode node in shops)
                {
                    string path = HAH.SafeGetAttributeStringValue(node, "href");

                    // Skip anchors without an href, mirroring the guard on the next-page
                    // link below, so no task is ever created from a null path.
                    if (path != null)
                    {
                        result.NewTasks.Add(MakeTask(path, AmazonTaskType.PAGE, context));
                    }
                }
            }

            HtmlNode nextPageLink = root.SelectSingleNode(".//a[@id='pagnNextLink']");

            if (nextPageLink != null)
            {
                string path = HAH.SafeGetAttributeStringValue(nextPageLink, "href");
                if (path != null)
                {
                    result.NewTasks.Add(MakeTask(path, AmazonTaskType.PAGES, null));
                }
            }
        }
Exemple #5
0
        /// <summary>
        /// Walks the site-directory links of the page and queues a MORE_CATEGORY search task
        /// for every link whose href yields a numeric node id.
        /// </summary>
        /// <remarks>
        /// The task is failed when the HTML cannot be parsed. A page without directory links
        /// queues nothing and leaves the result untouched.
        /// </remarks>
        /// <param name="task">The task whose content is being processed.</param>
        /// <param name="html">Raw HTML of the fetched page.</param>
        /// <param name="result">Receives the newly queued tasks or the failure state.</param>
        private void DoIndex(Task task, string html, ContentProcessResult result)
        {
            HtmlNode document = GetRoot(html);
            if (document == null)
            {
                TaskFail(task, result);
                return;
            }

            HtmlNodeCollection anchors = document.SelectNodes(".//div[@id='siteDirectory']//td//a");
            if (anchors == null)
            {
                return;
            }

            foreach (HtmlNode anchor in anchors)
            {
                string href = HAH.SafeGetAttributeStringValue(anchor, "href");
                if (href == null)
                {
                    continue;
                }

                Match idMatch = GetNodeID.Match(href);
                if (!idMatch.Success)
                {
                    continue;
                }

                int nodeId;
                if (!Int32.TryParse(idMatch.Groups["id"].Value, out nodeId))
                {
                    continue;
                }

                // Resulting URL looks like:
                // http://www.amazon.cn/gp/search/ref=sr_hi_1?rh=n%3A658390051&ie=UTF8
                string searchUrl = "http://www.amazon.cn/gp/search/ref=sr_hi_1?rh=n%3A" + nodeId + "&ie=UTF8";
                result.NewTasks.Add(MakeTask(searchUrl, AmazonTaskType.MORE_CATEGORY, null));
            }
        }
Exemple #6
0
 /// <summary>
 /// Marks <paramref name="result"/> as failed and discards every task queued on it so far.
 /// </summary>
 /// <param name="task">The task that failed; not used by the body.</param>
 /// <param name="result">The result to flag as unsuccessful and whose NewTasks list is cleared.</param>
 private void TaskFail(Task task, ContentProcessResult result)
 {
     result.Success = false;
     // Drop anything queued before the failure was detected.
     result.NewTasks.Clear();
 }
Exemple #7
0
        /// <summary>
        /// Extracts picture paths from the raw HTML of a page and queues an IMAGE task for each.
        /// </summary>
        /// <remarks>
        /// The four picture patterns are tried in order and only the first one that produces
        /// at least one match is used, even if all of its captured paths turn out to be empty.
        /// When no pattern matches, the miss is logged and the task is failed.
        /// </remarks>
        /// <param name="task">The task being processed; its Context is passed on to each IMAGE task.</param>
        /// <param name="html">Raw HTML of the fetched page.</param>
        /// <param name="result">Receives the newly queued tasks or the failure state.</param>
        private void DoPage(Task task, string html, ContentProcessResult result)
        {
            MatchCollection mc;

            if ((mc = PicPathRegex.Matches(html)).Count != 0)
            {
                // Only the "thumb" group is queued; the pattern's "zoom" group is not used.
                EnqueueImageTasksFromMatches(mc, "thumb", task, result);
            }
            else if ((mc = PicPathRegex2.Matches(html)).Count != 0)
            {
                // This pattern's whole match is the picture path.
                EnqueueImageTasksFromMatches(mc, null, task, result);
            }
            else if ((mc = PicPathRegex3.Matches(html)).Count != 0)
            {
                EnqueueImageTasksFromMatches(mc, "img", task, result);
            }
            else if ((mc = PicPathRegex4.Matches(html)).Count != 0)
            {
                EnqueueImageTasksFromMatches(mc, "pic", task, result);
            }
            else
            {
                Log("{0} has not picture", task.Url);
                TaskFail(task, result);
            }
        }

        /// <summary>
        /// Queues one IMAGE task per match whose captured picture path is non-empty.
        /// </summary>
        /// <param name="matches">Matches produced by one of the picture patterns.</param>
        /// <param name="groupName">Named group holding the path, or null to use the whole match.</param>
        /// <param name="task">The task being processed; supplies the context for the new tasks.</param>
        /// <param name="result">Receives the newly queued tasks.</param>
        private void EnqueueImageTasksFromMatches(MatchCollection matches, string groupName, Task task, ContentProcessResult result)
        {
            foreach (Match m in matches)
            {
                string picPath = groupName == null ? m.Value : m.Groups[groupName].Value;
                if (!string.IsNullOrEmpty(picPath))
                {
                    result.NewTasks.Add(MakeTask(picPath, AmazonTaskType.IMAGE, task.Context));
                }
            }
        }