Beispiel #1
0
        /// <summary>
        /// Expands a category page: queues one MORE_CATEGORY task per refinement link,
        /// or hands the page to <c>DoPages</c> when no refinement links are found.
        /// </summary>
        /// <param name="task">The task being processed; passed to <c>TaskFail</c> and <c>DoPages</c>.</param>
        /// <param name="html">Raw page markup.</param>
        /// <param name="result">Receives the newly created tasks.</param>
        private void DoMoreCategory(Task task, string html, ContentProcessResult result)
        {
            HtmlNode root = GetRoot(html);

            // The task fails when the page has no root or contains the literal marker text below.
            bool unusable = root == null || html.Contains("td.refinementContainer");
            if (unusable)
            {
                TaskFail(task, result);
                return;
            }

            HtmlNodeCollection refinementSpans = root.SelectNodes(".//div[@id='refinements']//ul[@data-typeid='n']/li/a/span[@class='refinementLink']");
            if (refinementSpans == null)
            {
                // No further refinements: process this page as a listing page.
                DoPages(task, html, result, root);
                return;
            }

            foreach (HtmlNode span in refinementSpans)
            {
                // The XPath selects the <span>; the href lives on its parent <a>.
                HtmlNode anchor = span.ParentNode;
                if (anchor == null)
                {
                    continue;
                }

                string href = HAH.SafeGetAttributeStringValue(anchor, "href");
                if (href == null)
                {
                    continue;
                }

                result.NewTasks.Add(MakeTask(href, AmazonTaskType.MORE_CATEGORY, null));
            }
        }
Beispiel #2
0
        /// <summary>
        /// Processes a provider list page: handles every list item and queues the next page, if any.
        /// </summary>
        /// <param name="tp">
        /// The task being processed. Its bytes are decoded with <c>Encoding.Default</c>;
        /// new tasks and the success flag are written to <c>tp.CpResult</c>.
        /// </param>
        private void HandleProviderList(TaskProcess tp)
        {
            string html = Encoding.Default.GetString(tp.TaskData.Bytes);
            if (!Validate(html))
            {
                tp.CpResult.Success = false;
                return;
            }

            // A missing root and a missing item list are both reported through FailProcess.
            HtmlNode root = GetRoot(html);
            HtmlNodeCollection items = root == null
                ? null
                : root.SelectNodes(@"//div[@id='list-content']//li[@class='list-item']");
            if (items == null)
            {
                FailProcess(tp);
                return;
            }

            // Stop at the first item that cannot be handled.
            bool allHandled = true;
            foreach (HtmlNode item in items)
            {
                if (!HandleProviderListNode(tp, item))
                {
                    allHandled = false;
                    break;
                }
            }

            // The next page is queued even when an item failed; it keeps the current task's context.
            string nextHref = HAH.SafeGetSuccessorAttributeStringValue(root, @".//a[@class='page-next']", "href");
            if (nextHref != null)
            {
                Task nextPage = new Task();
                nextPage.Url     = FixRelativeURL(nextHref, tp.TaskData.Task.Host);
                nextPage.Type    = (int)TaobaoTaskType.PROVIDER_LIST;
                nextPage.Context = tp.TaskData.Task.Context;
                tp.CpResult.NewTasks.Add(nextPage);
            }

            tp.CpResult.Success = allHandled;
        }
Beispiel #3
0
        /// <summary>
        /// Processes a listing page: queues a PAGE task for every shop link and a PAGES task
        /// for the next-page link.
        /// </summary>
        /// <param name="task">The task being processed; passed to <c>TaskFail</c> on failure.</param>
        /// <param name="html">Raw page markup; only parsed when <paramref name="root"/> is null.</param>
        /// <param name="result">Receives the newly created tasks.</param>
        /// <param name="root">Already parsed document root, or null to parse <paramref name="html"/> here.</param>
        private void DoPages(Task task, string html, ContentProcessResult result, HtmlNode root)
        {
            if (root == null)
            {
                root = GetRoot(html);
                if (root == null)
                {
                    TaskFail(task, result);
                    return;
                }
            }

            HtmlNodeCollection shops       = root.SelectNodes(".//div[@id='rightResultsATF']//a[@class='title']");
            HtmlNode           contextNode = root.SelectSingleNode("id('breadCrumb')");

            // The breadcrumb supplies the context string attached to every PAGE task.
            if (contextNode == null)
            {
                TaskFail(task, result);
                return;
            }
            string context = HttpUtility.HtmlDecode(contextNode.InnerText);

            // Normalise the breadcrumb: drop everything ReplaceSpace matches, turn literal
            // '\' and '/' into ',', then turn the '›' separator into '\'.
            // The order matters: the original backslashes must be replaced before new ones are introduced.
            context = ReplaceSpace.Replace(context, "");
            context = context.Replace('\\', ',');
            context = context.Replace('/', ',');
            context = context.Replace('›', '\\');
            if (shops != null)
            {
                foreach (HtmlNode node in shops)
                {
                    string path = HAH.SafeGetAttributeStringValue(node, "href");
                    // Skip anchors without an href, as is done for every other link in this crawler,
                    // instead of handing a null URL to MakeTask.
                    if (path != null)
                    {
                        result.NewTasks.Add(MakeTask(path, AmazonTaskType.PAGE, context));
                    }
                }
            }
            HtmlNode nextPageLink = root.SelectSingleNode(".//a[@id='pagnNextLink']");

            if (nextPageLink != null)
            {
                string path = HAH.SafeGetAttributeStringValue(nextPageLink, "href");
                if (path != null)
                {
                    result.NewTasks.Add(MakeTask(path, AmazonTaskType.PAGES, null));
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Processes the site directory page: for every directory link whose href yields a
        /// numeric node id, queues a MORE_CATEGORY task for that node's search page.
        /// </summary>
        /// <param name="task">The task being processed; passed to <c>TaskFail</c> when the page has no root.</param>
        /// <param name="html">Raw page markup.</param>
        /// <param name="result">Receives the newly created tasks.</param>
        private void DoIndex(Task task, string html, ContentProcessResult result)
        {
            HtmlNode root = GetRoot(html);
            if (root == null)
            {
                TaskFail(task, result);
                return;
            }

            HtmlNodeCollection links = root.SelectNodes(".//div[@id='siteDirectory']//td//a");
            if (links == null)
            {
                // No directory links: nothing to queue, and the task is not marked as failed.
                return;
            }

            foreach (HtmlNode anchor in links)
            {
                string href = HAH.SafeGetAttributeStringValue(anchor, "href");
                if (href == null)
                {
                    continue;
                }

                Match idMatch = GetNodeID.Match(href);
                if (!idMatch.Success)
                {
                    continue;
                }

                int nodeId;
                if (!Int32.TryParse(idMatch.Groups["id"].Value, out nodeId))
                {
                    continue;
                }

                // Resulting URL, e.g. http://www.amazon.cn/gp/search/ref=sr_hi_1?rh=n%3A658390051&ie=UTF8
                string searchUrl = "http://www.amazon.cn/gp/search/ref=sr_hi_1?rh=n%3A" + nodeId + "&ie=UTF8";
                result.NewTasks.Add(MakeTask(searchUrl, AmazonTaskType.MORE_CATEGORY, null));
            }
        }
Beispiel #5
0
        /// <summary>
        /// Handles a combined-list item offered by several shops by queueing a PROVIDER_LIST
        /// task for the item's shop-list link.
        /// </summary>
        /// <param name="tp">The task being processed; the new task is added to <c>tp.CpResult.NewTasks</c>.</param>
        /// <param name="node">The list item node.</param>
        /// <returns>
        /// <c>false</c> when the link is missing; otherwise <c>true</c>, including when the
        /// link does not match <c>RegexUniqID</c> and therefore no task is queued.
        /// </returns>
        private bool HandleCombinedListMultiShops(TaskProcess tp, HtmlNode node)
        {
            string href = HAH.SafeGetSuccessorAttributeStringValue(node, @".//div[@class='legend2']/a", "href");
            if (href == null)
            {
                LogMissing("URL", node.InnerHtml);
                return false;
            }

            Match idMatch = RegexUniqID.Match(href);
            if (!idMatch.Success)
            {
                return true;
            }

            // The captured id is carried along as the new task's context.
            string capturedId = idMatch.Groups["number"].Value;

            Task providerList = new Task();
            providerList.Type    = (int)TaobaoTaskType.PROVIDER_LIST;
            providerList.Url     = FixRelativeURL(href, tp.TaskData.Task.Host);
            providerList.Context = capturedId;
            tp.CpResult.NewTasks.Add(providerList);

            return true;
        }
Beispiel #6
0
        /// <summary>
        /// Handles a combined-list item sold by a single shop: builds an <c>Item</c> from the
        /// node, upserts it, and queues a PROVIDER_RATE task for the item's seller.
        /// </summary>
        /// <param name="tp">The task being processed; the new task is added to <c>tp.CpResult.NewTasks</c>.</param>
        /// <param name="node">The list item node.</param>
        /// <returns>
        /// <c>false</c> (after logging through <c>LogMissing</c>) when freight, name, location,
        /// price or seller id is missing or unparsable; otherwise <c>true</c>.
        /// </returns>
        private bool HandleCombinedListSingleShop(TaskProcess tp, HtmlNode node)
        {
            // Amounts on the page use '.' as the decimal separator, so parse them with the
            // invariant culture instead of whatever culture the crawler happens to run under.
            System.Globalization.NumberStyles numberStyle =
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            #region build an item
            Item i = new Item();

            string freight = HAH.SafeGetSuccessorInnerText(node, @".//li[@class='shipment']/span[@class='fee']");
            if (freight == null || freight.Length <= 3)
            {
                LogMissing("freight", freight);
                return false;
            }

            // Sample text: "运费:8.00" (a three-character "freight:" label followed by the amount).
            string freightAmount = freight.Substring(3);
            double freightValue;
            if (!double.TryParse(freightAmount, numberStyle, invariant, out freightValue))
            {
                LogMissing("freight", freightAmount);
                return false;
            }
            i.Freight = freightValue;

            i.Name = HAH.SafeGetSuccessorAttributeStringValue(node, @".//a[@class='EventCanSelect']", "title");
            if (i.Name == null)
            {
                LogMissing("Name", node.InnerHtml);
                return false;
            }

            i.Location = HAH.SafeGetSuccessorInnerText(node, @".//li[@class='shipment']/span[@class='loc']");
            if (i.Location == null)
            {
                LogMissing("Location", node.InnerHtml);
                return false;
            }

            // Sample text: "359.00"
            string price = HAH.SafeGetSuccessorInnerText(node, @".//li[@class='price']/em");
            double priceValue;
            if (string.IsNullOrEmpty(price) || !double.TryParse(price, numberStyle, invariant, out priceValue))
            {
                LogMissing("price", price);
                return false;
            }
            i.Price = priceValue;

            // The recent deal count is optional; it stays 0 when absent or unparsable.
            i.RecentDeal = 0;
            string recentDeal = HAH.SafeGetSuccessorInnerText(node, @".//li[@class='price']/span");

            if (!string.IsNullOrEmpty(recentDeal))
            {
                Match dealMatch = RegexRecentSellCount.Match(recentDeal);
                if (dealMatch.Success)
                {
                    int dealCount;
                    if (Int32.TryParse(dealMatch.Groups["number"].Value, out dealCount))
                    {
                        i.RecentDeal = dealCount;
                    }
                    else
                    {
                        // Previously Int32.Parse threw here (e.g. on overflow) and aborted the whole page.
                        LogMissing("RecentDeal", recentDeal);
                    }
                }
            }

            string sellerID = HAH.SafeGetSuccessorAttributeStringValue(node, @".//li[@class='seller']/a", "href");
            if (string.IsNullOrEmpty(sellerID))
            {
                LogMissing("SellerID", node.InnerHtml);
                return false;
            }

            // A seller id that matches the pattern but does not fit the integer type is reported
            // like any other malformed seller id instead of throwing.
            Match sellerMatch = RegexSellerID.Match(sellerID);
            int sellerTaobaoId;
            if (!sellerMatch.Success || !Int32.TryParse(sellerMatch.Groups["number"].Value, out sellerTaobaoId))
            {
                LogMissing("SellerID", sellerID);
                return false;
            }
            i.SellerTaobaoId = sellerTaobaoId;

            i.UniqId = 0;

            // The href may be absent; FixRelativeURL's handling of null is not visible here,
            // so only resolve the link when there is one.
            string itemHref = HAH.SafeGetSuccessorAttributeStringValue(node, ".//a[@class='EventCanSelect']", "href");
            i.UrlLink = itemHref == null ? null : FixRelativeURL(itemHref, tp.TaskData.Task.Host);

            if (!string.IsNullOrEmpty(i.UrlLink))
            {
                Match itemMatch = RegexItemTaobaoID.Match(i.UrlLink);
                if (itemMatch.Success)
                {
                    long itemTaobaoId;
                    if (long.TryParse(itemMatch.Groups["number"].Value, out itemTaobaoId))
                    {
                        i.TaobaoId = itemTaobaoId;
                    }
                    else
                    {
                        LogMissing("TaobaoID", i.UrlLink);
                    }
                }
            }

            OpsItem.Upsert(i);
            #endregion
            #region build new task
            // Queue the seller's rating page; the seller id doubles as the task context.
            tp.CpResult.NewTasks.Add(new Task
            {
                Url     = RateURL.Replace("#UID#", i.SellerTaobaoId.ToString()),
                Type    = (int)TaobaoTaskType.PROVIDER_RATE,
                Context = i.SellerTaobaoId.ToString()
            });
            #endregion

            return true;
        }
Beispiel #7
0
        /// <summary>
        /// Processes a combined list page: dispatches every list item to the single-shop or
        /// multi-shop handler depending on its shop count, then queues the next page, if any.
        /// </summary>
        /// <param name="tp">
        /// The task being processed. Its bytes are decoded with <c>Encoding.Default</c>;
        /// new tasks and the success flag are written to <c>tp.CpResult</c>.
        /// </param>
        private void HandleCombinedList(TaskProcess tp)
        {
            string html = Encoding.Default.GetString(tp.TaskData.Bytes);
            if (!Validate(html))
            {
                tp.CpResult.Success = false;
                return;
            }

            // A missing root and a missing item list are both reported through FailProcess.
            HtmlNode root = GetRoot(html);
            HtmlNodeCollection items = root == null
                ? null
                : root.SelectNodes(@"//div[@id='list-content']//li[@class='list-item']");
            if (items == null)
            {
                FailProcess(tp);
                return;
            }

            bool success = true;
            foreach (HtmlNode item in items)
            {
                string shopCountText = HAH.SafeGetSuccessorInnerText(item, @".//div[@class='legend2']/a");
                if (string.IsNullOrEmpty(shopCountText))
                {
                    // A missing count is only logged; the item is skipped without failing the page.
                    LogMissing("count", shopCountText);
                    continue;
                }

                Match countMatch = RegexShopCount.Match(shopCountText);
                if (!countMatch.Success)
                {
                    LogMissing("count", shopCountText);
                    success = false;
                    break;
                }

                // Exactly one shop is handled as an item; any other count as a shop list.
                int shopCount = Int32.Parse(countMatch.Groups["number"].Value);
                bool handled = shopCount == 1
                    ? HandleCombinedListSingleShop(tp, item)
                    : HandleCombinedListMultiShops(tp, item);
                if (!handled)
                {
                    success = false;
                    break;
                }
            }

            // The next page is queued even when an item failed.
            string nextHref = HAH.SafeGetSuccessorAttributeStringValue(root, @".//a[@class='page-next']", "href");
            if (nextHref != null)
            {
                Task nextPage = new Task();
                nextPage.Url  = FixRelativeURL(nextHref, tp.TaskData.Task.Host);
                nextPage.Type = (int)TaobaoTaskType.COMBINED_LIST;
                tp.CpResult.NewTasks.Add(nextPage);
            }

            tp.CpResult.Success = success;
        }
Beispiel #8
0
        /// <summary>
        /// Processes a seller rating page: fills a <c>Seller</c> with credit, good-rate,
        /// six-month dynamic scores and guarantee flags, then upserts it.
        /// </summary>
        /// <param name="tp">
        /// The task being processed. Its bytes are decoded with <c>Encoding.Default</c> and its
        /// task context must hold the seller's numeric id.
        /// </param>
        /// <remarks>
        /// NOTE(review): unlike the list handlers in this file, the success path never assigns
        /// <c>tp.CpResult.Success</c>; confirm that its default is what callers expect.
        /// </remarks>
        private void HandleProviderRate(TaskProcess tp)
        {
            // Numbers on the page use '.' as the decimal separator, so parse them with the
            // invariant culture instead of whatever culture the crawler happens to run under.
            System.Globalization.NumberStyles numberStyle =
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            string html = Encoding.Default.GetString(tp.TaskData.Bytes);

            if (!Validate(html))
            {
                tp.CpResult.Success = false;
                return;
            }
            HtmlNode root = GetRoot(html);

            if (root == null)
            {
                FailProcess(tp);
                return;
            }
            Seller s = new Seller();

            // The seller id travels in the task context. A missing or non-numeric context fails
            // the task instead of throwing out of the handler.
            string contextId = tp.TaskData.Task.Context;
            int taobaoId;
            if (contextId == null || !Int32.TryParse(contextId, out taobaoId))
            {
                LogMissing("TaobaoID", contextId);
                tp.CpResult.Success = false;
                return;
            }
            s.TaobaoId = taobaoId;
            s.IsTmall  = root.SelectSingleNode(@".//div[@class='tmall-pro']") != null;

            // Credit and good-rate are only read for non-Tmall sellers.
            if (!s.IsTmall)
            {
                string credit = HAH.SafeGetSuccessorInnerText(root, @".//ul[contains(@class,'sep')]/li");
                if (string.IsNullOrEmpty(credit))
                {
                    LogMissing("credit", credit);
                    tp.CpResult.Success = false;
                    return;
                }
                credit = RegexEmpty.Replace(credit, "");

                // Sample text: "卖家信用:5" (a five-character "seller credit:" label followed by the value).
                // The text may be shorter than the label after stripping, so only cut it when it is
                // long enough; otherwise log the whole text rather than throwing from Substring.
                string creditValue = credit.Length >= 5 ? credit.Substring(5) : credit;
                int crediti;
                if (credit.Length > 5 && int.TryParse(creditValue, out crediti))
                {
                    s.Credit = crediti;
                }
                else
                {
                    LogMissing("Credit", creditValue);
                }

                // Sample text: "好评率:98.30%" (a four-character "good rate:" label, the value, then '%').
                string goodRate     = HAH.SafeGetSuccessorInnerText(root, @".//div[@id='seller-rate']//em");
                int    percentIndex = string.IsNullOrEmpty(goodRate) ? -1 : goodRate.IndexOf('%');
                double goodrated;
                if (percentIndex >= 4 &&
                    double.TryParse(goodRate.Substring(4, percentIndex - 4), numberStyle, invariant, out goodrated))
                {
                    s.Goodrate = goodrated;
                }
                else
                {
                    LogMissing("GoodRate", goodRate);
                }
            }

            // Extraction of the shop creation time (from .//div[@class='bd']/ul/li via
            // RegexCreateTime into s.StartTime) is currently disabled.

            // Six-month dynamic scores.
            HtmlNodeCollection nodes = root.SelectNodes(@".//div[@id='sixmonth']//div[@class='item-scrib']");
            if (nodes != null)
            {
                foreach (var node in nodes)
                {
                    string   title   = HAH.SafeGetSuccessorInnerText(node, @"./span[@class='title']");
                    string   count   = HAH.SafeGetSuccessorInnerText(node, @"./em[@class='count']");
                    HtmlNode percent = node.SelectSingleNode(@".//strong[contains(@class,'percent')]");
                    if (percent == null)
                    {
                        LogMissing(title, "Percent");
                        continue;
                    }

                    double c;
                    if (!double.TryParse(count, numberStyle, invariant, out c))
                    {
                        LogMissing(title, count);
                        continue;
                    }

                    // "----" is taken as 0.
                    double d;
                    string rate = percent.InnerText.Replace("%", "");
                    if (rate == "----")
                    {
                        d = 0;
                    }
                    else if (!double.TryParse(rate, numberStyle, invariant, out d))
                    {
                        LogMissing(title, rate);
                        continue;
                    }

                    // A "lower" class on the percent element makes the percentage negative.
                    string percentClass = percent.Attributes["class"].Value;
                    if (percentClass.Contains("lower"))
                    {
                        d *= -1.0;
                    }

                    if (title == "宝贝与描述相符:")
                    {
                        // Item matches description.
                        s.Rmatch = c;
                        s.Pmatch = d;
                    }
                    else if (title == "卖家的服务态度:")
                    {
                        // Seller's service attitude.
                        s.Rservice = c;
                        s.Pservice = d;
                    }
                    else if (title == "卖家发货的速度:")
                    {
                        // Seller's shipping speed.
                        s.Rspeed = c;
                        s.Pspeed = d;
                    }
                }
            }

            // Guarantees: each flag is set when the block's markup contains the matching phrase.
            string text = HAH.SafeGetSuccessorInnerHtml(root, @".//div[@class='desc' or @class='promise']");

            if (!string.IsNullOrEmpty(text))
            {
                s.Pprotect   = text.Contains("消费者保障");
                s.Psevendays = text.Contains("7天无理由退换货") || text.Contains("七天退换");
                s.Preal      = text.Contains("正品保障");
                s.Pinvoice   = text.Contains("提供发票");
            }
            else
            {
                s.Pprotect   = false;
                s.Psevendays = false;
                s.Preal      = false;
                s.Pinvoice   = false;
            }

            // Extraction of the 30-day service statistics (refund days, refund rate, complaint
            // rate and penalty count from .//div[@class='each']) is currently disabled.

            OpsSeller.Upsert(s);
        }