Ejemplo n.º 1
0
        /// <summary>
        /// Loads <c>parttern.CurrentUrl</c> in a new PhantomJS driver, extracts the configured
        /// query targets, runs the optional "load more" step, and then recursively queries
        /// pagination links and next-level links found on the page.
        /// </summary>
        /// <param name="parttern">Describes the page to load, what to extract and which links to follow.</param>
        /// <remarks>
        /// Any exception is caught and its message is written to the console; nothing is rethrown.
        /// The driver is quit in a <c>finally</c> block so it is released on every exit path.
        /// </remarks>
        public void Query(QueryParttern parttern)
        {
            PhantomJSDriver driver = null;
            try
            {
                var runner = new PhantomRunner();
                driver = (PhantomJSDriver)runner.Create(parttern.CurrentUrl);
                if (driver == null)
                {
                    Console.WriteLine("driver null");
                    Console.WriteLine(parttern.CurrentUrl);
                    return;
                }
                driverQueue.Enqueue(driver);

                // Extract the results for the current page.
                if (parttern.QueryTarget != null)
                {
                    var resultList = new List<SpiderQueryResult>();
                    foreach (var queryTarge in parttern.QueryTarget)
                    {
                        var result = new SpiderQueryResult();
                        // Each query overwrites the previous one, so the last query's result is kept.
                        foreach (var queryItem in queryTarge.Query)
                        {
                            result.KeyName     = queryTarge.PartternName;
                            result.QueryResult = driver.GetSpiderResults(queryItem);
                        }
                        resultList.Add(result);
                    }

                    var resultDic = new List<SpiderResultDicionary>
                    {
                        new SpiderResultDicionary
                        {
                            QueryName   = driver.Title,
                            QueryResult = resultList
                        }
                    };

                    allResultCollection.Add(resultDic);
                    if (OnEveryGetResult != null)
                    {
                        OnEveryGetResult(resultDic);
                    }
                }

                // Optional "load more" step: click an element, run a script, or follow a next-page URL.
                if (parttern.LoadMore != null)
                {
                    switch (parttern.LoadMore.Operation)
                    {
                    case "click":
                        // FindElement throws when nothing matches, so use FindElements to make
                        // the "element is missing" case a no-op instead of aborting the crawl.
                        var clickElement = driver.FindElements(By.CssSelector(parttern.LoadMore.LoadMoreParttern)).FirstOrDefault();
                        if (clickElement != null)
                        {
                            for (int i = 0; i < parttern.LoadMore.ExcuteTime; i++)
                            {
                                clickElement.Click();
                                Thread.Sleep(parttern.LoadMore.WatingTimeSeconds * 1000);
                            }
                        }
                        break;

                    case "script":
                        for (int i = 0; i < parttern.LoadMore.ExcuteTime; i++)
                        {
                            driver.ExecuteScript(parttern.LoadMore.LoadMoreParttern);
                            Thread.Sleep(parttern.LoadMore.WatingTimeSeconds * 1000);
                        }
                        break;

                    case "url":
                        var nextPage = driver.FindElements(By.CssSelector(".ui-page-inner a:last-child")).FirstOrDefault();
                        if (nextPage != null && nextPage.Text.Contains("下一页"))
                        {
                            // Re-run the same pattern against the next page's URL.
                            var currentParttern = (QueryParttern)parttern.Clone();
                            currentParttern.CurrentUrl = nextPage.GetAttribute("href");
                            var nextPageActions = new List<KeyValuePair<Action<QueryParttern>, QueryParttern>>
                            {
                                new KeyValuePair<Action<QueryParttern>, QueryParttern>(next => Query(next), currentParttern)
                            };
                            excutor.ExcuteWait(nextPageActions, 64);
                        }
                        break;
                    }
                }

                // Follow pagination links.
                // NOTE(review): visited URLs are not tracked here, so pages that link to each
                // other may be queried repeatedly — confirm whether the caller bounds this.
                if (parttern.PageParameter != null)
                {
                    var pageNaviElements = driver.FindElements(By.CssSelector(".paginate a"));
                    foreach (var eleItem in pageNaviElements)
                    {
                        var targetHref = eleItem.GetAttribute("href");
                        if (string.IsNullOrEmpty(targetHref) || targetHref == "#")
                        {
                            continue;
                        }

                        // GetAttribute returns null when the attribute is absent.
                        var targetClass = eleItem.GetAttribute("class") ?? string.Empty;
                        if (targetClass.Contains("pg_prev"))
                        {
                            continue;
                        }

                        var pageParttern = new QueryParttern
                        {
                            CurrentUrl      = targetHref,
                            NextUrlParttern = parttern.NextUrlParttern,
                            NextParttern    = parttern.NextParttern
                        };
                        // Only links whose class contains "page" keep paginating.
                        if (targetClass.Contains("page"))
                        {
                            pageParttern.PageParameter = "page";
                        }
                        Query(pageParttern);
                    }
                }

                // Follow next-level links; each one is queried with a clone of NextParttern.
                if (parttern.NextUrlParttern != null && parttern.NextParttern != null)
                {
                    var nextUrls = driver.FindElements(By.CssSelector(parttern.NextUrlParttern))
                                         .Select(x => x.GetAttribute("href"))
                                         .Where(href => !string.IsNullOrEmpty(href));
                    var kvActionList = new List<KeyValuePair<Action<QueryParttern>, QueryParttern>>();
                    foreach (var urlItem in nextUrls)
                    {
                        var nextParttern = (QueryParttern)parttern.NextParttern.Clone();
                        nextParttern.CurrentUrl = urlItem;
                        kvActionList.Add(new KeyValuePair<Action<QueryParttern>, QueryParttern>(next => Query(next), nextParttern));
                    }

                    excutor.ExcuteWait(kvActionList, 64);
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            finally
            {
                // Quit on every exit path; previously a failure above skipped Quit and leaked the driver.
                if (driver != null)
                {
                    try
                    {
                        driver.Quit();
                    }
                    catch (Exception quitExc)
                    {
                        // Keep the "never throws" behaviour even if shutting down fails.
                        Console.WriteLine(quitExc.Message);
                    }
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Loads <c>parttern.CurrentUrl</c> in a new PhantomJS driver, extracts the configured
        /// query targets, runs the optional "load more" step, and then recursively queries
        /// pagination links and next-level links, passing the first result of this page down
        /// as the parent of the child queries.
        /// </summary>
        /// <param name="parttern">Describes the page to load, what to extract and which links to follow.</param>
        /// <param name="prevParent">Result entry stored as <c>Parent</c> of this page's result; may be null.</param>
        /// <remarks>
        /// Any exception is caught and its message is written to the console; nothing is rethrown.
        /// The driver is quit in a <c>finally</c> block so it is released on every exit path.
        /// </remarks>
        public void Query(QueryParttern parttern, SpiderResultDicionary prevParent = null)
        {
            var currentResultList = new List<SpiderResultDicionary>();
            PhantomJSDriver driver = null;

            try
            {
                var runner = new PhantomRunner();
                driver = (PhantomJSDriver)runner.Create(parttern.CurrentUrl);
                if (driver == null)
                {
                    Console.WriteLine("driver null");
                    Console.WriteLine(parttern.CurrentUrl);
                    return;
                }
                driverQueue.Enqueue(driver);

                // When a state number is supplied, record it as the first result entry.
                if (!string.IsNullOrEmpty(parttern.StateNum))
                {
                    currentResultList.Add(new SpiderResultDicionary
                    {
                        QueryName   = "statet",
                        QueryResult = new List<SpiderQueryResult>
                        {
                            new SpiderQueryResult
                            {
                                KeyName     = "state",
                                QueryResult = new List<string> { parttern.StateNum }
                            }
                        }
                    });
                }

                // Extract the results for the current page.
                if (parttern.QueryTarget != null)
                {
                    var resultList = new List<SpiderQueryResult>();
                    foreach (var queryTarge in parttern.QueryTarget)
                    {
                        var result = new SpiderQueryResult();
                        // Each query overwrites the previous one, so the last query's result is kept.
                        foreach (var queryItem in queryTarge.Query)
                        {
                            result.KeyName     = queryTarge.PartternName;
                            result.QueryResult = driver.GetSpiderResults(queryItem);
                            Thread.Sleep(30);
                        }
                        resultList.Add(result);
                    }
                    currentResultList.Add(new SpiderResultDicionary
                    {
                        Parent      = prevParent,
                        QueryName   = driver.Title,
                        QueryResult = resultList
                    });

                    // Publish the results through the event; skipped when IsQueryTagetToChildren
                    // is set, to avoid reporting the same results twice.
                    if (OnEveryGetResult != null && !parttern.IsQueryTagetToChildren)
                    {
                        OnEveryGetResult(currentResultList);
                    }
                }

                // Parent handed to child queries: the first entry collected above (the state
                // entry when StateNum is set, otherwise the page result), or null if none.
                SpiderResultDicionary transformResultDict = currentResultList.Count > 0
                    ? currentResultList[0]
                    : null;

                // Optional "load more" step: simulate a click, run a script (e.g. scroll),
                // or follow a next-page URL.
                if (parttern.LoadMore != null)
                {
                    switch (parttern.LoadMore.Operation)
                    {
                    case "click":
                        // FindElement throws when nothing matches, so use FindElements to make
                        // the "element is missing" case a no-op instead of aborting the crawl.
                        var clickElement = driver.FindElements(By.CssSelector(parttern.LoadMore.LoadMoreParttern)).FirstOrDefault();
                        if (clickElement != null)
                        {
                            for (int i = 0; i < parttern.LoadMore.ExcuteTime; i++)
                            {
                                clickElement.Click();
                                Thread.Sleep(parttern.LoadMore.WatingTimeSeconds * 1000);
                            }
                        }
                        break;

                    case "script":
                        for (int i = 0; i < parttern.LoadMore.ExcuteTime; i++)
                        {
                            driver.ExecuteScript(parttern.LoadMore.LoadMoreParttern);
                            Thread.Sleep(parttern.LoadMore.WatingTimeSeconds * 1000);
                        }
                        break;

                    case "url":
                        var nextPage = driver.FindElements(By.CssSelector(".ui-page-inner a:last-child")).FirstOrDefault();
                        if (nextPage != null && nextPage.Text.Contains("下一页"))
                        {
                            // Re-run the same pattern against the next page's URL.
                            var currentParttern = (QueryParttern)parttern.Clone();
                            currentParttern.CurrentUrl = nextPage.GetAttribute("href");
                            var nextPageActions = new List<KeyValuePair<Action<QueryParttern>, QueryParttern>>
                            {
                                new KeyValuePair<Action<QueryParttern>, QueryParttern>(
                                    next => Query(next, transformResultDict), currentParttern)
                            };
                            excutor.ExcuteWait(nextPageActions, HandlerNumber);
                        }
                        break;
                    }
                }

                // Follow pagination links.
                // NOTE(review): visited URLs are not tracked here, so pages that link to each
                // other may be queried repeatedly — confirm whether the caller bounds this.
                if (parttern.PageParameter != null)
                {
                    var pageNaviElements = driver.FindElements(By.CssSelector(".paginate a"));
                    foreach (var eleItem in pageNaviElements)
                    {
                        var targetHref = eleItem.GetAttribute("href");
                        if (string.IsNullOrEmpty(targetHref) || targetHref == "#")
                        {
                            continue;
                        }

                        // GetAttribute returns null when the attribute is absent.
                        var targetClass = eleItem.GetAttribute("class") ?? string.Empty;
                        if (targetClass.Contains("pg_prev"))
                        {
                            continue;
                        }

                        var pageParttern = new QueryParttern
                        {
                            CurrentUrl      = targetHref,
                            NextUrlParttern = parttern.NextUrlParttern,
                            NextParttern    = parttern.NextParttern
                        };
                        // Only links whose class contains "page" keep paginating.
                        if (targetClass.Contains("page"))
                        {
                            pageParttern.PageParameter = "page";
                        }
                        Query(pageParttern, transformResultDict);
                    }
                }

                // Follow next-level links; each one is queried with a clone of NextParttern.
                if (parttern.NextUrlParttern != null && parttern.NextParttern != null)
                {
                    var nextUrls = driver.FindElements(By.CssSelector(parttern.NextUrlParttern))
                                         .Select(x => x.GetAttribute("href"))
                                         .Where(href => !string.IsNullOrEmpty(href));
                    var kvActionList = new List<KeyValuePair<Action<QueryParttern>, QueryParttern>>();
                    foreach (var urlItem in nextUrls)
                    {
                        var nextParttern = (QueryParttern)parttern.NextParttern.Clone();
                        nextParttern.CurrentUrl = urlItem;
                        kvActionList.Add(new KeyValuePair<Action<QueryParttern>, QueryParttern>(
                            next => Query(next, transformResultDict), nextParttern));
                    }

                    excutor.ExcuteWait(kvActionList, HandlerNumber);
                }
            }
            catch (Exception exc)
            {
                Console.WriteLine(exc.Message);
            }
            finally
            {
                // Quit on every exit path; previously a failure above skipped Quit and leaked the driver.
                if (driver != null)
                {
                    try
                    {
                        driver.Quit();
                    }
                    catch (Exception quitExc)
                    {
                        // Keep the "never throws" behaviour even if shutting down fails.
                        Console.WriteLine(quitExc.Message);
                    }
                }
            }
        }