/// <summary>
/// Opens <c>parttern.CurrentUrl</c> in a new PhantomJS driver, collects the configured query
/// results, runs the optional "load more" step, and then recursively crawls pagination links
/// and next-level links found on the page.
/// </summary>
/// <param name="parttern">
/// Describes the page to load (<c>CurrentUrl</c>), what to extract (<c>QueryTarget</c>), how to
/// load more content (<c>LoadMore</c>), whether to follow <c>.paginate</c> links
/// (<c>PageParameter</c>), and which links to descend into (<c>NextUrlParttern</c> /
/// <c>NextParttern</c>).
/// </param>
/// <remarks>
/// Any exception is caught and only its message is written to the console; the method never
/// rethrows. The driver is quit in a <c>finally</c> block so that it is also shut down when a
/// step fails part-way through.
/// </remarks>
public void Query(QueryParttern parttern)
{
    try
    {
        var runner = new PhantomRunner();
        var driver = (PhantomJSDriver)runner.Create(parttern.CurrentUrl);
        if (driver == null)
        {
            Console.WriteLine("driver null");
            Console.WriteLine(parttern.CurrentUrl);
            return;
        }

        try
        {
            driverQueue.Enqueue(driver);

            // Process the current page.
            if (parttern.QueryTarget != null)
            {
                var resultDic = new List<SpiderResultDicionary>();
                var resultList = new List<SpiderQueryResult>();
                foreach (var queryTarge in parttern.QueryTarget)
                {
                    var result = new SpiderQueryResult();
                    // NOTE(review): every iteration overwrites QueryResult, so only the last
                    // query item's results are kept — confirm this is intended.
                    foreach (var queryItem in queryTarge.Query)
                    {
                        result.KeyName = queryTarge.PartternName;
                        result.QueryResult = driver.GetSpiderResults(queryItem);
                    }
                    resultList.Add(result);
                }

                resultDic.Add(new SpiderResultDicionary
                {
                    QueryName = driver.Title,
                    QueryResult = resultList
                });
                allResultCollection.Add(resultDic);
                if (OnEveryGetResult != null)
                {
                    OnEveryGetResult(resultDic);
                }
            }

            // Load more content: click an element, run a script, or follow a "next page" URL.
            if (parttern.LoadMore != null)
            {
                switch (parttern.LoadMore.Operation)
                {
                    case "click":
                        var clickElement = driver.FindElement(By.CssSelector(parttern.LoadMore.LoadMoreParttern));
                        if (clickElement != null)
                        {
                            for (int i = 0; i < parttern.LoadMore.ExcuteTime; i++)
                            {
                                clickElement.Click();
                                Thread.Sleep(parttern.LoadMore.WatingTimeSeconds * 1000);
                            }
                        }
                        break;

                    case "script":
                        for (int i = 0; i < parttern.LoadMore.ExcuteTime; i++)
                        {
                            driver.ExecuteScript(parttern.LoadMore.LoadMoreParttern);
                            Thread.Sleep(parttern.LoadMore.WatingTimeSeconds * 1000);
                        }
                        break;

                    case "url":
                        var nextPage = driver.FindElement(By.CssSelector(".ui-page-inner a:last-child"));
                        if (nextPage.Text.Contains("下一页"))
                        {
                            // Re-run the same pattern against the next page's URL.
                            var currentParttern = (QueryParttern)parttern.Clone();
                            currentParttern.CurrentUrl = nextPage.GetAttribute("href");

                            var loadMoreActions = new List<KeyValuePair<Action<QueryParttern>, QueryParttern>>
                            {
                                new KeyValuePair<Action<QueryParttern>, QueryParttern>(
                                    _nextParttern => { Query(_nextParttern); },
                                    currentParttern)
                            };
                            // NOTE(review): 64 is presumably a worker/concurrency limit — confirm
                            // against excutor.ExcuteWait.
                            excutor.ExcuteWait(loadMoreActions, 64);
                        }
                        break;
                }
            }

            // Follow pagination links.
            if (parttern.PageParameter != null)
            {
                var pageNaviElements = driver.FindElements(By.CssSelector(".paginate a"));
                foreach (var eleItem in pageNaviElements)
                {
                    var targetHref = eleItem.GetAttribute("href");
                    // Anchors without an href yield null; there is nothing to navigate to.
                    if (string.IsNullOrEmpty(targetHref) || targetHref == "#")
                    {
                        continue;
                    }

                    // A missing class attribute yields null; treat it as "no class" rather
                    // than letting Contains throw and abort the whole crawl step.
                    var targetClass = eleItem.GetAttribute("class") ?? string.Empty;
                    if (targetClass.Contains("pg_prev"))
                    {
                        continue;
                    }

                    if (targetClass.Contains("page"))
                    {
                        Query(new QueryParttern
                        {
                            CurrentUrl = targetHref,
                            PageParameter = "page",
                            NextUrlParttern = parttern.NextUrlParttern,
                            NextParttern = parttern.NextParttern
                        });
                    }
                    else
                    {
                        Query(new QueryParttern
                        {
                            CurrentUrl = targetHref,
                            NextUrlParttern = parttern.NextUrlParttern,
                            NextParttern = parttern.NextParttern
                        });
                    }
                }
            }

            // Descend into next-level URLs.
            if (parttern.NextUrlParttern != null)
            {
                var targetNextUrls = driver.FindElements(By.CssSelector(parttern.NextUrlParttern))
                    .Select(x => x.GetAttribute("href"))
                    .Where(href => !string.IsNullOrEmpty(href))
                    .ToList();

                var nextActions = new List<KeyValuePair<Action<QueryParttern>, QueryParttern>>();
                foreach (var urlItem in targetNextUrls)
                {
                    var nextParttern = (QueryParttern)parttern.NextParttern.Clone();
                    nextParttern.CurrentUrl = urlItem;
                    nextActions.Add(new KeyValuePair<Action<QueryParttern>, QueryParttern>(
                        _nextParttern => { Query(_nextParttern); },
                        nextParttern));
                }

                excutor.ExcuteWait(nextActions, 64);
            }
        }
        finally
        {
            // Always quit the driver, including when a step above throws.
            driver.Quit();
        }
    }
    catch (Exception exc)
    {
        Console.WriteLine(exc.Message);
    }
}
/// <summary>
/// Opens <c>parttern.CurrentUrl</c> in a new PhantomJS driver, collects the configured query
/// results (linked to <paramref name="prevParent"/>), runs the optional "load more" step, and
/// then recursively crawls pagination links and next-level links found on the page.
/// </summary>
/// <param name="parttern">
/// Describes the page to load (<c>CurrentUrl</c>), an optional <c>StateNum</c> to record, what
/// to extract (<c>QueryTarget</c>), how to load more content (<c>LoadMore</c>), whether to
/// follow <c>.paginate</c> links (<c>PageParameter</c>), and which links to descend into
/// (<c>NextUrlParttern</c> / <c>NextParttern</c>).
/// </param>
/// <param name="prevParent">
/// Result entry assigned as <c>Parent</c> of this page's result entry; <c>null</c> for a root
/// page. Child queries started from this page receive the first entry of this page's result
/// list as their <c>prevParent</c>.
/// </param>
/// <remarks>
/// Any exception is caught and only its message is written to the console; the method never
/// rethrows. The driver is quit in a <c>finally</c> block so that it is also shut down when a
/// step fails part-way through.
/// </remarks>
public void Query(QueryParttern parttern, SpiderResultDicionary prevParent = null)
{
    var currentResultList = new List<SpiderResultDicionary>();
    try
    {
        var runner = new PhantomRunner();
        var driver = (PhantomJSDriver)runner.Create(parttern.CurrentUrl);
        if (driver == null)
        {
            Console.WriteLine("driver null");
            Console.WriteLine(parttern.CurrentUrl);
            return;
        }

        try
        {
            driverQueue.Enqueue(driver);

            // Process the current page.
            if (!string.IsNullOrEmpty(parttern.StateNum))
            {
                currentResultList.Add(new SpiderResultDicionary
                {
                    QueryName = "statet",
                    QueryResult = new List<SpiderQueryResult>
                    {
                        new SpiderQueryResult
                        {
                            KeyName = "state",
                            QueryResult = new List<string> { parttern.StateNum }
                        }
                    }
                });
            }

            if (parttern.QueryTarget != null)
            {
                var resultList = new List<SpiderQueryResult>();
                foreach (var queryTarge in parttern.QueryTarget)
                {
                    var result = new SpiderQueryResult();
                    // NOTE(review): every iteration overwrites QueryResult, so only the last
                    // query item's results are kept — confirm this is intended.
                    foreach (var queryItem in queryTarge.Query)
                    {
                        result.KeyName = queryTarge.PartternName;
                        result.QueryResult = driver.GetSpiderResults(queryItem);
                        Thread.Sleep(30);
                    }
                    resultList.Add(result);
                }

                currentResultList.Add(new SpiderResultDicionary
                {
                    Parent = prevParent,
                    QueryName = driver.Title,
                    QueryResult = resultList
                });

                // Publish the results through the event. Only raised when
                // IsQueryTagetToChildren is false, to avoid reporting duplicate results.
                if (OnEveryGetResult != null && !parttern.IsQueryTagetToChildren)
                {
                    OnEveryGetResult(currentResultList);
                }
            }

            // The first entry collected for this page (the state entry when StateNum is set,
            // otherwise the page entry) becomes the parent handed to child queries.
            SpiderResultDicionary transformResultDict =
                currentResultList.Count > 0 ? currentResultList[0] : null;

            // Load more content: simulate a paging click, run a script (e.g. scrolling), or
            // follow a "next page" URL.
            if (parttern.LoadMore != null)
            {
                switch (parttern.LoadMore.Operation)
                {
                    case "click":
                        var clickElement = driver.FindElement(By.CssSelector(parttern.LoadMore.LoadMoreParttern));
                        if (clickElement != null)
                        {
                            for (int i = 0; i < parttern.LoadMore.ExcuteTime; i++)
                            {
                                clickElement.Click();
                                Thread.Sleep(parttern.LoadMore.WatingTimeSeconds * 1000);
                            }
                        }
                        break;

                    case "script":
                        for (int i = 0; i < parttern.LoadMore.ExcuteTime; i++)
                        {
                            driver.ExecuteScript(parttern.LoadMore.LoadMoreParttern);
                            Thread.Sleep(parttern.LoadMore.WatingTimeSeconds * 1000);
                        }
                        break;

                    case "url":
                        var nextPage = driver.FindElement(By.CssSelector(".ui-page-inner a:last-child"));
                        if (nextPage.Text.Contains("下一页"))
                        {
                            // Re-run the same pattern against the next page's URL.
                            var currentParttern = (QueryParttern)parttern.Clone();
                            currentParttern.CurrentUrl = nextPage.GetAttribute("href");

                            var loadMoreActions = new List<KeyValuePair<Action<QueryParttern>, QueryParttern>>
                            {
                                new KeyValuePair<Action<QueryParttern>, QueryParttern>(
                                    _nextParttern => { Query(_nextParttern, transformResultDict); },
                                    currentParttern)
                            };
                            excutor.ExcuteWait(loadMoreActions, HandlerNumber);
                        }
                        break;
                }
            }

            // Follow pagination links.
            if (parttern.PageParameter != null)
            {
                var pageNaviElements = driver.FindElements(By.CssSelector(".paginate a"));
                foreach (var eleItem in pageNaviElements)
                {
                    var targetHref = eleItem.GetAttribute("href");
                    // Anchors without an href yield null; there is nothing to navigate to.
                    if (string.IsNullOrEmpty(targetHref) || targetHref == "#")
                    {
                        continue;
                    }

                    // A missing class attribute yields null; treat it as "no class" rather
                    // than letting Contains throw and abort the whole crawl step.
                    var targetClass = eleItem.GetAttribute("class") ?? string.Empty;
                    if (targetClass.Contains("pg_prev"))
                    {
                        continue;
                    }

                    if (targetClass.Contains("page"))
                    {
                        Query(new QueryParttern
                        {
                            CurrentUrl = targetHref,
                            PageParameter = "page",
                            NextUrlParttern = parttern.NextUrlParttern,
                            NextParttern = parttern.NextParttern
                        }, transformResultDict);
                    }
                    else
                    {
                        Query(new QueryParttern
                        {
                            CurrentUrl = targetHref,
                            NextUrlParttern = parttern.NextUrlParttern,
                            NextParttern = parttern.NextParttern
                        }, transformResultDict);
                    }
                }
            }

            // Descend into next-level URLs.
            if (parttern.NextUrlParttern != null)
            {
                var targetNextUrls = driver.FindElements(By.CssSelector(parttern.NextUrlParttern))
                    .Select(x => x.GetAttribute("href"))
                    .Where(href => !string.IsNullOrEmpty(href))
                    .ToList();

                var nextActions = new List<KeyValuePair<Action<QueryParttern>, QueryParttern>>();
                foreach (var urlItem in targetNextUrls)
                {
                    var nextParttern = (QueryParttern)parttern.NextParttern.Clone();
                    nextParttern.CurrentUrl = urlItem;
                    nextActions.Add(new KeyValuePair<Action<QueryParttern>, QueryParttern>(
                        _nextParttern => { Query(_nextParttern, transformResultDict); },
                        nextParttern));
                }

                excutor.ExcuteWait(nextActions, HandlerNumber);
            }
        }
        finally
        {
            // Always quit the driver, including when a step above throws.
            driver.Quit();
        }
    }
    catch (Exception exc)
    {
        Console.WriteLine(exc.Message);
    }
}