Exemplo n.º 1
0
        /// <summary>
        /// Looks through the extract nodes held in <c>_extractNodes</c> for a pager node.
        /// </summary>
        /// <returns>
        /// For the first node that yields a result: the node itself when its runtime type is exactly
        /// <see cref="ExtractPagerNode"/>, otherwise whatever non-null pager
        /// <c>getPagerNodeFromExtractDataNode</c> returns for it. <c>null</c> when no node yields one.
        /// </returns>
        public ExtractPagerNode GetPagerNode()
        {
            foreach (ExtractDataNode candidate in _extractNodes)
            {
                // Exact type comparison (not "is"): only nodes whose runtime type is
                // ExtractPagerNode itself are returned directly; everything else is
                // handed to the helper.
                ExtractPagerNode pager = typeof(ExtractPagerNode).Equals(candidate.GetType())
                    ? candidate as ExtractPagerNode
                    : getPagerNodeFromExtractDataNode(candidate);

                if (pager != null)
                {
                    return pager;
                }
            }

            return null;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Recursively converts an XML node and all of its descendants into a tree of extract nodes.
        /// </summary>
        /// <param name="node">XML node to convert; every entry of its <c>ChildNodes</c> is converted recursively.</param>
        /// <param name="deepth">Depth value passed to the constructor of the node created for <paramref name="node"/>; children are created with <paramref name="deepth"/> + 1.</param>
        /// <param name="taskDoc">Task document assigned to <c>OwnerTaskDocument</c> of every node created.</param>
        /// <returns>
        /// The extract node created for <paramref name="node"/>, with one child per XML child node.
        /// A child whose <c>isPage</c> attribute converts to <c>true</c> is represented by an
        /// <see cref="ExtractPagerNode"/>; all other children are plain <see cref="ExtractDataNode"/> instances.
        /// </returns>
        /// <remarks>
        /// A non-empty <c>isPage</c> value is converted with <c>Convert.ToBoolean</c>, which throws a
        /// <c>FormatException</c> for text that is not a valid Boolean string.
        /// </remarks>
        public static ExtractDataNode ExtractNodeAll(XmlNode node, int deepth, ExtractTaskDocument taskDoc)
        {
            ExtractDataNode eNode = new ExtractDataNode(node, deepth);
            eNode.OwnerTaskDocument = taskDoc;

            XmlNodeList nodesList = node.ChildNodes;
            if (nodesList == null)
            {
                return eNode;
            }

            foreach (XmlNode subNode in nodesList)
            {
                // The subtree is always built first; for a pager its children are moved over below.
                ExtractDataNode childNode = ExtractDataNode.ExtractNodeAll(subNode, deepth + 1, taskDoc);
                string          pagerAttr = GetNodeNotNullAttrValue(subNode, "isPage");
                bool            isPager   = !string.IsNullOrEmpty(pagerAttr) && Convert.ToBoolean(pagerAttr);

                if (!isPager)
                {
                    childNode.OwnerTaskDocument = taskDoc;
                    childNode.ParentExtractNode = eNode;
                    eNode.ChildNodes.Add(childNode);
                    continue;
                }

                // Pager node definition: replaces the plain node that was built for this XML child.
                ExtractPagerNode pagerNode = new ExtractPagerNode(subNode, deepth + 1);
                pagerNode.OwnerTaskDocument = taskDoc;
                pagerNode.ParentExtractNode = eNode;

                foreach (ExtractDataNode grandChild in childNode.ChildNodes)
                {
                    // Re-parent to the pager: the recursive call linked these nodes to childNode,
                    // which is discarded here and never attached to the tree, so leaving the old
                    // link would break any walk up the parent chain.
                    grandChild.ParentExtractNode = pagerNode;
                    pagerNode.ChildNodes.Add(grandChild);
                }

                eNode.ChildNodes.Add(pagerNode);
            }

            return eNode;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Runs an extraction task: builds the task document from <c>task.InvokeArguments</c>, extracts the
        /// start-up document, hands the result to <c>task.DataReceiver</c>, and keeps extracting the next
        /// page for as long as a <c>PagerType.ByNext</c> pager node supplies a next URL.
        /// </summary>
        /// <param name="task">Task configuration providing the extraction rule arguments and the data receiver.</param>
        /// <remarks>
        /// When an extraction report is not successful, the exception stored in the report is thrown and
        /// processing stops. Pager nodes of any type other than <c>PagerType.ByNext</c> are not followed.
        /// NOTE(review): there is no guard against a "next" link that leads back to an already processed
        /// page; such a link would keep this loop running.
        /// </remarks>
        public static void Execute(ExtractTaskConfig task)
        {
            ExtractTaskDocument taskDoc = ExtractTaskDocument.FromExtractRule(task.InvokeArguments);
            Document            rootDoc = taskDoc.GetStartupDocument();

            bool hasNextPage;
            do
            {
                hasNextPage = false;

                // One execution scope per page; it is disposed before the next page is processed.
                using (new ExecutionContextScope())
                {
                    ExtractDocumentReport report = taskDoc.ExtractWith(rootDoc);
                    if (!report.IsSuccess())
                    {
                        throw report.ExtractExcetpion;
                    }

                    DeliverExtractResult(task, report);

                    // Merge action, run after the page result has been delivered.
                    INSoupMerginReceiver merger = task.DataReceiver as INSoupMerginReceiver;
                    if (merger != null)
                    {
                        merger.Mergin();
                    }

                    // Continuous extraction: move on to the next page when the pager provides one.
                    ExtractPagerNode pager = taskDoc.GetPagerNode();
                    if (pager != null)
                    {
                        List<string> nextUrls = pager.GetPageUrlList();
                        if (pager.PageListType == PagerType.ByNext && nextUrls != null && nextUrls.Count > 0)
                        {
                            taskDoc.DocumentUrl = nextUrls[0];
                            rootDoc             = taskDoc.GetDocumentByUrl(taskDoc.DocumentUrl);
                            hasNextPage         = true;
                        }
                    }
                }
            }
            while (hasNextPage);
        }

        /// <summary>
        /// Hands the current extract result to the task's receiver, using the first receiver interface it
        /// implements in this order: simple object, object list, mixed data.
        /// </summary>
        private static void DeliverExtractResult(ExtractTaskConfig task, ExtractDocumentReport report)
        {
            // Simple object: the whole result is passed on unchanged.
            ISimpleObjectReceiver simpleReceiver = task.DataReceiver as ISimpleObjectReceiver;
            if (simpleReceiver != null)
            {
                simpleReceiver.Accept(report.CurrentExtractResult);
                return;
            }

            // List-only receiver.
            IObjectListReceiver listReceiver = task.DataReceiver as IObjectListReceiver;
            if (listReceiver != null)
            {
                SendExtractedLists(listReceiver, report);
                return;
            }

            // Composite receiver: scalar values plus named lists.
            IMixedDataReceiver mixedReceiver = task.DataReceiver as IMixedDataReceiver;
            if (mixedReceiver != null)
            {
                SendMixedExtractData(mixedReceiver, report);
            }
        }

        /// <summary>
        /// Sends every row of every list-valued result entry to the receiver; entries that are not lists are skipped.
        /// </summary>
        private static void SendExtractedLists(IObjectListReceiver receiver, ExtractDocumentReport report)
        {
            foreach (string key in report.CurrentExtractResult.Keys)
            {
                List<Dictionary<string, object>> rows = report.CurrentExtractResult[key] as List<Dictionary<string, object>>;
                if (rows == null)
                {
                    continue;
                }

                // RecordCount is set per list, before that list's rows are sent.
                receiver.RecordCount = rows.Count;
                foreach (Dictionary<string, object> row in rows)
                {
                    receiver.Send(row);
                }
            }
        }

        /// <summary>
        /// Splits the result into list-valued entries (named lists) and all other entries (summary) and
        /// passes both to the receiver as one <see cref="MixedExtractData"/>.
        /// </summary>
        private static void SendMixedExtractData(IMixedDataReceiver receiver, ExtractDocumentReport report)
        {
            Dictionary<string, object> summary = new Dictionary<string, object>();
            Dictionary<string, List<Dictionary<string, object>>> namedLists = new Dictionary<string, List<Dictionary<string, object>>>();

            foreach (string key in report.CurrentExtractResult.Keys)
            {
                object value = report.CurrentExtractResult[key];
                List<Dictionary<string, object>> rows = value as List<Dictionary<string, object>>;
                if (rows != null)
                {
                    namedLists.Add(key, rows);
                }
                else
                {
                    summary.Add(key, value);
                }
            }

            MixedExtractData data = new MixedExtractData();
            data.Summary         = summary;
            data.NamedListObject = namedLists;
            receiver.Accept(data);
        }