Example #1
0
        /// <summary>
        /// Expands one document into one clone per page. The page count is the integer in
        /// <c>Column</c> divided by the queried items-per-page value, rounded up; each clone
        /// carries its page number, counted upwards from the queried minimum value.
        /// </summary>
        /// <param name="document">Document that supplies the total and the queried settings.</param>
        /// <returns>
        /// One cloned document per page, or nothing when the total, the items-per-page value
        /// or the minimum value is missing or not an integer.
        /// </returns>
        protected override IEnumerable <IFreeDocument> InternalTransformManyData(IFreeDocument document)
        {
            int itemPerPage;
            int total;
            int min;
            var col = string.IsNullOrEmpty(NewColumn) ? Column : NewColumn;

            // "?." keeps a missing total from throwing: int.TryParse(null, ...) simply returns false.
            if (!int.TryParse(document.Query(ItemPerPage), out itemPerPage) ||
                !int.TryParse(document[Column]?.ToString(), out total) ||
                !int.TryParse(document.Query(MinValue), out min))
            {
                yield break;
            }

            // Avoid a division by zero; a page size of 0 is treated as 1.
            if (itemPerPage == 0)
            {
                itemPerPage = 1;
            }

            // Integer ceiling of total / itemPerPage.
            int totalp = total / itemPerPage;
            if (total % itemPerPage != 0)
            {
                totalp += 1;
            }

            for (int i = min; i < min + totalp; i += 1)
            {
                var doc = document.Clone();
                doc[col] = i;
                yield return(doc);
            }
        }
Example #2
0
        /// <summary>
        /// Generates one document per step from the queried minimum to the queried maximum
        /// (inclusive), advancing by the queried integer interval. Each value is rounded to
        /// five decimals and, when <c>Format</c> is set, rendered as a string with that format.
        /// </summary>
        /// <param name="document">Document used to resolve the interval, minimum and maximum settings.</param>
        /// <returns>
        /// The generated documents; nothing when a setting cannot be parsed or the interval
        /// is not positive.
        /// </returns>
        public override IEnumerable <IFreeDocument> Generate(IFreeDocument document = null)
        {
            int    interval;
            double max, min;

            // NOTE(review): document defaults to null but is passed to Query unguarded —
            // presumably Query tolerates null; confirm.
            if (!int.TryParse(document.Query(Interval), out interval) ||
                !double.TryParse(document.Query(MinValue), out min) ||
                !double.TryParse(document.Query(MaxValue), out max))
            {
                yield break;
            }

            // A zero or negative step never reaches max, so the loop would not terminate.
            if (interval <= 0)
            {
                yield break;
            }

            for (var i = min; i <= max; i += interval)
            {
                var item = new FreeDocument();

                double value = Math.Round(i, 5);
                object result;
                if (!string.IsNullOrEmpty(Format))
                {
                    result = value.ToString(Format);
                }
                else
                {
                    result = value;
                }
                item.Add(Column, result);
                yield return(item);
            }
        }
Example #3
0
        /// <summary>
        /// Builds a formatted string from the <c>Column</c> value and the values named in
        /// <c>MergeWith</c>. Without a <c>Format</c> the column value is returned unchanged.
        /// </summary>
        /// <param name="datas">Document that supplies the values and resolves queried settings.</param>
        /// <returns>The column value (empty string when missing) or the formatted string.</returns>
        public override object TransformData(IFreeDocument datas)
        {
            // A missing column value is treated as an empty string.
            var value = datas[Column] ?? "";

            if (string.IsNullOrEmpty(Format))
            {
                return value;
            }

            // Resolve the format itself, then substitute every rgx match found in it
            // with the result of querying that match against the document.
            var template = datas.Query(Format);
            foreach (Match hit in rgx.Matches(template))
            {
                template = template.Replace(hit.Value, datas.Query(hit.Value));
            }

            // Argument {0} is the column value; the rest come from the space-separated
            // MergeWith entries: the document value when the key exists, else the entry text.
            var args = new List <object> { value };
            foreach (var key in MergeWith.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries))
            {
                args.Add(datas.ContainsKey(key) ? datas[key] : key);
            }

            return string.Format(template, args.ToArray());
        }
Example #4
0
        /// <summary>
        /// Yields a single document whose <c>Column</c> holds the text of a file. The path is
        /// the queried <c>FileName</c> when that resolves, otherwise <c>FileName</c> itself.
        /// Results are kept in <c>buffHelper</c> keyed by path.
        /// </summary>
        /// <param name="document">Optional document used to resolve the file name.</param>
        /// <returns>Exactly one document: the cached one, or a freshly read one.</returns>
        public override IEnumerable <IFreeDocument> Generate(IFreeDocument document = null)
        {
            var path   = document?.Query(FileName) ?? FileName;
            var cached = buffHelper.Get(path);

            if (cached == null)
            {
                // Cache miss: read the whole file with the configured encoding and remember it.
                var text  = File.ReadAllText(path, AttributeHelper.GetEncoding(EncodingType));
                var fresh = new FreeDocument();

                fresh.Add(Column, text);
                buffHelper.Set(path, fresh);
                yield return fresh;
            }
            else
            {
                yield return cached;
            }
        }
Example #5
0
        /// <summary>
        /// Extracts data from the <c>Column</c> value with an XPath. A nested document is
        /// queried directly; any other value is parsed as HTML first.
        /// </summary>
        /// <param name="document">Document that holds the HTML (or nested document) and resolves the XPath.</param>
        /// <returns>
        /// <c>null</c> when the column value is missing; the text of the detected text node when
        /// <c>GetText</c> is set and such a node exists; the number of matching nodes when
        /// <c>GetCount</c> is set (0 when nothing matches); otherwise the data selected by the XPath.
        /// </returns>
        public override object TransformData(IFreeDocument document)
        {
            var item = document[Column];

            // Nothing to parse; return null like the sibling transformers do for a missing value.
            if (item == null)
            {
                return(null);
            }

            var nested = item as IFreeDocument;
            if (nested != null)
            {
                return(nested.GetDataFromXPath(XPath));
            }
            var docu = new HtmlDocument();

            docu.LoadHtml(item.ToString());
            if (GetText)
            {
                var path     = docu.DocumentNode.GetTextNode();
                var textnode = docu.DocumentNode.SelectSingleNode(path);
                if (textnode != null)
                {
                    return(textnode.GetNodeText());
                }
            }
            if (GetCount)
            {
                // NOTE(review): this branch uses XPath as-is while the final return resolves it
                // through document.Query — confirm the difference is intended.
                var nodes = docu.DocumentNode.SelectNodes(XPath);

                // SelectNodes returns null, not an empty collection, when nothing matches.
                return(nodes == null ? 0 : nodes.Count);
            }

            return(docu.DocumentNode.GetDataFromXPath(document.Query(XPath)));
        }
Example #6
0
        /// <summary>
        /// Reads documents through <c>Connector</c> from the queried <c>FileName</c> (or
        /// <c>FileName</c> itself when the query yields nothing). When <c>IsExecute</c> is set
        /// the stream is additionally passed through <c>CacheDo</c> with a per-path buffer.
        /// </summary>
        /// <param name="document">Optional document used to resolve the file name.</param>
        /// <returns>The connector's documents, or an empty list when no connector is configured.</returns>
        public override IEnumerable <IFreeDocument> Generate(IFreeDocument document = null)
        {
            if (Connector == null)
            {
                return new List <IFreeDocument>();
            }

            var path = document?.Query(FileName) ?? FileName;
            Connector.FileName = path;

            if (IsExecute)
            {
                // Buffer size is twice the parent's sample amount.
                return Connector.ReadFile().CacheDo(buffHelper.GetOrCreate(path, new List <FreeDocument>()), this.Father.SampleMount * 2);
            }

            return Connector.ReadFile();
        }
Example #7
0
        /// <summary>
        /// Extracts data from the <c>Column</c> value with an XPath and records the first five
        /// inputs in <c>htmlResults</c>.
        /// </summary>
        /// <param name="document">Document that holds the HTML (or nested document) and resolves the XPath.</param>
        /// <returns>
        /// The data selected from a nested document, from the detected text node when
        /// <c>GetText</c> is set, or from the queried XPath otherwise.
        /// </returns>
        public override object TransformData(IFreeDocument document)
        {
            var item = document[Column];

            // Keep at most five samples, labelled URL_0 .. URL_4.
            if (htmlResults.Count < 5)
            {
                var sample = new XPathDetectorModel.HtmlResult();
                sample.HTML = item.ToString();
                sample.Url  = "URL_" + htmlResults.Count;
                htmlResults.Add(sample);
            }

            var nested = item as IFreeDocument;
            if (nested != null)
            {
                return nested.GetDataFromXPath(XPath);
            }

            var html = new HtmlDocument();
            html.LoadHtml(item.ToString());
            var root = html.DocumentNode;

            if (!GetText)
            {
                return root.GetDataFromXPath(document.Query(XPath), CrawlType, SelectorFormat);
            }

            return root.GetDataFromXPath(root.GetTextNode(), CrawlType);
        }
Example #8
0
        /// <summary>
        /// Parses the <c>Column</c> value as HTML and produces one document per node matched by
        /// the queried XPath, each carrying the node's text, inner HTML and outer HTML.
        /// </summary>
        /// <param name="data">Document that holds the HTML and resolves the XPath.</param>
        /// <returns>A lazily evaluated sequence of per-node documents; an empty list when nothing matches.</returns>
        protected override IEnumerable <IFreeDocument> InternalTransformManyData(IFreeDocument data)
        {
            var source = data[Column];
            var html   = new HtmlDocument();

            html.LoadHtml(source.ToString());

            var matches = html.DocumentNode.SelectNodes(data.Query(XPath), this.SelectorFormat);
            if (matches != null)
            {
                return matches.Select(node =>
                {
                    var row = new FreeDocument();

                    row.MergeQuery(data, NewColumn);
                    row.SetValue("Text", node.GetNodeText());
                    row.SetValue("HTML", node.InnerHtml);
                    row.SetValue("OHTML", node.OuterHtml);
                    return row;
                });
            }

            return new List <IFreeDocument>();
        }
Example #9
0
        /// <summary>
        /// Crawls the URL stored in <c>Column</c> and returns the extracted documents. Fetched
        /// pages are kept in <c>buffHelper</c>, keyed by URL (plus the post data for POST
        /// requests); a cached page is re-parsed instead of being requested again.
        /// </summary>
        /// <param name="data">Document that supplies the URL and resolves post data and retry count.</param>
        /// <returns>
        /// The crawled documents; an empty list when the URL is missing; the result of the last
        /// attempt when every attempt failed.
        /// </returns>
        private List <FreeDocument> GetDatas(IFreeDocument data)
        {
            var p = data[Column];

            if (p == null)
            {
                return(new List <FreeDocument>());
            }
            var url    = p.ToString();
            var bufkey = url;
            var post   = data.Query(PostData);

            if (crawler.Http.Method == MethodType.POST)
            {
                bufkey += post;
            }
            var htmldoc = buffHelper.Get(bufkey);

            if (htmldoc != null)
            {
                return(crawler.CrawlData(htmldoc));
            }

            var docs = new List <FreeDocument>();

            // int.TryParse writes 0 to its out argument on failure, so the default of a single
            // attempt has to be applied after the call, not before it.
            int maxcount;
            if (!int.TryParse(data.Query(MaxTryCount), out maxcount))
            {
                maxcount = 1;
            }

            for (var count = 0; count < maxcount; count++)
            {
                HttpStatusCode code;
                docs = crawler.CrawlData(url, out htmldoc, out code, post);

                // Only a successful response that produced documents is cached and accepted.
                if (HttpHelper.IsSuccess(code) && docs.Count > 0)
                {
                    buffHelper.Set(bufkey, htmldoc);
                    break;
                }
                Thread.Sleep(ErrorDelay);
            }

            return(docs);
        }
Example #10
0
        /// <summary>
        /// Generates one document per step from the queried minimum to the queried maximum
        /// (inclusive), advancing by the queried integer interval; each value is rounded to
        /// five decimals and stored in <c>Column</c>.
        /// </summary>
        /// <param name="document">Document used to resolve the interval, minimum and maximum settings.</param>
        /// <returns>
        /// The generated documents; nothing when a setting cannot be parsed or the interval
        /// is not positive.
        /// </returns>
        public override IEnumerable <IFreeDocument> Generate(IFreeDocument document = null)
        {
            int    interval;
            double max, min;

            if (!int.TryParse(document.Query(Interval), out interval) ||
                !double.TryParse(document.Query(MinValue), out min) ||
                !double.TryParse(document.Query(MaxValue), out max))
            {
                yield break;
            }

            // A zero or negative step never reaches max, so the loop would not terminate.
            if (interval <= 0)
            {
                yield break;
            }

            for (var i = min; i <= max; i += interval)
            {
                var item = new FreeDocument();

                item.Add(Column, Math.Round(i, 5));
                yield return(item);
            }
        }
Example #11
0
        /// <summary>
        /// Generates one document per step up to the queried maximum (inclusive), starting at
        /// <c>Position * interval + min</c> and advancing by the queried integer interval; each
        /// value is rounded to five decimals and stored in <c>Column</c>.
        /// </summary>
        /// <param name="document">Document used to resolve the interval, minimum and maximum settings.</param>
        /// <returns>
        /// The generated documents; nothing when a setting cannot be parsed or the interval
        /// is not positive.
        /// </returns>
        public override IEnumerable<FreeDocument> Generate(IFreeDocument document = null)
        {
            int interval;
            double max, min;
            if (!int.TryParse(document.Query(Interval), out interval) ||
                !double.TryParse(document.Query(MinValue), out min) ||
                !double.TryParse(document.Query(MaxValue), out max))
            {
                yield break;
            }

            // A zero or negative step never reaches max, so the loop would not terminate.
            if (interval <= 0)
            {
                yield break;
            }

            for (var i = Position * interval + min; i <= max; i += interval)
            {
                var item = new FreeDocument();

                item.Add(Column, Math.Round(i, 5));
                yield return item;
            }
        }
Example #12
0
        /// <summary>
        /// Keeps a document when its <c>Column</c> value, converted to a double, lies inside
        /// the queried [Min, Max] range (bounds inclusive).
        /// </summary>
        /// <param name="data">Document that supplies the value and resolves the bounds.</param>
        /// <returns>
        /// <c>false</c> when the value is missing or not convertible; <c>true</c> when either
        /// bound cannot be parsed; otherwise whether the value is within the range.
        /// </returns>
        public override bool FilteDataBase(IFreeDocument data)
        {
            object raw = data[this.Column];
            if (raw == null)
            {
                return false;
            }

            bool converted = false;
            var number = (double)AttributeHelper.ConvertTo(raw, SimpleDataType.DOUBLE, ref converted);
            if (!converted)
            {
                return false;
            }

            // Without two usable bounds there is nothing to filter on.
            double upper, lower;
            if (!double.TryParse(data.Query(Max), out upper) || !double.TryParse(data.Query(Min), out lower))
            {
                return true;
            }

            return lower <= number && number <= upper;
        }
Example #13
0
        /// <summary>
        /// Produces the documents extracted from the <c>Column</c> value, which is either a URL
        /// to request or, when <c>ProcessHtmlOnly</c> is set, HTML to parse directly. A page
        /// found in <c>buffHelper</c> is re-parsed instead of being fetched again.
        /// </summary>
        /// <param name="data">Document that supplies the URL/HTML and resolves the post data.</param>
        /// <returns>The extracted documents; an empty list when the value or <c>Crawler</c> is missing.</returns>
        /// <exception cref="Exception">
        /// Thrown when the request succeeds but yields no documents, or when the response
        /// status is not a success.
        /// </exception>
        private IEnumerable <FreeDocument> GetDatas(IFreeDocument data)
        {
            var p = data[Column];

            if (p == null || Crawler == null)
            {
                return(new List <FreeDocument>());
            }
            var urlOrHtml = p.ToString();
            var bufkey    = urlOrHtml;
            var post      = data.Query(PostData);
            var crawler   = Crawler;

            // NOTE(review): this fallback cannot run — the method has already returned above
            // when Crawler is null. Confirm which of the two behaviours is intended.
            if (crawler == null)
            {
                crawler = defaultCrawler;
            }
            // POST requests to the same URL differ by body, so the body is part of the cache key.
            if (crawler.Http.Method == MethodType.POST)
            {
                bufkey += post;
            }
            var htmldoc = buffHelper.Get(bufkey);

            if (htmldoc == null)
            {
                IEnumerable <FreeDocument> docs = null;
                HttpStatusCode             code;
                if (!ProcessHtmlOnly)
                {
                    docs = crawler.CrawlData(urlOrHtml, out htmldoc, out code, post);
                    // Any() is evaluated before the status check, so the result sequence is
                    // touched even when the request failed.
                    var any = docs.Any();
                    if (HttpHelper.IsSuccess(code))
                    {
                        if (!any)
                        {
                            // A successful response without documents is counted as a parse error.
                            ConfigFile.GetConfig <DataMiningConfig>().ParseErrorCount++;
                            throw new Exception(string.Format(GlobalHelper.Get("key_669"), urlOrHtml));
                        }
                        // The page is cached only when IsExecute is false.
                        if (this.IsExecute == false)
                        {
                            buffHelper.Set(bufkey, htmldoc);
                        }
                        return(docs);
                    }
                    throw new Exception("Web Request Error:" + code);
                }
                else
                {
                    // The column value is treated as HTML; no request is made and nothing is cached.
                    docs = crawler.CrawlHtmlData(urlOrHtml, out htmldoc);
                    return(docs);
                }
            }
            // Cache hit: extract from the stored page.
            return(crawler.CrawlData(htmldoc.DocumentNode));
        }
Example #14
0
        /// <summary>
        /// Generates the queried number of documents, each holding one value from
        /// <c>random.Next(min, max)</c> in <c>Column</c>.
        /// </summary>
        /// <param name="document">Document used to resolve the count, minimum and maximum settings.</param>
        /// <returns>The generated documents; nothing when a setting is not an integer.</returns>
        public override IEnumerable <IFreeDocument> Generate(IFreeDocument document = null)
        {
            int count, min, max;

            if (!int.TryParse(document.Query(Count), out count) ||
                !int.TryParse(document.Query(MinValue), out min) ||
                !int.TryParse(document.Query(MaxValue), out max))
            {
                yield break;
            }

            for (var produced = 0; produced < count; produced++)
            {
                var item = new FreeDocument();

                item.Add(Column, random.Next(min, max));
                yield return item;
            }
        }
Example #15
0
        /// <summary>
        /// Looks up the <c>Column</c> value through the Baidu place-search API, restricted by
        /// the queried region and tag, and copies the parsed result into the document. Results
        /// are kept in <c>buffHelper</c> keyed by the column value's text.
        /// </summary>
        /// <param name="datas">Document that supplies the search term and receives the result fields.</param>
        /// <returns><c>null</c> when the column value is missing; otherwise <c>true</c>, even when the lookup failed.</returns>
        public override object TransformData(IFreeDocument datas)
        {
            var item = datas[Column];

            if (item == null)
            {
                return null;
            }

            try
            {
                var cacheKey    = item.ToString();
                var newlocation = buffHelper.Get(cacheKey);

                if (newlocation == null)
                {
                    // Cache miss: request the API address with GET.
                    var r      = datas.Query(Region);
                    var tag    = datas.Query(Tag);
                    var apiUrl =
                        $"http://api.map.baidu.com/place/v2/search?q={item}&region={r}&tag={tag}&output={format}&ak={apikey}";

                    var response = HttpHelper.GetWebSourceHtml(apiUrl, "utf-8");

                    // The payload is kept dynamic so Parse is bound against its runtime type.
                    dynamic info = serialier.DeserializeObject(response);
                    newlocation = Parse(info);
                    buffHelper.Set(cacheKey, newlocation);
                }

                newlocation.DictCopyTo(datas);
            }
            catch (Exception)
            {
                // Failures are swallowed here; the method still reports true.
            }

            return true;
        }
Example #16
0
        /// <summary>
        /// Pauses the current thread for the queried delay and passes the document through unchanged.
        /// </summary>
        /// <param name="data">Document that resolves the delay and is returned as the only element.</param>
        /// <returns>A list containing just <paramref name="data"/>.</returns>
        protected override IEnumerable <IFreeDocument> InternalTransformManyData(IFreeDocument data)
        {
            // No sleep happens when the delay is not an integer.
            int delayMs;
            if (int.TryParse(data.Query(DelayTime), out delayMs))
            {
                Thread.Sleep(delayMs);
            }

            var passthrough = new List <IFreeDocument>();
            passthrough.Add(data);
            return passthrough;
        }
Example #17
0
        /// <summary>
        /// Generates documents for each step up to the queried maximum (inclusive), starting at
        /// <c>Position * interval + min</c> and advancing by the queried integer interval. Every
        /// step is emitted the queried number of times, rounded to five decimals.
        /// </summary>
        /// <param name="document">Document used to resolve the interval, repeat count, minimum and maximum.</param>
        /// <returns>
        /// The generated documents; nothing when a setting cannot be parsed or the interval
        /// is not positive.
        /// </returns>
        public override IEnumerable <FreeDocument> Generate(IFreeDocument document = null)
        {
            int    interval, repeat;
            double max, min;

            if (!int.TryParse(document.Query(Interval), out interval) ||
                !int.TryParse(document.Query(RepeatCount), out repeat) ||
                !double.TryParse(document.Query(MinValue), out min) ||
                !double.TryParse(document.Query(MaxValue), out max))
            {
                yield break;
            }

            // A zero or negative step never reaches max, so the loop would not terminate.
            if (interval <= 0)
            {
                yield break;
            }

            for (var i = Position * interval + min; i <= max; i += interval)
            {
                for (var j = repeat; j > 0; j--)
                {
                    var item = new FreeDocument();

                    item.Add(Column, Math.Round(i, 5));
                    yield return(item);
                }
            }
        }
Example #18
0
        /// <summary>
        /// Range filter: accepts a document when its <c>Column</c> value, converted to a
        /// double, is between the queried Min and Max (both inclusive).
        /// </summary>
        /// <param name="data">Document that supplies the value and resolves the bounds.</param>
        /// <returns>
        /// <c>false</c> for a missing or non-convertible value; <c>true</c> when the bounds
        /// cannot both be parsed; otherwise the result of the range check.
        /// </returns>
        public override bool FilteDataBase(IFreeDocument data)
        {
            object candidate = data[this.Column];

            if (candidate == null)
            {
                return false;
            }

            bool ok = false;
            var  v  = (double)AttributeHelper.ConvertTo(candidate, SimpleDataType.DOUBLE, ref ok);

            if (!ok)
            {
                return false;
            }

            double max = 1, min = 0;

            // Max is parsed first; Min is only queried when Max parsed successfully.
            bool bounded = double.TryParse(data.Query(Max), out max) && double.TryParse(data.Query(Min), out min);

            return bounded ? (min <= v && v <= max) : true;
        }
Example #19
0
        /// <summary>
        /// Applies <c>regex</c> to the text of the <c>Column</c> value, replacing matches with
        /// the queried replacement text.
        /// </summary>
        /// <param name="dict">Document that supplies the value and resolves the replacement.</param>
        /// <returns>The replaced string, or <c>null</c> when the column value is missing.</returns>
        public override object TransformData(IFreeDocument dict)
        {
            object source      = dict[Column];
            var    replacement = dict.Query(ReplaceText);

            return source == null ? null : regex.Replace(source.ToString(), replacement);
        }
Example #20
0
        /// <summary>
        /// Returns the text of a file. The path is the queried <c>FileName</c> when that
        /// resolves, otherwise <c>FileName</c> itself; contents are kept in <c>buffHelper</c>
        /// keyed by path.
        /// </summary>
        /// <param name="document">Document used to resolve the file name; may be null.</param>
        /// <returns>The cached or freshly read file contents.</returns>
        public override object TransformData(IFreeDocument document)
        {
            var path   = document?.Query(FileName) ?? FileName;
            var cached = buffHelper.Get(path);

            if (cached == null)
            {
                // Cache miss: read with the configured encoding and remember the text.
                cached = File.ReadAllText(path, AttributeHelper.GetEncoding(EncodingType));
                buffHelper.Set(path, cached);
            }

            return cached;
        }
Example #21
0
        /// <summary>
        /// Requests the URL stored in <c>Column</c> and copies response headers into the
        /// document. All headers are copied unless <c>HeaderFilter</c> names a space-separated
        /// subset.
        /// </summary>
        /// <param name="datas">Document that supplies the URL and post data and receives the header values.</param>
        /// <returns>An empty list when the URL is missing; otherwise <c>null</c>.</returns>
        object TransformData(IFreeDocument datas)
        {
            var p    = datas[Column];
            var post = datas.Query(PostData);

            if (p == null)
            {
                return(new List <FreeDocument>());
            }
            var url = p.ToString();

            Crawler.SetCookie(Crawler.Http);

            // The request is awaited synchronously; only the headers of the response are used.
            var response       = helper.GetHtml(Crawler.Http, url, post).Result;
            var responseHeader = response.ResponseHeaders;
            var keys           = responseHeader.AllKeys;

            if (!string.IsNullOrEmpty(HeaderFilter))
            {
                // Drop empty entries so repeated spaces do not produce blank header names.
                keys = HeaderFilter.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            }
            foreach (var key in keys)
            {
                var value = responseHeader.Get(key);
                if (value != null)
                {
                    datas.SetValue(key, value);
                }
            }

            // When Location was requested but the document still has none, fall back to the
            // requested URL.
            if (keys.Contains("Location") && datas.ContainsKey("Location") == false)
            {
                datas["Location"] = url;
            }

            return(null);
        }
Example #22
0
        /// <summary>
        /// Extracts data from the <c>Column</c> value with an XPath. A nested document is
        /// queried directly; any other value is parsed as HTML first.
        /// </summary>
        /// <param name="document">Document that holds the HTML (or nested document) and resolves the XPath.</param>
        /// <returns>
        /// The data selected from a nested document, from the detected text node when
        /// <c>GetText</c> is set, or from the queried XPath otherwise.
        /// </returns>
        public override object TransformData(IFreeDocument document)
        {
            var source = document[Column];
            var nested = source as IFreeDocument;

            if (nested != null)
            {
                return nested.GetDataFromXPath(XPath);
            }

            var html = new HtmlDocument();
            html.LoadHtml(source.ToString());
            var root = html.DocumentNode;

            if (GetText)
            {
                // The text-node path is detected from the page itself.
                return root.GetDataFromXPath(root.GetTextNode(), CrawlType);
            }

            return root.GetDataFromXPath(document.Query(XPath), CrawlType, SelectorFormat);
        }
Example #23
0
        /// <summary>
        /// Looks up the <c>Column</c> value through the Baidu place-search API, restricted by
        /// the queried region, and copies the parsed result into the document. Results are kept
        /// in <c>buffHelper</c> keyed by the column value's text.
        /// </summary>
        /// <param name="datas">Document that supplies the search term and receives the result fields.</param>
        /// <returns><c>null</c> when the column value is missing; otherwise <c>true</c>, even when the lookup failed.</returns>
        public override object TransformData(IFreeDocument datas)
        {
            var item = datas[Column];

            if (item == null)
            {
                return null;
            }

            try
            {
                var cacheKey    = item.ToString();
                var newlocation = buffHelper.Get(cacheKey);

                if (newlocation == null)
                {
                    // Cache miss: request the API address with GET.
                    var r = datas.Query(Region);
                    var apiUrl =
                        $"http://api.map.baidu.com/place/v2/search?q={item}&region={r}&output={format}&ak={apikey}";

                    var response = HttpHelper.GetWebSourceHtml(apiUrl, "utf-8");

                    // The payload is kept dynamic so Parse is bound against its runtime type.
                    dynamic info = serialier.DeserializeObject(response);
                    newlocation = Parse(info);
                    buffHelper.Set(cacheKey, newlocation);
                }

                newlocation.DictCopyTo(datas);
            }
            catch (Exception)
            {
                // Failures are swallowed here; the method still reports true.
            }

            return true;
        }
Example #24
0
        /// <summary>
        /// Extracts data from the <c>Column</c> value with an XPath. A nested document is
        /// queried directly; any other value is parsed as HTML first.
        /// </summary>
        /// <param name="document">Document that holds the HTML (or nested document) and resolves the XPath.</param>
        /// <returns>
        /// <c>null</c> when the column value is missing; the text of the detected text node when
        /// <c>GetText</c> is set and such a node exists; the number of matching nodes when
        /// <c>GetCount</c> is set (0 when nothing matches); otherwise the data selected by the XPath.
        /// </returns>
        public override object TransformData(IFreeDocument document)
        {
            var item = document[Column];

            // Nothing to parse; return null like the sibling transformers do for a missing value.
            if (item == null)
            {
                return null;
            }

            var nested = item as IFreeDocument;
            if (nested != null)
            {
                return nested.GetDataFromXPath(XPath);
            }
            var docu = new HtmlDocument();

            docu.LoadHtml(item.ToString());
            if (GetText)
            {
                var path = docu.DocumentNode.GetTextNode();
                var textnode = docu.DocumentNode.SelectSingleNode(path);
                if (textnode != null)
                {
                    return textnode.GetNodeText();
                }
            }
            if (GetCount)
            {
                // NOTE(review): this branch uses XPath as-is while the final return resolves it
                // through document.Query — confirm the difference is intended.
                var nodes = docu.DocumentNode.SelectNodes(XPath);

                // SelectNodes returns null, not an empty collection, when nothing matches.
                return nodes == null ? 0 : nodes.Count;
            }

            return docu.DocumentNode.GetDataFromXPath(document.Query(XPath));
        }
Example #25
0
        /// <summary>
        /// Crawls the URL stored in <c>Column</c>, returns the extracted documents and, when a
        /// <c>generator</c> is attached, queues the page's links on it. Fetched pages are kept
        /// in <c>buffHelper</c>, keyed by URL (plus the post data for POST requests).
        /// </summary>
        /// <param name="data">Document that supplies the URL and resolves post data, delay and retry count.</param>
        /// <returns>
        /// The crawled documents; an empty list when the URL is missing; the result of the last
        /// attempt when every attempt failed.
        /// </returns>
        private List <FreeDocument> GetDatas(IFreeDocument data)
        {
            var p = data[Column];

            if (p == null)
            {
                return(new List <FreeDocument>());
            }
            var url    = p.ToString();
            var bufkey = url;
            var post   = data.Query(PostData);

            if (crawler.Http.Method == MethodType.POST)
            {
                bufkey += post;
            }
            var htmldoc = buffHelper.Get(bufkey);
            var docs    = new List <FreeDocument>();

            if (htmldoc == null)
            {
                // Optional pause before requesting; non-positive or unparsable delays are
                // skipped (Thread.Sleep rejects negative values other than -1).
                int delaytime;
                if (int.TryParse(data.Query(DelayTime), out delaytime) && delaytime > 0)
                {
                    Thread.Sleep(delaytime);
                }

                // int.TryParse writes 0 to its out argument on failure, so the default of a
                // single attempt has to be applied after the call, not before it.
                int maxcount;
                if (!int.TryParse(data.Query(MaxTryCount), out maxcount))
                {
                    maxcount = 1;
                }

                for (int count = 0; count < maxcount; count++)
                {
                    HttpStatusCode code;
                    docs = crawler.CrawlData(url, out htmldoc, out code, post);
                    if (HttpHelper.IsSuccess(code))
                    {
                        buffHelper.Set(bufkey, htmldoc);
                        break;
                    }
                    Thread.Sleep(ErrorDelay);
                }
            }
            else
            {
                docs = crawler.CrawlData(htmldoc);
            }

            // htmldoc is still null when no attempt ran or the crawl produced no page.
            if (generator != null && htmldoc != null)
            {
                // SelectNodes returns null, not an empty collection, when nothing matches.
                var others = htmldoc.DocumentNode.SelectNodes("//@href");

                if (others != null)
                {
                    IEnumerable <string> hrefs = others.Select(d => d.Attributes["href"].Value).ToList();

                    // Without a Prefix every link is queued; otherwise Prefix is a literal
                    // start-of-string filter, or links are tested against regex when IsRegex is set.
                    if (!string.IsNullOrEmpty(Prefix))
                    {
                        if (IsRegex == false)
                        {
                            hrefs = hrefs.Where(d => d.StartsWith(Prefix));
                        }
                        else
                        {
                            hrefs = hrefs.Where(d => regex.IsMatch(d));
                        }
                    }
                    foreach (var href in hrefs)
                    {
                        generator.InsertQueue(href);
                    }
                }
            }
            return(docs);
        }
Example #26
0
        /// <summary>
        /// Looks up a route from the <c>Column</c> value (origin) to the queried destination
        /// through the Baidu direction API and copies the route fields (distance, duration and
        /// mode-specific extras) into the document. Results are kept in <c>buffHelper</c> under
        /// a key built from origin, destination, both cities and the travel mode.
        /// </summary>
        /// <param name="datas">Document that supplies origin, destination and cities and receives the route fields.</param>
        /// <returns><c>null</c> when the origin value is missing; otherwise <c>true</c>, even when the lookup failed.</returns>
        public override object TransformData(IFreeDocument datas)
        {
            var item = datas[Column];

            if (item == null)
            {
                return(null);
            }
            try
            {
                var source      = item.ToString();
                var dest        = datas.Query(Dest);
                var sourcecity  = datas.Query(SourceCity);
                var destcity    = datas.Query(DestCity);
                var mode        = map[ModeSelector.SelectItem];
                var key         = $"{source},{dest},{sourcecity},{destcity},{mode}";
                var newlocation = buffHelper.Get(key);
                if (newlocation == null)
                {
                    // Transit and walking take a single region; other modes take one region per end.
                    var region = "";
                    if (mode == "transit" || mode == "walking")
                    {
                        region = $"region={sourcecity}";
                    }
                    else
                    {
                        region = $"origin_region={sourcecity}&destination_region={destcity}";
                    }

                    var apiUrl =
                        $"http://api.map.baidu.com/direction/v1?mode={mode}&origin={source}&destination={dest}&{region}&output={format}&ak={apikey}";

                    // Request the API address with GET.
                    var result = HttpHelper.GetWebSourceHtml(apiUrl, "utf-8");

                    // NOTE(review): info is dynamic, so ToInt32() is bound at runtime and extension
                    // methods are not visible to that binding — confirm the runtime type provides it.
                    dynamic info = serialier.DeserializeObject(result);
                    if (info["status"].ToInt32() == 0 && info["type"].ToInt32() == 2)
                    {
                        var first = info["result"];
                        newlocation = new FreeDocument();

                        if (mode == "transit")
                        {
                            newlocation["distance"] = first["routes"]["scheme"]["distance"];
                            newlocation["duration"] = first["routes"]["scheme"]["duration"];

                            newlocation["price"] = first["routes"]["scheme"]["price"];
                        }
                        else if (mode == "walking")
                        {
                            newlocation["distance"] = first["routes"][0]["distance"];
                            newlocation["duration"] = first["routes"][0]["duration"];
                        }
                        else
                        {
                            newlocation["distance"] = first["routes"][0]["distance"];
                            newlocation["duration"] = first["routes"][0]["duration"];

                            newlocation["traffic_condition"] = first["traffic_condition"];
                            // NOTE(review): toll is read from routes directly, not routes[0] — confirm.
                            newlocation["toll"] = first["routes"]["toll"];
                        }
                    }

                    // Store under the same composite key the lookup above uses; a failed lookup
                    // (no usable response) is not cached.
                    if (newlocation != null)
                    {
                        buffHelper.Set(key, newlocation);
                    }
                }
                if (newlocation != null)
                {
                    newlocation.DictCopyTo(datas);
                }
            }
            catch (Exception)
            {
                // Failures are swallowed here; the method still reports true.
            }
            return(true);
        }
Example #27
0
        /// <summary>
        /// Looks up a route from the <c>Column</c> value (origin) to the queried destination
        /// through the Baidu direction API and copies the route fields (distance, duration and
        /// mode-specific extras) into the document. Results are kept in <c>buffHelper</c> under
        /// a key built from origin, destination, both cities and the travel mode.
        /// </summary>
        /// <param name="datas">Document that supplies origin, destination and cities and receives the route fields.</param>
        /// <returns><c>null</c> when the origin value is missing; otherwise <c>true</c>, even when the lookup failed.</returns>
        public override object TransformData(IFreeDocument datas)
        {
            var item = datas[Column];

            if (item == null)
            {
                return null;
            }
            try
            {
                var source = item.ToString();
                var dest = datas.Query(Dest);
                var sourcecity = datas.Query(SourceCity);
                var destcity = datas.Query(DestCity);
                var mode = map[ModeSelector.SelectItem];
                var key = $"{source},{dest},{sourcecity},{destcity},{mode}";
                var newlocation = buffHelper.Get(key);
                if (newlocation == null)
                {
                    // Transit and walking take a single region; other modes take one region per end.
                    var region = "";
                    if (mode == "transit" || mode == "walking")
                    {
                        region = $"region={sourcecity}";
                    }
                    else
                    {
                        region = $"origin_region={sourcecity}&destination_region={destcity}";
                    }

                    var apiUrl =
                        $"http://api.map.baidu.com/direction/v1?mode={mode}&origin={source}&destination={dest}&{region}&output={format}&ak={apikey}";

                    // Request the API address with GET.
                    var result = HttpHelper.GetWebSourceHtml(apiUrl, "utf-8");

                    // NOTE(review): info is dynamic, so ToInt32() is bound at runtime and extension
                    // methods are not visible to that binding — confirm the runtime type provides it.
                    dynamic info = serialier.DeserializeObject(result);
                    if (info["status"].ToInt32() == 0 && info["type"].ToInt32() == 2)
                    {
                        var first = info["result"];
                        newlocation = new FreeDocument();

                        if (mode == "transit")
                        {
                            newlocation["distance"] = first["routes"]["scheme"]["distance"];
                            newlocation["duration"] = first["routes"]["scheme"]["duration"];

                            newlocation["price"] = first["routes"]["scheme"]["price"];
                        }
                        else if (mode == "walking")
                        {
                            newlocation["distance"] = first["routes"][0]["distance"];
                            newlocation["duration"] = first["routes"][0]["duration"];
                        }
                        else
                        {
                            newlocation["distance"] = first["routes"][0]["distance"];
                            newlocation["duration"] = first["routes"][0]["duration"];

                            newlocation["traffic_condition"] = first["traffic_condition"];
                            // NOTE(review): toll is read from routes directly, not routes[0] — confirm.
                            newlocation["toll"] = first["routes"]["toll"];
                        }
                    }

                    // Store under the same composite key the lookup above uses; a failed lookup
                    // (no usable response) is not cached.
                    if (newlocation != null)
                    {
                        buffHelper.Set(key, newlocation);
                    }
                }
                if (newlocation != null)
                {
                    newlocation.DictCopyTo(datas);
                }
            }
            catch (Exception)
            {
                // Failures are swallowed here; the method still reports true.
            }
            return true;
        }
Example #28
0
        /// <summary>
        /// Crawls the URL stored in <c>Column</c>, returns the extracted documents and, when a
        /// <c>generator</c> is attached, queues the page's links on it. Fetched pages are kept
        /// in <c>buffHelper</c>, keyed by URL (plus the post data for POST requests).
        /// </summary>
        /// <param name="data">Document that supplies the URL and resolves post data, delay and retry count.</param>
        /// <returns>
        /// The crawled documents; an empty list when the URL is missing; the result of the last
        /// attempt when every attempt failed.
        /// </returns>
        private List<FreeDocument> GetDatas(IFreeDocument data)
        {
            var p = data[Column];
            if (p == null)
            {
                return new List<FreeDocument>();
            }
            var url = p.ToString();
            var bufkey = url;
            var post = data.Query(PostData);
            if (crawler.Http.Method == MethodType.POST)
            {
                bufkey += post;
            }
            var htmldoc = buffHelper.Get(bufkey);
            var docs = new List<FreeDocument>();
            if (htmldoc == null)
            {
                // Optional pause before requesting; non-positive or unparsable delays are
                // skipped (Thread.Sleep rejects negative values other than -1).
                int delaytime;
                if (int.TryParse(data.Query(DelayTime), out delaytime) && delaytime > 0)
                {
                    Thread.Sleep(delaytime);
                }

                // int.TryParse writes 0 to its out argument on failure, so the default of a
                // single attempt has to be applied after the call, not before it.
                int maxcount;
                if (!int.TryParse(data.Query(MaxTryCount), out maxcount))
                {
                    maxcount = 1;
                }

                for (int count = 0; count < maxcount; count++)
                {
                    HttpStatusCode code;
                    docs = crawler.CrawlData(url, out htmldoc, out code, post);
                    if (HttpHelper.IsSuccess(code))
                    {
                        buffHelper.Set(bufkey, htmldoc);
                        break;
                    }
                    Thread.Sleep(ErrorDelay);
                }
            }
            else
            {
                docs = crawler.CrawlData(htmldoc);
            }

            // htmldoc is still null when no attempt ran or the crawl produced no page.
            if (generator != null && htmldoc != null)
            {
                // SelectNodes returns null, not an empty collection, when nothing matches.
                var others = htmldoc.DocumentNode.SelectNodes("//@href");

                if (others != null)
                {
                    IEnumerable<string> hrefs = others.Select(d => d.Attributes["href"].Value).ToList();

                    // Without a Prefix every link is queued; otherwise Prefix is a literal
                    // start-of-string filter, or links are tested against regex when IsRegex is set.
                    if (!string.IsNullOrEmpty(Prefix))
                    {
                        if (IsRegex == false)
                        {
                            hrefs = hrefs.Where(d => d.StartsWith(Prefix));
                        }
                        else
                        {
                            hrefs = hrefs.Where(d => regex.IsMatch(d));
                        }
                    }
                    foreach (var href in hrefs)
                    {
                        generator.InsertQueue(href);
                    }
                }
            }
            return docs;
        }
 /// <summary>
 /// Resolves <c>NewValue</c> against the document and returns the result.
 /// </summary>
 /// <param name="free">Document the value is queried from.</param>
 /// <returns>The result of querying <c>NewValue</c>.</returns>
 public override object TransformData(IFreeDocument free) => free.Query(NewValue);
Example #30
0
 /// <summary>
 /// Resolves <c>NewValue</c> against the document and returns the result.
 /// </summary>
 /// <param name="free">Document the value is queried from.</param>
 /// <returns>The result of querying <c>NewValue</c>.</returns>
 public override object TransformData(IFreeDocument free)
 {
     var resolved = free.Query(NewValue);

     return resolved;
 }