Exemple #1
0
        /// <summary>
        ///     Queries the selected connector's table with a single <c>KeyName</c> = value filter
        ///     (the value is read from this document's <c>Column</c>) and copies every column
        ///     listed in <c>ImportColumns</c> from the result back into the document.
        /// </summary>
        /// <param name="datas">Document that supplies the lookup value and receives the imported columns.</param>
        /// <returns>Always <c>null</c>; all results are written into <paramref name="datas" />.</returns>
        public override object TransformData(IFreeDocument datas)
        {
            object item = datas[Column];

            IDataBaseConnector con = ConnectorSelector.SelectItem;
            if (con == null)
            {
                return null;
            }

            TableInfo table = TableSelector.SelectItem;
            if (table == null)
            {
                return null;
            }

            // Both modes use the same filter, so build it once.
            var query = new Dictionary<string, object> { { KeyName, item } };

            if (IsMutliDatas)
            {
                // Materialize the result exactly once: calling Any() and then Select() on the
                // raw result would enumerate it twice.
                // The -1 argument is presumably "no row limit" - TODO confirm against TryFindEntities.
                var rows = con.TryFindEntities(table.Name, query, null, -1, SearchStrategy)
                              .Select(d => d.DictSerialize())
                              .ToList();
                if (rows.Count == 0)
                {
                    return null;
                }

                // Each imported column becomes a list holding that column's value from every row.
                foreach (string importColumn in ImportColumns)
                {
                    var values = new List<object>(rows.Count);
                    foreach (var row in rows)
                    {
                        values.Add(row[importColumn]);
                    }
                    datas.SetValue(importColumn, values);
                }
            }
            else
            {
                var first = con.TryFindEntities(table.Name, query, null, 1, SearchStrategy).FirstOrDefault();
                if (first == null)
                {
                    return null;
                }

                FreeDocument dict = first.DictSerialize();
                foreach (string importColumn in ImportColumns)
                {
                    datas.SetValue(importColumn, dict[importColumn]);
                }
            }

            return null;
        }
Exemple #2
0
        /// <summary>
        ///     Splits the text of <c>Column</c> with <c>regex</c> and stores the fragment at
        ///     position <c>Index</c> (counted from the end when <c>FromBack</c> is set) under
        ///     <c>NewColumn</c>.
        /// </summary>
        /// <param name="dict">Document that is read from and written to.</param>
        /// <returns>Always <c>null</c>; the fragment is written into <paramref name="dict" />.</returns>
        public override object TransformData(IFreeDocument dict)
        {
            var item = dict[Column];
            if (item == null)
            {
                return null;
            }

            var items = regex.Split(item.ToString());

            // Reject both ends of the range. The previous code only checked the upper bound,
            // so a negative Index indexed outside the array instead of being skipped.
            if (Index < 0 || Index >= items.Length)
            {
                return null;
            }

            // With 0 <= Index < Length the mirrored position is always valid, so no further
            // bounds check is needed for the from-back case.
            var position = FromBack == false ? Index : items.Length - Index - 1;
            dict.SetValue(NewColumn, items[position]);

            return null;
        }
Exemple #3
0
 /// <summary>
 ///     Writes <paramref name="item" /> into <paramref name="doc" /> under <c>NewColumn</c>,
 ///     or under <c>Column</c> when no new column name is configured.
 /// </summary>
 /// <param name="doc">Document that receives the value.</param>
 /// <param name="item">Value to store.</param>
 protected void SetValue(IFreeDocument doc, object item)
 {
     // A configured target column takes precedence over the source column.
     if (!string.IsNullOrEmpty(NewColumn))
     {
         doc.SetValue(NewColumn, item);
         return;
     }

     doc.SetValue(Column, item);
 }
Exemple #4
0
        /// <summary>
        ///     Requests the URL stored in <c>Column</c> and copies response headers into the
        ///     document, one key per header name.
        /// </summary>
        /// <param name="datas">Document that supplies the URL and receives the header values.</param>
        /// <returns>
        ///     An empty <c>List&lt;FreeDocument&gt;</c> when the column holds no value, otherwise <c>null</c>.
        /// </returns>
        public override object TransformData(IFreeDocument datas)
        {
            var cell = datas[Column];
            if (cell == null)
            {
                return new List<FreeDocument>();
            }

            var url = cell.ToString();

            WebHeaderCollection responseHeader;
            HttpStatusCode code;
            // Only the headers are used here; the returned body and status code are ignored.
            helper.GetHtml(crawler.Http, out responseHeader, out code, url);

            // Default to every header in the response; a non-empty HeaderFilter
            // (space separated names) narrows the selection.
            string[] keys = responseHeader.AllKeys;
            if (!string.IsNullOrEmpty(HeaderFilter))
            {
                keys = HeaderFilter.Split(' ');
            }

            foreach (var key in keys)
            {
                var headerValue = responseHeader.Get(key);
                if (headerValue == null)
                {
                    continue;
                }
                datas.SetValue(key, headerValue);
            }

            // When "Location" was selected but the document has no such key, fall back to the requested URL.
            if (keys.Contains("Location"))
            {
                if (!datas.ContainsKey("Location"))
                {
                    datas["Location"] = url;
                }
            }

            return null;
        }
Exemple #5
0
        /// <summary>
        ///     Requests the URL stored in <c>Column</c> and copies response headers into the
        ///     document, one key per header name.
        /// </summary>
        /// <param name="datas">Document that supplies the URL and receives the header values.</param>
        /// <returns>
        ///     An empty <c>List&lt;FreeDocument&gt;</c> when the column holds no value, otherwise <c>null</c>.
        /// </returns>
        object TransformData(IFreeDocument datas)
        {
            var p = datas[Column];
            if (p == null)
            {
                return new List<FreeDocument>();
            }

            var url = p.ToString();

            // NOTE(review): .Result blocks the calling thread until the request task completes.
            // Only the response headers are needed, so the body and status code are not read.
            var responseHeader = helper.GetHtml(Crawler.Http, url).Result.ResponseHeaders;

            // Default to every header in the response; a non-empty HeaderFilter
            // (space separated names) narrows the selection.
            var keys = responseHeader.AllKeys;
            if (!string.IsNullOrEmpty(HeaderFilter))
            {
                keys = HeaderFilter.Split(' ');
            }

            foreach (var key in keys)
            {
                var value = responseHeader.Get(key);
                if (value != null)
                {
                    datas.SetValue(key, value);
                }
            }

            // When "Location" was selected but the document has no such key, fall back to the requested URL.
            if (keys.Contains("Location") && !datas.ContainsKey("Location"))
            {
                datas["Location"] = url;
            }

            return null;
        }
        /// <summary>
        ///     Renames a column: removes <c>Column</c> from the document and stores its value
        ///     under <c>NewColumn</c>, or under <c>Column + "1"</c> when no new name is configured.
        ///     Nothing happens when the column holds no value.
        /// </summary>
        /// <param name="document">Document whose column is renamed in place.</param>
        /// <returns>Always <c>null</c>.</returns>
        public override object TransformData(IFreeDocument document)
        {
            object value = document[Column];
            if (value == null)
            {
                return null;
            }

            document.Remove(Column);

            var destination = string.IsNullOrEmpty(NewColumn) ? Column + "1" : NewColumn;
            document.SetValue(destination, value);
            return null;
        }
Exemple #7
0
        /// <summary>
        ///     Evaluates the crawl item's XPath against the document's root node and, when a
        ///     result is found, stores it in <paramref name="document" /> under the crawl item's name.
        /// </summary>
        /// <param name="doc">HTML document to query.</param>
        /// <param name="crawItem">Supplies the XPath, the <c>IsHTML</c> flag and the target key name.</param>
        /// <param name="document">Document that receives the extracted value.</param>
        public static void GetDataFromXPath(this HtmlDocument doc, CrawlItem crawItem, IFreeDocument document)
        {
            var extracted = doc.DocumentNode.GetDataFromXPath(crawItem.XPath, crawItem.IsHTML);
            if (extracted == null)
            {
                // No match: leave the document untouched.
                return;
            }

            document.SetValue(crawItem.Name, extracted);
        }
Exemple #8
0
        /// <summary>
        ///     Renames a column: removes <c>Column</c> from the document and stores its value
        ///     under <c>NewColumn</c>, or under <c>Column + "1"</c> when no new name is configured.
        ///     Nothing happens when the column holds no value.
        /// </summary>
        /// <param name="document">Document whose column is renamed in place.</param>
        /// <returns>Always <c>null</c>.</returns>
        public override object TransformData(IFreeDocument document)
        {
            object current = document[Column];
            if (current == null)
            {
                return null;
            }

            document.Remove(Column);

            if (string.IsNullOrEmpty(NewColumn))
            {
                // No explicit target: fall back to the source name with a "1" suffix.
                document.SetValue(Column + "1", current);
            }
            else
            {
                document.SetValue(NewColumn, current);
            }

            return null;
        }
Exemple #9
0
        /// <summary>
        ///     Evaluates the crawl item's XPath, after applying <c>TakeOff(shortv)</c> to it,
        ///     against the document's root node and, when a result is found, stores it in
        ///     <paramref name="document" /> under the crawl item's name.
        /// </summary>
        /// <param name="doc">HTML document to query.</param>
        /// <param name="crawItem">Supplies the XPath, the <c>IsHTML</c> flag and the target key name.</param>
        /// <param name="shortv">
        ///     Path passed to <c>XPath.TakeOff</c>; presumably a shared prefix that is stripped
        ///     from the item's XPath - TODO confirm against <c>XPath.TakeOff</c>.
        /// </param>
        /// <param name="document">Document that receives the extracted value.</param>
        public static void GetDataFromXPath(this HtmlDocument doc, CrawlItem crawItem, string shortv,
                                            IFreeDocument document)
        {
            var adjustedPath = new XPath(crawItem.XPath).TakeOff(shortv).ToString();
            var extracted = doc.DocumentNode.GetDataFromXPath(adjustedPath, crawItem.IsHTML);
            if (extracted == null)
            {
                // No match: leave the document untouched.
                return;
            }

            document.SetValue(crawItem.Name, extracted);
        }
Exemple #10
0
        /// <summary>
        ///     Splits the text of <c>Column</c> with <c>regex</c> and stores the fragment at
        ///     position <c>Index</c> (counted from the end when <c>FromBack</c> is set) under
        ///     <c>NewColumn</c>.
        /// </summary>
        /// <param name="dict">Document that is read from and written to.</param>
        /// <returns>Always <c>null</c>; the fragment is written into <paramref name="dict" />.</returns>
        public override object TransformData(IFreeDocument dict)
        {
            var item = dict[Column];
            if (item == null)
                return null;

            var items = regex.Split(item.ToString());

            // Guard both bounds: a negative Index previously slipped past the upper-bound
            // check and indexed outside the array.
            if (Index < 0 || Index >= items.Length)
                return null;

            // 0 <= Index < Length guarantees the mirrored position is in range as well,
            // so the from-back case needs no separate check.
            var position = FromBack == false ? Index : items.Length - Index - 1;
            dict.SetValue(NewColumn, items[position]);

            return null;
        }
Exemple #11
0
        /// <summary>
        ///     Copies the named columns from <paramref name="doc2" /> into
        ///     <paramref name="document" />.
        /// </summary>
        /// <param name="document">Document that receives the values.</param>
        /// <param name="doc2">Source document; when <c>null</c> nothing is copied.</param>
        /// <param name="Columns">Space separated column names; when blank nothing is copied.</param>
        /// <returns>The same <paramref name="document" /> instance.</returns>
        public static IFreeDocument MergeQuery(this IFreeDocument document, IFreeDocument doc2, string Columns)
        {
            if (doc2 != null && !string.IsNullOrWhiteSpace(Columns))
            {
                // Empty entries are dropped so repeated spaces do not produce blank names.
                string[] names = Columns.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);
                for (int i = 0; i < names.Length; i++)
                {
                    document.SetValue(names[i], doc2[names[i]]);
                }
            }

            return document;
        }
Exemple #12
0
        /// <summary>
        ///     Requests the URL stored in <c>Column</c> and copies the response headers named
        ///     in <c>HeaderFilter</c> (space separated) into the document.
        /// </summary>
        /// <param name="datas">Document that supplies the URL and receives the header values.</param>
        /// <returns>Always <c>null</c>; header values are written into <paramref name="datas" />.</returns>
        public override object TransformData(IFreeDocument datas)
        {
            // Skip documents without a URL instead of failing on ToString().
            var target = datas[Column];
            if (target == null)
            {
                return null;
            }

            HttpStatusCode code;
            WebHeaderCollection responseHeader;
            // Only the headers are used; the returned body and status code are ignored.
            helper.GetHtml(crawler.Http, out responseHeader, out code, target.ToString());

            // Without a filter there is nothing to copy. A null filter used to throw on Split.
            if (string.IsNullOrEmpty(HeaderFilter))
            {
                return null;
            }

            // Read the key array once rather than on every loop iteration.
            var available = responseHeader.AllKeys;
            foreach (var key in HeaderFilter.Split(' '))
            {
                if (available.Contains(key))
                {
                    datas.SetValue(key, responseHeader[key]);
                }
            }

            return null;
        }
Exemple #13
0
        /// <summary>
        ///     Copies columns from <paramref name="doc2" /> into <paramref name="document" />.
        /// </summary>
        /// <param name="document">Document that receives the values.</param>
        /// <param name="doc2">Source document; when <c>null</c> nothing is copied.</param>
        /// <param name="columnNames">
        ///     Space separated column names, or exactly <c>"*"</c> to copy every key of
        ///     <paramref name="doc2" />; when blank nothing is copied.
        /// </param>
        /// <returns>The same <paramref name="document" /> instance.</returns>
        public static IFreeDocument MergeQuery(this IFreeDocument document, IFreeDocument doc2, string columnNames)
        {
            if (doc2 == null || string.IsNullOrWhiteSpace(columnNames))
            {
                return document;
            }

            // The wildcard is matched against the raw string, so it must be exactly "*".
            var selected = columnNames == "*"
                ? doc2.Keys.ToArray()
                : columnNames.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var name in selected)
            {
                document.SetValue(name, doc2[name]);
            }

            return document;
        }
Exemple #14
0
 /// <summary>
 ///     Stores <paramref name="item" /> in <paramref name="doc" />, using <c>NewColumn</c> as
 ///     the key when it is set and <c>Column</c> otherwise.
 /// </summary>
 /// <param name="doc">Document that receives the value.</param>
 /// <param name="item">Value to store.</param>
 protected void SetValue(IFreeDocument doc, object item)
 {
     bool hasTargetColumn = !string.IsNullOrEmpty(NewColumn);
     if (hasTargetColumn)
     {
         doc.SetValue(NewColumn, item);
     }
     else
     {
         doc.SetValue(Column, item);
     }
 }
Exemple #15
0
        /// <summary>
        ///     Runs the crawl item's XPath, adjusted by <c>TakeOff(shortv)</c>, against the
        ///     document's root node and writes a non-null result into
        ///     <paramref name="document" /> under the crawl item's name.
        /// </summary>
        /// <param name="doc">HTML document to query.</param>
        /// <param name="crawItem">Supplies the XPath, the <c>IsHTML</c> flag and the target key name.</param>
        /// <param name="shortv">
        ///     Path passed to <c>XPath.TakeOff</c>; presumably a shared prefix that is stripped
        ///     from the item's XPath - TODO confirm against <c>XPath.TakeOff</c>.
        /// </param>
        /// <param name="document">Document that receives the extracted value.</param>
        public static void GetDataFromXPath(this HtmlDocument doc, CrawlItem crawItem, string shortv,
            IFreeDocument document)
        {
            var itemPath = new XPath(crawItem.XPath);
            var queryPath = itemPath.TakeOff(shortv).ToString();

            var found = doc.DocumentNode.GetDataFromXPath(queryPath, crawItem.IsHTML);
            if (found == null)
                return;

            document.SetValue(crawItem.Name, found);
        }
Exemple #16
0
        /// <summary>
        ///     Runs the crawl item's XPath against the document's root node and writes a
        ///     non-null result into <paramref name="document" /> under the crawl item's name.
        /// </summary>
        /// <param name="doc">HTML document to query.</param>
        /// <param name="crawItem">Supplies the XPath, the <c>IsHTML</c> flag and the target key name.</param>
        /// <param name="document">Document that receives the extracted value.</param>
        public static void GetDataFromXPath(this HtmlDocument doc, CrawlItem crawItem, IFreeDocument document)
        {
            var root = doc.DocumentNode;
            var found = root.GetDataFromXPath(crawItem.XPath, crawItem.IsHTML);
            if (found == null)
                return;

            document.SetValue(crawItem.Name, found);
        }
Exemple #17
0
        /// <summary>
        ///     Queries the selected connector's table with one filter entry per name in
        ///     <c>KeyName</c> (space separated; each value is read from this document under
        ///     the same name) and copies every column listed in <c>ImportColumns</c> from the
        ///     result back into the document.
        /// </summary>
        /// <param name="datas">Document that supplies the filter values and receives the imported columns.</param>
        /// <returns>Always <c>null</c>; all results are written into <paramref name="datas" />.</returns>
        public override object TransformData(IFreeDocument datas)
        {
            IDataBaseConnector con = ConnectorSelector.SelectItem;
            if (con == null)
                return null;
            TableInfo table = TableSelector.SelectItem;
            if (table == null)
                return null;

            // Drop blank names (from repeated or trailing spaces) and duplicates: a blank name
            // would add a bogus filter entry and a duplicate makes ToDictionary throw.
            var query = KeyName.Split(' ')
                .Where(name => name.Length > 0)
                .Distinct()
                .ToDictionary(d => d, d => datas[d]);

            if (IsMutliDatas)
            {
                // Materialize the result exactly once: calling Any() and then Select() on the
                // raw result would enumerate it twice.
                // The -1 argument is presumably "no row limit" - TODO confirm against TryFindEntities.
                var rows = con.TryFindEntities(table.Name, query, null, -1, SearchStrategy)
                    .Select(d => d.DictSerialize())
                    .ToList();
                if (rows.Count == 0)
                    return null;

                // Each imported column becomes a list holding that column's value from every row.
                foreach (string importColumn in ImportColumns)
                {
                    var values = new List<object>(rows.Count);
                    foreach (var row in rows)
                    {
                        values.Add(row[importColumn]);
                    }
                    datas.SetValue(importColumn, values);
                }
            }
            else
            {
                var first = con.TryFindEntities(table.Name, query, null, 1, SearchStrategy).FirstOrDefault();
                if (first == null)
                    return null;

                FreeDocument dict = first.DictSerialize();
                foreach (string importColumn in ImportColumns)
                {
                    datas.SetValue(importColumn, dict[importColumn]);
                }
            }

            return null;
        }
Exemple #18
0
 /// <summary>
 ///     Requests the URL stored in <c>Column</c> and copies the response headers named
 ///     in <c>HeaderFilter</c> (space separated) into the document.
 /// </summary>
 /// <param name="datas">Document that supplies the URL and receives the header values.</param>
 /// <returns>Always <c>null</c>; header values are written into <paramref name="datas" />.</returns>
 public override object TransformData(IFreeDocument datas)
 {
     // Skip documents without a URL instead of failing on ToString().
     var target = datas[Column];
     if (target == null)
         return null;

     HttpStatusCode code;
     WebHeaderCollection responseHeader;
     // Only the headers are used; the returned body and status code are ignored.
     helper.GetHtml(crawler.Http, out responseHeader, out code, target.ToString());

     // Without a filter there is nothing to copy. A null filter used to throw on Split.
     if (string.IsNullOrEmpty(HeaderFilter))
         return null;

     // Read the key array once rather than on every loop iteration.
     var available = responseHeader.AllKeys;
     foreach (var key in HeaderFilter.Split(' '))
     {
         if (available.Contains(key))
             datas.SetValue(key, responseHeader[key]);
     }
     return null;
 }