/// <summary>
/// Looks up rows in the selected table whose <c>KeyName</c> field equals the value stored
/// under <c>Column</c> in <paramref name="datas"/>, and copies every column listed in
/// <c>ImportColumns</c> from the match(es) into the document.
/// </summary>
/// <remarks>
/// When <c>IsMutliDatas</c> is set, each imported column receives a list holding one value
/// per matching row; otherwise only the first matching row is used and its values are
/// copied directly. Nothing is written when no connector or table is selected, or when
/// the lookup finds no rows.
/// </remarks>
/// <param name="datas">Document that supplies the lookup value and receives the imported columns.</param>
/// <returns>Always <c>null</c>; the results are written into <paramref name="datas"/>.</returns>
public override object TransformData(IFreeDocument datas)
{
    object item = datas[Column];

    IDataBaseConnector con = ConnectorSelector.SelectItem;
    if (con == null)
    {
        return null;
    }

    TableInfo table = TableSelector.SelectItem;
    if (table == null)
    {
        return null;
    }

    var query = new Dictionary<string, object> { { KeyName, item } };

    if (IsMutliDatas)
    {
        // Materialize the result exactly once. Calling Any() and then Select() on the raw
        // result enumerates it twice, which may execute the lookup twice if it is lazy.
        // The count argument -1 presumably means "no limit" - confirm against the connector.
        var dicts = con.TryFindEntities(table.Name, query, null, -1, SearchStrategy)
            .Select(d => d.DictSerialize())
            .ToList();
        if (dicts.Count == 0)
        {
            return null;
        }

        foreach (string importColumn in ImportColumns)
        {
            // dicts is known to be non-empty here, so the list always has content.
            var res = new List<object>(dicts.Count);
            foreach (var row in dicts)
            {
                res.Add(row[importColumn]);
            }
            datas.SetValue(importColumn, res);
        }
    }
    else
    {
        var r = con.TryFindEntities(table.Name, query, null, 1, SearchStrategy).FirstOrDefault();
        if (r == null)
        {
            return null;
        }

        FreeDocument dict = r.DictSerialize();
        foreach (string importColumn in ImportColumns)
        {
            datas.SetValue(importColumn, dict[importColumn]);
        }
    }

    return null;
}
/// <summary>
/// Splits the value stored under <c>Column</c> with <c>regex</c> and writes the piece at
/// position <c>Index</c> into <c>NewColumn</c>.
/// </summary>
/// <remarks>
/// When <c>FromBack</c> is set, <c>Index</c> counts from the last piece instead of the first.
/// Nothing is written when the source value is null or <c>Index</c> does not address an
/// existing piece.
/// </remarks>
/// <param name="dict">Document that is read from and written to.</param>
/// <returns>Always <c>null</c>; the result is written into <paramref name="dict"/>.</returns>
public override object TransformData(IFreeDocument dict)
{
    var item = dict[Column];
    if (item == null)
    {
        return null;
    }

    var items = regex.Split(item.ToString());

    // Reject both ends of the range up front. The original only checked the upper bound,
    // so a negative Index fell through to the array access and threw instead of being
    // skipped (its later "index < 0" test could never fire for a non-negative Index).
    if (Index < 0 || items.Length <= Index)
    {
        return null;
    }

    var position = FromBack == false ? Index : items.Length - Index - 1;
    dict.SetValue(NewColumn, items[position]);
    return null;
}
/// <summary>
/// Stores <paramref name="item"/> in <paramref name="doc"/> under <c>NewColumn</c> when
/// that is set, and under <c>Column</c> otherwise.
/// </summary>
/// <param name="doc">Document that receives the value.</param>
/// <param name="item">Value to store.</param>
protected void SetValue(IFreeDocument doc, object item)
{
    if (!string.IsNullOrEmpty(NewColumn))
    {
        doc.SetValue(NewColumn, item);
        return;
    }

    // No explicit target configured: overwrite the source column.
    doc.SetValue(Column, item);
}
/// <summary>
/// Requests the URL stored under <c>Column</c> and copies response headers into the document.
/// </summary>
/// <remarks>
/// All response headers are copied unless <c>HeaderFilter</c> is set, in which case only the
/// space-separated names it lists are looked up. If "Location" is among the requested names
/// but the document still has no "Location" entry afterwards, the requested URL is stored
/// under that key instead.
/// </remarks>
/// <param name="datas">Document that supplies the URL and receives the header values.</param>
/// <returns>
/// An empty <c>List&lt;FreeDocument&gt;</c> when the source column is null; otherwise <c>null</c>.
/// </returns>
public override object TransformData(IFreeDocument datas)
{
    var source = datas[Column];
    if (source == null)
    {
        return new List<FreeDocument>();
    }

    var url = source.ToString();

    WebHeaderCollection responseHeader;
    HttpStatusCode code;
    // Only the headers are of interest; the returned body is discarded.
    helper.GetHtml(crawler.Http, out responseHeader, out code, url);

    string[] keys = responseHeader.AllKeys;
    if (!string.IsNullOrEmpty(HeaderFilter))
    {
        keys = HeaderFilter.Split(' ');
    }

    foreach (var name in keys)
    {
        var headerValue = responseHeader.Get(name);
        if (headerValue == null)
        {
            continue;
        }
        datas.SetValue(name, headerValue);
    }

    if (keys.Contains("Location"))
    {
        if (!datas.ContainsKey("Location"))
        {
            datas["Location"] = url;
        }
    }

    return null;
}
/// <summary>
/// Requests the URL stored under <c>Column</c> and copies response headers into the document.
/// </summary>
/// <remarks>
/// The request is awaited synchronously through <c>.Result</c>. All response headers are
/// copied unless <c>HeaderFilter</c> is set, in which case only the space-separated names it
/// lists are looked up. If "Location" is among the requested names but the document still
/// has no "Location" entry afterwards, the requested URL is stored under that key instead.
/// </remarks>
/// <param name="datas">Document that supplies the URL and receives the header values.</param>
/// <returns>
/// An empty <c>List&lt;FreeDocument&gt;</c> when the source column is null; otherwise <c>null</c>.
/// </returns>
object TransformData(IFreeDocument datas)
{
    var source = datas[Column];
    if (source == null)
    {
        return new List<FreeDocument>();
    }

    var url = source.ToString();
    var response = helper.GetHtml(Crawler.Http, url).Result;

    // Body and status are read but not used further, as in the original.
    var html = response.Html;
    var status = response.Code;
    var responseHeader = response.ResponseHeaders;

    var keys = responseHeader.AllKeys;
    if (!string.IsNullOrEmpty(HeaderFilter))
    {
        keys = HeaderFilter.Split(' ');
    }

    foreach (var name in keys)
    {
        var headerValue = responseHeader.Get(name);
        if (headerValue == null)
        {
            continue;
        }
        datas.SetValue(name, headerValue);
    }

    if (keys.Contains("Location"))
    {
        if (!datas.ContainsKey("Location"))
        {
            datas["Location"] = url;
        }
    }

    return null;
}
/// <summary>
/// Renames <c>Column</c>: removes it from the document and stores its value under
/// <c>NewColumn</c>, or under <c>Column + "1"</c> when no new name is configured.
/// </summary>
/// <param name="document">Document whose column is renamed; untouched when the value is null.</param>
/// <returns>Always <c>null</c>; the change is made in place.</returns>
public override object TransformData(IFreeDocument document)
{
    object current = document[Column];
    if (current == null)
    {
        return null;
    }

    document.Remove(Column);

    string target = string.IsNullOrEmpty(NewColumn) ? Column + "1" : NewColumn;
    document.SetValue(target, current);
    return null;
}
/// <summary>
/// Evaluates a crawl item's XPath against the document's root node and, when a value is
/// produced, stores it in <paramref name="document"/> under the crawl item's name.
/// </summary>
/// <param name="doc">HTML document to query.</param>
/// <param name="crawItem">Supplies the XPath, the HTML flag and the target name.</param>
/// <param name="document">Receives the extracted value; untouched when nothing is found.</param>
public static void GetDataFromXPath(this HtmlDocument doc, CrawlItem crawItem, IFreeDocument document)
{
    var extracted = doc.DocumentNode.GetDataFromXPath(crawItem.XPath, crawItem.IsHTML);
    if (extracted == null)
    {
        return;
    }

    document.SetValue(crawItem.Name, extracted);
}
/// <summary>
/// Moves the value of <c>Column</c> to another key. The target is <c>NewColumn</c> when
/// that is set, and <c>Column + "1"</c> otherwise.
/// </summary>
/// <param name="document">Document to modify in place; left as-is when the value is null.</param>
/// <returns>Always <c>null</c>.</returns>
public override object TransformData(IFreeDocument document)
{
    object moved = document[Column];
    if (moved == null)
    {
        return null;
    }

    // Drop the old key first, then re-insert the value under its new name.
    document.Remove(Column);

    if (string.IsNullOrEmpty(NewColumn))
    {
        document.SetValue(Column + "1", moved);
    }
    else
    {
        document.SetValue(NewColumn, moved);
    }

    return null;
}
/// <summary>
/// Evaluates a crawl item's XPath, after <c>TakeOff</c> has been applied with
/// <paramref name="shortv"/>, against the document's root node and stores any value found
/// in <paramref name="document"/> under the crawl item's name.
/// </summary>
/// <param name="doc">HTML document to query.</param>
/// <param name="crawItem">Supplies the XPath, the HTML flag and the target name.</param>
/// <param name="shortv">Path passed to <c>XPath.TakeOff</c> (presumably a shared prefix to strip - confirm).</param>
/// <param name="document">Receives the extracted value; untouched when nothing is found.</param>
public static void GetDataFromXPath(this HtmlDocument doc, CrawlItem crawItem, string shortv, IFreeDocument document)
{
    var relativePath = new XPath(crawItem.XPath).TakeOff(shortv).ToString();
    var extracted = doc.DocumentNode.GetDataFromXPath(relativePath, crawItem.IsHTML);
    if (extracted == null)
    {
        return;
    }

    document.SetValue(crawItem.Name, extracted);
}
/// <summary>
/// Splits the value stored under <c>Column</c> with <c>regex</c> and writes one of the
/// resulting pieces into <c>NewColumn</c>.
/// </summary>
/// <remarks>
/// <c>Index</c> selects the piece, counted from the front or, when <c>FromBack</c> is set,
/// from the back. Nothing is written when the source value is null or when <c>Index</c>
/// lies outside the pieces produced by the split.
/// </remarks>
/// <param name="dict">Document that is read from and written to.</param>
/// <returns>Always <c>null</c>; the result is written into <paramref name="dict"/>.</returns>
public override object TransformData(IFreeDocument dict)
{
    var item = dict[Column];
    if (item == null)
    {
        return null;
    }

    var items = regex.Split(item.ToString());

    // A negative Index used to reach the array access and throw. The old "index < 0"
    // check in the FromBack branch was unreachable for valid input and did not cover
    // that case. Validate the whole range once instead.
    if (Index < 0 || Index >= items.Length)
    {
        return null;
    }

    if (FromBack == false)
    {
        dict.SetValue(NewColumn, items[Index]);
    }
    else
    {
        dict.SetValue(NewColumn, items[items.Length - Index - 1]);
    }

    return null;
}
/// <summary>
/// Copies the named columns from <paramref name="doc2"/> into <paramref name="document"/>.
/// </summary>
/// <param name="document">Document that receives the values.</param>
/// <param name="doc2">Document to copy from; when null nothing is copied.</param>
/// <param name="Columns">Space-separated column names; when null or blank nothing is copied.</param>
/// <returns>The same <paramref name="document"/> instance.</returns>
public static IFreeDocument MergeQuery(this IFreeDocument document, IFreeDocument doc2, string Columns)
{
    if (doc2 == null || string.IsNullOrWhiteSpace(Columns))
    {
        return document;
    }

    foreach (var name in Columns.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries))
    {
        document.SetValue(name, doc2[name]);
    }

    return document;
}
/// <summary>
/// Requests the URL stored under <c>Column</c> and copies the response headers named in
/// <c>HeaderFilter</c> (space-separated) into the document.
/// </summary>
/// <remarks>
/// Header names are matched against <c>AllKeys</c> exactly, so the match is case-sensitive.
/// </remarks>
/// <param name="datas">Document that supplies the URL and receives the header values.</param>
/// <returns>Always <c>null</c>; the results are written into <paramref name="datas"/>.</returns>
public override object TransformData(IFreeDocument datas)
{
    // A missing URL used to throw on ToString(); skip the document instead.
    var source = datas[Column];
    if (source == null)
    {
        return null;
    }

    HttpStatusCode code;
    WebHeaderCollection responseHeader;
    // Only the headers are needed; the returned body is discarded.
    helper.GetHtml(crawler.Http, out responseHeader, out code, source.ToString());

    // Defensive: what the helper leaves in the out parameter on failure is not visible here.
    if (responseHeader == null)
    {
        return null;
    }

    // A null filter is treated like an empty one (nothing matches) rather than throwing.
    var keys = (HeaderFilter ?? string.Empty).Split(' ');

    // Read the key array once instead of on every iteration.
    var available = responseHeader.AllKeys;
    foreach (var key in keys)
    {
        if (available.Contains(key))
        {
            datas.SetValue(key, responseHeader[key]);
        }
    }

    return null;
}
/// <summary>
/// Copies columns from <paramref name="doc2"/> into <paramref name="document"/>.
/// </summary>
/// <param name="document">Document that receives the values.</param>
/// <param name="doc2">Document to copy from; when null nothing is copied.</param>
/// <param name="columnNames">
/// Space-separated column names, or exactly <c>"*"</c> to copy every key of
/// <paramref name="doc2"/>. When null or blank nothing is copied.
/// </param>
/// <returns>The same <paramref name="document"/> instance.</returns>
public static IFreeDocument MergeQuery(this IFreeDocument document, IFreeDocument doc2, string columnNames)
{
    if (doc2 == null || string.IsNullOrWhiteSpace(columnNames))
    {
        return document;
    }

    string[] selected;
    if (columnNames == "*")
    {
        // Wildcard: snapshot the source keys.
        selected = doc2.Keys.ToArray();
    }
    else
    {
        selected = columnNames.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
    }

    foreach (var name in selected)
    {
        document.SetValue(name, doc2[name]);
    }

    return document;
}
/// <summary>
/// Writes <paramref name="item"/> into <paramref name="doc"/>. The target key is
/// <c>NewColumn</c> when one is configured and <c>Column</c> when it is null or empty.
/// </summary>
/// <param name="doc">Document that receives the value.</param>
/// <param name="item">Value to store.</param>
protected void SetValue(IFreeDocument doc, object item)
{
    bool hasNewColumn = !string.IsNullOrEmpty(NewColumn);
    if (hasNewColumn)
    {
        doc.SetValue(NewColumn, item);
    }
    else
    {
        doc.SetValue(Column, item);
    }
}
/// <summary>
/// Runs a crawl item's XPath, reduced via <c>XPath.TakeOff</c> with
/// <paramref name="shortv"/>, against the document's root node and records the result
/// under the crawl item's name.
/// </summary>
/// <param name="doc">HTML document to query.</param>
/// <param name="crawItem">Supplies the XPath, the HTML flag and the target name.</param>
/// <param name="shortv">Path handed to <c>TakeOff</c> (presumably the common root to remove - confirm).</param>
/// <param name="document">Receives the extracted value; left unchanged when the query yields null.</param>
public static void GetDataFromXPath(this HtmlDocument doc, CrawlItem crawItem, string shortv, IFreeDocument document)
{
    var fullPath = new XPath(crawItem.XPath);
    var queryPath = fullPath.TakeOff(shortv).ToString();

    var value = doc.DocumentNode.GetDataFromXPath(queryPath, crawItem.IsHTML);
    if (value != null)
    {
        document.SetValue(crawItem.Name, value);
    }
}
/// <summary>
/// Runs a crawl item's XPath against the document's root node and records the result
/// in <paramref name="document"/> under the crawl item's name.
/// </summary>
/// <param name="doc">HTML document to query.</param>
/// <param name="crawItem">Supplies the XPath, the HTML flag and the target name.</param>
/// <param name="document">Receives the extracted value; left unchanged when the query yields null.</param>
public static void GetDataFromXPath(this HtmlDocument doc, CrawlItem crawItem, IFreeDocument document)
{
    var root = doc.DocumentNode;
    var value = root.GetDataFromXPath(crawItem.XPath, crawItem.IsHTML);
    if (value != null)
    {
        document.SetValue(crawItem.Name, value);
    }
}
/// <summary>
/// Looks up rows in the selected table using the fields named in <c>KeyName</c>
/// (space-separated), each matched against the document's value for the same name, and
/// copies every column listed in <c>ImportColumns</c> from the match(es) into the document.
/// </summary>
/// <remarks>
/// When <c>IsMutliDatas</c> is set, each imported column receives a list holding one value
/// per matching row; otherwise only the first matching row is used and its values are
/// copied directly. Nothing is written when no connector or table is selected, or when
/// the lookup finds no rows.
/// </remarks>
/// <param name="datas">Document that supplies the key values and receives the imported columns.</param>
/// <returns>Always <c>null</c>; the results are written into <paramref name="datas"/>.</returns>
public override object TransformData(IFreeDocument datas)
{
    IDataBaseConnector con = ConnectorSelector.SelectItem;
    if (con == null)
    {
        return null;
    }

    TableInfo table = TableSelector.SelectItem;
    if (table == null)
    {
        return null;
    }

    // Skip blank entries (from repeated spaces) and duplicates. A blank entry would query
    // an empty field name, and a duplicate would make ToDictionary throw.
    var query = KeyName.Split(' ')
        .Where(k => k.Length > 0)
        .Distinct()
        .ToDictionary(k => k, k => datas[k]);

    if (IsMutliDatas)
    {
        // Materialize the result exactly once. Calling Any() and then Select() on the raw
        // result enumerates it twice, which may execute the lookup twice if it is lazy.
        // The count argument -1 presumably means "no limit" - confirm against the connector.
        var dicts = con.TryFindEntities(table.Name, query, null, -1, SearchStrategy)
            .Select(d => d.DictSerialize())
            .ToList();
        if (dicts.Count == 0)
        {
            return null;
        }

        foreach (string importColumn in ImportColumns)
        {
            // dicts is known to be non-empty here, so the list always has content.
            var res = new List<object>(dicts.Count);
            foreach (var row in dicts)
            {
                res.Add(row[importColumn]);
            }
            datas.SetValue(importColumn, res);
        }
    }
    else
    {
        var r = con.TryFindEntities(table.Name, query, null, 1, SearchStrategy).FirstOrDefault();
        if (r == null)
        {
            return null;
        }

        FreeDocument dict = r.DictSerialize();
        foreach (string importColumn in ImportColumns)
        {
            datas.SetValue(importColumn, dict[importColumn]);
        }
    }

    return null;
}
/// <summary>
/// Fetches the URL stored under <c>Column</c> and writes the response headers listed in
/// <c>HeaderFilter</c> (space-separated names) into the document.
/// </summary>
/// <remarks>
/// A header is copied only when its name appears in <c>AllKeys</c> exactly as written in
/// the filter, so the match is case-sensitive.
/// </remarks>
/// <param name="datas">Document that supplies the URL and receives the header values.</param>
/// <returns>Always <c>null</c>; the results are written into <paramref name="datas"/>.</returns>
public override object TransformData(IFreeDocument datas)
{
    // Without a URL there is nothing to request; the original threw on ToString() here.
    var source = datas[Column];
    if (source == null)
    {
        return null;
    }

    HttpStatusCode code;
    WebHeaderCollection responseHeader;
    // The returned body is not needed, only the headers delivered through the out parameter.
    helper.GetHtml(crawler.Http, out responseHeader, out code, source.ToString());

    // Defensive: what the helper leaves in the out parameter on failure is not visible here.
    if (responseHeader == null)
    {
        return null;
    }

    // Read the available names once, not once per requested header.
    var available = responseHeader.AllKeys;

    // A null filter behaves like an empty one: no header matches, nothing is copied.
    foreach (var key in (HeaderFilter ?? string.Empty).Split(' '))
    {
        if (!available.Contains(key))
        {
            continue;
        }
        datas.SetValue(key, responseHeader[key]);
    }

    return null;
}