/// <summary>
/// Generates a numeric sequence running from the resolved <c>MinValue</c> up to and
/// including the resolved <c>MaxValue</c>, advancing by the resolved <c>Interval</c>.
/// </summary>
/// <param name="document">Document used to resolve the Interval/MinValue/MaxValue queries.</param>
/// <returns>
/// One document per step whose <c>Column</c> holds the value rounded to 5 decimals: a formatted
/// string when <c>Format</c> is set, otherwise the rounded double. Nothing is produced when any
/// bound fails to parse or when the interval is not positive.
/// </returns>
public override IEnumerable<IFreeDocument> Generate(IFreeDocument document = null)
{
    int interval;
    double max, min;
    if (!int.TryParse(document.Query(Interval), out interval)
        || !double.TryParse(document.Query(MinValue), out min)
        || !double.TryParse(document.Query(MaxValue), out max))
    {
        yield break;
    }

    // A zero or negative step would never reach max and the loop would never terminate.
    if (interval <= 0)
    {
        yield break;
    }

    for (var i = min; i <= max; i += interval)
    {
        double value = Math.Round(i, 5);
        object result;
        if (!string.IsNullOrEmpty(Format))
        {
            result = value.ToString(Format);
        }
        else
        {
            result = value;
        }

        var item = new FreeDocument();
        item.Add(Column, result);
        yield return item;
    }
}
/// <summary>
/// For every incoming document, parses the HTML stored in <c>Column</c> and threads a seed
/// document through <c>Get</c> once per configured xpath, yielding whatever the chain produces.
/// </summary>
/// <param name="datas">Documents whose <c>Column</c> value is rendered to text and parsed as HTML.</param>
/// <returns>The documents produced by the xpath chain, in input order.</returns>
public override IEnumerable<IFreeDocument> TransformManyData(IEnumerable<IFreeDocument> datas)
{
    foreach (var row in datas)
    {
        var cell = row[Column];
        var html = new HtmlDocument();
        html.LoadHtml(cell.ToString());

        // Seed row carrying the columns selected from the input by NewColumn.
        var seed = new FreeDocument();
        seed.MergeQuery(row, NewColumn);
        IEnumerable<IFreeDocument> current = new List<IFreeDocument>() { seed };

        var position = 0;
        while (position < xpaths.Count)
        {
            var xpath = xpaths[position];
            current = Get(html, current, xpath, position);
            position++;
        }

        foreach (var produced in current)
        {
            yield return produced;
        }
    }
}
/// <summary>
/// Evaluates <paramref name="xpath"/> against <paramref name="docu"/> and crosses one document per
/// matched node with <paramref name="source"/>.
/// </summary>
/// <param name="docu">Parsed HTML document to query.</param>
/// <param name="source">Documents accumulated so far; returned unchanged when the xpath fails or matches nothing.</param>
/// <param name="xpath">XPath expression to evaluate.</param>
/// <param name="count">Index appended to the generated column names (<c>xp_text_N</c>, <c>xp_html_N</c>, <c>xp_ohtml_N</c>).</param>
/// <returns>The result of <c>Cross</c> between the per-node documents and <paramref name="source"/>.</returns>
private IEnumerable<IFreeDocument> Get(HtmlDocument docu, IEnumerable<IFreeDocument> source, string xpath, int count)
{
    HtmlNodeCollection nodes;
    try
    {
        nodes = docu.DocumentNode.SelectNodes(xpath);
    }
    catch (Exception)
    {
        XLogSys.Print.Warn("XPath表达式错误: " + xpath);
        return source;
    }

    // SelectNodes yields null (not an empty collection) when nothing matches, so the
    // null check must come before touching Count.
    if (nodes == null || nodes.Count == 0)
    {
        XLogSys.Print.Warn("XPath表达式: " + xpath + "获取的节点数量为0");
        return source;
    }

    var new_docs = nodes.Select(node =>
    {
        var doc = new FreeDocument();
        doc.Add("xp_text_" + count, node.GetNodeText());
        if (HasHtml)
        {
            doc.Add("xp_html_" + count, node.InnerHtml);
            doc.Add("xp_ohtml_" + count, node.OuterHtml);
        }
        return doc;
    });
    return new_docs.Cross(source);
}
/// <summary>
/// Evaluates <paramref name="xpath"/> against <paramref name="docu"/> and crosses one document per
/// matched node with <paramref name="source"/>.
/// </summary>
/// <param name="docu">Parsed HTML document to query.</param>
/// <param name="source">Documents accumulated so far; returned unchanged when the xpath fails or matches nothing.</param>
/// <param name="name">Prefix of the generated column names (<c>name_text</c>, <c>name_ohtml</c>).</param>
/// <param name="xpath">XPath expression to evaluate.</param>
/// <returns>The result of <c>Cross</c> between the per-node documents and <paramref name="source"/>.</returns>
private IEnumerable<IFreeDocument> Get(HtmlDocument docu, IEnumerable<IFreeDocument> source, string name, string xpath)
{
    HtmlNodeCollection nodes;
    try
    {
        nodes = docu.DocumentNode.SelectNodes(xpath);
    }
    catch (Exception)
    {
        XLogSys.Print.Warn(GlobalHelper.Get("key_196") + xpath);
        return source;
    }

    // SelectNodes yields null (not an empty collection) when nothing matches, so the
    // null check must come before touching Count.
    if (nodes == null || nodes.Count == 0)
    {
        XLogSys.Print.Warn(GlobalHelper.Get("key_197") + xpath + GlobalHelper.Get("key_198"));
        return source;
    }

    var new_docs = nodes.Select(node =>
    {
        var doc = new FreeDocument();
        doc.Add(name + "_text", node.GetNodeText());
        doc.Add(name + "_ohtml", node.OuterHtml);
        return doc;
    });
    return new_docs.Cross(source);
}
/// <summary>
/// Copies the non-null values of an object's readable and writable properties whose type passes
/// <c>AttributeHelper.IsPOCOType</c> into a new <see cref="FreeDocument"/>.
/// </summary>
/// <remarks>
/// The property list is kept in the static <c>propertys</c>/<c>lastType</c> fields and is only
/// recomputed when the runtime type differs from the previous call.
/// </remarks>
/// <param name="item">Object to serialize.</param>
/// <returns>A document keyed by property name.</returns>
public static FreeDocument UnsafeDictSerialize(this object item)
{
    var currentType = item.GetType();
    if (currentType != lastType)
    {
        propertys = currentType.GetProperties()
            .Where(p => p.CanRead && p.CanWrite && AttributeHelper.IsPOCOType(p.PropertyType))
            .ToArray();
    }
    lastType = currentType;

    var result = new FreeDocument();
    foreach (var property in propertys)
    {
        var current = property.GetValue(item, null);
        if (current == null)
        {
            continue;
        }
        result.Add(property.Name, current);
    }
    return result;
}
/// <summary>
/// Folds a stream of key/value rows into one accumulating document and emits a clone of it each
/// time a row with both an empty key and an empty value is met.
/// </summary>
/// <param name="datas">Rows providing the key in <c>Column</c> and the value in <c>ValueColumn</c>.</param>
/// <returns>
/// The input rows untouched when <c>Column</c> or <c>ValueColumn</c> is not configured; otherwise
/// one clone of the accumulator per separator row.
/// </returns>
public override IEnumerable<IFreeDocument> TransformManyData(IEnumerable<IFreeDocument> datas)
{
    var configured = !string.IsNullOrEmpty(Column) && !string.IsNullOrEmpty(ValueColumn);
    if (!configured)
    {
        foreach (var passthrough in datas)
        {
            yield return passthrough;
        }
        yield break;
    }

    var accumulated = new FreeDocument();
    foreach (var row in datas)
    {
        var key = row[Column]?.ToString();
        var value = row[ValueColumn]?.ToString();

        var isSeparator = string.IsNullOrEmpty(key) && string.IsNullOrEmpty(value);
        if (!isSeparator)
        {
            accumulated.SetValue(key, value);
            continue;
        }

        // The accumulator is not reset here; later clones keep the earlier keys.
        yield return accumulated.Clone();
    }
}
/// <summary>
/// Converts each child node of the XML root element into an instance of the configured
/// <c>DataType</c>, populated from the node's attributes and children.
/// </summary>
/// <param name="xdoc">XML document to read.</param>
/// <param name="alreadyGetSize">Optional callback that receives the number of child nodes of the root.</param>
/// <returns>One populated object per child node; nothing when the document has no root element.</returns>
private IEnumerable<IFreeDocument> ReadText(XmlDocument xdoc, Action<int> alreadyGetSize = null)
{
    XmlNode root = xdoc.DocumentElement;
    if (root == null)
    {
        yield break;
    }

    if (alreadyGetSize != null)
    {
        alreadyGetSize(root.ChildNodes.Count);
    }

    foreach (XmlNode child in root)
    {
        var instance = PluginProvider.GetObjectInstance(DataType) as IFreeDocument;
        var parsed = new FreeDocument { Name = child.Name };
        XMLNode2Dict(child, parsed);
        instance.DictDeserialize(parsed.DictSerialize());

        // Children are carried over by hand, and only when the instance is a FreeDocument.
        var asFreeDocument = instance as FreeDocument;
        if (asFreeDocument != null)
        {
            asFreeDocument.Children = parsed.Children;
        }
        yield return instance;
    }
}
/// <summary>
/// Returns a copy of <paramref name="doc"/> with keys renamed according to <c>MappingSet</c>,
/// a space-separated list of <c>old:new</c> pairs.
/// </summary>
/// <param name="doc">Document to map; may be null.</param>
/// <returns>
/// Null for a null input, the input itself when no mapping is configured, otherwise a new
/// document with the renames applied.
/// </returns>
public IFreeDocument MappingDocument(IFreeDocument doc)
{
    if (doc == null)
    {
        return null;
    }
    if (string.IsNullOrEmpty(MappingSet))
    {
        return doc;
    }

    var mapped = new FreeDocument();
    doc.DictCopyTo(mapped);

    var pairs = MappingSet.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var pair in pairs)
    {
        var parts = pair.Split(':');
        // Skip malformed pairs and renames onto the same name.
        var usable = parts.Length == 2 && parts[0] != parts[1];
        if (!usable)
        {
            continue;
        }

        var oldName = parts[0];
        var newName = parts[1];
        if (!mapped.Keys.Contains(oldName))
        {
            continue;
        }
        mapped[newName] = mapped[oldName];
        mapped.Remove(oldName);
    }
    return mapped;
}
/// <summary>
/// Folds a stream of key/value rows into one accumulating document and emits a clone of it each
/// time a row with both an empty key and an empty value is met.
/// </summary>
/// <param name="datas">Rows providing the key in <c>Column</c> and the value in <c>ValueColumn</c>.</param>
/// <returns>
/// The input rows untouched when <c>Column</c> or <c>ValueColumn</c> is not configured; otherwise
/// one clone of the accumulator per separator row.
/// </returns>
public override IEnumerable<IFreeDocument> TransformManyData(IEnumerable<IFreeDocument> datas)
{
    if (string.IsNullOrEmpty(Column) || string.IsNullOrEmpty(ValueColumn))
    {
        // Not configured: act as a pass-through.
        foreach (var untouched in datas)
        {
            yield return untouched;
        }
        yield break;
    }

    var merged = new FreeDocument();
    foreach (var entry in datas)
    {
        var name = entry[Column]?.ToString();
        var content = entry[ValueColumn]?.ToString();

        if (!string.IsNullOrEmpty(name) || !string.IsNullOrEmpty(content))
        {
            merged.SetValue(name, content);
        }
        else
        {
            // Separator row: publish a snapshot; the accumulator itself keeps growing.
            yield return merged.Clone();
        }
    }
}
/// <summary>
/// Stores the serialized configuration of <paramref name="process"/> as a task of the current
/// project, creating the task when no task with the same name exists.
/// </summary>
/// <param name="process">Process whose configuration is saved; it is serialized through <c>IDictionarySerializable</c>.</param>
/// <param name="haveui">When true the user is asked to confirm via a message box; cancelling aborts the save.</param>
public void SaveTask(IDataProcess process, bool haveui)
{
    var task = CurrentProject.Tasks.FirstOrDefault(d => d.Name == process.Name);

    if (haveui)
    {
        var message = GlobalHelper.Get("key_311")
                      + (task == null ? GlobalHelper.Get("key_312") : GlobalHelper.Get("key_313"));
        var caption = GlobalHelper.Get("key_99");
        if (MessageBox.Show(message, caption, MessageBoxButton.OKCancel) != MessageBoxResult.OK)
        {
            return;
        }
    }

    configDocument = (process as IDictionarySerializable).DictSerialize();
    if (task == null)
    {
        task = new ProcessTask
        {
            Name = process.Name,
            Description = GlobalHelper.Get("key_314")
        };
        CurrentProject.Tasks.Add(task);
    }
    task.ProcessToDo = configDocument;
    // XLogSys.Print.Warn(string.Format(GlobalHelper.Get("key_315"),task.Name));
}
/// <summary>
/// Converts a dynamic enumerable of enumerable entries into a list of <see cref="FreeDocument"/>.
/// </summary>
/// <remarks>
/// For each inner element: a string element is treated as a key and its value is read through
/// the entry's indexer; any other element is expected to expose <c>Key</c> and <c>Value</c>.
/// </remarks>
/// <param name="obj">Dynamic value to convert; anything that is not enumerable yields an empty list.</param>
/// <returns>One document per outer element (empty when that element is not enumerable).</returns>
public static List<FreeDocument> ToDocuments(dynamic obj)
{
    var documents = new List<FreeDocument>();
    if (!(obj is IEnumerable))
    {
        return documents;
    }

    foreach (var entry in obj)
    {
        var converted = new FreeDocument();
        if (entry is IEnumerable)
        {
            foreach (var member in entry)
            {
                if (member is string)
                {
                    converted.Add(member, entry[member]);
                }
                else
                {
                    converted.Add(member.Key, member.Value);
                }
            }
        }
        documents.Add(converted);
    }
    return documents;
}
/// <summary>
/// Parses the HTML stored in <c>Column</c>, selects nodes with the path resolved from
/// <c>XPath</c>, and produces one document per matched node.
/// </summary>
/// <param name="data">Input document; also the source of the columns merged via <c>NewColumn</c>.</param>
/// <returns>
/// Documents carrying <c>Text</c>, <c>HTML</c> and <c>OHTML</c> for each node, or an empty list
/// when the selection returns null.
/// </returns>
protected override IEnumerable<IFreeDocument> InternalTransformManyData(IFreeDocument data)
{
    var cell = data[Column];
    var html = new HtmlDocument();
    html.LoadHtml(cell.ToString());

    var resolvedPath = data.Query(XPath);
    var matches = html.DocumentNode.SelectNodes(resolvedPath, this.SelectorFormat);
    if (matches != null)
    {
        return matches.Select(node =>
        {
            var doc = new FreeDocument();
            doc.MergeQuery(data, NewColumn);
            doc.SetValue("Text", node.GetNodeText());
            doc.SetValue("HTML", node.InnerHtml);
            doc.SetValue("OHTML", node.OuterHtml);
            return doc;
        });
    }
    return new List<IFreeDocument>();
}
/// <summary>
/// Looks up rows in the selected table whose <c>KeyName</c> equals the value of <c>Column</c>
/// and copies the configured <c>ImportColumns</c> into <paramref name="datas"/>.
/// </summary>
/// <remarks>
/// With <c>IsMutliDatas</c> every match is fetched and each import column receives a list of
/// values; otherwise only the first match is used and each import column receives one value.
/// </remarks>
/// <param name="datas">Document that supplies the key and receives the imported columns.</param>
/// <returns>Always null; the result is written into <paramref name="datas"/>.</returns>
public override object TransformData(IFreeDocument datas)
{
    object keyValue = datas[Column];

    IDataBaseConnector connector = ConnectorSelector.SelectItem;
    if (connector == null)
    {
        return null;
    }
    TableInfo table = TableSelector.SelectItem;
    if (table == null)
    {
        return null;
    }

    var filter = new Dictionary<string, object> { { KeyName, keyValue } };

    if (!IsMutliDatas)
    {
        var single = connector.TryFindEntities(table.Name, filter, null, 1, SearchStrategy).FirstOrDefault();
        if (single == null)
        {
            return null;
        }
        FreeDocument row = single.DictSerialize();
        foreach (string importColumn in ImportColumns)
        {
            datas.SetValue(importColumn, row[importColumn]);
        }
        return null;
    }

    var found = connector.TryFindEntities(table.Name, filter, null, -1, SearchStrategy);
    if (!found.Any())
    {
        return null;
    }

    var rows = found.Select(d => d.DictSerialize()).ToList();
    foreach (string importColumn in ImportColumns)
    {
        var collected = new List<object>();
        foreach (var row in rows)
        {
            collected.Add(row[importColumn]);
        }
        if (collected.Count != 0)
        {
            datas.SetValue(importColumn, collected);
        }
    }
    return null;
}
/// <summary>
/// Inserts <paramref name="document"/> when it has no positive id; otherwise updates the role
/// name, file path and subject id of the stored record with that id.
/// </summary>
/// <param name="document">Entity to create or whose fields are copied onto the stored record.</param>
public void SaveOrUpdateFreeDocument(FreeDocument document)
{
    using (var dbContext = new DataAccess.TeachersAssistantDbContext())
    {
        _unitOfWork.InitializeDbContext(dbContext);

        var isExisting = document.FreeDocumentId != null && document.FreeDocumentId > 0;
        if (!isExisting)
        {
            // Create.
            _unitOfWork._freeDocumentRepository.Add(document);
            _unitOfWork.SaveChanges();
            return;
        }

        // Update; nothing is written when the id is unknown.
        var stored = _unitOfWork._freeDocumentRepository.GetById((int)document.FreeDocumentId);
        if (stored == null)
        {
            return;
        }
        stored.RoleName = document.RoleName;
        stored.FilePath = document.FilePath;
        stored.SubjectId = document.SubjectId;
        _unitOfWork.SaveChanges();
    }
}
//public bool AutoVisit(HttpItem item)
//{
//    var res = GetHtml(item);
//    XLogSys.Print.Info(res.Substring(0, Math.Min(res.Length, 300)));
//    var relocate = false;
//    item.Method = MethodType.GET;
//    while (true)
//    {
//        if (item.ResponseHeaders == null)
//            break;
//        var newpos = item.ResponseHeaders["Location"];
//        if (newpos == null)
//            break;
//        XLogSys.Print.Debug("Redirect to " + newpos);
//        item.URL = newpos;
//        res = GetHtml(item);
//        relocate = true;
//    }
//    return relocate;
//}

/// <summary>
/// Splits a cookie header value (<c>a=3;ck;abc=</c>) into a document of name/value pairs.
/// </summary>
/// <param name="cookie">Semicolon-separated cookie string; null or empty yields an empty document.</param>
/// <returns>
/// A document with one entry per cookie. A cookie without <c>=</c>, or with nothing after it,
/// maps to an empty string. Names and values are trimmed.
/// </returns>
private FreeDocument CookieToDict(string cookie)
{
    var dict = new FreeDocument();
    if (string.IsNullOrEmpty(cookie))
    {
        return dict;
    }

    foreach (var segment in cookie.Split(';'))
    {
        // A trailing ';' or ";;" produces blank segments; they are not cookies and must not
        // be registered under an empty name.
        if (segment.Trim().Length == 0)
        {
            continue;
        }

        var equalpos = segment.IndexOf('=');
        if (equalpos == -1)
        {
            // A bare cookie name without '='.
            dict.SetValue(segment.Trim(), "");
            continue;
        }

        var cookieKey = segment.Substring(0, equalpos).Trim();
        // Substring(startIndex) returns "" when '=' is the last character ("abc=").
        var cookieValue = segment.Substring(equalpos + 1).Trim();
        dict.SetValue(cookieKey, cookieValue);
    }
    return dict;
}
/// <summary>
/// Issues an SQL UPDATE that writes every serialized field of <paramref name="updateItem"/>
/// to the rows of <paramref name="tableName"/> matched by <paramref name="keys"/>.
/// </summary>
/// <param name="updateItem">Document whose database-scenario serialization supplies the column values.</param>
/// <param name="tableName">Logical table name, resolved through <c>GetTableName</c>.</param>
/// <param name="keys">
/// Column/value pairs identifying the rows to update. When null or empty no WHERE clause is
/// emitted, which preserves the previous whole-table behavior.
/// </param>
/// <param name="executeType">Not consulted by this implementation.</param>
/// <remarks>
/// Values are embedded as quoted literals with single quotes doubled. Failures are logged and
/// not rethrown.
/// </remarks>
public virtual void SaveOrUpdateEntity(
    IFreeDocument updateItem, string tableName, IDictionary<string, object> keys,
    EntityExecuteType executeType = EntityExecuteType.InsertOrUpdate)
{
    FreeDocument data = updateItem.DictSerialize(Scenario.Database);
    if (data.Count == 0)
    {
        // Nothing to set; "update t set" on its own would be invalid SQL.
        return;
    }

    // Render a value as the inside of a quoted SQL literal without modifying the document.
    Func<object, string> literal = v => (v == null ? "" : v.ToString()).Replace("'", "''");

    var assignments = string.Join(",", data.Select(d => $"{d.Key}='{literal(d.Value)}'"));

    // Restrict the update to the rows identified by the caller; without this every row of
    // the table would be overwritten.
    var where = "";
    if (keys != null && keys.Count > 0)
    {
        where = " where " + string.Join(" and ", keys.Select(k => $"{k.Key}='{literal(k.Value)}'"));
    }

    try
    {
        ExecuteNonQuery($"update {GetTableName(tableName)} set {assignments}{where}");
    }
    catch (Exception e)
    {
        XLogSys.Print.Debug($"update database error {e.Message}");
    }
}
/// <summary>
/// Seeds the queue with <c>StartURL</c> and then, while <c>Enabled</c>, dequeues URLs and emits
/// one document per URL not seen before.
/// </summary>
/// <remarks>
/// URLs are de-duplicated by their <c>GetHashCode()</c> value; <c>StartURL</c> itself is always
/// emitted. When the queue is empty and <c>DelayTime</c> is positive the loop sleeps for a second.
/// </remarks>
/// <param name="document">Not used by this generator.</param>
/// <returns>Documents holding the URL in <c>Column</c>.</returns>
public override IEnumerable<FreeDocument> Generate(IFreeDocument document = null)
{
    InsertQueue(StartURL);
    while (Enabled)
    {
        string nextUrl;
        if (!queue.TryDequeue(out nextUrl))
        {
            if (DelayTime > 0)
            {
                Thread.Sleep(1000);
                XLogSys.Print.Debug("empty queue,wait 1s");
            }
            continue;
        }

        var fingerprint = nextUrl.GetHashCode();
        var alreadyVisited = StartURL != nextUrl && URLHash.Contains(fingerprint);
        if (alreadyVisited)
        {
            continue;
        }
        URLHash.Add(fingerprint);

        var emitted = new FreeDocument();
        emitted.Add(Column, nextUrl);
        yield return emitted;
    }
}
/// <summary>
/// Generates a sequence of timestamps from <c>MinValue</c> to <c>MaxValue</c> (inclusive),
/// advancing by <c>Interval</c>.
/// </summary>
/// <param name="document">Not used by this generator.</param>
/// <returns>
/// One document per step with the timestamp formatted by <c>Format</c> in <c>Column</c>.
/// Nothing is produced when a bound or the interval fails to parse against the static formats,
/// or when the interval is not positive.
/// </returns>
public override IEnumerable<IFreeDocument> Generate(IFreeDocument document = null)
{
    //TODO
    DateTime min, max;
    TimeSpan span;
    if (!DateTime.TryParseExact(MinValue, staticDateFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out min)
        || !DateTime.TryParseExact(MaxValue, staticDateFormat, CultureInfo.InvariantCulture, DateTimeStyles.None, out max)
        || !TimeSpan.TryParseExact(Interval, staticSpanFormat, CultureInfo.InvariantCulture, TimeSpanStyles.None, out span))
    {
        yield break;
    }

    // A zero or negative step would never pass max and the loop would never terminate.
    if (span <= TimeSpan.Zero)
    {
        yield break;
    }

    for (var i = min; i <= max; i += span)
    {
        var item = new FreeDocument();
        item.Add(Column, i.ToString(Format));
        yield return item;

        // Stop before "i += span" would step past DateTime.MaxValue and throw.
        if (DateTime.MaxValue - i < span)
        {
            yield break;
        }
    }
}
/// <summary>
/// Issues an SQL UPDATE that writes every serialized field of <paramref name="updateItem"/>
/// into <paramref name="tableName"/>.
/// </summary>
/// <param name="updateItem">Document whose database-scenario serialization supplies the column values.</param>
/// <param name="tableName">Logical table name, resolved through <c>GetTableName</c>.</param>
/// <param name="keys">Not consulted by this implementation.</param>
/// <param name="executeType">Not consulted by this implementation.</param>
/// <remarks>
/// Values are embedded as quoted literals with single quotes doubled so that a quote in the data
/// cannot break or alter the statement. Failures are logged and not rethrown.
/// NOTE(review): the WHERE clause is this object's <c>ToString()</c> and <paramref name="keys"/>
/// is ignored — confirm that this is intended.
/// </remarks>
public virtual void SaveOrUpdateEntity(
    IFreeDocument updateItem, string tableName, IDictionary<string, object> keys,
    EntityExecuteType executeType = EntityExecuteType.InsertOrUpdate)
{
    FreeDocument data = updateItem.DictSerialize(Scenario.Database);
    if (data.Count < 1)
    {
        // Nothing to set; "update t set where ..." would be invalid SQL.
        return;
    }

    var sb = new StringBuilder();
    foreach (var val in data)
    {
        var literal = (val.Value?.ToString() ?? "").Replace("'", "''");
        if (sb.Length > 0)
        {
            sb.Append(',');
        }
        sb.Append($" {val.Key} = '{literal}'");
    }

    try
    {
        ExecuteNonQuery($"update {GetTableName(tableName)} set {sb} where {ToString()};");
    }
    catch (Exception e)
    {
        XLogSys.Print.Debug($"update database error {e.Message}");
    }
}
//public bool AutoVisit(HttpItem item)
//{
//    var res = GetHtml(item);
//    XLogSys.Print.Info(res.Substring(0, Math.Min(res.Length, 300)));
//    var relocate = false;
//    item.Method = MethodType.GET;
//    while (true)
//    {
//        if (item.ResponseHeaders == null)
//            break;
//        var newpos = item.ResponseHeaders["Location"];
//        if (newpos == null)
//            break;
//        XLogSys.Print.Debug("Redirect to " + newpos);
//        item.URL = newpos;
//        res = GetHtml(item);
//        relocate = true;
//    }
//    return relocate;
//}

/// <summary>
/// Splits a cookie header value (<c>a=3;ck;abc=</c>) into a document of name/value pairs.
/// </summary>
/// <param name="cookie">Semicolon-separated cookie string; null or empty yields an empty document.</param>
/// <returns>
/// A document with one entry per cookie. A cookie without <c>=</c>, or with nothing after it,
/// maps to an empty string. Names and values are trimmed.
/// </returns>
private FreeDocument CookieToDict(string cookie)
{
    var dict = new FreeDocument();
    if (string.IsNullOrEmpty(cookie))
    {
        return dict;
    }

    foreach (var segment in cookie.Split(';'))
    {
        // A trailing ';' or ";;" produces blank segments; they are not cookies and must not
        // be registered under an empty name.
        if (segment.Trim().Length == 0)
        {
            continue;
        }

        var equalpos = segment.IndexOf('=');
        if (equalpos == -1)
        {
            // A bare cookie name without '='.
            dict.SetValue(segment.Trim(), "");
            continue;
        }

        var cookieKey = segment.Substring(0, equalpos).Trim();
        // Substring(startIndex) returns "" when '=' is the last character ("abc=").
        var cookieValue = segment.Substring(equalpos + 1).Trim();
        dict.SetValue(cookieKey, cookieValue);
    }
    return dict;
}
/// <summary>
/// Builds an <c>INSERT INTO … VALUES(…)</c> statement from the database-scenario serialization
/// of <paramref name="data"/>.
/// </summary>
/// <param name="data">Document whose values are inserted, in enumeration order.</param>
/// <param name="dbTableName">Table name placed verbatim into the statement.</param>
/// <returns>The SQL text; values are quoted literals with single quotes doubled.</returns>
/// <remarks>
/// <see cref="DateTime"/> values use the sortable <c>"s"</c> format.
/// NOTE(review): a null value is written as the quoted text <c>'null'</c>, not SQL NULL —
/// kept as-is, confirm that this is intended.
/// </remarks>
protected virtual string Insert(IFreeDocument data, string dbTableName)
{
    FreeDocument item = data.DictSerialize(Scenario.Database);
    var sb = new StringBuilder();
    foreach (var o in item)
    {
        string value;
        if (o.Value is DateTime)
        {
            value = ((DateTime)o.Value).ToString("s");
        }
        else if (o.Value == null)
        {
            value = "null";
        }
        else
        {
            value = o.Value.ToString();
        }

        value = value.Replace("'", "''");
        sb.Append($"'{value}',");
    }

    // Drop the trailing comma; an empty document leaves nothing to trim (the unconditional
    // Remove used to throw ArgumentOutOfRangeException in that case).
    if (sb.Length > 0)
    {
        sb.Remove(sb.Length - 1, 1);
    }

    return $"INSERT INTO {dbTableName} VALUES({sb})";
}
/// <summary>
/// Seeds the queue with <c>StartURL</c> and then, while <c>Enabled</c>, dequeues URLs and emits
/// one document per URL not seen before.
/// </summary>
/// <remarks>
/// URLs are de-duplicated by their <c>GetHashCode()</c> value; <c>StartURL</c> itself is always
/// emitted. When the queue is empty and <c>DelayTime</c> is positive the loop sleeps for a second.
/// </remarks>
/// <param name="document">Not used by this generator.</param>
/// <returns>Documents holding the URL in <c>Column</c>.</returns>
public override IEnumerable<FreeDocument> Generate(IFreeDocument document = null)
{
    InsertQueue(StartURL);
    while (Enabled)
    {
        string candidate;
        var dequeued = queue.TryDequeue(out candidate);
        if (dequeued)
        {
            var hash = candidate.GetHashCode();
            if (StartURL == candidate || !URLHash.Contains(hash))
            {
                URLHash.Add(hash);
                var row = new FreeDocument();
                row.Add(Column, candidate);
                yield return row;
            }
        }
        else if (DelayTime > 0)
        {
            Thread.Sleep(1000);
            XLogSys.Print.Debug("empty queue,wait 1s");
        }
    }
}
/// <summary>
/// Parses JSON text and yields one <see cref="FreeDocument"/> per array element, or a single
/// document when the text is a JSON object.
/// </summary>
/// <param name="text">JSON text to parse.</param>
/// <param name="alreadyGetSize">Optional callback that receives the number of documents about to be produced.</param>
/// <returns>The converted documents.</returns>
/// <exception cref="Exception">Raised during enumeration when <c>FormatJsonData</c> returns null.</exception>
public IEnumerable<FreeDocument> ReadText(string text, Action<int> alreadyGetSize = null)
{
    var parsed = FormatJsonData(text);
    if (parsed == null)
    {
        throw new Exception("文件不是合法的Json文件");
    }

    var list = parsed as JsonArray;
    if (list != null)
    {
        if (alreadyGetSize != null)
        {
            alreadyGetSize(list.Count);
        }
        foreach (object element in list)
        {
            var row = new FreeDocument();
            ItemtoNode(element, row);
            yield return row;
        }
    }

    var single = parsed as JsonObject;
    if (single == null)
    {
        yield break;
    }

    alreadyGetSize?.Invoke(1);
    var only = new FreeDocument();
    ItemtoNode(single, only);
    yield return only;
}
/// <summary>
/// Reads a whole text file into a single document, going through <c>buffHelper</c> so that a
/// path already read is served from the buffer.
/// </summary>
/// <param name="document">
/// Optional document used to resolve <c>FileName</c> as a query; when the query yields null
/// (or no document is given) <c>FileName</c> itself is used as the path.
/// </param>
/// <returns>Exactly one document whose <c>Column</c> holds the file content.</returns>
public override IEnumerable<IFreeDocument> Generate(IFreeDocument document = null)
{
    var path = FileName;
    var resolved = document?.Query(FileName);
    if (resolved != null)
    {
        path = resolved;
    }

    var cached = buffHelper.Get(path);
    if (cached == null)
    {
        var text = File.ReadAllText(path, AttributeHelper.GetEncoding(EncodingType));
        var fresh = new FreeDocument();
        fresh.Add(Column, text);
        buffHelper.Set(path, fresh);
        yield return fresh;
    }
    else
    {
        yield return cached;
    }
}
/// <summary>
/// Runs <c>regex</c> over the text of <c>Column</c> and emits one document per match, with the
/// match stored under <c>Column</c>.
/// </summary>
/// <param name="datas">Input documents; rows whose <c>Column</c> is null are skipped.</param>
/// <returns>One document per match; enumeration ends as soon as <c>Script</c> is found empty.</returns>
public override IEnumerable<IFreeDocument> TransformManyData(IEnumerable<IFreeDocument> datas)
{
    foreach (var row in datas)
    {
        object cell = row[Column];
        if (string.IsNullOrEmpty(Script))
        {
            // No pattern configured: stop producing output altogether.
            yield break;
        }
        if (cell == null)
        {
            continue;
        }

        MatchCollection found = regex.Matches(cell.ToString());
        foreach (var match in found)
        {
            var produced = new FreeDocument();
            produced.MergeQuery(row, NewColumn);
            produced.SetValue(Column, match);
            // MergeQuery is applied a second time on the way out, as before.
            yield return produced.MergeQuery(row, NewColumn);
        }
    }
}
/// <summary>
/// Serializes this object's mapper indexes, output index, name and level, plus the publisher
/// details and seeds when they are available.
/// </summary>
/// <param name="scenario">Serialization scenario; not consulted by this implementation.</param>
/// <returns>
/// A document that contains <c>Publisher</c>/<c>GenerateMode</c> only when <c>Publisher</c> is a
/// <c>SmartETLTool</c>, and a <c>Seeds</c> entry only when <c>Seeds</c> is not null.
/// </returns>
public FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var freedoc = new FreeDocument();
    freedoc.Add("MapperIndex1", MapperIndex1);
    freedoc.Add("MapperIndex2", MapperIndex2);
    freedoc.Add("OutputIndex", OutputIndex);
    freedoc.Add("Name", Name);
    freedoc.Add("Level", Level);

    var tool = Publisher as SmartETLTool;
    if (tool != null)
    {
        freedoc.Add("Publisher", tool.Name);
        freedoc.Add("GenerateMode", tool.GenerateMode);
    }

    if (Seeds != null)
    {
        // Seeds are stored as the children of a dedicated wrapper document.
        var seed = new FreeDocument();
        seed.Children = Seeds.Select(d => d.DictSerialize()).ToList();
        freedoc.Add("Seeds", seed);
    }
    return freedoc;
}
/// <summary>
/// Issues an SQL UPDATE that writes every serialized field of <paramref name="updateItem"/>
/// into <paramref name="tableName"/>.
/// </summary>
/// <param name="updateItem">Document whose database-scenario serialization supplies the column values.</param>
/// <param name="tableName">Logical table name, resolved through <c>GetTableName</c>.</param>
/// <param name="keys">Not consulted by this implementation.</param>
/// <param name="executeType">Not consulted by this implementation.</param>
/// <remarks>
/// Values are embedded as quoted literals with single quotes doubled so that a quote in the data
/// cannot break or alter the statement. The call stays best-effort: failures are traced instead
/// of being silently discarded, and are not rethrown.
/// NOTE(review): the WHERE clause is this object's <c>ToString()</c> and <paramref name="keys"/>
/// is ignored — confirm that this is intended.
/// </remarks>
public virtual void SaveOrUpdateEntity(
    IFreeDocument updateItem, string tableName, IDictionary<string, object> keys,
    EntityExecuteType executeType = EntityExecuteType.InsertOrUpdate)
{
    FreeDocument data = updateItem.DictSerialize(Scenario.Database);
    if (data.Count < 1)
    {
        // Nothing to set; "update t set where ..." would be invalid SQL.
        return;
    }

    var sb = new StringBuilder();
    foreach (var val in data)
    {
        string literal = val.Value == null ? "" : val.Value.ToString().Replace("'", "''");
        if (sb.Length > 0)
        {
            sb.Append(',');
        }
        sb.Append(String.Format(" {0} = '{1}'", val.Key, literal));
    }

    try
    {
        ExecuteNonQuery(String.Format("update {0} set {1} where {2};", GetTableName(tableName), sb, ToString()));
    }
    catch (Exception e)
    {
        System.Diagnostics.Trace.TraceWarning("update database error {0}", e.Message);
    }
}
/// <summary>
/// Parses the HTML held in <c>Column</c> of every input document and emits one document per node
/// matched by <c>XPath</c>.
/// </summary>
/// <param name="datas">Input documents; their <c>Column</c> value is rendered to text and parsed.</param>
/// <returns>
/// Documents carrying <c>Text</c>, <c>HTML</c> and <c>OHTML</c> merged with the input via
/// <c>NewColumn</c>; when <c>IsInsertNull</c> is set an empty document follows each input that
/// had matches. Inputs with no match produce nothing.
/// </returns>
public override IEnumerable<IFreeDocument> TransformManyData(IEnumerable<IFreeDocument> datas)
{
    foreach (var row in datas)
    {
        var cell = row[Column];
        var html = new HtmlDocument();
        html.LoadHtml(cell.ToString());

        var matches = html.DocumentNode.SelectNodes(XPath);
        if (matches != null)
        {
            foreach (var node in matches)
            {
                var extracted = new FreeDocument
                {
                    { "Text", node.GetNodeText() },
                    { "HTML", node.InnerHtml },
                    { "OHTML", node.OuterHtml }
                };
                yield return extracted.MergeQuery(row, NewColumn);
            }

            if (IsInsertNull)
            {
                yield return new FreeDocument();
            }
        }
    }
}
/// <summary>
/// Serializes the name and exposes the items of <c>RealData</c> as the document's children.
/// </summary>
/// <param name="scenario">Serialization scenario; not consulted by this implementation.</param>
/// <returns>
/// A document with a <c>Name</c> entry; items of <c>RealData</c> that are not
/// <see cref="FreeDocument"/> instances appear as null children.
/// </returns>
public FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var serialized = new FreeDocument { { "Name", Name } };
    var children = RealData.Select(entry => entry as FreeDocument).ToList();
    serialized.Children = children;
    return serialized;
}
/// <summary>
/// Serializes this object through <c>UnsafeDictSerialize</c> and tags the result with its
/// runtime type name and the <c>Generator</c> group.
/// </summary>
/// <param name="scenario">Serialization scenario; not consulted by this implementation.</param>
/// <returns>The serialized document with <c>Type</c> and <c>Group</c> entries added.</returns>
public virtual FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var serialized = this.UnsafeDictSerialize();
    var typeName = this.GetType().Name;
    serialized.Add("Type", typeName);
    serialized.Add("Group", "Generator");
    return serialized;
}
/// <summary>
/// Extends the base serialization with one child document per entry of <c>Projects</c>.
/// </summary>
/// <param name="scenario">Serialization scenario, forwarded to the base class and to every project.</param>
/// <returns>The base document with <c>Children</c> replaced by the serialized projects.</returns>
public override FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var serialized = base.DictSerialize(scenario);
    serialized.Children = new List<FreeDocument>();
    foreach (var project in Projects)
    {
        serialized.Children.Add(project.DictSerialize(scenario));
    }
    return serialized;
}
/// <summary>
/// Extends the base serialization with one child document per table of <c>CurrentTables</c>,
/// leaving out the table whose name equals <c>openfile</c>.
/// </summary>
/// <param name="scenario">Serialization scenario, forwarded to the base class only; tables use their default.</param>
/// <returns>The base document with <c>Children</c> replaced by the serialized tables.</returns>
public override FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var serialized = base.DictSerialize(scenario);
    serialized.Children = new List<FreeDocument>();
    foreach (var table in CurrentTables)
    {
        if (table.Name == openfile)
        {
            continue;
        }
        serialized.Children.Add(table.DictSerialize());
    }
    return serialized;
}
/// <summary>
/// Serializes the runtime type name and the configured encoding.
/// </summary>
/// <param name="scenario">Serialization scenario; not consulted by this implementation.</param>
/// <returns>A document with <c>Type</c> and <c>Encoding</c> entries.</returns>
public virtual FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    return new FreeDocument
    {
        { "Type", this.GetType().Name },
        { "Encoding", EncodingType }
    };
}
/// <summary>
/// Extracts the name and coordinates of the first entry of <c>info["results"]</c>.
/// </summary>
/// <param name="info">Dynamic response object exposing a <c>results</c> list.</param>
/// <returns>A document with <c>pos_name</c>, <c>pos_lat</c> and <c>pos_lng</c>.</returns>
protected virtual FreeDocument Parse(dynamic info)
{
    var top = info["results"][0];
    var position = new FreeDocument();
    position["pos_name"] = top["name"];
    position["pos_lat"] = top["location"]["lat"];
    position["pos_lng"] = top["location"]["lng"];
    return position;
}
/// <summary>
/// Generates a numeric sequence that starts <c>Position</c> steps after the resolved
/// <c>MinValue</c> and runs up to and including the resolved <c>MaxValue</c>, advancing by the
/// resolved <c>Interval</c>.
/// </summary>
/// <param name="document">Document used to resolve the Interval/MinValue/MaxValue queries.</param>
/// <returns>
/// One document per step whose <c>Column</c> holds the value rounded to 5 decimals. Nothing is
/// produced when any bound fails to parse or when the interval is not positive.
/// </returns>
public override IEnumerable<FreeDocument> Generate(IFreeDocument document = null)
{
    int interval;
    double max, min;
    if (!int.TryParse(document.Query(Interval), out interval)
        || !double.TryParse(document.Query(MinValue), out min)
        || !double.TryParse(document.Query(MaxValue), out max))
    {
        yield break;
    }

    // A zero or negative step would never reach max and the loop would never terminate.
    if (interval <= 0)
    {
        yield break;
    }

    for (var i = Position * interval + min; i <= max; i += interval)
    {
        var item = new FreeDocument();
        item.Add(Column, Math.Round(i, 5));
        yield return item;
    }
}
/// <summary>
/// Resolves the IP address stored in <c>Column</c> through the Baidu IP-location API and copies
/// the resulting fields into <paramref name="datas"/>, using <c>buffHelper</c> as a cache
/// keyed by the IP text.
/// </summary>
/// <param name="datas">Document that supplies the IP and receives <c>ip_content</c>, <c>pos_lat</c> and <c>pos_lng</c>.</param>
/// <returns>Null when <c>Column</c> is null; otherwise true, whether or not the lookup succeeded.</returns>
/// <remarks>The lookup is best-effort: failures are traced and the document is left unchanged.</remarks>
public override object TransformData(IFreeDocument datas)
{
    var item = datas[Column];
    if (item == null)
    {
        return null;
    }

    try
    {
        var cacheKey = item.ToString();
        var newlocation = buffHelper.Get(cacheKey);
        if (newlocation == null)
        {
            // Query the API with a GET request.
            var apiUrl = $"http://api.map.baidu.com/location/ip?ak={apikey}&ip={item}&coor=bd09ll";
            var result = HttpHelper.GetWebSourceHtml(apiUrl, "utf-8");
            dynamic info = serialier.DeserializeObject(result);
            if (info["status"].ToInt32() == 0)
            {
                newlocation = new FreeDocument();
                newlocation["ip_content"] = info["address"];
                newlocation["pos_lat"] = info["content"]["point"]["x"];
                newlocation["pos_lng"] = info["content"]["point"]["y"];
            }
            buffHelper.Set(cacheKey, newlocation);
        }

        // A non-zero status leaves no location; there is nothing to copy in that case.
        if (newlocation != null)
        {
            newlocation.DictCopyTo(datas);
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceWarning("IP location lookup failed for '{0}': {1}", item, ex.Message);
    }
    return true;
}
/// <summary>
/// Serializes the name, count and source of this object.
/// </summary>
/// <param name="scenario">Serialization scenario; not consulted by this implementation.</param>
/// <returns>A document with <c>Name</c>, <c>Count</c> and <c>Source</c> entries.</returns>
public FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    return new FreeDocument
    {
        { "Name", Name },
        { "Count", Count },
        { "Source", Source }
    };
}
/// <summary>
/// Creates a new collection holding either copies of, or references to, the documents in
/// <c>ComputeData</c>.
/// </summary>
/// <param name="isdeep">
/// True to copy every document into a fresh <see cref="FreeDocument"/>; false to reuse the
/// existing instances.
/// </param>
/// <returns>A collection named after this one with <c>'1'</c> appended and a cloned <c>TableInfo</c>.</returns>
public DataCollection Clone(bool isdeep)
{
    var copies = new List<IFreeDocument>();
    for (int index = 0; index < ComputeData.Count; index++)
    {
        if (!isdeep)
        {
            copies.Add(ComputeData[index] as IFreeDocument);
            continue;
        }

        var duplicate = new FreeDocument();
        ComputeData[index].DictCopyTo(duplicate);
        copies.Add(duplicate);
    }

    var clone = new DataCollection(copies)
    {
        Name = Name + '1',
        TableInfo = this.TableInfo.Clone()
    };
    return clone;
}
/// <summary>
/// Searches the Baidu place API around the latitude in <c>Column</c> and the longitude in
/// <c>Lng</c>, and copies the first hit into <paramref name="datas"/>, using <c>buffHelper</c>
/// as a cache keyed by <c>"lat,lng"</c>.
/// </summary>
/// <param name="datas">
/// Document that supplies the coordinates and receives <c>Query</c>, <c>Query_lat</c>,
/// <c>Query_lng</c> and <c>Query_distance</c>.
/// </param>
/// <returns>
/// Null when <c>Column</c> is null; the raw response text when <c>AllResult</c> is set and the
/// location was not cached; otherwise true, whether or not the lookup succeeded.
/// </returns>
/// <remarks>The lookup is best-effort: failures are traced and the document is left unchanged.</remarks>
public override object TransformData(IFreeDocument datas)
{
    var item = datas[Column];
    if (item == null)
    {
        return null;
    }

    try
    {
        var lat = item.ToString();
        var lng = datas[Lng].ToString();
        var bufkey = $"{lat},{lng}";
        var newlocation = buffHelper.Get(bufkey);
        if (newlocation == null)
        {
            var apiUrl = $"http://api.map.baidu.com/place/v2/search?ak={apikey}&output={format}&query={Query}&page_size=10&page_num=0&scope=2&location={lat},{lng}&radius={Radius}";
            var result = HttpHelper.GetWebSourceHtml(apiUrl, "utf-8");
            if (AllResult)
            {
                return result;
            }

            dynamic info = serialier.DeserializeObject(result);
            if (info["status"].ToInt32() == 0)
            {
                var first = info["results"][0];
                newlocation = new FreeDocument();
                newlocation[Query] = first["name"];
                newlocation[Query + "_lat"] = first["location"]["lat"];
                newlocation[Query + "_lng"] = first["location"]["lng"];
                newlocation[Query + "_distance"] = first["detail_info"]["distance"];
            }
            buffHelper.Set(bufkey, newlocation);
        }

        // A non-zero status leaves no location; there is nothing to copy in that case.
        if (newlocation != null)
        {
            newlocation.DictCopyTo(datas);
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceWarning("Place search failed for '{0}': {1}", item, ex.Message);
    }
    return true;
}
/// <summary>
/// Recursively copies a parsed JSON value into <paramref name="dict"/>.
/// </summary>
/// <param name="d">Parsed JSON value (array, object or scalar).</param>
/// <param name="dict">
/// Destination. When it is not a <see cref="FreeDocument"/>, a JSON object is handed to its
/// <c>DictDeserialize</c> and anything else is ignored.
/// </param>
/// <returns>
/// 1 when <paramref name="d"/> was an array (its items become <c>Children</c>), 2 when it was an
/// object (its members become entries), 0 otherwise.
/// </returns>
/// <remarks>
/// Within an object, a scalar member is stored by value and an array member as a nested document.
/// NOTE(review): a member that is itself an object (code 2) is not stored at all — confirm
/// that this is intended.
/// </remarks>
private int ItemtoNode(object d, IFreeDocument dict)
{
    var target = dict as FreeDocument;
    if (target == null)
    {
        var plain = d as JsonObject;
        if (plain != null)
        {
            dict.DictDeserialize(plain);
        }
        return 0;
    }

    var array = d as JsonArray;
    if (array != null)
    {
        foreach (var element in array)
        {
            var child = new FreeDocument();
            ItemtoNode(element, child);
            if (target.Children == null)
            {
                target.Children = new List<FreeDocument>();
            }
            target.Children.Add(child);
        }
        return 1;
    }

    var members = d as JsonObject;
    if (members == null)
    {
        return 0;
    }

    foreach (var member in members)
    {
        var nested = new FreeDocument();
        var kind = ItemtoNode(member.Value, nested);
        switch (kind)
        {
            case 0:
                target.Add(member.Name, member.Value);
                break;
            case 1:
                target.Add(member.Name, nested);
                break;
        }
    }
    return 2;
}
/// <summary>
/// Creates a task whose <c>ProcessToDo</c> starts out as an empty <see cref="FreeDocument"/>.
/// </summary>
public ProcessTask()
{
    this.ProcessToDo = new FreeDocument();
}
/// <summary>
/// Renders a header document as text: one <c>Key:Value</c> line per entry other than
/// <c>Headers</c>, followed by the raw content of the <c>Headers</c> entry.
/// </summary>
/// <param name="docu">Header document to render.</param>
/// <returns>The header text, with lines terminated by <c>\n</c>.</returns>
public static string HeaderToString(FreeDocument docu)
{
    var builder = new StringBuilder();
    foreach (var entry in docu)
    {
        if (entry.Key == "Headers")
        {
            // Emitted verbatim after the loop.
            continue;
        }
        builder.Append($"{entry.Key}:{entry.Value}\n");
    }
    builder.Append(docu["Headers"]);
    return builder.ToString();
}
/// <summary>
/// Emits one document per entry of <c>fileList</c>, with the entry stored in <c>Column</c>.
/// </summary>
/// <param name="document">Not used by this generator.</param>
/// <returns>The generated documents, in list order.</returns>
public override IEnumerable<FreeDocument> Generate(IFreeDocument document = null)
{
    foreach (string file in fileList)
    {
        var row = new FreeDocument();
        row.Add(Column, file);
        yield return row;
    }
}
/// <summary>
/// Extracts data from an HTML document using the configured crawl items.
/// </summary>
/// <param name="doc">Parsed HTML document.</param>
/// <returns>
/// The documents extracted via <c>GetDataFromXPath</c>; when no crawl items are configured, a
/// single document holding the full outer HTML under <c>Content</c>.
/// </returns>
public List<FreeDocument> CrawlData(HtmlDocument doc)
{
    if (CrawlItems.Count != 0)
    {
        return doc.GetDataFromXPath(CrawlItems, IsMultiData, RootXPath);
    }

    var whole = new FreeDocument { { "Content", doc.DocumentNode.OuterHtml } };
    return new List<FreeDocument> { whole };
}
/// <summary>
/// Serializes the name and the runtime type name of this object.
/// </summary>
/// <param name="scenario">Serialization scenario; not consulted by this implementation.</param>
/// <returns>A document with <c>Name</c> and <c>Type</c> entries.</returns>
public virtual FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var serialized = new FreeDocument();
    serialized.Add("Name", Name);
    serialized.Add("Type", this.GetType().Name);
    return serialized;
}
/// <summary>
/// Serializes the HTTP request settings held by this object.
/// </summary>
/// <param name="scenario">Serialization scenario; not consulted by this implementation.</param>
/// <returns>
/// A document with <c>URL</c>, <c>Allowautoredirect</c>, <c>Postdata</c>, <c>Encoding</c>,
/// <c>Method</c> and <c>Parameters</c> entries.
/// </returns>
public FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    return new FreeDocument
    {
        { "URL", URL },
        { "Allowautoredirect", Allowautoredirect },
        { "Postdata", Postdata },
        { "Encoding", Encoding },
        { "Method", Method },
        { "Parameters", Parameters }
    };
}
/// <summary>
/// Recursively copies an XML node into a <see cref="FreeDocument"/>: attributes become entries,
/// child elements named <c>Children</c> are appended to <c>Children</c>, and every other child
/// becomes a nested document stored under its element name.
/// </summary>
/// <param name="xnode">XML node to read.</param>
/// <param name="dict">Document to fill.</param>
private void XMLNode2Dict(XmlNode xnode, FreeDocument dict)
{
    if (xnode.Attributes != null)
    {
        foreach (XmlAttribute attribute in xnode.Attributes)
        {
            dict.Add(attribute.Name, attribute.Value);
        }
    }

    if (!xnode.HasChildNodes)
    {
        return;
    }

    foreach (XmlNode childNode in xnode.ChildNodes)
    {
        var nested = new FreeDocument();
        nested.Name = childNode.Name;

        if (childNode.Name != "Children")
        {
            XMLNode2Dict(childNode, nested);
            dict.Add(nested.Name, nested);
            continue;
        }

        if (dict.Children == null)
        {
            dict.Children = new List<FreeDocument>();
        }
        XMLNode2Dict(childNode, nested);
        dict.Children.Add(nested);
    }
}
/// <summary>
/// Converts each child node of the XML root element into an instance of the configured
/// <c>DataType</c>, populated from the node's attributes and children.
/// </summary>
/// <param name="xdoc">XML document to read.</param>
/// <param name="alreadyGetSize">Optional callback that receives the number of child nodes of the root.</param>
/// <returns>One populated object per child node; nothing when the document has no root element.</returns>
private IEnumerable<IFreeDocument> ReadText(XmlDocument xdoc, Action<int> alreadyGetSize = null)
{
    XmlNode table = xdoc.DocumentElement;
    if (table != null)
    {
        if (alreadyGetSize != null)
        {
            alreadyGetSize(table.ChildNodes.Count);
        }

        foreach (XmlNode rowNode in table)
        {
            var target = PluginProvider.GetObjectInstance(DataType) as IFreeDocument;
            var raw = new FreeDocument { Name = rowNode.Name };
            XMLNode2Dict(rowNode, raw);
            target.DictDeserialize(raw.DictSerialize());

            // Children are carried over by hand, and only when the target is a FreeDocument.
            var free = target as FreeDocument;
            if (free != null)
            {
                free.Children = raw.Children;
            }
            yield return target;
        }
    }
}
/// <summary>
/// Serializes the runtime type name and the configured encoding.
/// </summary>
/// <param name="scenario">Serialization scenario; not consulted by this implementation.</param>
/// <returns>A document with <c>Type</c> and <c>Encoding</c> entries.</returns>
public virtual FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var typeName = this.GetType().Name;
    var serialized = new FreeDocument
    {
        { "Type", typeName },
        { "Encoding", EncodingType }
    };
    return serialized;
}
/// <summary>
/// Runs <c>func</c> once per incoming document — inline, or as a temporary task registered on
/// the UI thread when <c>AddTask</c> is set — and passes the original document through.
/// </summary>
/// <param name="documents">Documents to process.</param>
/// <returns>The input documents, unchanged, in input order.</returns>
/// <remarks>
/// <c>func</c> receives a clone of the document, or — when <c>NewColumn</c> is set — a new
/// document merged from the columns named by <c>NewColumn</c> and <c>Column</c>.
/// </remarks>
public IEnumerable<IFreeDocument> Execute(IEnumerable<IFreeDocument> documents)
{
    foreach (var document in documents)
    {
        IFreeDocument doc;
        if (!string.IsNullOrEmpty(NewColumn))
        {
            doc = new FreeDocument();
            doc.MergeQuery(document, NewColumn + " " + Column);
        }
        else
        {
            doc = document.Clone();
        }

        if (!AddTask)
        {
            // Materialize immediately so the pipeline actually runs; the output is discarded.
            func(new List<IFreeDocument> {doc}).ToList();
        }
        else
        {
            var name = doc[Column];
            ControlExtended.UIInvoke(() =>
            {
                var task = TemporaryTask.AddTempTask("ETL" + name, func(new List<IFreeDocument> {doc}),
                    d => d.ToList());
                processManager.CurrentProcessTasks.Add(task);
            });
        }

        yield return document;
    }
}
//public void AutoVisit()
//{
//    if (Documents.Any())
//    {
//        var item = new HttpItem();
//        Documents[0].DictCopyTo(item);
//        var res = helper != null && helper.AutoVisit(item);
//        XLogSys.Print.Info("成功模拟登录");
//        Http.SetValue("Cookie", item.GetValue("Cookie"));
//        if (res)
//        {
//            URL = item.URL;
//        }
//    }
//}

/// <summary>
/// Guesses crawl items from the current page, shows the extracted data in an editable list
/// window, and on close (after user confirmation) replaces <c>CrawlItems</c> with the guessed
/// items, renamed according to the edits made to the first row.
/// </summary>
private void GreatHand()
{
    var guessed = HtmlDoc.SearchPropertiesSmart(CrawlItems, IsAttribute).FirstOrDefault();
    var found = guessed != null;
    if (found.SafeCheck("网页属性获取", LogType.Info) == false)
    {
        return;
    }

    var rows = HtmlDoc.GetDataFromXPath(guessed, IsMultiData);

    // First row maps every column name to itself so the user can rename columns in place.
    var headerRow = new FreeDocument(rows.GetKeys().ToDictionary(d => d, d => (object) d));
    rows.Insert(0, headerRow);

    var viewer = PluginProvider.GetObjectInstance<IDataViewer>("可编辑列表");
    var content = viewer.SetCurrentView(rows);
    var title = "手气不错_可修改第一列的属性名称";
    var window = new Window
    {
        Title = title,
        Content = content
    };

    window.Closing += (s, e) =>
    {
        if (ControlExtended.UserCheck("是否确认选择当前的数据表") == false)
        {
            return;
        }

        foreach (var header in headerRow)
        {
            var crawlItem = guessed.FirstOrDefault(d => d.Name == header.Key);
            if (crawlItem == null || header.Value == null)
            {
                continue;
            }
            crawlItem.Name = header.Value.ToString();
        }

        CrawlItems.Clear();
        CrawlItems.AddRange(guessed);
    };
    window.ShowDialog();
}
/// <summary>
/// Stores the serialized configuration of <paramref name="process"/> as a task of the current
/// project, creating the task when no task with the same name exists.
/// </summary>
/// <param name="process">Process whose configuration is saved; it is serialized through <c>IDictionarySerializable</c>.</param>
/// <param name="haveui">When true the user is asked to confirm via a message box; cancelling aborts the save.</param>
private void SaveTask(IDataProcess process, bool haveui)
{
    var task = CurrentProject.Tasks.FirstOrDefault(d => d.Name == process.Name);

    if (haveui)
    {
        var prompt = "是否确定保存任务?" + (task == null ? "将新建任务" : "存在同名任务,将覆盖该任务");
        var answer = MessageBox.Show(prompt, "提示信息", MessageBoxButton.OKCancel);
        if (answer != MessageBoxResult.OK)
        {
            return;
        }
    }

    configDocument = (process as IDictionarySerializable).DictSerialize();
    if (task == null)
    {
        task = new ProcessTask
        {
            Name = process.Name,
            Description = "任务描述",
            CreateTime = DateTime.Now
        };
        CurrentProject.Tasks.Add(task);
    }
    task.ProcessToDo = configDocument;
}
/// <summary>
/// Parses the newline-separated <c>name:value</c> lines in <c>Parameters</c> into a header
/// document.
/// </summary>
/// <returns>
/// A document in which the recognized headers (host, user-agent, accept, referer, content-type,
/// content-length, cookie) are stored under fixed keys and every other line is appended to the
/// <c>Headers</c> entry. The document is empty when <c>Parameters</c> is null or empty.
/// </returns>
/// <remarks>
/// Lines without a colon are skipped. The value is everything after the first colon, untrimmed.
/// <c>content-length</c> is parsed with <c>int.Parse</c>.
/// </remarks>
public FreeDocument GetHeaderParameter()
{
    var docu = new FreeDocument();
    if (string.IsNullOrEmpty(this.Parameters))
    {
        return docu;
    }

    var others = new StringBuilder();
    IEnumerable<string> lines = this.Parameters.Split('\n').Select(d => d.Trim());
    foreach (string line in lines)
    {
        string[] parts = line.Split(':');
        if (parts.Length < 2)
        {
            continue;
        }

        string name = parts[0].Trim().ToLower();
        string v = line.Substring(parts[0].Length + 1);

        // Map the lower-cased header name to its fixed key, if it is a recognized one.
        string target;
        object parsed = v;
        switch (name)
        {
            case "host":
                target = "Host";
                break;
            case "user_agent":
            case "user-agent":
                target = "User-Agent";
                break;
            case "accept":
                target = "Accept";
                break;
            case "referer":
                target = "Referer";
                break;
            case "content-type":
            case "contenttype":
            case "content_type":
                target = "Content_Type";
                break;
            case "content-length":
                target = "Content_Length";
                parsed = int.Parse(v);
                break;
            case "cookie":
                target = "Cookie";
                break;
            default:
                target = null;
                break;
        }

        if (target != null)
        {
            docu[target] = parsed;
            continue;
        }
        others.Append(line).Append("\n");
    }

    docu["Headers"] = others.ToString();
    return docu;
}
/// <summary>
/// Runs <c>regex</c> over the text of <c>Column</c> and emits one document per match, with the
/// match stored under the <c>regex</c> key.
/// </summary>
/// <param name="datas">Input documents; rows whose <c>Column</c> is null are skipped.</param>
/// <returns>One document per match, merged with the input via <c>NewColumn</c>; enumeration ends as soon as <c>Script</c> is found empty.</returns>
public override IEnumerable<IFreeDocument> TransformManyData(IEnumerable<IFreeDocument> datas)
{
    foreach (var row in datas)
    {
        object cell = row[Column];
        if (string.IsNullOrEmpty(Script))
        {
            // No pattern configured: stop producing output altogether.
            yield break;
        }
        if (cell == null)
        {
            continue;
        }

        MatchCollection found = regex.Matches(cell.ToString());
        foreach (var match in found)
        {
            var produced = new FreeDocument { { "regex", match } };
            yield return produced.MergeQuery(row, NewColumn);
        }
    }
}
/// <summary>
/// Queries the Baidu direction API for a route from the origin in <c>Column</c> to the resolved
/// destination and copies distance, duration and mode-specific fields into
/// <paramref name="datas"/>, using <c>buffHelper</c> as a cache.
/// </summary>
/// <param name="datas">
/// Document that supplies the origin and the destination/city queries, and receives the route
/// fields.
/// </param>
/// <returns>Null when <c>Column</c> is null; otherwise true, whether or not the lookup succeeded.</returns>
/// <remarks>
/// The cache key combines origin, destination, both cities and the travel mode; it is used for
/// both reading and writing the cache. The lookup is best-effort: failures are traced and the
/// document is left unchanged.
/// NOTE(review): the default branch reads <c>routes["toll"]</c> while the other fields use
/// <c>routes[0]</c> — confirm against the API response shape.
/// </remarks>
public override object TransformData(IFreeDocument datas)
{
    var item = datas[Column];
    if (item == null)
    {
        return null;
    }

    try
    {
        var source = item.ToString();
        var dest = datas.Query(Dest);
        var sourcecity = datas.Query(SourceCity);
        var destcity = datas.Query(DestCity);
        var mode = map[ModeSelector.SelectItem];
        var key = $"{source},{dest},{sourcecity},{destcity},{mode}";

        var newlocation = buffHelper.Get(key);
        if (newlocation == null)
        {
            var region = "";
            if (mode == "transit" || mode == "walking")
            {
                region = $"region={sourcecity}";
            }
            else
            {
                region = $"origin_region={sourcecity}&destination_region={destcity}";
            }

            // Query the API with a GET request.
            var apiUrl = $"http://api.map.baidu.com/direction/v1?mode={mode}&origin={source}&destination={dest}&{region}&output={format}&ak={apikey}";
            var result = HttpHelper.GetWebSourceHtml(apiUrl, "utf-8");
            dynamic info = serialier.DeserializeObject(result);
            if (info["status"].ToInt32() == 0 && info["type"].ToInt32() == 2)
            {
                var first = info["result"];
                newlocation = new FreeDocument();
                if (mode == "transit")
                {
                    newlocation["distance"] = first["routes"]["scheme"]["distance"];
                    newlocation["duration"] = first["routes"]["scheme"]["duration"];
                    newlocation["price"] = first["routes"]["scheme"]["price"];
                }
                else if (mode == "walking")
                {
                    newlocation["distance"] = first["routes"][0]["distance"];
                    newlocation["duration"] = first["routes"][0]["duration"];
                }
                else
                {
                    newlocation["distance"] = first["routes"][0]["distance"];
                    newlocation["duration"] = first["routes"][0]["duration"];
                    newlocation["traffic_condition"] = first["traffic_condition"];
                    newlocation["toll"] = first["routes"]["toll"];
                }
            }

            // Store under the key used for the lookup above; writing under the bare origin
            // meant the cache was never hit and routes to different destinations collided.
            buffHelper.Set(key, newlocation);
        }

        // An unsuccessful response leaves no route; there is nothing to copy in that case.
        if (newlocation != null)
        {
            newlocation.DictCopyTo(datas);
        }
    }
    catch (Exception ex)
    {
        System.Diagnostics.Trace.TraceWarning("Route lookup failed for '{0}': {1}", item, ex.Message);
    }
    return true;
}
//TODO: lazy (enumerator-style) iteration cannot be used here unless no other operation follows this module
/// <summary>
/// Groups incoming documents by the text of <c>Column</c> and merges rows that share a key into
/// the first document seen for that key, which is kept in <c>dictionary</c> (a member declared
/// outside this block).
/// </summary>
/// <remarks>
/// For a row whose key is already known: columns listed in <c>CollectionColumns</c> have the
/// row's non-null value appended to a list, columns listed in <c>SumColumns</c> are added up as
/// doubles (null counts as 0), and any other column is filled in only when the stored document
/// has no value for it. For a new key the row is copied, its collection columns are wrapped in
/// lists, and the copy is registered in <c>dictionary</c>.
/// </remarks>
/// <param name="datas">Input documents; rows whose <c>Column</c> is null are skipped.</param>
/// <returns>
/// When <c>IsLazyLinq</c> is false, each new group document at the moment it is created (later
/// merges modify that same instance); when it is true, every value of <c>dictionary</c> after
/// the whole input has been consumed.
/// </returns>
public override IEnumerable<IFreeDocument> TransformManyData(IEnumerable<IFreeDocument> datas)
{
    // Both column lists are space-separated names.
    List<string> collColum = CollectionColumns.Split(' ').Select(d => d.Trim()).ToList();
    List<string> sumColum = SumColumns.Split(' ').Select(d => d.Trim()).ToList();
    foreach (IFreeDocument data in datas)
    {
        object item = data[Column];
        if (item == null) continue;
        string key = item.ToString();
        IFreeDocument v;
        if (dictionary.TryGetValue(key, out v))
        {
            // Key already known: merge this row into the stored document v.
            foreach (var r in data)
            {
                if (collColum.Contains(r.Key))
                {
                    // Collection column: append to the existing list, or start one.
                    var list = v[r.Key] as IList;
                    if (data[r.Key] != null)
                    {
                        if (list != null)
                        {
                            list.Add(data[r.Key]);
                        }
                        else
                        {
                            v[r.Key] = new List<object> { data[r.Key] };
                        }
                    }
                }
                else if (sumColum.Contains(r.Key))
                {
                    // Sum column: both operands go through ToString() and double.Parse.
                    object vnum = v[r.Key];
                    if (vnum == null) vnum = 0;
                    double v4 = double.Parse(vnum.ToString());
                    object v3 = data[r.Key];
                    if (v3 == null) v3 = 0;
                    double v5 = double.Parse(v3.ToString());
                    v4 += v5;
                    v[r.Key] = v4;
                }
                else
                {
                    // Any other column: the first non-null value wins.
                    if (v[r.Key] == null)
                    {
                        v[r.Key] = r.Value;
                    }
                }
            }
            //yield return v;
        }
        else
        {
            // A new document has to be created first, otherwise the original collection would be modified.
            var newfree = new FreeDocument();
            data.DictCopyTo(newfree);
            foreach (string col in collColum)
            {
                if (newfree[col] != null) newfree[col] = new List<object> { newfree[col] };
                else
                {
                    newfree[col] = new List<object>();
                }
            }
            dictionary.Add(key, newfree);
            if(IsLazyLinq==false) yield return newfree;
        }
    }
    // Deferred mode: emit the merged groups only after all input has been read.
    if (IsLazyLinq == true)
    {
        foreach (var item in dictionary)
        {
            yield return item.Value;
        }
    }
}
/// <summary>
/// Parses the HTML held in <c>Column</c> of every input document and emits one document per node
/// matched by <c>XPath</c>.
/// </summary>
/// <param name="datas">Input documents; their <c>Column</c> value is rendered to text and parsed.</param>
/// <returns>
/// Documents carrying <c>Text</c>, <c>HTML</c> and <c>OHTML</c> merged with the input via
/// <c>NewColumn</c>; when <c>IsInsertNull</c> is set an empty document follows each input that
/// had matches. Inputs with no match produce nothing.
/// </returns>
public override IEnumerable<IFreeDocument> TransformManyData(IEnumerable<IFreeDocument> datas)
{
    foreach (var source in datas)
    {
        var raw = source[Column];
        var page = new HtmlDocument();
        page.LoadHtml(raw.ToString());

        var selected = page.DocumentNode.SelectNodes(XPath);
        if (selected == null)
        {
            continue;
        }

        foreach (var element in selected)
        {
            var fields = new FreeDocument
            {
                { "Text", element.GetNodeText() },
                { "HTML", element.InnerHtml },
                { "OHTML", element.OuterHtml }
            };
            yield return fields.MergeQuery(source, NewColumn);
        }

        if (IsInsertNull)
        {
            // Empty marker document emitted after the matches of one input.
            yield return new FreeDocument();
        }
    }
}
/// <summary>
/// Serializes the name, xpath and HTML flag of this item.
/// </summary>
/// <param name="scenario">Serialization scenario; not consulted by this implementation.</param>
/// <returns>A document with <c>Name</c>, <c>XPath</c> and <c>IsHtml</c> entries.</returns>
public FreeDocument DictSerialize(Scenario scenario = Scenario.Database)
{
    var serialized = new FreeDocument();
    serialized.Add("Name", Name);
    serialized.Add("XPath", XPath);
    serialized.Add("IsHtml", IsHTML);
    return serialized;
}
/// <summary>
/// Extracts data from an HTML document using a set of crawl items.
/// </summary>
/// <param name="doc2">Parsed HTML document.</param>
/// <param name="crawlItems">Items naming the xpaths to read; an empty list yields an empty result.</param>
/// <param name="type">
/// <c>ListType.List</c> produces one document per node matched by the root xpath;
/// <c>ListType.One</c> produces a single document; any other value yields an empty list.
/// </param>
/// <param name="rootXPath">
/// Root xpath for list mode. When empty, the root is computed with <c>XPath.GetMaxCompareXPath</c>
/// and stripped from each item's xpath via <c>TakeOff</c>; otherwise item xpaths are appended as-is.
/// </param>
/// <returns>The extracted documents; empty when the root xpath matches nothing.</returns>
public static List<FreeDocument> GetDataFromXPath(this HtmlDocument doc2, IList<CrawlItem> crawlItems,
    ListType type = ListType.List, string rootXPath = "")
{
    if (crawlItems.Count == 0)
    {
        return new List<FreeDocument>();
    }

    if (type == ListType.One)
    {
        var single = new FreeDocument();
        foreach (var crawlItem in crawlItems)
        {
            doc2.GetDataFromXPath(crawlItem, single);
        }
        return new List<FreeDocument> {single};
    }

    if (type != ListType.List)
    {
        return new List<FreeDocument>();
    }

    string root;
    var takeoff = "";
    if (string.IsNullOrEmpty(rootXPath))
    {
        root = XPath.GetMaxCompareXPath(crawlItems.Select(d => new XPath(d.XPath)).ToList()).ToString();
        takeoff = root;
    }
    else
    {
        root = rootXPath;
    }

    var nodes = doc2.DocumentNode.SelectNodes(root);
    if (nodes == null)
    {
        return new List<FreeDocument>();
    }

    var documents = new List<FreeDocument>();
    foreach (var node in nodes)
    {
        var document = new FreeDocument();
        foreach (var crawlItem in crawlItems)
        {
            string path = string.IsNullOrEmpty(takeoff)
                ? node.XPath + crawlItem.XPath
                : node.XPath + new XPath(crawlItem.XPath).TakeOff(takeoff);
            var result = node.GetDataFromXPath(path, crawlItem.IsHTML);
            document.SetValue(crawlItem.Name, result);
        }
        documents.Add(document);
    }
    return documents;
}