// Produce a deep copy of this message (via the serializer) carrying a new payload.
public msg clone(object input)
{
    msg copy = Serializer.DeepClone<msg>(this);
    copy.Input = input;
    return copy;
}
// Spins up a dedicated worker thread for the given API: the thread initialises the
// API, announces readiness to the app layer, then loops waiting on _resetEvent for
// work handed over via Execute(msg). _exit (checked before and after the wait)
// terminates the loop.
public threadMsg(IAPI api, EventHandler <threadMsgEventArgs> on_message = null)
{
    onMessageComplete = on_message;
    _api = api;
    _resetEvent = new ManualResetEvent(false);  // signalled by Execute() when _msg is ready
    _threadEvent = new ManualResetEvent(false);
    _thread = new Thread(new ParameterizedThreadStart(delegate(object evt)
    {
        api.Init();
        api.Open = true;
        // Tell the app layer the media API finished initialising.
        app.postToAPI(_API.MEDIA, _API.MEDIA_KEY_INITED, null);
        threadMsgPara tm = (threadMsgPara)evt;
        while (_exit == false)
        {
            tm.ResetEvent.WaitOne();  // block until Execute() signals new work
            if (_exit) { break; }
            else
            {
                msg m = api.Execute(_msg);
                // NOTE(review): the completion callback is commented out below, so the
                // result `m` is currently discarded and onMessageComplete never fires.
                // Re-enabling it would also need the null check, since on_message
                // defaults to null — confirm intent before uncommenting.
                //if (onMessageComplete != null) onMessageComplete.Invoke(this, new threadMsgEventArgs(m));
            }
            tm.ResetEvent.Reset();  // re-arm for the next message
        }
    }));
    _thread.Start(new threadMsgPara(_resetEvent));
}
// Forward a message straight to the main form (resolved lazily on first use),
// bypassing the timer-driven caches.
public void response_toMainRuntime(msg m)
{
    fom ??= app.get_Main();
    fom?.api_responseMsg(null, new threadMsgEventArgs(m));
}
// Dispatches the next batch of pending URLs to the crawl workers, or — when no
// work remains for this round — reports the collected pages and signals completion.
msg f_CRAWLER_KEY_REQUEST_LINK(msg m)
{
    if (CRAWLER_KEY_STOP) { f_CRAWLER_KEY_STOP_reset(); }
    string[] urls = new string[] { };
    // Pages already captured; remove their URLs from the pending work list.
    string[] uri_ok = dicHtml.Keys.ToArray();
    listUrl.Truncate(x => !uri_ok.Any(o => o == x), true);
    // Take up to crawlMaxThread URLs for this batch, then drop them from the queue.
    // NOTE(review): Truncate / array-returning Take look like project extensions on
    // listUrl — exact semantics assumed, confirm against their definitions.
    urls = listUrl.Take(crawlMaxThread);
    listUrl.Truncate(x => !urls.Any(o => o == x));
    Interlocked.Exchange(ref crawlPending, listUrl.Count);
    Interlocked.Exchange(ref crawlCounter, urls.Length);
    //if (Interlocked.CompareExchange(ref crawlPending, 0, 0) == 0)
    if (Interlocked.CompareExchange(ref crawlCounter, 0, 0) == 0)
    {
        // Nothing scheduled this round: report the result set, persist to disk when
        // more than one page was captured, then notify completion.
        string[] rs_out = dicHtml.Keys.ToArray();
        response_toMain(new msg() { API = _API.CRAWLER, KEY = _API.CRAWLER_KEY_REQUEST_LINK, Log = "Crawle complete result: " + rs_out.Length + " links. Writing file ..." });
        if (Interlocked.CompareExchange(ref crawlResult, 1, 1) > 1) { write_file_contentHTML(); }
        response_toMain(new msg() { API = _API.CRAWLER, KEY = _API.CRAWLER_KEY_REQUEST_LINK_COMPLETE, Input = rs_out });
        return(m);
    }
    else
    {
        // Fan the batch out to the pre-allocated background workers.
        // NOTE(review): assumes tasks[] has at least crawlMaxThread entries — confirm.
        for (int i = 0; i < urls.Length; i++) { tasks[i].RunWorkerAsync(urls[i]); }
    }
    return(m);
}
// Handles a stop request: raises the stop flag, reports what was collected so far,
// flushes results to disk when more than one page was captured, then signals completion.
msg f_CRAWLER_KEY_STOP(msg m)
{
    CRAWLER_KEY_STOP = true;

    string[] collected = dicHtml.Keys.ToArray();
    response_toMain(new msg()
    {
        API = _API.CRAWLER,
        KEY = _API.CRAWLER_KEY_REQUEST_LINK,
        Log = "Crawle complete result: " + collected.Length + " links. Writing file ..."
    });

    if (Interlocked.CompareExchange(ref crawlResult, 1, 1) > 1)
    {
        write_file_contentHTML();
    }

    response_toMain(new msg()
    {
        API = _API.CRAWLER,
        KEY = _API.CRAWLER_KEY_REQUEST_LINK_COMPLETE,
        Input = collected
    });
    return m;
}
// Starts two polling timers that drain the response caches onto the main form:
// timer_api drains cache_api every 100 ms, timer_msg drains cache_msg every 500 ms.
// Each tick forwards at most one queued message.
public api_base()
{
    if (timer_api == null)
    {
        timer_api = new System.Threading.Timer(new System.Threading.TimerCallback((obj) =>
        {
            if (fom == null) { fom = app.get_Main(); }  // resolve main form lazily
            // NOTE(review): Count-then-Dequeue from a timer callback is not atomic; if
            // cache_api is a plain Queue this races with Enqueue from other threads and
            // with overlapping timer ticks — confirm the queue type or add a lock.
            if (cache_api.Count > 0)
            {
                msg m = cache_api.Dequeue();
                if (fom != null && m != null) { fom.api_responseMsg(null, new threadMsgEventArgs(m)); }
            }
        }), fom, 100, 100);
    }
    if (timer_msg == null)
    {
        timer_msg = new System.Threading.Timer(new System.Threading.TimerCallback((obj) =>
        {
            if (fom == null) { fom = app.get_Main(); }
            // Same non-atomic drain pattern as above, at a slower 500 ms cadence.
            if (cache_msg.Count > 0)
            {
                msg m = cache_msg.Dequeue();
                if (fom != null && m != null) { fom.api_responseMsg(null, new threadMsgEventArgs(m)); }
            }
        }), fom, 500, 500);
    }
}
/// <summary>
/// Dispatches an incoming crawler message to its handler based on <c>m.KEY</c> and
/// marks the message as successfully processed.
/// </summary>
/// <param name="m">Message to process; null is returned unchanged.</param>
/// <returns>The same message instance with Output.Ok = true and Output.Data = null.</returns>
public msg Execute(msg m)
{
    if (m == null) { return m; }
    switch (m.KEY)
    {
        case _API.CRAWLER_KEY_STOP:
            f_CRAWLER_KEY_STOP(m);
            break;
        case _API.CRAWLER_KEY_REGISTER_PATH:
            f_CRAWLER_KEY_REGISTER_PATH(m);
            break;
        case _API.CRAWLER_KEY_REQUEST_LINK:
            f_CRAWLER_KEY_REQUEST_LINK(m);
            break;
        case _API.CRAWLER_KEY_CONVERT_PACKAGE_TO_HTML:
            // Not implemented: the package -> per-page-HTML conversion prototype that
            // lived here (deserialize crawl package, XPath-extract article nodes,
            // re-serialize) was fully commented out and has been removed. The key is
            // still accepted and reported as Ok.
            break;
        case _API.CRAWLER_KEY_CONVERT_PACKAGE_TO_TEXT:
            // Not implemented: the package -> plain-text conversion prototype
            // (htmlToText over each page, re-serialize) was fully commented out and
            // has been removed. The key is still accepted and reported as Ok.
            break;
    }
    // NOTE(review): assumes m.Output is always non-null — confirm msg initialises it.
    m.Output.Ok = true;
    m.Output.Data = null;
    return m;
}
// Resets crawl state, parses domain/sub-path and per-crawl settings from the start
// URL in m.Input, reloads any previously saved pages, then kicks off the first
// asynchronous page fetch; the fetch callback feeds discovered links back through
// Execute to continue the crawl.
msg f_CRAWLER_KEY_REGISTER_PATH(msg m)
{
    CRAWLER_KEY_STOP = false;
    domain_current = string.Empty;
    setting_URL_CONTIANS = string.Empty;
    setting_PARA1 = string.Empty;
    setting_PARA2 = string.Empty;
    if (m.Input != null)
    {
        Interlocked.Exchange(ref crawlResult, 0);  // reset captured-page counter
        oLinkSetting st = (oLinkSetting)m.Input;
        string para_url = st.Url;
        if (st.Settings != null && st.Settings.Count > 0)
        {
            // Optional crawl tuning values; fields stay empty when a key is absent.
            st.Settings.TryGetValue("URL_CONTIANS", out setting_URL_CONTIANS);
            st.Settings.TryGetValue("PARA1", out setting_PARA1);
            st.Settings.TryGetValue("PARA2", out setting_PARA2);
        }
        // "scheme://host/path" splits on '/' to [scheme:, "", host, path, ...].
        // NOTE(review): assumes an absolute URL with a scheme — a[2] throws
        // IndexOutOfRangeException otherwise; confirm callers validate the URL.
        string[] a = para_url.Split('/');
        domain_current = a[2].ToLower();
        if (domain_current.StartsWith("www.")) { domain_current = domain_current.Substring(4); }
        if (a.Length > 3) { url_sub_path_current = a[3]; }
        dicHtml.Clear();
        listUrl.Clear();
        read_file_contentHTML();  // reload any previously persisted pages
        HttpWebRequest w = (HttpWebRequest)WebRequest.Create(new Uri(para_url));
        w.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36";
        w.BeginGetResponse(asyncResult =>
        {
            // NOTE(review): EndGetResponse throws WebException on HTTP/network errors;
            // unhandled here it would take down the callback thread — confirm acceptable.
            HttpWebResponse rs = (HttpWebResponse)w.EndGetResponse(asyncResult); //add a break point here
            string url = rs.ResponseUri.ToString();  // final URL after redirects
            response_toMain(new msg() { API = _API.CRAWLER, KEY = _API.CRAWLER_KEY_REQUEST_LINK, Log = url });
            if (rs.StatusCode == HttpStatusCode.OK)
            {
                string htm = string.Empty;
                // NOTE(review): reader/response are closed only on the success path;
                // neither is disposed if ReadToEnd throws — consider using blocks.
                StreamReader sr = new StreamReader(rs.GetResponseStream(), Encoding.UTF8);
                htm = sr.ReadToEnd();
                sr.Close();
                rs.Close();
                if (!string.IsNullOrEmpty(htm))
                {
                    htm = HttpUtility.HtmlDecode(htm);
                    htm = format_HTML(htm);
                    if (!dicHtml.ContainsKey(url))
                    {
                        dicHtml.Add(url, htm);
                        Interlocked.Increment(ref crawlResult);
                    }
                    var us = get_Urls(url, htm);  // extract candidate links from the page
                    if (CRAWLER_KEY_STOP) { f_CRAWLER_KEY_STOP_reset(); return; }
                    if (us.Url_Html.Length > 0)
                    {
                        // More links found: queue them and continue the crawl loop.
                        listUrl.AddRange(us.Url_Html);
                        Execute(new msg() { API = _API.CRAWLER, KEY = _API.CRAWLER_KEY_REQUEST_LINK });
                    }
                    else
                    {
                        // Dead end on the first page: report completion immediately.
                        Execute(new msg() { API = _API.CRAWLER, KEY = _API.CRAWLER_KEY_REQUEST_LINK_COMPLETE, Input = dicHtml.Keys.ToArray() });
                    }
                }
            }
        }, w);
    }
    return(m);
}
// Hand a message to the worker thread and wake it up.
public void Execute(msg msg)
{
    _msg = msg;         // stash the work item for the thread loop to pick up
    _resetEvent.Set();  // release the WaitOne() in the worker thread
}
// Event payload wrapping the message produced by a worker thread.
public threadMsgEventArgs(msg msg) => Message = msg;
// Queue an API response for the 100 ms polling timer to forward to the main form.
public void response_toMain(msg m) => cache_api.Enqueue(m);
// Queue a notification for the 500 ms polling timer to forward to the main form.
public void notification_toMain(msg m) => cache_msg.Enqueue(m);