/// <summary>
/// Downloads the resource at <paramref name="url"/> with an HTTP GET and returns the raw
/// response bytes (intended for images).
/// </summary>
/// <param name="url">URL to request.</param>
/// <param name="referer">
/// Optional value for the Referer header. When null or empty no Referer header is sent.
/// </param>
/// <returns>The response body, or <c>null</c> when the request fails for any reason.</returns>
public virtual async Task<byte[]> HttpGetByte(string url, string referer = default(string))
{
    await Sleep();
    if (ReportUrl)
    {
        ReportManage.Report(this, "GET " + url, true, true);
    }

    try
    {
        // disposeHandler is false, so disposing the client leaves the handler usable.
        using (var client = new System.Net.Http.HttpClient(handler, false))
        {
            client.DefaultRequestHeaders.UserAgent.ParseAdd(this.UserAgent);

            // new Uri(null) throws ArgumentNullException, which previously made every call
            // that relied on the default referer fail and return null.
            if (string.IsNullOrEmpty(referer) == false)
            {
                client.DefaultRequestHeaders.Referrer = new Uri(referer);
            }

            return await client.GetByteArrayAsync(url);
        }
    }
    catch (Exception e)
    {
        if (visbleErr)
        {
            ReportManage.ErrReport(this, "Url:" + url + " " + e.Message);
        }
        return null;
    }
}
/// <summary>
/// Sends <paramref name="vals"/> as a form-url-encoded HTTP POST and returns the raw
/// response bytes.
/// </summary>
/// <param name="url">URL to post to.</param>
/// <param name="vals">Form fields; each entry's Key/Value pair becomes one form field.</param>
/// <param name="referer">
/// Optional value for the Referer header. When null or empty no Referer header is sent.
/// </param>
/// <returns>
/// The response body, or <c>null</c> when the request fails; in that case
/// <c>ErrMessage</c> holds the exception message.
/// </returns>
public virtual async Task<byte[]> HttpPostByte(string url, List<KeyValue> vals, string referer = default(string))
{
    // Clear any message left by an earlier request (HttpGet does the same) so callers
    // inspecting ErrMessage afterwards only see this call's failure.
    ErrMessage = string.Empty;

    if (ReportUrl)
    {
        ReportManage.Report(this, "POST " + url, true, true);
    }

    try
    {
        await Sleep();

        // disposeHandler is false, so disposing the client leaves the handler usable.
        using (var client = new System.Net.Http.HttpClient(handler, false))
        {
            client.DefaultRequestHeaders.UserAgent.ParseAdd(this.UserAgent);

            // new Uri(null) throws ArgumentNullException, which previously made every call
            // that relied on the default referer fail.
            if (string.IsNullOrEmpty(referer) == false)
            {
                client.DefaultRequestHeaders.Referrer = new Uri(referer);
            }

            var form = new FormUrlEncodedContent(
                vals.Select(n => new KeyValuePair<string, string>(n.Key, n.Value)));
            using (var response = await client.PostAsync(url, form))
            {
                return await response.Content.ReadAsByteArrayAsync();
            }
        }
    }
    catch (Exception e)
    {
        if (visbleErr)
        {
            ReportManage.ErrReport(this, "Url:" + url + " " + e.Message);
        }
        ErrMessage = e.Message;
    }

    return null;
}
/// <summary>
/// Advances the nearest ancestor <c>Data</c> to its next row, then runs the children.
/// When the current row is reported as null and <c>ignoreDataNull</c> is false, an error is
/// reported and, if <c>DoPageReLoad</c> is set, the nearest ancestor <c>Page</c> may be re-run.
/// </summary>
/// <param name="runChildren">Forwarded unchanged to <c>RunChildren</c>.</param>
public override void Run(bool runChildren)
{
    // Walk up the parent chain until the first Data ancestor is found.
    Data target = null;
    for (IRawler node = this.Parent; node != null; node = node.Parent)
    {
        target = node as Data;
        if (target != null)
        {
            break;
        }
    }

    if (target == null)
    {
        ReportManage.ErrReport(this, "書き込み先のData クラスが見つかりませんでした。");
    }
    else if (target.GetCurrentDataNull() == false)
    {
        pageCount = 0;
        target.NextDataRow();
        ReportManage.Report(this, "NextDataRow");
    }
    else if (ignoreDataNull == false)
    {
        ReportManage.ErrReport(this, "RowがNullです。Writeが動作していないようです。");
        var pages = this.GetAncestorRawler().Where(n => n is Page);
        if (DoPageReLoad)
        {
            var page = pages.FirstOrDefault() as Page;
            if (page != null)
            {
                pageCount++;

                // NOTE(review): the reload only happens once pageCount EXCEEDS
                // PageReLoadCount, and the else branch reuses the "Data class not found"
                // message. Both look unintended - confirm before changing.
                if (PageReLoadCount < pageCount)
                {
                    ReportManage.Report(this, "再読み込み待機中。");
                    // Blocking wait that grows quadratically with the attempt count.
                    Task.Delay(1000 * pageCount * pageCount).Wait();
                    page.Run();
                }
                else
                {
                    ReportManage.ErrReport(this, "書き込み先のData クラスが見つかりませんでした。");
                }
            }
        }
    }

    this.RunChildren(runChildren);
}
/// <summary>
/// Reports the string form of the current data row of the nearest upper <c>IData</c>.
/// Does nothing when no <c>IData</c> is found. Children are not run by this method.
/// </summary>
/// <param name="runChildren">Not used by this implementation.</param>
public override void Run(bool runChildren)
{
    var source = (IData)this.GetUpperInterface<IData>();
    if (source == null)
    {
        return;
    }

    var rowText = source.GetCurrentDataRow().ToString();
    ReportManage.Report(this, rowText, true, true);
}
/// <summary>
/// Logs in again by posting <c>vals</c> to <c>loginPage</c>, updates <c>hasLogin</c>,
/// reports the outcome and stores the response in <c>text</c>.
/// </summary>
/// <remarks>
/// The login counts as successful when either at least one cookie is present or
/// <c>isNoCookieSite</c> is false, and in addition <c>ErrString</c> is null or does not
/// occur in the response.
/// </remarks>
public void ReLogin()
{
    string html = this.HttpPost(loginPage, vals, false);

    // Cookie gate first; the response body is only inspected when the gate passes.
    bool cookieGatePassed = this.GetCookieCount() > 0 || !isNoCookieSite;
    bool succeeded = cookieGatePassed
        && (ErrString == null || html.Contains(ErrString) == false);

    hasLogin = succeeded;
    if (succeeded)
    {
        ReportManage.Report(this, "ログイン成功");
    }
    else
    {
        ReportManage.ErrReport(this, "ログイン失敗");
    }

    this.text = html;
}
/// <summary>
/// Runs the base implementation, then optionally writes the result to a file and/or
/// reports it as TSV.
/// </summary>
/// <remarks>
/// A write happens when <c>doSave</c> is true or <c>FileName</c> is set. <c>FileName</c>
/// always takes precedence as the target. Previously it was only copied into
/// <c>filename</c> when <c>doSave</c> was true, so setting <c>FileName</c> alone triggered
/// a write against whatever <c>filename</c> happened to hold.
/// </remarks>
/// <param name="runChildren">Forwarded unchanged to the base <c>Run</c>.</param>
public override void Run(bool runChildren)
{
    if (FileName != null)
    {
        filename = FileName;
    }
    else if (doSave && string.IsNullOrEmpty(filename))
    {
        // Saving was requested but there is nowhere to save to: abort before doing any work.
        ReportManage.ErrReport(this, "FileNameが空です。");
        return;
    }

    base.Run(runChildren);

    if (doSave || FileName != null)
    {
        try
        {
            Write(filename);
        }
        catch (Exception e)
        {
            ReportManage.ErrReport(this, "CountDataでファイルの書き込みに失敗しました。" + e.Message);
        }
    }

    if (reportData)
    {
        ReportManage.Report(this, this.ToTsv(), true, true);
    }
}
/// <summary>
/// Builds a message from the converted header, a body and the converted footer,
/// HTML-decodes it, reports it and then runs the children.
/// </summary>
/// <remarks>
/// The body is the parent text (<c>GetText()</c>) when <c>viewParentText</c> is set and
/// <c>Message</c> is null or empty; otherwise it is the converted <c>Message</c>.
/// </remarks>
/// <param name="runChildren">Forwarded unchanged to <c>RunChildren</c>.</param>
public override void Run(bool runChildren)
{
    bool useParentText = viewParentText && string.IsNullOrEmpty(Message);

    // Evaluated in header, body, footer order, as before.
    string head = Header.Convert(this);
    string body = useParentText ? GetText() : Message.Convert(this);
    string tail = this.Footer.Convert(this);

    string decoded = System.Net.WebUtility.HtmlDecode(head + body + tail);
    ReportManage.Report(this, decoded, returncode, visible);

    this.RunChildren(runChildren);
}
/// <summary>
/// Commits <paramref name="cDataRow"/> and starts a fresh current row.
/// </summary>
/// <remarks>
/// Raises <c>Commited</c> (if it has subscribers), then either writes the row as one line
/// to <c>File</c> (sequential save mode, skipped when <c>File</c> is null) or appends it
/// to <c>dataList</c>.
/// </remarks>
/// <param name="cDataRow">The row being committed.</param>
public void NextDataRow(DataRowObject cDataRow)
{
    ReportManage.Report(this, "NextDataRow");

    Commited?.Invoke(this, new EventDataRow(cDataRow));

    if (FileSaveMode != FileSaveMode.Sequential)
    {
        dataList.Add(cDataRow);
    }
    else
    {
        File?.SequentialWriteLine(CreateSaveOneLine(cDataRow, SaveFileType));
    }

    currentDataRow = new DataRowObject();
}
/// <summary>
/// Sends <paramref name="vals"/> as a form-url-encoded HTTP POST and returns the response
/// body as text.
/// </summary>
/// <param name="url">URL to post to.</param>
/// <param name="vals">Form fields; each entry's Key/Value pair becomes one form field.</param>
/// <param name="referer">
/// Optional value for the Referer header. When null or empty no Referer header is sent.
/// </param>
/// <returns>
/// The response text, decoded with <c>encoder</c> when one other than UTF-8 is set;
/// <see cref="string.Empty"/> when the request fails, in which case <c>ErrMessage</c>
/// holds the exception message.
/// </returns>
public virtual async Task<string> HttpPost(string url, List<KeyValue> vals, string referer = default(string))
{
    // Clear any message left by an earlier request (HttpGet does the same) so callers
    // inspecting ErrMessage afterwards only see this call's failure.
    ErrMessage = string.Empty;

    if (ReportUrl)
    {
        // One "key:value" line per field, with line breaks stripped from the values.
        var t = vals.Select(n => n.Key + ":" + n.Value.Trim().Replace("\n", "").Replace("\r", "")).JoinText("\n");
        ReportManage.Report(this, "POST " + url + "\n" + t, true, true);
    }

    try
    {
        await Sleep();

        // disposeHandler is false, so disposing the client leaves the handler usable.
        using (var client = new System.Net.Http.HttpClient(handler, false))
        {
            client.DefaultRequestHeaders.UserAgent.ParseAdd(this.UserAgent);

            // new Uri(null) throws ArgumentNullException, which previously made every call
            // that relied on the default referer fail and return an empty string.
            if (string.IsNullOrEmpty(referer) == false)
            {
                client.DefaultRequestHeaders.Referrer = new Uri(referer);
            }

            var form = new FormUrlEncodedContent(
                vals.Select(n => new KeyValuePair<string, string>(n.Key, n.Value)));
            using (var response = await client.PostAsync(url, form))
            {
                if (encoder == null || encoder == System.Text.Encoding.UTF8)
                {
                    return await response.Content.ReadAsStringAsync();
                }

                var bytes = await response.Content.ReadAsByteArrayAsync();
                return encoder.GetString(bytes, 0, bytes.Length);
            }
        }
    }
    catch (Exception e)
    {
        if (visbleErr)
        {
            ReportManage.ErrReport(this, "Url:" + url + " " + e.Message);
        }
        ErrMessage = e.Message;
    }

    return string.Empty;
}
/// <summary>
/// Reports the milliseconds elapsed on the stopwatch together with
/// <paramref name="text"/>, adds the elapsed time to the running total and restarts the
/// stopwatch. Does nothing when <c>DoRun</c> is false.
/// </summary>
/// <param name="rawler">Object passed to <c>ReportManage.Report</c> as the reporter.</param>
/// <param name="text">Label for the measurement; prefixed with the parent's object string when <c>viewParent</c> is set.</param>
/// <param name="viewTotal">When true, an additional "Total:" line is reported.</param>
public void Write(RawlerBase rawler, string text, bool viewTotal)
{
    if (!DoRun)
    {
        return;
    }

    if (viewParent)
    {
        text = this.Parent.ToObjectString() + " " + text;
    }

    // The stopwatch is read separately for the line and for the total, as before.
    var line = sw.ElapsedMilliseconds + "\t" + text;
    total += sw.ElapsedMilliseconds;

    ReportManage.Report(rawler, line, true, true);
    if (viewTotal)
    {
        ReportManage.Report(rawler, "Total:" + total, true, true);
    }

    sw.Restart();
}
/// <summary>
/// Loads <paramref name="url"/> with the configured HTTP method and stores the response
/// in <c>text</c>.
/// </summary>
/// <remarks>
/// For POST the parameters are collected afresh into <c>parameterDic</c> (via
/// <c>InputParameterTree</c> when present) and sent as form fields. On an empty result
/// whose error message contains "503" or "500" the method waits 30 seconds and pushes the
/// URL back onto <c>urlStack</c>. On any other empty result it reports the failure,
/// raises <c>ErrorEvent</c> and runs <c>ErrEventTree</c> over the error message.
/// </remarks>
/// <param name="url">URL to load.</param>
/// <returns><c>true</c> when <c>Text</c> is non-empty after the request; otherwise <c>false</c>.</returns>
protected async Task<bool> ReadPage(string url)
{
    var client = GetWebClient();

    if (MethodType == MethodType.GET)
    {
        this.text = await client.HttpGet(url, null, tmpReferer);
    }
    else if (MethodType == MethodType.POST)
    {
        parameterDic.Clear();
        if (InputParameterTree != null)
        {
            RawlerBase.GetText(GetText(), InputParameterTree, this);
        }

        var formFields = new List<KeyValue>();
        foreach (var pair in parameterDic)
        {
            formFields.Add(new KeyValue() { Key = pair.Key, Value = pair.Value });
        }

        this.text = await client.HttpPost(url, formFields, tmpReferer);
    }

    // NOTE(review): pastUrl is assigned after currentUrl has already been overwritten, so
    // both end up equal to url. The order may be inverted - confirm intent.
    this.currentUrl = url;
    this.pastUrl = this.currentUrl;

    if (this.Text.Length > 0)
    {
        return true;
    }

    bool serverError = client.ErrMessage.Contains("503") || client.ErrMessage.Contains("500");
    if (serverError)
    {
        // Back off, then queue the URL again.
        ReportManage.Report(this, "待機します", true, true);
        await Task.Delay(new TimeSpan(0, 0, 30));
        urlStack.Push(url);
        return false;
    }

    if (visbleErr)
    {
        ReportManage.ErrReport(this, url + "の読み込みに失敗しました。");
    }

    if (ErrorEvent != null)
    {
        ErrorEvent(this, new EventArgs());
    }

    if (ErrEventTree != null)
    {
        // Run the error tree against a document holding the error message.
        ErrEventTree.SetParent();
        Document errorDocument = new Document() { TextValue = client.ErrMessage };
        errorDocument.SetParent(this);
        errorDocument.AddChildren(ErrEventTree);
        errorDocument.Run();
    }

    return false;
}
///// <summary>
///// For Basic authentication.
///// </summary>
//public RawlerLib.BasicAuthorization BasicAuthorization { get; set; }

/// <summary>
/// Fetches <paramref name="url"/> with an HTTP GET and returns the response as text.
/// </summary>
/// <param name="url">URL to request.</param>
/// <param name="enc">
/// Encoding used to decode the response. When null the encoding is detected by
/// <c>GetAutoEncoding</c>, which also updates <c>encoder</c>.
/// </param>
/// <param name="referer">
/// Optional value for the Referer header. When null or empty no Referer header is sent.
/// </param>
/// <returns>
/// The decoded response (or a cached copy when <c>UseCache</c> is set and one exists);
/// <see cref="string.Empty"/> when the request fails, in which case <c>ErrMessage</c>
/// holds the exception message.
/// </returns>
public virtual async Task<string> HttpGet(string url, Encoding enc = null, string referer = default(string))
{
    await Sleep();

    if (UseCache)
    {
        var cached = GetCashe(url);
        if (cached != null)
        {
            return cached;
        }
    }

    ErrMessage = string.Empty;
    string result = string.Empty;
    bool retry = false;

    if (ReportUrl)
    {
        ReportManage.Report(this, "GET " + url, true, true);
    }

    try
    {
        // disposeHandler is false, so disposing the client leaves the handler usable.
        using (var client = new System.Net.Http.HttpClient(handler, false))
        {
            client.DefaultRequestHeaders.UserAgent.ParseAdd(this.UserAgent);

            // new Uri(null) throws ArgumentNullException, which previously made every call
            // without a referer fail and return an empty string.
            if (string.IsNullOrEmpty(referer) == false)
            {
                client.DefaultRequestHeaders.Referrer = new Uri(referer);
            }

            var data = await client.GetByteArrayAsync(url);
            if (enc != null)
            {
                result = enc.GetString(data, 0, data.Length);
            }
            else
            {
                result = GetAutoEncoding(data, out encoder);
            }
        }
    }
    catch (Exception e)
    {
        if (visbleErr)
        {
            ReportManage.ErrReport(this, "Url:" + url + " " + e.Message);
        }
        ErrMessage = e.Message;
        retry = false;
    }

    // retry is never set to true in this method, so this block is currently inactive.
    // It is kept (and now forwards the referer) so it behaves correctly if re-enabled.
    if (retry)
    {
        count++;
        if (count <= tryCount)
        {
            ReportManage.ErrReport(this, "HttpGet:" + url + "にリトライ待機中");
            await Task.Delay(new TimeSpan(0, 0, 10 * count * count));
            result = await this.HttpGet(url, enc, referer);
        }
        else
        {
            ReportManage.ErrReport(this, "HttpGet:" + url + "に失敗しました");
            result = string.Empty;
        }
    }

    if (UseCache)
    {
        casheDic.GetValueOrAdd(url, new WeakReference<string>(result));
    }

    return result;
}
/// <summary>
/// Queues the next page URL (this node's text with any '#' fragment removed) on the
/// owning page, after running the children, while the page limit has not been reached.
/// </summary>
/// <remarks>
/// Nothing happens when the parent text is null or empty or when no page is found.
/// Unless <c>allowSameUrl</c> is set, a URL that was already seen is skipped and each new
/// URL increments <c>count</c>.
/// </remarks>
/// <param name="runChildren">Forwarded unchanged to <c>RunChildren</c>.</param>
public override void Run(bool runChildren)
{
    var page = GetPage();
    var parts = GetText().Split('#');
    string url = parts.Length > 0 ? parts[0] : string.Empty;

    if (string.IsNullOrEmpty(this.Parent.Text) || page == null)
    {
        return;
    }

    // Reset the bookkeeping when the page's start URL differs from the one last seen.
    if (page.GetStartUrl() != baseUrl)
    {
        baseUrl = page.GetStartUrl();
        urlHash.Clear();
        count = 0;
    }

    if (allowSameUrl == false)
    {
        if (urlHash.Contains(url))
        {
            return;
        }

        count++;
        urlHash.Add(url);
    }
    // NOTE(review): count is not incremented when allowSameUrl is true, so the limit
    // below is only ever advanced by the branch above - confirm this is intended.

    if (count < maxCount)
    {
        if (sleepTime > 0)
        {
            // Blocking wait of sleepTime seconds before following the link.
            Task.Delay(new TimeSpan(0, 0, sleepTime)).Wait();
        }

        this.RunChildren(runChildren);
        ReportManage.Report(this, "NextPage:" + GetText());
        page.PushUrl(url);
    }
    else
    {
        ReportManage.Report(this, "NextPage:指定ページ数を取得しました");
    }
}