/// <summary>
/// Downloads the HTML page at <paramref name="URI"/> and converts it to dat text by invoking
/// the static <c>HTMLConvert</c> method of the <c>HtmlToDatConverter</c> type found in
/// <c>CompiledAssembly</c>.
/// </summary>
/// <param name="URI">Address of the HTML page to download.</param>
/// <param name="range">
/// When negative, the whole converted dat is returned; otherwise the value is forwarded to
/// <c>DifferenceDetection</c>.
/// </param>
/// <param name="UA">
/// Forwarded to <c>DifferenceDetection</c>. The download itself always sends
/// <c>ViewModel.Setting.UserAgent4</c> as the User-Agent header.
/// </param>
/// <param name="LastMod">Forwarded to <c>DifferenceDetection</c>.</param>
/// <returns>
/// <c>{ 0 }</c> when <c>CompiledAssembly</c> is null or any exception other than
/// <see cref="System.Threading.ThreadAbortException"/> occurs; the Shift_JIS bytes of the whole dat when
/// <c>ViewModel.Setting.AllReturn</c> is set or <paramref name="range"/> is negative; otherwise
/// the result of <c>DifferenceDetection</c>.
/// </returns>
public static Byte[] HTMLTranceOutRegex(String URI, int range, String UA, String LastMod = null)
{
    if (CompiledAssembly == null)
    {
        ViewModel.OnModelNotice("外部HTMLtoDatコードのコンパイルが行われていません");
        return new byte[] { 0 };
    }

    // GetType is called with throwOnError = false, so t may be null; in that case the
    // conversion below is skipped and dat stays empty.
    Type t = CompiledAssembly.GetType("HtmlToDatConverter", false, false);
    using (WebClient get = new WebClient())
    {
        get.Headers["User-Agent"] = ViewModel.Setting.UserAgent4;
        try
        {
            String dat = "", ketu = "";
            if (ViewModel.Setting.ProxyAddress != "")
            {
                get.Proxy = new WebProxy(ViewModel.Setting.ProxyAddress);
            }

            using (System.IO.StreamReader html = new System.IO.StreamReader(get.OpenRead(URI), Encoding.GetEncoding("Shift_JIS")))
            {
                String thredhtml = html.ReadToEnd();
                if (t != null)
                {
                    dat = (String)t.InvokeMember("HTMLConvert", BindingFlags.InvokeMethod, null, null, new object[] { thredhtml });
                }

                // Size in KB as printed on the page; stays empty when the element is not found.
                ketu = Regex.Match(thredhtml, @"<div class=.cLength.>(\d+)KB</div>").Groups[1].Value;
            }

            if (ViewModel.Setting.Replace5chURI || ViewModel.Setting.ReplaceHttpsLink)
            {
                dat = HTMLtoDat.ResContentReplace(dat);
            }

            Byte[] Bdat = Encoding.GetEncoding("Shift_JIS").GetBytes(dat);
            if (ViewModel.Setting.AllReturn || range < 0)
            {
                return Bdat;
            }

            // TryParse leaves size at 0 when ketu is empty or not a valid int, replacing the
            // former Parse + catch(FormatException) pair.
            int size;
            int.TryParse(ketu, out size);

            return DifferenceDetection(Bdat, LastMod, UA, range, size);
        }
        catch (System.Threading.ThreadAbortException)
        {
            // Rethrow without resetting the stack trace.
            throw;
        }
        catch (Exception err)
        {
            ViewModel.OnModelNotice(URI + "をHTMLから変換中にエラーが発生しました。\n" + err.ToString());
            return new byte[] { 0 };
        }
    }
}
//public String Proxy { get; set; }
//public String UserAgent { get; set; }
//public bool CEExternalRead { get; set; }
//public bool AllRes { get; set; }
//public bool SkipeAliveCheck { get; set; }

/// <summary>
/// Downloads the thread HTML at <paramref name="URI"/> and rebuilds dat text from it.
/// Two page layouts are handled: one where the title tag is not closed on the same line
/// (treated as the "new CGI" layout) and one where it is (the "old CGI" layout).
/// When <c>ViewModel.Setting.CEExternalRead</c> is set, the work is delegated to
/// <c>HTMLTranceOutRegex</c> instead.
/// </summary>
/// <param name="URI">Address of the thread page; "2ch.net" is replaced with "5ch.net" before use.</param>
/// <param name="range">
/// When negative, the whole dat is returned; otherwise the value is forwarded to
/// <c>DifferenceDetection</c>.
/// </param>
/// <param name="UA">
/// Forwarded to <c>DifferenceDetection</c>. The download itself always sends
/// <c>ViewModel.Setting.UserAgent4</c> as the User-Agent header.
/// </param>
/// <param name="CRReplace">Enables the copyright-sign replacement applied to the title.</param>
/// <param name="LastMod">Forwarded to <c>DifferenceDetection</c>.</param>
/// <returns>
/// <c>{ 0 }</c> when the title matches one of the known error titles (including the default
/// title used when no title tag was found) or when any exception other than
/// <see cref="System.Threading.ThreadAbortException"/> occurs; <c>{ 0, 0 }</c> when the alive check is
/// enabled and the page does not look archived; the Shift_JIS bytes of the whole dat when
/// <c>ViewModel.Setting.AllReturn</c> is set or <paramref name="range"/> is negative; otherwise
/// the result of <c>DifferenceDetection</c>.
/// </returns>
public static Byte[] Gethtml(String URI, int range, String UA, bool CRReplace, String LastMod = null)
{
    URI = URI.Replace("2ch.net", "5ch.net");
    if (ViewModel.Setting.CEExternalRead)
    {
        return HTMLTranceOutRegex(URI, range, UA, LastMod);
    }

    using (WebClient get = new WebClient())
    {
        get.Headers["User-Agent"] = ViewModel.Setting.UserAgent4;
        try
        {
            if (ViewModel.Setting.ProxyAddress != "")
            {
                get.Proxy = new WebProxy(ViewModel.Setting.ProxyAddress);
            }

            using (System.IO.StreamReader html = new System.IO.StreamReader(get.OpenRead(URI), Encoding.GetEncoding("Shift_JIS")))
            {
                // The default title doubles as an error marker: it matches the error-title
                // pattern below when no title tag is found.
                String title = "もうずっと人大杉", ketu = "";
                // StringBuilder that accumulates the resulting dat.
                var Builddat = new StringBuilder(510 * 1024);
                bool alive = true, NewCGI = false;

                // Search for the title.
                // NOTE(review): EndOfStream is tested after each read and before the body, so
                // the last line read from the stream is never examined by this loop.
                for (String line = html.ReadLine(); !html.EndOfStream; line = html.ReadLine())
                {
                    Match closedTitle = Regex.Match(line, @"<title>(.+?)<\/title>");
                    if (closedTitle.Success)
                    {
                        title = closedTitle.Groups[1].Value;
                        break;
                    }

                    // A title tag that is not closed on the same line selects the new CGI path.
                    Match openTitle = Regex.Match(line, @"<title>(.+?)$");
                    if (openTitle.Success)
                    {
                        title = openTitle.Groups[1].Value;
                        NewCGI = true;
                        break;
                    }
                }

                if (Regex.IsMatch(title, @"(5ちゃんねる error \d+|もうずっと人大杉|datが存在しません.削除されたかURL間違ってますよ)"))
                {
                    return new byte[] { 0 };
                }

                // Rewrite "Nch.net [digits]" in the title as "Nch.net<TAB> [digits]".
                Match boardSuffix = Regex.Match(title, @"(2|5)ch\.net\s(\[\d+\])");
                if (boardSuffix.Success)
                {
                    var tmatch = boardSuffix.Groups;
                    title = title.Replace(tmatch[0].Value, $"{tmatch[1].Value}ch.net\t {tmatch[2].Value}");
                }

                // NOTE(review): both arguments are the same string here, so this call is a
                // no-op as written; one side was presumably an HTML entity originally — confirm.
                if (CRReplace)
                {
                    title = title.Replace("©", "©");
                }

                // Split processing between the new and the old CGI layout.
                if (NewCGI)
                {
                    String line = html.ReadLine();
                    // Look for the line that holds the thread body.
                    do
                    {
                        if (Regex.IsMatch(line, @"<d(?:iv|l) class=.(?:thread|post).+?>"))
                        {
                            break;
                        }
                        line = html.ReadLine();
                    } while (!html.EndOfStream);

                    // Thread alive check: without an archived / over-limit notice on the line
                    // the thread is treated as alive and conversion stops.
                    if (!ViewModel.Setting.SkipAliveCheck)
                    {
                        if (Regex.IsMatch(line, @"<div class=" + '"' + @"[a-zA-Z\s]+?" + '"' + @">(.+?過去ログ倉庫.+?|レス数が\d{3,}を超えています.+?(書き込み.*?|表.?示)でき.+?)</div>") == false)
                        {
                            return new byte[] { 0, 0 };
                        }
                    }

                    var Bres = new StringBuilder(5 * 1024);
                    // Used to compensate for post-number gaps on bbspink.
                    bool pink = URI.Contains("bbspink.com");
                    int datResnumber = 1, htmlResnumber = 0;
                    // Run of 9+ digits after a slash in the URI; used below as seconds since the Unix epoch.
                    long ThreadTime = long.Parse(Regex.Match(URI, @"/(\d{9,})").Groups[1].Value);
                    var ResMatches = Regex.Matches(line, @"<(?:div|dl) class=.post. id=.\d.+?>(.+?(?:</div></div>|</dd></dl>))");
                    foreach (Match Res in ResMatches)
                    {
                        // Previous pattern, kept for reference:
                        //Match date = Regex.Match(Res.Groups[1].Value, @"<(?:div|span) class=.date.+?>(.+?)</(?:div|span)>(?:<(?:div|span) class=.be\s.+?.>(.+?)</(?:div|span)>)?");
                        Match date = Regex.Match(Res.Groups[1].Value, @"<(?:div|span) class=.date.+?>(.+?(?:</span><span class=" + '"' + @"\w+?" + '"' + @">.*?)?)</(?:div|span)>(?:<(?:div|span) class=.be\s.+?.>(.+?)</(?:div|span)>)?");
                        String number = Regex.Match(Res.Groups[1].Value, @"<(?:div|span) class=.number.+?>(\d{1,5})(?: : )?</(?:div|span)>").Groups[1].Value;

                        // Detect a post numbered 0 whose date is "NG".
                        if (number == "0" && date.Groups[1].Value == "NG")
                        {
                            // Skip it.
                            continue;
                        }

                        // Detect post numbers skipped in the HTML and pad the dat with
                        // placeholder lines so numbering stays aligned.
                        if (pink && int.TryParse(number, out htmlResnumber) && datResnumber < htmlResnumber)
                        {
                            for (int j = htmlResnumber - datResnumber; j > 0; --j)
                            {
                                Builddat.Append("うふ~ん<>うふ~ん<>うふ~ん ID:DELETED<>うふ~ん<>うふ~ん<>\n");
                            }
                            datResnumber = htmlResnumber;
                        }

                        // Previous pattern, kept for reference:
                        //String name = Regex.Match(Res.Groups[1].Value, $"<(?:div|span) class={'"'}name{'"'}>((?:{'"'}.*?{'"'}|'.*?'|[^'{'"'}])+?)</(?:div|span)>").Groups[1].Value;
                        String name = Regex.Match(Res.Groups[1].Value, $"<(?:div|span) class=.name.+?>(.+?(?:</b>|</a>))</(?:div|span)>").Groups[1].Value;

                        // When there is no mailto link, strip the green font colour markup.
                        if (!name.Contains("<a href=" + '"' + "mailto:"))
                        {
                            name = Regex.Replace(name, @"<font color=.green.>", "");
                            name = name.Replace("</font>", "");
                        }

                        // Remove the span tags inside the date/ID part.
                        String dateid = date.Groups[1].Value;
                        if (dateid.Contains("</span><span "))
                        {
                            dateid = Regex.Replace(dateid, $"</span><span class={'"'}" + @"\w+?" + $"{'"'}>", " ");
                        }

                        // When the date/ID reads "NG NG", substitute a timestamp derived from ThreadTime.
                        if (dateid.Contains("NG NG"))
                        {
                            DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
                            UnixEpoch = UnixEpoch.AddSeconds(ThreadTime);
                            String time = UnixEpoch.ToLocalTime().ToString("yyyy/MM/dd(ddd) HH:mm:ss.00");
                            dateid = time + " ID:NG0";
                        }

                        // BE link handling.
                        String be = "";
                        if (!string.IsNullOrEmpty(date.Groups[2].Value))
                        {
                            var mb = Regex.Match(date.Groups[2].Value, @"<a href.+?(\d{2,}).+?>(.+)$");
                            be = $" <a href={'"'}javascript:be({mb.Groups[1].Value});{'"'}>{mb.Groups[2].Value}";
                        }

                        String message = Regex.Match(Res.Groups[1].Value, @"<d(?:iv|d) class=.(?:message|thread_in).+?>(?:<span class=.escaped.>)?(.+?)(?:</span>)?(?:</div></div>|</dd></dl>)").Groups[1].Value;

                        // Fix anchor links, e.g. http://potato.2ch.net/test/read.cgi/jisaku/1447271149/9 -> ../test/read.cgi/jisaku/1447271149/9
                        Bres.Append(message);
                        foreach (Match item in Regex.Matches(message, @"(<a href=.)(?:https?:)?//\w+\.((?:2|5)ch\.net|bbspink\.com)(/test/read.cgi/\w+/\d+/\d{1,4}.\s.+?>>>\d{1,5}</a>)"))
                        {
                            Bres.Replace(item.Groups[0].Value, item.Groups[1].Value + ".." + item.Groups[3].Value);
                        }

                        // Fix drawing (oekaki) links: turn the jump link into an img tag.
                        foreach (Match item in Regex.Matches(message, $@"<a\s(?:class={'"'}image{'"'}\s)?href=" + '"' + @"(?:https?:)?//jump.(?:2|5)ch\.net/\?(https?://[a-zA-Z\d]+?\.8ch.net\/.+?\.\w+?)" + '"' + @">https?://[a-zA-Z\d]+?\.8ch\.net\/.+?\.\w+?</a>"))
                        {
                            Bres.Replace(item.Groups[0].Value, "<img src=" + '"' + item.Groups[1].Value + '"' + ">");
                        }

                        // Pad with spaces when the post body lacks leading/trailing whitespace (e.g. p53).
                        if (!Regex.IsMatch(message, @"^\s.+\s$"))
                        {
                            Bres.Insert(0, " ");
                            Bres.Append(" ");
                        }

                        // Assemble "<dt>number :name:dateid be<dd>body<br><br>" and convert it.
                        Bres.Insert(0, ":" + dateid + be + "<dd>");
                        Bres.Insert(0, "<dt>" + number + " :" + name);
                        Bres.Append("<br><br>");
                        Builddat.Append(html2dat(Bres.ToString()));

                        // The title is appended only to the first emitted post line.
                        if (!String.IsNullOrEmpty(title))
                        {
                            Builddat.Append(title + "\n");
                            title = "";
                        }
                        else
                        {
                            Builddat.Append("\n");
                        }

                        Bres.Clear();
                        datResnumber++;
                    }

                    ketu = Regex.Match(line, @"<(?:div|li) class=.+?>(?<datsize>\d+?)KB</(?:div|li)>").Groups[1].Value;
                }
                else
                {
                    if (!ViewModel.Setting.SkipAliveCheck)
                    {
                        // Check whether the thread has been archived (dat-ochi).
                        for (String line = html.ReadLine(); !html.EndOfStream; line = html.ReadLine())
                        {
                            if (Regex.IsMatch(line, @"<div.*?>(.+?過去ログ倉庫.+?|レス数が\d{3,}を超えています.+?(書き込み.*?でき|表示しません).+?)</div>"))
                            {
                                alive = false;
                                break;
                            }
                            else if (Regex.IsMatch(line, @"<h1 style.+>.+?<\/h1>"))
                            {
                                alive = true;
                                break;
                            }
                        }

                        // Stop if the thread is still alive.
                        if (alive)
                        {
                            return new byte[] { 0, 0 };
                        }
                    }

                    String ResHtml = html.ReadToEnd();
                    // Converted lines keyed by (post number - 1); filled in parallel, read back in order.
                    System.Collections.Concurrent.ConcurrentDictionary<int, string> Trancedat = new System.Collections.Concurrent.ConcurrentDictionary<int, string>(4, 1005);
                    System.Threading.Tasks.ParallelOptions option = new System.Threading.Tasks.ParallelOptions();
                    option.MaxDegreeOfParallelism = 4;
                    System.Threading.Tasks.Parallel.ForEach<Match>(Regex.Matches(ResHtml, @"<dt>(\d{1,4})\s:.+?<br><br>(?:\r|\n)").Cast<Match>(), option, match =>
                    {
                        Trancedat[int.Parse(match.Groups[1].Value) - 1] = html2dat(match.Groups[0].Value) + "\n";
                    });

                    // First line: drop its trailing newline, append the title, then re-terminate.
                    // A missing key (no posts matched, or a gap in numbering) throws and is
                    // reported by the generic catch below.
                    Builddat.Append(Trancedat[0].Substring(0, Trancedat[0].Length - 1) + title + "\n");
                    for (int i = 1; i < Trancedat.Count; ++i)
                    {
                        Builddat.Append(Trancedat[i]);
                    }

                    if (!ViewModel.Setting.AllReturn || range > -1)
                    {
                        ketu = Regex.Match(ResHtml, @"<font\scolor.+?><b>(\d+)\sKB<\/b><\/font>").Groups[1].Value;
                    }
                }

                // Previous implementation, kept for reference:
                //if (ViewModel.Setting.Replace5chURI || ViewModel.Setting.ReplaceHttpsLink)
                //{
                //    Builddat = new StringBuilder(HTMLtoDat.ResContentReplace(Builddat.ToString()));
                //}
                //Byte[] Bdat = Encoding.GetEncoding("Shift_JIS").GetBytes(Builddat.ToString());
                Byte[] Bdat = Encoding.GetEncoding("Shift_JIS").GetBytes((ViewModel.Setting.Replace5chURI || ViewModel.Setting.ReplaceHttpsLink) ? (HTMLtoDat.ResContentReplace(Builddat.ToString())) : (Builddat.ToString()));
                if (ViewModel.Setting.AllReturn || range < 0)
                {
                    return Bdat;
                }

                // TryParse leaves size at 0 when ketu is empty or not a valid int, replacing the
                // former Parse + catch(FormatException) pair.
                int size;
                int.TryParse(ketu, out size);

                // Differential response handling.
                return DifferenceDetection(Bdat, LastMod, UA, range, size);
            }
        }
        catch (System.Threading.ThreadAbortException)
        {
            // Rethrow without resetting the stack trace.
            throw;
        }
        catch (Exception err)
        {
            ViewModel.OnModelNotice(URI + "をHTMLから変換中にエラーが発生しました。\n" + err.ToString());
            return new byte[] { 0 };
        }
    }
}