Ejemplo n.º 1
0
 /// <summary>
 /// Downloads the thread HTML at <paramref name="URI"/> and converts it to dat text by invoking
 /// the static <c>HTMLConvert</c> method of the <c>HtmlToDatConverter</c> type found in
 /// <c>CompiledAssembly</c>, then returns the result encoded as Shift_JIS.
 /// </summary>
 /// <param name="URI">Address of the thread HTML to download.</param>
 /// <param name="range">
 /// When negative (or when <c>ViewModel.Setting.AllReturn</c> is set) the whole dat is returned;
 /// otherwise the value is forwarded to <c>DifferenceDetection</c>.
 /// </param>
 /// <param name="UA">
 /// Forwarded to <c>DifferenceDetection</c> only. The HTTP request itself always uses
 /// <c>ViewModel.Setting.UserAgent4</c> as its User-Agent header.
 /// </param>
 /// <param name="LastMod">Forwarded to <c>DifferenceDetection</c>.</param>
 /// <returns>
 /// The converted dat bytes, the result of <c>DifferenceDetection</c>, or a single zero byte when
 /// the external converter has not been compiled or any exception occurs during conversion.
 /// </returns>
 public static Byte[] HTMLTranceOutRegex(String URI, int range, String UA, String LastMod = null)
 {
     if (CompiledAssembly == null)
     {
         ViewModel.OnModelNotice("外部HTMLtoDatコードのコンパイルが行われていません");
         return new byte[] { 0 };
     }
     // GetType is called with throwOnError = false, so t may be null; in that case dat stays empty below.
     Type t = CompiledAssembly.GetType("HtmlToDatConverter", false, false);
     using (WebClient get = new WebClient())
     {
         get.Headers["User-Agent"] = ViewModel.Setting.UserAgent4;
         try
         {
             String dat = "", ketu = "";
             if (ViewModel.Setting.ProxyAddress != "") get.Proxy = new WebProxy(ViewModel.Setting.ProxyAddress);
             using (System.IO.StreamReader html = new System.IO.StreamReader(get.OpenRead(URI), Encoding.GetEncoding("Shift_JIS")))
             {
                 String thredhtml = html.ReadToEnd();
                 if (t != null) dat = (String)t.InvokeMember("HTMLConvert", BindingFlags.InvokeMethod, null, null, new object[] { thredhtml });
                 // Size in KB as printed on the page; empty string when the element is not found.
                 ketu = Regex.Match(thredhtml, @"<div class=.cLength.>(\d+)KB</div>").Groups[1].Value;
             }
             if (ViewModel.Setting.Replace5chURI || ViewModel.Setting.ReplaceHttpsLink)
             {
                 dat = HTMLtoDat.ResContentReplace(dat);
             }
             Byte[] Bdat = Encoding.GetEncoding("Shift_JIS").GetBytes(dat);
             if (ViewModel.Setting.AllReturn || range < 0) return Bdat;
             // TryParse leaves size at 0 when ketu is empty or not a valid integer,
             // so no exception is needed for the expected "size not found" case.
             int size;
             int.TryParse(ketu, out size);
             return DifferenceDetection(Bdat, LastMod, UA, range, size);
         }
         catch (System.Threading.ThreadAbortException)
         {
             // Let thread aborts propagate without a user notice; "throw;" keeps the original stack trace.
             throw;
         }
         catch (Exception err)
         {
             ViewModel.OnModelNotice(URI + "をHTMLから変換中にエラーが発生しました。\n" + err.ToString());
             return new byte[] { 0 };
         }
     }
 }
Ejemplo n.º 2
0
        //public String Proxy { get; set; }
        //public String UserAgent { get; set; }
        //public bool CEExternalRead { get; set; }
        //public bool AllRes { get; set; }
        //public bool SkipeAliveCheck { get; set; }

        /// <summary>
        /// Downloads the thread HTML at <paramref name="URI"/>, rebuilds dat-format text from it and
        /// returns that text encoded as Shift_JIS. When <c>ViewModel.Setting.CEExternalRead</c> is set
        /// the work is delegated to <c>HTMLTranceOutRegex</c> instead.
        /// </summary>
        /// <param name="URI">Thread address; every "2ch.net" in it is replaced with "5ch.net" before use.</param>
        /// <param name="range">
        /// When negative (or when <c>ViewModel.Setting.AllReturn</c> is set) the whole dat is returned;
        /// otherwise the value is forwarded to <c>DifferenceDetection</c>.
        /// </param>
        /// <param name="UA">
        /// Forwarded to <c>HTMLTranceOutRegex</c> / <c>DifferenceDetection</c>. The HTTP request itself
        /// always uses <c>ViewModel.Setting.UserAgent4</c> as its User-Agent header.
        /// </param>
        /// <param name="CRReplace">When true, "&amp;#169;" in the title is replaced with "&amp;copy;".</param>
        /// <param name="LastMod">Forwarded to <c>HTMLTranceOutRegex</c> / <c>DifferenceDetection</c>.</param>
        /// <returns>
        /// The dat bytes or the result of <c>DifferenceDetection</c>; a single zero byte when the title
        /// is an error title (or was never found) or an exception occurs; two zero bytes when the
        /// alive check decides the thread has not been archived.
        /// </returns>
        public static Byte[] Gethtml(String URI, int range, String UA, bool CRReplace, String LastMod = null)
        {
            URI = URI.Replace("2ch.net", "5ch.net");
            if (ViewModel.Setting.CEExternalRead)
            {
                return HTMLTranceOutRegex(URI, range, UA, LastMod);
            }
            using (WebClient get = new WebClient())
            {
                get.Headers["User-Agent"] = ViewModel.Setting.UserAgent4;
                try
                {
                    if (ViewModel.Setting.ProxyAddress != "") get.Proxy = new WebProxy(ViewModel.Setting.ProxyAddress);
                    using (System.IO.StreamReader html = new System.IO.StreamReader(get.OpenRead(URI), Encoding.GetEncoding("Shift_JIS")))
                    {
                        // The default title matches the error pattern below, so a page without a
                        // <title> line is treated as an error page.
                        String title = "もうずっと人大杉", ketu = "";
                        // StringBuilder used to assemble the dat
                        var Builddat = new StringBuilder(510 * 1024);
                        bool alive = true, NewCGI = false;
                        // Search for the title
                        for (String line = html.ReadLine(); !html.EndOfStream; line = html.ReadLine())
                        {
                            if (Regex.IsMatch(line, @"<title>(.+?)<\/title>"))
                            {
                                title = Regex.Match(line, @"<title>(.+?)<\/title>").Groups[1].Value;
                                break;
                            }
                            else if (Regex.IsMatch(line, @"<title>(.+?)$"))
                            {
                                // A <title> that is not closed on the same line marks the new CGI format.
                                title = Regex.Match(line, @"<title>(.+?)$").Groups[1].Value;
                                NewCGI = true;
                                break;
                            }
                        }
                        if (Regex.IsMatch(title, @"(5ちゃんねる error \d+|もうずっと人大杉|datが存在しません.削除されたかURL間違ってますよ)")) return new byte[] { 0 };
                        if (Regex.IsMatch(title, @"(2|5)ch\.net\s(\[\d+\])"))
                        {
                            var tmatch = Regex.Match(title, @"(2|5)ch\.net\s(\[\d+\])").Groups;
                            title = title.Replace(tmatch[0].Value, $"{tmatch[1].Value}ch.net\t {tmatch[2].Value}");
                        }
                        if (CRReplace) title = title.Replace("&#169;", "&copy;");
                        // Handle the new CGI format and the old CGI format separately
                        if (NewCGI)
                        {
                            String line = html.ReadLine();

                            // Locate the line holding the thread body
                            do
                            {
                                if (Regex.IsMatch(line, @"<d(?:iv|l) class=.(?:thread|post).+?>")) break;
                                line = html.ReadLine();
                            } while (!html.EndOfStream);

                            // Thread alive check: without an "archived" / "over the post limit" notice
                            // on this line the thread is treated as still alive.
                            if (!ViewModel.Setting.SkipAliveCheck)
                            {
                                if (Regex.IsMatch(line, @"<div class=" + '"' + @"[a-zA-Z\s]+?" + '"' + @">(.+?過去ログ倉庫.+?|レス数が\d{3,}を超えています.+?(書き込み.*?|表.?示)でき.+?)</div>") == false)
                                {
                                    return new byte[] { 0, 0 };
                                }
                            }

                            var Bres = new StringBuilder(5 * 1024);
                            // Used to compensate for post-number gaps on bbspink
                            bool pink = URI.Contains("bbspink.com");
                            int datResnumber = 1, htmlResnumber = 0;
                            long ThreadTime = long.Parse(Regex.Match(URI, @"/(\d{9,})").Groups[1].Value);
                            var ResMatches = Regex.Matches(line, @"<(?:div|dl) class=.post. id=.\d.+?>(.+?(?:</div></div>|</dd></dl>))");
                            foreach (Match Res in ResMatches)
                            {
                                Match date = Regex.Match(Res.Groups[1].Value, @"<(?:div|span) class=.date.+?>(.+?(?:</span><span class=" + '"' + @"\w+?" + '"' + @">.*?)?)</(?:div|span)>(?:<(?:div|span) class=.be\s.+?.>(.+?)</(?:div|span)>)?");
                                String number = Regex.Match(Res.Groups[1].Value, @"<(?:div|span) class=.number.+?>(\d{1,5})(?: : )?</(?:div|span)>").Groups[1].Value;
                                // Detect the "number 0, date NG" entry
                                if (number == "0" && date.Groups[1].Value == "NG")
                                {
                                    // Skip it
                                    continue;
                                }
                                // Detect post numbers that are skipped in the html and pad the dat with placeholder lines
                                if (pink && int.TryParse(number, out htmlResnumber) && datResnumber < htmlResnumber)
                                {
                                    for (int j = htmlResnumber - datResnumber; j > 0; --j)
                                    {
                                        Builddat.Append("うふ~ん<>うふ~ん<>うふ~ん ID:DELETED<>うふ~ん<>うふ~ん<>\n");
                                    }
                                    datResnumber = htmlResnumber;
                                }
                                String name = Regex.Match(Res.Groups[1].Value, $"<(?:div|span) class=.name.+?>(.+?(?:</b>|</a>))</(?:div|span)>").Groups[1].Value;
                                // When the mail field is empty, drop the font color markup
                                if (!name.Contains("<a href=" + '"' + "mailto:"))
                                {
                                    name = Regex.Replace(name, @"<font color=.green.>", "");
                                    name = name.Replace("</font>", "");
                                }
                                // Remove the span tags inside the ID part
                                String dateid = date.Groups[1].Value;
                                if (dateid.Contains("</span><span "))
                                {
                                    dateid = Regex.Replace(dateid, $"</span><span class={'"'}" + @"\w+?" + $"{'"'}>", " ");
                                }
                                // When the date/ID is NG, substitute the thread timestamp taken from the URI
                                if (dateid.Contains("NG NG"))
                                {
                                    DateTime UnixEpoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
                                    UnixEpoch = UnixEpoch.AddSeconds(ThreadTime);
                                    String time = UnixEpoch.ToLocalTime().ToString("yyyy/MM/dd(ddd) HH:mm:ss.00");
                                    dateid = time + " ID:NG0";
                                }
                                // be link handling
                                String be = "";
                                if (!string.IsNullOrEmpty(date.Groups[2].Value))
                                {
                                    var mb = Regex.Match(date.Groups[2].Value, @"<a href.+?(\d{2,}).+?>(.+)$");
                                    be = $" <a href={'"'}javascript:be({mb.Groups[1].Value});{'"'}>{mb.Groups[2].Value}";
                                }
                                String message = Regex.Match(Res.Groups[1].Value, @"<d(?:iv|d) class=.(?:message|thread_in).+?>(?:<span class=.escaped.>)?(.+?)(?:</span>)?(?:</div></div>|</dd></dl>)").Groups[1].Value;
                                // Fix anchor links, e.g. http://potato.2ch.net/test/read.cgi/jisaku/1447271149/9 -> ../test/read.cgi/jisaku/1447271149/9
                                Bres.Append(message);
                                foreach (Match item in Regex.Matches(message, @"(<a href=.)(?:https?:)?//\w+\.((?:2|5)ch\.net|bbspink\.com)(/test/read.cgi/\w+/\d+/\d{1,4}.\s.+?>&gt;&gt;\d{1,5}</a>)"))
                                {
                                    Bres.Replace(item.Groups[0].Value, item.Groups[1].Value + ".." + item.Groups[3].Value);
                                }
                                // Fix drawing (oekaki) links: turn the jump link into an img tag
                                foreach (Match item in Regex.Matches(message, $@"<a\s(?:class={'"'}image{'"'}\s)?href=" + '"' + @"(?:https?:)?//jump.(?:2|5)ch\.net/\?(https?://[a-zA-Z\d]+?\.8ch.net\/.+?\.\w+?)" + '"' + @">https?://[a-zA-Z\d]+?\.8ch\.net\/.+?\.\w+?</a>"))
                                {
                                    Bres.Replace(item.Groups[0].Value, "<img src=" + '"' + item.Groups[1].Value + '"' + ">");
                                }
                                // Pad with spaces when the post has none before/after it (e.g. p53)
                                if (!Regex.IsMatch(message, @"^\s.+\s$"))
                                {
                                    Bres.Insert(0, " ");
                                    Bres.Append(" ");
                                }
                                Bres.Insert(0, ":" + dateid + be + "<dd>");
                                Bres.Insert(0, "<dt>" + number + " :" + name);
                                Bres.Append("<br><br>");
                                Builddat.Append(html2dat(Bres.ToString()));
                                // The title is appended to the first emitted line only.
                                if (!String.IsNullOrEmpty(title))
                                {
                                    Builddat.Append(title + "\n");
                                    title = "";
                                }
                                else Builddat.Append("\n");
                                Bres.Clear();
                                datResnumber++;
                            }
                            ketu = Regex.Match(line, @"<(?:div|li) class=.+?>(?<datsize>\d+?)KB</(?:div|li)>").Groups[1].Value;
                        }
                        else
                        {
                            if (!ViewModel.Setting.SkipAliveCheck)
                            {
                                // Check whether the thread has been archived (dat-ochi)
                                for (String line = html.ReadLine(); !html.EndOfStream; line = html.ReadLine())
                                {
                                    if (Regex.IsMatch(line, @"<div.*?>(.+?過去ログ倉庫.+?|レス数が\d{3,}を超えています.+?(書き込み.*?でき|表示しません).+?)</div>"))
                                    {
                                        alive = false;
                                        break;
                                    }
                                    else if (Regex.IsMatch(line, @"<h1 style.+>.+?<\/h1>"))
                                    {
                                        alive = true;
                                        break;
                                    }
                                }
                                // Stop here if the thread is still alive
                                if (alive) return new byte[] { 0, 0 };
                            }
                            String ResHtml = html.ReadToEnd();
                            // Posts are converted in parallel and stored under (post number - 1) so they
                            // can be appended in order afterwards.
                            System.Collections.Concurrent.ConcurrentDictionary<int, string> Trancedat = new System.Collections.Concurrent.ConcurrentDictionary<int, string>(4, 1005);
                            System.Threading.Tasks.ParallelOptions option = new System.Threading.Tasks.ParallelOptions();
                            option.MaxDegreeOfParallelism = 4;
                            System.Threading.Tasks.Parallel.ForEach<Match>(Regex.Matches(ResHtml, @"<dt>(\d{1,4})\s:.+?<br><br>(?:\r|\n)").Cast<Match>(), option, match =>
                            {
                                Trancedat[int.Parse(match.Groups[1].Value) - 1] = html2dat(match.Groups[0].Value) + "\n";
                            });
                            // Insert the title before the trailing newline of the first post.
                            Builddat.Append(Trancedat[0].Substring(0, Trancedat[0].Length - 1) + title + "\n");
                            for (int i = 1; i < Trancedat.Count; ++i) Builddat.Append(Trancedat[i]);
                            if (!ViewModel.Setting.AllReturn || range > -1) ketu = Regex.Match(ResHtml, @"<font\scolor.+?><b>(\d+)\sKB<\/b><\/font>").Groups[1].Value;
                        }
                        Byte[] Bdat = Encoding.GetEncoding("Shift_JIS").GetBytes((ViewModel.Setting.Replace5chURI || ViewModel.Setting.ReplaceHttpsLink) ? (HTMLtoDat.ResContentReplace(Builddat.ToString())) : (Builddat.ToString()));
                        if (ViewModel.Setting.AllReturn  || range < 0) return Bdat;
                        // TryParse leaves size at 0 when ketu is empty or not a valid integer,
                        // so no exception is needed for the expected "size not found" case.
                        int size;
                        int.TryParse(ketu, out size);
                        // Differential response handling
                        return DifferenceDetection(Bdat, LastMod, UA, range, size);
                    }
                }
                catch (System.Threading.ThreadAbortException)
                {
                    // Let thread aborts propagate without a user notice; "throw;" keeps the original stack trace.
                    throw;
                }
                catch (Exception err)
                {
                    ViewModel.OnModelNotice(URI + "をHTMLから変換中にエラーが発生しました。\n" + err.ToString());
                    return new byte[] { 0 };
                }
            }
        }