예제 #1
0
        private static void Main()
        {
            _meCabTagger = MeCabTagger.Create(MeCabParam);
            var markovDic = new MarkovDictionary();

            var reg = new Regex(@"<.*?>");

            using (var file = new System.IO.StreamReader(@"..\..\..\..\Sample.txt"))
            {
                string line;
                while ((line = file.ReadLine()) != null)
                {
                    line = reg.Replace(line, "");
                    var marList = CheckMeCab(line);

                    markovDic.AddSentence(marList);
                }
            }

            var results = new List <string>();

            for (var i = 0; i < 100;)
            {
                var text = string.Join("", markovDic.BuildSentence());
                if (text.Length > 80)
                {
                    continue;
                }

                results.Add(text);
                ++i;
            }

            var r = results.FindMax(c => c.Length);

            Console.WriteLine(r);

            Console.ReadLine();
        }
예제 #2
0
파일: Program.cs 프로젝트: kkrnt/mocho
        static void Main()
        {
            Console.WriteLine("Blogからデータを収集中...");

            var blog = "";
            var urls = "http://ameblo.jp/asakuramomoblog/entrylist";
            var count = 1;
            var backup = "";
            var flag = true;
            while (flag)
            {
                var wc = new WebClient();
                var openurl = "";
                if (count > 1)
                {
                    openurl = urls + "-" + count + ".html";
                }
                else
                {
                    openurl = urls + ".html";
                }
                var st = wc.OpenRead(openurl);
                var sr = new StreamReader(st, Encoding.UTF8);
                var html = sr.ReadToEnd();
                sr.Close();
                st.Close();

                var first = true;

                var re = new Regex("<a class=\"contentTitle\" href=\"(?<url>.*?)\".*?>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
                for (var m = re.Match(html); m.Success; m = m.NextMatch())
                {
                    var url = m.Groups["url"].Value;
                    var _wc = new WebClient();
                    var _st = _wc.OpenRead(url);
                    var _sr = new StreamReader(_st, Encoding.UTF8);
                    var _html = _sr.ReadToEnd();
                    _sr.Close();
                    _st.Close();
                    var _re = new Regex("<div class=\"articleText\">(?<t>.*?)<!--entryBottom-->", RegexOptions.IgnoreCase | RegexOptions.Singleline);
                    for (var _m = _re.Match(_html); _m.Success; _m = _m.NextMatch())
                    {
                        var text = _m.Groups["t"].Value;
                        text = text.Replace("<br />", "")
                            .Replace(Environment.NewLine, "")
                            .Replace("\r", "")
                            .Replace("\n", "")
                            .Replace("<div>", "")
                            .Replace("</div>", "")
                            .Replace("&gt;", ">")
                            .Replace("&lt;", "<")
                            .Replace("&amp;", "&")
                            .Replace("&quot;", "\"")
                            .Replace("<p>", "")
                            .Replace("</p>", "")
                            .Replace("<!-- google_ad_section_start(name=s1, weight=.9) -->", "")
                            .Replace("<!-- google_ad_section_end(name=s1) -->", "")
                            .Replace("</span>", "");
                        text = Regex.Replace(text, "<a (?<a>.*?)</a>", "");
                        text = Regex.Replace(text, "<img (?<a>.*?)>", "");
                        text = Regex.Replace(text, "<div (?<a>.*?)>", "");
                        text = Regex.Replace(text, "<span (?<a>.*?)>", "") + Environment.NewLine;
                        text = text.Replace("<a", "")
                            .Replace("</a>", "")
                            .Replace("<img>", "")
                            .Replace("<span>", "");
                        //Console.Write(text);
                        blog += text;
                        if (first)
                        {
                            if (!backup.Equals(text))
                            {
                                backup = String.Copy(text);
                                first = false;
                            }
                            else
                            {
                                flag = false;
                                break;
                            }
                        }
                    }
                }
                count++;
            }

            Console.WriteLine("収集しました");
            Console.WriteLine("記事を作成します\n\n");

            var mocho = blog.Split('\n');
            var mecab = MeCabTagger.Create();
            var data = new List<string>();
            foreach (var s in mocho)
            {
                var node = mecab.ParseToNode(s);
                while (node != null)
                {
                    if (!s.Equals(node.Surface) && node.Surface[0] != 0x00 && node.Surface[0] != 13)
                    {
                        data.Add(node.Surface + "|");
                    }
                    node = node.Next;
                }
                data.Add("\n");
            }
            var lines = string.Join("", data).Split('\n');
            var markovDic = new MarkovDictionary();
            foreach (var line in lines)
            {
                markovDic.AddSentence(line.Split('|'));
            }

            var output = new StreamWriter("output.log", false, Encoding.UTF8);

            for (var i = 0; i < 5; i++)
            {
                var sentence = markovDic.BuildSentence();
                Console.WriteLine(string.Join("", sentence) + "\n\n");
                output.WriteLine(string.Join("", sentence));
            }

            output.Close();
        }
예제 #3
0
        static void Markov()
        {
            _meCabTagger = MeCabTagger.Create(MeCabParam);
            var markovDic = new MarkovDictionary();

            // 正規表現(パターン検索)・・・この場合<>で囲まれた箇所にマッチする(+後述のreg.Replaceで文字を消す)
            var reg = new Regex(@"<.*?>");


            // 読み込む行数を先に定義
            var n = 10;

            // lock構文の準備
            object lockReed = new object();

            lock (lockReed)
            {
                using (var file = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
                {
                    var    lines = File.ReadAllLines(path);
                    string line;
                    foreach (var ln in lines.Skip(lines.Length - n).Take(n))  // (最後の行-n行)目に跳んで、n行分読み込む
                    {
                        line = reg.Replace(ln, "");
                        var marList = CheckMeCab(line);

                        markovDic.AddSentence(marList);
                    }
                }
            }



            var results = new List <string>();

            for (var i = 0; i < 100;)
            {
                var text = string.Join("", markovDic.BuildSentence());
                if (text.Length > 25)
                {
                    continue;
                }

                results.Add(text);
                ++i;
            }

            // ここもlock構文の準備
            object lockWrite = new object();

            // マルコフ連鎖から返ってきた結果をresultファイルに投げる
            lock (lockWrite)
            {
                var r = results.FindMax(c => c.Length);
                //Console.WriteLine(r);
                using (var writer = new StreamWriter(@"C:\botVTuber\result.txt", true))  // StreamWriterメソッドの定義とresultファイルのディレクトリ
                {
                    writer.WriteLine(r + "\r\n");
                    Clipboard.SetText(r);
                }
            }
        }