예제 #1
0
        /// <summary>
        /// Fetches the HTML at <paramref name="url"/>, passes it through the readability
        /// transcoder, loads the result into a <c>NonDispBrowser</c> and returns the title
        /// of the loaded document.
        /// </summary>
        /// <param name="url">Address of the page whose title is wanted.</param>
        /// <returns>
        /// The title reported by the browser document, or an empty string when transcoding
        /// or loading the page throws.
        /// </returns>
        public static string getTitle(string url)
        {
            // Required by Transcode's out parameter; the value itself is not used here.
            bool mainContentExtracted;
            string title = "";

            NReadabilityTranscoder nReadabilityTranscoder = new NReadabilityTranscoder();
            HtmlParser hp = new HtmlParser();

            // The using block covers the download as well, so the browser is released even
            // when getHtmlSource throws before the try block is entered.
            using (NonDispBrowser nb = new NonDispBrowser())
            {
                string source = hp.getHtmlSource(url);

                try
                {
                    nb.NavigateAndWaitFromSource(hp.getHtmlPlainTextFromSourceWB(nReadabilityTranscoder.Transcode(source, out mainContentExtracted)));
                    title = nb.Document.Title;
                }
                catch (Exception ex)
                {
                    // Deliberately best effort: a page that cannot be transcoded or rendered
                    // simply yields an empty title. Leave a trace so failures are diagnosable.
                    System.Diagnostics.Debug.WriteLine("getTitle failed for " + url + ": " + ex);
                }
            }

            return title;
        }
예제 #2
0
        /// <summary>
        /// Returns the text content of the page at <paramref name="url"/>.
        /// The primary attempt passes the HTML through the readability transcoder, loads it
        /// into a <c>NonDispBrowser</c> and takes the body's inner text with the document
        /// title removed. When that attempt produces no text (or throws), the raw HTML is
        /// cleaned with <c>HtmlParser.htmlTagRegularRemove</c> and
        /// <c>HtmlParser.htmlGomiRegularRemove</c> instead.
        /// </summary>
        /// <param name="url">Address of the page to extract text from.</param>
        /// <returns>The extracted text, or an empty string when both attempts fail.</returns>
        public static string transeForJapa(string url)
        {
            // Required by Transcode's out parameter; the value itself is not used here.
            bool mainContentExtracted;
            string result = "";
            string source;

            NReadabilityTranscoder nReadabilityTranscoder = new NReadabilityTranscoder();
            HtmlParser hp = new HtmlParser();

            using (NonDispBrowser nb = new NonDispBrowser())
            {
                source = hp.getHtmlSource(url);

                try
                {
                    // First try to obtain the body from the transcoded (summarised) document.
                    nb.NavigateAndWaitFromSource(hp.getHtmlPlainTextFromSourceWB(nReadabilityTranscoder.Transcode(source, out mainContentExtracted)));
                    string title = nb.Document.Title;
                    string bodyText = nb.Document.Body.InnerText ?? "";

                    // String.Replace throws ArgumentException for an empty search string,
                    // so only strip the title when there actually is one.
                    result = string.IsNullOrEmpty(title) ? bodyText : bodyText.Replace(title, "");
                }
                catch (Exception ex)
                {
                    // Best effort: fall through to the regex-based fallback below.
                    result = "";
                    System.Diagnostics.Debug.WriteLine("transeForJapa primary extraction failed for " + url + ": " + ex);
                }
            }

            if (result != "")
            {
                return result;
            }

            try
            {
                // Fallback: strip tags and junk directly from the raw HTML.
                result = HtmlParser.htmlGomiRegularRemove(HtmlParser.htmlTagRegularRemove(source));
            }
            catch (Exception ex)
            {
                // Keep the empty result rather than propagating; callers expect a string.
                System.Diagnostics.Debug.WriteLine("transeForJapa fallback extraction failed for " + url + ": " + ex);
            }

            return result;
        }
        /// <summary>
        /// Collects the RSS feed URLs advertised by the page at <paramref name="targetUrl"/>.
        /// Every tag whose markup contains "application/rss+xml" is inspected, and the value of
        /// each quoted href attribute inside that tag is returned exactly as written in the
        /// page (no decoding and no resolution of relative URLs).
        /// </summary>
        /// <param name="targetUrl">URL of the site to inspect.</param>
        /// <returns>The href values in document order; an empty list when none is found.</returns>
        public static List<string> getRssUrl(string targetUrl)
        {
            List<string> res = new List<string>();

            HtmlParser hp = new HtmlParser();
            string source = hp.getHtmlSource(targetUrl);
            if (string.IsNullOrEmpty(source))
            {
                return res;
            }

            // Scan tag by tag rather than line by line: on single-line (minified) HTML a
            // line-based scan would report every href in the page, and it would miss a tag
            // whose type and href attributes sit on different lines.
            System.Text.RegularExpressions.MatchCollection feedTags =
                System.Text.RegularExpressions.Regex.Matches(
                    source,
                    @"<[^<>]*application/rss\+xml[^<>]*>",
                    System.Text.RegularExpressions.RegexOptions.IgnoreCase);

            foreach (System.Text.RegularExpressions.Match tag in feedTags)
            {
                // Accept double- or single-quoted values and optional whitespace around '='.
                // The look-behind keeps attributes such as data-href from matching.
                System.Text.RegularExpressions.MatchCollection hrefs =
                    System.Text.RegularExpressions.Regex.Matches(
                        tag.Value,
                        @"(?<![\w-])href\s*=\s*(?:""([^""]*)""|'([^']*)')",
                        System.Text.RegularExpressions.RegexOptions.IgnoreCase);

                foreach (System.Text.RegularExpressions.Match href in hrefs)
                {
                    System.Text.RegularExpressions.Group doubleQuoted = href.Groups[1];
                    res.Add(doubleQuoted.Success ? doubleQuoted.Value : href.Groups[2].Value);
                }
            }

            return res;
        }