예제 #1
0
        /// <summary>
        /// Fetches the HTML for <paramref name="url"/>, runs it through the
        /// NReadability transcoder, loads the result into an off-screen browser
        /// and returns the title of the resulting document.
        /// </summary>
        /// <param name="url">Address of the page whose title is wanted.</param>
        /// <returns>
        /// The document title, or an empty string when transcoding, navigation
        /// or title lookup fails.
        /// </returns>
        /// <remarks>
        /// Exceptions thrown by <c>HtmlParser.getHtmlSource</c> are not caught here
        /// and propagate to the caller; the browser is still disposed in that case.
        /// </remarks>
        public static string getTitle(string url)
        {
            // Required by the Transcode signature; the flag itself is not used here.
            bool mainContentExtracted;

            string title = "";

            // Transcoder
            NReadabilityTranscoder nReadabilityTranscoder = new NReadabilityTranscoder();
            // Parser
            HtmlParser hp = new HtmlParser();

            // Off-screen browser. The using block guarantees disposal on every
            // exit path, including a failure while fetching the HTML below.
            using (NonDispBrowser nb = new NonDispBrowser())
            {
                // Fetch the HTML
                string source = hp.getHtmlSource(url);

                try
                {
                    nb.NavigateAndWaitFromSource(hp.getHtmlPlainTextFromSourceWB(nReadabilityTranscoder.Transcode(source, out mainContentExtracted)));
                    title = nb.Document.Title;
                }
                catch
                {
                    // Deliberate best-effort: any failure while transcoding,
                    // navigating or reading the document yields an empty title.
                }
            }

            // Return the result
            return title;
        }
예제 #2
0
        /// <summary>
        /// Extracts the body text of the page at <paramref name="url"/>.
        /// The HTML is first passed through the NReadability transcoder and loaded
        /// into an off-screen browser; the body's inner text, with occurrences of
        /// the document title removed, is returned. When that yields nothing (or
        /// fails), the raw HTML is cleaned with the <c>HtmlParser</c> regex helpers
        /// instead.
        /// </summary>
        /// <param name="url">Address of the page to extract text from.</param>
        /// <returns>
        /// The extracted text, or an empty string when both the primary path and
        /// the fallback fail.
        /// </returns>
        /// <remarks>
        /// Exceptions thrown by <c>HtmlParser.getHtmlSource</c> are not caught here
        /// and propagate to the caller.
        /// </remarks>
        public static string transeForJapa(string url)
        {
            // Required by the Transcode signature; the flag itself is not used here.
            bool mainContentExtracted;

            string result = "";
            string source = "";

            // Transcoder
            NReadabilityTranscoder nReadabilityTranscoder = new NReadabilityTranscoder();
            // Parser
            HtmlParser hp = new HtmlParser();

            // Off-screen browser
            using (NonDispBrowser nb = new NonDispBrowser())
            {
                // Fetch the HTML
                source = hp.getHtmlSource(url);

                try
                {
                    // First try to obtain the body from the summarized (transcoded) document
                    nb.NavigateAndWaitFromSource(hp.getHtmlPlainTextFromSourceWB(nReadabilityTranscoder.Transcode(source, out mainContentExtracted)));
                    string title = nb.Document.Title;
                    string body = nb.Document.Body.InnerText ?? "";

                    // String.Replace throws ArgumentException for an empty oldValue,
                    // so an untitled page must skip the replacement instead of
                    // losing its body text to the catch below.
                    result = string.IsNullOrEmpty(title) ? body : body.Replace(title, "");
                }
                catch
                {
                    // Best-effort: a failure on the primary path falls through
                    // to the regex-based fallback.
                    result = "";
                }
            }

            if (!string.IsNullOrEmpty(result))
            {
                return result;
            }

            try
            {
                // Fallback: clean the raw HTML with the regex helpers
                result = HtmlParser.htmlGomiRegularRemove(HtmlParser.htmlTagRegularRemove(source));
            }
            catch
            {
                // Best-effort: keep the original contract of returning an empty
                // string rather than throwing when the fallback fails too.
                result = "";
            }

            // Return the result
            return result;
        }