/// <summary>
/// Downloads the page at <paramref name="url"/>, passes the HTML through the
/// readability transcoder, loads the result into a hidden browser and returns
/// the title of the resulting document.
/// </summary>
/// <param name="url">Address of the page whose title is wanted.</param>
/// <returns>
/// The document title, or an empty string when transcoding or rendering failed.
/// </returns>
public static string getTitle(string url)
{
    string title = "";

    // Transcoder that reduces the page to its main content.
    NReadabilityTranscoder nReadabilityTranscoder = new NReadabilityTranscoder();
    // Parser used both to download the page and to prepare it for the browser.
    HtmlParser hp = new HtmlParser();

    // The using scope guarantees the hidden browser is disposed even when the
    // download below throws (previously that path leaked the browser because
    // the download ran outside the try/finally).
    using (NonDispBrowser nb = new NonDispBrowser())
    {
        // Fetch the raw HTML. Failures here propagate to the caller, as before.
        string source = hp.getHtmlSource(url);

        try
        {
            bool mainContentExtracted;
            string transcoded = nReadabilityTranscoder.Transcode(source, out mainContentExtracted);
            nb.NavigateAndWaitFromSource(hp.getHtmlPlainTextFromSourceWB(transcoded));

            // Guard against a missing document instead of relying on a
            // swallowed NullReferenceException.
            if (nb.Document != null)
            {
                title = nb.Document.Title ?? "";
            }
        }
        catch (Exception)
        {
            // Best effort by design: any transcoding/rendering failure simply
            // yields an empty title.
            title = "";
        }
    }

    return title;
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and returns its text content.
/// The readability-transcoded page rendered in a hidden browser is tried
/// first (body text with the title removed); when that produces nothing, the
/// raw HTML with tags and leftover markup stripped is returned instead.
/// </summary>
/// <param name="url">Address of the page to extract text from.</param>
/// <returns>
/// The extracted text, or an empty string when both strategies fail.
/// </returns>
public static string transeForJapa(string url)
{
    // Transcoder that reduces the page to its main content.
    NReadabilityTranscoder nReadabilityTranscoder = new NReadabilityTranscoder();
    // Parser used both to download the page and to prepare it for the browser.
    HtmlParser hp = new HtmlParser();

    // Hidden browser; disposed when the using scope ends.
    using (NonDispBrowser nb = new NonDispBrowser())
    {
        // Fetch the raw HTML. Failures here propagate to the caller, as before.
        string source = hp.getHtmlSource(url);
        string result = "";

        try
        {
            // First attempt: body text of the summarized (transcoded) page.
            bool mainContentExtracted;
            string transcoded = nReadabilityTranscoder.Transcode(source, out mainContentExtracted);
            nb.NavigateAndWaitFromSource(hp.getHtmlPlainTextFromSourceWB(transcoded));

            if (nb.Document != null && nb.Document.Body != null)
            {
                string title = nb.Document.Title;
                string bodyText = nb.Document.Body.InnerText ?? "";

                // string.Replace throws ArgumentException for an empty search
                // string, so only strip the title when there is one. Without
                // this guard every untitled page ended up returning "".
                result = string.IsNullOrEmpty(title) ? bodyText : bodyText.Replace(title, "");
            }
        }
        catch (Exception)
        {
            // Best effort: a failed first attempt must not prevent the
            // fallback below from running.
            result = "";
        }

        if (result != "")
        {
            return result;
        }

        try
        {
            // Fallback: strip tags and leftover markup from the raw HTML.
            return HtmlParser.htmlGomiRegularRemove(HtmlParser.htmlTagRegularRemove(source));
        }
        catch (Exception)
        {
            // Both strategies failed; keep the historical "empty string" contract.
            return "";
        }
    }
}
/// <summary>
/// Extracts RSS feed URLs advertised by a page.
/// Every <c>&lt;link&gt;</c> tag that mentions <c>application/rss+xml</c> is
/// inspected and the value of its <c>href</c> attribute is collected.
/// </summary>
/// <param name="targetUrl">URL of the site to inspect.</param>
/// <returns>
/// The feed URLs in document order, exactly as written in the markup (not
/// resolved against the page URL); an empty list when none are found.
/// </returns>
public static List<string> getRssUrl(string targetUrl)
{
    List<string> res = new List<string>();

    // Parser used to download the page.
    HtmlParser hp = new HtmlParser();
    string source = hp.getHtmlSource(targetUrl);
    if (string.IsNullOrEmpty(source))
    {
        return res;
    }

    // Work tag-by-tag rather than line-by-line: minified pages put many
    // unrelated hrefs on one physical line, and a <link> tag may also be
    // wrapped across several lines.
    System.Text.RegularExpressions.MatchCollection linkTags =
        System.Text.RegularExpressions.Regex.Matches(
            source,
            @"<link\b[^>]*>",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    foreach (System.Text.RegularExpressions.Match linkTag in linkTags)
    {
        string tag = linkTag.Value;

        // ">= 0" so that a match at the very first character is not missed.
        if (tag.IndexOf("application/rss+xml", StringComparison.OrdinalIgnoreCase) < 0)
        {
            continue;
        }

        // Accept double-quoted, single-quoted and unquoted attribute values,
        // with optional whitespace around '='.
        System.Text.RegularExpressions.Match href =
            System.Text.RegularExpressions.Regex.Match(
                tag,
                @"\bhref\s*=\s*(?:""([^""]*)""|'([^']*)'|([^\s>]+))",
                System.Text.RegularExpressions.RegexOptions.IgnoreCase);
        if (!href.Success)
        {
            continue;
        }

        // Exactly one of the three alternatives captured the value.
        string value;
        if (href.Groups[1].Success)
        {
            value = href.Groups[1].Value;
        }
        else if (href.Groups[2].Success)
        {
            value = href.Groups[2].Value;
        }
        else
        {
            value = href.Groups[3].Value;
        }

        res.Add(value);
    }

    return res;
}