/// <summary>
/// Downloads the page at <paramref name="url"/> and reports every anchor on it:
/// the text collected up to each closing &lt;/a&gt; tag, together with the
/// anchor's absolute target, is passed to ProcessOption.
/// </summary>
/// <param name="url">The URL to process.</param>
/// <param name="optionList">Which option list to process. Not read by this method.</param>
public void Process(Uri url, int optionList)
{
    String value = "";
    WebRequest http = HttpWebRequest.Create(url);

    // Dispose the response and its stream even when parsing throws.
    using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
    using (Stream istream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(istream);
        StringBuilder buffer = new StringBuilder();

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            if (ch != 0)
            {
                // Ordinary character: part of the text being collected.
                buffer.Append((char)ch);
                continue;
            }

            // Read() returned 0: a tag was parsed and is available via parse.Tag.
            HTMLTag tag = parse.Tag;
            if (String.Compare(tag.Name, "a", true) == 0)
            {
                // Anchors such as <a name="..."> carry no href; report an empty
                // target for them instead of failing on a null reference.
                String href = tag["href"];
                value = (href == null) ? "" : new Uri(url, href).ToString();
                buffer.Length = 0;
            }
            else if (String.Compare(tag.Name, "/a", true) == 0)
            {
                ProcessOption(buffer.ToString(), value);
            }
        }
    }
}
/// <summary>
/// Returns the text found between the next "a" tag and the following "/a" tag.
/// </summary>
/// <param name="analizador">Parser positioned somewhere before the anchor.</param>
/// <returns>
/// The anchor text (empty if "/a" is met before any "a"), or a fixed
/// "not found" message when the input ends first.
/// </returns>
static string Categoria(ParseHTML analizador)
{
    StringBuilder text = new StringBuilder();
    bool insideAnchor = false;

    for (int c = analizador.Read(); c != -1; c = analizador.Read())
    {
        if (c != 0)
        {
            // Plain character: only kept once the opening anchor has been seen.
            if (insideAnchor)
            {
                text.Append((char)c);
            }
            continue;
        }

        // c == 0 means a tag was just parsed.
        if (analizador.Tag.Name == "a")
        {
            insideAnchor = true;
        }
        else if (analizador.Tag.Name == "/a")
        {
            return text.ToString();
        }
    }

    return "no se encontro la categoria :S";
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and calls ProcessSubPage for
/// every link (anchor with an href) found on it.
/// </summary>
/// <param name="url">The URL to process; relative links are resolved against it.</param>
public void Process(Uri url)
{
    WebRequest http = HttpWebRequest.Create(url);

    // Dispose the response and its stream even when a sub page fails.
    using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
    using (Stream istream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(istream);

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            // Only tags (signalled by 0) are of interest here.
            if (ch != 0)
            {
                continue;
            }

            HTMLTag tag = parse.Tag;
            if (String.Compare(tag.Name, "a", true) != 0)
            {
                continue;
            }

            // Anchors without an href (e.g. <a name="...">) link nowhere; skip them.
            String href = tag["href"];
            if (href == null)
            {
                continue;
            }

            ProcessSubPage(new Uri(url, href));
        }
    }
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and saves every image it
/// references (img tags with a src attribute) into <paramref name="saveTo"/>.
/// </summary>
/// <param name="url">The URL to process; relative image paths are resolved against it.</param>
/// <param name="saveTo">A directory to save the images to.</param>
public void Process(Uri url, String saveTo)
{
    WebRequest http = HttpWebRequest.Create(url);

    // Dispose the page response and its stream even when a download fails.
    using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
    using (Stream istream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(istream);

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            // Only tags (signalled by 0) are of interest here.
            if (ch != 0)
            {
                continue;
            }

            HTMLTag tag = parse.Tag;
            if (String.Compare(tag.Name, "img", true) != 0)
            {
                continue;
            }

            // An img tag without src has nothing to download.
            String src = tag["src"];
            if (src == null)
            {
                continue;
            }

            Uri imageUrl = new Uri(url, src);
            String saveFile = Path.Combine(saveTo, ExtractFile(imageUrl));

            WebRequest imageRequest = HttpWebRequest.Create(imageUrl);
            // using guarantees the image response is closed if the download throws.
            using (HttpWebResponse imageResponse = (HttpWebResponse)imageRequest.GetResponse())
            {
                this.DownloadBinaryFile(imageResponse, saveFile);
            }
        }
    }
}
/// <summary>
/// Creates the view model: stores the event aggregator and file path, creates
/// the empty collections and grid, loads the IP entries from the HTML file at
/// <paramref name="filePath"/> and, unless a debugger is attached, starts the
/// ping thread.
/// </summary>
/// <param name="filePath">Path of the HTML file handed to ParseHTML.ParseIpHTML.</param>
/// <param name="eventAggregator">Event aggregator stored for later use.</param>
public MonitorIPViewModel(string filePath, IEventAggregator eventAggregator)
{
    this.eventAggregator = eventAggregator;
    SelectedFilePath = filePath;
    IpData = new ObservableCollectionPropertyNotify <IPDataModel>();
    IpAddressList = new List <IPAddress>();
    IpDataGrid = new DataGrid();
    // Captures the synchronization context of the constructing thread.
    // NOTE(review): presumably the UI thread's context — confirm against callers.
    uiContext = SynchronizationContext.Current;
    RefreshRateInputVisibility = Visibility.Hidden;

    // Sets the default refresh rate at 500 ms.
    internalRefreshRate = 500;

    // Fill IpData from the file, then copy each entry's Ip into the address
    // list that is given to the async ping command.
    ParseHTML.ParseIpHTML(IpData, filePath);
    for (int i = 0; i < IpData.Count; i++)
    {
        IpAddressList.Add(IpData[i].Ip);
    }
    RefreshIP = true;

    //~~~~~~~~~~~!!DO NOT CHANGE THIS!!~~~~~~~~~~~~~~~~~~~~~~
    // There is a bug in .NET Framework 4 and up that causes a
    // PROCESS_HAS_LOCKED_PAGES Windows Blue Screen of Death if you have a
    // debugger attached and stop debugging while the ping is in progress.
    // If you want to test ping functionality you must build/run without
    // debugging (CTRL-F5).
    if (!System.Diagnostics.Debugger.IsAttached)
    {
        StartIpPingThread();
    }
    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
}
/// <summary>
/// Looks for each of the &lt;option&gt; tags on the article page; each one
/// contains a link to one of the article's pages. For each page found,
/// DownloadArticlePage is called and its result is written to the console.
/// </summary>
public void Process()
{
    Uri url = new Uri("http://www.httprecipes.com/1/9/article.php");
    WebRequest http = HttpWebRequest.Create(url);
    http.Timeout = 30000;

    // Dispose the response and its stream even when a page download fails.
    using (WebResponse response = http.GetResponse())
    using (Stream stream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(stream);

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            // Only tags (signalled by 0) are of interest here.
            if (ch != 0)
            {
                continue;
            }

            HTMLTag tag = parse.Tag;
            if (String.Compare(tag.Name, "option", true) != 0)
            {
                continue;
            }

            // An option without a value attribute does not point to a page.
            String pageLink = tag["value"];
            if (pageLink == null)
            {
                continue;
            }

            Uri pageUrl = new Uri(url, pageLink);
            Console.WriteLine(DownloadArticlePage(pageUrl));
        }
    }
}
/// <summary>
/// Collects characters from the parser until it reports a tag (0) or the end
/// of the input (-1), and returns them as one string.
/// </summary>
/// <param name="analizador">Parser to read from.</param>
/// <returns>The text read; empty when the very first read is a tag or the end.</returns>
static string Descripcion(ParseHTML analizador)
{
    StringBuilder text = new StringBuilder();

    int c = analizador.Read();
    while (c > 0)
    {
        text.Append((char)c);
        c = analizador.Read();
    }

    return text.ToString();
}
/// <summary>
/// Requests <paramref name="url"/> with a Firefox user-agent string and a
/// 200000 ms timeout, then keeps the response, its stream and an HTML parser
/// over that stream in the instance fields.
/// </summary>
/// <param name="url">Address of the page to analyse.</param>
public Analizador(string url)
{
    const string userAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; es-ES; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3";

    HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);
    request.Timeout = 200000;

    // Optional proxy configuration, kept disabled:
    //System.Net.WebProxy x = new System.Net.WebProxy("192.168.1.34", 808);
    //request.AllowAutoRedirect = true;
    //request.Proxy = x;

    request.UserAgent = userAgent;

    respuesta = (HttpWebResponse)request.GetResponse();
    istream = respuesta.GetResponseStream();
    html = new ParseHTML(istream);
}
// Builds an Album from the album front page: every tag carrying an href is
// treated as a link to a child events file, which is read with ReadEvents and
// added to the album. The event list names (usually years) are not taken from
// the HTML, as they should be embedded in the EventLists themselves.
// Returns null, with aDiagnostic set, when the front page cannot be read;
// returns null as soon as any child events file fails to load (aDiagnostic is
// then whatever ReadEvents reported).
public Album ReadAlbum(string masterFilename, out string aDiagnostic)
{
    aDiagnostic = null;

    Album album = new Album(GetDirectory(masterFilename) + "Album.xml");

    string html = ReadFile(masterFilename);
    if (html == null)
    {
        aDiagnostic = "ReadAlbum: bad HTML Album file " + masterFilename;
        return null;
    }

    // Child links are relative to the directory of the master file.
    string masterDirectory = GetDirectory(masterFilename);

    ParseHTML parse = new ParseHTML();
    parse.Source = html;

    while (!parse.Eof())
    {
        // Parse() yields 0 when a tag was read; plain characters are ignored.
        if (parse.Parse() != 0)
        {
            continue;
        }

        AttributeList tag = parse.GetTag();
        if (tag["href"] == null)
        {
            continue;
        }

        // Links use URL separators; file paths need backslashes.
        string href = tag["href"].Value.Replace('/', '\\');

        EventList events = ReadEvents(masterDirectory + href, out aDiagnostic);
        if (events == null)
        {
            return null;
        }
        album.Add(events);
    }

    return album;
}
/// <summary>
/// Extracts the items of a list from the page at <paramref name="url"/>.
/// Each list item's text is passed to ProcessItem.
/// </summary>
/// <param name="url">The URL to extract the list from.</param>
/// <param name="listType">
/// Tag name of the list to read, without angle brackets (for example "ul");
/// it is compared case-insensitively against the parsed tag names.
/// </param>
/// <param name="optionList">Which list to search, zero for first.</param>
public void Process(Uri url, String listType, int optionList)
{
    // Closing tags are reported with a leading slash ("/ul"), in the same way
    // as the "/li" tag checked below.
    String listTypeEnd = "/" + listType;

    WebRequest http = HttpWebRequest.Create(url);

    // Dispose the response and its stream even when item processing throws.
    using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
    using (Stream istream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(istream);
        StringBuilder buffer = new StringBuilder();
        bool capture = false;

        Advance(parse, listType, optionList);

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            if (ch != 0)
            {
                // Plain character: only kept while inside a list item.
                if (capture)
                {
                    buffer.Append((char)ch);
                }
                continue;
            }

            HTMLTag tag = parse.Tag;
            if (String.Compare(tag.Name, "li", true) == 0)
            {
                // A new item starts; flush a previous item that was never closed.
                if (buffer.Length > 0)
                {
                    ProcessItem(buffer.ToString());
                }
                buffer.Length = 0;
                capture = true;
            }
            else if (String.Compare(tag.Name, "/li", true) == 0)
            {
                Console.WriteLine(buffer.ToString());
                ProcessItem(buffer.ToString());
                buffer.Length = 0;
                capture = false;
            }
            else if (String.Compare(tag.Name, listTypeEnd, true) == 0)
            {
                // End of the requested list: stop before reading later lists.
                break;
            }
        }
    }
}
/// <summary>
/// Collects the HREF and SRC attribute values of every tag in an HTML page.
/// Values are trimmed and lower-cased, filtered, passed through GetPath and
/// returned in document order.
/// </summary>
/// <param name="Page">HTML source to scan.</param>
/// <param name="HostAbsolutePath">Passed to GetPath together with each URL.</param>
/// <param name="MaxLen">Maximum accepted length of a resulting URL; values below 1 disable the limit.</param>
/// <param name="FindUrlLevel">
/// When 2, every non-empty URL is accepted; otherwise only URLs starting with
/// "http://" or "https://".
/// </param>
/// <returns>The URLs found, or null when there are none.</returns>
public static string[] GetHTMLUrls(string Page, string HostAbsolutePath, int MaxLen, int FindUrlLevel)
{
    ArrayList urls = new ArrayList();

    ParseHTML parser = new ParseHTML();
    parser.Source = Page;

    while (!parser.Eof())
    {
        // Parse() yields '\0' when a tag was read; plain characters are skipped.
        if (parser.Parse() != '\0')
        {
            continue;
        }

        Shove.HTML.HtmlParse.Attribute attribute = parser.GetTag()["HREF"];
        if (attribute != null)
        {
            string link = attribute.Value.Trim().ToLower();

            // Empty links, mail links and in-page fragments are never collected.
            bool rejected = (link == "") || link.StartsWith("mailto") || link.StartsWith("#");
            if (!rejected && ((FindUrlLevel == 2) || link.StartsWith("http://") || link.StartsWith("https://")))
            {
                link = GetPath(link, HostAbsolutePath);
                if ((MaxLen < 1) || (link.Length <= MaxLen))
                {
                    urls.Add(link);
                }
            }
        }

        attribute = parser.GetTag()["SRC"];
        if (attribute != null)
        {
            string source = attribute.Value.Trim().ToLower();

            if ((source != "") && ((FindUrlLevel == 2) || source.StartsWith("http://") || source.StartsWith("https://")))
            {
                source = GetPath(source, HostAbsolutePath);
                if ((MaxLen < 1) || (source.Length <= MaxLen))
                {
                    urls.Add(source);
                }
            }
        }
    }

    if (urls.Count == 0)
    {
        return null;
    }

    string[] result = new string[urls.Count];
    int index = 0;
    foreach (object url in urls)
    {
        result[index++] = url.ToString();
    }
    return result;
}
// File-system navigation: handles a double click on a ListView element.
// The first sub-item of the selected row holds the path and the third holds
// the type label, which decides what is opened.
private void listView1_MouseDoubleClick(object sender, MouseEventArgs e)
{
    try
    {
        // Throws when nothing is selected; the catch below reports that too.
        ListViewItem clicked = listView1.Items[listView1.SelectedIndices[0]];
        string rootPath = clicked.SubItems[0].Text;
        string extens = clicked.SubItems[2].Text;

        switch (extens)
        {
            // Folder or drive: navigate into it.
            case @"Папка":
            case @"Диск":
            {
                RefreshListView(rootPath, listView1);
                GlobalNavigationStr = rootPath;
                toolStripTextBox1.Text = rootPath;
                break;
            }

            // "text" entry: shown in Form3.
            case @"text":
            {
                Form3.FileToBeOpened = rootPath;
                Form3 form3 = new Form3();
                form3.Show();
                break;
            }

            // HTML file: parse it.
            case @"html":
            {
                FileClass file = new FileClass(rootPath);
                ParseHTML html = new ParseHTML(file.GetFullPath(), file.GetParentPath(), file.GetName());
                html.Parse();
                break;
            }

            // Plain text file: open it in the text editor owned by this form.
            case @"txt":
            {
                TextEditor te = new TextEditor(rootPath);
                te.Owner = this;
                te.Show();
                break;
            }

            // Anything else: run the file-name test and confirm success.
            default:
            {
                NewTestingClass n = new NewTestingClass();
                FileClass file = new FileClass(rootPath);
                if (n.TestGettingFileName(file))
                {
                    MessageBox.Show("OK!!!");
                }
                break;
            }
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.ToString());
    }
}
/// <summary>
/// Improved version of "advance": moves the parser forward until it reaches a
/// tag with the given name whose attribute has the given value.
/// </summary>
/// <param name="analizador">The ParseHTML instance to advance.</param>
/// <param name="etiqueta">Name of the tag to reach (compared case-sensitively).</param>
/// <param name="nombreAtributo">Name of the attribute, such as "src".</param>
/// <param name="atributo">Expected attribute value, such as "/imagenes/...".</param>
/// <returns>True when a matching tag was reached; false when the input ended first.</returns>
public static bool AvanzarA(ParseHTML analizador, String etiqueta, String nombreAtributo, String atributo)
{
    for (int c = analizador.Read(); c != -1; c = analizador.Read())
    {
        // Only tags (signalled by 0) can match.
        if (c != 0)
        {
            continue;
        }

        if (analizador.Tag.Name != etiqueta)
        {
            continue;
        }

        if (analizador.Tag[nombreAtributo] == atributo)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Advances the parser to the next "a" tag and returns its href attribute.
/// </summary>
/// <param name="analizador">Parser to read from.</param>
/// <returns>The href of the next anchor, or null when the input ends first.</returns>
static string Enlace(ParseHTML analizador)
{
    for (int c = analizador.Read(); c != -1; c = analizador.Read())
    {
        // 0 signals that a tag was parsed.
        bool isAnchor = (c == 0) && (analizador.Tag.Name == "a");
        if (isAnchor)
        {
            return analizador.Tag["href"];
        }
    }

    return null;
}
/// <summary>
/// Advances the parser to the next "img" tag and returns its src attribute.
/// </summary>
/// <param name="analizador">Parser to read from.</param>
/// <returns>
/// The src of the next image, or a fixed "not found" message when the input
/// ends first.
/// </returns>
static string Imagen(ParseHTML analizador)
{
    for (int c = analizador.Read(); c != -1; c = analizador.Read())
    {
        // 0 signals that a tag was parsed.
        bool isImage = (c == 0) && (analizador.Tag.Name == "img");
        if (isImage)
        {
            return analizador.Tag["src"];
        }
    }

    return "no se encontro una imagen :S";
}
/// <summary>
/// Submits the search at <paramref name="url"/> and collects the text of every
/// Url element in the response as a <see cref="Uri"/>.
/// </summary>
/// <param name="url">The complete search URL to request.</param>
/// <returns>The result URLs in the order they appear; empty when there are none.</returns>
private ICollection <Uri> DoSearch(Uri url)
{
    ICollection <Uri> result = new List <Uri>();

    // Submit the search.
    WebRequest http = HttpWebRequest.Create(url);

    // using closes the response even when parsing or Uri construction throws;
    // previously response.Close() was skipped on any exception.
    using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
    using (Stream istream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(istream);
        StringBuilder buffer = new StringBuilder();
        bool capture = false;

        // Parse the results.
        int ch;
        while ((ch = parse.Read()) != -1)
        {
            if (ch != 0)
            {
                // Plain character: only kept while inside a Url element.
                if (capture)
                {
                    buffer.Append((char)ch);
                }
                continue;
            }

            // Tag names are identifiers, not linguistic text, so compare ordinally.
            HTMLTag tag = parse.Tag;
            if (tag.Name.Equals("Url", StringComparison.OrdinalIgnoreCase))
            {
                buffer.Length = 0;
                capture = true;
            }
            else if (tag.Name.Equals("/Url", StringComparison.OrdinalIgnoreCase))
            {
                result.Add(new Uri(buffer.ToString()));
                buffer.Length = 0;
                capture = false;
            }
        }
    }

    return result;
}
/// <summary>
/// Reads characters from the parser up to the next tag and returns them.
/// </summary>
/// <param name="analizador">Parser to read from.</param>
/// <returns>
/// The text that precedes the next tag, or null when the input ends before
/// any tag is met.
/// </returns>
static string Titulo(ParseHTML analizador)
{
    StringBuilder text = new StringBuilder();

    for (int c = analizador.Read(); c != -1; c = analizador.Read())
    {
        // Anything that is not a positive character code ends the title.
        if (c <= 0)
        {
            return text.ToString();
        }
        text.Append((char)c);
    }

    return null;
}
/// <summary>
/// Reads characters from the parser up to the next tag and returns them.
/// </summary>
/// <param name="analizador">Parser to read from.</param>
/// <returns>
/// The text that precedes the next tag, or a fixed "not found" message when
/// the input ends before any tag is met.
/// </returns>
static string Titulo(ParseHTML analizador)
{
    StringBuilder text = new StringBuilder();

    for (int c = analizador.Read(); c != -1; c = analizador.Read())
    {
        // Anything that is not a positive character code ends the title.
        if (c <= 0)
        {
            return text.ToString();
        }
        text.Append((char)c);
    }

    return "No se encontro el titulo :S";
}
/// <summary>
/// Reads characters from the parser up to the next tag and returns them with
/// Spanish accented letters replaced by their plain equivalents (á→a, é→e,
/// í→i, ó→o, ú→u, ñ→n, Á→A, É→E, Í→I, Ó→O, Ú→U) and apostrophes removed.
/// </summary>
/// <param name="analizador">Parser to read from.</param>
/// <returns>
/// The normalised text that precedes the next tag, or null when the input
/// ends before any tag is met.
/// </returns>
static string Descripcion(ParseHTML analizador)
{
    int ch;
    StringBuilder sb1 = new StringBuilder();

    while ((ch = analizador.Read()) != -1)
    {
        if (ch > 0)
        {
            sb1.Append((char)ch);
            continue;
        }

        // A tag ends the description. "Í" and "Ú" used to be replaced by
        // themselves, which left those two accents in the output.
        return sb1.ToString()
            .Replace("á", "a").Replace("é", "e").Replace("í", "i")
            .Replace("ó", "o").Replace("ú", "u").Replace("ñ", "n")
            .Replace("É", "E").Replace("Á", "A").Replace("Í", "I")
            .Replace("Ó", "O").Replace("Ú", "U")
            .Replace("'", "");
    }

    return null;
}
/// <summary>
/// Moves the parser forward past occurrences of the given tag.
/// </summary>
/// <param name="parse">The HTML parse object to advance.</param>
/// <param name="tag">The tag name to look for (compared case-insensitively).</param>
/// <param name="count">
/// How many matching tags to find; values of one or less stop at the first match.
/// </param>
/// <returns>True if enough matching tags were found, false if the input ended first.</returns>
private bool Advance(ParseHTML parse, String tag, int count)
{
    for (int c = parse.Read(); c != -1; c = parse.Read())
    {
        // 0 signals that a tag was parsed; everything else is skipped.
        bool matches = (c == 0) && (String.Compare(parse.Tag.Name, tag, true) == 0);
        if (!matches)
        {
            continue;
        }

        if (--count <= 0)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Checks the page at the specified URL for a birth year. The page text is
/// split into sentences at each '.', and every sentence is passed to
/// ExtractBirth; a result greater than 1 and less than 3000 is reported on
/// the console and recorded with IncreaseYear.
/// </summary>
/// <param name="url">The URL to check.</param>
/// <remarks>
/// Pages that cannot be downloaded or read are silently skipped. Text after
/// the last '.' on the page is not examined.
/// </remarks>
public void CheckURL(Uri url)
{
    int ch;
    StringBuilder sentence = new StringBuilder();

    try
    {
        WebRequest http = HttpWebRequest.Create(url);

        // Dispose the response and its stream even when reading fails midway.
        using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
        using (Stream istream = response.GetResponseStream())
        {
            ParseHTML html = new ParseHTML(istream);

            do
            {
                ch = html.Read();

                // Skip end-of-input (-1) and tags (0); only text matters.
                if ((ch == -1) || (ch == 0))
                {
                    continue;
                }

                if (ch != '.')
                {
                    sentence.Append((char)ch);
                    continue;
                }

                // End of a sentence: evaluate it and start the next one.
                int year = ExtractBirth(sentence.ToString());
                if ((year > 1) && (year < 3000))
                {
                    Console.WriteLine("URL supports year: " + year);
                    IncreaseYear(year);
                }
                sentence.Length = 0;
            } while (ch != -1);
        }
    }
    catch (WebException)
    {
        // Best effort: a page that cannot be fetched simply contributes nothing.
    }
    catch (IOException)
    {
        // Best effort: a page that cannot be read completely is abandoned.
    }
}
/// <summary>
/// Downloads a page and writes it to the console. Every script tag that has
/// a src attribute is replaced by an inline script element containing the
/// text downloaded from that src; all other tags and text are copied as read.
/// </summary>
public void Process()
{
    Uri url = new Uri("http://www.httprecipes.com/1/9/includes.php");
    WebRequest http = HttpWebRequest.Create(url);
    http.Timeout = 30000;

    StringBuilder buffer = new StringBuilder();

    // Dispose the response and its stream even when an include fails to load.
    using (WebResponse response = http.GetResponse())
    using (Stream stream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(stream);

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            if (ch != 0)
            {
                buffer.Append((char)ch);
                continue;
            }

            // 0 signals that a tag was parsed.
            HTMLTag tag = parse.Tag;
            String src = tag["src"];
            if (String.Compare(tag.Name, "script", true) == 0 && src != null)
            {
                // External script: inline the downloaded text.
                String include = DownloadPage(new Uri(url, src));
                buffer.Append("<script>");
                buffer.Append(include);
                buffer.Append("</script>");
            }
            else
            {
                buffer.Append(tag.ToString());
            }
        }
    }

    Console.WriteLine(buffer.ToString());
}
/// <summary>
/// Looks for a link tag at the specified URL. If a link tag whose type
/// mentions "rss" is found, that feed is processed with ProcessRSS; when the
/// page has several such tags, the last one with an href wins.
/// </summary>
/// <param name="url">The URL of the web site.</param>
public void Process(Uri url)
{
    String href = null;
    WebRequest http = HttpWebRequest.Create(url);
    http.Timeout = 30000;

    // Dispose the response and its stream before the feed itself is fetched.
    using (WebResponse response = http.GetResponse())
    using (Stream stream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(stream);

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            // Only tags (signalled by 0) are of interest here.
            if (ch != 0)
            {
                continue;
            }

            HTMLTag tag = parse.Tag;
            if (String.Compare(tag.Name, "link", true) != 0)
            {
                continue;
            }

            String type = tag["type"];
            if (type != null && type.IndexOf("rss") != -1)
            {
                href = tag["href"];
            }
        }
    }

    if (href == null)
    {
        Console.WriteLine("No RSS link found.");
    }
    else
    {
        // Feed links are often relative (e.g. "/feed.xml"); resolving against
        // the page URL handles those and leaves absolute links unchanged.
        ProcessRSS(new Uri(url, href));
    }
}
/// <summary>
/// Processes the specified URL and extracts one of its option lists. For
/// every option, the text collected up to its closing tag and the option's
/// value attribute are passed to ProcessOption.
/// </summary>
/// <param name="url">The URL to process.</param>
/// <param name="optionList">Which option list to process, zero for first.</param>
public void Process(Uri url, int optionList)
{
    String value = "";
    WebRequest http = HttpWebRequest.Create(url);

    // Dispose the response and its stream even when option processing throws.
    using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
    using (Stream istream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(istream);
        StringBuilder buffer = new StringBuilder();

        Advance(parse, "select", optionList);

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            if (ch != 0)
            {
                buffer.Append((char)ch);
                continue;
            }

            // Tag names are compared case-insensitively so that upper-case
            // markup (<OPTION>, </SELECT>) is handled as well.
            HTMLTag tag = parse.Tag;
            if (String.Compare(tag.Name, "option", true) == 0)
            {
                value = tag["value"];
                buffer.Length = 0;
            }
            else if (String.Compare(tag.Name, "/option", true) == 0)
            {
                ProcessOption(buffer.ToString(), value);
            }
            else if (String.Compare(tag.Name, "/select", true) == 0)
            {
                // End of the requested list: stop before any later select is
                // read. (The previous "/choice" check never matched.)
                break;
            }
        }
    }
}
/// <summary>
/// Fetches the score page with the supplied login data, parses it into
/// lessons and refreshes the list view. The view is disabled while this runs.
/// Any exception is reported in a message box.
/// </summary>
/// <param name="o">
/// An Object[]: elements 0 and 1 are the two login strings handed to
/// ScoreHTMLUtil.TryGetHTMLString; when the array has exactly three elements,
/// element 2 is a bool telling whether messages are shown (defaults to true).
/// </param>
private void refresh(Object o)
{
    disableView();

    Object[] args = (Object[])o;
    bool showMsg = args.Length != 3 || (bool)args[2];
    String page = null;
    WebProxy proxy = XMLConfig.GetProxy();

    try
    {
        page = ScoreHTMLUtil.TryGetHTMLString((String)args[0], (String)args[1], "9999", proxy, IsNormal);

        // Nothing came back at all.
        if (string.IsNullOrEmpty(page))
        {
            throwError("无法抓取成绩信息,可能是服务器正忙或者是网络连接出错。", "网络错误", showMsg);
        }

        // Known failure pages returned instead of the score page.
        if (page.Contains("Course.jsp"))
        {
            throwError("由于使用错误密码请求多次,请求被URP拒绝。\n请用浏览器重新登录URP输入正确的验证码后再使用本工具", "URP登录错误", showMsg);
        }
        else if (page.Contains("复旦大学统一身份认证服务"))
        {
            throwError("学号密码不正确", "URP登录错误", showMsg);
        }

        List <Lesson> parsed = ParseHTML.TryParseHTML(page, GpaInfo, IsNormal);
        if (parsed == null)
        {
            throwError("抓取的网页无法解析", "解析数据错误", showMsg);
        }

        refreshListView(parsed);
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, ex.HelpLink);
    }

    enableView();
}
/// <summary>
/// Skips the next three "img" tags and returns the src attribute of the
/// fourth one, also writing it to the console.
/// </summary>
/// <param name="analizador">Parser to read from.</param>
/// <returns>The src of the fourth image met, or null when the input ends first.</returns>
static string Imagen(ParseHTML analizador)
{
    int skipped = 0;

    for (int c = analizador.Read(); c != -1; c = analizador.Read())
    {
        // 0 signals that a tag was parsed.
        bool isImage = (c == 0) && (analizador.Tag.Name == "img");
        if (!isImage)
        {
            continue;
        }

        if (skipped != 3)
        {
            skipped++;
            continue;
        }

        Console.WriteLine(analizador.Tag["src"]);
        return analizador.Tag["src"];
    }

    return null;
}
/// <summary>
/// Skips the next "a" tag and returns the href attribute of the one after it.
/// </summary>
/// <param name="analizador">Parser to read from.</param>
/// <returns>The href of the second anchor met, or null when the input ends first.</returns>
static string Categoria(ParseHTML analizador)
{
    bool firstAnchorSeen = false;

    for (int c = analizador.Read(); c != -1; c = analizador.Read())
    {
        // 0 signals that a tag was parsed.
        bool isAnchor = (c == 0) && (analizador.Tag.Name == "a");
        if (!isAnchor)
        {
            continue;
        }

        if (firstAnchorSeen)
        {
            return analizador.Tag["href"];
        }
        firstAnchorSeen = true;
    }

    return null;
}
/// <summary>
/// Parses one table of the HTML page at the specified URL. The cell texts of
/// each row are collected and passed to ProcessTableRow.
/// </summary>
/// <param name="url">The URL of the HTML page that contains the table.</param>
/// <param name="tableNum">The table number to parse, zero for the first.</param>
public void Process(Uri url, int tableNum)
{
    WebRequest http = HttpWebRequest.Create(url);

    // Dispose the response and its stream even when row processing throws.
    using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
    using (Stream istream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(istream);
        StringBuilder buffer = new StringBuilder();
        List <String> list = new List <String>();
        bool capture = false;

        Advance(parse, "table", tableNum);

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            if (ch != 0)
            {
                // Plain character: only kept while inside a cell.
                if (capture)
                {
                    buffer.Append((char)ch);
                }
                continue;
            }

            HTMLTag tag = parse.Tag;
            if (String.Compare(tag.Name, "tr", true) == 0)
            {
                // A new row starts with no cells and no pending text.
                list.Clear();
                capture = false;
                buffer.Length = 0;
            }
            else if (String.Compare(tag.Name, "/tr", true) == 0)
            {
                if (list.Count > 0)
                {
                    ProcessTableRow(list);
                    list.Clear();
                }
            }
            else if (String.Compare(tag.Name, "td", true) == 0)
            {
                // A new cell starts; keep a previous cell that was never closed.
                if (buffer.Length > 0)
                {
                    list.Add(buffer.ToString());
                }
                buffer.Length = 0;
                capture = true;
            }
            else if (String.Compare(tag.Name, "/td", true) == 0)
            {
                list.Add(buffer.ToString());
                buffer.Length = 0;
                capture = false;
            }
            else if (String.Compare(tag.Name, "/table", true) == 0)
            {
                // End of the requested table: stop before any later table.
                break;
            }
        }
    }
}
/// <summary>
/// Accesses the website and performs a search for either states or capitals.
/// Every item of the first result list is passed to ProcessItem.
/// </summary>
/// <param name="search">A search string.</param>
/// <param name="type">What to search for (s=state, c=capital).</param>
public void Process(String search, String type)
{
    String listType = "ul";
    String listTypeEnd = "/ul";
    StringBuilder buffer = new StringBuilder();
    bool capture = false;

    // Build the URL.
    MemoryStream mstream = new MemoryStream();
    FormUtility form = new FormUtility(mstream, null);
    form.Add("search", search);
    form.Add("type", type);
    form.Add("action", "Search");
    form.Complete();

    // ToArray() returns only the bytes that were written. GetBuffer() exposes
    // the whole internal buffer, whose unused tail would end up in the query
    // string as NUL characters.
    System.Text.ASCIIEncoding enc = new System.Text.ASCIIEncoding();
    String str = enc.GetString(mstream.ToArray());
    String surl = "http://www.httprecipes.com/1/7/get.php?" + str;
    Uri url = new Uri(surl);

    WebRequest http = HttpWebRequest.Create(url);

    // Dispose the response and its stream even when item processing throws.
    using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
    using (Stream istream = response.GetResponseStream())
    {
        ParseHTML parse = new ParseHTML(istream);

        // Parse from the URL.
        Advance(parse, listType, 0);

        int ch;
        while ((ch = parse.Read()) != -1)
        {
            if (ch != 0)
            {
                // Plain character: only kept while inside a list item.
                if (capture)
                {
                    buffer.Append((char)ch);
                }
                continue;
            }

            HTMLTag tag = parse.Tag;
            if (String.Compare(tag.Name, "li", true) == 0)
            {
                // A new item starts; flush a previous item that was never closed.
                if (buffer.Length > 0)
                {
                    ProcessItem(buffer.ToString());
                }
                buffer.Length = 0;
                capture = true;
            }
            else if (String.Compare(tag.Name, "/li", true) == 0)
            {
                ProcessItem(buffer.ToString());
                buffer.Length = 0;
                capture = false;
            }
            else if (String.Compare(tag.Name, listTypeEnd, true) == 0)
            {
                // Flush a last item that was never closed, but do not report
                // an empty item when the list ended cleanly.
                if (buffer.Length > 0)
                {
                    ProcessItem(buffer.ToString());
                }
                break;
            }
        }
    }
}
// Reads one HTML slide page and converts it into a SlideShow.
// The page title becomes the slide show title. Inside each table cell, text
// is gathered into a caption (a "p" tag starts a new caption line) and a
// photo link found in the cell is remembered; when the cell closes, the link
// and caption are added to the slide show as one slide.
// Returns null, with aDiagnostic set, when the file cannot be read.
public SlideShow ReadSlideShow(string aSlideFile, out string aDiagnostic)
{
    aDiagnostic = null;

    // The future XML slide show file takes its name from the HTML file.
    SlideShow slideShow = new SlideShow(aSlideFile.Replace(".htm", ".xml"));

    string html = ReadFile(aSlideFile);
    if (html == null)
    {
        aDiagnostic = "ReadSlideShow: bad HTML slideshow file " + aSlideFile;
        return null;
    }

    ParseHTML parse = new ParseHTML();
    parse.Source = html;
    HtmlPreprocess htmlPreprocess = new HtmlPreprocess();

    // Fallback title, used when the page has no title element.
    string title = "A most peculiar day";
    bool inTitle = false;
    bool inCaption = false;
    Caption caption = new Caption();
    string pendingLink = "";
    StringComparison ignoreCase = StringComparison.CurrentCultureIgnoreCase;

    while (!parse.Eof())
    {
        char ch = parse.Parse();

        if (ch != 0)
        {
            // Regular character: run it through the preprocessor, which may
            // swallow it, then route it to the title or the caption.
            ch = htmlPreprocess.Add(ch);
            if (ch == HtmlPreprocess.NullChar)
            {
                continue;
            }

            if (inTitle)
            {
                title += ch;
            }
            else if (inCaption)
            {
                caption.AddChar(ch);
            }
            continue;
        }

        // A tag was read.
        AttributeList tag = parse.GetTag();

        if (tag.Name.Equals("title", ignoreCase))
        {
            // Start collecting the title from scratch.
            inTitle = true;
            title = string.Empty;
        }
        else if (tag.Name.Equals("/title", ignoreCase))
        {
            inTitle = false;
            slideShow.Title = title.Trim();
        }

        if (tag.Name.Equals("td", ignoreCase))
        {
            // Each cell starts a new caption.
            inCaption = true;
            caption = new Caption();
        }
        else if (tag.Name.Equals("/td", ignoreCase))
        {
            inCaption = false;
            if (!pendingLink.Equals(""))
            {
                // A photo link was seen in this cell: it forms a slide with the caption.
                slideShow.Add(pendingLink, caption);
                pendingLink = "";
            }
        }
        else if (inCaption && tag.Name.Equals("p", ignoreCase))
        {
            // Paragraph tag within a caption.
            caption.NewLine();
        }
        else if (tag["href"] != null)
        {
            string href = tag["href"].Value.Replace('/', '\\');
            if (IsPhoto(href))
            {
                pendingLink = href;
            }
        }

        // Character preprocessing starts with a clean sheet after every tag.
        htmlPreprocess.Reset();
    }

    return slideShow;
}
/// <summary>
/// Processes one URL of the crawl: reads its HTML, collects the href/src
/// links it contains, optionally downloads them, and then calls itself for
/// each collected link while the recursion level is below the limit entered
/// in recursiveLevelTextBox. Progress is written to the status controls
/// through Invoke.
/// </summary>
/// <param name="currentRecursive">Recursion level of this call.</param>
/// <param name="strURL">URL to process.</param>
/// <param name="fromURL">URL that linked here; logged as the referer and passed to ReadHTML. May be null.</param>
/// <param name="strDownloadPath">Download destination passed to HTTPDownload.</param>
/// <param name="startLinkURL">When non-empty, links are ignored until one equal to this URL is seen.</param>
/// <param name="endLinkURL">When non-empty, parsing stops after a link equal to this URL has been collected.</param>
/// <param name="onlyLink">When true, links are collected but HTTPDownload is not called.</param>
/// <returns>
/// The value returned by ReadHTML for strURL (treated as a MIME type by
/// isHTML), or an empty string when cancelled before the page is read.
/// </returns>
private string ProcessURL(int currentRecursive , string strURL,string fromURL,string strDownloadPath,string startLinkURL,string endLinkURL,Boolean onlyLink)
{
    Invoke(new MethodInvoker(delegate()
    {
        // Update the recursion-level display, the status bar and the tray icon text.
        recursiveLevelLabel.Text = "" + currentRecursive;
        recursiveLevelLabel.Update();
        toolStripStatusLabel.Text = strURL;
        statusStrip.Update();
        notifyIcon1.Text = "CSMDown:";
        // Only the first 50 characters of a long URL are appended.
        if (strURL.Length > 50)
        {
            notifyIcon1.Text += strURL.Substring(0, 50);
        }
        else
        {
            notifyIcon1.Text += strURL;
        }
    }));

    // -----< Read the HTML >----- //
    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("ProcessURL : "); }));
    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText(strURL); }));
    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("RefererURL : "); }));
    if (fromURL != null)
    {
        Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText(fromURL); }));
    }
    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
    Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));

    string strHTML = "";
    string mimeType = "";
    if (m_cansel == true) { return mimeType; } // cancellation check

    mimeType = ReadHTML(strURL, ref strHTML, fromURL, currentRecursive);
    if (strHTML == null)
    {
        // Logs "could not read the HTML".
        Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("HTMLを読み込めませんでした。:"+strURL+"\r\n"); }));
        return mimeType;
    }
    if (isHTML(mimeType) == false)
    {
        // Logs "not HTML, ignoring".
        Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("HTMLではないので無視します。:" + strURL+"\r\n"); }));
        return mimeType;
    }
    insertAlreadyAccessedURL(strURL, currentRecursive);

    // -----< Navigate the browser >----- //
    Invoke(new MethodInvoker(delegate()
    {
        // Navigate the browser form, if one exists and is not disposed.
        if (m_BrowserForm != null)
        {
            if (m_BrowserForm.IsDisposed == false)
            {
                m_BrowserForm.Navigate(strURL);
            }
        }
    }));
    if (m_cansel == true) { return mimeType; } // cancellation check

    // -----< Parse the HTML >----- //
    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("HTMLをパースします。\r\n"); }));
    Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));

    // <<< Prepare the container for the parse results >>>
    ArrayList URLArray = new ArrayList();
    URLArray.Clear();

    // <<< Run the parse >>>
    Boolean startLinkProcessed = false;
    ParseHTML parser = new ParseHTML();
    parser.Source = strHTML;
    while (!parser.Eof())
    {
        if (m_cansel == true) { return mimeType; } // cancellation check

        // Parse the next element; anything that is not a tag (non-zero) is skipped.
        char ch = parser.Parse();
        if (ch != 0) { continue; }

        string linkURL = "";

        // For href, keep the link target.
        AttributeList tag = parser.GetTag();
        if (tag["href"] != null)
        {
            linkURL = (string)( tag["href"].Value );
        }

        // For src, keep the link target (overrides href when both are present).
        if (tag["src"] != null)
        {
            linkURL = (string)( tag["src"].Value );
        }

        // When the link contains '#', drop everything from the '#' on.
        if (linkURL.IndexOf("#") >= 0)
        {
            int index = linkURL.IndexOf("#");
            linkURL = linkURL.Substring(0, index);
        }

        if (linkURL.Length > 0)
        {
            string linkAbsolute = null;
            // NOTE(review): this inner test repeats the enclosing condition, so
            // the else branch can never run.
            if (linkURL.Length > 0)
            {
                linkAbsolute = CreateLinkURL(linkURL, strURL);
            }
            else
            {
                linkAbsolute = linkURL;
            }

            // CreateLinkURL returned null: the link is skipped without logging.
            if (linkAbsolute == null)
            {
                //Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("絶対パスを作れなかったか、もしくは無効なリンクと判定しました。" + linkURL + "\r\n"); }));
                //Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
                continue;
            }

            // Until the start link has been seen, every other link is ignored.
            if ((startLinkURL.Length > 0)&&(startLinkProcessed == false))
            {
                if (startLinkURL.Equals(linkAbsolute))
                {
                    startLinkProcessed = true;
                }
                else
                {
                    // Logs "before StartLink, ignoring".
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("StartLink以前なので無視します:" + linkURL + "\r\n"); }));
                    Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
                    continue;
                }
            }

            // Strip line breaks from the link before storing it.
            linkAbsolute = linkAbsolute.Replace("\r\n", "");
            linkAbsolute = linkAbsolute.Replace("\n", "");
            URLArray.Add(linkAbsolute);

            // Stop at the end link.
            if (endLinkURL.Length > 0)
            {
                if (endLinkURL.Equals(linkAbsolute))
                {
                    // End link detected: log it and stop parsing.
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("EndLinkを検出。パースを終了します。:" + linkURL + "\r\n"); }));
                    Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
                    break;
                }
            }
        }
    }

    // Logs the number of links collected.
    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("リンク数:" + URLArray.Count); }));
    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
    Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
    if (m_cansel == true) { return mimeType; } // cancellation check

    // -----< Download >----- //
    Boolean result = true;
    // Logs "starting download" (written even when onlyLink skips the download).
    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("ダウンロードを開始します。\r\n"); }));
    Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
    if (onlyLink == false)
    {
        result = HTTPDownload(URLArray, strDownloadPath, strURL, currentRecursive);
        if (result != true)
        {
            // Logs "download failed"; processing continues regardless.
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("ダウンロードに失敗しました。"); }));
        }
    }

    // -----< Recursive processing of the collected links >------ //
    // NOTE(review): the text boxes and check box below are read directly, not
    // through Invoke as the status updates are — confirm this is safe on the
    // thread this method runs on.
    int recursiveMax = int.Parse(recursiveLevelTextBox.Text);
    if (currentRecursive < recursiveMax)
    {
        int i;
        int c;

        // Two passes: 0 = process priority URLs, 1 = process the remaining URLs.
        for (c = 0; c < 2; c++)
        {
            // Without a priority path there is nothing to do in the priority pass.
            if (c == 0)
            {
                if (highPriorityPathTextBox.Text.Length <= 0)
                {
                    continue;
                }
            }

            // When "priority only" is checked, the second pass is skipped.
            if ((c == 1) && (highPriorityPathTextBox.Text.Length>0))
            {
                if (priorityOnlyCheckBox.Checked == true)
                {
                    continue;
                }
            }

            // Walk the collected URLs, filtering them according to the pass.
            for (i = 0; i < URLArray.Count; i++)
            {
                if (m_cansel == true) { break; } // cancellation check

                try
                {
                    if (c == 0)
                    {
                        // Priority pass: skip URLs that do not start with the priority path.
                        string priorityURL = highPriorityPathTextBox.Text.Trim();
                        if (!((string)URLArray[i]).StartsWith(priorityURL))
                        {
                            // A priority path is set and this URL does not match it: next URL.
                            continue;
                        }
                    }
                    else
                    {
                        // Second pass: meant to skip URLs already handled in the priority pass.
                        // NOTE(review): this tests strURL (the current page), not
                        // URLArray[i], and uses the untrimmed text box value; it looks
                        // like it should mirror the check in the priority pass — confirm.
                        if ((highPriorityPathTextBox.Text.Length > 0) && (strURL.StartsWith(highPriorityPathTextBox.Text)))
                        {
                            // A priority path is set and the URL matches it: next URL.
                            continue;
                        }
                    }
                }
                catch (Exception e)
                {
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("URI Compare failed\r\n"); }));
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText(e.Message+"\r\n"); }));
                    Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
                    continue;
                }

                if (isAlreadyAccessed((string)URLArray[i],currentRecursive+1) == true)
                {
                    // Logs "already accessed URL" and moves on.
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("アクセス済みURL:"); }));
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText((string)URLArray[i]); }));
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
                    continue;
                }

                // -----< Recursive call >----- //
                // Child calls get no start/end link and always download.
                string res;
                res = ProcessURL(currentRecursive + 1, (string)URLArray[i],strURL, strDownloadPath,"","",false);
                if (isHTML(res) != true)
                {
                    // Logs "was not HTML".
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("HTMLではありませんでした。:"); }));
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText((string)URLArray[i]); }));
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
                    Invoke(new MethodInvoker(delegate() { recursiveLevelLabel.Text = "" + currentRecursive; }));
                    Invoke(new MethodInvoker(delegate() { recursiveLevelLabel.Update(); }));
                }

                // Restore this level's display after the child call changed it.
                Invoke(new MethodInvoker(delegate() { recursiveLevelLabel.Text = "" + currentRecursive; }));
                Invoke(new MethodInvoker(delegate() { recursiveLevelLabel.Update(); }));
                Invoke(new MethodInvoker(delegate() { toolStripStatusLabel.Text = strURL; }));
                Invoke(new MethodInvoker(delegate() { statusStrip.Update(); }));
                if (m_cansel == true) { return mimeType; } // cancellation check
            }
        }
    }
    return mimeType;
}