Пример #1
0
        /// <summary>
        /// Downloads the page at the specified URL and reports every hyperlink
        /// on it.  For each anchor that carries an href attribute, ProcessOption
        /// is called with the text captured since the opening tag and the link
        /// target resolved against <paramref name="url"/>.
        /// </summary>
        /// <param name="url">The URL to process.</param>
        /// <param name="optionList">Which option list to process (not used by this implementation).</param>
        public void Process(Uri url, int optionList)
        {
            String        value  = "";
            StringBuilder buffer = new StringBuilder();
            WebRequest    http   = HttpWebRequest.Create(url);

            // Dispose the response and its stream even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        // Ordinary character: part of the text between tags.
                        buffer.Append((char)ch);
                        continue;
                    }

                    // A zero from Read() is followed by a look at parse.Tag.
                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "a", true) == 0)
                    {
                        // An anchor without href (e.g. a named anchor) is not a
                        // link; remember that so its closing tag is skipped
                        // instead of crashing on a null attribute.
                        String href = tag["href"];
                        value         = (href == null) ? null : new Uri(url, href).ToString();
                        buffer.Length = 0;
                    }
                    else if (String.Compare(tag.Name, "/a", true) == 0)
                    {
                        if (value != null)
                        {
                            ProcessOption(buffer.ToString(), value);
                        }
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Reads from the parser and returns the text found between the next
        /// "a" tag and the "/a" tag that follows it.
        /// </summary>
        /// <param name="analizador">The parser to read from.</param>
        /// <returns>
        /// The captured text, or a fixed "not found" message when the input
        /// ends before a "/a" tag is seen.
        /// </returns>
        static string Categoria(ParseHTML analizador)
        {
            StringBuilder texto          = new StringBuilder();
            bool          dentroDeEnlace = false;

            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                if (c != 0)
                {
                    // Plain character: keep it only once an "a" tag has been seen.
                    if (dentroDeEnlace)
                    {
                        texto.Append((char)c);
                    }
                    continue;
                }

                // Tag names are compared case-sensitively here.
                if (analizador.Tag.Name == "a")
                {
                    dentroDeEnlace = true;
                }
                else if (analizador.Tag.Name == "/a")
                {
                    return texto.ToString();
                }
            }

            return "no se encontro la categoria :S";
        }
Пример #3
0
        /// <summary>
        /// Process the specified URL and extract data from all of the subpages
        /// that this page links to.  Every anchor with an href attribute is
        /// resolved against <paramref name="url"/> and passed to ProcessSubPage.
        /// </summary>
        /// <param name="url">The URL to process.</param>
        public void Process(Uri url)
        {
            WebRequest http = HttpWebRequest.Create(url);

            // Dispose the response and its stream even when a sub page fails.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        continue; // text between tags is irrelevant here
                    }

                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "a", true) != 0)
                    {
                        continue;
                    }

                    // Anchors without href (e.g. named anchors) have no target
                    // to follow; skip them instead of failing on a null value.
                    String href = tag["href"];
                    if (href == null)
                    {
                        continue;
                    }

                    ProcessSubPage(new Uri(url, href));
                }
            }
        }
        /// <summary>
        /// Process the specified URL and download the images.  Every img tag
        /// with a src attribute is resolved against <paramref name="url"/> and
        /// saved under <paramref name="saveTo"/> using the file name returned
        /// by ExtractFile.
        /// </summary>
        /// <param name="url">The URL to process.</param>
        /// <param name="saveTo">A directory to save the images to.</param>
        public void Process(Uri url, String saveTo)
        {
            WebRequest http = HttpWebRequest.Create(url);

            // Dispose the page response and its stream even when a download throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        continue; // only tags matter here
                    }

                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "img", true) != 0)
                    {
                        continue;
                    }

                    // An img tag without src has nothing to download.
                    String src = tag["src"];
                    if (src == null)
                    {
                        continue;
                    }

                    Uri        imageUrl     = new Uri(url, src);
                    String     saveFile     = Path.Combine(saveTo, ExtractFile(imageUrl));
                    WebRequest imageRequest = HttpWebRequest.Create(imageUrl);

                    // The image response is released even if saving it fails.
                    using (HttpWebResponse imageResponse = (HttpWebResponse)imageRequest.GetResponse())
                    {
                        this.DownloadBinaryFile(imageResponse, saveFile);
                    }
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Creates the view model for monitoring the IP addresses listed in a file.
        /// Initializes the collections and UI-related state, loads the IP data via
        /// ParseHTML.ParseIpHTML, copies each entry's Ip into IpAddressList and,
        /// unless a debugger is attached, starts the ping thread.
        /// </summary>
        /// <param name="filePath">Path of the file handed to ParseHTML.ParseIpHTML; also stored in SelectedFilePath.</param>
        /// <param name="eventAggregator">Event aggregator stored for later use by this view model.</param>
        public MonitorIPViewModel(string filePath, IEventAggregator eventAggregator)
        {
            this.eventAggregator = eventAggregator;
            SelectedFilePath     = filePath;
            IpData        = new ObservableCollectionPropertyNotify <IPDataModel>();
            IpAddressList = new List <IPAddress>();
            IpDataGrid    = new DataGrid();
            // Captured at construction time; presumably the UI thread's context
            // when this constructor runs on the UI thread (confirm against callers).
            uiContext     = SynchronizationContext.Current;
            RefreshRateInputVisibility = Visibility.Hidden;
            // Sets the default refresh rate to 500 ms.
            internalRefreshRate = 500;

            // Add the IPs from IpData to the address list that is given to the
            // async ping command.  ParseIpHTML is expected to populate IpData
            // from the file (TODO confirm; its implementation is not visible here).
            ParseHTML.ParseIpHTML(IpData, filePath);
            for (int i = 0; i < IpData.Count; i++)
            {
                IpAddressList.Add(IpData[i].Ip);
            }

            RefreshIP = true;

            //~~~~~~~~~~~!!DO NOT CHANGE THIS!!~~~~~~~~~~~~~~~~~~~~~~
            // There is a bug in .NET Framework 4 and up that causes a
            // PROCESS_HAS_LOCKED_PAGES Windows Blue Screen of Death if a debugger
            // is attached and debugging is stopped while a ping is in progress.
            // To test ping functionality, build/run without debugging (CTRL-F5).
            if (!System.Diagnostics.Debugger.IsAttached)
            {
                StartIpPingThread();
            }
            //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        }
        /// <summary>
        /// This method looks for each of the &lt;option&gt; tags that contain
        /// a link to each of the pages.  For each page found the
        /// DownloadArticlePage method is called and its result is written to
        /// the console.
        /// </summary>
        public void Process()
        {
            Uri        url  = new Uri("http://www.httprecipes.com/1/9/article.php");
            WebRequest http = HttpWebRequest.Create(url);

            http.Timeout = 30000;

            // Dispose the response and its stream even when a page download throws.
            using (WebResponse response = http.GetResponse())
            using (Stream stream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(stream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        continue; // only tags matter here
                    }

                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "option", true) != 0)
                    {
                        continue;
                    }

                    // An option without a value attribute carries no page link;
                    // skip it rather than passing null to the Uri constructor.
                    String str = tag["value"];
                    if (str == null)
                    {
                        continue;
                    }

                    Uri u = new Uri(url, str);
                    Console.WriteLine(DownloadArticlePage(u));
                }
            }
        }
Пример #7
0
        /// <summary>
        /// Collects characters from the parser until Read() returns a value
        /// that is zero or negative (the loop stops on the first non-positive
        /// result).
        /// </summary>
        /// <param name="analizador">The parser to read from.</param>
        /// <returns>The characters read before the stop condition; may be empty.</returns>
        static string Descripcion(ParseHTML analizador)
        {
            StringBuilder texto = new StringBuilder();

            for (int c = analizador.Read(); c > 0; c = analizador.Read())
            {
                texto.Append((char)c);
            }

            return texto.ToString();
        }
Пример #8
0
        /// <summary>
        /// Opens an HTTP request to the given address with a browser-like
        /// user agent and a 200000 ms timeout, then stores the response, its
        /// stream and a ParseHTML over that stream in the instance fields
        /// respuesta, istream and html.
        /// </summary>
        /// <param name="url">Address of the page to request.</param>
        public Analizador(string url)
        {
            HttpWebRequest solicitud = (HttpWebRequest)WebRequest.Create(url);

            solicitud.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; es-ES; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3";
            solicitud.Timeout   = 200000;

            // Optional proxy configuration, left disabled:
            //System.Net.WebProxy x = new System.Net.WebProxy("192.168.1.34", 808);
            //solicitud.AllowAutoRedirect = true;
            //solicitud.Proxy = x;

            respuesta = (HttpWebResponse)solicitud.GetResponse();
            istream   = respuesta.GetResponseStream();
            html      = new ParseHTML(istream);
        }
Пример #9
0
        /// <summary>
        /// Builds an Album from the album front page: every tag with an href
        /// attribute is treated as a link to a child events file, which is read
        /// with ReadEvents and added to the album.  The event list names are not
        /// taken from the HTML; per the original note they should be embedded in
        /// the EventLists themselves.
        /// </summary>
        /// <param name="masterFilename">Path of the album front page HTML file.</param>
        /// <param name="aDiagnostic">
        /// Null on entry; set to a message when the front page cannot be read,
        /// otherwise whatever the last ReadEvents call assigned.
        /// </param>
        /// <returns>The populated album, or null when the front page or any child file fails.</returns>
        public Album ReadAlbum(string masterFilename, out string aDiagnostic)
        {
            aDiagnostic = null;
            Album album = new Album(GetDirectory(masterFilename) + "Album.xml");

            string page = ReadFile(masterFilename);
            if (page == null)
            {
                aDiagnostic = "ReadAlbum: bad HTML Album file " + masterFilename;
                return null;
            }

            // Child links are relative to the directory of the front page.
            string masterDirectory = GetDirectory(masterFilename);

            ParseHTML parser = new ParseHTML();
            parser.Source = page;

            while (!parser.Eof())
            {
                // Parse() yields 0 when a tag (rather than a character) was read.
                if (parser.Parse() != 0)
                {
                    continue;
                }

                AttributeList attributes = parser.GetTag();
                if (attributes["href"] == null)
                {
                    continue;
                }

                // Links use URL separators; the file system path uses backslashes.
                string childPath = attributes["href"].Value.Replace('/', '\\');

                EventList childEvents = ReadEvents(masterDirectory + childPath, out aDiagnostic);
                if (childEvents == null)
                {
                    return null;
                }

                album.Add(childEvents);
            }

            return album;
        }
Пример #10
0
        /// <summary>
        /// Called to extract a list from the specified URL.  After advancing to
        /// the requested list, the text of every li element is passed to
        /// ProcessItem until the list's closing tag or the end of the page.
        /// </summary>
        /// <param name="url">The URL to extract the list from.</param>
        /// <param name="listType">
        /// What type of list, given as the name of its beginning tag as reported
        /// by HTMLTag.Name (presumably without angle brackets, e.g. "ul").
        /// </param>
        /// <param name="optionList">Which list to search, zero for first.</param>
        public void Process(Uri url, String listType, int optionList)
        {
            // Closing tags are named with a LEADING slash (compare "/li" below),
            // so the end-of-list marker is "/" + listType.  The previous
            // listType + "/" could never match, so parsing ran on past the list.
            String        listTypeEnd = "/" + listType;
            StringBuilder buffer      = new StringBuilder();
            bool          capture     = false;
            WebRequest    http        = HttpWebRequest.Create(url);

            // Dispose the response and its stream even when processing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);

                Advance(parse, listType, optionList);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        HTMLTag tag = parse.Tag;
                        if (String.Compare(tag.Name, "li", true) == 0)
                        {
                            // A new item starts; flush a previous item that was
                            // never explicitly closed.
                            if (buffer.Length > 0)
                            {
                                ProcessItem(buffer.ToString());
                            }
                            buffer.Length = 0;
                            capture       = true;
                        }
                        else if (String.Compare(tag.Name, "/li", true) == 0)
                        {
                            Console.WriteLine(buffer.ToString());
                            ProcessItem(buffer.ToString());
                            buffer.Length = 0;
                            capture       = false;
                        }
                        else if (String.Compare(tag.Name, listTypeEnd, true) == 0)
                        {
                            break;
                        }
                    }
                    else if (capture)
                    {
                        buffer.Append((char)ch);
                    }
                }
            }
        }
Пример #11
0
        /// <summary>
        /// Collects the HREF and SRC attribute values of every tag in an HTML page.
        /// Values are trimmed and lower-cased, filtered, passed through GetPath
        /// together with <paramref name="HostAbsolutePath"/>, and kept when they
        /// satisfy the length limit.
        /// </summary>
        /// <param name="Page">The HTML source to scan.</param>
        /// <param name="HostAbsolutePath">Second argument handed to GetPath for every accepted value.</param>
        /// <param name="MaxLen">Maximum accepted length of a resulting URL; values below 1 disable the limit.</param>
        /// <param name="FindUrlLevel">
        /// When 2, every non-empty value is accepted; otherwise only values
        /// starting with "http://" or "https://".
        /// </param>
        /// <returns>The collected URLs in document order, or null when none were found.</returns>
        public static string[] GetHTMLUrls(string Page, string HostAbsolutePath, int MaxLen, int FindUrlLevel)
        {
            ArrayList found  = new ArrayList();
            ParseHTML parser = new ParseHTML();

            parser.Source = Page;
            while (!parser.Eof())
            {
                // Parse() yields '\0' when a tag was read; other characters are skipped.
                if (parser.Parse() != '\0')
                {
                    continue;
                }

                Shove.HTML.HtmlParse.Attribute candidate = parser.GetTag()["HREF"];
                if (candidate != null)
                {
                    string href = candidate.Value.Trim().ToLower();
                    // Mail links and in-page fragments are never collected.
                    bool navigable = (href != "") && !href.StartsWith("mailto") && !href.StartsWith("#");
                    if (navigable && ((FindUrlLevel == 2) || href.StartsWith("http://") || href.StartsWith("https://")))
                    {
                        href = GetPath(href, HostAbsolutePath);
                        if ((MaxLen < 1) || (href.Length <= MaxLen))
                        {
                            found.Add(href);
                        }
                    }
                }

                candidate = parser.GetTag()["SRC"];
                if (candidate != null)
                {
                    string src = candidate.Value.Trim().ToLower();
                    if ((src != "") && ((FindUrlLevel == 2) || src.StartsWith("http://") || src.StartsWith("https://")))
                    {
                        src = GetPath(src, HostAbsolutePath);
                        if ((MaxLen < 1) || (src.Length <= MaxLen))
                        {
                            found.Add(src);
                        }
                    }
                }
            }

            if (found.Count == 0)
            {
                return null;
            }

            string[] urls = new string[found.Count];
            int      slot = 0;
            foreach (object entry in found)
            {
                urls[slot++] = entry.ToString();
            }
            return urls;
        }
Пример #12
0
        /// <summary>
        /// File system navigation: handles a double click on a ListView item.
        /// The first sub-item of the selected row is used as the path and the
        /// third as the entry kind, which decides what is opened.
        /// Any exception (including no row being selected) is shown in a message box.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Mouse event data (unused).</param>
        private void listView1_MouseDoubleClick(object sender, MouseEventArgs e)
        {
            try
            {
                // Look up the clicked row by index; SubItems[0] is the path,
                // SubItems[2] the kind/extension column.
                int    row  = listView1.SelectedIndices[0];
                string path = listView1.Items[row].SubItems[0].Text;
                string kind = listView1.Items[row].SubItems[2].Text;

                switch (kind)
                {
                    case @"Папка":
                    case @"Диск":
                    {
                        // A directory or drive: navigate into it.
                        RefreshListView(path, listView1);
                        GlobalNavigationStr    = path;
                        toolStripTextBox1.Text = path;
                        break;
                    }

                    case @"text":
                    {
                        // Opened in Form3 (the original comment calls this a table).
                        Form3.FileToBeOpened = path;
                        Form3 viewer = new Form3();
                        viewer.Show();
                        break;
                    }

                    case @"html":
                    {
                        // An HTML file: hand it to the HTML parser.
                        FileClass htmlFile = new FileClass(path);
                        ParseHTML parser   = new ParseHTML(htmlFile.GetFullPath(), htmlFile.GetParentPath(), htmlFile.GetName());
                        parser.Parse();
                        break;
                    }

                    case @"txt":
                    {
                        // A text file: open it in the text editor owned by this form.
                        TextEditor editor = new TextEditor(path);
                        editor.Owner = this;
                        editor.Show();
                        break;
                    }

                    default:
                    {
                        // Anything else.  NOTE(review): the original comment said
                        // "do nothing", but the code runs a file-name test and
                        // reports success.
                        NewTestingClass tester    = new NewTestingClass();
                        FileClass       otherFile = new FileClass(path);
                        if (tester.TestGettingFileName(otherFile))
                        {
                            MessageBox.Show("OK!!!");
                        }
                        break;
                    }
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.ToString());
            }
        }
Пример #13
0
        /// <summary>
        /// Advances the parser to the next tag that has the given name and whose
        /// given attribute equals the given value.
        /// </summary>
        /// <param name="analizador">The ParseHTML instance to advance.</param>
        /// <param name="etiqueta">The tag name to stop at (compared case-sensitively).</param>
        /// <param name="nombreAtributo">The attribute name, for example "src".</param>
        /// <param name="atributo">The attribute value the tag must carry.</param>
        /// <returns>True when a matching tag was reached; false when the input ended first.</returns>
        public static bool AvanzarA(ParseHTML analizador, String etiqueta, String nombreAtributo, String atributo)
        {
            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                // Only a zero result exposes a tag; the attribute is looked up
                // only when the tag name already matches.
                bool coincide = (c == 0)
                                && (analizador.Tag.Name == etiqueta)
                                && (analizador.Tag[nombreAtributo] == atributo);
                if (coincide)
                {
                    return true;
                }
            }

            return false;
        }
Пример #14
0
        /// <summary>
        /// Advances the parser to the next "a" tag and returns its href attribute.
        /// </summary>
        /// <param name="analizador">The parser to read from.</param>
        /// <returns>
        /// The href value of the first "a" tag encountered, or null when the
        /// input ends before one is found.
        /// </returns>
        static string Enlace(ParseHTML analizador)
        {
            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                // A zero result exposes a tag; the name check is case-sensitive.
                if (c == 0 && analizador.Tag.Name == "a")
                {
                    return analizador.Tag["href"];
                }
            }

            return null;
        }
Пример #15
0
        /// <summary>
        /// Advances the parser to the next "img" tag and returns its src attribute.
        /// </summary>
        /// <param name="analizador">The parser to read from.</param>
        /// <returns>
        /// The src value of the first "img" tag encountered, or a fixed
        /// "not found" message when the input ends before one is found.
        /// </returns>
        static string Imagen(ParseHTML analizador)
        {
            int c = analizador.Read();
            while (c != -1)
            {
                // A zero result exposes a tag; the name check is case-sensitive.
                if (c == 0 && analizador.Tag.Name == "img")
                {
                    return analizador.Tag["src"];
                }
                c = analizador.Read();
            }

            return "no se encontro una imagen :S";
        }
Пример #16
0
        /// <summary>
        /// Submits the search at the given URL and collects the result links.
        /// The text enclosed by each Url ... /Url tag pair in the response is
        /// turned into a Uri.
        /// </summary>
        /// <param name="url">The fully formed search request URL.</param>
        /// <returns>The URLs found in the response, in document order; empty when there are none.</returns>
        private ICollection <Uri> DoSearch(Uri url)
        {
            ICollection <Uri> result = new List <Uri>();

            // submit the search
            WebRequest http = HttpWebRequest.Create(url);

            // Both the response and its stream are released even when parsing
            // or Uri construction throws; previously response.Close() was
            // skipped on any exception.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML     parse   = new ParseHTML(istream);
                StringBuilder buffer  = new StringBuilder();
                bool          capture = false;

                // parse the results
                int ch;
                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        HTMLTag tag = parse.Tag;
                        if (tag.Name.Equals("Url", StringComparison.CurrentCultureIgnoreCase))
                        {
                            // start of a result: begin collecting its text
                            buffer.Length = 0;
                            capture       = true;
                        }
                        else if (tag.Name.Equals("/Url", StringComparison.CurrentCultureIgnoreCase))
                        {
                            // end of a result: the collected text is the link
                            result.Add(new Uri(buffer.ToString()));
                            buffer.Length = 0;
                            capture       = false;
                        }
                    }
                    else if (capture)
                    {
                        buffer.Append((char)ch);
                    }
                }
            }

            return result;
        }
Пример #17
0
        /// <summary>
        /// Reads the text that precedes the next tag.
        /// </summary>
        /// <param name="analizador">The parser to read from.</param>
        /// <returns>
        /// The characters read before the first non-positive result of Read();
        /// null when that result is -1 (end of input).
        /// </returns>
        static string Titulo(ParseHTML analizador)
        {
            StringBuilder texto = new StringBuilder();

            int c = analizador.Read();
            while (c > 0)
            {
                texto.Append((char)c);
                c = analizador.Read();
            }

            // -1 means the input ended before any tag was reached.
            return (c == -1) ? null : texto.ToString();
        }
Пример #18
0
        /// <summary>
        /// Reads the text that precedes the next tag.
        /// </summary>
        /// <param name="analizador">The parser to read from.</param>
        /// <returns>
        /// The characters read before the first non-positive result of Read(),
        /// or a fixed "not found" message when the input ends (-1) first.
        /// </returns>
        static string Titulo(ParseHTML analizador)
        {
            StringBuilder titulo = new StringBuilder();

            while (true)
            {
                int c = analizador.Read();

                if (c == -1)
                {
                    return "No se encontro el titulo :S";
                }

                if (c <= 0)
                {
                    // A tag (or any other non-positive result) ends the title.
                    return titulo.ToString();
                }

                titulo.Append((char)c);
            }
        }
Пример #19
0
        /// <summary>
        /// Reads the text that precedes the next tag and returns it with the
        /// Spanish accented vowels and the letter n-tilde replaced by their
        /// plain counterparts and apostrophes removed.
        /// </summary>
        /// <param name="analizador">The parser to read from.</param>
        /// <returns>
        /// The normalized text read before the first non-positive result of
        /// Read(); null when the input ends (-1) first.
        /// </returns>
        static string Descripcion(ParseHTML analizador)
        {
            int           ch;
            StringBuilder sb1 = new StringBuilder();

            while ((ch = analizador.Read()) != -1)
            {
                if (ch > 0)
                {
                    sb1.Append((char)ch);
                }
                else
                {
                    // "Í" and "Ú" used to be replaced by themselves, so those
                    // two accents were never stripped; they now map to "I" and "U".
                    return(sb1.ToString()
                           .Replace("á", "a").Replace("é", "e").Replace("í", "i")
                           .Replace("ó", "o").Replace("ú", "u").Replace("ñ", "n")
                           .Replace("É", "E").Replace("Á", "A").Replace("Í", "I")
                           .Replace("Ó", "O").Replace("Ú", "U")
                           .Replace("'", ""));
                }
            }
            return(null);
        }
Пример #20
0
        /// <summary>
        /// Advance to the specified HTML tag.
        /// </summary>
        /// <param name="parse">The HTML parse object to use.</param>
        /// <param name="tag">The HTML tag name to look for (case-insensitive).</param>
        /// <param name="count">
        /// How many matching tags to pass.  The counter is decremented on each
        /// match and the search stops once it is zero or less, so both 0 and 1
        /// stop at the first match.
        /// </param>
        /// <returns>True if found, false if the input ended first.</returns>
        private bool Advance(ParseHTML parse, String tag, int count)
        {
            int remaining = count;

            for (int c = parse.Read(); c != -1; c = parse.Read())
            {
                if (c != 0)
                {
                    continue; // plain text between tags
                }

                if (String.Compare(parse.Tag.Name, tag, true) != 0)
                {
                    continue; // some other tag
                }

                remaining--;
                if (remaining <= 0)
                {
                    return true;
                }
            }

            return false;
        }
Пример #21
0
        /// <summary>
        /// Check the specified URL for a birth year.  The page text is split
        /// into sentences at each '.', and every sentence is passed to
        /// ExtractBirth; a result greater than 1 and less than 3000 is printed
        /// and recorded with IncreaseYear.  Pages that cannot be downloaded or
        /// read are silently skipped.
        /// </summary>
        /// <param name="url">The URL to check.</param>
        public void CheckURL(Uri url)
        {
            StringBuilder sentence = new StringBuilder();

            try
            {
                WebRequest http = HttpWebRequest.Create(url);

                // Dispose the response and its stream on every exit path.
                using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
                using (Stream istream = response.GetResponseStream())
                {
                    ParseHTML html = new ParseHTML(istream);
                    int       ch;

                    while ((ch = html.Read()) != -1)
                    {
                        if (ch == 0)
                        {
                            continue; // tags carry no sentence text
                        }

                        if (ch != '.')
                        {
                            sentence.Append((char)ch);
                            continue;
                        }

                        // End of a sentence: look for a plausible birth year.
                        int year = ExtractBirth(sentence.ToString());
                        if ((year > 1) && (year < 3000))
                        {
                            Console.WriteLine("URL supports year: " + year);
                            IncreaseYear(year);
                        }
                        sentence.Length = 0;
                    }
                }
            }
            catch (WebException)
            {
                // Best effort: a page that cannot be fetched is simply skipped.
            }
            catch (IOException)
            {
                // Best effort: a page that cannot be read to the end is skipped.
            }
        }
Пример #22
0
        /// <summary>
        /// Called to download the text from a page.  If any JavaScript
        /// include is found, the text from that page is read too and inlined
        /// between script tags.  The combined result is written to the console.
        /// </summary>
        public void Process()
        {
            Uri           url    = new Uri("http://www.httprecipes.com/1/9/includes.php");
            WebRequest    http   = HttpWebRequest.Create(url);
            StringBuilder buffer = new StringBuilder();

            http.Timeout = 30000;

            // Dispose the response and its stream even when an include fails to download.
            using (WebResponse response = http.GetResponse())
            using (Stream stream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(stream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        buffer.Append((char)ch);
                        continue;
                    }

                    HTMLTag tag = parse.Tag;
                    String  src = tag["src"];

                    if (String.Compare(tag.Name, "script", true) == 0 && src != null)
                    {
                        // External script: replace the reference with its content.
                        String include = DownloadPage(new Uri(url, src));
                        buffer.Append("<script>");
                        buffer.Append(include);
                        buffer.Append("</script>");
                    }
                    else
                    {
                        // Every other tag is copied through unchanged.
                        buffer.Append(tag.ToString());
                    }
                }
            }

            Console.WriteLine(buffer.ToString());
        }
Пример #23
0
        /// <summary>
        /// This method looks for a link tag at the specified URL.  If a link
        /// tag is found that specifies an RSS feed, then that feed is
        /// displayed.  When several such tags exist, the last one with an
        /// href attribute wins.
        /// </summary>
        /// <param name="url">The URL of the web site.</param>
        public void Process(Uri url)
        {
            String     href = null;
            WebRequest http = HttpWebRequest.Create(url);

            http.Timeout = 30000;

            // Dispose the response and its stream before the feed itself is fetched.
            using (WebResponse response = http.GetResponse())
            using (Stream stream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(stream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        continue; // only tags matter here
                    }

                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "link", true) != 0)
                    {
                        continue;
                    }

                    // MIME types are case-insensitive, so match "rss" in any casing.
                    String type = tag["type"];
                    if (type == null || type.IndexOf("rss", StringComparison.OrdinalIgnoreCase) == -1)
                    {
                        continue;
                    }

                    // Ignore an RSS link tag that has no target at all.
                    String candidate = tag["href"];
                    if (candidate != null)
                    {
                        href = candidate;
                    }
                }
            }

            if (href == null)
            {
                Console.WriteLine("No RSS link found.");
            }
            else
            {
                // Feed links are frequently relative; resolve them against the
                // page URL (an absolute href is returned unchanged).
                ProcessRSS(new Uri(url, href));
            }
        }
        /// <summary>
        /// Process the specified URL and extract the option list there.  After
        /// advancing to the requested select tag, ProcessOption is called for
        /// every option with its display text and its value attribute, until
        /// the closing select tag or the end of the page.
        /// </summary>
        /// <param name="url">The URL to process.</param>
        /// <param name="optionList">Which option list to process, zero for first.</param>
        public void Process(Uri url, int optionList)
        {
            String        value  = "";
            StringBuilder buffer = new StringBuilder();
            WebRequest    http   = HttpWebRequest.Create(url);

            // Dispose the response and its stream even when processing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);

                Advance(parse, "select", optionList);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        // Tag names are compared ignoring case, as Advance does;
                        // the previous case-sensitive compares missed upper-case markup.
                        HTMLTag tag = parse.Tag;
                        if (String.Compare(tag.Name, "option", true) == 0)
                        {
                            value         = tag["value"];
                            buffer.Length = 0;
                        }
                        else if (String.Compare(tag.Name, "/option", true) == 0)
                        {
                            ProcessOption(buffer.ToString(), value);
                        }
                        else if (String.Compare(tag.Name, "/select", true) == 0)
                        {
                            // The list was opened with "select", so it ends at
                            // "/select"; the former "/choice" never occurs and
                            // let parsing run into later lists.
                            break;
                        }
                    }
                    else
                    {
                        buffer.Append((char)ch);
                    }
                }
            }
        }
Пример #25
0
        /// <summary>
        /// Fetches the score page for the given credentials, parses it into
        /// lessons and refreshes the list view.  The view is disabled while this
        /// runs and re-enabled afterwards; any exception raised inside the
        /// fetch/parse section is shown in a message box.
        /// </summary>
        /// <param name="o">
        /// An Object[] whose first two elements are the two String credentials
        /// passed to ScoreHTMLUtil.TryGetHTMLString and whose optional third
        /// element is a bool forwarded to throwError (treated as true when
        /// the array does not have exactly three elements).
        /// </param>
        private void refresh(Object o)
        {
            disableView();

            Object[] credentials = (Object[])o;
            bool     showMsg     = (credentials.Length != 3) || (bool)credentials[2];
            WebProxy proxy       = XMLConfig.GetProxy();

            try
            {
                String page = ScoreHTMLUtil.TryGetHTMLString((String)credentials[0], (String)credentials[1], "9999", proxy, IsNormal);

                // Nothing came back from the server.
                if (string.IsNullOrEmpty(page))
                {
                    throwError("无法抓取成绩信息,可能是服务器正忙或者是网络连接出错。", "网络错误", showMsg);
                }

                // Recognize the two known failure pages by their content.
                if (page.Contains("Course.jsp"))
                {
                    throwError("由于使用错误密码请求多次,请求被URP拒绝。\n请用浏览器重新登录URP输入正确的验证码后再使用本工具", "URP登录错误", showMsg);
                }
                else if (page.Contains("复旦大学统一身份认证服务"))
                {
                    throwError("学号密码不正确", "URP登录错误", showMsg);
                }

                List <Lesson> parsed = ParseHTML.TryParseHTML(page, GpaInfo, IsNormal);
                if (parsed == null)
                {
                    throwError("抓取的网页无法解析", "解析数据错误", showMsg);
                }

                refreshListView(parsed);
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message, ex.HelpLink);
            }

            enableView();
        }
Пример #26
0
        /// <summary>
        /// Skips the first three "img" tags and returns the src attribute of
        /// the fourth, also writing it to the console.
        /// </summary>
        /// <param name="analizador">The parser to read from.</param>
        /// <returns>
        /// The src value of the fourth "img" tag, or null when the input ends
        /// before four such tags are seen.
        /// </returns>
        static string Imagen(ParseHTML analizador)
        {
            int omitidas = 0;

            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                // Only tags named exactly "img" are counted.
                if (c != 0 || analizador.Tag.Name != "img")
                {
                    continue;
                }

                if (omitidas != 3)
                {
                    omitidas++;
                    continue;
                }

                string origen = analizador.Tag["src"];
                Console.WriteLine(origen);
                return origen;
            }

            return null;
        }
Пример #27
0
        /// <summary>
        /// Skips the first "a" tag and returns the href attribute of the second.
        /// </summary>
        /// <param name="analizador">The parser to read from.</param>
        /// <returns>
        /// The href value of the second "a" tag, or null when the input ends
        /// before two such tags are seen.
        /// </returns>
        static string Categoria(ParseHTML analizador)
        {
            bool primeroVisto = false;

            int c = analizador.Read();
            while (c != -1)
            {
                // Only tags named exactly "a" are considered.
                if (c == 0 && analizador.Tag.Name == "a")
                {
                    if (primeroVisto)
                    {
                        return analizador.Tag["href"];
                    }
                    primeroVisto = true;
                }

                c = analizador.Read();
            }

            return null;
        }
Пример #28
0
        /// <summary>
        /// Called to parse a table.  The page at the specified URL is downloaded and the
        /// cell texts of each row of the selected table are passed to <c>ProcessTableRow</c>.
        /// </summary>
        /// <remarks>
        /// The parser is first positioned with <c>Advance(parse, "table", tableNum)</c>; reading
        /// then continues until a closing table tag or the end of the stream.
        /// The HTTP response and its stream are disposed when parsing finishes or throws.
        /// </remarks>
        /// <param name="url">The URL of the HTML page that contains the table.</param>
        /// <param name="tableNum">The table number to parse, zero for the first.</param>
        public void Process(Uri url, int tableNum)
        {
            WebRequest http = HttpWebRequest.Create(url);

            // Dispose the response and its stream deterministically so the underlying
            // connection is released even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML     parse   = new ParseHTML(istream);
                StringBuilder buffer  = new StringBuilder();
                List <String> list    = new List <String>();
                bool          capture = false;

                Advance(parse, "table", tableNum);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        // Read() returned 0: a tag was parsed and is available via parse.Tag.
                        HTMLTag tag = parse.Tag;
                        if (String.Compare(tag.Name, "tr", true) == 0)
                        {
                            // A new row starts: drop anything left over from the previous one.
                            list.Clear();
                            capture       = false;
                            buffer.Length = 0;
                        }
                        else if (String.Compare(tag.Name, "/tr", true) == 0)
                        {
                            if (list.Count > 0)
                            {
                                ProcessTableRow(list);
                                list.Clear();
                            }
                        }
                        else if (String.Compare(tag.Name, "td", true) == 0)
                        {
                            // A cell opened while text is still buffered: the previous cell
                            // was not closed explicitly, so store its text first.
                            if (buffer.Length > 0)
                            {
                                list.Add(buffer.ToString());
                            }
                            buffer.Length = 0;
                            capture       = true;
                        }
                        else if (String.Compare(tag.Name, "/td", true) == 0)
                        {
                            list.Add(buffer.ToString());
                            buffer.Length = 0;
                            capture       = false;
                        }
                        else if (String.Compare(tag.Name, "/table", true) == 0)
                        {
                            break;
                        }
                    }
                    else
                    {
                        // Plain text is only collected while inside a cell.
                        if (capture)
                        {
                            buffer.Append((char)ch);
                        }
                    }
                }
            }
        }
Пример #29
0
        /// <summary>
        /// Access the website and perform a search for either states or capitals.
        /// Each list item found in the first "ul" list of the result page is passed
        /// to <c>ProcessItem</c>.
        /// </summary>
        /// <remarks>
        /// The query string is produced by <c>FormUtility</c> from the fields "search",
        /// "type" and "action". The HTTP response and its stream are disposed when
        /// parsing finishes or throws.
        /// </remarks>
        /// <param name="search">A search string.</param>
        /// <param name="type">What to search for(s=state, c=capital)</param>
        public void Process(String search, String type)
        {
            String        listType    = "ul";
            String        listTypeEnd = "/ul";
            StringBuilder buffer      = new StringBuilder();
            bool          capture     = false;

            // Build the URL.
            MemoryStream mstream = new MemoryStream();
            FormUtility  form    = new FormUtility(mstream, null);

            form.Add("search", search);
            form.Add("type", type);
            form.Add("action", "Search");
            form.Complete();

            // ToArray() copies only the bytes actually written. GetBuffer() would return
            // the whole internal buffer, whose unused tail is zero bytes that decode to
            // NUL characters at the end of the query string.
            String     str  = System.Text.Encoding.ASCII.GetString(mstream.ToArray());
            String     surl = "http://www.httprecipes.com/1/7/get.php?" + str;
            Uri        url  = new Uri(surl);
            WebRequest http = HttpWebRequest.Create(url);

            // Dispose the response and its stream deterministically so the underlying
            // connection is released even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);

                // Parse from the URL.

                Advance(parse, listType, 0);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        // Read() returned 0: a tag was parsed and is available via parse.Tag.
                        HTMLTag tag = parse.Tag;
                        if (String.Compare(tag.Name, "li", true) == 0)
                        {
                            // A new item opened while text is still buffered: the previous
                            // item was not closed explicitly, so report it first.
                            if (buffer.Length > 0)
                            {
                                ProcessItem(buffer.ToString());
                            }
                            buffer.Length = 0;
                            capture       = true;
                        }
                        else if (String.Compare(tag.Name, "/li", true) == 0)
                        {
                            ProcessItem(buffer.ToString());
                            buffer.Length = 0;
                            capture       = false;
                        }
                        else if (String.Compare(tag.Name, listTypeEnd, true) == 0)
                        {
                            // NOTE(review): this also runs with an empty buffer when the last
                            // item was closed by "/li" — confirm ProcessItem tolerates "".
                            ProcessItem(buffer.ToString());
                            break;
                        }
                    }
                    else
                    {
                        // Plain text is only collected while inside a list item.
                        if (capture)
                        {
                            buffer.Append((char)ch);
                        }
                    }
                }
            }
        }
Пример #30
0
        /// <summary>
        /// Reads one HTML page and returns a slide show built from it.
        /// </summary>
        /// <remarks>
        /// The text between "title" tags becomes the slide show title. Text inside each
        /// "td" cell is collected into a caption; when the cell closes and a photo link
        /// (an "href" accepted by <c>IsPhoto</c>) has been seen, the link and caption are
        /// added to the slide show.
        /// </remarks>
        /// <param name="aSlideFile">Path of the HTML slide file to read.</param>
        /// <param name="aDiagnostic">
        /// Receives an error message when the file could not be read; otherwise null.
        /// </param>
        /// <returns>The slide show, or null when <c>ReadFile</c> returned null.</returns>
        public SlideShow ReadSlideShow(string aSlideFile, out string aDiagnostic)
        {
            // Tag names are protocol identifiers, not linguistic text: compare them
            // ordinally so matching does not depend on the current culture
            // (e.g. "TITLE" fails to match "title" under Turkish casing rules).
            const StringComparison TagComparison = StringComparison.OrdinalIgnoreCase;

            aDiagnostic = null;

            // Determine the name of the future XML slide show file
            // NOTE(review): Replace rewrites every ".htm" occurrence, so "x.html"
            // becomes "x.xmll" — confirm whether that is intended.
            string    xmlFilePath = aSlideFile.Replace(".htm", ".xml");
            SlideShow slideShow   = new SlideShow(xmlFilePath);

            string html = ReadFile(aSlideFile);

            if (html == null)
            {
                aDiagnostic = "ReadSlideShow: bad HTML slideshow file " + aSlideFile;
                return(null);
            }

            ParseHTML parse = new ParseHTML();
            parse.Source = html;

            HtmlPreprocess htmlPreprocess = new HtmlPreprocess();

            // Default overall title for the slide show, hopefully replaced with something better
            string title           = "A most peculiar day";
            bool   collectingTitle = false;

            bool    collectingCaption = false;
            Caption caption           = new Caption();
            string  link = "";
            while (!parse.Eof())
            {
                char ch = parse.Parse();
                if (ch == 0)
                {
                    // Parse() returned 0: a tag was parsed and is available via GetTag().
                    AttributeList tag = parse.GetTag();
                    if (tag.Name.Equals("title", TagComparison))
                    {
                        collectingTitle = true;       // Start collecting title
                        title           = string.Empty;
                    }
                    else if (tag.Name.Equals("/title", TagComparison))
                    {
                        collectingTitle = false;      // Title now complete
                        slideShow.Title = title.Trim();
                    }
                    if (tag.Name.Equals("td", TagComparison))
                    {
                        collectingCaption = true;       // Start collecting new caption
                        caption           = new Caption();
                    }
                    else if (tag.Name.Equals("/td", TagComparison))
                    {
                        collectingCaption = false;      // Any caption is now complete
                        if (!link.Equals(""))
                        {
                            // Got a link to go with the caption
                            slideShow.Add(link, caption);
                            link = "";
                        }
                    }
                    else if (collectingCaption &&
                             tag.Name.Equals("p", TagComparison))
                    {
                        // HTML paragraph tag within caption
                        caption.NewLine();
                    }
                    else if (tag["href"] != null)
                    {
                        // Links are stored with backslashes as the path separator.
                        string href = tag["href"].Value.Replace('/', '\\');
                        if (IsPhoto(href))
                        {
                            link = href;
                        }
                    }

                    // Preprocessing of regular character stream starts with clean sheet after tag
                    htmlPreprocess.Reset();
                }
                else
                {
                    // Got a character
                    ch = htmlPreprocess.Add(ch);
                    if (ch != HtmlPreprocess.NullChar)
                    {
                        if (collectingTitle)
                        {
                            title += ch;
                        }
                        else if (collectingCaption)
                        {
                            caption.AddChar(ch);
                        }
                    }
                }
            }

            return(slideShow);
        }
Пример #31
0
        /// <summary>
        /// Loads the page at <paramref name="strURL"/>, collects the "href"/"src" links it
        /// contains, optionally downloads them, and then recurses into the collected links
        /// while <paramref name="currentRecursive"/> is below the limit entered in
        /// <c>recursiveLevelTextBox</c>.
        /// </summary>
        /// <remarks>
        /// Progress is reported to the form's controls through <c>Invoke</c>. The
        /// <c>m_cansel</c> flag is polled at several points and ends processing early.
        /// </remarks>
        /// <param name="currentRecursive">Current recursion level; shown in the UI and compared with the limit.</param>
        /// <param name="strURL">URL of the page to process.</param>
        /// <param name="fromURL">Referring URL passed on to <c>ReadHTML</c>; may be null.</param>
        /// <param name="strDownloadPath">Download destination passed on to <c>HTTPDownload</c>.</param>
        /// <param name="startLinkURL">
        /// When non-empty, links are ignored until one equal to this value is found. Must not be null.
        /// </param>
        /// <param name="endLinkURL">
        /// When non-empty, parsing stops after a link equal to this value. Must not be null.
        /// </param>
        /// <param name="onlyLink">When true, the collected links are not downloaded.</param>
        /// <returns>
        /// The value returned by <c>ReadHTML</c> for <paramref name="strURL"/>, or an empty
        /// string when processing was cancelled before the page was read.
        /// </returns>
        private string ProcessURL(int currentRecursive , string strURL,string fromURL,string strDownloadPath,string startLinkURL,string endLinkURL,Boolean onlyLink)
        {
            Invoke(new MethodInvoker(delegate()
            {
                // Update the recursion-level display
                recursiveLevelLabel.Text = "" + currentRecursive;
                recursiveLevelLabel.Update();
                toolStripStatusLabel.Text = strURL;
                statusStrip.Update();
                notifyIcon1.Text = "CSMDown:";
                // Only the first 50 characters of the URL are shown in the notify icon text.
                if (strURL.Length > 50)
                {
                    notifyIcon1.Text += strURL.Substring(0, 50);
                }
                else
                {
                    notifyIcon1.Text += strURL;
                }
            }));

            // -----< Load the HTML >-----
            //
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("ProcessURL : "); }));
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText(strURL); }));
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("RefererURL : "); }));
            if (fromURL != null)
            {
                Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText(fromURL); }));
            }
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
            Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));

            string strHTML = "";
            string mimeType = "";
            if (m_cansel == true) { return mimeType; }  // Cancellation handling
            mimeType = ReadHTML(strURL, ref strHTML, fromURL, currentRecursive);
            if (strHTML == null)
            {
                Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("HTMLを読み込めませんでした。:"+strURL+"\r\n"); }));
                return mimeType;
            }
            if (isHTML(mimeType) == false)
            {
                Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("HTMLではないので無視します。:" + strURL+"\r\n"); }));
                return mimeType;
            }
            insertAlreadyAccessedURL(strURL, currentRecursive);

            // -----< Navigate the browser >-----
            //
            Invoke(new MethodInvoker(delegate()
            {
                // Navigate the browser window, if one exists and is still alive
                if (m_BrowserForm != null)
                {
                    if (m_BrowserForm.IsDisposed == false)
                    {
                        m_BrowserForm.Navigate(strURL);
                    }
                }
            }));

            if (m_cansel == true) { return mimeType; }  // Cancellation handling

            // -----< Parse the HTML >-----
            //
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("HTMLをパースします。\r\n"); }));
            Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));

            // <<< Prepare storage for the parse results >>>
            ArrayList URLArray = new ArrayList();

            // <<< Run the parser >>>
            Boolean startLinkProcessed = false;
            ParseHTML parser = new ParseHTML();
            parser.Source = strHTML;
            while (!parser.Eof())
            {
                if (m_cansel == true) { return mimeType; }  // Cancellation handling

                // Parse the next item; only tags (Parse() == 0) are of interest
                char ch = parser.Parse();
                if (ch != 0) { continue; }

                string linkURL = "";

                // For href, capture the link target
                AttributeList tag = parser.GetTag();
                if (tag["href"] != null)
                {
                    linkURL = (string)( tag["href"].Value );
                }

                // For src, capture the link target (takes precedence over href)
                if (tag["src"] != null)
                {
                    linkURL = (string)( tag["src"].Value );
                }

                // If the link contains '#', drop it and everything after it
                int hashIndex = linkURL.IndexOf('#');
                if (hashIndex >= 0)
                {
                    linkURL = linkURL.Substring(0, hashIndex);
                }

                if (linkURL.Length > 0)
                {
                    string linkAbsolute = CreateLinkURL(linkURL, strURL);
                    if (linkAbsolute == null)
                    {
                        // CreateLinkURL produced no usable URL for this link; skip it.
                        continue;
                    }

                    if ((startLinkURL.Length > 0)&&(startLinkProcessed == false))
                    {
                        if (startLinkURL.Equals(linkAbsolute))
                        {
                            startLinkProcessed = true;
                        }
                        else
                        {
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("StartLink以前なので無視します:" + linkURL + "\r\n"); }));
                            Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
                            continue;
                        }
                    }
                    linkAbsolute = linkAbsolute.Replace("\r\n", "");
                    linkAbsolute = linkAbsolute.Replace("\n", "");
                    URLArray.Add(linkAbsolute);

                    // Stop at EndLink
                    if (endLinkURL.Length > 0)
                    {
                        if (endLinkURL.Equals(linkAbsolute))
                        {
                            // EndLink detected
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("EndLinkを検出。パースを終了します。:" + linkURL + "\r\n"); }));
                            Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
                            break;
                        }
                    }
                }
            }
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("リンク数:" + URLArray.Count); }));
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
            Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
            if (m_cansel == true) { return mimeType; }  // Cancellation handling

            // -----< Download >-----
            //
            Boolean result = true;
            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("ダウンロードを開始します。\r\n"); }));
            Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
            if (onlyLink == false)
            {
                result = HTTPDownload(URLArray, strDownloadPath, strURL, currentRecursive);
                if (result != true)
                {
                    Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("ダウンロードに失敗しました。"); }));
                }
            }

            // -----< Recurse into the collected links >------
            //
            int recursiveMax = int.Parse(recursiveLevelTextBox.Text);
            if (currentRecursive < recursiveMax)
            {
                int i;
                int c;      // 0: process priority URLs  1: process same-domain URLs
                for (c = 0; c < 2; c++)
                {
                    // Without a priority path there is nothing to do in the priority pass.
                    if (c == 0)
                    {
                        if (highPriorityPathTextBox.Text.Length <= 0)
                        {
                            continue;
                        }
                    }

                    // In priority-only mode, no other pattern is processed
                    if ((c == 1) && (highPriorityPathTextBox.Text.Length>0))
                    {
                        if (priorityOnlyCheckBox.Checked == true)
                        {
                            continue;
                        }
                    }

                    // Filter the URLs for this pass and process them
                    for (i = 0; i < URLArray.Count; i++)
                    {
                        if (m_cansel == true) { break; }  // Cancellation handling

                        try
                        {
                            // URL prefixes are compared ordinally: they are identifiers,
                            // not culture-dependent text.
                            if (c == 0)
                            {
                                // Priority pass: ignore links outside the priority path
                                string priorityURL = highPriorityPathTextBox.Text.Trim();
                                if (!((string)URLArray[i]).StartsWith(priorityURL, StringComparison.Ordinal))
                                {
                                    // A priority path is set and this URL does not match it: next link
                                    continue;
                                }
                            }
                            else
                            {
                                // Non-priority pass
                                // NOTE(review): this tests strURL (the current page), not
                                // URLArray[i], and does not Trim() the path like pass 0 —
                                // confirm which URL is meant to be checked.
                                if ((highPriorityPathTextBox.Text.Length > 0) && (strURL.StartsWith(highPriorityPathTextBox.Text, StringComparison.Ordinal)))
                                {
                                    // A priority path is set and the URL matches it: next link
                                    continue;
                                }
                            }
                        }
                        catch (Exception e)
                        {
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("URI Compare failed\r\n"); }));
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText(e.Message+"\r\n"); }));
                            Invoke(new MethodInvoker(delegate() { statusTextBox.Update(); }));
                            continue;
                        }
                        if (isAlreadyAccessed((string)URLArray[i],currentRecursive+1) == true)
                        {
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("アクセス済みURL:"); }));
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText((string)URLArray[i]); }));
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
                            continue;
                        }

                        // -----< Recursive call >-----
                        //
                        string res;
                        res = ProcessURL(currentRecursive + 1, (string)URLArray[i],strURL, strDownloadPath,"","",false);
                        if (isHTML(res) != true)
                        {
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("HTMLではありませんでした。:"); }));
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText((string)URLArray[i]); }));
                            Invoke(new MethodInvoker(delegate() { statusTextBox.AppendText("\r\n"); }));
                        }

                        // Restore this level's status display after the recursive call changed it.
                        Invoke(new MethodInvoker(delegate() { recursiveLevelLabel.Text = "" + currentRecursive; }));
                        Invoke(new MethodInvoker(delegate() { recursiveLevelLabel.Update(); }));
                        Invoke(new MethodInvoker(delegate() { toolStripStatusLabel.Text = strURL; }));
                        Invoke(new MethodInvoker(delegate() { statusStrip.Update(); }));

                        if (m_cansel == true) { return mimeType; }  // Cancellation handling
                    }
                }
            }

            return mimeType;
        }