Пример #1
0
        /// <summary>
        /// Downloads the page at the specified URL and reports every hyperlink
        /// found on it.  For each anchor, the text between the opening and
        /// closing tag and the absolute link target are passed to ProcessOption.
        /// </summary>
        /// <param name="url">The URL to process; relative links are resolved against it.</param>
        /// <param name="optionList">Which option list to process (not read by this overload).</param>
        public void Process(Uri url, int optionList)
        {
            String        value  = "";
            StringBuilder buffer = new StringBuilder();
            WebRequest    http   = HttpWebRequest.Create(url);

            // Release the connection and the stream even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    // Anything other than 0 is treated as a plain text character.
                    if (ch != 0)
                    {
                        buffer.Append((char)ch);
                        continue;
                    }

                    // A value of 0 means a tag was just parsed.
                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "a", true) == 0)
                    {
                        // An anchor without an href attribute has no target to report;
                        // remember that by storing null instead of dereferencing it.
                        String href = tag["href"];
                        value         = (href == null) ? null : new Uri(url, href).ToString();
                        buffer.Length = 0;
                    }
                    else if (String.Compare(tag.Name, "/a", true) == 0 && value != null)
                    {
                        ProcessOption(buffer.ToString(), value);
                    }
                }
            }
        }
        /// <summary>
        /// Downloads the page at the specified URL and saves every image it
        /// references (each img tag with a src attribute) into a directory.
        /// </summary>
        /// <param name="url">The URL to process; relative image paths are resolved against it.</param>
        /// <param name="saveTo">A directory to save the images to.</param>
        public void Process(Uri url, String saveTo)
        {
            WebRequest http = HttpWebRequest.Create(url);

            // Release the page response and its stream even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    // Only tags (signalled by 0) are of interest here.
                    if (ch != 0)
                    {
                        continue;
                    }

                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "img", true) != 0)
                    {
                        continue;
                    }

                    // An img tag without a src attribute has nothing to download.
                    String src = tag["src"];
                    if (src == null)
                    {
                        continue;
                    }

                    Uri        u        = new Uri(url, src);
                    String     filename = ExtractFile(u);
                    String     saveFile = Path.Combine(saveTo, filename);
                    WebRequest http2    = HttpWebRequest.Create(u);

                    // The image response is closed even if the download fails part way.
                    using (HttpWebResponse response2 = (HttpWebResponse)http2.GetResponse())
                    {
                        this.DownloadBinaryFile(response2, saveFile);
                    }
                }
            }
        }
        /// <summary>
        /// This method looks for each of the &lt;option&gt; tags that contain
        /// a link to each of the pages.  For each page found the
        /// DownloadArticlePage method is called and its result is written
        /// to the console.
        /// </summary>
        public void Process()
        {
            Uri        url  = new Uri("http://www.httprecipes.com/1/9/article.php");
            WebRequest http = HttpWebRequest.Create(url);

            http.Timeout = 30000;

            // Release the connection and the stream even when parsing throws.
            using (WebResponse response = http.GetResponse())
            using (Stream stream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(stream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    // Only tags (signalled by 0) are of interest here.
                    if (ch != 0)
                    {
                        continue;
                    }

                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "option", true) != 0)
                    {
                        continue;
                    }

                    // An option without a value attribute does not name a page.
                    String str = tag["value"];
                    if (str == null)
                    {
                        continue;
                    }

                    Uri u = new Uri(url, str);
                    Console.WriteLine(DownloadArticlePage(u));
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Reads forward through the parser and collects the text that follows
        /// an "a" tag, returning it as soon as a "/a" tag is reached.
        /// </summary>
        /// <param name="analizador">The HTML parser to read from.</param>
        /// <returns>
        /// The collected text, or a fixed "not found" message when the input
        /// ends before a "/a" tag is seen.
        /// </returns>
        static string Categoria(ParseHTML analizador)
        {
            StringBuilder text       = new StringBuilder();
            bool          insideLink = false;

            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                if (c != 0)
                {
                    // Plain character: keep it only once an "a" tag has been seen.
                    if (insideLink)
                    {
                        text.Append((char)c);
                    }
                    continue;
                }

                // c == 0: a tag was just parsed.
                if (analizador.Tag.Name == "a")
                {
                    insideLink = true;
                }
                else if (analizador.Tag.Name == "/a")
                {
                    return text.ToString();
                }
            }

            return "no se encontro la categoria :S";
        }
Пример #5
0
        /// <summary>
        /// Process the specified URL and extract data from all of the subpages
        /// that this page links to.  Every anchor's target is resolved against
        /// the page URL and handed to ProcessSubPage.
        /// </summary>
        /// <param name="url">The URL to process.</param>
        public void Process(Uri url)
        {
            WebRequest http = HttpWebRequest.Create(url);

            // Release the connection and the stream even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    // Only tags (signalled by 0) are of interest here.
                    if (ch != 0)
                    {
                        continue;
                    }

                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "a", true) != 0)
                    {
                        continue;
                    }

                    // Anchors without an href attribute do not link to a subpage.
                    String href = tag["href"];
                    if (href == null)
                    {
                        continue;
                    }

                    ProcessSubPage(new Uri(url, href));
                }
            }
        }
Пример #6
0
        /// <summary>
        /// Collects characters from the parser until it reports a value that
        /// is not greater than zero (a tag marker or the end of the input).
        /// </summary>
        /// <param name="analizador">The HTML parser to read from.</param>
        /// <returns>The characters read, possibly an empty string.</returns>
        static string Descripcion(ParseHTML analizador)
        {
            StringBuilder text = new StringBuilder();

            for (int c = analizador.Read(); c > 0; c = analizador.Read())
            {
                text.Append((char)c);
            }

            return text.ToString();
        }
Пример #7
0
        /// <summary>
        /// Called to extract a list from the specified URL.  Every list item
        /// found is written to the console (closed items only) and passed to
        /// ProcessItem.
        /// </summary>
        /// <param name="url">The URL to extract the list from.</param>
        /// <param name="listType">
        /// The tag name of the list to read, for example "ul"; it is compared
        /// against parsed tag names without regard to case.
        /// </param>
        /// <param name="optionList">Which list to search; passed to Advance as the count.</param>
        public void Process(Uri url, String listType, int optionList)
        {
            // Closing tags are reported with a leading slash (compare "/li" below),
            // so the end-of-list marker is "/" + listType, not listType + "/".
            String        listTypeEnd = "/" + listType;
            StringBuilder buffer      = new StringBuilder();
            bool          capture     = false;
            WebRequest    http        = HttpWebRequest.Create(url);

            // Release the connection and the stream even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);

                Advance(parse, listType, optionList);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        HTMLTag tag = parse.Tag;
                        if (String.Compare(tag.Name, "li", true) == 0)
                        {
                            // A new item started while text was pending: the
                            // previous item was never closed, so flush it now.
                            if (buffer.Length > 0)
                            {
                                ProcessItem(buffer.ToString());
                            }
                            buffer.Length = 0;
                            capture       = true;
                        }
                        else if (String.Compare(tag.Name, "/li", true) == 0)
                        {
                            Console.WriteLine(buffer.ToString());
                            ProcessItem(buffer.ToString());
                            buffer.Length = 0;
                            capture       = false;
                        }
                        else if (String.Compare(tag.Name, listTypeEnd, true) == 0)
                        {
                            // End of the requested list: stop before any later list.
                            break;
                        }
                    }
                    else if (capture)
                    {
                        buffer.Append((char)ch);
                    }
                }
            }
        }
Пример #8
0
        /// <summary>
        /// Reads forward through the html parser until a tag with the given
        /// name is found and returns the requested attribute of that tag.
        /// </summary>
        /// <param name="tag">The tag name to look for (compared with ==, so case matters).</param>
        /// <param name="atributo">The name of the attribute to return.</param>
        /// <returns>
        /// The attribute value as reported by the tag's indexer, or null when
        /// the input ends before a matching tag is found.
        /// </returns>
        public string Leer(string tag, string atributo)
        {
            for (int c = html.Read(); c != -1; c = html.Read())
            {
                // 0 marks a parsed tag; any other value is skipped.
                if (c == 0 && html.Tag.Name == tag)
                {
                    return html.Tag[atributo];
                }
            }

            return null;
        }
Пример #9
0
        /// <summary>
        /// Improved variant for advancing to a tag whose attribute has a given value.
        /// </summary>
        /// <param name="analizador">The HTML parser to advance.</param>
        /// <param name="etiqueta">The name of the tag to reach.</param>
        /// <param name="nombreAtributo">The name of the attribute to check, such as "src".</param>
        /// <param name="atributo">The value the attribute must have, such as "/imagenes/...".</param>
        /// <returns>True when a matching tag was reached; false when the input ended first.</returns>
        public static bool AvanzarA(ParseHTML analizador, String etiqueta, String nombreAtributo, String atributo)
        {
            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                // Text characters are skipped; only parsed tags (0) are examined.
                if (c != 0)
                {
                    continue;
                }

                if (analizador.Tag.Name != etiqueta)
                {
                    continue;
                }

                if (analizador.Tag[nombreAtributo] == atributo)
                {
                    return true;
                }
            }

            return false;
        }
Пример #10
0
        /// <summary>
        /// Reads forward to the next "img" tag and returns its "src" attribute.
        /// </summary>
        /// <param name="analizador">The HTML parser to read from.</param>
        /// <returns>
        /// The src attribute of the first "img" tag encountered, or a fixed
        /// "not found" message when the input ends without one.
        /// </returns>
        static string Imagen(ParseHTML analizador)
        {
            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                // 0 marks a parsed tag; any other value is skipped.
                if (c == 0 && analizador.Tag.Name == "img")
                {
                    return analizador.Tag["src"];
                }
            }

            return "no se encontro una imagen :S";
        }
Пример #11
0
        /// <summary>
        /// Reads forward to the next "a" tag and returns its "href" attribute.
        /// </summary>
        /// <param name="analizador">The HTML parser to read from.</param>
        /// <returns>
        /// The href attribute of the first "a" tag encountered, or null when
        /// the input ends without one.
        /// </returns>
        static string Enlace(ParseHTML analizador)
        {
            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                // 0 marks a parsed tag; any other value is skipped.
                if (c == 0 && analizador.Tag.Name == "a")
                {
                    return analizador.Tag["href"];
                }
            }

            return null;
        }
Пример #12
0
        /// <summary>
        /// Submits a search request and collects the URLs found in the reply.
        /// The text between each "Url" tag and its "/Url" closing tag is
        /// converted to a Uri.
        /// </summary>
        /// <param name="url">The search URL to request.</param>
        /// <returns>The URLs found, in the order they appear in the reply.</returns>
        private ICollection <Uri> DoSearch(Uri url)
        {
            ICollection <Uri> result = new List <Uri>();
            // submit the search
            WebRequest http = HttpWebRequest.Create(url);

            // Both the response and its stream are released even when parsing
            // or Uri construction throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML     parse   = new ParseHTML(istream);
                StringBuilder buffer  = new StringBuilder();
                bool          capture = false;

                // parse the results
                int ch;
                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        HTMLTag tag = parse.Tag;
                        if (tag.Name.Equals("Url", StringComparison.CurrentCultureIgnoreCase))
                        {
                            buffer.Length = 0;
                            capture       = true;
                        }
                        else if (tag.Name.Equals("/Url", StringComparison.CurrentCultureIgnoreCase))
                        {
                            result.Add(new Uri(buffer.ToString()));
                            buffer.Length = 0;
                            capture       = false;
                        }
                    }
                    else if (capture)
                    {
                        buffer.Append((char)ch);
                    }
                }
            }

            return(result);
        }
Пример #13
0
        /// <summary>
        /// Reads text up to the next tag and returns it with Spanish accented
        /// letters replaced by their unaccented forms and apostrophes removed.
        /// </summary>
        /// <param name="analizador">The HTML parser to read from.</param>
        /// <returns>
        /// The normalized text, or null when the input ends before any tag is
        /// reached.
        /// </returns>
        static string Descripcion(ParseHTML analizador)
        {
            int           ch;
            StringBuilder sb1 = new StringBuilder();

            while ((ch = analizador.Read()) != -1)
            {
                if (ch > 0)
                {
                    sb1.Append((char)ch);
                }
                else
                {
                    // Upper-case I and U with acute accent previously mapped to
                    // themselves, leaving those two accents in the output.
                    return(sb1.ToString()
                           .Replace("á", "a")
                           .Replace("é", "e")
                           .Replace("í", "i")
                           .Replace("ó", "o")
                           .Replace("ú", "u")
                           .Replace("ñ", "n")
                           .Replace("É", "E")
                           .Replace("Á", "A")
                           .Replace("Í", "I")
                           .Replace("Ó", "O")
                           .Replace("Ú", "U")
                           .Replace("'", ""));
                }
            }
            return(null);
        }
Пример #14
0
        /// <summary>
        /// Reads text from the parser up to the next tag.
        /// </summary>
        /// <param name="analizador">The HTML parser to read from.</param>
        /// <returns>
        /// The text read before the tag, or null when the input ends before
        /// any tag is reached (text read so far is then discarded).
        /// </returns>
        static string Titulo(ParseHTML analizador)
        {
            StringBuilder text = new StringBuilder();
            int           c    = analizador.Read();

            // Positive values are text characters.
            while (c > 0)
            {
                text.Append((char)c);
                c = analizador.Read();
            }

            // -1 is the end of the input; otherwise a tag stopped the loop.
            if (c == -1)
            {
                return null;
            }

            return text.ToString();
        }
Пример #15
0
        /// <summary>
        /// Reads text from the parser up to the next tag.
        /// </summary>
        /// <param name="analizador">The HTML parser to read from.</param>
        /// <returns>
        /// The text read before the tag, or a fixed "not found" message when
        /// the input ends before any tag is reached.
        /// </returns>
        static string Titulo(ParseHTML analizador)
        {
            StringBuilder text = new StringBuilder();
            int           c    = analizador.Read();

            // Positive values are text characters.
            while (c > 0)
            {
                text.Append((char)c);
                c = analizador.Read();
            }

            // -1 is the end of the input; otherwise a tag stopped the loop.
            if (c == -1)
            {
                return "No se encontro el titulo :S";
            }

            return text.ToString();
        }
Пример #16
0
        /// <summary>
        /// Moves the parser forward until the specified tag has been seen the
        /// requested number of times.  The tag name is compared without
        /// regard to case.
        /// </summary>
        /// <param name="parse">The HTML parse object to use.</param>
        /// <param name="tag">The HTML tag name to look for.</param>
        /// <param name="count">
        /// How many matching tags to find.  The counter is decremented on each
        /// match and the search stops once it is zero or less, so values of
        /// 0 and 1 both stop at the first match.
        /// </param>
        /// <returns>True if found, false if the input ended first.</returns>
        private bool Advance(ParseHTML parse, String tag, int count)
        {
            int remaining = count;

            for (int c = parse.Read(); c != -1; c = parse.Read())
            {
                // Text characters are skipped; only parsed tags (0) are examined.
                if (c != 0)
                {
                    continue;
                }

                if (String.Compare(parse.Tag.Name, tag, true) != 0)
                {
                    continue;
                }

                remaining--;
                if (remaining <= 0)
                {
                    return true;
                }
            }

            return false;
        }
Пример #17
0
        /// <summary>
        /// Check the specified URL for a birth year.  The page text is split
        /// into sentences at each period; every sentence is handed to
        /// ExtractBirth, and a result greater than 1 and less than 3000 is
        /// reported and recorded through IncreaseYear.
        /// </summary>
        /// <param name="url">The URL to check.</param>
        public void CheckURL(Uri url)
        {
            StringBuilder sentence = new StringBuilder();

            try
            {
                WebRequest http = HttpWebRequest.Create(url);

                // Release the connection and the stream even when reading throws.
                using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
                using (Stream istream = response.GetResponseStream())
                {
                    ParseHTML html = new ParseHTML(istream);
                    int       ch;

                    while ((ch = html.Read()) != -1)
                    {
                        // Tags (0) carry no sentence text.
                        if (ch == 0)
                        {
                            continue;
                        }

                        if (ch != '.')
                        {
                            sentence.Append((char)ch);
                            continue;
                        }

                        // A period ends the current sentence.
                        int year = ExtractBirth(sentence.ToString());
                        if ((year > 1) && (year < 3000))
                        {
                            Console.WriteLine("URL supports year: " + year);
                            IncreaseYear(year);
                        }
                        sentence.Length = 0;
                    }
                }
            }
            catch (WebException)
            {
                // Deliberately ignored: a page that cannot be fetched simply
                // contributes no years.
            }
            catch (IOException)
            {
                // Deliberately ignored: a page that fails while being read
                // contributes only what was processed before the failure.
            }
        }
Пример #18
0
        /// <summary>
        /// Called to download the text from a page.  If any JavaScript
        /// include is found (a script tag with a src attribute), the text
        /// from that include is downloaded and placed inline between script
        /// tags.  The combined text is written to the console.
        /// </summary>
        public void Process()
        {
            Uri        url  = new Uri("http://www.httprecipes.com/1/9/includes.php");
            WebRequest http = HttpWebRequest.Create(url);

            http.Timeout = 30000;

            StringBuilder buffer = new StringBuilder();

            // Release the connection and the stream even when parsing throws.
            using (WebResponse response = http.GetResponse())
            using (Stream stream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(stream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        buffer.Append((char)ch);
                        continue;
                    }

                    HTMLTag tag = parse.Tag;
                    String  src = tag["src"];
                    if (String.Compare(tag.Name, "script", true) == 0 && src != null)
                    {
                        // Replace the external reference with the script's own text.
                        Uri    u       = new Uri(url, src);
                        String include = DownloadPage(u);
                        buffer.Append("<script>");
                        buffer.Append(include);
                        buffer.Append("</script>");
                    }
                    else
                    {
                        buffer.Append(tag.ToString());
                    }
                }
            }

            Console.WriteLine(buffer.ToString());
        }
        /// <summary>
        /// Process the specified URL and extract the option list there.  Each
        /// option's display text and value attribute are passed to
        /// ProcessOption.
        /// </summary>
        /// <param name="url">The URL to process.</param>
        /// <param name="optionList">Which option list to process; passed to Advance as the count.</param>
        public void Process(Uri url, int optionList)
        {
            String        value  = "";
            StringBuilder buffer = new StringBuilder();
            WebRequest    http   = HttpWebRequest.Create(url);

            // Release the connection and the stream even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);

                Advance(parse, "select", optionList);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        buffer.Append((char)ch);
                        continue;
                    }

                    // Tag names are compared without regard to case, matching
                    // the case-insensitive search for "select" done by Advance.
                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "option", true) == 0)
                    {
                        value         = tag["value"];
                        buffer.Length = 0;
                    }
                    else if (String.Compare(tag.Name, "/option", true) == 0)
                    {
                        ProcessOption(buffer.ToString(), value);
                    }
                    else if (String.Compare(tag.Name, "/select", true) == 0)
                    {
                        // The list was opened by a select tag, so its closing
                        // tag is what ends it; stop before any later list.
                        break;
                    }
                }
            }
        }
Пример #20
0
        /// <summary>
        /// This method looks for a link tag at the specified URL.  If a link
        /// tag is found whose type attribute contains "rss", then that feed is
        /// displayed through ProcessRSS.  When several such tags exist the
        /// last one wins.
        /// </summary>
        /// <param name="url">The URL of the web site.</param>
        public void Process(Uri url)
        {
            String     href = null;
            WebRequest http = HttpWebRequest.Create(url);

            http.Timeout = 30000;

            // Release the connection and the stream even when parsing throws.
            using (WebResponse response = http.GetResponse())
            using (Stream stream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(stream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    // Only tags (signalled by 0) are of interest here.
                    if (ch != 0)
                    {
                        continue;
                    }

                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "link", true) != 0)
                    {
                        continue;
                    }

                    String type = tag["type"];
                    if (type != null && type.IndexOf("rss") != -1)
                    {
                        href = tag["href"];
                    }
                }
            }

            if (href == null)
            {
                Console.WriteLine("No RSS link found.");
            }
            else
            {
                // Resolve against the page URL so that a relative feed link
                // works; an absolute href is returned unchanged.
                ProcessRSS(new Uri(url, href));
            }
        }
Пример #21
0
        /// <summary>
        /// Skips the first "a" tag encountered and returns the "href"
        /// attribute of the second one.
        /// </summary>
        /// <param name="analizador">The HTML parser to read from.</param>
        /// <returns>
        /// The href attribute of the second "a" tag, or null when the input
        /// ends before two "a" tags are seen.
        /// </returns>
        static string Categoria(ParseHTML analizador)
        {
            bool firstSeen = false;

            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                // Only parsed tags (0) named "a" matter.
                if (c != 0 || analizador.Tag.Name != "a")
                {
                    continue;
                }

                if (firstSeen)
                {
                    return analizador.Tag["href"];
                }

                firstSeen = true;
            }

            return null;
        }
Пример #22
0
        /// <summary>
        /// Skips the first three "img" tags and returns the "src" attribute of
        /// the fourth, also writing it to the console.
        /// </summary>
        /// <param name="analizador">The HTML parser to read from.</param>
        /// <returns>
        /// The src attribute of the fourth "img" tag, or null when the input
        /// ends before four "img" tags are seen.
        /// </returns>
        static string Imagen(ParseHTML analizador)
        {
            int skipped = 0;

            for (int c = analizador.Read(); c != -1; c = analizador.Read())
            {
                // Only parsed tags (0) named "img" matter.
                if (c != 0 || analizador.Tag.Name != "img")
                {
                    continue;
                }

                if (skipped != 3)
                {
                    skipped++;
                    continue;
                }

                Console.WriteLine(analizador.Tag["src"]);
                return analizador.Tag["src"];
            }

            return null;
        }
        /// <summary>
        /// This method will download an amortization table for the
        /// specified parameters.  The values are posted as a form, and every
        /// table row found in the reply is passed to ProcessTableRow.
        /// </summary>
        /// <param name="interest">The interest rate for the loan.</param>
        /// <param name="term">The term(in months) of the loan.</param>
        /// <param name="principle">The principle amount of the loan.</param>
        public void process(double interest, int term, int principle)
        {
            Uri        url  = new Uri("http://www.httprecipes.com/1/9/loan.php");
            WebRequest http = HttpWebRequest.Create(url);

            http.Timeout     = 30000;
            http.ContentType = "application/x-www-form-urlencoded";
            http.Method      = "POST";

            // Numbers are formatted with the invariant culture so the posted
            // values do not change with the machine's regional settings
            // (for example a decimal comma instead of a decimal point).
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // The request stream is closed even if writing the form throws.
            using (Stream ostream = http.GetRequestStream())
            {
                FormUtility form = new FormUtility(ostream, null);

                form.Add("interest", interest.ToString(invariant));
                form.Add("term", term.ToString(invariant));
                form.Add("principle", principle.ToString(invariant));
                form.Complete();
            }

            // Release the connection and the stream even when parsing throws.
            using (WebResponse response = http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML     parse   = new ParseHTML(istream);
                StringBuilder buffer  = new StringBuilder();
                List <String> list    = new List <String>();
                bool          capture = false;

                Advance(parse, "table", 3);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        HTMLTag tag = parse.Tag;
                        if (String.Compare(tag.Name, "tr", true) == 0)
                        {
                            list.Clear();
                            capture       = false;
                            buffer.Length = 0;
                        }
                        else if (String.Compare(tag.Name, "/tr", true) == 0)
                        {
                            if (list.Count > 0)
                            {
                                ProcessTableRow(list);
                                list.Clear();
                            }
                        }
                        else if (String.Compare(tag.Name, "td", true) == 0)
                        {
                            // Pending text means the previous cell was never
                            // closed; keep it before starting the next cell.
                            if (buffer.Length > 0)
                            {
                                list.Add(buffer.ToString());
                            }
                            buffer.Length = 0;
                            capture       = true;
                        }
                        else if (String.Compare(tag.Name, "/td", true) == 0)
                        {
                            list.Add(buffer.ToString());
                            buffer.Length = 0;
                            capture       = false;
                        }
                        else if (String.Compare(tag.Name, "/table", true) == 0)
                        {
                            break;
                        }
                    }
                    else if (capture)
                    {
                        buffer.Append((char)ch);
                    }
                }
            }
        }
Пример #24
0
        /// <summary>
        /// Called to parse a table.  The table selected by tableNum at the
        /// specified URL is read, and the cell texts of every row are passed
        /// to ProcessTableRow.
        /// </summary>
        /// <param name="url">The URL of the HTML page that contains the table.</param>
        /// <param name="tableNum">The table number to parse; passed to Advance as the count.</param>
        public void Process(Uri url, int tableNum)
        {
            WebRequest http = HttpWebRequest.Create(url);

            // Release the connection and the stream even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML     parse   = new ParseHTML(istream);
                StringBuilder buffer  = new StringBuilder();
                List <String> list    = new List <String>();
                bool          capture = false;

                Advance(parse, "table", tableNum);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        HTMLTag tag = parse.Tag;
                        if (String.Compare(tag.Name, "tr", true) == 0)
                        {
                            list.Clear();
                            capture       = false;
                            buffer.Length = 0;
                        }
                        else if (String.Compare(tag.Name, "/tr", true) == 0)
                        {
                            if (list.Count > 0)
                            {
                                ProcessTableRow(list);
                                list.Clear();
                            }
                        }
                        else if (String.Compare(tag.Name, "td", true) == 0)
                        {
                            // Pending text means the previous cell was never
                            // closed; keep it before starting the next cell.
                            if (buffer.Length > 0)
                            {
                                list.Add(buffer.ToString());
                            }
                            buffer.Length = 0;
                            capture       = true;
                        }
                        else if (String.Compare(tag.Name, "/td", true) == 0)
                        {
                            list.Add(buffer.ToString());
                            buffer.Length = 0;
                            capture       = false;
                        }
                        else if (String.Compare(tag.Name, "/table", true) == 0)
                        {
                            break;
                        }
                    }
                    else if (capture)
                    {
                        buffer.Append((char)ch);
                    }
                }
            }
        }
Пример #25
0
        /// <summary>
        /// Access the website and perform a search for either states or capitals.
        /// Every list item found in the reply is passed to ProcessItem.
        /// </summary>
        /// <param name="search">A search string.</param>
        /// <param name="type">What to search for(s=state, c=capital)</param>
        public void Process(String search, String type)
        {
            String        listType    = "ul";
            String        listTypeEnd = "/ul";
            StringBuilder buffer      = new StringBuilder();
            bool          capture     = false;

            // Build the URL.
            String str;
            using (MemoryStream mstream = new MemoryStream())
            {
                FormUtility form = new FormUtility(mstream, null);

                form.Add("search", search);
                form.Add("type", type);
                form.Add("action", "Search");
                form.Complete();

                System.Text.ASCIIEncoding enc = new System.Text.ASCIIEncoding();

                // ToArray returns only the bytes actually written.  GetBuffer
                // exposes the whole backing array, whose unused tail would end
                // up in the query string as NUL characters.
                str = enc.GetString(mstream.ToArray());
            }

            String     surl = "http://www.httprecipes.com/1/7/get.php?" + str;
            Uri        url  = new Uri(surl);
            WebRequest http = HttpWebRequest.Create(url);

            // Release the connection and the stream even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);

                // Parse from the URL.

                Advance(parse, listType, 0);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        HTMLTag tag = parse.Tag;
                        if (String.Compare(tag.Name, "li", true) == 0)
                        {
                            // Pending text means the previous item was never
                            // closed; flush it before starting the next one.
                            if (buffer.Length > 0)
                            {
                                ProcessItem(buffer.ToString());
                            }
                            buffer.Length = 0;
                            capture       = true;
                        }
                        else if (String.Compare(tag.Name, "/li", true) == 0)
                        {
                            ProcessItem(buffer.ToString());
                            buffer.Length = 0;
                            capture       = false;
                        }
                        else if (String.Compare(tag.Name, listTypeEnd, true) == 0)
                        {
                            // Only an unclosed final item leaves text behind;
                            // after a proper "/li" the buffer is empty and
                            // there is nothing further to report.
                            if (buffer.Length > 0)
                            {
                                ProcessItem(buffer.ToString());
                            }
                            break;
                        }
                    }
                    else if (capture)
                    {
                        buffer.Append((char)ch);
                    }
                }
            }
        }
Пример #26
0
        /// <summary>
        /// Called to process each partial page.  Every anchor after the first
        /// that contains an image is passed to ProcessItem together with the
        /// image URL; the anchor whose text is "[Next 5]" supplies the next
        /// page.
        /// </summary>
        /// <param name="url">The URL of the partial page.</param>
        /// <returns>Returns the next partial page, or null if no more.</returns>
        public Uri Process(Uri url)
        {
            Uri           result = null;
            StringBuilder buffer = new StringBuilder();
            String        value  = "";
            String        src    = "";
            bool          first  = true;

            WebRequest http = HttpWebRequest.Create(url);

            // Release the connection and the stream even when parsing throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);
                int       ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        buffer.Append((char)ch);
                        continue;
                    }

                    HTMLTag tag = parse.Tag;
                    if (String.Compare(tag.Name, "a", true) == 0)
                    {
                        buffer.Length = 0;
                        // An anchor without an href attribute has no target;
                        // store null so its closing tag is ignored below.
                        String href = tag["href"];
                        value = (href == null) ? null : new Uri(url, href).ToString();
                        src   = null;
                    }
                    else if (String.Compare(tag.Name, "img", true) == 0)
                    {
                        src = tag["src"];
                    }
                    else if (String.Compare(tag.Name, "/a", true) == 0 && value != null)
                    {
                        if (String.Compare(buffer.ToString(), "[Next 5]", true) == 0)
                        {
                            result = new Uri(url, value);
                        }
                        else if (src != null)
                        {
                            // The first image link encountered is skipped.
                            if (!first)
                            {
                                Uri urlOfficial = new Uri(url, value);
                                Uri urlFlag     = new Uri(url, src);
                                ProcessItem(urlOfficial, urlFlag);
                            }
                            else
                            {
                                first = false;
                            }
                        }
                    }
                }
            }

            return(result);
        }
Пример #27
0
        /// <summary>
        /// Use the session to search for the specified state or capital.  The search
        /// method can be called multiple times per login.  The form is POSTed to the
        /// search page and the text of each list item in the result list is collected.
        /// </summary>
        /// <param name="session">The session to use.</param>
        /// <param name="search">The search string to use.</param>
        /// <param name="type">What to search for (s=state, c=capital).</param>
        /// <returns>A list of states or capitals.</returns>
        public List <String> Search(String session, String search, String type)
        {
            String        listType    = "ul";
            String        listTypeEnd = "/ul";
            StringBuilder buffer      = new StringBuilder();
            bool          capture     = false;
            List <String> result      = new List <String>();

            // Encode the form into memory first.
            MemoryStream mstream = new MemoryStream();
            FormUtility  form    = new FormUtility(mstream, null);

            form.Add("search", search);
            form.Add("type", type);
            form.Add("action", "Search");
            form.Complete();

            // Build the URL.
            Uri url = new Uri("http://www.httprecipes.com/1/8/menunc.php?session="
                              + session);
            WebRequest http = HttpWebRequest.Create(url);

            http.Timeout     = 30000;
            http.ContentType = "application/x-www-form-urlencoded";
            http.Method      = "POST";

            // Perform the post.  ToArray() yields exactly the bytes written to the
            // stream; GetBuffer() would expose the whole internal buffer, whose unused
            // tail must not be sent as part of the request body.
            byte[] b = mstream.ToArray();
            using (Stream ostream = http.GetRequestStream())
            {
                ostream.Write(b, 0, b.Length);
            }

            // Read the results.  The response and its stream are disposed even when
            // parsing throws.
            using (WebResponse response = http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);

                // Position the parser on the first list of the page.
                Advance(parse, listType, 0);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        // Text is only collected while inside a list item.
                        if (capture)
                        {
                            buffer.Append((char)ch);
                        }
                        continue;
                    }

                    // Tag names are compared ordinally so the match does not depend
                    // on the current culture's casing rules.
                    HTMLTag tag = parse.Tag;
                    if (String.Equals(tag.Name, "li", StringComparison.OrdinalIgnoreCase))
                    {
                        // A new item starts; flush a previous item that was never closed.
                        if (buffer.Length > 0)
                        {
                            result.Add(buffer.ToString());
                        }
                        buffer.Length = 0;
                        capture       = true;
                    }
                    else if (String.Equals(tag.Name, "/li", StringComparison.OrdinalIgnoreCase))
                    {
                        result.Add(buffer.ToString());
                        buffer.Length = 0;
                        capture       = false;
                    }
                    else if (String.Equals(tag.Name, listTypeEnd, StringComparison.OrdinalIgnoreCase))
                    {
                        // Only an unclosed final item still has text pending here; after
                        // a closing li tag the buffer is empty and adding it would
                        // append a spurious blank entry.
                        if (buffer.Length > 0)
                        {
                            result.Add(buffer.ToString());
                        }
                        break;
                    }
                }
            }

            return(result);
        }
Пример #28
0
        /// <summary>
        /// Scan the page at the specified URL sentence by sentence and report every
        /// sentence in which Text.ExtractYear finds a year.  Text inside script and
        /// style elements is ignored, and only characters below 128 are kept.
        /// </summary>
        /// <param name="report">Object to report to.</param>
        /// <param name="url">The url.</param>
        /// <param name="desiredYear">The desired year, or -1 to accept any year.
        /// When a specific year is requested, sentences containing a different
        /// year are reported as bad sentences.</param>
        public static void CheckURL(ScanReportable report, Uri url,
                                    int desiredYear)
        {
            StringBuilder sentence    = new StringBuilder();
            String        ignoreUntil = null;

            WebRequest http = HttpWebRequest.Create(url);

            http.Timeout = 10000;

            // Dispose the response and its stream even when parsing or reporting
            // throws, so the underlying connection is always released.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML html = new ParseHTML(istream);
                int       ch;

                while ((ch = html.Read()) != -1)
                {
                    if (ch == 0)
                    {
                        // Tag names are matched ordinally: a culture-sensitive
                        // comparison can fail for names containing 'i' (li, script)
                        // under cultures with special casing rules for that letter.
                        String name = html.Tag.Name;

                        // clear anything before a body tag or another tag that
                        // starts a new run of text
                        if (String.Equals(name, "body", StringComparison.OrdinalIgnoreCase) ||
                            String.Equals(name, "br", StringComparison.OrdinalIgnoreCase) ||
                            String.Equals(name, "li", StringComparison.OrdinalIgnoreCase) ||
                            String.Equals(name, "p", StringComparison.OrdinalIgnoreCase) ||
                            String.Equals(name, "h1", StringComparison.OrdinalIgnoreCase) ||
                            String.Equals(name, "h2", StringComparison.OrdinalIgnoreCase) ||
                            String.Equals(name, "h3", StringComparison.OrdinalIgnoreCase) ||
                            String.Equals(name, "td", StringComparison.OrdinalIgnoreCase) ||
                            String.Equals(name, "th", StringComparison.OrdinalIgnoreCase))
                        {
                            sentence.Length = 0;
                        }

                        // ignore everything between script and style tags
                        if (ignoreUntil == null)
                        {
                            if (String.Equals(name, "script", StringComparison.OrdinalIgnoreCase))
                            {
                                ignoreUntil = "/script";
                            }
                            else if (String.Equals(name, "style", StringComparison.OrdinalIgnoreCase))
                            {
                                ignoreUntil = "/style";
                            }
                        }
                        else if (String.Equals(name, ignoreUntil, StringComparison.OrdinalIgnoreCase))
                        {
                            ignoreUntil = null;
                        }

                        // add a space after the tag
                        if ((sentence.Length > 0) &&
                            (sentence[sentence.Length - 1] != ' '))
                        {
                            sentence.Append(' ');
                        }
                    }
                    else if (ignoreUntil == null)
                    {
                        if (".?!".IndexOf((char)ch) != -1)
                        {
                            // End of a sentence: classify it, then start a new one.
                            String str  = sentence.ToString();
                            int    year = Text.ExtractYear(str);

                            if (desiredYear == -1)
                            {
                                // looking for any year
                                if (year != -1)
                                {
                                    report.ReceiveGoodSentence(str);
                                }
                            }
                            else if (year == desiredYear)
                            {
                                // looking for a specific year, and found it
                                report.ReceiveGoodSentence(str);
                            }
                            else if (year != -1)
                            {
                                // a year was found, but not the desired one
                                report.ReceiveBadSentence(str);
                            }
                            sentence.Length = 0;
                        }
                        else if (ch == ' ')
                        {
                            // Collapse runs of spaces and drop leading spaces.
                            if ((sentence.Length > 0) &&
                                (sentence[sentence.Length - 1] != ' '))
                            {
                                sentence.Append(' ');
                            }
                        }
                        else if ((ch != '\n') && (ch != '\t') && (ch != '\r') && (ch < 128))
                        {
                            sentence.Append((char)ch);
                        }
                    }
                }
            }
        }
Пример #29
0
        /// <summary>
        /// Access the website and perform a search for either states or capitals.
        /// The form is POSTed to the search page and the text of each list item
        /// in the result list is passed to ProcessItem.
        /// </summary>
        /// <param name="search">A search string.</param>
        /// <param name="type">What to search for (s=state, c=capital).</param>
        public void Process(String search, String type)
        {
            String        listType    = "ul";
            String        listTypeEnd = "/ul";
            StringBuilder buffer      = new StringBuilder();
            bool          capture     = false;

            // Build the URL and POST.
            Uri        url  = new Uri("http://www.httprecipes.com/1/7/post.php");
            WebRequest http = HttpWebRequest.Create(url);

            http.Timeout     = 30000;
            http.ContentType = "application/x-www-form-urlencoded";
            http.Method      = "POST";

            // The form is written straight to the request stream, which is closed
            // even when building the form throws.
            using (Stream ostream = http.GetRequestStream())
            {
                FormUtility form = new FormUtility(ostream, null);

                form.Add("search", search);
                form.Add("type", type);
                form.Add("action", "Search");
                form.Complete();
            }

            // Read the results.  The response and its stream are disposed even when
            // parsing or ProcessItem throws.
            using (HttpWebResponse response = (HttpWebResponse)http.GetResponse())
            using (Stream istream = response.GetResponseStream())
            {
                ParseHTML parse = new ParseHTML(istream);

                // Position the parser on the first list of the page.
                Advance(parse, listType, 0);

                int ch;

                while ((ch = parse.Read()) != -1)
                {
                    if (ch != 0)
                    {
                        // Text is only collected while inside a list item.
                        if (capture)
                        {
                            buffer.Append((char)ch);
                        }
                        continue;
                    }

                    // Tag names are compared ordinally so the match does not depend
                    // on the current culture's casing rules.
                    HTMLTag tag = parse.Tag;
                    if (String.Equals(tag.Name, "li", StringComparison.OrdinalIgnoreCase))
                    {
                        // A new item starts; flush a previous item that was never closed.
                        if (buffer.Length > 0)
                        {
                            ProcessItem(buffer.ToString());
                        }
                        buffer.Length = 0;
                        capture       = true;
                    }
                    else if (String.Equals(tag.Name, "/li", StringComparison.OrdinalIgnoreCase))
                    {
                        ProcessItem(buffer.ToString());
                        buffer.Length = 0;
                        capture       = false;
                    }
                    else if (String.Equals(tag.Name, listTypeEnd, StringComparison.OrdinalIgnoreCase))
                    {
                        // Only an unclosed final item still has text pending here; after
                        // a closing li tag the buffer is empty and reporting it would
                        // produce a spurious blank item.
                        if (buffer.Length > 0)
                        {
                            ProcessItem(buffer.ToString());
                        }
                        break;
                    }
                }
            }
        }