コード例 #1
0
ファイル: Program.cs プロジェクト: Mograbi/Ebay-User-Crawler
        /// <summary>
        /// Adds the page counts of the <c>positive</c> and <c>negative</c> pages to
        /// <c>SUM_OF_PAGES</c> and returns the updated total.
        /// </summary>
        /// <remarks>
        /// The page count is read from the element with id <c>PGN_pagination1</c>: the digits
        /// directly in front of its closing <c>&lt;/a&gt;&lt;/b&gt;</c> are parsed as the count
        /// (presumably the highest page number; confirm against the live markup).
        /// A page without the expected pagination markup makes this method throw
        /// (<see cref="NullReferenceException"/> or <see cref="FormatException"/>).
        /// </remarks>
        /// <returns>The value of <c>SUM_OF_PAGES</c> after both counts were added.</returns>
        public int getSumPages()
        {
            // Download the user page once and resolve both links from the same source.
            string user_source     = getSource(getUserLink());
            string positive_link   = getParameterLink(user_source, positive);
            string negative_link   = getParameterLink(user_source, negative);
            string positive_source = getSource(positive_link);
            // Fixed: the negative page was previously downloaded from positive_link,
            // so the positive count was added twice.
            string negative_source = getSource(negative_link);

            Regex max_page_regex = new Regex(@">(\d+)</a></b>");

            foreach (string source in new[] { positive_source, negative_source })
            {
                HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.LoadHtml(source);

                HtmlAgilityPack.HtmlNode pagination = htmlDoc.DocumentNode
                                                      .SelectSingleNode("//body")
                                                      .SelectSingleNode("//div[@id='CentralArea']")
                                                      .SelectSingleNode("//div[@class='newPagination']");
                HtmlAgilityPack.HtmlNode pgn_pagination = pagination.SelectSingleNode("//b[@id='PGN_pagination1']");

                // Group 1 holds the digits captured in front of "</a></b>".
                int max_pages = Int32.Parse(max_page_regex.Match(pgn_pagination.WriteTo()).Groups[1].ToString());
                SUM_OF_PAGES += max_pages;
            }

            return(SUM_OF_PAGES);
        }
コード例 #2
0
        /// <summary>
        /// Returns a copy of <paramref name="html"/> whose root node carries the given attributes.
        /// </summary>
        /// <param name="html">Markup whose trimmed text is parsed into a single HTML node.</param>
        /// <param name="attributes">
        /// Attribute specifications in <c>name=value</c> form. Each entry is split at its first
        /// <c>=</c> only, so values that contain <c>=</c> themselves (for example query strings)
        /// are preserved in full. An entry without <c>=</c> is used as both name and value.
        /// </param>
        /// <returns>
        /// <paramref name="html"/> unchanged when no attributes are supplied; otherwise a new
        /// <c>MvcHtmlString</c> built from the modified node.
        /// </returns>
        public static MvcHtmlString AddAttributes(this MvcHtmlString html, params string[] attributes)
        {
            if (attributes.IsNullOrEmpty())
            {
                return(html);
            }

            HtmlAgilityPack.HtmlNode mainNode = HtmlAgilityPack.HtmlNode.CreateNode(html.ToHtmlString().Trim());

            foreach (string attribute in attributes)
            {
                // Limit the split to two parts: everything after the first '=' is the value.
                // Splitting on every '=' and taking the last piece truncated values such as
                // "href=/x?a=b" to "b".
                string[] av    = attribute.Split(new[] { '=' }, 2);
                string   name  = av[0];
                string   value = av[av.Length - 1];
                mainNode.SetAttribute(name, value);
            }

            return(new MvcHtmlString(mainNode.WriteTo()));
        }
コード例 #3
0
ファイル: Program.cs プロジェクト: Mograbi/Ebay-User-Crawler
        /// <summary>
        /// Searches every result page of the given parameter and appends the matches to
        /// <c>dict[parameter]</c>.
        /// </summary>
        /// <remarks>
        /// Starting at the link resolved for the parameter, the method follows the first
        /// <c>&lt;a href="..."&gt;</c> found inside the element with id <c>PN_pagination1</c>
        /// until that element no longer contains a link. The progress bar is advanced once
        /// for every followed page.
        /// A page without the expected pagination markup makes this method throw a
        /// <see cref="NullReferenceException"/>.
        /// </remarks>
        /// <param name="param">Search parameter; its <c>ToString()</c> value is used.</param>
        /// <exception cref="Exception">Thrown when no internet connection is available.</exception>
        public void searchProductIn(object param)
        {
            string parameter = param.ToString();

            if (!CheckForInternetConnection())
            {
                throw new Exception("No Internet Connection");
            }
            if (!initialized)
            {
                initialize();
            }
            Console.WriteLine("Searching " + parameter);

            string        link        = getParameterLink(getSource(getUserLink()), parameter);
            string        page_source = getSource(link);
            List <string> res         = new List <string>(searchPage(page_source, parameter));

            HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
            htmlDoc.LoadHtml(page_source);
            HtmlAgilityPack.HtmlNode pagination    = htmlDoc.DocumentNode.SelectSingleNode("//body").SelectSingleNode("//div[@id='CentralArea']").SelectSingleNode("//div[@class='newPagination']");
            HtmlAgilityPack.HtmlNode pn_pagination = pagination.SelectSingleNode("//b[@id='PN_pagination1']");

            Regex  reg     = new Regex(@"<a href=\x22([^\x22]+)\x22");
            string pn_html = pn_pagination.WriteTo();

            while (pn_html.Contains("<a href="))
            {
                // Dropping "amp;" turns the HTML-escaped "&amp;" inside the href into "&".
                string next_page = reg.Match(pn_html).Groups[1].ToString().Replace("amp;", "");

                // Download the page once and use the same snapshot for both the search and the
                // pagination lookup (it used to be fetched twice per iteration).
                page_source = getSource(next_page);
                res.AddRange(searchPage(page_source, parameter));

                htmlDoc = new HtmlAgilityPack.HtmlDocument();
                htmlDoc.LoadHtml(page_source);
                pagination    = htmlDoc.DocumentNode.SelectSingleNode("//body").SelectSingleNode("//div[@id='CentralArea']").SelectSingleNode("//div[@class='newPagination']");
                pn_pagination = pagination.SelectSingleNode("//b[@id='PN_pagination1']");
                pn_html       = pn_pagination.WriteTo();

                // NOTE(review): when the mutex cannot be acquired within 1000 ms the progress
                // tick for this page is skipped, as before.
                if (mutex.WaitOne(1000))
                {
                    try
                    {
                        App.progressBar.Advance();
                    }
                    finally
                    {
                        // Release even if Advance() throws so other threads are not blocked.
                        mutex.ReleaseMutex();
                    }
                }
            }
            dict[parameter] = dict[parameter].Concat(res).ToList();
        }