/// <summary>
/// Resolves <paramref name="newUri"/> against <paramref name="l_baseUri"/> and, when the
/// result passes <c>ValidPage</c> and is not already in <c>mWebPages</c>, registers it
/// and queues it in <c>m_webPagesPending</c>.
/// </summary>
/// <param name="l_baseUri">URI used to resolve <paramref name="newUri"/> when it is relative.</param>
/// <param name="newUri">Link text as found (absolute or relative); stored as <c>OriginalUrl</c>.</param>
/// <returns>
/// <c>true</c> when a new page was queued; <c>false</c> when the link cannot be parsed,
/// is rejected by <c>ValidPage</c>, or is already known.
/// </returns>
private bool AddWebPage(Uri l_baseUri, string newUri)
        {
            // A single malformed link must not throw: it would abort the extraction of
            // every remaining link on the page that contains it.
            Uri uri;
            if (!Uri.TryCreate(l_baseUri, newUri, out uri))
            {
                return false;
            }

            if (!ValidPage(uri.LocalPath) || mWebPages.Contains(uri))
            {
                return false;
            }

            WebPageStatus state = new WebPageStatus(uri);
            state.OriginalUrl = newUri;

            // Pages located under BaseUri are tagged so that HandleLinks will extract
            // their links. URLs are compared ordinally, not by current culture.
            if (uri.AbsoluteUri.StartsWith(BaseUri.AbsoluteUri, StringComparison.Ordinal))
            {
                state.TaskInformation += "Handle Links";
            }

            m_webPagesPending.Enqueue(state);
            mWebPages.Add(uri, state);

            return true;
        }
        //New

        /// <summary>
        /// Runs the crawl: resets <c>UrlCrawledCount</c>, queues <c>StartUri</c>, then
        /// dequeues and processes pending pages until the queue is empty or
        /// <c>MaximumUrlAllowed</c> pages have been processed.
        /// </summary>
        /// <remarks>
        /// Any exception raised inside the processing loop is reported to the user in a
        /// message box and ends the loop; it is not rethrown.
        /// </remarks>
        public void Execute()
        {
            UrlCrawledCount = 0;

            AddWebPage(StartUri, StartUri.AbsoluteUri);

            try
            {
                // MaximumUrlAllowed == -1 disables the page limit.
                while (WebPagesPending.Count > 0 && (MaximumUrlAllowed == -1 || UrlCrawledCount < MaximumUrlAllowed))
                {
                    WebPageStatus state = (WebPageStatus)m_webPagesPending.Dequeue();
                    mWebPageManager.Process(state);

                    // Drop the downloaded body once processed unless the caller asked to keep it.
                    if (!KeepWebContent)
                    {
                        state.Content = null;
                    }

                    UrlCrawledCount += 1;
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show("There was some error in crawling the website. Try again later." + Constants.vbCrLf + "Error:" + ex.ToString());
            }
        }
        /// <summary>
        /// Records the outcome of one crawled page in <c>mCrawlDetail</c>. The work is
        /// executed through <c>Dispatcher.Invoke</c>.
        /// </summary>
        /// <param name="state">Status of the page that was just processed.</param>
        /// <remarks>
        /// A page counts as failed when its task was started but not completed. Failed
        /// pages increment <c>TotalBrokenLink</c> and are passed to
        /// <c>UpdateWebsiteBrowser</c>. When the successful page is the one entered in
        /// <c>txtSearchUrl</c>, its content is kept in <c>mWebsiteContent</c>.
        /// </remarks>
        private void spider_WebPageContentHandler(WebPageStatus state)
        {
            this.Dispatcher.Invoke(new Action(() =>
            {
                string pageUrl = state.Uri.ToString();

                CrawlPageDetail detail = new CrawlPageDetail();
                detail.WebsiteUrl      = pageUrl;
                detail.SerialNumber    = mCrawlDetail.Pages.Count + 1;
                detail.Status          = state.StatusCode;
                mCrawlDetail.Pages.Add(detail);

                mCrawlDetail.TotalCrawled += 1;

                bool failed = state.TaskCompleted == false && state.TaskStarted == true;
                detail.IsSuccess = !failed;

                if (failed)
                {
                    mCrawlDetail.TotalBrokenLink += 1;
                    UpdateWebsiteBrowser(pageUrl);
                    return;
                }

                // TryCreate instead of new Uri(): invalid text in the search box must not
                // throw out of the handler. string.Equals is null-safe and avoids the
                // two lower-cased string copies of the previous comparison.
                Uri homeUri;
                if (Uri.TryCreate(txtSearchUrl.Text, UriKind.Absolute, out homeUri)
                    && string.Equals(state.OriginalUrl, homeUri.ToString(), StringComparison.OrdinalIgnoreCase))
                {
                    mWebsiteContent = state.Content;
                }
            }));
        }
 /// <summary>
 /// Extracts every URL matched by <c>RegularExpression.UrlExtractor</c> from the page
 /// content and offers each one to <c>AddWebPage</c>, resolved against the page's URI.
 /// </summary>
 /// <param name="state">
 /// Page whose content is scanned. Nothing happens unless its <c>TaskInformation</c>
 /// contains the "Handle Links" marker.
 /// </param>
 public void HandleLinks(WebPageStatus state)
 {
     // The marker is a fixed ASCII token, so compare ordinally rather than by culture.
     if (state.TaskInformation == null
         || state.TaskInformation.IndexOf("Handle Links", StringComparison.Ordinal) < 0)
     {
         return;
     }

     for (Match link = RegExUtil.GetMatchRegEx(RegularExpression.UrlExtractor, state.Content);
          link.Success;
          link = link.NextMatch())
     {
         // AddWebPage reports whether the link was newly queued; that result is not needed here.
         AddWebPage(state.Uri, m_GroupValue(link));
     }
 }

 /// <summary>Returns the text captured by the "url" group of <paramref name="link"/>.</summary>
 private static string m_GroupValue(Match link)
 {
     return link.Groups["url"].Value;
 }
        //Process

        #region "local interface"

        //
        /// <summary>
        /// Translates an exception raised while fetching a page into status fields on
        /// <paramref name="state"/>, recognising HTTP codes by the "(NNN)" token in the
        /// exception text.
        /// </summary>
        /// <param name="ex">Exception raised while requesting or reading the page.</param>
        /// <param name="state">Page status to update with a code and description.</param>
        private void HandleException(Exception ex, ref WebPageStatus state)
        {
            // ex.ToString() includes the stack trace; build it once instead of per branch.
            string details = ex.ToString();

            if (details.Contains("(404)"))
            {
                state.StatusCode        = "404";
                state.StatusDescription = "(404) Not Found";
            }
            else if (details.Contains("(403)"))
            {
                // Previously only the description was set here, unlike every sibling branch.
                state.StatusCode        = "403";
                state.StatusDescription = "(403) Forbidden";
            }
            else if (details.Contains("(500)"))
            {
                // NOTE(review): a 500 is reported as completed with code "OK" — this looks
                // like a deliberate choice not to count server errors as broken links;
                // confirm before changing.
                state.TaskCompleted     = true;
                state.StatusCode        = "OK";
                state.StatusDescription = "(500) Internal Server Error";
            }
            else if (details.Contains("(502)"))
            {
                state.StatusCode        = "502";
                state.StatusDescription = "(502) Bad Gateway";
            }
            else if (details.Contains("(503)"))
            {
                state.StatusCode        = "503";
                state.StatusDescription = "(503) Server Unavailable";
            }
            else if (details.Contains("(504)"))
            {
                state.StatusCode        = "504";
                state.StatusDescription = "(504) Gateway Timeout";
            }
            else if (ex.InnerException is FileNotFoundException)
            {
                state.StatusCode        = "FileNotFound";
                state.StatusDescription = ex.InnerException.Message;
            }
            else
            {
                // Unrecognised failure: keep the full exception text as the description.
                state.StatusDescription = details;
            }
        }
        /// <summary>
        /// Requests <c>state.Uri</c>, records the response status on
        /// <paramref name="state"/>, and for an "OK" response reads the body into
        /// <c>state.Content</c> and raises <c>WebPageContentHandler</c>.
        /// <c>WebPageTaskCompleted</c> is raised afterwards in every case.
        /// </summary>
        /// <param name="state">Page to fetch; its task, status and content fields are updated.</param>
        /// <returns><c>state.TaskCompleted</c> after processing.</returns>
        /// <remarks>
        /// Exceptions are not propagated: they are written to the console and/or mapped
        /// onto <paramref name="state"/> by <c>HandleException</c>.
        /// </remarks>
        public bool Process(WebPageStatus state)
        {
            state.TaskStarted   = true;
            state.TaskCompleted = false;

            try
            {
                Console.WriteLine("Process Uri: {0}", state.Uri.AbsoluteUri);

                WebRequest  req = WebRequest.Create(state.Uri);
                WebResponse res = null;

                try
                {
                    res = req.GetResponse();

                    HttpWebResponse httpRes = res as HttpWebResponse;
                    if (httpRes != null)
                    {
                        state.StatusCode        = httpRes.StatusCode.ToString();
                        state.StatusDescription = httpRes.StatusDescription;
                    }

                    if (res is FileWebResponse)
                    {
                        state.StatusCode        = "OK";
                        state.StatusDescription = "OK";
                    }

                    // NOTE(review): for a response type that is neither HTTP nor file,
                    // StatusCode is not assigned above; if it is null this call throws and
                    // the page is handled by the catch below. Behavior kept as-is.
                    if (state.StatusCode.Equals("OK"))
                    {
                        using (StreamReader sr = new StreamReader(res.GetResponseStream()))
                        {
                            state.Content = sr.ReadToEnd();
                        }

                        // Copy before the null check so a concurrent unsubscribe cannot
                        // null the delegate between the test and the call.
                        WebPageContentDelegate handler = WebPageContentHandler;
                        if (handler != null)
                        {
                            handler(state);
                        }
                    }

                    state.TaskCompleted = true;
                }
                catch (Exception ex)
                {
                    HandleException(ex, ref state);
                }
                finally
                {
                    if (res != null)
                    {
                        res.Close();
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());

                // Failures before the request was issued (e.g. WebRequest.Create rejecting
                // the URI) previously left no reason on the state. Skip when the task
                // already completed so a late failure does not overwrite a good status.
                if (!state.TaskCompleted)
                {
                    HandleException(ex, ref state);
                }
            }

            Console.WriteLine("Completed: {0}", state.TaskCompleted);

            WebPageContentDelegate taskHandler = WebPageTaskCompleted;
            if (taskHandler != null)
            {
                taskHandler(state);
            }

            return state.TaskCompleted;
        }