Exemple #1
0
        /// <summary>
        /// Downloads the page at <paramref name="URL"/>, saves its HTML to "{ID}.html",
        /// inserts a row describing it through <c>Query</c>, and then recurses into the
        /// absolute links found on the page while depth remains.
        /// Does nothing beyond updating the progress bar once <c>limit</c> has reached zero.
        /// </summary>
        /// <param name="URL">Address of the page to download.</param>
        /// <param name="Depth">Remaining depth; links are followed only while Depth - 1 is positive.</param>
        void Crawler(string URL, int Depth)
        {
            // Maximum number of characters kept for the title and the page text.
            // NOTE(review): presumably sized to the columns of the "web" table - confirm against the schema.
            const int MaxTitleLength = 98;
            const int MaxPageLength = 7999;

            ProgressBar.Value = limit;
            if (limit == 0)
            {
                return;
            }

            string htmlCode;
            try
            {
                htmlCode = client.DownloadString(URL);
            }
            catch
            {
                MessageBox.Show("Internet Connection is not working.");
                // Nothing was downloaded, so there is nothing to parse, store or follow.
                return;
            }

            if (htmlCode == null)
            {
                return;
            }

            string title = Regex.Match(htmlCode, @"\<title\b[^>]*\>\s*(?<Title>[\s\S]*?)\</title\>", RegexOptions.IgnoreCase).Groups["Title"].Value;

            try
            {
                ID++;
                string pageText = ExtractText(htmlCode);

                // Truncate first and escape afterwards: cutting an already escaped string
                // could split a doubled quote and leave an unbalanced one in the statement.
                string sqlUrl = URL.Replace("'", "''");
                string sqlTitle = title.Substring(0, Math.Min(title.Length, MaxTitleLength)).Replace("'", "''");
                string sqlPage = pageText.Substring(0, Math.Min(pageText.Length, MaxPageLength)).Replace("'", "''");

                System.IO.File.WriteAllText($"{ID}.html", htmlCode);
                Query($"Insert into web values ('{sqlUrl}','{ID}','{sqlTitle}','{sqlPage}');");
                limit--;
            }
            catch (Exception ex)
            {
                // Storing the page failed: give the ID back so the next page reuses it.
                ID--;
                System.Diagnostics.Debug.WriteLine("Crawler: could not store '" + URL + "': " + ex.Message);
            }

            int remainingDepth = Depth - 1;
            if (remainingDepth <= 0)
            {
                // No link would be followed, so skip extracting them altogether.
                return;
            }

            foreach (LinkItem i in LinkFinder.Find(htmlCode))
            {
                try
                {
                    if (Uri.IsWellFormedUriString(i.Href, UriKind.Absolute))
                    {
                        Crawler(i.Href, remainingDepth);
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: a failing link must not stop the remaining links from being crawled.
                    System.Diagnostics.Debug.WriteLine("Crawler: skipped a link: " + ex.Message);
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Builds the list of child links for this link: asks a <c>LinkFinder</c> created
        /// from <c>linkStr</c> for its site links, converts each one to a <c>Link</c>,
        /// and echoes it to the console.
        /// </summary>
        /// <returns>
        /// The links converted so far. If any step throws, <c>IsValid</c> is set to false
        /// and the list gathered up to that point (possibly empty) is returned.
        /// </returns>
        public List<Link> ChildLinks()
        {
            LinkFinder finder = new LinkFinder(linkStr);
            List<Link> result = new List<Link>();

            try
            {
                // Fixed two-second pause before querying the finder.
                // NOTE(review): presumably a politeness delay between requests - confirm.
                Thread.Sleep(2000);

                List<string> siteLinks = finder.GetSiteLinks();
                for (int index = 0; index < siteLinks.Count; index++)
                {
                    string current = siteLinks[index];
                    result.Add((Link)current);
                    Console.WriteLine("current item received: {0}", current);
                }
            }
            catch
            {
                // Any failure marks this link as invalid; what was collected is still returned.
                IsValid = false;
            }

            return result;
        }