public async Task OnStartCrawling(object param)
{
    ClearCrawlerTree();

    CrawlerInputParser inputParser = new CrawlerInputParser();
    List<Uri> rootResources = inputParser.Parse(ViewModelsMediator.Instance.SourceFilePath);

    if (rootResources != null)
    {
        StartBtnEnabled = false;
        StopBtnEnabled = true;

        // Init crawler
        WebCrawler.WebCrawler crawler = new WebCrawler.WebCrawler();
        crawler.MaxDepth = crawlingDepth;
        crawler.Logger = LoggerViewModel.Instance;
        crawler.LoadingFinished += OnPageLoadingFinished;

        foreach (var rootUri in rootResources)
        {
            WebCrawlerOutput crawlerOutput = await crawler.PerformCrawlingAsync(rootUri, 0, -1);
        }

        StartBtnEnabled = true;
        StopBtnEnabled = false;
    }
}
public static void Main(string[] args)
{
    WebCrawler crawler = new WebCrawler();
    crawler.urlList.Add("http://tw.msn.com/");
    crawler.craw();
}
static void Main(string[] args)
{
    WebCrawler wc = new WebCrawler();
    WebClient wbc = new WebClient();

    // Fetch robots.txt and collect the rules that apply to all user agents ("*")
    string src = wbc.DownloadString("http://www.youtube.com/robots.txt");
    string[] lines = src.Split('\n');
    List<String> disallowedSites = wc.GetDisallowedSites(lines, "*");

    // Seed the frontier with the links found on the start page
    List<String> frontier = wc.FetchUrlsFromSource("http://www.youtube.com");

    int x = 5; // breakpoint anchor for inspecting the results in a debugger
}
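// Hedged sketch of what a GetDisallowedSites-style helper might do (the
// original implementation is not shown, and GetDisallowedPaths is a
// hypothetical name): walk the robots.txt lines, track the current
// User-agent section, and collect the Disallow paths for the given agent.
using System;
using System.Collections.Generic;

static class RobotsTxt
{
    public static List<string> GetDisallowedPaths(string[] lines, string userAgent)
    {
        var disallowed = new List<string>();
        bool inMatchingSection = false;
        foreach (string raw in lines)
        {
            string line = raw.Trim();
            if (line.StartsWith("User-agent:", StringComparison.OrdinalIgnoreCase))
            {
                inMatchingSection = line.Substring("User-agent:".Length).Trim() == userAgent;
            }
            else if (inMatchingSection && line.StartsWith("Disallow:", StringComparison.OrdinalIgnoreCase))
            {
                string path = line.Substring("Disallow:".Length).Trim();
                if (path.Length > 0)
                    disallowed.Add(path);
            }
        }
        return disallowed;
    }
}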
private double[] getTF_IDF(WebCrawler.Index.DocumentReference[] documents, int docCount)
{
    double N = docCount;
    double[] values = new double[documents.Length];

    for (int i = 0; i < documents.Length; i++)
    {
        Document d = documents[i].Document;
        int c = documents[i].Count;

        // Accumulate squared term counts per document (used later for length normalization)
        if (!lengths.ContainsKey(d))
            lengths.Add(d, c * c);
        else
            lengths[d] += c * c;

        // tf-idf weight: (1 + log10 tf) * log10(N / df), with df = documents.Length
        values[i] = (1 + Math.Log10(c)) * Math.Log10(N / documents.Length);
    }

    return values;
}
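// Worked example of the weight above, assuming documents.Length is the number
// of documents containing the term (its document frequency df): with N = 10,
// tf = c = 3, and df = 2, the weight is
// (1 + log10 3) * log10(10 / 2) ≈ 1.477 * 0.699 ≈ 1.03.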
private void FindUrl()
{
    Console.Write("Enter a URL: ");
    _userUrl = Console.ReadLine().Trim();

    bool hasSetMaximumLinkAmount = false;
    int maximumLinkAmount = 0;
    do
    {
        Console.Write("Enter the amount of links to visit: ");
        bool isNumber = int.TryParse(Console.ReadLine().Trim(), out int outMaximumLinkAmount);
        if (isNumber)
        {
            hasSetMaximumLinkAmount = true;
            maximumLinkAmount = outMaximumLinkAmount;
        }
        else
        {
            Console.WriteLine("The value you inserted was not a number.");
        }
    } while (!hasSetMaximumLinkAmount);

    Console.WriteLine("Crawling...");
    Console.WriteLine();

    WebCrawler webCrawler = new WebCrawler();
    webCrawler.Start(_userUrl, maximumLinkAmount);
    Console.WriteLine($"---Found web page: {_userUrl}---");

    Queue<Uri> results = webCrawler.GetResultUrls();
    foreach (Uri url in results)
    {
        Console.WriteLine(url);
    }
    Console.WriteLine($"*Total found links: {results.Count}");
}
static void Main(string[] args)
{
    List<Uri> uris = new List<Uri>()
    {
        new Uri("https://www.rbc.ru/"),
        new Uri("https://habrahabr.ru/"),
        new Uri("https://zr.ru/"),
        new Uri("https://youtube.com/"),
        new Uri("https://rp5.ru/"),
    };

    List<WebCrawlerItem> crawlerItems = new List<WebCrawlerItem>();
    foreach (var uri in uris)
    {
        crawlerItems.Add(new WebCrawlerItem(uri, new DomainCrawlerConfiguration()));
    }

    var crawler = new WebCrawler.WebCrawler(new CrawlerConfiguration());
    crawler.StartCrawlingAsync(crawlerItems);

    // Keep the process alive while the crawl runs in the background
    Console.ReadLine();
}
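// A minimal sketch of an awaited variant, assuming StartCrawlingAsync returns
// a Task (the snippet above fires it without awaiting and keeps the process
// alive with Console.ReadLine instead). Requires C# 7.1+ for async Main.
using System;
using System.Collections.Generic;
using System.Threading.Tasks;

class Program
{
    static async Task Main(string[] args)
    {
        var crawler = new WebCrawler.WebCrawler(new CrawlerConfiguration());
        var items = new List<WebCrawlerItem>
        {
            new WebCrawlerItem(new Uri("https://habrahabr.ru/"), new DomainCrawlerConfiguration()),
        };

        // Returns only once the crawl itself has completed
        await crawler.StartCrawlingAsync(items);
    }
}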
static void Main(string[] args)
{
    String indexPath = @"C:\Users\Brandon\Desktop\Multimedia Retrieval\W3 Files\Index";

    // Analyzers build token streams which analyze text
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = new IndexWriter(indexPath, analyzer, true);

    // Set the seedUrl and initialize the crawler
    String seedUrl = "http://sydney.edu.au/engineering/it/";
    WebCrawler crawler = new WebCrawler();
    Queue<String> linkQueue = new Queue<String>();
    linkQueue.Enqueue(seedUrl);
    HashSet<String> linkSet = new HashSet<String>();
    Console.Write("Sites Explored: 0");

    // Iteratively extract links from the first URL in the frontier
    // and add its content to the index
    while (linkQueue.Count != 0 && linkSet.Count < 50)
    {
        String currentLink = linkQueue.Dequeue();
        try
        {
            if (linkSet.Contains(currentLink))
            {
                continue;
            }
            String content = crawler.getUrlContent(currentLink);
            crawler.getLinks(linkQueue, content, currentLink);
            linkSet.Add(currentLink);

            Document doc = new Document();
            doc.Add(new Field("link", currentLink, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);

            Console.Write("\rSites Explored: {0}", linkSet.Count);
        }
        catch (Exception)
        {
            continue;
        }
    }
    writer.Optimize();
    writer.Close();
    Console.WriteLine();

    // Execute the search
    String search = "suits";
    QueryParser parser = new QueryParser("content", analyzer);
    Query query = parser.Parse(search);
    var searcher = new IndexSearcher(indexPath);
    Hits hits = searcher.Search(query);
    int results = hits.Length();
    Console.WriteLine("Found {0} results for \"{1}\"", results, search);
    for (int i = 0; i < results; i++)
    {
        Document doc = hits.Doc(i);
        float score = hits.Score(i);
        Console.WriteLine("Result num {0}, score {1}", i + 1, score);
        Console.WriteLine("URL: {0}", doc.Get("link"));
    }
}
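// The snippet above targets the legacy Lucene.Net 2.x API (path-based
// IndexWriter, Hits, writer.Optimize()). A minimal sketch of the same
// index-and-search flow against Lucene.Net 4.8, where TopDocs/ScoreDocs
// replace the old Hits API; the field names and example values are
// placeholders, not taken from the original:
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;

static class SearchSketch
{
    public static void Run(string indexPath)
    {
        var dir = FSDirectory.Open(indexPath);
        var analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48);

        // Index a single example document
        using (var writer = new IndexWriter(dir, new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)))
        {
            var doc = new Document();
            doc.Add(new StringField("link", "http://example.com/", Field.Store.YES));
            doc.Add(new TextField("content", "example page content", Field.Store.YES));
            writer.AddDocument(doc);
        }

        // Search the index and print scored hits
        using (var reader = DirectoryReader.Open(dir))
        {
            var searcher = new IndexSearcher(reader);
            var parser = new QueryParser(LuceneVersion.LUCENE_48, "content", analyzer);
            TopDocs top = searcher.Search(parser.Parse("example"), 10);
            foreach (ScoreDoc sd in top.ScoreDocs)
            {
                Document hit = searcher.Doc(sd.Doc);
                System.Console.WriteLine("{0}: {1}", sd.Score, hit.Get("link"));
            }
        }
    }
}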
static void Main(string[] args)
{
    var webCrawler = new WebCrawler("https://www.google.com");
    var robotstxt = webCrawler.GetRobotsTxt();
    var rules = RobotsTxtReader.RobotsTxtParser.GetRulesApplyingForAll(robotstxt);
}
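// Hypothetical follow-up (the shape of the parsed rules above is not shown):
// once disallowed path prefixes have been extracted from robots.txt, a crawler
// would typically filter candidate URLs against them before fetching.
using System;
using System.Collections.Generic;
using System.Linq;

static class RobotsFilter
{
    // True if no disallowed path prefix matches the URL's path
    public static bool IsAllowed(Uri url, IEnumerable<string> disallowedPaths) =>
        !disallowedPaths.Any(p => url.AbsolutePath.StartsWith(p, StringComparison.Ordinal));
}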