예제 #1
0
        /// <summary>
        /// Downloads the document at <c>website.Url</c> and stores its content in <c>website.Html</c>.
        /// </summary>
        /// <remarks>
        /// If the server replies with an HTTP error, the response's status description is stored in
        /// <c>website.Html</c> and <c>website.ResponseCode</c> is set to 404 for "Not Found" or to 0 for
        /// any other status. Failures without an HTTP response (for example DNS errors or timeouts)
        /// are not handled here: they are logged, the crawl markers on <paramref name="website"/> are
        /// reset, and the exception is rethrown.
        /// </remarks>
        /// <param name="website">Record supplying the URL and receiving the download result.</param>
        /// <exception cref="WebException">The request failed without producing an HTTP response.</exception>
        public void Download(CrawlerWebsite website)
        {
            try
            {
                try
                {
                    using var webClient = new WebClient();
                    website.Html        = webClient.DownloadString(website.Url);
                }
                // WebException.Response is null when no HTTP response was received; the filter lets
                // those failures fall through to the outer handler instead of dereferencing null.
                catch (WebException ex) when (ex.Response is HttpWebResponse response)
                {
                    // if/else so that the 404 marker is not overwritten by the fallback code.
                    if (response.StatusCode == HttpStatusCode.NotFound)
                    {
                        website.ResponseCode = 404;
                    }
                    else
                    {
                        website.ResponseCode = 0;
                    }

                    website.Html = response.StatusDescription;
                }

                // Message template instead of interpolation: braces inside the URL must not be
                // interpreted as logging placeholders.
                _logger.LogInformation(
                    "Downloaded: {Url} with code {ResponseCode}",
                    website.Url,
                    website.ResponseCode);
            }
            catch (Exception e)
            {
                // Pass the exception itself so the stack trace reaches the log sink.
                _logger.LogError(e, "{Message}", e.Message);
                website.Crawled          = false;
                website.CrawledTimestamp = null;
                throw;
            }
        }
예제 #2
0
        /// <summary>
        /// Proposes Czech Wikipedia pages for crawling, working through noun specifications in batches
        /// until no unprocessed specification is left.
        /// </summary>
        /// <remarks>
        /// Each pass loads up to <c>options.StepCount</c> specifications, in random order, whose
        /// <c>CrawlerProposedWiki</c> flag is not set, whose <c>DeclensionSg1</c> flag is set, and whose
        /// word has not been marked with <c>CrawlerMeaningCheckProposed</c>. Every selected
        /// specification and its word are marked as proposed, and a <c>CrawlerWebsite</c> pointing at
        /// <c>https://cs.wikipedia.org/wiki/&lt;Word&gt;</c> is persisted alongside it. Changes are flushed
        /// every <c>options.FlushAfterCount</c> items and once more at the end of the batch, after
        /// which the method waits for <c>options.SleepTime</c> before the next pass.
        /// </remarks>
        /// <param name="options">
        /// Batch settings; when <c>null</c>, a default <c>ProposerThreadOptions</c> instance is used.
        /// The options are validated via <c>Validate()</c> before any work starts.
        /// </param>
        /// <returns>A task that completes once a pass finds no more specifications to propose.</returns>
        public async Task Propose(
            ProposerThreadOptions? options
            )
        {
            options ??= new ProposerThreadOptions();
            options.Validate();

            // Declared outside the loop because the loop condition inspects the last batch.
            List<CsWordNounSpecification> wordsNounSpec;

            do
            {
                wordsNounSpec = _context.CsWordNounSpecifications
                                .Include(e => e.Word)
                                .Where(
                    specification => specification.CrawlerProposedWiki != true &&
                    specification.DeclensionSg1 == true &&
                    specification.Word.CrawlerMeaningCheckProposed != true
                    )
                                .OrderBy(r => Guid.NewGuid()) // Random order (warning: it is very slow operation!)
                                .Take(options.StepCount)
                                .ToList();

                int counter = 0;
                foreach (var wordNounSpec in wordsNounSpec)
                {
                    counter++;

                    // Mark both the specification and its word so the next query skips them.
                    wordNounSpec.CrawlerProposedWiki = true;
                    wordNounSpec.Word.CrawlerMeaningCheckProposed     = true;
                    wordNounSpec.Word.CrawlerMeaningCheckProposedTime = DateTime.Now;

                    var crawlerWebsite = new CrawlerWebsite
                    {
                        Url    = $"https://cs.wikipedia.org/wiki/{wordNounSpec.Word.Text.FirstCharToUpper()}",
                        WordId = wordNounSpec.WordId
                    };

                    await _entityManager.Persist(wordNounSpec);

                    await _entityManager.Persist(crawlerWebsite);

                    // Flush periodically so a large batch is not held back until the end.
                    if (counter % options.FlushAfterCount == 0)
                    {
                        await _entityManager.Flush();
                    }
                }

                // Flush whatever is left over from the last partial group.
                await _entityManager.Flush();

                // Asynchronous pause: Thread.Sleep would block the thread running this async method.
                await Task.Delay(options.SleepTime);
            } while (wordsNounSpec.Count > 0);
        }
예제 #3
0
 /// <summary>
 /// Not implemented yet: every call throws before a task is created.
 /// </summary>
 /// <param name="webpage">Unused for now; annotated as not-null.</param>
 /// <returns>Never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown, synchronously.</exception>
 private static Task Post([NotNull] CrawlerWebsite webpage) =>
     throw new NotImplementedException();
예제 #4
0
 /// <summary>
 /// Currently a no-op: the body is empty, so <paramref name="website"/> is neither read nor modified.
 /// </summary>
 /// <param name="website">Unused by the present implementation.</param>
 public void Scrape(CrawlerWebsite website)
 {
 }