Exemplo n.º 1
0
        /// <summary>
        /// Times 100,000 consecutive <c>DomainParser.Get</c> calls on generated
        /// <c>sub{i}.test.co.uk</c> host names and prints the elapsed milliseconds to the console.
        /// </summary>
        /// <param name="useUriNormalization">
        /// When <c>true</c> the parser is built with a <c>UriNormalizer</c>;
        /// otherwise with an <c>IdnMappingNormalizer</c>.
        /// </param>
        public static void Performance(bool useUriNormalization)
        {
            const int iterations = 100000;

            // The cast gives both branches a common type on compilers without target-typed conditionals.
            var normalizer = useUriNormalization
                ? (IDomainNormalizer)new UriNormalizer()
                : new IdnMappingNormalizer();

            // Parser construction happens before the stopwatch starts, so it is not part of the measurement.
            var ruleProvider = new FileTldRuleProvider("effective_tld_names.dat");
            var domainParser = new DomainParser(ruleProvider, normalizer);

            var timer = Stopwatch.StartNew();
            for (var index = 0; index < iterations; index++)
            {
                // Only the cost of the call is of interest; the parsed result is dropped.
                domainParser.Get($"sub{index}.test.co.uk");
            }
            timer.Stop();

            Console.WriteLine("Elapsed:{0}ms", timer.Elapsed.TotalMilliseconds);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Scans the content of every chunk with the regex produced by <c>RegexUrlBuilder</c>,
        /// normalizes each match with a <c>UriNormalizer</c> and returns the distinct results.
        /// </summary>
        /// <param name="chunks">Chunks whose <c>Content</c> is searched for links.</param>
        /// <param name="cancellationToken">
        /// Token handed to the parallel loop and checked between individual links.
        /// </param>
        /// <returns>
        /// The distinct non-null normalized links. Results are gathered in a
        /// <c>ConcurrentBag</c>, so no particular ordering should be relied upon.
        /// </returns>
        private IEnumerable<string> GetLinks(IEnumerable<Chunk> chunks, CancellationToken cancellationToken)
        {
            // Materialize once: the sequence is both counted and iterated below.
            var chunkList       = chunks.ToList();
            var result          = new ConcurrentBag<string>();
            var regexData       = new RegexUrlBuilder().Build();
            var totalCount      = chunkList.Count;
            var doneChunksCount = 0;

            var po = new ParallelOptions
            {
                CancellationToken      = cancellationToken,
                MaxDegreeOfParallelism = System.Environment.ProcessorCount
            };

            // Processes a single chunk; kept as a Task-returning function so that its
            // completion and any exception can be observed by the parallel loop below.
            async Task ProcessChunkAsync(Chunk chunk)
            {
                var uriNormalizer = new UriNormalizer();

                var matches = regexData.Regex.Matches(chunk.Content);

                foreach (var link in matches.Select(m => m.Value))
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    var normalized = await uriNormalizer.Normalize(link);

                    if (normalized != null)
                    {
                        result.Add(normalized);
                    }
                }

                // Several workers finish chunks concurrently; a plain ++ could lose updates.
                var done = Interlocked.Increment(ref doneChunksCount);
                _logger.LogInformation($"Processing done for {done} of {totalCount}");
            }

            // Parallel.ForEach only accepts synchronous bodies. Passing an async lambda makes it
            // "async void": the loop would return at the first incomplete await, before the links
            // have been collected. Blocking on the Task keeps each iteration alive until its chunk
            // is fully processed and lets failures/cancellation reach the caller.
            // NOTE(review): this blocks one worker per chunk while Normalize is pending; if
            // Normalize performs real I/O, consider making GetLinks itself asynchronous.
            Parallel.ForEach(chunkList, po, chunk => ProcessChunkAsync(chunk).GetAwaiter().GetResult());

            return result.Distinct();
        }
Exemplo n.º 3
0
 /// <summary>
 /// Assigns a newly constructed <c>UriNormalizer</c> to the <c>_uriNormalizer</c> field.
 /// </summary>
 // NOTE(review): presumably a test-fixture setup hook; the attribute is not visible here, so confirm.
 public void SetUp() => _uriNormalizer = new UriNormalizer();