/// <summary>
/// Times 100000 consecutive <c>DomainParser.Get</c> calls on generated
/// "sub{i}.test.co.uk" host names and writes the elapsed milliseconds to the console.
/// </summary>
/// <param name="useUriNormalization">
/// When <c>true</c> the parser is built with a <c>UriNormalizer</c>;
/// otherwise with an <c>IdnMappingNormalizer</c>.
/// </param>
public static void Performance(bool useUriNormalization)
{
    // Pick the normalizer implementation under measurement.
    IDomainNormalizer normalizer = useUriNormalization
        ? (IDomainNormalizer)new UriNormalizer()
        : new IdnMappingNormalizer();

    // Rules are read from a file next to the executable; parser construction is not timed.
    var ruleProvider = new FileTldRuleProvider("effective_tld_names.dat");
    var domainParser = new DomainParser(ruleProvider, normalizer);

    var stopwatch = Stopwatch.StartNew();
    var iteration = 0;
    while (iteration < 100000)
    {
        // The parse result is intentionally discarded; only the elapsed time matters.
        domainParser.Get($"sub{iteration}.test.co.uk");
        iteration++;
    }
    stopwatch.Stop();

    Console.WriteLine("Elapsed:{0}ms", stopwatch.Elapsed.TotalMilliseconds);
}
/// <summary>
/// Finds every URL match in the content of the supplied chunks, normalizes each match
/// and returns the distinct, non-null normalized links.
/// </summary>
/// <param name="chunks">Chunks whose <c>Content</c> is scanned for URLs.</param>
/// <param name="cancellationToken">Token used to cancel the parallel loop.</param>
/// <returns>The distinct normalized links collected from all chunks.</returns>
/// <exception cref="OperationCanceledException">The token was cancelled while processing.</exception>
/// <exception cref="AggregateException">One or more chunks failed while being processed.</exception>
private IEnumerable<string> GetLinks(IEnumerable<Chunk> chunks, CancellationToken cancellationToken)
{
    var result = new ConcurrentBag<string>();
    var regexData = new RegexUrlBuilder().Build();

    // Materialize once so a lazy source is not enumerated twice (count + loop).
    var chunkList = chunks as ICollection<Chunk> ?? chunks.ToList();
    var totalCount = chunkList.Count;
    var doneChunksCount = 0;

    var po = new ParallelOptions
    {
        CancellationToken = cancellationToken,
        MaxDegreeOfParallelism = System.Environment.ProcessorCount
    };

    // Normalizes every link of one chunk. Returns a Task so the caller can wait for it.
    async Task ProcessChunkAsync(Chunk chunk)
    {
        var uriNormalizer = new UriNormalizer();
        var matches = regexData.Regex.Matches(chunk.Content);
        foreach (var link in matches.Select(m => m.Value))
        {
            // NOTE(review): assumes Normalize returns Task/ValueTask; ConfigureAwait(false)
            // keeps the continuation off any captured context while the worker blocks below.
            var normalized = await uriNormalizer.Normalize(link).ConfigureAwait(false);
            if (normalized != null)
            {
                result.Add(normalized);
            }
        }
    }

    Parallel.ForEach(chunkList, po, chunk =>
    {
        // An async lambda here would be async void: Parallel.ForEach would return before
        // the awaits finish and the result bag would be read while still being filled.
        // Block the worker until the chunk is done so the loop really waits for it.
        ProcessChunkAsync(chunk).GetAwaiter().GetResult();

        // Several workers update the counter concurrently; increment atomically.
        var done = Interlocked.Increment(ref doneChunksCount);
        _logger.LogInformation($"Processing done for {done} of {totalCount}");
        po.CancellationToken.ThrowIfCancellationRequested();
    });

    return result.Distinct();
}
// Assigns a newly constructed UriNormalizer to the _uriNormalizer field.
// NOTE(review): presumably a test-fixture setup hook run before each test — confirm against the enclosing class.
public void SetUp() => _uriNormalizer = new UriNormalizer();