예제 #1
0
        /// <summary>
        /// Walks the page ids from <c>parserSettings.StartPoint</c> to
        /// <c>parserSettings.EndPoint</c> (inclusive). For each page it loads the source,
        /// parses it into an HTML document, collects the URLs found on it and processes
        /// them with <c>parserSettings.ThreadCount</c> concurrent <c>ParseEachUrl</c> tasks
        /// that share a single queue.
        /// </summary>
        /// <remarks>
        /// <c>OnPageCompleted</c> is raised only for pages that yielded at least one URL.
        /// <c>OnCompleted</c> is raised when the loop finishes, or as soon as
        /// <c>isActive</c> is observed to be false at the start of an iteration.
        /// Because the method is <c>async void</c>, exceptions thrown here cannot be
        /// observed by the caller.
        /// </remarks>
        private async void Worker()
        {
            for (int i = parserSettings.StartPoint; i <= parserSettings.EndPoint; i++)
            {
                // The parser was deactivated: report completion and stop.
                if (!isActive)
                {
                    OnCompleted?.Invoke(this);
                    return;
                }

                // Load the raw page source for this page id.
                var source = await loader.GetSourceByPageId(i);

                // Build an HTML document from the source.
                var document = await new HtmlParser().ParseDocumentAsync(source);

                // Collect the URLs straight into the work queue so that the sequence
                // returned by CollectUrls is enumerated exactly once.
                var urlQueue = new ConcurrentQueue<string>(parser.CollectUrls(document));

                // Nothing to process on this page: move on to the next one.
                if (urlQueue.IsEmpty)
                {
                    continue;
                }

                // Start the workers; all of them drain the same queue.
                int workerCount = parserSettings.ThreadCount;
                var tasks = new Task[workerCount];
                for (int j = 0; j < workerCount; j++)
                {
                    tasks[j] = ParseEachUrl(urlQueue);
                }

                // Await instead of blocking: Task.WaitAll would hold the current thread
                // and can deadlock when the workers' continuations need that same
                // synchronization context (e.g. a UI thread).
                await Task.WhenAll(tasks);

                // Report that this page has been fully processed.
                OnPageCompleted?.Invoke(this, i);
            }

            // All pages processed: report completion.
            OnCompleted?.Invoke(this);

            // Mark the parser as inactive.
            isActive = false;
        }
예제 #2
0
        /// <summary>
        /// Walks the page ids from <c>parserSettings.StartPoint</c> to
        /// <c>parserSettings.EndPoint</c> (inclusive). For each page it loads the source,
        /// parses it into an HTML document, runs <c>parser.Parse</c> on it and publishes
        /// the result through <c>OnNewData</c>.
        /// </summary>
        /// <remarks>
        /// <c>OnCompleted</c> is raised when the loop finishes, or as soon as
        /// <c>isActive</c> is observed to be false at the start of an iteration.
        /// Because the method is <c>async void</c>, exceptions thrown here cannot be
        /// observed by the caller.
        /// </remarks>
        private async void Worker()
        {
            for (int i = parserSettings.StartPoint; i <= parserSettings.EndPoint; i++)
            {
                // The parser was deactivated: report completion and stop.
                if (!isActive)
                {
                    OnCompleted?.Invoke(this);
                    return;
                }

                // Load the raw page source for this page id.
                var source = await loader.GetSourceByPageId(i);

                // Build an HTML document from the source.
                var domParser = new HtmlParser();
                var document = await domParser.ParseAsync(source);

                // Extract the data and publish it. The null-conditional call avoids a
                // NullReferenceException when nobody has subscribed to OnNewData.
                var result = parser.Parse(document);
                OnNewData?.Invoke(this, result);
            }

            // All pages processed: report completion.
            OnCompleted?.Invoke(this);

            // Mark the parser as inactive.
            isActive = false;
        }