/// <summary>
/// Blocking wrapper around <c>GetWikipediaTextFromUrl(url)</c>: runs the
/// asynchronous fetch via <c>Task.Run</c>, waits for it to finish, stores the
/// outcome in <c>_wikiResult</c> and returns it.
/// </summary>
/// <param name="url">Address passed through to <c>GetWikipediaTextFromUrl</c>.</param>
/// <returns>The result produced by the asynchronous fetch.</returns>
public WikipediaTextResult GetWikipediaTextFromUrlSynchronously(string url)
        {
            var fetchTask = Task.Run <WikipediaTextResult>(async() => await GetWikipediaTextFromUrl(url));

            // Reading Result blocks the calling thread until the task completes;
            // a failure inside the task surfaces here wrapped in AggregateException.
            WikipediaTextResult fetched = fetchTask.Result;

            _wikiResult = fetched;
            return fetched;
        }
        /// <summary>
        /// Blocking wrapper around the parameterless <c>GetWikipediaTextFromUrl()</c>:
        /// runs the asynchronous fetch via <c>Task.Run</c>, waits for it to finish,
        /// stores the outcome in <c>_wikiResult</c> and returns it.
        /// </summary>
        /// <returns>The result produced by the asynchronous fetch.</returns>
        public virtual WikipediaTextResult GetWikipediaTextFromUrlSynchronously()
        {
            var fetchTask = Task.Run <WikipediaTextResult>(async() => await GetWikipediaTextFromUrl());

            // Reading Result blocks the calling thread until the task completes;
            // a failure inside the task surfaces here wrapped in AggregateException.
            WikipediaTextResult fetched = fetchTask.Result;

            _wikiResult = fetched;
            return fetched;
        }
Beispiel #3
0
        /// <summary>
        /// Returns the next text section of the current result, moving on to
        /// prefetched results from <c>_cachedData</c> when the current one is
        /// missing or has no sections left. Always triggers
        /// <c>PrefetchMoreIfNeeded</c> before returning.
        /// </summary>
        /// <returns>
        /// The first remaining section of the current result (which is removed
        /// from that result), or <see cref="string.Empty"/> when no section could
        /// be obtained within five attempts or the prefetch queue is empty.
        /// </returns>
        public string GetNextTextSection()
        {
            string textSection = string.Empty;
            int    counter     = 0;

            while (string.IsNullOrEmpty(textSection) && counter < 5)
            {
                if (_wikiText == null || _wikiText.TextSections.Count == 0)
                {
                    // Queue<T>.Dequeue throws InvalidOperationException on an empty
                    // queue. The queue is refilled asynchronously, so it can easily
                    // be empty here; give up for this call and report "nothing
                    // available" the same way an exhausted retry budget does.
                    if (_cachedData.Count == 0)
                    {
                        break;
                    }

                    _wikiText = _cachedData.Dequeue();
                }
                else
                {
                    // Consume the head section so the next call moves forward.
                    textSection = _wikiText.TextSections[0];
                    _wikiText.TextSections.RemoveAt(0);
                    break;
                }

                counter++;
            }

            // Kick off a refill regardless of the outcome so later calls have data.
            PrefetchMoreIfNeeded();

            return(textSection);
        }
        /// <summary>
        /// Opens <paramref name="url"/> through <c>_angleSharpContext</c> and
        /// converts the loaded document with <c>ParseHtml</c>.
        /// </summary>
        /// <param name="url">Address handed to <c>OpenAsync</c>.</param>
        /// <returns>The result built by <c>ParseHtml</c> from the loaded document.</returns>
        public async Task <WikipediaTextResult> GetWikipediaTextFromUrl(string url)
        {
            AngleSharp.Dom.IDocument loadedPage = await _angleSharpContext.OpenAsync(url);

            return ParseHtml(loadedPage);
        }
        /// <summary>
        /// Builds a document from the supplied markup through
        /// <c>_angleSharpContext</c> (the markup is supplied as the content of a
        /// virtual request) and converts it with <c>ParseHtml</c>.
        /// </summary>
        /// <param name="html">Markup used as the request content.</param>
        /// <returns>The result built by <c>ParseHtml</c> from the created document.</returns>
        public async Task <WikipediaTextResult> GetWikipediaTextFromString(string html)
        {
            var inMemoryPage = await _angleSharpContext.OpenAsync(request => request.Content(html));

            return ParseHtml(inMemoryPage);
        }
Beispiel #6
0
        /// <summary>
        /// Prepares the instance for use: loads the first result with the blocking
        /// base-class fetch, creates an empty prefetch queue and starts filling it.
        /// </summary>
        public void Init()
        {
            // The blocking variant is used because this method is not async; the
            // first result must be in place before Init returns.
            WikipediaTextResult firstResult = base.GetWikipediaTextFromUrlSynchronously();

            _wikiText = firstResult;
            _cachedData = new Queue <WikipediaTextResult>();

            PrefetchMoreIfNeeded();
        }
Beispiel #7
0
        /// <summary>
        /// Tops up <c>_cachedData</c> with freshly fetched results until it holds
        /// <c>NUM_PREFETCHES_TO_KEEP_ON_HAND</c> entries. If no current result is
        /// set (<c>_wikiText</c> is null), the first fetched result also becomes
        /// the current one.
        /// </summary>
        /// <remarks>
        /// This is an <c>async void</c> method: control returns to the caller at
        /// the first await, and the caller can neither await completion nor
        /// observe an exception thrown by the fetch. The signature is kept as is
        /// for compatibility with existing callers.
        /// </remarks>
        public async void PrefetchMoreIfNeeded()
        {
            // Test the live queue size on every pass. Counting iterations against
            // "target minus current count" stops early, because the shortfall
            // shrinks with each enqueue while the counter grows (a target of 5
            // starting from an empty queue would fetch only 3 items).
            while (_cachedData.Count < NUM_PREFETCHES_TO_KEEP_ON_HAND)
            {
                var data = await base.GetWikipediaTextFromUrl();

                _cachedData.Enqueue(data);
                if (_wikiText == null)
                {
                    _wikiText = data;
                }
            }
        }
        /// <summary>
        /// Picks one text section of the cached result at an index supplied by
        /// <c>GetRandomIndex</c>, runs it through <paramref name="ParsingFunction"/>
        /// and wraps the outcome with <c>TextToTextSample</c>. The result is
        /// fetched with the blocking call first if none is cached yet.
        /// </summary>
        /// <param name="ParsingFunction">
        /// Callback invoked with the chosen section and <paramref name="separator"/>;
        /// its return value becomes the sample text.
        /// </param>
        /// <param name="separator">Passed unchanged to <paramref name="ParsingFunction"/>.</param>
        /// <returns>
        /// The sample built from the parsed text and the result's URL, or
        /// <c>null</c> when there is no result or it has no text sections.
        /// </returns>
        public ITextSample ExtractTextWithFunction(Func <string, char[], string> ParsingFunction, char[] separator)
        {
            // Fetch lazily on first use.
            if (_wikiResult == null)
            {
                _wikiResult = GetWikipediaTextFromUrlSynchronously();
            }

            bool hasSections = _wikiResult != null && _wikiResult.TextSections.Count != 0;
            if (!hasSections)
            {
                return null;
            }

            var sections = _wikiResult.TextSections;
            var chosenIndex = base.GetRandomIndex(sections);

            string parsed = ParsingFunction(sections[chosenIndex], separator);
            return TextToTextSample(parsed, _wikiResult.Url);
        }
        /// <summary>
        /// Converts a loaded document into a <c>WikipediaTextResult</c>: records
        /// the document URL, whether <c>IsMobile</c> reports it as a mobile page
        /// and the extracted title, then adds every extracted text section after
        /// passing it through <c>NormalizeText</c>.
        /// </summary>
        /// <param name="document">The document to read the URL, title and text sections from.</param>
        /// <returns>The populated result.</returns>
        public WikipediaTextResult ParseHtml(AngleSharp.Dom.IDocument document)
        {
            // Determined once; it steers both title and section extraction.
            bool mobilePage = IsMobile(document.Url);

            var parsed = new WikipediaTextResult
            {
                Url      = document.Url,
                IsMobile = mobilePage,
                Title    = ExtractTitle(document, mobilePage)
            };

            foreach (string rawSection in ExtractTextSections(document, mobilePage))
            {
                string normalized = base.NormalizeText(rawSection);
                parsed.AddText(normalized);
            }

            return parsed;
        }