/// <summary>
/// Blocking wrapper around <c>GetWikipediaTextFromUrl(url)</c>: the asynchronous
/// fetch is started through <c>Task.Run</c> and the calling thread waits for it.
/// </summary>
/// <param name="url">Address handed unchanged to <c>GetWikipediaTextFromUrl</c>.</param>
/// <returns>The result produced by the asynchronous call; also stored in <c>_wikiResult</c>.</returns>
/// <remarks>
/// Because the wait goes through <c>Task.Result</c>, a failure inside the task
/// reaches the caller wrapped in an <c>AggregateException</c>.
/// </remarks>
public WikipediaTextResult GetWikipediaTextFromUrlSynchronously(string url)
{
    var pending = Task.Run<WikipediaTextResult>(async () => await GetWikipediaTextFromUrl(url));

    // Block until the fetch has finished, then remember the outcome.
    WikipediaTextResult fetched = pending.Result;
    _wikiResult = fetched;
    return fetched;
}
/// <summary>
/// Blocking wrapper around the parameterless <c>GetWikipediaTextFromUrl()</c>:
/// the asynchronous fetch is started through <c>Task.Run</c> and the calling
/// thread waits for it.
/// </summary>
/// <returns>The result produced by the asynchronous call; also stored in <c>_wikiResult</c>.</returns>
/// <remarks>
/// Because the wait goes through <c>Task.Result</c>, a failure inside the task
/// reaches the caller wrapped in an <c>AggregateException</c>.
/// </remarks>
public virtual WikipediaTextResult GetWikipediaTextFromUrlSynchronously()
{
    var pending = Task.Run<WikipediaTextResult>(async () => await GetWikipediaTextFromUrl());

    // Block until the fetch has finished, then remember the outcome.
    WikipediaTextResult fetched = pending.Result;
    _wikiResult = fetched;
    return fetched;
}
/// <summary>
/// Returns the next unread text section and removes it from the current result.
/// When the current result is missing or exhausted, the next cached result is
/// pulled from <c>_cachedData</c>; at most five loop passes are made.
/// </summary>
/// <returns>
/// The first remaining section of the current result, or <c>string.Empty</c>
/// when no section could be obtained (including when the cache is empty).
/// </returns>
/// <remarks>
/// <c>PrefetchMoreIfNeeded</c> is invoked on every call, whether or not a
/// section was found.
/// </remarks>
public string GetNextTextSection()
{
    const int maxAttempts = 5;
    string textSection = string.Empty;

    for (int attempt = 0; attempt < maxAttempts; attempt++)
    {
        if (_wikiText != null && _wikiText.TextSections.Count > 0)
        {
            // Consume the head section so the next call moves on.
            textSection = _wikiText.TextSections[0];
            _wikiText.TextSections.RemoveAt(0);
            break;
        }

        // The cache is filled asynchronously and may not have anything yet.
        // Dequeue() on an empty queue throws InvalidOperationException, so
        // give up for this call and let the prefetch below top the cache up.
        if (_cachedData.Count == 0)
        {
            break;
        }

        _wikiText = _cachedData.Dequeue();
    }

    PrefetchMoreIfNeeded();
    return textSection;
}
/// <summary>
/// Opens <paramref name="url"/> through <c>_angleSharpContext</c> and converts
/// the loaded document with <c>ParseHtml</c>.
/// </summary>
/// <param name="url">Address passed to <c>OpenAsync</c>.</param>
/// <returns>The value returned by <c>ParseHtml</c> for the opened document.</returns>
public async Task<WikipediaTextResult> GetWikipediaTextFromUrl(string url)
{
    var loadedPage = await _angleSharpContext.OpenAsync(url);
    return ParseHtml(loadedPage);
}
/// <summary>
/// Builds a document from in-memory markup by supplying <paramref name="html"/>
/// as the request content to <c>_angleSharpContext.OpenAsync</c>, then converts
/// it with <c>ParseHtml</c>.
/// </summary>
/// <param name="html">Markup used as the content of the virtual request.</param>
/// <returns>The value returned by <c>ParseHtml</c> for the resulting document.</returns>
public async Task<WikipediaTextResult> GetWikipediaTextFromString(string html)
{
    var loadedPage = await _angleSharpContext.OpenAsync(request => request.Content(html));
    return ParseHtml(loadedPage);
}
/// <summary>
/// Prepares the instance for use: loads the first result, creates an empty
/// cache queue and starts filling it via <c>PrefetchMoreIfNeeded</c>.
/// </summary>
public void Init()
{
    // The first result is loaded with the blocking base-class wrapper
    // (not awaited), so it is in place before the cache is created.
    var firstResult = base.GetWikipediaTextFromUrlSynchronously();
    _wikiText = firstResult;

    _cachedData = new Queue<WikipediaTextResult>();
    PrefetchMoreIfNeeded();
}
/// <summary>
/// Tops up <c>_cachedData</c> by awaiting <c>base.GetWikipediaTextFromUrl()</c>
/// until the queue holds <c>NUM_PREFETCHES_TO_KEEP_ON_HAND</c> entries.
/// If there is no current result (<c>_wikiText</c> is null), the first fetched
/// entry is also installed as the current one.
/// </summary>
/// <remarks>
/// This is an <c>async void</c> method: callers get control back at the first
/// incomplete await and cannot await completion or observe exceptions from it.
/// The signature is kept as-is for existing callers.
/// </remarks>
public async void PrefetchMoreIfNeeded()
{
    // Compare the queue size directly against the target. The previous
    // "i <= target - Count" bound shrank as items were enqueued while i grew,
    // so the loop stopped after filling only about half of the shortfall.
    while (_cachedData.Count < NUM_PREFETCHES_TO_KEEP_ON_HAND)
    {
        var data = await base.GetWikipediaTextFromUrl();
        _cachedData.Enqueue(data);

        if (_wikiText == null)
        {
            _wikiText = data;
        }
    }
}
/// <summary>
/// Picks one text section from the current result (index chosen by
/// <c>base.GetRandomIndex</c>), runs it through <paramref name="ParsingFunction"/>
/// and wraps the outcome with <c>TextToTextSample</c>.
/// </summary>
/// <param name="ParsingFunction">Callback applied to the chosen section together with <paramref name="separator"/>.</param>
/// <param name="separator">Passed through unchanged as the callback's second argument.</param>
/// <returns>
/// The sample built from the parsed text and the result's URL, or <c>null</c>
/// when no result is available or it contains no text sections.
/// </returns>
public ITextSample ExtractTextWithFunction(Func<string, char[], string> ParsingFunction, char[] separator)
{
    // Load a result on first use.
    if (_wikiResult == null)
    {
        _wikiResult = GetWikipediaTextFromUrlSynchronously();
    }

    bool hasSections = _wikiResult != null && _wikiResult.TextSections.Count != 0;
    if (!hasSections)
    {
        return null;
    }

    var sections = _wikiResult.TextSections;
    string chosenSection = sections[base.GetRandomIndex(sections)];
    string parsedText = ParsingFunction(chosenSection, separator);

    return TextToTextSample(parsedText, _wikiResult.Url);
}
/// <summary>
/// Converts a loaded document into a <c>WikipediaTextResult</c>: records the
/// document URL, the mobile flag from <c>IsMobile</c> and the title from
/// <c>ExtractTitle</c>, then adds every section returned by
/// <c>ExtractTextSections</c> after passing it through <c>base.NormalizeText</c>.
/// </summary>
/// <param name="document">Document to read the URL, title and text sections from.</param>
/// <returns>The populated result.</returns>
public WikipediaTextResult ParseHtml(AngleSharp.Dom.IDocument document)
{
    bool mobileLayout = IsMobile(document.Url);

    WikipediaTextResult parsed = new WikipediaTextResult
    {
        Url = document.Url,
        IsMobile = mobileLayout,
        Title = ExtractTitle(document, mobileLayout)
    };

    // Sections are normalised one by one as they are enumerated.
    foreach (string rawSection in ExtractTextSections(document, mobileLayout))
    {
        string normalised = base.NormalizeText(rawSection);
        parsed.AddText(normalised);
    }

    return parsed;
}