/// <summary>
/// Loads the HTML document located at the URL carried by <paramref name="parseInfo"/>.
/// </summary>
/// <remarks>
/// A null <paramref name="parseInfo"/> is silently ignored. Nothing is done with the
/// loaded document yet; only the load itself is performed.
/// </remarks>
/// <param name="parseInfo">Parse description whose <c>Url</c> is handed to the HTML loader.</param>
public void Get(ParseInfo parseInfo)
{
    if (parseInfo == null)
    {
        return;
    }

    // The document is fetched but intentionally left unprocessed.
    HtmlDocument loaded = _htmlLoader.Load(parseInfo.Url);
}
/// <summary>
/// Loads the HTML behind <paramref name="url"/> and runs it through the cleaner.
/// </summary>
/// <param name="url">Address of the page to load; must not be null.</param>
/// <returns>The article produced by the cleaner for the loaded HTML.</returns>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
public IArticle Get(string url)
{
    if (url == null)
    {
        throw new ArgumentNullException(nameof(url));
    }

    string html = _loader.Load(url);

    // When the site-name pattern does not match, the whole URL is used as the site name.
    Match siteNameMatch = _siteNameRegex.Match(url);
    string siteName = siteNameMatch.Success ? siteNameMatch.Captures[0].Value : url;

    // Lower-case with the invariant culture so the value handed to the cleaner does not
    // depend on the current thread culture (e.g. the Turkish dotless 'i').
    return _cleaner.Clean(siteName.ToLowerInvariant(), html);
}
/// <summary>
/// Creates a selector over the HTML returned by <paramref name="htmlLoader"/>.
/// </summary>
/// <param name="htmlLoader">Loader whose <c>Load()</c> result is parsed into the internal document.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="htmlLoader"/> is null.</exception>
public FluentHtmlSelector(IHtmlLoader htmlLoader)
{
    // Fail fast with a descriptive exception before any state is assigned.
    if (htmlLoader == null)
    {
        throw new System.ArgumentNullException(nameof(htmlLoader));
    }

    _document = new HtmlDocument();
    _document.LoadHtml(htmlLoader.Load());
}
/// <summary>
/// Awaits the HTML loader and wraps the value it produces in an <c>Ok</c> result.
/// </summary>
/// <returns>The result of <c>Ok</c> applied to the loaded value.</returns>
public async Task<IActionResult> Load() => Ok(await _htmlLoader.Load());
/// <summary>
/// Reads HTML from a stream.
/// </summary>
/// <param name="stream">The stream to read the HTML from.</param>
/// <returns>HTML structures: one <see cref="HtmlElement"/> wrapping each loaded node.</returns>
private IEnumerable<HtmlElement> LoadHtml(Stream stream)
{
    // The load call runs immediately; the wrapping projection is applied by Select.
    var loadedNodes = _html.Load(stream);
    return loadedNodes.Select(loadedNode => new HtmlElement(loadedNode));
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and builds a <see cref="Recipe"/> from it.
/// </summary>
/// <remarks>
/// Ingredient group titles are taken from the <c>p</c> elements under the element with id
/// <c>ingredient-section</c> (the first <c>p</c> is skipped), and the group contents from the
/// sibling <c>ul</c> elements. Titles and lists are paired by position.
/// The recipe name and preview URL are extracted from the raw HTML with
/// <c>_nameRegex</c> (group <c>name</c>) and <c>_previewPath</c> (group <c>url</c>).
/// </remarks>
/// <param name="url">Address of the recipe page; also stored on the resulting recipe.</param>
/// <returns>The imported recipe.</returns>
/// <exception cref="Exception">
/// The ingredient section is missing, or the number of group titles does not match the
/// number of ingredient lists.
/// </exception>
public async Task<Recipe> Import(string url)
{
    var html = await uriLoader.Load(url);

    var doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(html);

    var titleNodes = doc.DocumentNode.SelectNodes("//*[@id=\"ingredient-section\"]/div/p");
    var listNodes = doc.DocumentNode.SelectNodes("//*[@id=\"ingredient-section\"]/div/ul");

    // SelectNodes yields null (not an empty collection) when the XPath matches nothing,
    // so report a missing section as an import failure instead of crashing in LINQ.
    if (titleNodes == null || listNodes == null)
    {
        throw new Exception("Unable to import");
    }

    // The first paragraph is not a group title, hence Skip(1).
    var names = titleNodes.Skip(1).ToArray();
    var ingredient = listNodes.ToArray();

    if (names.Length != ingredient.Length)
    {
        throw new Exception("Unable to import");
    }

    var name = _nameRegex.Match(html).Groups["name"].Value;
    var preview = _previewPath.Match(html).Groups["url"].Value;

    // Pair each group title with the ingredient list at the same position.
    var ingredientGroups = new IngredientGroup[names.Length];
    for (var index = 0; index < names.Length; index++)
    {
        ingredientGroups[index] = Get(names[index].InnerHtml, ingredient[index].InnerHtml);
    }

    return new Recipe(name, url, ingredientGroups, preview);
}