예제 #1
0
파일: Spidy.cs 프로젝트: ocinbat/Spidy
        /// <summary>
        /// Loads the HTML document found at the URL carried by <paramref name="parseInfo"/>.
        /// </summary>
        /// <param name="parseInfo">Holds the URL to load; a <c>null</c> value is ignored.</param>
        public void Get(ParseInfo parseInfo)
        {
            if (parseInfo == null)
            {
                return;
            }

            HtmlDocument loaded = _htmlLoader.Load(parseInfo.Url);
            if (loaded == null)
            {
                return;
            }

            // No processing of the loaded document is implemented here.
        }
예제 #2
0
        /// <summary>
        /// Loads the HTML for <paramref name="url"/> and hands it to the cleaner
        /// together with a lower-cased site name.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <returns>The article produced by the cleaner.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
        public IArticle Get(string url)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            string pageHtml = _loader.Load(url);

            // The site name is whatever the site-name regex captures from the URL;
            // when the regex does not match, the whole URL is used instead.
            string siteKey;
            Match hostMatch = _siteNameRegex.Match(url);
            if (hostMatch.Success)
            {
                siteKey = hostMatch.Captures[0].Value;
            }
            else
            {
                siteKey = url;
            }

            return _cleaner.Clean(siteKey.ToLower(), pageHtml);
        }
 /// <summary>
 /// Creates a selector over the HTML returned by <paramref name="htmlLoader"/>.
 /// </summary>
 /// <param name="htmlLoader">Source of the HTML that is parsed into the internal document.</param>
 public FluentHtmlSelector(IHtmlLoader htmlLoader)
 {
     var parsed = new HtmlDocument();
     var markup = htmlLoader.Load();

     parsed.LoadHtml(markup);
     _document = parsed;
 }
예제 #4
0
        /// <summary>
        /// Awaits the HTML loader and wraps whatever it returns in an <c>Ok</c> result.
        /// </summary>
        /// <returns>The <c>Ok</c> result carrying the loader's output.</returns>
        public async Task<IActionResult> Load() => Ok(await _htmlLoader.Load());
예제 #5
0
 /// <summary>
 /// Reads the HTML from a stream.
 /// </summary>
 /// <param name="stream">Stream the HTML is loaded from.</param>
 /// <returns>One <see cref="HtmlElement"/> wrapper for each node the loader yields.</returns>
 private IEnumerable<HtmlElement> LoadHtml(Stream stream)
 {
     return from node in _html.Load(stream)
            select new HtmlElement(node);
 }
예제 #6
0
파일: Rzwde.cs 프로젝트: BADF00D/Recipes
        /// <summary>
        /// Loads the recipe page at <paramref name="url"/> and converts it into a <see cref="Recipe"/>.
        /// </summary>
        /// <param name="url">Address of the recipe page; it is also stored on the resulting recipe.</param>
        /// <returns>
        /// A recipe built from the name and preview URL matched in the page HTML and from the
        /// ingredient groups found under the element with id <c>ingredient-section</c>.
        /// </returns>
        /// <exception cref="Exception">
        /// The number of group headings differs from the number of ingredient lists.
        /// </exception>
        public async Task<Recipe> Import(string url)
        {
            var html = await uriLoader.Load(url);

            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            // SelectNodes returns null (not an empty collection) when the XPath matches nothing,
            // so fall back to empty arrays instead of failing with a NullReferenceException.
            // The first <p> is skipped (presumably a section title rather than a group heading);
            // the remaining headings are paired with the <ul> lists by position.
            var headings = doc.DocumentNode.SelectNodes("//*[@id=\"ingredient-section\"]/div/p")?.Skip(1).ToArray()
                           ?? Array.Empty<HtmlAgilityPack.HtmlNode>();
            var lists = doc.DocumentNode.SelectNodes("//*[@id=\"ingredient-section\"]/div/ul")?.ToArray()
                        ?? Array.Empty<HtmlAgilityPack.HtmlNode>();

            if (headings.Length != lists.Length)
            {
                throw new Exception("Unable to import");
            }

            var name    = _nameRegex.Match(html).Groups["name"].Value;
            var preview = _previewPath.Match(html).Groups["url"].Value;

            var ingredientGroups = new List<IngredientGroup>(headings.Length);
            for (var index = 0; index < headings.Length; index++)
            {
                ingredientGroups.Add(Get(headings[index].InnerHtml, lists[index].InnerHtml));
            }

            return new Recipe(name, url, ingredientGroups.ToArray(), preview);
        }