/// <summary>
/// Sets up the movie-page URL pattern, the shared HTML parser and the two lookup tables that map
/// each <see cref="RequiredValueType"/> to a function extracting that value from an HTML string.
/// Extractors that are not written yet throw <see cref="NotImplementedException"/>.
/// </summary>
public Parser()
{
    moviePageUrl = new Regex(@"^/film/\d+/$");
    htmlParser = new AngleSharp.Parser.Html.HtmlParser();

    // Shared placeholder for values that have no extractor yet.
    Func<string, string> notImplemented = html => { throw new NotImplementedException(); };

    // ---- Extractors for a full movie page ----
    Func<string, string> movieName = html =>
    {
        var heading = htmlParser.Parse(html).QuerySelector("div#headerFilm>h1.moviename-big");

        // A heading without nested markup is returned as is; otherwise everything
        // starting at the first " <" is cut off.
        return heading.InnerHtml == heading.TextContent
            ? heading.TextContent
            : Regex.Replace(heading.InnerHtml, " <.*", "");
    };

    Func<string, string> movieYear = html =>
    {
        // NOTE(review): TagName is compared against lowercase "td"/"a"; confirm the casing
        // AngleSharp reports for HTML elements.
        var labelCell = htmlParser.Parse(html).All
            .First(m => m.TagName == "td" && m.ClassName == "type" && m.TextContent == "год");

        // The value sits in the cell next to the label, inside the first link.
        return labelCell.NextElementSibling.Children
            .First(m => m.TagName == "a")
            .TextContent;
    };

    var moviePage = new Dictionary<RequiredValueType, Func<string, string>>();
    moviePage.Add(RequiredValueType.Country, notImplemented);
    moviePage.Add(RequiredValueType.Genre, notImplemented);
    moviePage.Add(RequiredValueType.Name, movieName);
    moviePage.Add(RequiredValueType.Producer, notImplemented);
    moviePage.Add(RequiredValueType.Rate, notImplemented);
    moviePage.Add(RequiredValueType.Year, movieYear);
    MoviePageParser = moviePage;

    // ---- Extractors for a single search-result entry ----
    var searchEntry = new Dictionary<RequiredValueType, Func<string, string>>();
    searchEntry.Add(
        RequiredValueType.Country,
        html => htmlParser.Parse(html).QuerySelector("span.grey+span.grey").TextContent);
    searchEntry.Add(RequiredValueType.Genre, notImplemented);
    searchEntry.Add(
        RequiredValueType.Name,
        html => htmlParser.Parse(html).QuerySelector("p.name>a").TextContent);
    searchEntry.Add(RequiredValueType.Producer, notImplemented);
    searchEntry.Add(RequiredValueType.Rate, notImplemented);
    searchEntry.Add(
        RequiredValueType.Year,
        html => htmlParser.Parse(html).QuerySelector("p.name>span.year").TextContent);
    SearchResultEntryParser = searchEntry;
}
/// <summary>
/// Downloads the page returned by <c>GetUrlLink()</c> and converts every
/// <c>h2</c> element carrying the <c>serp-item__title</c> class into a <see cref="VideoHosting"/> entry.
/// </summary>
/// <returns>One entry per matching heading; an empty list when none match.</returns>
public override List<VideoHosting> Parse()
{
    string htmlSourse;
    using (WebClient client = new WebClient())
    {
        client.Encoding = Encoding.UTF8;
        htmlSourse = client.DownloadString(GetUrlLink());
    }

    var parser = new AngleSharp.Parser.Html.HtmlParser();
    IHtmlDocument document = parser.Parse(htmlSourse);

    return document
        .GetElementsByTagName("h2")                             // all h2 tags
        .Where(t => t.ClassList.Contains("serp-item__title"))  // keep only result titles
        .Select(t => new VideoHosting                           // map to the result model
        {
            NameVideo = ExcludeTagB(t.InnerHtml),
            NameHosting = HostNames.Yandex,
            Time = DateTime.Now
        })
        .ToList();
}
////////////////////////Files/////////////////////////////
///////////////////////Parser///////////////////////////////
/// <summary>
/// Walks over the files returned by <c>getFiles()</c>, and for each one that gets a non-empty
/// new name reads its content and parses it as HTML. The post-processing of the parsed
/// table is not implemented yet.
/// </summary>
public void runParser()
{
    // Get the files that need to be parsed and processed.
    foreach (string file in getFiles())
    {
        // 1. Build the new file name (used when publishing to the site).
        var newName = this.eNewName(file);
        if (newName == "")
        {
            continue;
        }

        // 2. Read the file content. ReadAllBytes reads the complete file and always
        //    releases the handle, even when reading fails.
        byte[] array = File.ReadAllBytes(file);

        // Decode the bytes into a string using the system default encoding.
        string html = System.Text.Encoding.Default.GetString(array);

        // 3. Parse the HTML.
        var parser = new AngleSharp.Parser.Html.HtmlParser();
        var document = parser.Parse(html);
        var table = document.QuerySelector("table");

        // What remains is to find only the required elements and remove them.
        // 4. TODO: remove two rows from the HTML (competition group and number of places).
    }
}
/// <summary>
/// Downloads the listing page at <paramref name="url"/> and projects every <c>li</c> of the
/// <c>ul.screening_left_ul</c> list into a <see cref="HouseInfo"/>.
/// </summary>
/// <param name="url">Address of the listing page.</param>
/// <returns>A lazily evaluated sequence with one item per list entry.</returns>
private IEnumerable<HouseInfo> GetRoomList(string url)
{
    var html = HTTPHelper.GetHTMLByURL(url);
    var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);
    var entries = document.QuerySelector("ul.screening_left_ul").QuerySelectorAll("li");

    return entries.Select(entry =>
    {
        var timeText = entry.QuerySelector("p.screening_time").TextContent;
        var priceText = entry.QuerySelector("h5").TextContent;
        var link = entry.QuerySelector("a");

        // Location is the first comma-separated part of the link text, with everything
        // up to and including the "租" character removed.
        var firstPart = link.TextContent.Split(',').FirstOrDefault();
        var location = firstPart.Remove(0, firstPart.IndexOf("租") + 1);

        // Strip the currency decorations; an unparsable price stays 0.
        decimal housePrice = 0;
        decimal.TryParse(priceText.Replace("¥", "").Replace("元/月", ""), out housePrice);

        // The marker type is the price in thousands, capped at Black.
        LocationMarkBGType markBGType;
        if ((housePrice / 1000) > (int)LocationMarkBGType.Black)
        {
            markBGType = LocationMarkBGType.Black;
        }
        else
        {
            markBGType = (LocationMarkBGType)(housePrice / 1000);
        }

        return new HouseInfo
        {
            Money = priceText,
            HouseURL = "http://www.huzhumaifang.com" + link.GetAttribute("href"),
            HouseLocation = location,
            HouseTime = timeText,
            HousePrice = housePrice,
            LocationMarkBG = markBGType.ToString() + ".png",
        };
    });
}
/// <summary>
/// Downloads the HTML document at <paramref name="uri"/> using browser-like request headers
/// and parses it.
/// </summary>
/// <param name="uri">Address of the remote document.</param>
/// <returns>The parsed document, or <c>null</c> when downloading or parsing fails.</returns>
private IDocument GetHtmlDocument(string uri)
{
    try
    {
        string html;

        // Download the HTML document from the remote URI.
        using (WebClient client = new WebClient())
        {
            // Add request headers to appear as a browser.
            client.Headers.Add("user-agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36");
            client.Headers.Add("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            html = client.DownloadString(uri);
        }

        var parser = new AngleSharp.Parser.Html.HtmlParser();
        return parser.Parse(html);
    }
    catch (Exception ex)
    {
        // Any failure ends up here, so the real cause is appended to the log entry
        // instead of being discarded.
        LogEvent("File " + uri + " not found or document is empty. " + ex.Message);

        // Callers receive null for every failure.
        return null;
    }
}
/// <summary>
/// Extracts the articles listed in <paramref name="html"/>. Every
/// <c>article</c> element with both <c>data-acturowid</c> and <c>data-datepubli</c> becomes one
/// <see cref="Article"/>; relative links are resolved against <paramref name="urlPage"/>.
/// </summary>
/// <param name="html">Markup of the list page.</param>
/// <param name="urlPage">Address of the page, used to build absolute URLs.</param>
/// <returns>The articles in document order.</returns>
public static List<Article> ParseArticleList(string html, string urlPage)
{
    // Original author's note: won't find some encodings : just press F5 several times...
    var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);

    return document
        .QuerySelectorAll("article[data-acturowid][data-datepubli]")
        .Select(node =>
        {
            String publishedOn = node.Attributes["data-datepubli"].Value;
            var illustration = node.QuerySelectorAll("img[class=ded-image]").First().Attributes["data-frz-src"].GetAbsUrl(urlPage);
            var titleLink = node.QuerySelectorAll("h1 > a[href]").First();

            // The first two characters of the subtitle text are skipped.
            var subTitle = node.QuerySelectorAll("span[class=soustitre]").First().TextContent.Substring(2);

            // The comment counter is optional; a missing or unparsable value yields 0.
            var counter = node.QuerySelectorAll("span[class=nb_comments]").FirstOrDefault();
            int.TryParse(counter?.TextContent, out int commentsCount);

            return new Article
            {
                Id = node.Attributes["data-acturowid"].Value,
                PublicationTimeStamp = ConvertToTimeStamp(publishedOn, Constants.FORMAT_DATE_ARTICLE),
                UrlIllustration = illustration,
                Url = titleLink.Attributes["href"].GetAbsUrl(urlPage),
                Title = titleLink.TextContent,
                SubTitle = subTitle,
                TotalCommentsCount = commentsCount,
                HasSubscription = node.QuerySelectorAll("img[alt=badge_abonne]").Any(),
            };
        })
        .ToList();
}
/// <summary>
/// Reads the page count from the text of the <c>a.end</c> link on the index page.
/// </summary>
/// <param name="indexURL">Address of the index page.</param>
/// <returns>The number parsed from the link text, or 0 when the link is absent.</returns>
private int GetPageCount(string indexURL)
{
    var html = HTTPHelper.GetHTMLByURL(indexURL);
    var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);

    var lastPageLink = document.QuerySelector("a.end");
    string pageText = lastPageLink?.TextContent;
    if (pageText == null)
    {
        pageText = "0";
    }

    return Convert.ToInt32(pageText);
}
/// <summary>
/// Downloads the spys.one proxy list for <paramref name="country"/> and extracts the proxies.
/// The page's first body script is evaluated once, and each port expression is then
/// evaluated in the same script context.
/// </summary>
/// <param name="country">Country whose <c>abb</c> value is inserted into the list URL.</param>
/// <returns>The proxies found on the page.</returns>
public ProxyCollection GetProxyByCountry(Country country)
{
    string listUrl = String.Format("http://spys.one/free-proxy-list/{0}/", country.abb);
    string pageSource = client.DownloadString(listUrl);

    HtmlDocument agilityDocument = new HtmlDocument();
    agilityDocument.LoadHtml(pageSource);

    JavaScriptEngine scriptEngine = new JavaScriptEngine();
    var scriptContext = new AngleSharp.Parser.Html.HtmlParser().Parse(pageSource);

    // Run the page's own script before evaluating the per-row port expressions.
    scriptEngine.EvaluateScript(scriptContext, agilityDocument.DocumentNode.SelectSingleNode(@"/html/body/script").InnerText);

    var proxies = new ProxyCollection();
    var rows = Regex.Matches(pageSource, @"((\d+)\.(\d+)\.(\d+)\.(\d+))(<script(.*?)</script>)(.*?)(HTTP</font></a>|HTTP</font><font|SOCKS5)");
    foreach (Match row in rows)
    {
        var proxy = new Proxy();

        // The port is the expression after the '+' inside the row's inline script.
        string portExpression = Regex.Match(row.Value, @"(?<=\+)(.*?)(?=\)</script)").Value;
        string port = (string)scriptEngine.EvaluateScript(scriptContext, "\"\"+" + portExpression);
        string ip = Regex.Match(row.Value, @"(\d+)\.(\d+)\.(\d+)\.(\d+)").Value;

        proxy.ip = ip;
        proxy.port = port;

        // Group 9 is the markup fragment that identifies the protocol.
        string marker = row.Groups[9].Value;
        if (marker == @"HTTP</font></a>")
        {
            proxy.protocol = "http";
        }
        else if (marker == @"HTTP</font><font")
        {
            proxy.protocol = "https";
        }
        else if (marker == @"SOCKS5")
        {
            proxy.protocol = "socks5";
        }
        else
        {
            proxy.protocol = "unknown";
        }

        proxies.Add(proxy);
    }

    return proxies;
}
/// <summary>
/// Parses <paramref name="source"/> (re-using the cached document when the same source is
/// passed again) and collects every element matching one of the selectors in <c>Settings.Tags</c>.
/// </summary>
/// <param name="source">HTML markup to analyse.</param>
/// <returns>
/// A <see cref="Parsing"/> stamped with the current UTC time and holding the collected tags.
/// The tag list is empty when no document is available.
/// </returns>
public async Task<Parsing> ParseAsync(string source)
{
    // Re-parse only when the source changed since the previous call.
    if (string.IsNullOrWhiteSpace(_source) || !Equals(_source, source))
    {
        _source = source;
        var domParser = new AngleSharp.Parser.Html.HtmlParser();
        _document = await domParser.ParseAsync(source);
    }

    Parsing result = new Parsing();
    List<Tag> tags = new List<Tag>();
    var document = _document;

    await Task.Run(() =>
    {
        // Without a document there is nothing to query. Previously the null result of
        // "_document?.QuerySelectorAll" was enumerated and threw.
        if (document != null)
        {
            foreach (string selector in Settings.Tags)
            {
                foreach (IElement tag in document.QuerySelectorAll(selector))
                {
                    tags.Add(new Tag()
                    {
                        // Invariant lower-casing keeps tag names stable regardless of the current culture.
                        Name = tag.TagName.ToLowerInvariant(),
                        Attributes = ParseTag(tag),
                        Parsing = result
                    });
                }
            }
        }

        result.Date = DateTime.UtcNow;
        result.Tags = tags;
    });

    return result;
}
/// <summary>
/// Parses <paramref name="_text"/> as HTML and yields a single string: the concatenated text
/// content of all elements matching <paramref name="_Selector"/>, in document order.
/// </summary>
/// <param name="_text">HTML markup to search.</param>
/// <param name="_Selector">CSS selector of the elements whose text is collected.</param>
/// <returns>An enumerator producing exactly one string (empty when nothing matches).</returns>
public static IEnumerator TextContent(string _text, string _Selector)
{
    var parser = new AngleSharp.Parser.Html.HtmlParser();
    var document = parser.Parse(_text);

    // A builder avoids re-copying the accumulated text for every matched element.
    var combined = new System.Text.StringBuilder();
    foreach (var item in document.QuerySelectorAll(_Selector))
    {
        if (item.TextContent != null)
        {
            combined.Append(item.TextContent);
        }
    }

    yield return combined.ToString();
}
/// <summary>
/// Posts a schedule query to <c>UrlShedule</c> and reads the returned tables. Each table
/// becomes one dictionary entry keyed by its zero-based position in the response; each row
/// contributes a <see cref="Shedule"/> built from its second cell (time) and third cell (subject).
/// </summary>
/// <param name="faculty">Value of the <c>faculty</c> request field.</param>
/// <param name="teacher">Value of the <c>teacher</c> request field.</param>
/// <param name="group">Value of the <c>group</c> request field.</param>
/// <param name="sdate">Value of the <c>sdate</c> request field.</param>
/// <param name="edate">Value of the <c>edate</c> request field.</param>
/// <returns>Table index mapped to the schedule rows of that table.</returns>
public Dictionary<int, List<Shedule>> GetShedule(string faculty, string teacher, string group, string sdate, string edate)
{
    // The fields are joined with a bare '&'. The previous " &group=" carried a stray space
    // that ended up appended to the teacher value.
    // NOTE(review): values are sent unescaped, as before — confirm what the endpoint expects.
    string request = "faculty=" + faculty + "&teacher=" + teacher + "&group=" + group + "&sdate=" + sdate + "&edate=" + edate + "&n=700";

    Dictionary<int, List<Shedule>> SheduleDictionary = new Dictionary<int, List<Shedule>>();

    // Code page 1251 is only available after the code-pages provider is registered.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    wc.Encoding = Encoding.GetEncoding(1251);
    string htmlresult = wc.UploadString(UrlShedule, request);

    IHtmlDocument IHD = new AngleSharp.Parser.Html.HtmlParser().Parse(htmlresult);
    var tables = IHD.QuerySelectorAll("table");
    for (var a = 0; a < tables.Length; a++)
    {
        // A fresh list per table.
        List<Shedule> shedules = new List<Shedule>();

        // The first child of the table holds the rows.
        foreach (var tr in tables[a].Children[0].Children)
        {
            string time = tr.Children[1].InnerHtml.Replace("<br>", "-");
            string subject = tr.Children[2].InnerHtml;
            shedules.Add(new Shedule() { time = time, subject = subject });
        }

        SheduleDictionary.Add(a, shedules);
    }

    return SheduleDictionary;
}
/// <summary>
/// Obtains the markup (from the loader when <c>isUrl</c> is set, otherwise from <c>Source</c>),
/// parses it, runs the configured parser over the document and raises <c>ParsingCompleted</c>
/// with the result.
/// </summary>
public async Task Parse()
{
    string markup = isUrl ? await _loader.GetPage() : Source;
    Console.WriteLine("loader");

    var document = await new AngleSharp.Parser.Html.HtmlParser().ParseAsync(markup);
    Console.WriteLine("parser");

    _result = await _parser.Parse(document);

    // Notify subscribers, if there are any.
    var completed = ParsingCompleted;
    if (completed != null)
    {
        completed.Invoke(this, new ParseResult<Dictionary<string, List<StringDictionary>>>() { Res = _result });
    }
}
/// <summary>
/// Downloads the page at <c>url</c> and reads the video title, thumbnail and description
/// from its Open Graph meta tags and the <c>eow-description</c> paragraph.
/// </summary>
/// <returns>A <see cref="Video"/> populated from the page.</returns>
/// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
/// <exception cref="InvalidOperationException">One of the expected elements is missing from the page.</exception>
public async Task<Video> parse()
{
    string html;

    // NOTE(review): a new HttpClient per call is costly; consider a shared instance.
    using (var handler = new HttpClientHandler())
    using (var client = new HttpClient(handler))
    using (var response = await client.GetAsync(url)) // the response is disposable too
    {
        if (!response.IsSuccessStatusCode)
        {
            // Still caught by existing catch (Exception) handlers, but identifies the failure as HTTP-related.
            throw new HttpRequestException(response.ReasonPhrase);
        }

        html = await response.Content.ReadAsStringAsync();
    }

    var video = new Video();
    var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);
    video.title = document.QuerySelectorAll("meta[property='og:title']").First().GetAttribute("content");
    video.thumbnailUrl = document.QuerySelectorAll("meta[property='og:image']").First().GetAttribute("content");
    video.description = document.QuerySelectorAll("p[id='eow-description']").First().TextContent;
    return video;
}
/// <summary>
/// Collects the content of every asset referenced by <paramref name="html"/> and finally the
/// page itself. References inside the stylesheets and the page are rewritten to
/// "<c>NewUri.Suffix</c>" of the collected assets.
/// </summary>
/// <param name="html">Markup of the page.</param>
/// <param name="pagePath">Path of the page; also stored as the Uri of the final HTML entry.</param>
/// <param name="htmlName">New name given to the HTML entry.</param>
/// <returns>The collected assets, with the rewritten page as the last item.</returns>
public List<AssetContent> GetAssetsContents(string html, string pagePath, string htmlName)
{
    var collected = new List<AssetContent>();
    var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);
    var pageAssets = GetHtmlAssets(document);

    // Everything that is not a stylesheet is added as binary content right away.
    var binaryAssets = pageAssets.Where(a => a.Suffix.ToLower() != "css").ToList();
    AddBinaryAssetsContents(collected, binaryAssets, pagePath);

    foreach (var stylesheet in pageAssets.Where(a => a.Suffix.ToLower() == "css"))
    {
        try
        {
            var css = GetStringAsset(stylesheet.Uri, mapPathResolver, webRoot, pagePath);
            if (string.IsNullOrEmpty(css))
            {
                continue;
            }

            // Assets referenced from the stylesheet are resolved relative to the stylesheet.
            var nestedAssets = GetCssAssets(css);
            AddBinaryAssetsContents(collected, nestedAssets, stylesheet.Uri);

            foreach (var known in collected)
            {
                // TODO: use regex to avoid replace uri that is not link but text
                css = css.Replace(known.Uri, known.NewUri + "." + known.Suffix);
            }

            var cssBytes = Encoding.UTF8.GetBytes(css);
            if (collected.All(a => a.Uri != stylesheet.Uri))
            {
                collected.Add(new AssetContent
                {
                    Uri = stylesheet.Uri,
                    NewUri = stylesheet.NewUri,
                    Suffix = stylesheet.Suffix,
                    Content = cssBytes
                });
            }
        }
        catch (Exception)
        {
            /* TODO: Trace somewhere */
        }
    }

    // Finally add the index html with its references rewritten.
    foreach (var known in collected)
    {
        // TODO: use regex to avoid replace uri that is not link but text
        html = html.Replace(known.Uri, known.NewUri + "." + known.Suffix);
    }

    html = "<!DOCTYPE html>" + html;
    collected.Add(new AssetContent
    {
        Uri = pagePath,
        NewUri = htmlName,
        Suffix = "html",
        Content = Encoding.UTF8.GetBytes(html)
    });

    return collected;
}
/// <summary>
/// Builds a <see cref="GeneralPost"/> from one tweet element and appends it to <c>posts</c>.
/// Image, text (censored with <paramref name="dict"/>), author data and links are read from the
/// markup; the date is parsed on a best-effort basis and left unset when parsing fails.
/// </summary>
/// <param name="item">Element of a single stream item.</param>
/// <param name="dict">Word list handed to the censor.</param>
private void PostSearch(IElement item, List<string> dict)
{
    GeneralPost tweet = new GeneralPost();
    var divs = item.QuerySelectorAll("div");

    // Optional media: first image of the media container, when both exist.
    var mediaContainer = divs.FirstOrDefault(k => HasClassFragment(k, "AdaptiveMediaOuterContainer"));
    if (mediaContainer != null)
    {
        var mediaImage = mediaContainer.QuerySelectorAll("img").FirstOrDefault();
        if (mediaImage != null)
        {
            tweet.Image = mediaImage.Attributes["src"].Value;
        }
    }

    long id = long.Parse(item.Attributes["data-item-id"].Value);

    tweet.Text = item.QuerySelectorAll("p").First(k => HasClassFragment(k, "tweet-text")).InnerHtml;
    Cenzor cenzor = new Cenzor();
    tweet.Text = cenzor.Cenz(tweet.Text, dict);
    tweet.Social = SocialMedia.Twitter;

    // The same div carries both the display name and the screen name.
    var tweetDiv = divs.First(k => HasClassFragment(k, "tweet"));
    tweet.AuthorName = tweetDiv.Attributes["data-name"].Value;
    string linkname = tweetDiv.Attributes["data-screen-name"].Value;
    tweet.PostLink = "https://twitter.com/" + linkname + "/status/" + id;
    tweet.AuthorLink = "https://twitter.com/" + linkname;
    tweet.AuthorAvatar = item.QuerySelectorAll("img").First(y => HasClassFragment(y, "avatar")).Attributes["src"].Value;

    try
    {
        // content > stream-item-header > small.time > a.tweet-timestamp
        var timestampLink = divs
            .First(k => HasClassFragment(k, "content"))
            .QuerySelectorAll("div").First(o => HasClassFragment(o, "stream-item-header"))
            .QuerySelectorAll("small").First(u => HasClassFragment(u, "time"))
            .QuerySelectorAll("a").First(f => HasClassFragment(f, "tweet-timestamp"));

        // The title is split as "<hh:mm> - <day> <month> <year>".
        var timeAndDate = timestampLink.Attributes["title"].Value.Split('-');
        var dateParts = timeAndDate[1].Split(' ');
        var timeParts = timeAndDate[0].TrimEnd(' ').Split(':');
        var day = dateParts[1];
        var month = dateParts[2];
        var year = dateParts[3];

        tweet.Date = new DateTime(
            Int32.Parse(year), getMonth(month), Int32.Parse(day),
            Int32.Parse(timeParts[0]), Int32.Parse(timeParts[1]), 0);
    }
    catch (Exception)
    {
        // Best effort: a title in an unexpected format leaves the date unset.
    }

    // Unwrap links in the text: replace every <a ...>inner</a> with its inner markup.
    AngleSharp.Parser.Html.HtmlParser parser = new AngleSharp.Parser.Html.HtmlParser();
    AngleSharp.Dom.Html.IHtmlDocument htmldocument = parser.Parse(tweet.Text);
    foreach (var link in htmldocument.QuerySelectorAll("a"))
    {
        tweet.Text = tweet.Text.Replace(link.OuterHtml, link.InnerHtml);
    }

    lock (posts)
    {
        posts.Add(tweet);
    }
}

/// <summary>
/// Tells whether the element's class attribute contains <paramref name="fragment"/>;
/// elements without a class attribute never match.
/// </summary>
private static bool HasClassFragment(IElement element, string fragment)
{
    var className = element.ClassName;
    return className != null && className.Contains(fragment);
}
/// <summary>
/// Creates fresh mocks, a real HTML parser and the <see cref="ConcertProvider"/> under test.
/// </summary>
public void SetUp()
{
    // Collaborators that are mocked.
    htmlProvider = new Mock<IHtmlProvider>();
    parserConfig = new Mock<IParserConfigProvider>();

    // The HTML parser is the real implementation.
    parser = new AngleSharp.Parser.Html.HtmlParser();

    provider = new ConcertProvider(parser, htmlProvider.Object, parserConfig.Object);
}
/// <summary>
/// Parses the HTML read from <paramref name="stream"/> and binds it to models of type
/// <typeparamref name="TModel"/>.
/// </summary>
/// <typeparam name="TModel">Model type to bind.</typeparam>
/// <param name="stream">Stream providing the HTML content.</param>
/// <param name="postBindAction">Optional action forwarded to the document binder.</param>
/// <returns>The models produced by <c>BindModelWithHtmlDocument</c>.</returns>
public IEnumerable<TModel> BindModelWithStream<TModel>(Stream stream, Action<TModel> postBindAction = null) where TModel : class, new()
{
    IHtmlDocument parsed = new HtmlParser().Parse(stream);
    return BindModelWithHtmlDocument<TModel>(parsed, postBindAction);
}
/// <summary>
/// Parses <paramref name="htmlContent"/> and binds it to models of type
/// <typeparamref name="TModel"/>.
/// </summary>
/// <typeparam name="TModel">Model type to bind.</typeparam>
/// <param name="htmlContent">HTML markup to parse.</param>
/// <param name="postBindAction">Optional action forwarded to the document binder.</param>
/// <returns>The models produced by <c>BindModelWithHtmlDocument</c>.</returns>
public IEnumerable<TModel> BindModelHtmlContent<TModel>(string htmlContent, Action<TModel> postBindAction = null) where TModel : class, new()
{
    IHtmlDocument parsed = new HtmlParser().Parse(htmlContent);
    return BindModelWithHtmlDocument(parsed, postBindAction);
}
/// <summary>
/// Parses the lower-cased <paramref name="html"/> and returns the assets found in the document.
/// </summary>
/// <param name="html">Markup of the page.</param>
/// <returns>The assets reported by the document-based overload.</returns>
public List<Asset> GetHtmlAssets(string html)
{
    // NOTE(review): the whole markup is lower-cased before parsing, so asset URIs lose
    // their original casing — confirm this is intended.
    var lowered = html.ToLowerInvariant();
    var document = new AngleSharp.Parser.Html.HtmlParser().Parse(lowered);
    return GetHtmlAssets(document);
}
/// <summary>
/// Reads the number of comments from the first <c>span[class=actu_separator_comms]</c>
/// element: the text before the first space is parsed as an integer.
/// </summary>
/// <param name="unContenu">Markup of the page.</param>
/// <param name="urlPage">Not used by this method.</param>
/// <returns>The parsed count, or 0 when the text does not start with a number.</returns>
/// <exception cref="InvalidOperationException">The page contains no such span.</exception>
public static int GetCommentsCount(string unContenu, string urlPage)
{
    var htmlDocument = new AngleSharp.Parser.Html.HtmlParser().Parse(unContenu);
    var stringNbComms = htmlDocument.QuerySelectorAll("span[class=actu_separator_comms]").First().TextContent;

    // When the text holds no space, use it as a whole instead of passing -1
    // as the end index to JavaSubString.
    int spacePosition = stringNbComms.IndexOf(' ');
    string value = spacePosition < 0
        ? stringNbComms.Trim()
        : stringNbComms.JavaSubString(0, spacePosition).Trim();

    int.TryParse(value, out int nbComms);
    return nbComms;
}
/// <summary>
/// Downloads the page of <paramref name="website"/> and parses it into an HTML document.
/// </summary>
/// <param name="website">Website whose <c>WebSiteUri</c> is downloaded.</param>
/// <returns>The parsed document.</returns>
public static async Task<IHtmlDocument> GetHtmlByWebsite(Website website)
{
    string download;

    // Dispose the client, and download without blocking the calling thread.
    using (var client = new WebClient())
    {
        download = await client.DownloadStringTaskAsync(website.WebSiteUri);
    }

    var parser = new AngleSharp.Parser.Html.HtmlParser();
    var result = await parser.ParseAsync(download);
    Tools.DebugTrace(" Lib.GetHtmlByWebsite", Thread.CurrentThread.ManagedThreadId);
    return result;
}
/// <summary>
/// Registers the assets referenced by <paramref name="source"/> in <paramref name="assets"/>.
/// For HTML sources (suffix starting with "htm") images, scripts and stylesheets are added and
/// their content is loaded; stylesheets are additionally processed recursively as "css".
/// </summary>
/// <param name="source">Content of the HTML page or stylesheet.</param>
/// <param name="suffix">Kind of <paramref name="source"/>: "htm…" or "css".</param>
/// <param name="pagePath">Path used to resolve the referenced assets.</param>
/// <param name="assets">Collection that receives the assets.</param>
/// <returns>The same <paramref name="assets"/> instance.</returns>
private async Task<AssetPathCollection> ExtractAssets(string source, string suffix, string pagePath, AssetPathCollection assets)
{
    if (suffix.ToLower().StartsWith("htm"))
    {
        var parser = new AngleSharp.Parser.Html.HtmlParser();
        var doc = parser.Parse(source);

        var images = doc.Images
            .Where(x => x.HasAttribute("src"));

        // A <link> without a rel attribute is skipped; dereferencing the missing attribute used to throw.
        var styles = doc.GetElementsByTagName("link")
            .Where(l => l.HasAttribute("rel") && l.Attributes["rel"].Value.Trim().ToLower() == "stylesheet")
            .Where(c => c.HasAttribute("href"));

        var scripts = doc.GetElementsByTagName("script")
            .Where(x => x.HasAttribute("src"));

        assets.AddSerializedAssets(images, "src");
        assets.AddSerializedAssets(scripts, "src");
        assets.AddSerializedAssets(styles, "href");

        // Stylesheets: load as text, store as UTF-8 bytes and scan them for nested assets.
        foreach (var asset in assets.Assets.Where(a => a.AssetType == "css"))
        {
            var content = await Zipper.GetStringAsset(asset.OriginalPath, mapPathResolver, webRoot, pagePath);
            var binaryContent = Encoding.UTF8.GetBytes(content);
            asset.Content = binaryContent;
            await ExtractAssets(content, "css", asset.OriginalPath, assets);
        }

        // Everything else is loaded as binary.
        foreach (var asset in assets.Assets.Where(a => a.AssetType != "css"))
        {
            var binaryContent = await Zipper.GetBinaryAsset(asset.OriginalPath, mapPathResolver, webRoot, pagePath);
            asset.Content = binaryContent;
        }
    }
    else if (suffix.ToLower() == "css")
    {
        // NOTE(review): this branch looks unfinished. The rewritten source is never stored,
        // the new name uses "suffix" ("css") rather than the referenced file's own
        // extension (csssuffix), and no asset is registered. Confirm the intended behavior.
        var urls = Zipper.ExtaxtUrlsFromStyle(source);
        foreach (var url in urls)
        {
            var csslocalPath = await Zipper.ReturnLocalPath(url);
            var csssuffix = csslocalPath.Split('.').Last();
            var newUrl = Guid.NewGuid().ToString().Replace("-", "") + "." + suffix;
            source = source.Replace(url, newUrl);
        }
    }

    return assets;
}
/// <summary>
/// Reads the HTML file at <paramref name="path"/> and parses it into a DOM with an
/// AngleSharp parser configured with CSS support.
/// </summary>
/// <param name="path">Path of the HTML file.</param>
/// <returns>The parsed document.</returns>
public static AngleSharp.Dom.Html.IHtmlDocument ReadAndParseHtmlFile(string path)
{
    // Set up the AngleSharp HTML parser.
    var angleSharConfig = Configuration.Default.WithCss();
    var parser = new AngleSharp.Parser.Html.HtmlParser(angleSharConfig);

    // The file handle is released as soon as parsing has finished (it used to stay open).
    using (FileStream fs = File.OpenRead(path))
    {
        return parser.Parse(fs);
    }
}
/// <summary>
/// Converts HTML markup to plain text.
/// </summary>
/// <param name="html">The HTML string to convert.</param>
/// <returns>The plain text representation of the HTML content, trimmed at both ends.</returns>
public string ToPlainText(string html)
{
    var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);

    using (var writer = new StringWriter())
    {
        ConvertContentToText(document.ChildNodes, writer);
        writer.Flush();

        // Only leading and trailing white space is removed here.
        return writer.ToString().Trim();
    }
}
/// <summary>
/// Zips a small page with a stylesheet, a script and an image, then checks that the archive
/// has four entries, contains a CSS file, and that the image reference in index.html was rewritten.
/// </summary>
public void Bundle()
{
    var html = "<html><head><link href=\"/Content/css?v=peG2vCX8wlIEw2lPUnRL6uPAxina05CUT_UoTb_UXfw1\" rel=\"stylesheet\"/><script src=\"/bundles/modernizr?v=wBEWDufH_8Md-Pbioxomt90vm6tJN2Pyy9u9zHtWsPo1\"></script></head>Hello <img src=\"Content/test.png\" /></html>";
    string rootpath = AppDomain.CurrentDomain.BaseDirectory;

    // Resolve every path the bundler may ask for to a file below the test directory.
    var pathResolver = new Mock<IMapPathResolver>();
    pathResolver
        .Setup(x => x.MapPath("", "Content/test.png"))
        .Returns(Path.Combine(rootpath, "Content", "test.png"));
    pathResolver
        .Setup(x => x.MapPath("", "../fonts/glyphicons-halflings-regular.eot"))
        .Returns(Path.Combine(rootpath, "fonts", "glyphicons-halflings-regular.eot"));
    pathResolver
        .Setup(x => x.MapPath("", "../fonts/glyphicons-halflings-regular.svg"))
        .Returns(Path.Combine(rootpath, "fonts", "glyphicons-halflings-regular.svg"));
    pathResolver
        .Setup(x => x.MapPath("", "../fonts/glyphicons-halflings-regular.ttf"))
        .Returns(Path.Combine(rootpath, "fonts", "glyphicons-halflings-regular.ttf"));
    pathResolver
        .Setup(x => x.MapPath("", "../fonts/glyphicons-halflings-regular.woff"))
        .Returns(Path.Combine(rootpath, "fonts", "glyphicons-halflings-regular.woff"));
    pathResolver
        .Setup(x => x.MapPath("", "../../images/cheap_diagonal_fabric.png"))
        .Returns(Path.Combine(rootpath, "Content", "test.png"));

    byte[] zippedHtml = Zipper.ZipPage(html, pathResolver.Object, "http://localhost:57399", "");
    var archiveStream = new MemoryStream(zippedHtml);

    using (var zip = new ZipArchive(archiveStream, ZipArchiveMode.Read))
    {
        Assert.Equal(4, zip.Entries.Count);
        Assert.True(zip.Entries.Any(x => x.Name.EndsWith(".css")), "no css file");

        foreach (var entry in zip.Entries)
        {
            using (var entryStream = entry.Open())
            {
                var reader = new StreamReader(entryStream);
                if (entry.Name.ToLower() != "index.html")
                {
                    continue;
                }

                // The page must still hold one image, now pointing to a different source.
                var pageMarkup = reader.ReadToEnd();
                var doc = new AngleSharp.Parser.Html.HtmlParser().Parse(pageMarkup);
                Assert.Equal(1, doc.Images.Count());
                Assert.NotEqual("Content/test.png", doc.Images[0].Attributes["src"].Value);
            }
        }
    }
}
/// <summary>
/// Sets <c>article</c> to the markup of the last anchor found in <c>text</c> (empty when there
/// is none), followed by a line break and the result of <c>GetLsi()</c>.
/// </summary>
/// <returns>The new value of <c>article</c>.</returns>
private string getSmall()
{
    article = GetLsi();

    var document = new AngleSharp.Parser.Html.HtmlParser().Parse(text);
    AngleSharp.Dom.IHtmlCollection<AngleSharp.Dom.IElement> anchors = document.QuerySelectorAll("a");

    // Only the last anchor is kept.
    string lastAnchorHtml = anchors.Length > 0
        ? anchors[anchors.Length - 1].OuterHtml
        : string.Empty;

    article = lastAnchorHtml + Environment.NewLine + article;
    return article;
}
/// <summary>
/// Parses <paramref name="_text"/> as HTML and yields the value of <paramref name="_Attribute"/>
/// for every element that matches <paramref name="_Selector"/> and has that attribute.
/// </summary>
/// <param name="_text">HTML markup to search.</param>
/// <param name="_Selector">CSS selector of the elements to inspect.</param>
/// <param name="_Attribute">Name of the attribute to read.</param>
/// <returns>An enumerator over the attribute values, in document order.</returns>
public static IEnumerator GetAttribute(string _text, string _Selector, string _Attribute)
{
    var parser = new AngleSharp.Parser.Html.HtmlParser();
    var document = parser.Parse(_text);

    foreach (var item in document.QuerySelectorAll(_Selector))
    {
        // Yield the requested attribute. The previous code checked _Attribute but always
        // returned "href".
        var value = item.GetAttribute(_Attribute);
        if (value != null)
        {
            yield return value;
        }
    }
}
/// <summary>
/// Downloads the schedule page at <paramref name="source"/> and builds one
/// <see cref="FilmSession"/> per <c>#afishaItem</c> found next to each <c>#afishaKtName</c> element.
/// Film details are parsed once per details link and shared between sessions.
/// </summary>
/// <param name="source">Address of the schedule page.</param>
/// <param name="date">Date text combined with the session time before parsing.</param>
/// <returns>The sessions in document order.</returns>
private IEnumerable<FilmSession> GetDataFromSource(string source, string date)
{
    List<FilmSession> filmSessionList = new List<FilmSession>();
    Dictionary<string, Film> existingFilmList = new Dictionary<string, Film>();
    var parser = new AngleSharp.Parser.Html.HtmlParser();

    string html;
    using (var webClient = new WebClient() { Encoding = System.Text.Encoding.UTF8 })
    {
        html = webClient.DownloadString(source);
    }

    var document = parser.Parse(html);
    foreach (var item in document.QuerySelectorAll("#afishaKtName"))
    {
        foreach (var subItem in item.NextElementSibling.QuerySelectorAll("#afishaItem"))
        {
            var filmSession = new FilmSession();
            var filmDetailsLink = subItem.QuerySelector(".filmName a").GetAttribute("href").Trim();

            // Parse each film once. The lookup table was never filled before, so every
            // session re-parsed its film. The call is also made directly now: the old
            // code started a thread and joined it immediately, which was equally
            // synchronous but let exceptions escape on the worker thread.
            Film film;
            if (!existingFilmList.TryGetValue(filmDetailsLink, out film))
            {
                film = ParseFilmDetails(filmDetailsLink);
                if (film != null)
                {
                    existingFilmList.Add(filmDetailsLink, film);
                }
            }

            filmSession.Film = film;
            filmSession.Cinema = new Cinema() { Name = Regex.Replace(item.QuerySelector("a").TextContent.Trim(), "[\".]", "") };
            filmSession.HallName = Regex.Replace(subItem.QuerySelector(".filmZal").TextContent.Trim(), "[\".]", "");

            // Sessions without a listed time default to 14:00.
            AngleSharp.Dom.IElement el = subItem.QuerySelector(".filmShows .time a");
            filmSession.DateTime = DateTime.Parse(
                (el == null) ? date + " 14:00" : date + " " + el.TextContent.Trim());

            filmSession.Price = subItem.QuerySelector(".filmPrices").TextContent.Trim();
            filmSession.SeatIsFree = seatIsFree;
            filmSessionList.Add(filmSession);
        }
    }

    return filmSessionList;
}
/// <summary>
/// Looks up <paramref name="kata"/> at <c>BaseURL</c> and returns the text content of the
/// element matching <c>Selector</c>.
/// </summary>
/// <param name="kata">Word appended to the base URL.</param>
/// <returns>The element's text, or a fixed "not found" message when there is none.</returns>
private static string GetSynonyms(string kata)
{
    // CreateClient supplies the web client; the response is an HTML string.
    var html = CreateClient().DownloadString(BaseURL + kata);

    // Parse the response with AngleSharp and pick the element via the selector.
    var document = new AngleSharp.Parser.Html.HtmlParser().Parse(html);
    var match = document.QuerySelector(Selector);

    string synonyms = match == null ? null : match.TextContent;
    if (synonyms == null)
    {
        return "Maaf, sinonim tidak ditemukan";
    }

    return synonyms;
}
/// <summary>
/// Click handler: reloads the file, builds <c>article</c> from <c>text</c> plus the result of
/// <c>GetLsi()</c>, copies it to the clipboard, shows it in the browser control and fills the
/// name/URL fields from the anchors in the article.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
private void button3_Click(object sender, EventArgs e)
{
    readFile();

    // article is assigned before GetLsi() is called; keep this order.
    article = text;
    article = article + Environment.NewLine + GetLsi();

    Clipboard.SetText(article);
    webBrowser1.DocumentText = article;

    var parser = new AngleSharp.Parser.Html.HtmlParser();
    var document = parser.Parse(article);
    AngleSharp.Dom.IHtmlCollection <AngleSharp.Dom.IElement> anchors = document.QuerySelectorAll("a");

    // Every anchor overwrites the fields, so they end up showing the last anchor.
    foreach (AngleSharp.Dom.IElement anchor in anchors)
    {
        wName.Text = anchor.InnerHtml;
        wUrl.Text = anchor.GetAttribute("href");
    }
}
/// <summary>
/// Loads the configuration from <paramref name="configJson"/>, logs in with the configured
/// credentials and checks the response for the logout link.
/// </summary>
/// <param name="configJson">Configuration values as JSON.</param>
/// <returns><see cref="IndexerConfigurationStatus.RequiresTesting"/> after a successful login.</returns>
/// <exception cref="ExceptionWithConfigData">
/// Thrown by the failure callback with the error text reported by the site.
/// </exception>
public async Task<IndexerConfigurationStatus> ApplyConfiguration(JToken configJson)
{
    configData.LoadValuesFromJson(configJson);
    var pairs = new Dictionary<string, string>
    {
        { "username", configData.Username.Value },
        { "password", configData.Password.Value }
    };

    // Get initial cookies
    CookieHeader = string.Empty;
    var response = await RequestLoginAndFollowRedirect(LoginPostUrl, pairs, CookieHeader, true, null, LoginUrl);

    await ConfigureIfOK(response.Cookies, response.Content != null && response.Content.Contains("/logout.php"), () =>
    {
        // The check above also fails for null content, so guard the parse and the
        // element lookup. Otherwise a NullReferenceException would replace the
        // configuration error.
        var parser = new AngleSharp.Parser.Html.HtmlParser();
        var document = parser.Parse(response.Content ?? string.Empty);
        var messageEl = document.QuerySelector(".error_text");
        var errorMessage = messageEl != null
            ? messageEl.TextContent.Trim()
            : "Login failed: the site returned no error message.";
        throw new ExceptionWithConfigData(errorMessage, configData);
    });

    return IndexerConfigurationStatus.RequiresTesting;
}
/// <summary>
/// Converts the rows of the <c>#highlight</c> table (all but the first row) into releases.
/// The "added" column is interpreted as Romanian local time — either "Today HH:mm:ss",
/// "Yesterday HH:mm:ss" or "d-MMM-yyyy HH:mm:ss" — and converted to the machine's local time.
/// </summary>
/// <param name="htmlResponse">Markup of the search result page.</param>
/// <returns>
/// The releases parsed before any error; a parsing error is reported through
/// <c>OnParseError</c> and ends the processing.
/// </returns>
private List<ReleaseInfo> ParseResponse(string htmlResponse)
{
    // Custom Romanian zone: UTC+2, with DST from the last Sunday of March 03:00
    // until the last Sunday of October 04:00.
    TimeZoneInfo.TransitionTime startTransition = TimeZoneInfo.TransitionTime.CreateFloatingDateRule(new DateTime(1, 1, 1, 3, 0, 0), 3, 5, DayOfWeek.Sunday);
    TimeZoneInfo.TransitionTime endTransition = TimeZoneInfo.TransitionTime.CreateFloatingDateRule(new DateTime(1, 1, 1, 4, 0, 0), 10, 5, DayOfWeek.Sunday);
    TimeSpan delta = new TimeSpan(1, 0, 0);
    TimeZoneInfo.AdjustmentRule adjustment = TimeZoneInfo.AdjustmentRule.CreateAdjustmentRule(new DateTime(1999, 10, 1), DateTime.MaxValue.Date, delta, startTransition, endTransition);
    TimeZoneInfo.AdjustmentRule[] adjustments = { adjustment };
    TimeZoneInfo romaniaTz = TimeZoneInfo.CreateCustomTimeZone("Romania Time", new TimeSpan(2, 0, 0), "(GMT+02:00) Romania Time", "Romania Time", "Romania Daylight Time", adjustments);

    List<ReleaseInfo> releases = new List<ReleaseInfo>();
    try
    {
        var parser = new AngleSharp.Parser.Html.HtmlParser();
        var document = parser.Parse(htmlResponse);
        var rows = document.QuerySelectorAll("#highlight > tbody > tr:not(:First-child)");

        // "Today"/"Yesterday" refer to the Romanian calendar day. The UTC date used before
        // is one day behind during the first hours after Romanian midnight.
        DateTime romaniaToday = DateTime.SpecifyKind(
            TimeZoneInfo.ConvertTimeFromUtc(DateTime.UtcNow, romaniaTz).Date,
            DateTimeKind.Unspecified);

        foreach (var row in rows)
        {
            var release = new ReleaseInfo();

            var linkNameElement = row.QuerySelector("a.torrent_name_link");
            release.Title = linkNameElement.GetAttribute("title");
            release.Description = release.Title;
            release.Guid = new Uri(SiteLink + linkNameElement.GetAttribute("href"));
            release.Comments = release.Guid;
            release.Link = new Uri(SiteLink + row.QuerySelector("td.table_links > a").GetAttribute("href"));
            release.Category = TvCategoryParser.ParseTvShowQuality(release.Title);

            release.Seeders = ParseUtil.CoerceInt(row.QuerySelector("td.table_seeders").TextContent.Trim());
            release.Peers = ParseUtil.CoerceInt(row.QuerySelector("td.table_leechers").TextContent.Trim()) + release.Seeders;
            release.Size = ReleaseInfo.GetBytes(row.QuerySelector("td.table_size").TextContent);
            release.MinimumRatio = 1;
            release.MinimumSeedTime = 172800;

            DateTime pubDateRomania;
            var dateString = row.QuerySelector("td.table_added").TextContent.Trim();
            if (dateString.StartsWith("Today "))
            {
                pubDateRomania = romaniaToday + TimeSpan.Parse(dateString.Split(' ')[1]);
            }
            else if (dateString.StartsWith("Yesterday "))
            {
                pubDateRomania = romaniaToday + TimeSpan.Parse(dateString.Split(' ')[1]) - TimeSpan.FromDays(1);
            }
            else
            {
                pubDateRomania = DateTime.SpecifyKind(DateTime.ParseExact(dateString, "d-MMM-yyyy HH:mm:ss", CultureInfo.InvariantCulture), DateTimeKind.Unspecified);
            }

            DateTime pubDateUtc = TimeZoneInfo.ConvertTimeToUtc(pubDateRomania, romaniaTz);
            release.PublishDate = pubDateUtc.ToLocalTime();

            releases.Add(release);
        }
    }
    catch (Exception ex)
    {
        OnParseError(htmlResponse, ex);
    }

    return releases;
}